summary | refs | log | tree | commit | diff
path: root/main.py
diff options
context:
space:
mode:
author: Pacien TRAN-GIRARD, 2014-10-24 18:05:48 +0000
committer: Pacien TRAN-GIRARD, 2014-10-24 18:05:48 +0000
commit: 4403fda939ef42aeffeccb343d74f3dc3b840f91 (patch)
tree: 63fd704f15f3030f1455aad0ef92403c5d093c70 /main.py
parent: 16529a0d212e1387eacd590c0e5e1b1a13dc2641 (diff)
parent: bdf9099df8c2a4636b0ad0e710b73330877eef37 (diff)
download: wikistats-4403fda939ef42aeffeccb343d74f3dc3b840f91.tar.gz
Merge branch 'refactor' into 'master'
Refactor. See merge request !1.
Diffstat (limited to 'main.py')
-rw-r--r-- main.py 101
1 file changed, 70 insertions, 31 deletions
diff --git a/main.py b/main.py
index 0d12a1e..da7c6ae 100644
--- a/main.py
+++ b/main.py
@@ -1,62 +1,101 @@
1from banapedia.Ban import *
2import bandict
3from collections import Counter 1from collections import Counter
2import webbrowser
3
4import pygal 4import pygal
5import numpy as np 5import numpy as np
6import pygeoip
7
8from wikibania.ban.BanDB import BanDB
9from wikibania.ban.BanDBWrapper import BanDBWrapper
10import sysproxy
11
12
13# PARAMS
14
15GEOIP_DB = "/usr/share/GeoIP/GeoIP.dat"
6 16
7BAN_MAP_FILE = "output/ban-map.svg" 17BAN_MAP_FILE = "output/ban-map.svg"
8BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg" 18BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg"
9HIST_FILE = "output/histogram.svg" 19HIST_FILE = "output/histogram.svg"
20STATS_FILE = "output/stats.txt"
21
22BAN_DB_FILE = "resources/ban_list.json"
23
24FETCH_SAMPLES = 2000
25FETCH_DB = False
26DUMP_DB = False
27LOAD_DB = False
28
29OPEN_FILES = False
30
31
32# SETUP
10 33
11SAMPLES = 1000 34sysproxy.configure_system_proxy()
35geoip_looker = pygeoip.GeoIP(GEOIP_DB)
12 36
13BAN_FILE = "resources/ban_list.json" 37ban_db = BanDB(geoip_looker)
14 38
15ban_dict_list = bandict.BanList(BAN_FILE, samples=SAMPLES, from_internet=True) 39if FETCH_DB:
40 ban_db.fetch(FETCH_SAMPLES)
41if LOAD_DB:
42 ban_db.load_file(BAN_DB_FILE)
43if DUMP_DB:
44 ban_db.dump_file(BAN_DB_FILE)
16 45
17# ======== HISTOGRAM ======= # 46ban_db_wrapper = BanDBWrapper(ban_db)
18 47
19ban_durations = ban_dict_list.get_durations()
20(ban_durations_bars, bins) = np.histogram(ban_durations,
21 bins=[round(365/12*x)
22 for x in range(1, 50+2)
23 ]
24 )
25 48
26print("[INFO]", "Generating histogram") 49# HISTOGRAM
50
51ban_durations = ban_db_wrapper.get_all_durations()
52duration_bins = [round(365 / 12 * x) for x in range(1, 50 + 2)]
53(ban_durations_bars, bins) = np.histogram(ban_durations, bins=duration_bins)
54
27bar_chart = pygal.Bar(legend_at_bottom=True) 55bar_chart = pygal.Bar(legend_at_bottom=True)
28bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES 56bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % len(ban_db.list())
29bar_chart.x_labels = map(str, range(1, len(ban_durations_bars)+1)) 57bar_chart.x_labels = map(str, range(1, len(ban_durations_bars) + 1))
30bar_chart.add("Number of active bans", ban_durations_bars) 58bar_chart.add("Number of active bans", ban_durations_bars)
31bar_chart.render_to_file(HIST_FILE) 59bar_chart.render_to_file(HIST_FILE)
32print("[INFO]", "Histogram generation complete")
33 60
34# ======= NB BAN MAP ======= # 61if OPEN_FILES:
62 webbrowser.open(HIST_FILE, 2)
63
64
65# BAN DURATION STATS
66
67with open(STATS_FILE, "w") as stats_file:
68 stats_file.write("Mean: %.2f days\n" % np.mean(ban_durations))
69 stats_file.write("Median: %.2f days\n" % np.median(ban_durations))
70 stats_file.write("Deviation: %.2f\n" % np.std(ban_durations))
71 stats_file.write("Variance: %.2f\n" % np.var(ban_durations))
35 72
36country_ban_list = ban_dict_list.get_countries() 73if OPEN_FILES:
74 webbrowser.open(STATS_FILE, 2)
75
76
77# NB BAN MAP
78
79country_ban_list = ban_db_wrapper.get_all_countries()
37nb_bans_by_country = Counter(country_ban_list) 80nb_bans_by_country = Counter(country_ban_list)
38 81
39print("[INFO]", "Generating ban map")
40worldmap_chart = pygal.Worldmap(legend_at_bottom=True) 82worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
41worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES 83worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % len(ban_db.list())
42worldmap_chart.add("Active bans", nb_bans_by_country) 84worldmap_chart.add("Active bans", nb_bans_by_country)
43worldmap_chart.render_to_file(BAN_MAP_FILE) 85worldmap_chart.render_to_file(BAN_MAP_FILE)
44print("[INFO]", "Ban map generation complete") 86
87if OPEN_FILES:
88 webbrowser.open(BAN_MAP_FILE, 2)
45 89
46 90
47# ======= BAN DURATION MAP ======= # 91# BAN DURATION MAP
48 92
49average_ban_duration_by_country = ban_dict_list.average_ban_by_country() 93average_ban_duration_by_country = ban_db_wrapper.calc_average_duration_by_country()
50 94
51print("[INFO]", "Generating ban duration map")
52worldmap_chart = pygal.Worldmap(legend_at_bottom=True) 95worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
53worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES 96worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % len(ban_db.list())
54worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country) 97worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country)
55worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE) 98worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE)
56print("[INFO]", "Ban duration map generation complete")
57 99
58print("Some additional stats about ban durations:") 100if OPEN_FILES:
59print(" Mean: %.2f days" % np.mean(ban_durations)) 101 webbrowser.open(BAN_DURATION_MAP_FILE, 2)
60print(" Median: %.2f days" % np.median(ban_durations))
61print(" Deviation: %.2f" % np.std(ban_durations))
62print(" Variance: %.2f" % np.var(ban_durations))