diff options
author | Pacien TRAN-GIRARD | 2014-10-24 18:05:48 +0000 |
---|---|---|
committer | Pacien TRAN-GIRARD | 2014-10-24 18:05:48 +0000 |
commit | 4403fda939ef42aeffeccb343d74f3dc3b840f91 (patch) | |
tree | 63fd704f15f3030f1455aad0ef92403c5d093c70 /main.py | |
parent | 16529a0d212e1387eacd590c0e5e1b1a13dc2641 (diff) | |
parent | bdf9099df8c2a4636b0ad0e710b73330877eef37 (diff) | |
download | wikistats-4403fda939ef42aeffeccb343d74f3dc3b840f91.tar.gz |
Merge branch 'refactor' into 'master'
Refactor
See merge request !1
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 101 |
1 file changed, 70 insertions, 31 deletions
@@ -1,62 +1,101 @@ | |||
1 | from banapedia.Ban import * | ||
2 | import bandict | ||
3 | from collections import Counter | 1 | from collections import Counter |
2 | import webbrowser | ||
3 | |||
4 | import pygal | 4 | import pygal |
5 | import numpy as np | 5 | import numpy as np |
6 | import pygeoip | ||
7 | |||
8 | from wikibania.ban.BanDB import BanDB | ||
9 | from wikibania.ban.BanDBWrapper import BanDBWrapper | ||
10 | import sysproxy | ||
11 | |||
12 | |||
13 | # PARAMS | ||
14 | |||
15 | GEOIP_DB = "/usr/share/GeoIP/GeoIP.dat" | ||
6 | 16 | ||
7 | BAN_MAP_FILE = "output/ban-map.svg" | 17 | BAN_MAP_FILE = "output/ban-map.svg" |
8 | BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg" | 18 | BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg" |
9 | HIST_FILE = "output/histogram.svg" | 19 | HIST_FILE = "output/histogram.svg" |
20 | STATS_FILE = "output/stats.txt" | ||
21 | |||
22 | BAN_DB_FILE = "resources/ban_list.json" | ||
23 | |||
24 | FETCH_SAMPLES = 2000 | ||
25 | FETCH_DB = False | ||
26 | DUMP_DB = False | ||
27 | LOAD_DB = False | ||
28 | |||
29 | OPEN_FILES = False | ||
30 | |||
31 | |||
32 | # SETUP | ||
10 | 33 | ||
11 | SAMPLES = 1000 | 34 | sysproxy.configure_system_proxy() |
35 | geoip_looker = pygeoip.GeoIP(GEOIP_DB) | ||
12 | 36 | ||
13 | BAN_FILE = "resources/ban_list.json" | 37 | ban_db = BanDB(geoip_looker) |
14 | 38 | ||
15 | ban_dict_list = bandict.BanList(BAN_FILE, samples=SAMPLES, from_internet=True) | 39 | if FETCH_DB: |
40 | ban_db.fetch(FETCH_SAMPLES) | ||
41 | if LOAD_DB: | ||
42 | ban_db.load_file(BAN_DB_FILE) | ||
43 | if DUMP_DB: | ||
44 | ban_db.dump_file(BAN_DB_FILE) | ||
16 | 45 | ||
17 | # ======== HISTOGRAM ======= # | 46 | ban_db_wrapper = BanDBWrapper(ban_db) |
18 | 47 | ||
19 | ban_durations = ban_dict_list.get_durations() | ||
20 | (ban_durations_bars, bins) = np.histogram(ban_durations, | ||
21 | bins=[round(365/12*x) | ||
22 | for x in range(1, 50+2) | ||
23 | ] | ||
24 | ) | ||
25 | 48 | ||
26 | print("[INFO]", "Generating histogram") | 49 | # HISTOGRAM |
50 | |||
51 | ban_durations = ban_db_wrapper.get_all_durations() | ||
52 | duration_bins = [round(365 / 12 * x) for x in range(1, 50 + 2)] | ||
53 | (ban_durations_bars, bins) = np.histogram(ban_durations, bins=duration_bins) | ||
54 | |||
27 | bar_chart = pygal.Bar(legend_at_bottom=True) | 55 | bar_chart = pygal.Bar(legend_at_bottom=True) |
28 | bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES | 56 | bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % len(ban_db.list()) |
29 | bar_chart.x_labels = map(str, range(1, len(ban_durations_bars)+1)) | 57 | bar_chart.x_labels = map(str, range(1, len(ban_durations_bars) + 1)) |
30 | bar_chart.add("Number of active bans", ban_durations_bars) | 58 | bar_chart.add("Number of active bans", ban_durations_bars) |
31 | bar_chart.render_to_file(HIST_FILE) | 59 | bar_chart.render_to_file(HIST_FILE) |
32 | print("[INFO]", "Histogram generation complete") | ||
33 | 60 | ||
34 | # ======= NB BAN MAP ======= # | 61 | if OPEN_FILES: |
62 | webbrowser.open(HIST_FILE, 2) | ||
63 | |||
64 | |||
65 | # BAN DURATION STATS | ||
66 | |||
67 | with open(STATS_FILE, "w") as stats_file: | ||
68 | stats_file.write("Mean: %.2f days\n" % np.mean(ban_durations)) | ||
69 | stats_file.write("Median: %.2f days\n" % np.median(ban_durations)) | ||
70 | stats_file.write("Deviation: %.2f\n" % np.std(ban_durations)) | ||
71 | stats_file.write("Variance: %.2f\n" % np.var(ban_durations)) | ||
35 | 72 | ||
36 | country_ban_list = ban_dict_list.get_countries() | 73 | if OPEN_FILES: |
74 | webbrowser.open(STATS_FILE, 2) | ||
75 | |||
76 | |||
77 | # NB BAN MAP | ||
78 | |||
79 | country_ban_list = ban_db_wrapper.get_all_countries() | ||
37 | nb_bans_by_country = Counter(country_ban_list) | 80 | nb_bans_by_country = Counter(country_ban_list) |
38 | 81 | ||
39 | print("[INFO]", "Generating ban map") | ||
40 | worldmap_chart = pygal.Worldmap(legend_at_bottom=True) | 82 | worldmap_chart = pygal.Worldmap(legend_at_bottom=True) |
41 | worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES | 83 | worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % len(ban_db.list()) |
42 | worldmap_chart.add("Active bans", nb_bans_by_country) | 84 | worldmap_chart.add("Active bans", nb_bans_by_country) |
43 | worldmap_chart.render_to_file(BAN_MAP_FILE) | 85 | worldmap_chart.render_to_file(BAN_MAP_FILE) |
44 | print("[INFO]", "Ban map generation complete") | 86 | |
87 | if OPEN_FILES: | ||
88 | webbrowser.open(BAN_MAP_FILE, 2) | ||
45 | 89 | ||
46 | 90 | ||
47 | # ======= BAN DURATION MAP ======= # | 91 | # BAN DURATION MAP |
48 | 92 | ||
49 | average_ban_duration_by_country = ban_dict_list.average_ban_by_country() | 93 | average_ban_duration_by_country = ban_db_wrapper.calc_average_duration_by_country() |
50 | 94 | ||
51 | print("[INFO]", "Generating ban duration map") | ||
52 | worldmap_chart = pygal.Worldmap(legend_at_bottom=True) | 95 | worldmap_chart = pygal.Worldmap(legend_at_bottom=True) |
53 | worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES | 96 | worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % len(ban_db.list()) |
54 | worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country) | 97 | worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country) |
55 | worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE) | 98 | worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE) |
56 | print("[INFO]", "Ban duration map generation complete") | ||
57 | 99 | ||
58 | print("Some additional stats about ban durations:") | 100 | if OPEN_FILES: |
59 | print(" Mean: %.2f days" % np.mean(ban_durations)) | 101 | webbrowser.open(BAN_DURATION_MAP_FILE, 2) |
60 | print(" Median: %.2f days" % np.median(ban_durations)) | ||
61 | print(" Deviation: %.2f" % np.std(ban_durations)) | ||
62 | print(" Variance: %.2f" % np.var(ban_durations)) | ||