summaryrefslogtreecommitdiff
path: root/main.py
diff options
context:
space:
mode:
authorPacien TRAN-GIRARD2014-10-24 01:45:28 +0200
committerPacien TRAN-GIRARD2014-10-24 01:45:28 +0200
commit5e4f38688e4c14846b8264970a79c482c1ca7012 (patch)
tree6d150aa8f26eed939aabc9de1db3239a46cb7e05 /main.py
downloadwikistats-5e4f38688e4c14846b8264970a79c482c1ca7012.tar.gz
Initial commit
Diffstat (limited to 'main.py')
-rw-r--r--main.py119
1 files changed, 119 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8187e1f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,119 @@
1from banapedia.Ban import *
2from collections import Counter
3import json
4import pygal
5import numpy as np
6import urllib.request
7
8__author__ = 'pacien'
9
10
11BAN_MAP_FILE = "output/ban-map.svg"
12BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg"
13HIST_FILE = "output/histogram.svg"
14
15BAN_FILE = "resources/ban_list.json"
16
17SAMPLES = 30000
18SAMPLES_BY_QUERY = 500
19
20
def configure_proxy():
    """Install a process-wide urllib opener using the system proxy settings.

    The proxies reported by ``urllib.request.getproxies()`` are wrapped in a
    ``ProxyHandler`` and installed with ``install_opener``, so later
    ``urllib.request.urlopen`` calls use that opener.
    """
    proxy_handler = urllib.request.ProxyHandler(urllib.request.getproxies())
    urllib.request.install_opener(urllib.request.build_opener(proxy_handler))
25
26
def load_from_internet():
    """Fetch the ban list over the network.

    Sets up the system proxy, then calls ``fetch_multipart_ban_dict`` with
    ``SAMPLES`` and ``SAMPLES_BY_QUERY``.

    Returns:
        The value returned by ``fetch_multipart_ban_dict`` (presumably a list
        of ban dicts -- confirm against ``banapedia.Ban``).
    """
    configure_proxy()
    return fetch_multipart_ban_dict(SAMPLES, SAMPLES_BY_QUERY)
30
31
def load_from_local():
    """Load the ban list from the local JSON file ``BAN_FILE``.

    Returns:
        The decoded JSON content of ``BAN_FILE``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(BAN_FILE, "r", encoding="utf-8") as ban_dict_file:
        return json.load(ban_dict_file)
35
36
def write_to_local(ban_dict_list):
    """Write ``ban_dict_list`` to ``BAN_FILE`` as tab-indented JSON.

    Args:
        ban_dict_list: JSON-serialisable object to store.

    Raises:
        OSError: if the file cannot be opened for writing.
        TypeError: if ``ban_dict_list`` is not JSON-serialisable.
    """
    # Explicit encoding so the file is written the same way on every platform.
    with open(BAN_FILE, "w", encoding="utf-8") as ban_dict_file:
        json.dump(ban_dict_list, ban_dict_file, indent="\t")
40
41
# To refresh the local file from the network, uncomment the two lines below.
# ban_dict_list = load_from_internet()
# write_to_local(ban_dict_list)

ban_dict_list = load_from_local()

# ban_list is iterated several times further down. Materialise it so that a
# lazy iterator returned by map_bans (if it is one) is not exhausted after the
# first pass; this is a no-op copy when map_bans already returns a list.
ban_list = list(map_bans(ban_dict_list))
48
49
########## HISTOGRAM ##########

# Bin edges at 1, 2, ..., 51 months, a month being 365/12 of the unit returned
# by get_duration() (presumably days -- confirm in banapedia.Ban). This yields
# 50 bins; durations below the first or above the last edge are not counted.
month_edges = [round(365/12*x) for x in range(1, 50+2)]

ban_durations = [ban.get_duration() for ban in ban_list]
# From here on ban_durations holds the per-bin counts, not the raw durations.
(ban_durations, bins) = np.histogram(ban_durations, bins=month_edges)

print("[INFO]", "Generating histogram")
bar_chart = pygal.Bar(legend_at_bottom=True)
bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES
# A concrete list rather than a one-shot map iterator, so the labels survive
# being read more than once.
bar_chart.x_labels = [str(month) for month in range(1, len(ban_durations)+1)]
bar_chart.add("Number of active bans", ban_durations)
bar_chart.render_to_file(HIST_FILE)
print("[INFO]", "Histogram generation complete")
62
63
64########## NB BAN MAP ##########
65
def count_by_country(ban_list):
    """Count bans per country code.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A ``Counter`` mapping each country code to its number of bans.
    """
    return Counter(ban.get_country_code() for ban in ban_list)
69
nb_bans_by_country = count_by_country(ban_list)

# Render a world map coloured by the number of bans per country code.
print("[INFO]", "Generating ban map")
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES
worldmap_chart.add("Active bans", nb_bans_by_country)
worldmap_chart.render_to_file(BAN_MAP_FILE)
print("[INFO]", "Ban map generation complete")
78
79
80########## BAN DURATION MAP ##########
81
def group_by_country(ban_list):
    """Group bans by country code.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A dict mapping each country code to the list of bans carrying that
        code, in the order they were encountered.
    """
    bans_by_country = {}

    for ban in ban_list:
        # setdefault creates the per-country list on first sight of a code.
        bans_by_country.setdefault(ban.get_country_code(), []).append(ban)

    return bans_by_country
94
95
def calc_average_ban_by_country(ban_by_country_dict):
    """Compute the mean ban duration for each country.

    Args:
        ban_by_country_dict: mapping of country code to a list of objects
            exposing ``get_duration()``.

    Returns:
        A dict mapping each country code to the ``np.mean`` of the durations
        of its bans.
    """
    return {
        country: np.mean([ban.get_duration() for ban in bans])
        for country, bans in ban_by_country_dict.items()
    }
104
ban_duration_by_country = group_by_country(ban_list)
average_ban_duration_by_country = calc_average_ban_by_country(ban_duration_by_country)

# Convert the averages to months using the same month length (365/12) as the
# histogram bins, so both charts agree on what a month is. Assumes
# get_duration() is expressed in days -- confirm in banapedia.Ban.
average_ban_duration_by_country = {
    country: duration/(365/12)
    for country, duration in average_ban_duration_by_country.items()
}

print("[INFO]", "Generating ban duration map")
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES
worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country)
worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE)
print("[INFO]", "Ban duration map generation complete")
116
# Closing banner printed once every chart has been rendered.
print("\nTHIS WAS A TRIUMPH!")
print("I'M MAKING A NOTE HERE:")
print("HUGE [SUCCESS]")