1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
from banapedia.Ban import *
from collections import Counter
import json
import pygal
import numpy as np
import urllib.request
__author__ = 'pacien'
# Output paths of the three SVG charts rendered by this script:
# ban count world map, average ban duration world map, and duration histogram.
BAN_MAP_FILE = "output/ban-map.svg"
BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg"
HIST_FILE = "output/histogram.svg"
# Local JSON cache of the ban data; read by load_from_local() and
# overwritten by write_to_local().
BAN_FILE = "resources/ban_list.json"
# Both values are forwarded to fetch_multipart_ban_dict() by
# load_from_internet(); SAMPLES is also printed in every chart title.
# Presumably the total number of bans to fetch and the number requested per
# query — confirm against banapedia.Ban.
SAMPLES = 30000
SAMPLES_BY_QUERY = 500
def configure_proxy():
    """Install a global urllib opener that honours the system proxy settings.

    The proxy mapping is taken from ``urllib.request.getproxies()``. Once the
    opener is installed, subsequent ``urllib.request.urlopen`` calls in this
    process go through it.
    """
    system_proxies = urllib.request.getproxies()
    proxy_handler = urllib.request.ProxyHandler(system_proxies)
    urllib.request.install_opener(urllib.request.build_opener(proxy_handler))
def load_from_internet():
    """Load the ban data through ``fetch_multipart_ban_dict``.

    The proxy-aware opener is installed first, then the fetch helper is
    called with the module-level ``SAMPLES`` and ``SAMPLES_BY_QUERY`` values.

    Returns:
        Whatever ``fetch_multipart_ban_dict`` returns; the script stores it
        as ``ban_dict_list``.
    """
    configure_proxy()
    fetched_bans = fetch_multipart_ban_dict(SAMPLES, SAMPLES_BY_QUERY)
    return fetched_bans
def load_from_local():
    """Read the cached ban data from ``BAN_FILE``.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so the same cache file loads identically on every
    system (the default encoding of ``open`` is locale-dependent).

    Returns:
        The deserialized JSON document.

    Raises:
        OSError: if ``BAN_FILE`` cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open(BAN_FILE, "r", encoding="utf-8") as ban_dict_file:
        return json.load(ban_dict_file)
def write_to_local(ban_dict_list):
    """Serialize ``ban_dict_list`` to ``BAN_FILE`` as tab-indented JSON.

    Any existing file is overwritten. The file is opened explicitly as UTF-8
    so the result does not depend on the platform's locale encoding and
    matches what ``load_from_local`` reads back.

    Args:
        ban_dict_list: JSON-serializable ban data to cache.

    Raises:
        OSError: if ``BAN_FILE`` cannot be opened for writing.
        TypeError: if ``ban_dict_list`` contains values ``json`` cannot encode.
    """
    with open(BAN_FILE, "w", encoding="utf-8") as ban_dict_file:
        json.dump(ban_dict_list, ban_dict_file, indent="\t")
# Data source toggle: uncomment the next two lines to fetch the bans again
# and rewrite the local cache (BAN_FILE) before it is read back below.
# As written, the script only reads the existing cache.
# ban_dict_list = load_from_internet()
# write_to_local(ban_dict_list)
ban_dict_list = load_from_local()
# map_bans is not defined in this file; presumably it comes from the
# banapedia.Ban star import. The sections below call get_duration() and
# get_country_code() on each element of its result.
# NOTE(review): ban_list is iterated three times below (histogram,
# count_by_country, group_by_country). This assumes map_bans returns a
# re-iterable sequence rather than a one-shot iterator — confirm.
ban_list = map_bans(ban_dict_list)
########## HISTOGRAM ##########
# Bin edges sit on month boundaries 1..51 (365/12 per month, rounded; this
# assumes get_duration() returns days — confirm). That gives 50 bins of one
# month each; durations outside the edge range are not counted.
month_edges = [round(365 / 12 * month) for month in range(1, 50 + 2)]
# ban_durations holds the per-bin counts, bins the edge array.
ban_durations, bins = np.histogram(
    [ban.get_duration() for ban in ban_list],
    bins=month_edges,
)

print("[INFO]", "Generating histogram")
bar_chart = pygal.Bar(legend_at_bottom=True)
bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES
# One x label per bin, numbered from 1.
bin_numbers = range(1, len(ban_durations) + 1)
bar_chart.x_labels = map(str, bin_numbers)
bar_chart.add("Number of active bans", ban_durations)
bar_chart.render_to_file(HIST_FILE)
print("[INFO]", "Histogram generation complete")
########## NB BAN MAP ##########
def count_by_country(ban_list):
    """Tally bans per country code.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A ``collections.Counter`` mapping each country code to the number of
        bans that reported it.
    """
    country_codes = (ban.get_country_code() for ban in ban_list)
    return Counter(country_codes)
# Tally the bans per country code and render the counts on a world map.
nb_bans_by_country = count_by_country(ban_list)
print("[INFO]", "Generating ban map")
# NOTE(review): newer pygal releases (2.x) ship the world map as the separate
# pygal_maps_world plugin (pygal.maps.world.World) — confirm that
# pygal.Worldmap exists in the pygal version this script is run with.
# The name worldmap_chart is reused for the duration map further down.
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
# The title prints the configured SAMPLES constant, not the number of bans
# actually loaded.
worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES
worldmap_chart.add("Active bans", nb_bans_by_country)
worldmap_chart.render_to_file(BAN_MAP_FILE)
print("[INFO]", "Ban map generation complete")
########## BAN DURATION MAP ##########
def group_by_country(ban_list):
    """Group bans by the country code they report.

    Args:
        ban_list: iterable of objects exposing ``get_country_code()``.

    Returns:
        A plain ``dict`` mapping each country code to the list of bans with
        that code. Keys appear in order of first occurrence and each list
        keeps the input order.
    """
    ban_duration_by_country = {}
    for ban in ban_list:
        # setdefault creates the bucket the first time a country code is
        # seen, replacing the separate ".keys()" membership test.
        ban_duration_by_country.setdefault(ban.get_country_code(), []).append(ban)
    return ban_duration_by_country
def calc_average_ban_by_country(ban_by_country_dict):
    """Compute the mean ban duration for every country.

    Args:
        ban_by_country_dict: mapping of country code to a list of objects
            exposing ``get_duration()``.

    Returns:
        A dict mapping each country code to the ``numpy.mean`` of the
        durations of its bans, in the same key order as the input.
    """
    return {
        country: np.mean([ban.get_duration() for ban in bans])
        for country, bans in ban_by_country_dict.items()
    }
ban_duration_by_country = group_by_country(ban_list)
# Per-country mean duration divided by 30, so the map reads in months (see the
# series label below).
# NOTE(review): the histogram section uses 365/12 per month while this uses
# 30 — confirm the difference is intended.
average_ban_duration_by_country = {
    code: mean_duration / 30
    for code, mean_duration in calc_average_ban_by_country(ban_duration_by_country).items()
}

print("[INFO]", "Generating ban duration map")
worldmap_chart = pygal.Worldmap(legend_at_bottom=True)
worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES
worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country)
worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE)
print("[INFO]", "Ban duration map generation complete")

# Closing banner, printed once all three charts have been rendered.
print("\nTHIS WAS A TRIUMPH!")
print("I'M MAKING A NOTE HERE:")
print("HUGE [SUCCESS]")
|