1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
import urllib.request
import json
import numpy as np
from banapedia.wapi.WikipediaQuery import BlockQuery
from banapedia.Ban import *
class BanList:
    """Collection of ban records, loaded from a JSON file or fetched online.

    Attributes:
        dict_list: The raw ban records. ``__init__`` indexes each record by
            the keys ``"user"``, ``"timestamp"`` and ``"expiry"``.
        ban_list: One ``Ban`` object built from each entry of ``dict_list``.
    """

    def __init__(self, data_file, samples=30000, samples_by_query=500,
                 from_internet=False):
        """Load the raw ban records and build the ``Ban`` objects.

        Args:
            data_file: Path of a JSON file holding the ban records. Only read
                when ``from_internet`` is false.
            samples: Number of bans to request when fetching online.
            samples_by_query: Maximum number of bans requested per query
                when fetching online.
            from_internet: When true, fetch the records through
                ``BlockQuery`` instead of reading ``data_file``.
        """
        if from_internet:
            # Install a process-wide urllib opener configured with the
            # proxies reported by urllib.request.getproxies().
            proxy = urllib.request.ProxyHandler(urllib.request.getproxies())
            opener = urllib.request.build_opener(proxy)
            urllib.request.install_opener(opener)
            self.dict_list = self.fetch_multipart(samples, samples_by_query)
        else:
            with open(data_file, "r") as ban_dict_file:
                self.dict_list = json.load(ban_dict_file)

        # `datetime`, `ISO_TIMESTAMP` and `Ban` are expected to be provided
        # by the module-level `from banapedia.Ban import *`.
        self.ban_list = [
            Ban(
                ban_dict["user"],
                datetime.strptime(ban_dict["timestamp"], ISO_TIMESTAMP),
                datetime.strptime(ban_dict["expiry"], ISO_TIMESTAMP),
            )
            for ban_dict in self.dict_list
        ]

    def fetch_multipart(self, n, query_limit):
        """Fetch up to ``n`` ban records using several paginated queries.

        Args:
            n: Total number of records wanted.
            query_limit: Maximum number of records requested per query.

        Returns:
            The list of raw ban records collected. It holds fewer than ``n``
            entries when the server runs out of results first.
        """
        ban_dict_list = []
        n_fetched = 0
        continue_token = None
        print("[INFO]", "Fetching %d bans" % n)
        while n_fetched < n:
            to_fetch = min(query_limit, n - n_fetched)
            query = BlockQuery(
                bkprop=["user", "timestamp", "expiry"],
                bkshow=["temp", "ip"],
                limit=to_fetch,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            blocks = results["query"]["blocks"]
            ban_dict_list.extend(blocks)
            # Count what was actually returned, not what was asked for.
            n_fetched += len(blocks)
            print("[INFO]", "Fetched %d over %d bans" % (n_fetched, n))
            if not blocks:
                # An empty page would never advance the counter; stop here
                # rather than loop forever.
                break
            try:
                continue_token = (
                    results["query-continue"]["blocks"]["bkcontinue"]
                )
            except KeyError:
                # No continuation token: this was the last page available.
                break
        print("[INFO]", "Bans fetching complete")
        return ban_dict_list

    def write_to_file(self, outfile):
        """Dump the raw ban records to ``outfile`` as tab-indented JSON."""
        with open(outfile, "w") as ban_dict_file:
            json.dump(self.dict_list, ban_dict_file, indent="\t")

    def get_durations(self):
        """Return ``get_duration()`` of every ban, in ``ban_list`` order."""
        return [ban.get_duration() for ban in self.ban_list]

    def get_countries(self):
        """Return ``get_country_code()`` of every ban, in ``ban_list`` order."""
        return [ban.get_country_code() for ban in self.ban_list]

    def __iter__(self):
        """Iterate over the raw ban records (``dict_list``)."""
        return iter(self.dict_list)

    def items(self):
        """Return ``self.dict_list.items()``.

        NOTE(review): ``__init__`` iterates ``dict_list`` as a sequence of
        records, and a list has no ``items`` method, so this call raises
        ``AttributeError`` in that case. Confirm the intended result before
        relying on this method.
        """
        return self.dict_list.items()

    def by_country(self):
        """Group the ``Ban`` objects by country code.

        Returns:
            A dict mapping each value returned by ``get_country_code()`` to
            the list of bans carrying it, in ``ban_list`` order.
        """
        bans_by_country = {}
        for ban in self.ban_list:
            bans_by_country.setdefault(ban.get_country_code(), []).append(ban)
        return bans_by_country

    def average_ban_by_country(self):
        """Return the mean ban duration of each country, divided by 30.

        The division converts the mean to months; this presumably relies on
        ``get_duration()`` returning days -- TODO confirm.

        Returns:
            A dict mapping each country code to its scaled mean duration.
        """
        return {
            country: np.mean([ban.get_duration() for ban in bans]) / 30
            for country, bans in self.by_country().items()
        }
|