summary | refs | log | tree | commit | diff
path: root/wikibania/ban
diff options
context:
space:
mode:
Diffstat (limited to 'wikibania/ban')
-rw-r--r-- wikibania/ban/Ban.py 37
-rw-r--r-- wikibania/ban/BanDB.py 50
-rw-r--r-- wikibania/ban/BanDBWrapper.py 25
-rw-r--r-- wikibania/ban/__init__.py 1
4 files changed, 113 insertions, 0 deletions
diff --git a/wikibania/ban/Ban.py b/wikibania/ban/Ban.py
new file mode 100644
index 0000000..e06ca89
--- /dev/null
+++ b/wikibania/ban/Ban.py
@@ -0,0 +1,37 @@
1from datetime import datetime
2
3import pygeoip
4
5
# strftime/strptime pattern used by default for the "timestamp" and "expiry" fields.
ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"


class Ban:
    """One block record: the blocked user, when the block started and when it ends.

    Args:
        geoip_looker: Object exposing ``country_code_by_addr(addr)``; it is
            only used by :meth:`lookup_country_code`.
        user: Identifier of the blocked user. It is handed to the GeoIP
            lookup as the address.
        timestamp: ``datetime`` at which the block started.
        expiry: ``datetime`` at which the block ends.
        timestamp_format: ``strftime``/``strptime`` pattern used when
            converting ``timestamp`` and ``expiry`` to and from strings.
    """

    def __init__(self, geoip_looker, user=None, timestamp=None, expiry=None, timestamp_format=ISO_TIMESTAMP):
        self.geoip_looker = geoip_looker
        self.timestamp_format = timestamp_format

        self.user = user
        self.timestamp = timestamp
        self.expiry = expiry

    def items(self):
        """Return the ban as a plain dict with both dates rendered as strings.

        The dates are formatted with ``self.timestamp_format`` so that the
        result can be fed back into :meth:`hydrate` on an instance configured
        with the same format.
        """
        return {
            "user": self.user,
            "timestamp": self.timestamp.strftime(self.timestamp_format),
            "expiry": self.expiry.strftime(self.timestamp_format),
        }

    def hydrate(self, ban_dict):
        """Populate this ban from a dict with "user", "timestamp" and "expiry" keys.

        The two date strings are parsed with ``self.timestamp_format``.

        Raises:
            KeyError: If one of the three keys is missing.
            ValueError: If a date string does not match the format.
        """
        self.user = ban_dict["user"]
        self.timestamp = datetime.strptime(ban_dict["timestamp"], self.timestamp_format)
        self.expiry = datetime.strptime(ban_dict["expiry"], self.timestamp_format)

    def calc_duration(self):
        """Return the length of the ban in whole days (``timedelta.days``)."""
        return (self.expiry - self.timestamp).days

    def lookup_country_code(self):
        """Return the lower-cased country code for ``self.user``.

        Returns the literal string ``"UNKNOWN"`` when the lookup raises
        ``pygeoip.GeoIPError``.
        """
        try:
            return self.geoip_looker.country_code_by_addr(self.user).lower()
        except pygeoip.GeoIPError:
            return "UNKNOWN"
diff --git a/wikibania/ban/BanDB.py b/wikibania/ban/BanDB.py
new file mode 100644
index 0000000..e83aa3c
--- /dev/null
+++ b/wikibania/ban/BanDB.py
@@ -0,0 +1,50 @@
1import json
2
3from wikibania.ban.Ban import Ban
4from wikibania.wapi.WikipediaQuery import BlockQuery
5
6
class BanDB:
    """In-memory collection of ``Ban`` objects.

    Bans can be loaded from a list of dicts, from a JSON file, or fetched
    through ``BlockQuery``; they can be dumped back to a list of dicts or to
    a JSON file.

    Args:
        geoip_looker: Passed unchanged to every ``Ban`` this object creates.
    """

    def __init__(self, geoip_looker):
        self.geoip_looker = geoip_looker
        self.bans = []

    def list(self):
        """Return the internal list of bans (the list itself, not a copy)."""
        return self.bans

    def load(self, ban_list):
        """Append one hydrated ``Ban`` per entry of ``ban_list``.

        Existing bans are kept; each entry is passed to ``Ban.hydrate``.
        """
        for entry in ban_list:
            ban = Ban(self.geoip_looker)
            ban.hydrate(entry)
            self.bans.append(ban)

    def load_file(self, file_name):
        """Load bans from the JSON file ``file_name`` (a list of ban dicts)."""
        with open(file_name, "r", encoding="utf-8") as file:
            entries = json.load(file)
        self.load(entries)

    def dump(self):
        """Return the bans as a list of dicts, one ``ban.items()`` per ban."""
        return [ban.items() for ban in self.bans]

    def dump_file(self, file_name):
        """Write the bans to ``file_name`` as JSON.

        The bans are serialized before the file is opened, so a failure in
        ``dump()`` does not truncate an existing file.
        """
        ban_list = self.dump()
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(ban_list, file)

    def fetch(self, nb_samples, query_limit=500, continue_token=None):
        """Fetch up to ``nb_samples`` bans through ``BlockQuery`` and load them.

        Requests are issued in batches of at most ``query_limit`` entries,
        following the ``bkcontinue`` token of each response. Fetching stops
        early when a response carries no continuation token.

        Args:
            nb_samples: Total number of entries to request; nothing is
                requested when it is zero or negative.
            query_limit: Maximum number of entries per request.
            continue_token: Continuation token for the first request.

        Raises:
            ValueError: If ``query_limit`` is not positive.
        """
        if query_limit <= 0:
            raise ValueError("query_limit must be a positive integer")

        remaining = nb_samples
        # Iterative paging: the depth of work no longer grows the call stack.
        while remaining > 0:
            batch_size = min(remaining, query_limit)

            query = BlockQuery(
                properties=["user", "timestamp", "expiry"],
                show=["temp", "ip"],
                limit=batch_size,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            self.load(results["query"]["blocks"])

            remaining -= batch_size
            if remaining <= 0:
                break

            try:
                continue_token = results["query-continue"]["blocks"]["bkcontinue"]
            except KeyError:
                # No continuation in the response: nothing more to fetch.
                break
diff --git a/wikibania/ban/BanDBWrapper.py b/wikibania/ban/BanDBWrapper.py
new file mode 100644
index 0000000..8f396b0
--- /dev/null
+++ b/wikibania/ban/BanDBWrapper.py
@@ -0,0 +1,25 @@
1from collections import defaultdict
2
3import numpy
4
5
class BanDBWrapper:
    """Aggregation helpers over the bans exposed by ``ban_db.list()``.

    Args:
        ban_db: Object whose ``list()`` returns bans offering
            ``calc_duration()`` and ``lookup_country_code()``.
    """

    def __init__(self, ban_db):
        self.ban_db = ban_db

    def get_all_durations(self):
        """Return ``calc_duration()`` of every ban, in list order."""
        return [ban.calc_duration() for ban in self.ban_db.list()]

    def get_all_countries(self):
        """Return ``lookup_country_code()`` of every ban, in list order."""
        return [ban.lookup_country_code() for ban in self.ban_db.list()]

    def get_durations_by_country(self):
        """Return one ``(country_code, duration)`` tuple per ban, in list order."""
        return [(ban.lookup_country_code(), ban.calc_duration()) for ban in self.ban_db.list()]

    def calc_average_duration_by_country(self):
        """Return a dict mapping each country code to its mean ban duration.

        Durations are grouped by country code and averaged with
        ``numpy.mean``; countries appear in order of first occurrence.
        """
        ban_durations_by_country = defaultdict(list)

        for country, ban_duration in self.get_durations_by_country():
            ban_durations_by_country[country].append(ban_duration)

        return {
            country: numpy.mean(ban_durations)
            for country, ban_durations in ban_durations_by_country.items()
        }
diff --git a/wikibania/ban/__init__.py b/wikibania/ban/__init__.py
new file mode 100644
index 0000000..792d600
--- /dev/null
+++ b/wikibania/ban/__init__.py
@@ -0,0 +1 @@
#