diff options
author | Pacien TRAN-GIRARD | 2014-10-24 19:59:05 +0200 |
---|---|---|
committer | Pacien TRAN-GIRARD | 2014-10-24 19:59:05 +0200 |
commit | bdf9099df8c2a4636b0ad0e710b73330877eef37 (patch) | |
tree | 63fd704f15f3030f1455aad0ef92403c5d093c70 /wikibania/ban | |
parent | 16529a0d212e1387eacd590c0e5e1b1a13dc2641 (diff) | |
download | wikistats-bdf9099df8c2a4636b0ad0e710b73330877eef37.tar.gz |
Very cleaner, much class, such readable, wow
Diffstat (limited to 'wikibania/ban')
-rw-r--r-- | wikibania/ban/Ban.py | 37 | ||||
-rw-r--r-- | wikibania/ban/BanDB.py | 50 | ||||
-rw-r--r-- | wikibania/ban/BanDBWrapper.py | 25 | ||||
-rw-r--r-- | wikibania/ban/__init__.py | 1 |
4 files changed, 113 insertions, 0 deletions
diff --git a/wikibania/ban/Ban.py b/wikibania/ban/Ban.py new file mode 100644 index 0000000..e06ca89 --- /dev/null +++ b/wikibania/ban/Ban.py | |||
@@ -0,0 +1,37 @@ | |||
1 | from datetime import datetime | ||
2 | |||
3 | import pygeoip | ||
4 | |||
5 | |||
ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"


class Ban:
    """One block entry: the blocked user plus the start and expiry instants.

    Attributes:
        geoip_looker: object exposing ``country_code_by_addr(addr)``; only
            used by :meth:`lookup_country_code`.
        timestamp_format: ``strftime``/``strptime`` pattern used when
            converting ``timestamp`` and ``expiry`` to and from strings.
        user: identifier handed to the GeoIP lookup.
        timestamp: ``datetime`` at which the block started.
        expiry: ``datetime`` at which the block ends.
    """

    def __init__(self, geoip_looker, user=None, timestamp=None, expiry=None, timestamp_format=ISO_TIMESTAMP):
        self.geoip_looker = geoip_looker
        self.timestamp_format = timestamp_format

        self.user = user
        self.timestamp = timestamp
        self.expiry = expiry

    def items(self):
        """Return the ban as a dict with both dates rendered as strings.

        The dates are formatted with ``self.timestamp_format`` so that the
        format chosen at construction time is honoured instead of being
        silently replaced by the module default.
        """
        return {
            "user": self.user,
            "timestamp": self.timestamp.strftime(self.timestamp_format),
            "expiry": self.expiry.strftime(self.timestamp_format),
        }

    def hydrate(self, ban_dict):
        """Populate this ban from a dict with keys user/timestamp/expiry.

        Raises:
            KeyError: if one of the three keys is missing.
            ValueError: if a date does not match ``self.timestamp_format``.
        """
        # Parse everything before assigning anything, so a malformed entry
        # cannot leave the object half-updated.
        user = ban_dict["user"]
        timestamp = datetime.strptime(ban_dict["timestamp"], self.timestamp_format)
        expiry = datetime.strptime(ban_dict["expiry"], self.timestamp_format)

        self.user = user
        self.timestamp = timestamp
        self.expiry = expiry

    def calc_duration(self):
        """Return the ban length as the ``days`` part of expiry - timestamp."""
        return (self.expiry - self.timestamp).days

    def lookup_country_code(self):
        """Return the lower-cased country code for ``user``, or "UNKNOWN".

        "UNKNOWN" is returned when the lookup raises ``pygeoip.GeoIPError``
        and also when it yields no code at all (``None`` or an empty string),
        which would otherwise crash on ``.lower()`` or produce a blank code.
        """
        try:
            code = self.geoip_looker.country_code_by_addr(self.user)
        except pygeoip.GeoIPError:
            return "UNKNOWN"

        if not code:
            return "UNKNOWN"
        return code.lower()
diff --git a/wikibania/ban/BanDB.py b/wikibania/ban/BanDB.py new file mode 100644 index 0000000..e83aa3c --- /dev/null +++ b/wikibania/ban/BanDB.py | |||
@@ -0,0 +1,50 @@ | |||
1 | import json | ||
2 | |||
3 | from wikibania.ban.Ban import Ban | ||
4 | from wikibania.wapi.WikipediaQuery import BlockQuery | ||
5 | |||
6 | |||
class BanDB:
    """In-memory list of ``Ban`` objects with JSON and remote-query loaders.

    Attributes:
        geoip_looker: passed unchanged to every ``Ban`` created by ``load``.
        bans: the list of loaded ``Ban`` objects, in load order.
    """

    def __init__(self, geoip_looker):
        self.geoip_looker = geoip_looker
        self.bans = []

    def list(self):
        """Return the internal list of bans (the list itself, not a copy)."""
        return self.bans

    def load(self, ban_list):
        """Append one hydrated ``Ban`` per entry of ``ban_list``."""
        for entry in ban_list:
            ban = Ban(self.geoip_looker)
            ban.hydrate(entry)
            self.bans.append(ban)

    def load_file(self, file_name):
        """Read a JSON list of ban dicts from ``file_name`` and load it."""
        with open(file_name, "r") as handle:
            entries = json.load(handle)
        self.load(entries)

    def dump(self):
        """Return the bans as a list of dicts (see ``Ban.items``)."""
        return [ban.items() for ban in self.bans]

    def dump_file(self, file_name):
        """Write the bans to ``file_name`` as a JSON list."""
        # Serialise before opening: opening with "w" truncates the file, so a
        # failure inside dump() must not be allowed to destroy existing data.
        ban_list = self.dump()
        with open(file_name, "w") as handle:
            json.dump(ban_list, handle)

    def fetch(self, nb_samples, query_limit=500, continue_token=None):
        """Query up to ``nb_samples`` blocks and load them, page by page.

        Args:
            nb_samples: total number of entries requested; nothing is queried
                when it is zero or negative.
            query_limit: maximum number of entries requested per query.
            continue_token: continuation token for the first query, if any.

        Raises:
            ValueError: if ``query_limit`` is not positive, since no progress
                could ever be made.
        """
        if query_limit <= 0:
            raise ValueError("query_limit must be a positive integer")

        remaining = nb_samples
        # Iterative paging: the call depth stays constant however many pages
        # are needed.
        while remaining > 0:
            batch_size = min(remaining, query_limit)

            query = BlockQuery(
                properties=["user", "timestamp", "expiry"],
                show=["temp", "ip"],
                limit=batch_size,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            self.load(results["query"]["blocks"])

            remaining -= batch_size
            if remaining <= 0:
                break

            # A response without a continuation token is treated as the end
            # of the data instead of raising KeyError.
            # NOTE(review): presumably the API omits "query-continue" on the
            # last page -- confirm against BlockQuery.
            continuation = results.get("query-continue", {}).get("blocks", {})
            continue_token = continuation.get("bkcontinue")
            if continue_token is None:
                break
diff --git a/wikibania/ban/BanDBWrapper.py b/wikibania/ban/BanDBWrapper.py new file mode 100644 index 0000000..8f396b0 --- /dev/null +++ b/wikibania/ban/BanDBWrapper.py | |||
@@ -0,0 +1,25 @@ | |||
1 | from collections import defaultdict | ||
2 | |||
3 | import numpy | ||
4 | |||
5 | |||
class BanDBWrapper:
    """Statistics helper built on top of a ban database.

    ``ban_db`` must expose ``list()`` returning objects that provide
    ``calc_duration()`` and ``lookup_country_code()``.
    """

    def __init__(self, ban_db):
        self.ban_db = ban_db

    def get_all_durations(self):
        """Return the duration of every ban, in database order."""
        durations = []
        for entry in self.ban_db.list():
            durations.append(entry.calc_duration())
        return durations

    def get_all_countries(self):
        """Return the country code of every ban, in database order."""
        countries = []
        for entry in self.ban_db.list():
            countries.append(entry.lookup_country_code())
        return countries

    def get_durations_by_country(self):
        """Return one ``(country_code, duration)`` tuple per ban."""
        pairs = []
        for entry in self.ban_db.list():
            # Country first, duration second: same call order as the tuple.
            code = entry.lookup_country_code()
            length = entry.calc_duration()
            pairs.append((code, length))
        return pairs

    def calc_average_duration_by_country(self):
        """Return a dict mapping each country code to its mean ban duration."""
        grouped = {}
        for code, length in self.get_durations_by_country():
            grouped.setdefault(code, []).append(length)

        averages = {}
        for code, lengths in grouped.items():
            averages[code] = numpy.mean(lengths)
        return averages
diff --git a/wikibania/ban/__init__.py b/wikibania/ban/__init__.py new file mode 100644 index 0000000..792d600 --- /dev/null +++ b/wikibania/ban/__init__.py | |||
@@ -0,0 +1 @@ | |||
# | |||