diff options
author | Pacien TRAN-GIRARD | 2014-10-24 18:05:48 +0000 |
---|---|---|
committer | Pacien TRAN-GIRARD | 2014-10-24 18:05:48 +0000 |
commit | 4403fda939ef42aeffeccb343d74f3dc3b840f91 (patch) | |
tree | 63fd704f15f3030f1455aad0ef92403c5d093c70 /wikibania | |
parent | 16529a0d212e1387eacd590c0e5e1b1a13dc2641 (diff) | |
parent | bdf9099df8c2a4636b0ad0e710b73330877eef37 (diff) | |
download | wikistats-4403fda939ef42aeffeccb343d74f3dc3b840f91.tar.gz |
Merge branch 'refactor' into 'master'
Refactor
See merge request !1
Diffstat (limited to 'wikibania')
-rw-r--r-- | wikibania/__init__.py | 1 | ||||
-rw-r--r-- | wikibania/api/Query.py | 24 | ||||
-rw-r--r-- | wikibania/api/__init__.py | 1 | ||||
-rw-r--r-- | wikibania/ban/Ban.py | 37 | ||||
-rw-r--r-- | wikibania/ban/BanDB.py | 50 | ||||
-rw-r--r-- | wikibania/ban/BanDBWrapper.py | 25 | ||||
-rw-r--r-- | wikibania/ban/__init__.py | 1 | ||||
-rw-r--r-- | wikibania/wapi/WikipediaQuery.py | 48 | ||||
-rw-r--r-- | wikibania/wapi/__init__.py | 1 |
9 files changed, 188 insertions, 0 deletions
diff --git a/wikibania/__init__.py b/wikibania/__init__.py new file mode 100644 index 0000000..792d600 --- /dev/null +++ b/wikibania/__init__.py | |||
@@ -0,0 +1 @@ | |||
# | |||
diff --git a/wikibania/api/Query.py b/wikibania/api/Query.py new file mode 100644 index 0000000..0928478 --- /dev/null +++ b/wikibania/api/Query.py | |||
@@ -0,0 +1,24 @@ | |||
1 | import urllib.parse | ||
2 | import urllib.request | ||
3 | import json | ||
4 | |||
5 | |||
class Query:
    """Generic remote query whose parameters are sent as a url-encoded body.

    Attributes:
        base_url: URL the query is sent to.
        params: Mapping of parameter names to values; url-encoded on fetch.
        encoding: Codec used both to encode the request body and to decode
            the response body.
    """

    def __init__(self, base_url="", params=None, encoding="utf8"):
        """Store the target URL, the query parameters and the text encoding.

        Args:
            base_url: URL the query is sent to.
            params: Optional mapping of query parameters; a fresh empty dict
                is used when omitted.
            encoding: Codec name used for the request and response bodies.
        """
        if params is None:
            params = {}

        self.base_url = base_url
        self.params = params
        self.encoding = encoding

    def fetch_raw_result(self):
        """Send the query and return the decoded response body as a string.

        Returns:
            The full response body decoded with ``self.encoding``.
        """
        post_query = urllib.parse.urlencode(self.params).encode(self.encoding)

        # The response object is used as a context manager so the underlying
        # connection is released even if reading or decoding fails.
        with urllib.request.urlopen(self.base_url, post_query) as document:
            return document.read().decode(self.encoding)
20 | |||
21 | |||
class JSONQuery(Query):
    """Query whose response body is parsed as a JSON document."""

    def fetch_result(self):
        """Fetch the raw response text and return it decoded from JSON."""
        raw_text = self.fetch_raw_result()
        return json.loads(raw_text)
diff --git a/wikibania/api/__init__.py b/wikibania/api/__init__.py new file mode 100644 index 0000000..792d600 --- /dev/null +++ b/wikibania/api/__init__.py | |||
@@ -0,0 +1 @@ | |||
# | |||
diff --git a/wikibania/ban/Ban.py b/wikibania/ban/Ban.py new file mode 100644 index 0000000..e06ca89 --- /dev/null +++ b/wikibania/ban/Ban.py | |||
@@ -0,0 +1,37 @@ | |||
1 | from datetime import datetime | ||
2 | |||
3 | import pygeoip | ||
4 | |||
5 | |||
# Default strftime/strptime pattern used to (de)serialise ban timestamps.
ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"


class Ban:
    """A single ban record: who was banned, when, and until when.

    Attributes:
        geoip_looker: Object exposing ``country_code_by_addr(addr)``; used by
            ``lookup_country_code``.
        timestamp_format: strftime/strptime pattern used by ``items`` and
            ``hydrate``.
        user: Identifier of the banned user; it is passed as the address to
            the GeoIP lookup.
        timestamp: ``datetime`` at which the ban starts.
        expiry: ``datetime`` at which the ban ends.
    """

    def __init__(self, geoip_looker, user=None, timestamp=None, expiry=None, timestamp_format=ISO_TIMESTAMP):
        """Create a ban, optionally pre-filled with its user and dates."""
        self.geoip_looker = geoip_looker
        self.timestamp_format = timestamp_format

        self.user = user
        self.timestamp = timestamp
        self.expiry = expiry

    def items(self):
        """Return the ban as a plain dict with string-formatted dates.

        The dates are rendered with ``self.timestamp_format`` so that the
        output can be fed back to ``hydrate`` on an identically configured
        instance.
        """
        return {
            "user": self.user,
            "timestamp": self.timestamp.strftime(self.timestamp_format),
            "expiry": self.expiry.strftime(self.timestamp_format),
        }

    def hydrate(self, ban_dict):
        """Populate this ban from a dict with "user", "timestamp", "expiry".

        Args:
            ban_dict: Mapping whose "timestamp" and "expiry" values are
                strings matching ``self.timestamp_format``.

        Raises:
            KeyError: If one of the three keys is missing.
            ValueError: If a date string does not match the format.
        """
        self.user = ban_dict["user"]
        self.timestamp = datetime.strptime(ban_dict["timestamp"], self.timestamp_format)
        self.expiry = datetime.strptime(ban_dict["expiry"], self.timestamp_format)

    def calc_duration(self):
        """Return the ban length in whole days (``expiry - timestamp``)."""
        return (self.expiry - self.timestamp).days

    def lookup_country_code(self):
        """Return the lower-cased country code for ``self.user``.

        Returns:
            The lower-cased code reported by the GeoIP looker, or the literal
            string "UNKNOWN" when the lookup raises ``pygeoip.GeoIPError``.
        """
        try:
            return self.geoip_looker.country_code_by_addr(self.user).lower()
        except pygeoip.GeoIPError:
            return "UNKNOWN"
diff --git a/wikibania/ban/BanDB.py b/wikibania/ban/BanDB.py new file mode 100644 index 0000000..e83aa3c --- /dev/null +++ b/wikibania/ban/BanDB.py | |||
@@ -0,0 +1,50 @@ | |||
1 | import json | ||
2 | |||
3 | from wikibania.ban.Ban import Ban | ||
4 | from wikibania.wapi.WikipediaQuery import BlockQuery | ||
5 | |||
6 | |||
class BanDB:
    """In-memory collection of ``Ban`` objects.

    Bans can be loaded from plain dicts, from a JSON file, or fetched through
    ``BlockQuery``; they can be dumped back to dicts or to a JSON file.
    """

    def __init__(self, geoip_looker):
        """Create an empty database.

        Args:
            geoip_looker: Passed to every ``Ban`` created by ``load``.
        """
        self.geoip_looker = geoip_looker
        self.bans = []

    def list(self):
        """Return the internal list of loaded ``Ban`` objects (not a copy)."""
        return self.bans

    def load(self, ban_list):
        """Append one ``Ban`` per entry of ``ban_list``.

        Args:
            ban_list: Iterable of dicts accepted by ``Ban.hydrate``.
        """
        for entry in ban_list:
            ban = Ban(self.geoip_looker)
            ban.hydrate(entry)
            self.bans.append(ban)

    def load_file(self, file_name):
        """Load bans from the JSON file at ``file_name``."""
        # JSON text is read as UTF-8 instead of the platform default codec.
        with open(file_name, "r", encoding="utf-8") as file:
            entries = json.load(file)
        self.load(entries)

    def dump(self):
        """Return every loaded ban as a dict (see ``Ban.items``)."""
        return [ban.items() for ban in self.bans]

    def dump_file(self, file_name):
        """Write every loaded ban to ``file_name`` as a JSON list."""
        # Serialise first so the target file is not truncated if a ban
        # cannot be converted.
        ban_list = self.dump()
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(ban_list, file)

    @staticmethod
    def _next_continue_token(results):
        """Return the ``bkcontinue`` token of an API response, or ``None``.

        Looks under ``results["query-continue"]["blocks"]`` first, then
        under ``results["continue"]`` (the continuation layout described in
        the MediaWiki API:Continue documentation).
        """
        legacy = results.get("query-continue", {}).get("blocks", {})
        modern = results.get("continue", {})
        return legacy.get("bkcontinue", modern.get("bkcontinue"))

    def fetch(self, nb_samples, query_limit=500, continue_token=None):
        """Fetch up to ``nb_samples`` bans through ``BlockQuery`` and load them.

        Queries are issued in batches of at most ``query_limit`` entries.
        Fetching stops early when a response carries no continuation token.
        Nothing is queried when ``nb_samples`` is zero or negative.

        Args:
            nb_samples: Total number of bans requested.
            query_limit: Maximum number of bans requested per query.
            continue_token: Optional continuation token to resume from.

        Raises:
            ValueError: If ``query_limit`` is smaller than 1.
        """
        if query_limit < 1:
            raise ValueError("query_limit must be at least 1")

        remaining = nb_samples
        while remaining > 0:
            batch_size = min(remaining, query_limit)

            query = BlockQuery(
                properties=["user", "timestamp", "expiry"],
                show=["temp", "ip"],
                limit=batch_size,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            self.load(results["query"]["blocks"])

            remaining -= batch_size
            if remaining <= 0:
                break

            continue_token = self._next_continue_token(results)
            if continue_token is None:
                # No further page is available; stop instead of failing.
                break
diff --git a/wikibania/ban/BanDBWrapper.py b/wikibania/ban/BanDBWrapper.py new file mode 100644 index 0000000..8f396b0 --- /dev/null +++ b/wikibania/ban/BanDBWrapper.py | |||
@@ -0,0 +1,25 @@ | |||
1 | from collections import defaultdict | ||
2 | |||
3 | import numpy | ||
4 | |||
5 | |||
class BanDBWrapper:
    """Statistics helpers computed over the bans of a ban database."""

    def __init__(self, ban_db):
        """Wrap ``ban_db``, an object whose ``list()`` returns the bans."""
        self.ban_db = ban_db

    def get_all_durations(self):
        """Return the duration of every ban, in database order."""
        durations = []
        for ban in self.ban_db.list():
            durations.append(ban.calc_duration())
        return durations

    def get_all_countries(self):
        """Return the country code of every ban, in database order."""
        codes = []
        for ban in self.ban_db.list():
            codes.append(ban.lookup_country_code())
        return codes

    def get_durations_by_country(self):
        """Return one ``(country_code, duration)`` tuple per ban."""
        pairs = []
        for ban in self.ban_db.list():
            code = ban.lookup_country_code()
            days = ban.calc_duration()
            pairs.append((code, days))
        return pairs

    def calc_average_duration_by_country(self):
        """Return a dict mapping each country code to its mean ban duration."""
        grouped = defaultdict(list)
        for code, days in self.get_durations_by_country():
            grouped[code].append(days)

        averages = {}
        for code, day_values in grouped.items():
            averages[code] = numpy.mean(day_values)
        return averages
diff --git a/wikibania/ban/__init__.py b/wikibania/ban/__init__.py new file mode 100644 index 0000000..792d600 --- /dev/null +++ b/wikibania/ban/__init__.py | |||
@@ -0,0 +1 @@ | |||
# | |||
diff --git a/wikibania/wapi/WikipediaQuery.py b/wikibania/wapi/WikipediaQuery.py new file mode 100644 index 0000000..3f544b6 --- /dev/null +++ b/wikibania/wapi/WikipediaQuery.py | |||
@@ -0,0 +1,48 @@ | |||
1 | from ..api.Query import JSONQuery | ||
2 | |||
3 | WIKIPEDIA_QUERY_BASE_URL = "https://en.wikipedia.org/w/api.php" | ||
4 | LIST_SEPARATOR = "|" | ||
5 | |||
6 | |||
class WikipediaQuery(JSONQuery):
    """JSON query against ``WIKIPEDIA_QUERY_BASE_URL`` with ``action=query``."""

    def __init__(self, params=None):
        """Build the query parameters.

        Args:
            params: Optional mapping of extra parameters. It is copied, so
                the caller's mapping is left untouched. The "action" and
                "format" keys are always set to "query" and "json".
        """
        # Work on a copy: updating the argument in place would leak the
        # forced keys into the caller's dict.
        merged = {} if params is None else dict(params)
        merged.update({
            "action": "query",
            "format": "json",
        })
        super(WikipediaQuery, self).__init__(base_url=WIKIPEDIA_QUERY_BASE_URL, params=merged)
17 | |||
18 | |||
class ListQuery(WikipediaQuery):
    """Wikipedia query with the ``list`` parameter set to a given list name."""

    def __init__(self, list_name, params=None):
        """Build the query parameters.

        Args:
            list_name: Value sent as the "list" parameter.
            params: Optional mapping of extra parameters. It is copied, so
                the caller's mapping is left untouched.
        """
        # Work on a copy: updating the argument in place would add the
        # "list" key to the caller's dict.
        merged = {} if params is None else dict(params)
        merged.update({
            "list": list_name,
        })
        super(ListQuery, self).__init__(merged)
28 | |||
29 | |||
class BlockQuery(ListQuery):
    """List query over the "blocks" list."""

    def __init__(self, properties=None, show=None, sort="newer", limit=500, continue_token=None):
        """Build the ``bk*`` parameters and initialise the list query.

        Args:
            properties: Names joined with ``LIST_SEPARATOR`` into "bkprop".
            show: Names joined with ``LIST_SEPARATOR`` into "bkshow".
            sort: Value sent as "bkdir".
            limit: Value sent as "bklimit".
            continue_token: When not ``None``, sent as "bkcontinue".
        """
        prop_names = [] if properties is None else properties
        show_filters = [] if show is None else show

        block_params = dict(
            bkprop=LIST_SEPARATOR.join(prop_names),
            bkshow=LIST_SEPARATOR.join(show_filters),
            bkdir=sort,
            bklimit=limit,
        )

        # The continuation key is only sent when a token was supplied.
        if continue_token is not None:
            block_params["bkcontinue"] = continue_token

        super(BlockQuery, self).__init__("blocks", params=block_params)
diff --git a/wikibania/wapi/__init__.py b/wikibania/wapi/__init__.py new file mode 100644 index 0000000..792d600 --- /dev/null +++ b/wikibania/wapi/__init__.py | |||
@@ -0,0 +1 @@ | |||
# | |||