summaryrefslogtreecommitdiff
path: root/banapedia
diff options
context:
space:
mode:
author: Pacien TRAN-GIRARD, 2014-10-24 01:45:28 +0200
committer: Pacien TRAN-GIRARD, 2014-10-24 01:45:28 +0200
commit: 5e4f38688e4c14846b8264970a79c482c1ca7012 (patch)
tree: 6d150aa8f26eed939aabc9de1db3239a46cb7e05 /banapedia
download: wikistats-5e4f38688e4c14846b8264970a79c482c1ca7012.tar.gz
Initial commit
Diffstat (limited to 'banapedia')
-rw-r--r-- banapedia/Ban.py 76
-rw-r--r-- banapedia/__init__.py 3
-rw-r--r-- banapedia/api/Query.py 23
-rw-r--r-- banapedia/api/__init__.py 1
-rw-r--r-- banapedia/wapi/WikipediaQuery.py 42
-rw-r--r-- banapedia/wapi/__init__.py 1
6 files changed, 146 insertions, 0 deletions
diff --git a/banapedia/Ban.py b/banapedia/Ban.py
new file mode 100644
index 0000000..d8666b4
--- /dev/null
+++ b/banapedia/Ban.py
@@ -0,0 +1,76 @@
1from banapedia.wapi.WikipediaQuery import BlockQuery
2from datetime import datetime
3import pygeoip
4
5__author__ = 'pacien'
6
7
8GEOIP_FILE = "/usr/share/GeoIP/GeoIP.dat"
9geoip = pygeoip.GeoIP(GEOIP_FILE)
10
11ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"
12
13
class Ban:
    """A single ban: the banned address plus its start and end instants.

    ``start`` and ``end`` must support subtraction yielding an object with
    a ``days`` attribute (``map_ban`` passes ``datetime`` instances).
    """

    def __init__(self, ip, start, end):
        """Store the ban fields; the country code is resolved lazily."""
        self.ip, self.start, self.end = ip, start, end
        # None means "not looked up yet"; "" means "looked up, unknown".
        self.country_code = None

    def get_duration(self):
        """Return the number of whole days between ``start`` and ``end``."""
        elapsed = self.end - self.start
        return elapsed.days

    def get_country_code(self):
        """Return the lower-cased country code for ``ip``, caching the result.

        A failed GeoIP lookup is reported on stdout and cached as ``""`` so
        the lookup is not retried.
        """
        if self.country_code is None:
            try:
                resolved = geoip.country_code_by_addr(self.ip).lower()
            except pygeoip.GeoIPError:
                print("[ERROR]", "Could not determine country for ip", self.ip)
                resolved = ""
            self.country_code = resolved

        return self.country_code
37
38
def map_ban(ban_dict):
    """Build a ``Ban`` from one block entry dict.

    Reads the ``"user"``, ``"timestamp"`` and ``"expiry"`` keys; the two
    dates are parsed with the ``ISO_TIMESTAMP`` format.
    """
    started = datetime.strptime(ban_dict["timestamp"], ISO_TIMESTAMP)
    expires = datetime.strptime(ban_dict["expiry"], ISO_TIMESTAMP)
    return Ban(ban_dict["user"], started, expires)
45
46
def map_bans(ban_dict_list):
    """Convert every block entry dict in ``ban_dict_list`` with ``map_ban``.

    Returns a new list in the same order as the input.
    """
    return [map_ban(entry) for entry in ban_dict_list]
53
54
def _next_bkcontinue(results):
    """Return the ``bkcontinue`` token of an API response, or None if absent.

    Both response layouts are accepted: a top-level ``"continue"`` object and
    the older ``"query-continue" -> "blocks"`` object.
    """
    legacy = results.get("query-continue", {}).get("blocks", {})
    modern = results.get("continue", {})
    return modern.get("bkcontinue", legacy.get("bkcontinue"))


def fetch_multipart_ban_dict(n, query_limit):
    """Fetch up to ``n`` block entries, at most ``query_limit`` per request.

    Requests are chained through the continuation token of each response.
    Fetching stops early, without error, when the API reports no further
    continuation or returns an empty page, so fewer than ``n`` entries may
    be returned.

    :param n: total number of entries wanted; ``n <= 0`` performs no request.
    :param query_limit: maximum number of entries asked for in one request.
    :return: list of block entry dicts as returned by the API.
    :raises KeyError: if a response lacks ``["query"]["blocks"]``.
    """
    ban_dict_list = []
    continue_token = None

    print("[INFO]", "Fetching %d bans" % n)
    while len(ban_dict_list) < n:
        to_fetch = min(query_limit, n - len(ban_dict_list))
        query = BlockQuery(
            bkprop=["user", "timestamp", "expiry"],
            bkshow=["temp", "ip"],
            limit=to_fetch,
            continue_token=continue_token,
        )
        results = query.fetch_result()
        blocks = results["query"]["blocks"]
        ban_dict_list.extend(blocks)
        # Report what was actually received, not what was asked for.
        print("[INFO]", "Fetched %d over %d bans" % (len(ban_dict_list), n))

        continue_token = _next_bkcontinue(results)
        if continue_token is None or not blocks:
            # Last page reached: nothing more to request.
            break

    print("[INFO]", "Bans fetching complete")
    return ban_dict_list
diff --git a/banapedia/__init__.py b/banapedia/__init__.py
new file mode 100644
index 0000000..93efc48
--- /dev/null
+++ b/banapedia/__init__.py
@@ -0,0 +1,3 @@
1__author__ = 'pacien'
2
3
diff --git a/banapedia/api/Query.py b/banapedia/api/Query.py
new file mode 100644
index 0000000..7453df9
--- /dev/null
+++ b/banapedia/api/Query.py
@@ -0,0 +1,23 @@
1import urllib.parse
2import urllib.request
3import json
4
5__author__ = 'pacien'
6
7
class Query:
    """A form-encoded HTTP query sent to ``base_url``."""

    def __init__(self, base_url="", params=None, encoding="utf8"):
        """Store the target URL, the query parameters and the text encoding.

        :param base_url: URL the request is sent to.
        :param params: mapping of parameters; a fresh empty dict when omitted,
            so instances never share a default mapping.
        :param encoding: encoding used for the request body and the response.
        """
        self.base_url = base_url
        self.params = {} if params is None else params
        self.encoding = encoding

    def fetch_raw_result(self):
        """Send the parameters as a request body and return the decoded reply."""
        post_query = urllib.parse.urlencode(self.params)
        post_query = post_query.encode(self.encoding)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(self.base_url, post_query) as document:
            return document.read().decode(self.encoding)
19
20
class JSONQuery(Query):
    """A ``Query`` whose response body is a JSON document."""

    def fetch_result(self):
        """Fetch the raw response text and return it parsed as JSON."""
        raw_text = self.fetch_raw_result()
        return json.loads(raw_text)
diff --git a/banapedia/api/__init__.py b/banapedia/api/__init__.py
new file mode 100644
index 0000000..a0f842f
--- /dev/null
+++ b/banapedia/api/__init__.py
@@ -0,0 +1 @@
__author__ = 'pacien'
diff --git a/banapedia/wapi/WikipediaQuery.py b/banapedia/wapi/WikipediaQuery.py
new file mode 100644
index 0000000..d3d2f94
--- /dev/null
+++ b/banapedia/wapi/WikipediaQuery.py
@@ -0,0 +1,42 @@
1from ..api.Query import JSONQuery
2
3__author__ = 'pacien'
4
5WIKIPEDIA_QUERY_BASE_URL = "https://en.wikipedia.org/w/api.php"
6LIST_SEPARATOR = "|"
7DEFAULT_BKPROP = ["id", "user", "userid", "by", "byid", "timestamp", "expiry", "reason", "range", "flags"]
8DEFAULT_BKSHOW = ["account", "temp", "ip", "range"]
9
10
class WikipediaQuery(JSONQuery):
    """A JSON ``action=query`` request sent to ``WIKIPEDIA_QUERY_BASE_URL``."""

    def __init__(self, params=None):
        """Build the query from ``params`` plus the fixed API parameters.

        :param params: optional mapping of extra parameters. It is copied,
            so neither the caller's mapping nor a shared default is modified.
            ``"action"`` and ``"format"`` always take the fixed values.
        """
        merged = {} if params is None else dict(params)
        merged.update({
            "action": "query",
            "format": "json",
        })
        JSONQuery.__init__(self, base_url=WIKIPEDIA_QUERY_BASE_URL, params=merged)
18
19
class ListQuery(WikipediaQuery):
    """A ``WikipediaQuery`` that requests one named list via ``list=``."""

    def __init__(self, list_name, params=None):
        """Build the query for ``list_name`` with optional extra parameters.

        :param list_name: value sent as the ``"list"`` parameter; it overrides
            any ``"list"`` entry present in ``params``.
        :param params: optional mapping of extra parameters. It is copied, so
            separate instances never end up sharing (and overwriting) one dict.
        """
        merged = {} if params is None else dict(params)
        merged.update({
            "list": list_name,
        })
        WikipediaQuery.__init__(self, merged)
26
27
class BlockQuery(ListQuery):
    """A ``ListQuery`` on the ``"blocks"`` list."""

    def __init__(self, bkprop=DEFAULT_BKPROP, bkshow=DEFAULT_BKSHOW, bkdir="newer", limit=500, continue_token=None):
        """Assemble the ``bk*`` parameters and delegate to ``ListQuery``.

        ``bkprop`` and ``bkshow`` are joined with ``LIST_SEPARATOR``; ``limit``
        is sent as ``bklimit``. ``bkcontinue`` is only sent when
        ``continue_token`` is not None.
        """
        query_params = dict(
            bkprop=LIST_SEPARATOR.join(bkprop),
            bkshow=LIST_SEPARATOR.join(bkshow),
            bkdir=bkdir,
            bklimit=limit,
        )

        if continue_token is not None:
            query_params["bkcontinue"] = continue_token

        ListQuery.__init__(self, "blocks", params=query_params)
41
42
diff --git a/banapedia/wapi/__init__.py b/banapedia/wapi/__init__.py
new file mode 100644
index 0000000..a0f842f
--- /dev/null
+++ b/banapedia/wapi/__init__.py
@@ -0,0 +1 @@
__author__ = 'pacien'