diff options
Diffstat (limited to 'banapedia')
-rw-r--r-- | banapedia/Ban.py | 76 | ||||
-rw-r--r-- | banapedia/__init__.py | 3 | ||||
-rw-r--r-- | banapedia/api/Query.py | 23 | ||||
-rw-r--r-- | banapedia/api/__init__.py | 1 | ||||
-rw-r--r-- | banapedia/wapi/WikipediaQuery.py | 42 | ||||
-rw-r--r-- | banapedia/wapi/__init__.py | 1 |
6 files changed, 146 insertions, 0 deletions
diff --git a/banapedia/Ban.py b/banapedia/Ban.py new file mode 100644 index 0000000..d8666b4 --- /dev/null +++ b/banapedia/Ban.py | |||
@@ -0,0 +1,76 @@ | |||
1 | from banapedia.wapi.WikipediaQuery import BlockQuery | ||
2 | from datetime import datetime | ||
3 | import pygeoip | ||
4 | |||
5 | __author__ = 'pacien' | ||
6 | |||
7 | |||
# Filesystem path handed to pygeoip.GeoIP on the next line.
GEOIP_FILE = "/usr/share/GeoIP/GeoIP.dat"
# Module-level GeoIP handle, constructed when this module is imported;
# Ban.get_country_code() performs its lookups through it.
geoip = pygeoip.GeoIP(GEOIP_FILE)

# strptime format map_ban() uses for the "timestamp" and "expiry" fields.
ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"
12 | |||
13 | |||
class Ban:
    """One block entry: the blocked address, its time span and a cached country.

    Attributes:
        ip: value passed as the blocked address (map_ban supplies the
            entry's "user" field here).
        start: beginning of the ban (map_ban supplies a datetime).
        end: end of the ban (map_ban supplies a datetime).
        country_code: memoised result of get_country_code(); None until
            the first lookup has been made.
    """

    def __init__(self, ip, start, end):
        self.ip, self.start, self.end = ip, start, end
        self.country_code = None

    def get_duration(self):
        """Return the ``days`` component of ``end - start``."""
        span = self.end - self.start
        return span.days

    def get_country_code(self):
        """Return the lower-cased country code for ``ip``, looking it up once.

        The lookup goes through the module-level ``geoip`` handle. When it
        raises ``pygeoip.GeoIPError`` an error line is printed and the empty
        string is cached and returned instead.
        """
        if self.country_code is None:
            resolved = ""

            try:
                resolved = geoip.country_code_by_addr(self.ip).lower()
            except pygeoip.GeoIPError:
                print("[ERROR]", "Could not determine country for ip", self.ip)

            # Cache even the empty fallback so the lookup is not retried.
            self.country_code = resolved

        return self.country_code
37 | |||
38 | |||
def map_ban(ban_dict):
    """Build a Ban from one block entry.

    Reads the "user", "timestamp" and "expiry" keys of ``ban_dict``; the two
    time fields are parsed with ISO_TIMESTAMP into datetime objects.
    """
    blocked_user = ban_dict["user"]
    started_at = datetime.strptime(ban_dict["timestamp"], ISO_TIMESTAMP)
    expires_at = datetime.strptime(ban_dict["expiry"], ISO_TIMESTAMP)
    return Ban(blocked_user, started_at, expires_at)
45 | |||
46 | |||
def map_bans(ban_dict_list):
    """Return a new list holding map_ban(entry) for every entry, in order."""
    return [map_ban(entry) for entry in ban_dict_list]
53 | |||
54 | |||
def fetch_multipart_ban_dict(n, query_limit):
    """Fetch up to ``n`` block entries, paging through the API as needed.

    Each page is requested through BlockQuery with at most ``query_limit``
    entries, restricted to temporary IP blocks and to the "user",
    "timestamp" and "expiry" properties.

    Args:
        n: total number of entries wanted. Zero or a negative value
            performs no request and yields an empty list.
        query_limit: maximum number of entries requested per call; must be
            at least 1 whenever a request has to be made.

    Returns:
        The raw entries found under ``results["query"]["blocks"]`` of every
        page, concatenated in fetch order. The list is shorter than ``n``
        when the API stops offering a continuation token first.

    Raises:
        ValueError: if ``n`` is positive and ``query_limit`` is below 1,
            which would otherwise never make progress.
    """
    if n > 0 and query_limit < 1:
        raise ValueError("query_limit must be at least 1, got %r" % (query_limit,))

    ban_dict_list = []
    continue_token = None

    print("[INFO]", "Fetching %d bans" % n)
    while len(ban_dict_list) < n:
        to_fetch = min(query_limit, n - len(ban_dict_list))
        query = BlockQuery(
            bkprop=["user", "timestamp", "expiry"],
            bkshow=["temp", "ip"],
            limit=to_fetch,
            continue_token=continue_token,
        )
        results = query.fetch_result()
        ban_dict_list.extend(results["query"]["blocks"])
        # Count what was actually received, not what was asked for: a page
        # may legitimately hold fewer entries than requested.
        print("[INFO]", "Fetched %d over %d bans" % (len(ban_dict_list), n))

        # The last page carries no continuation data; indexing it blindly
        # raised KeyError. Accept the legacy "query-continue" layout and,
        # NOTE(review): presumably also the newer top-level "continue"
        # layout -- confirm against the MediaWiki API:Continue docs.
        continuation = (
            results.get("query-continue", {}).get("blocks")
            or results.get("continue", {})
        )
        continue_token = continuation.get("bkcontinue")
        if continue_token is None:
            break

    print("[INFO]", "Bans fetching complete")
    return ban_dict_list
diff --git a/banapedia/__init__.py b/banapedia/__init__.py new file mode 100644 index 0000000..93efc48 --- /dev/null +++ b/banapedia/__init__.py | |||
@@ -0,0 +1,3 @@ | |||
1 | __author__ = 'pacien' | ||
2 | |||
3 | |||
diff --git a/banapedia/api/Query.py b/banapedia/api/Query.py new file mode 100644 index 0000000..7453df9 --- /dev/null +++ b/banapedia/api/Query.py | |||
@@ -0,0 +1,23 @@ | |||
1 | import urllib.parse | ||
2 | import urllib.request | ||
3 | import json | ||
4 | |||
5 | __author__ = 'pacien' | ||
6 | |||
7 | |||
class Query:
    """Generic HTTP query: POSTs URL-encoded parameters and returns the reply text.

    Attributes:
        base_url: URL handed to urllib.request.urlopen.
        params: mapping of request parameters, URL-encoded into the body.
        encoding: codec used to encode the body and to decode the response.
    """

    def __init__(self, base_url="", params=None, encoding="utf8"):
        """Store the request settings.

        Args:
            base_url: target URL.
            params: request parameters; when omitted each instance gets its
                own empty dict. A dict passed in is stored as-is (not copied).
            encoding: codec name for the request body and the response.
        """
        self.base_url = base_url
        # A literal {} default would be one dict shared by every instance
        # created without params, so mutations would leak between queries.
        self.params = {} if params is None else params
        self.encoding = encoding

    def fetch_raw_result(self):
        """Send the parameters as the request body and return the decoded reply.

        Supplying a body makes urlopen issue a POST for HTTP(S) URLs. Errors
        raised by urlopen or by decoding propagate to the caller.
        """
        post_query = urllib.parse.urlencode(self.params).encode(self.encoding)
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(self.base_url, post_query) as document:
            return document.read().decode(self.encoding)
19 | |||
20 | |||
class JSONQuery(Query):
    """Query whose response body is parsed as JSON."""

    def fetch_result(self):
        """Fetch the raw response text and return it decoded by json.loads."""
        raw_text = self.fetch_raw_result()
        parsed = json.loads(raw_text)
        return parsed
diff --git a/banapedia/api/__init__.py b/banapedia/api/__init__.py new file mode 100644 index 0000000..a0f842f --- /dev/null +++ b/banapedia/api/__init__.py | |||
@@ -0,0 +1 @@ | |||
__author__ = 'pacien' | |||
diff --git a/banapedia/wapi/WikipediaQuery.py b/banapedia/wapi/WikipediaQuery.py new file mode 100644 index 0000000..d3d2f94 --- /dev/null +++ b/banapedia/wapi/WikipediaQuery.py | |||
@@ -0,0 +1,42 @@ | |||
1 | from ..api.Query import JSONQuery | ||
2 | |||
3 | __author__ = 'pacien' | ||
4 | |||
# URL every WikipediaQuery is sent to.
WIKIPEDIA_QUERY_BASE_URL = "https://en.wikipedia.org/w/api.php"
# Separator BlockQuery uses to join list-valued parameters into one string.
LIST_SEPARATOR = "|"
# Default values for BlockQuery's bkprop and bkshow arguments.
DEFAULT_BKPROP = ["id", "user", "userid", "by", "byid", "timestamp", "expiry", "reason", "range", "flags"]
DEFAULT_BKSHOW = ["account", "temp", "ip", "range"]
9 | |||
10 | |||
class WikipediaQuery(JSONQuery):
    """JSON query sent to WIKIPEDIA_QUERY_BASE_URL with action=query, format=json."""

    def __init__(self, params=None):
        """Prepare the request parameters.

        Args:
            params: extra request parameters. The mapping is copied, so
                neither the caller's dict nor a shared default is modified.
                "action" and "format" are always forced to "query" and
                "json", overriding any values supplied for those keys.
        """
        # Copy first: updating the argument in place used to mutate the
        # shared {} default (and any dict the caller passed), leaking keys
        # from one query into the next.
        merged = {} if params is None else dict(params)
        merged.update({
            "action": "query",
            "format": "json",
        })
        JSONQuery.__init__(self, base_url=WIKIPEDIA_QUERY_BASE_URL, params=merged)
18 | |||
19 | |||
class ListQuery(WikipediaQuery):
    """WikipediaQuery that sets the ``list`` request parameter."""

    def __init__(self, list_name, params=None):
        """Prepare a list query.

        Args:
            list_name: value sent as the "list" parameter; it overrides any
                "list" key present in ``params``.
            params: extra request parameters. The mapping is copied, so
                neither the caller's dict nor a shared default is modified.
        """
        # Copy first: updating the argument in place used to mutate the
        # shared {} default, so a "list" value (and any other key) from one
        # query could persist into later default-constructed queries.
        merged = {} if params is None else dict(params)
        merged.update({
            "list": list_name,
        })
        WikipediaQuery.__init__(self, merged)
26 | |||
27 | |||
class BlockQuery(ListQuery):
    """ListQuery over the "blocks" list."""

    def __init__(self, bkprop=DEFAULT_BKPROP, bkshow=DEFAULT_BKSHOW, bkdir="newer", limit=500, continue_token=None):
        """Assemble the bk* request parameters.

        Args:
            bkprop: iterable of strings, joined with LIST_SEPARATOR into "bkprop".
            bkshow: iterable of strings, joined with LIST_SEPARATOR into "bkshow".
            bkdir: value sent as "bkdir".
            limit: value sent as "bklimit".
            continue_token: when not None, sent as "bkcontinue".
        """
        block_params = dict(
            bkprop=LIST_SEPARATOR.join(bkprop),
            bkshow=LIST_SEPARATOR.join(bkshow),
            bkdir=bkdir,
            bklimit=limit,
        )

        # Only include the continuation key when a token was supplied.
        if continue_token is not None:
            block_params["bkcontinue"] = continue_token

        ListQuery.__init__(self, "blocks", params=block_params)
41 | |||
42 | |||
diff --git a/banapedia/wapi/__init__.py b/banapedia/wapi/__init__.py new file mode 100644 index 0000000..a0f842f --- /dev/null +++ b/banapedia/wapi/__init__.py | |||
@@ -0,0 +1 @@ | |||
__author__ = 'pacien' | |||