# wikibania/ban/Ban.py
#
# Reconstructed from the "new file" hunk of commit
# bdf9099df8c2a4636b0ad0e710b73330877eef37 by Pacien TRAN-GIRARD
# (Fri, 24 Oct 2014 19:59:05 +0200,
# "Very cleaner, much class, such readable, wow").

from datetime import datetime


# strftime/strptime pattern used by default for the "timestamp" and "expiry"
# fields, e.g. "2014-10-24T19:59:05Z".
ISO_TIMESTAMP = "%Y-%m-%dT%H:%M:%SZ"


class Ban:
    """A single ban record: who was banned, from when, and until when.

    ``user`` is handed to the GeoIP looker as an address in
    ``lookup_country_code``, so it is presumably an IP address; confirm
    against the code that feeds entries in.
    """

    def __init__(
        self,
        geoip_looker,
        user=None,
        timestamp=None,
        expiry=None,
        timestamp_format=ISO_TIMESTAMP,
    ):
        """Create a ban record.

        Args:
            geoip_looker: object exposing ``country_code_by_addr(addr)``.
            user: banned user identifier.
            timestamp: ``datetime`` at which the ban started.
            expiry: ``datetime`` at which the ban ends.
            timestamp_format: strftime/strptime pattern used by ``items``
                and ``hydrate`` to (de)serialise the two dates.
        """
        self.geoip_looker = geoip_looker
        self.timestamp_format = timestamp_format

        self.user = user
        self.timestamp = timestamp
        self.expiry = expiry

    def items(self):
        """Return the record as a plain dict with string-formatted dates.

        Raises:
            AttributeError: if ``timestamp`` or ``expiry`` is still ``None``.
        """
        # Honour the per-instance format instead of the module default, so
        # that a Ban built with a custom ``timestamp_format`` round-trips.
        return {
            "user": self.user,
            "timestamp": self.timestamp.strftime(self.timestamp_format),
            "expiry": self.expiry.strftime(self.timestamp_format),
        }

    def hydrate(self, ban_dict):
        """Fill this record from a dict with "user", "timestamp", "expiry".

        Raises:
            KeyError: if one of the three keys is missing.
            ValueError: if a date string does not match ``timestamp_format``.
        """
        self.user = ban_dict["user"]
        self.timestamp = datetime.strptime(
            ban_dict["timestamp"], self.timestamp_format
        )
        self.expiry = datetime.strptime(
            ban_dict["expiry"], self.timestamp_format
        )

    def calc_duration(self):
        """Return the ban length in whole days (``timedelta.days``)."""
        return (self.expiry - self.timestamp).days

    def lookup_country_code(self):
        """Return the lower-cased country code for ``user``.

        Returns the literal ``"UNKNOWN"`` when the looker raises
        ``pygeoip.GeoIPError``.
        """
        # pygeoip is only needed here, for its exception type. Importing it
        # at call time lets the rest of the class (serialisation, duration)
        # be used without the GeoIP dependency installed.
        import pygeoip

        try:
            return self.geoip_looker.country_code_by_addr(self.user).lower()
        except pygeoip.GeoIPError:
            return "UNKNOWN"
# wikibania/ban/BanDB.py and wikibania/ban/BanDBWrapper.py
#
# Reconstructed from the "new file" hunks of commit
# bdf9099df8c2a4636b0ad0e710b73330877eef37. The same commit also creates
# wikibania/ban/__init__.py, whose only content is a bare "#" line.

import json
from collections import defaultdict

import numpy


# --- wikibania/ban/BanDB.py -------------------------------------------------


class BanDB:
    """In-memory list of ``Ban`` objects, fillable from JSON or the API."""

    def __init__(self, geoip_looker):
        """Create an empty database.

        Args:
            geoip_looker: passed unchanged to every ``Ban`` created by
                ``load``.
        """
        self.geoip_looker = geoip_looker
        self.bans = []

    def list(self):
        """Return the internal list of bans (the list itself, not a copy)."""
        return self.bans

    def load(self, ban_list):
        """Append one hydrated ``Ban`` per dict in ``ban_list``."""
        # Project-local import resolved at call time, so this module can be
        # imported on its own (the empty-database paths need nothing else).
        from wikibania.ban.Ban import Ban

        for entry in ban_list:
            ban = Ban(self.geoip_looker)
            ban.hydrate(entry)
            self.bans.append(ban)

    def load_file(self, file_name):
        """Load bans from a JSON file holding a list of ban dicts."""
        with open(file_name, "r", encoding="utf-8") as file:
            entries = json.load(file)
        self.load(entries)

    def dump(self):
        """Return every ban as a plain dict (see ``Ban.items``)."""
        return [ban.items() for ban in self.bans]

    def dump_file(self, file_name):
        """Write ``dump()`` to ``file_name`` as JSON."""
        ban_list = self.dump()
        with open(file_name, "w", encoding="utf-8") as file:
            json.dump(ban_list, file)

    @staticmethod
    def _extract_continue_token(results):
        """Return the ``bkcontinue`` token of an API response, or ``None``.

        Looks first in the legacy ``"query-continue"`` envelope, then in the
        current top-level ``"continue"`` one. ``None`` means the response
        carries no continuation, i.e. there is nothing more to fetch.
        """
        legacy = results.get("query-continue", {}).get("blocks", {})
        if "bkcontinue" in legacy:
            return legacy["bkcontinue"]
        return results.get("continue", {}).get("bkcontinue")

    def fetch(self, nb_samples, query_limit=500, continue_token=None):
        """Request up to ``nb_samples`` block entries and load them.

        Entries are requested in pages of at most ``query_limit``. Paging
        stops once ``nb_samples`` entries have been requested or the API
        reports no continuation token, whichever comes first. No request
        is made when ``nb_samples`` is zero or negative.

        Args:
            nb_samples: total number of entries to request.
            query_limit: maximum number of entries per request.
            continue_token: token to resume from, ``None`` to start fresh.

        Raises:
            ValueError: if ``query_limit`` is smaller than 1 (paging could
                never make progress).
        """
        if query_limit < 1:
            raise ValueError("query_limit must be at least 1")
        if nb_samples <= 0:
            return

        # Project-local import resolved only when a request is really made.
        from wikibania.wapi.WikipediaQuery import BlockQuery

        remaining = nb_samples
        # Iterative paging: no recursion depth tied to the number of pages.
        while remaining > 0:
            batch_size = min(remaining, query_limit)

            query = BlockQuery(
                properties=["user", "timestamp", "expiry"],
                show=["temp", "ip"],
                limit=batch_size,
                continue_token=continue_token,
            )
            results = query.fetch_result()

            self.load(results["query"]["blocks"])
            # Budget is counted in requested entries, not returned ones.
            remaining -= batch_size

            continue_token = self._extract_continue_token(results)
            if continue_token is None:
                # The API has no further entries; stop instead of failing.
                break


# --- wikibania/ban/BanDBWrapper.py ------------------------------------------


class BanDBWrapper:
    """Aggregate queries (durations, countries) over a ``BanDB``."""

    def __init__(self, ban_db):
        """Wrap ``ban_db``; only its ``list()`` method is used."""
        self.ban_db = ban_db

    def get_all_durations(self):
        """Return ``calc_duration()`` of every ban, in database order."""
        return [ban.calc_duration() for ban in self.ban_db.list()]

    def get_all_countries(self):
        """Return ``lookup_country_code()`` of every ban, in order."""
        return [ban.lookup_country_code() for ban in self.ban_db.list()]

    def get_durations_by_country(self):
        """Return one ``(country_code, duration)`` tuple per ban."""
        return [
            (ban.lookup_country_code(), ban.calc_duration())
            for ban in self.ban_db.list()
        ]

    def calc_average_duration_by_country(self):
        """Return ``{country_code: mean duration}`` using ``numpy.mean``."""
        ban_durations_by_country = defaultdict(list)

        for country, ban_duration in self.get_durations_by_country():
            ban_durations_by_country[country].append(ban_duration)

        return {
            country: numpy.mean(ban_durations)
            for country, ban_durations in ban_durations_by_country.items()
        }