class BanList:
    """In-memory collection of ban records with per-country aggregation.

    The records are either loaded from a JSON file or fetched through
    ``BlockQuery``. Two parallel views are kept:

    * ``dict_list`` -- the raw records exactly as loaded/fetched (each one
      is indexed by the ``"user"``, ``"timestamp"`` and ``"expiry"`` keys).
    * ``ban_list`` -- one ``Ban`` object built from each raw record.

    NOTE(review): ``Ban``, ``datetime`` and ``ISO_TIMESTAMP`` are not defined
    here; presumably they are provided by the module-level
    ``from banapedia.Ban import *`` -- confirm.
    """

    def __init__(self, data_file, samples=30000, samples_by_query=500,
                 from_internet=False):
        """Load the ban records and build the matching ``Ban`` objects.

        Args:
            data_file: Path of the JSON file to read. Only used when
                ``from_internet`` is false.
            samples: Total number of bans to fetch when ``from_internet``
                is true.
            samples_by_query: Maximum number of bans requested per query
                when ``from_internet`` is true.
            from_internet: Fetch the records through ``BlockQuery`` instead
                of reading ``data_file``.
        """
        if from_internet:
            # Install a process-wide urllib opener that uses the proxies
            # reported by the environment before any request is made.
            proxy = urllib.request.ProxyHandler(urllib.request.getproxies())
            opener = urllib.request.build_opener(proxy)
            urllib.request.install_opener(opener)
            self.dict_list = self.fetch_multipart(samples, samples_by_query)
        else:
            # JSON text is UTF-8 by specification; do not depend on the
            # platform's locale encoding.
            with open(data_file, "r", encoding="utf-8") as ban_dict_file:
                self.dict_list = json.load(ban_dict_file)

        self.ban_list = [
            Ban(
                ban_dict["user"],
                datetime.strptime(ban_dict["timestamp"], ISO_TIMESTAMP),
                datetime.strptime(ban_dict["expiry"], ISO_TIMESTAMP),
            )
            for ban_dict in self.dict_list
        ]

    def fetch_multipart(self, n, query_limit):
        """Fetch up to ``n`` raw ban records, ``query_limit`` at a time.

        Queries are chained with the continuation token returned by the
        previous one. Fetching stops as soon as ``n`` records have been
        collected, a query returns no record, or the response carries no
        continuation token (i.e. there is nothing left to fetch).

        Args:
            n: Number of records wanted.
            query_limit: Maximum number of records requested per query.

        Returns:
            The list of raw ban dicts, in the order they were received.
            It may hold fewer than ``n`` entries if the source ran out.
        """
        ban_dict_list = []
        continue_token = None

        print("[INFO]", "Fetching %d bans" % n)
        while len(ban_dict_list) < n:
            to_fetch = min(query_limit, n - len(ban_dict_list))
            query = BlockQuery(
                bkprop=["user", "timestamp", "expiry"],
                bkshow=["temp", "ip"],
                limit=to_fetch,
                continue_token=continue_token,
            )
            results = query.fetch_result()
            blocks = results["query"]["blocks"]
            ban_dict_list.extend(blocks)
            # Report what was actually received, not what was asked for.
            print("[INFO]",
                  "Fetched %d over %d bans" % (len(ban_dict_list), n))

            # The continuation section is absent from the last page, so it
            # must be looked up defensively instead of indexed directly.
            continue_token = (
                results.get("query-continue", {})
                .get("blocks", {})
                .get("bkcontinue")
            )
            if not blocks or continue_token is None:
                break

        print("[INFO]", "Bans fetching complete")
        return ban_dict_list

    def write_to_file(self, outfile):
        """Dump the raw ban records to ``outfile`` as tab-indented JSON."""
        with open(outfile, "w", encoding="utf-8") as ban_dict_file:
            json.dump(self.dict_list, ban_dict_file, indent="\t")

    def get_durations(self):
        """Return ``get_duration()`` of every ban, in ``ban_list`` order."""
        return [ban.get_duration() for ban in self.ban_list]

    def get_countries(self):
        """Return ``get_country_code()`` of every ban, in ``ban_list`` order."""
        return [ban.get_country_code() for ban in self.ban_list]

    def __iter__(self):
        """Iterate over the raw ban records (``dict_list``)."""
        return iter(self.dict_list)

    def items(self):
        """Delegate to ``dict_list.items()``.

        NOTE(review): ``__init__`` indexes every element of ``dict_list``
        by string key, so ``dict_list`` looks like a list of dicts, which
        has no ``items()``; this call would then raise ``AttributeError``.
        The intended return value cannot be told from this code -- confirm
        with callers before changing it.
        """
        return self.dict_list.items()

    def by_country(self):
        """Group the ``Ban`` objects by country code.

        Returns:
            A dict mapping each value returned by ``get_country_code()`` to
            the list of bans carrying it, in ``ban_list`` order.
        """
        bans_by_country = {}
        for ban in self.ban_list:
            bans_by_country.setdefault(ban.get_country_code(), []).append(ban)
        return bans_by_country

    def average_ban_by_country(self):
        """Return the mean ban duration per country, divided by 30.

        The original comment says the result is "in months"; presumably
        ``get_duration()`` yields days -- TODO confirm against ``Ban``.

        Returns:
            A dict mapping each country code to its scaled mean duration.
        """
        return {
            country: np.mean([ban.get_duration() for ban in bans]) / 30
            for country, bans in self.by_country().items()
        }