From bdf9099df8c2a4636b0ad0e710b73330877eef37 Mon Sep 17 00:00:00 2001 From: Pacien TRAN-GIRARD Date: Fri, 24 Oct 2014 19:59:05 +0200 Subject: Very cleaner, much class, such readable, wow --- main.py | 101 ++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 31 deletions(-) (limited to 'main.py') diff --git a/main.py b/main.py index 0d12a1e..da7c6ae 100644 --- a/main.py +++ b/main.py @@ -1,62 +1,101 @@ -from banapedia.Ban import * -import bandict from collections import Counter +import webbrowser + import pygal import numpy as np +import pygeoip + +from wikibania.ban.BanDB import BanDB +from wikibania.ban.BanDBWrapper import BanDBWrapper +import sysproxy + + +# PARAMS + +GEOIP_DB = "/usr/share/GeoIP/GeoIP.dat" BAN_MAP_FILE = "output/ban-map.svg" BAN_DURATION_MAP_FILE = "output/ban-duration-map.svg" HIST_FILE = "output/histogram.svg" +STATS_FILE = "output/stats.txt" + +BAN_DB_FILE = "resources/ban_list.json" + +FETCH_SAMPLES = 2000 +FETCH_DB = False +DUMP_DB = False +LOAD_DB = False + +OPEN_FILES = False + + +# SETUP -SAMPLES = 1000 +sysproxy.configure_system_proxy() +geoip_looker = pygeoip.GeoIP(GEOIP_DB) -BAN_FILE = "resources/ban_list.json" +ban_db = BanDB(geoip_looker) -ban_dict_list = bandict.BanList(BAN_FILE, samples=SAMPLES, from_internet=True) +if FETCH_DB: + ban_db.fetch(FETCH_SAMPLES) +if LOAD_DB: + ban_db.load_file(BAN_DB_FILE) +if DUMP_DB: + ban_db.dump_file(BAN_DB_FILE) -# ======== HISTOGRAM ======= # +ban_db_wrapper = BanDBWrapper(ban_db) -ban_durations = ban_dict_list.get_durations() -(ban_durations_bars, bins) = np.histogram(ban_durations, - bins=[round(365/12*x) - for x in range(1, 50+2) - ] - ) -print("[INFO]", "Generating histogram") +# HISTOGRAM + +ban_durations = ban_db_wrapper.get_all_durations() +duration_bins = [round(365 / 12 * x) for x in range(1, 50 + 2)] +(ban_durations_bars, bins) = np.histogram(ban_durations, bins=duration_bins) + bar_chart = pygal.Bar(legend_at_bottom=True) -bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % SAMPLES -bar_chart.x_labels = map(str, range(1, len(ban_durations_bars)+1)) +bar_chart.title = "Active Wikipedia bans by duration (%d samples)" % len(ban_db.list()) +bar_chart.x_labels = map(str, range(1, len(ban_durations_bars) + 1)) bar_chart.add("Number of active bans", ban_durations_bars) bar_chart.render_to_file(HIST_FILE) -print("[INFO]", "Histogram generation complete") -# ======= NB BAN MAP ======= # +if OPEN_FILES: + webbrowser.open(HIST_FILE, 2) + + +# BAN DURATION STATS + +with open(STATS_FILE, "w") as stats_file: + stats_file.write("Mean: %.2f days\n" % np.mean(ban_durations)) + stats_file.write("Median: %.2f days\n" % np.median(ban_durations)) + stats_file.write("Deviation: %.2f\n" % np.std(ban_durations)) + stats_file.write("Variance: %.2f\n" % np.var(ban_durations)) -country_ban_list = ban_dict_list.get_countries() +if OPEN_FILES: + webbrowser.open(STATS_FILE, 2) + + +# NB BAN MAP + +country_ban_list = ban_db_wrapper.get_all_countries() nb_bans_by_country = Counter(country_ban_list) -print("[INFO]", "Generating ban map") worldmap_chart = pygal.Worldmap(legend_at_bottom=True) -worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % SAMPLES +worldmap_chart.title = "World active Wikipedia bans by country (%d samples)" % len(ban_db.list()) worldmap_chart.add("Active bans", nb_bans_by_country) worldmap_chart.render_to_file(BAN_MAP_FILE) -print("[INFO]", "Ban map generation complete") + +if OPEN_FILES: + webbrowser.open(BAN_MAP_FILE, 2) -# ======= BAN DURATION MAP ======= # +# BAN DURATION MAP -average_ban_duration_by_country = ban_dict_list.average_ban_by_country() +average_ban_duration_by_country = ban_db_wrapper.calc_average_duration_by_country() -print("[INFO]", "Generating ban duration map") worldmap_chart = pygal.Worldmap(legend_at_bottom=True) -worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % SAMPLES +worldmap_chart.title = "Average Wikipedia ban duration by country (%d samples)" % len(ban_db.list()) worldmap_chart.add("Average ban duration (months)", average_ban_duration_by_country) worldmap_chart.render_to_file(BAN_DURATION_MAP_FILE) -print("[INFO]", "Ban duration map generation complete") -print("Some additional stats about ban durations:") -print(" Mean: %.2f days" % np.mean(ban_durations)) -print(" Median: %.2f days" % np.median(ban_durations)) -print(" Deviation: %.2f" % np.std(ban_durations)) -print(" Variance: %.2f" % np.var(ban_durations)) +if OPEN_FILES: + webbrowser.open(BAN_DURATION_MAP_FILE, 2) -- cgit v1.2.3