From 9539c83b76244e21a562143fc1c22c3000968e5d Mon Sep 17 00:00:00 2001 From: Minijackson Date: Thu, 23 Oct 2014 20:43:03 +0200 Subject: Adding gathering of data (watchers, revisions) + beginnig of histogram --- res/Pages | 50 +++++++++++ res/data-out | 202 +++++++++++++++++++++++++++++++++++++++++++++ src/downloader/__init__.py | 1 - src/histogram/__init__.py | 19 +++++ src/main.py | 68 +++++++++++++++ 5 files changed, 339 insertions(+), 1 deletion(-) create mode 100644 res/Pages create mode 100644 res/data-out create mode 100644 src/histogram/__init__.py create mode 100644 src/main.py diff --git a/res/Pages b/res/Pages new file mode 100644 index 0000000..d00c484 --- /dev/null +++ b/res/Pages @@ -0,0 +1,50 @@ +Paris +13 Vendémiaire +Place de l'Hôtel-de-Ville +Battle of Paris (1814) +Siege of Paris (1590) +Siege of Paris (845) +Siege of Paris (1870–71) +Timeline of Paris +Hôtel de Ville (Paris Métro) +Autonomous Port of Paris +Hôtel de Ville, Paris +Saint-Jacques Tower +Bazar de l'Hôtel de Ville +Rue de Rivoli, Paris +Saint-Merri +Hôtel-Dieu de Paris +Fontaine du Palmier +Place du Châtelet +St-Gervais-et-St-Protais +Pont au Change +Rue des Lombards +Châtelet (Paris Métro) +Stravinsky Fountain +Île de la Cité +Théâtre du Châtelet +IRCAM +Cité (Paris Métro) +La Force Prison +4th arrondissement of Paris +Le Duc des Lombards +Musée de Notre Dame de Paris +Conciergerie +Prefecture of Police of Paris +Le Baiser Salé +Roman Catholic Archdiocese of Paris +Parvis Notre-Dame – place Jean-Paul-II +Sunset/Sunside +Siege of Paris (885–86) +Notre Dame de Paris +Palais de Justice, Paris +Rue de la Ferronnerie +Sainte-Chapelle +Centre Georges Pompidou +Petit Pont +Place Joachim-du-Bellay +Holy Innocents' Cemetery +Fontaine des Innocents +Fontaine Maubuée +Court of Cassation (France) +Direction Régionale de Police Judiciaire de Paris \ No newline at end of file diff --git a/res/data-out b/res/data-out new file mode 100644 index 0000000..f22a646 --- /dev/null +++ b/res/data-out @@ 
-0,0 +1,202 @@ +{ + "Place du Châtelet": { + "watchers": 0, + "revisions": 47 + }, + "4th arrondissement of Paris": { + "watchers": 0, + "revisions": 170 + }, + "Siege of Paris (845)": { + "watchers": 0, + "revisions": 91 + }, + "Hôtel de Ville (Paris Métro)": { + "watchers": 0, + "revisions": 65 + }, + "Rue de Rivoli, Paris": { + "watchers": 0, + "revisions": 114 + }, + "St-Gervais-et-St-Protais": { + "watchers": 0, + "revisions": 110 + }, + "Paris": { + "watchers": 859, + "revisions": 13750 + }, + "Notre Dame de Paris": { + "watchers": 161, + "revisions": 3559 + }, + "Sunset/Sunside": { + "watchers": 0, + "revisions": 13 + }, + "Battle of Paris (1814)": { + "watchers": 0, + "revisions": 163 + }, + "Place Joachim-du-Bellay": { + "watchers": 0, + "revisions": 23 + }, + "IRCAM": { + "watchers": 0, + "revisions": 235 + }, + "Petit Pont": { + "watchers": 0, + "revisions": 80 + }, + "Hôtel de Ville, Paris": { + "watchers": 0, + "revisions": 231 + }, + "Autonomous Port of Paris": { + "watchers": 0, + "revisions": 12 + }, + "Place de l'Hôtel-de-Ville": { + "watchers": 0, + "revisions": 78 + }, + "Conciergerie": { + "watchers": 32, + "revisions": 291 + }, + "La Force Prison": { + "watchers": 0, + "revisions": 76 + }, + "Théâtre du Châtelet": { + "watchers": 0, + "revisions": 77 + }, + "Châtelet (Paris Métro)": { + "watchers": 0, + "revisions": 124 + }, + "Siege of Paris (1590)": { + "watchers": 0, + "revisions": 52 + }, + "Palais de Justice, Paris": { + "watchers": 0, + "revisions": 96 + }, + "Île de la Cité": { + "watchers": 36, + "revisions": 242 + }, + "Timeline of Paris": { + "watchers": 0, + "revisions": 1034 + }, + "Roman Catholic Archdiocese of Paris": { + "watchers": 0, + "revisions": 166 + }, + "Siege of Paris (885–86)": { + "watchers": 38, + "revisions": 232 + }, + "Centre Georges Pompidou": { + "watchers": 64, + "revisions": 1021 + }, + "Holy Innocents' Cemetery": { + "watchers": 0, + "revisions": 99 + }, + "Saint-Merri": { + "watchers": 0, + "revisions": 66 + 
}, + "Le Baiser Salé": { + "watchers": 0, + "revisions": 16 + }, + "Saint-Jacques Tower": { + "watchers": 0, + "revisions": 104 + }, + "Sainte-Chapelle": { + "watchers": 44, + "revisions": 483 + }, + "Fontaine du Palmier": { + "watchers": 0, + "revisions": 39 + }, + "Fontaine des Innocents": { + "watchers": 0, + "revisions": 95 + }, + "Fontaine Maubuée": { + "watchers": 0, + "revisions": 18 + }, + "Hôtel-Dieu de Paris": { + "watchers": 0, + "revisions": 108 + }, + "Rue de la Ferronnerie": { + "watchers": 0, + "revisions": 23 + }, + "Le Duc des Lombards": { + "watchers": 0, + "revisions": 16 + }, + "Cité (Paris Métro)": { + "watchers": 0, + "revisions": 67 + }, + "Bazar de l'Hôtel de Ville": { + "watchers": 0, + "revisions": 39 + }, + "13 Vendémiaire": { + "watchers": 0, + "revisions": 149 + }, + "Rue des Lombards": { + "watchers": 0, + "revisions": 13 + }, + "Direction Régionale de Police Judiciaire de Paris": { + "watchers": 0, + "revisions": 59 + }, + "Musée de Notre Dame de Paris": { + "watchers": 0, + "revisions": 15 + }, + "Court of Cassation (France)": { + "watchers": 0, + "revisions": 504 + }, + "Pont au Change": { + "watchers": 0, + "revisions": 55 + }, + "Stravinsky Fountain": { + "watchers": 0, + "revisions": 104 + }, + "Siege of Paris (1870–71)": { + "watchers": 31, + "revisions": 272 + }, + "Prefecture of Police of Paris": { + "watchers": 0, + "revisions": 162 + }, + "Parvis Notre-Dame – place Jean-Paul-II": { + "watchers": 0, + "revisions": 11 + } +} \ No newline at end of file diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index baabb36..831ea06 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py @@ -57,7 +57,6 @@ class Downloader(): limit = 0 temp_result = self.download(endpoint, data_type, params, temp_limit) - print(list(temp_result[0].keys())) if "query-continue" in temp_result[0].keys(): params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] else: diff --git 
a/src/histogram/__init__.py b/src/histogram/__init__.py
new file mode 100644
index 0000000..a01612f
--- /dev/null
+++ b/src/histogram/__init__.py
@@ -0,0 +1,33 @@
+"""
+Module used to generate the histogram
+"""
+import pygal
+
+
+class Histogram():
+    """Hold the settings of one histogram: output file, title and data.
+
+    The pygal chart object is created up front and kept in ``self.hist``;
+    rendering it is the job of generate(), which is still a stub.
+    """
+
+    def __init__(self, outfile, title="", data=""):
+        """Store the chart settings and create the pygal chart.
+
+        outfile -- presumably the path the chart will be written to
+                   (nothing writes it yet; confirm when generate() lands)
+        title   -- title of the chart
+        data    -- data to plot; can also be supplied later with feed()
+        """
+        self.outfile = outfile
+        self.title = title
+        self.data = data
+        self.hist = pygal.Histogram()
+
+    def feed(self, data):
+        """Replace the data held by this histogram."""
+        self.data = data
+
+    def generate(self):
+        """Render the histogram (not implemented yet)."""
+        pass
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..304e089
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+"""Gather watcher and revision counts for the pages returned by a
+geosearch, save them under RES_DIR and set up the histograms."""
+
+import downloader
+import histogram
+import json
+
+# Constants
+RES_DIR = "../res/"
+
+# Gather data
+api = downloader.WikimediaAPI()
+down = downloader.Downloader()
+
+endpoint, geosearch_params = api.get_pages_around()
+geosearch_results = down.download(endpoint, "geosearch",
+                                  geosearch_params, limit=50)
+
+# The download comes back as a list of result fragments; flatten the titles.
+pages_title = [page_properties["title"]
+               for result_fragment in geosearch_results
+               for page_properties in result_fragment["query"]["geosearch"]]
+
+# Titles contain non-ASCII characters: do not rely on the locale encoding.
+with open(RES_DIR + "Pages", "w", encoding="utf-8") as f:
+    f.write('\n'.join(pages_title))
+
+data_count = len(pages_title)
+data = {}
+
+for i, title in enumerate(pages_title, start=1):
+    print("\rGathering data, please wait: " + str(100*i/data_count) + "%",
+          end="")
+    # One endpoint per query, so the second no longer overwrites the first.
+    watchers_endpoint, watchers_params = api.get_watchers(title)
+    revisions_endpoint, revisions_params = api.get_revisions(title)
+    watchers_results = down.download(watchers_endpoint, "info",
+                                     watchers_params, limit=500)
+    revisions_results = down.download(revisions_endpoint, "revisions",
+                                      revisions_params, limit=100000)
+
+    # Start from the requested title so an empty reply cannot reuse (or
+    # lack) the values left over from the previous page.
+    page_title = title
+    page_watchers = 0
+    for page_info in watchers_results[0]["query"]["pages"].values():
+        page_title = page_info["title"]
+        # Pages without a "watchers" key count as 0 watchers.
+        page_watchers = page_info.get("watchers", 0)
+
+    # Revisions may be split over several result fragments; add them up.
+    page_revisions = 0
+    for revisions_result in revisions_results:
+        for page_info in revisions_result["query"]["pages"].values():
+            page_revisions += len(page_info.get("revisions", []))
+
+    data[page_title] = {
+        "watchers": page_watchers,
+        "revisions": page_revisions
+    }
+
+with open(RES_DIR + "data-out", "w", encoding="utf-8") as f:
+    f.write(json.dumps(data, indent=4, ensure_ascii=False))
+
+# Make histograms
+# Adjacent literals keep the source indentation out of the titles.
+watcher_revisions = histogram.Histogram(
+    RES_DIR + "warev.svg",
+    title="Nombre de watchers par rapport "
+          "au nombre de révisions")
+
+watcher_contributers = histogram.Histogram(
+    RES_DIR + "waco.svg",
+    title="Nombre de watchers par "
+          "rapport au nombre de "
+          "contributeurs")
+-- 
cgit v1.2.3