#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Gather watcher and revision counts for geosearch pages; set up histograms.

The script:

1. asks ``downloader.WikimediaAPI`` for a geosearch query and downloads it,
2. writes the resulting page titles to ``RES_DIR + "Pages"`` (one per line),
3. downloads the "info" and "revisions" data of every page and stores the
   watcher count and the number of revisions per page title in
   ``RES_DIR + "data-out"`` as JSON,
4. instantiates the two ``histogram.Histogram`` objects.

Provenance: introduced as src/main.py by commit
9539c83b76244e21a562143fc1c22c3000968e5d (Minijackson,
Thu, 23 Oct 2014 20:43:03 +0200), "Adding gathering of data (watchers,
revisions) + beginning of histogram".
"""

import json

import downloader
import histogram

# Constants
RES_DIR = "../res/"

# Gather data
api = downloader.WikimediaAPI()
down = downloader.Downloader()

endpoint, geosearch_params = api.get_pages_around()
geosearch_results = down.download(endpoint, "geosearch",
                                  geosearch_params, limit=50)

# Every downloaded fragment lists its hits under query -> geosearch.
pages_title = [
    page_properties["title"]
    for result_fragment in geosearch_results
    for page_properties in result_fragment["query"]["geosearch"]
]

# Titles may contain non-ASCII characters, so the encoding is fixed explicitly
# instead of depending on the locale's default.
with open(RES_DIR + "Pages", "w", encoding="utf-8") as f:
    f.write("\n".join(pages_title))

data_count = len(pages_title)
data = {}

for index, title in enumerate(pages_title, start=1):
    # "\r" rewinds to the start of the line so the percentage is overwritten
    # in place; integer division keeps it a short whole number.
    print("\rGathering data, please wait: "
          + str(100 * index // data_count) + "%", end="")

    # Each request keeps the endpoint that was returned together with its
    # parameters, rather than reusing whichever one was assigned last.
    watchers_endpoint, watchers_params = api.get_watchers(title)
    revisions_endpoint, revisions_params = api.get_revisions(title)
    watchers_results = down.download(watchers_endpoint, "info",
                                     watchers_params, limit=500)
    revisions_results = down.download(revisions_endpoint, "revisions",
                                      revisions_params, limit=100000)

    # Revisions can be spread over several result fragments: add them all up.
    # A page entry without a "revisions" key contributes nothing.
    revision_count = sum(
        len(page_info.get("revisions", ()))
        for fragment in revisions_results
        for page_info in fragment["query"]["pages"].values()
    )

    # Only the first fragment of the "info" download is read; an empty
    # download simply records nothing for this title.
    if watchers_results:
        for page_info in watchers_results[0]["query"]["pages"].values():
            data[page_info["title"]] = {
                # A page entry without a "watchers" key is counted as 0.
                "watchers": page_info.get("watchers", 0),
                "revisions": revision_count,
            }

# Terminate the in-place progress line.
print()

with open(RES_DIR + "data-out", "w", encoding="utf-8") as f:
    f.write(json.dumps(data, indent=4, ensure_ascii=False))

# Make histograms
# The titles are built from adjacent string literals: a backslash-newline
# inside a single literal would embed the next line's indentation in the text.
watcher_revisions = histogram.Histogram(
    RES_DIR + "warev.svg",
    title="Nombre de watchers par rapport "
          "au nombre de révisions")

watcher_contributers = histogram.Histogram(
    RES_DIR + "waco.svg",
    title="Nombre de watchers par "
          "rapport au nombre de "
          "contributeurs")