summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMinijackson2014-10-23 20:43:03 +0200
committerMinijackson2014-10-23 20:43:03 +0200
commit9539c83b76244e21a562143fc1c22c3000968e5d (patch)
tree2518598983a38ad02733bc7f0ecbaa750987caa3 /src
parent25682dfbad10996b2e23ccbb3fef87636d919c03 (diff)
downloadwikistats-histogram.tar.gz
Adding gathering of data (watchers, revisions) + beginning of histogram (branch: histogram)
Diffstat (limited to 'src')
-rw-r--r--src/downloader/__init__.py1
-rw-r--r--src/histogram/__init__.py19
-rw-r--r--src/main.py68
3 files changed, 87 insertions, 1 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py
index baabb36..831ea06 100644
--- a/src/downloader/__init__.py
+++ b/src/downloader/__init__.py
@@ -57,7 +57,6 @@ class Downloader():
57 limit = 0 57 limit = 0
58 58
59 temp_result = self.download(endpoint, data_type, params, temp_limit) 59 temp_result = self.download(endpoint, data_type, params, temp_limit)
60 print(list(temp_result[0].keys()))
61 if "query-continue" in temp_result[0].keys(): 60 if "query-continue" in temp_result[0].keys():
62 params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] 61 params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name]
63 else: 62 else:
diff --git a/src/histogram/__init__.py b/src/histogram/__init__.py
new file mode 100644
index 0000000..a01612f
--- /dev/null
+++ b/src/histogram/__init__.py
@@ -0,0 +1,19 @@
1"""
2Module used to generate the histogram
3"""
4import pygal
5
6
class Histogram:
    """Bundle a pygal histogram chart with its output file, title and data.

    Rendering is not implemented yet: ``generate`` is a placeholder that
    does nothing.
    """

    def __init__(self, outfile, title="", data=""):
        """Record the chart settings and create the underlying pygal chart.

        Args:
            outfile: Stored as ``self.outfile``; not used by the class yet.
            title: Stored as ``self.title``. Defaults to an empty string.
            data: Initial data set, stored as ``self.data``. Defaults to
                an empty string.
        """
        self.outfile, self.title, self.data = outfile, title, data
        self.hist = pygal.Histogram()

    def feed(self, data):
        """Replace the stored data set with ``data``."""
        self.data = data

    def generate(self):
        """Placeholder for the rendering step; currently returns ``None``."""
        return None
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..304e089
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,68 @@
1#!/usr/bin/env python
2# -*- coding: UTF-8 -*-
3
4import downloader
5import histogram
6import json
7
# Constants
RES_DIR = "../res/"

# Gather data
api = downloader.WikimediaAPI()
down = downloader.Downloader()

# Collect the titles of the pages returned by the geosearch query.
endpoint, geosearch_params = api.get_pages_around()
geosearch_results = down.download(endpoint, "geosearch",
                                  geosearch_params, limit=50)

pages_title = [
    page_properties["title"]
    for result_fragment in geosearch_results
    for page_properties in result_fragment["query"]["geosearch"]
]

# Page titles (and the JSON dump below, written with ensure_ascii=False) may
# contain non-ASCII characters, so the encoding is set explicitly instead of
# depending on the locale default.
with open(RES_DIR + "Pages", "w", encoding="utf-8") as f:
    f.write('\n'.join(pages_title))

data_count = len(pages_title)
data = {}

for i, requested_title in enumerate(pages_title, start=1):
    # Integer percentage: the text never gets shorter from one iteration to
    # the next, so "\r" always overwrites the previous value completely.
    # flush=True because no newline is written to trigger a flush.
    print("\rGathering data, please wait: "
          + str(100 * i // data_count) + "%", end="", flush=True)

    # Keep one endpoint per query so the second call cannot silently
    # replace the endpoint used by the first download.
    watchers_endpoint, watchers_params = api.get_watchers(requested_title)
    revisions_endpoint, revisions_params = api.get_revisions(requested_title)
    watchers_results = down.download(watchers_endpoint, "info",
                                     watchers_params, limit=500)
    revisions_results = down.download(revisions_endpoint, "revisions",
                                      revisions_params, limit=100000)

    # Defaults, so that an empty "pages" mapping can neither leave these
    # names undefined nor reuse the values of the previous page.
    page_title = requested_title
    page_watchers = 0
    for page_info in watchers_results[0]["query"]["pages"].values():
        page_title = page_info["title"]
        # The "watchers" key is not always present in the response.
        page_watchers = page_info.get("watchers", 0)

    # The revisions may be spread over several result fragments.
    page_revisions = 0
    for revisions_result in revisions_results:
        for page_info in revisions_result["query"]["pages"].values():
            # An entry without a "revisions" key counts as zero revisions.
            page_revisions += len(page_info.get("revisions", []))

    data[page_title] = {
        "watchers": page_watchers,
        "revisions": page_revisions,
    }

if pages_title:
    # Terminate the progress line so later output starts on a fresh line.
    print()

with open(RES_DIR + "data-out", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4, ensure_ascii=False)

# Make histograms
# Adjacent string literals are used for the long titles: a backslash
# continuation inside a string literal would embed the indentation of the
# following source line into the title.
watcher_revisions = histogram.Histogram(
    RES_DIR + "warev.svg",
    title="Nombre de watchers par rapport "
          "au nombre de révisions")

watcher_contributers = histogram.Histogram(
    RES_DIR + "waco.svg",
    title="Nombre de watchers par "
          "rapport au nombre de "
          "contributeurs")