diff options
author | Minijackson | 2014-10-23 20:43:03 +0200 |
---|---|---|
committer | Minijackson | 2014-10-23 20:43:03 +0200 |
commit | 9539c83b76244e21a562143fc1c22c3000968e5d (patch) | |
tree | 2518598983a38ad02733bc7f0ecbaa750987caa3 /src | |
parent | 25682dfbad10996b2e23ccbb3fef87636d919c03 (diff) | |
download | wikistats-histogram.tar.gz |
Adding gathering of data (watchers, revisions) + beginning of histogram (branch: histogram)
Diffstat (limited to 'src')
-rw-r--r-- | src/downloader/__init__.py | 1 | ||||
-rw-r--r-- | src/histogram/__init__.py | 19 | ||||
-rw-r--r-- | src/main.py | 68 |
3 files changed, 87 insertions, 1 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index baabb36..831ea06 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py | |||
@@ -57,7 +57,6 @@ class Downloader(): | |||
57 | limit = 0 | 57 | limit = 0 |
58 | 58 | ||
59 | temp_result = self.download(endpoint, data_type, params, temp_limit) | 59 | temp_result = self.download(endpoint, data_type, params, temp_limit) |
60 | print(list(temp_result[0].keys())) | ||
61 | if "query-continue" in temp_result[0].keys(): | 60 | if "query-continue" in temp_result[0].keys(): |
62 | params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] | 61 | params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] |
63 | else: | 62 | else: |
diff --git a/src/histogram/__init__.py b/src/histogram/__init__.py new file mode 100644 index 0000000..a01612f --- /dev/null +++ b/src/histogram/__init__.py | |||
@@ -0,0 +1,19 @@ | |||
1 | """ | ||
2 | Module used to generate the histogram | ||
3 | """ | ||
4 | import pygal | ||
5 | |||
6 | |||
class Histogram:
    """Hold the output path, title and data of a histogram chart.

    The constructor stores ``outfile``, ``title`` and ``data`` unchanged
    and keeps a freshly created ``pygal.Histogram`` object in ``hist``.
    """

    def __init__(self, outfile, title="", data=""):
        self.outfile, self.title, self.data = outfile, title, data
        self.hist = pygal.Histogram()

    def feed(self, data):
        """Replace the stored data with ``data``."""
        self.data = data

    def generate(self):
        """Do nothing for now: rendering is not implemented yet."""
        return None
diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..304e089 --- /dev/null +++ b/src/main.py | |||
@@ -0,0 +1,68 @@ | |||
1 | #!/usr/bin/env python | ||
2 | # -*- coding: UTF-8 -*- | ||
3 | |||
4 | import downloader | ||
5 | import histogram | ||
6 | import json | ||
7 | |||
# Constants
RES_DIR = "../res/"

# Gather data
api = downloader.WikimediaAPI()
down = downloader.Downloader()

# Ask the API for the pages of the geosearch query.
endpoint, geosearch_params = api.get_pages_around()
geosearch_results = down.download(endpoint, "geosearch",
                                  geosearch_params, limit=50)

# Every result fragment carries its own "geosearch" list; flatten the titles.
pages_title = [
    page_properties["title"]
    for result_fragment in geosearch_results
    for page_properties in result_fragment["query"]["geosearch"]
]

# Titles may contain non-ASCII characters, so the encoding is explicit
# instead of depending on the locale.
with open(RES_DIR + "Pages", "w", encoding="utf-8") as f:
    f.write('\n'.join(pages_title))

data_count = len(pages_title)
data = {}

for i, requested_title in enumerate(pages_title, start=1):
    # flush=True: the line never ends with a newline, so a line-buffered
    # stdout would otherwise not display the progress.
    print("\rGathering data, please wait: "
          + str(100 * i // data_count) + "%", end="", flush=True)

    # Each request uses the endpoint returned alongside its own parameters.
    watchers_endpoint, watchers_params = api.get_watchers(requested_title)
    revisions_endpoint, revisions_params = api.get_revisions(requested_title)
    watchers_results = down.download(watchers_endpoint, "info",
                                     watchers_params, limit=500)
    revisions_results = down.download(revisions_endpoint, "revisions",
                                      revisions_params, limit=100000)

    # Defaults used when the response lists no page or has no "watchers"
    # key, so values from the previous page are never reused.
    page_title = requested_title
    page_watchers = 0
    for page_info in watchers_results[0]["query"]["pages"].values():
        page_title = page_info["title"]
        page_watchers = page_info.get("watchers", 0)

    # Revisions may be spread over several result fragments; a page entry
    # without a "revisions" key counts as zero.
    page_revisions = sum(
        len(page_info.get("revisions", ()))
        for revisions_result in revisions_results
        for page_info in revisions_result["query"]["pages"].values()
    )

    data[page_title] = {
        "watchers": page_watchers,
        "revisions": page_revisions,
    }

# ensure_ascii=False emits raw non-ASCII characters, hence the explicit UTF-8.
with open(RES_DIR + "data-out", "w", encoding="utf-8") as f:
    f.write(json.dumps(data, indent=4, ensure_ascii=False))

# Make histograms
# Adjacent string literals are used for the long titles: a backslash
# continuation inside a literal would embed the source indentation.
watcher_revisions = histogram.Histogram(
    RES_DIR + "warev.svg",
    title="Nombre de watchers par rapport "
          "au nombre de révisions")

watcher_contributers = histogram.Histogram(
    RES_DIR + "waco.svg",
    title="Nombre de watchers par "
          "rapport au nombre de "
          "contributeurs")