Adding gathering of data (watchers, revisions) + beginning of histogram (branch: histogram)

author: Minijackson 2014-10-23 20:43:03 +0200
committer: Minijackson 2014-10-23 20:43:03 +0200
commit: 9539c83b76244e21a562143fc1c22c3000968e5d (patch)
tree: 2518598983a38ad02733bc7f0ecbaa750987caa3 /src
parent: 25682dfbad10996b2e23ccbb3fef87636d919c03 (diff)
download: wikistats-histogram.tar.gz
3 files changed, 87 insertions, 1 deletion
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py
index baabb36..831ea06 100644
--- a/src/downloader/__init__.py
+++ b/src/downloader/__init__.py
@@ -57,7 +57,6 @@ class Downloader():
                    limit = 0
                temp_result = self.download(endpoint, data_type, params, temp_limit)
-                print(list(temp_result[0].keys()))
                if "query-continue" in temp_result[0].keys():
                    params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name]
                else:
diff --git a/src/histogram/__init__.py b/src/histogram/__init__.py
new file mode 100644
index 0000000..a01612f
--- /dev/null
+++ b/src/histogram/__init__.py
@@ -0,0 +1,19 @@
+"""
+Module used to generate the histogram
+"""
+import pygal
+class Histogram():
+    """Class used to generate an Histogram"""
+    def __init__(self, outfile, title="", data=""):
+        self.outfile = outfile
+        self.title = title
+        self.data = data
+        self.hist = pygal.Histogram()
+    def feed(self, data):
+        self.data = data
+    def generate(self):
+        pass
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..304e089
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# −*− coding: UTF−8 −*−
+import downloader
+import histogram
+import json
+# Constants
+RES_DIR = "../res/"
+# Gather data
+api = downloader.WikimediaAPI()
+down = downloader.Downloader()
+endpoint, geosearch_params = api.get_pages_around()
+geosearch_results = down.download(endpoint, "geosearch",
+                                  geosearch_params, limit=50)
+pages_title = []
+for result_fragment in geosearch_results:
+    for page_properties in result_fragment["query"]["geosearch"]:
+        pages_title.append(page_properties["title"])
+with open(RES_DIR + "Pages", "w") as f:
+    f.write('\n'.join(pages_title))
+data_count = len(pages_title)
+data = {}
+i = 1
+for page in pages_title:
+    print("\rGathering data, please wait: " + str(100*i/data_count) + "%", end="")
+    endpoint, watchers_params = api.get_watchers(page)
+    endpoint, revisions_params = api.get_revisions(page)
+    watchers_results = down.download(endpoint, "info",
+                                     watchers_params, limit=500)
+    revisions_results = down.download(endpoint, "revisions",
+                                      revisions_params, limit=100000)
+    for page_id, page in watchers_results[0]["query"]["pages"].items():
+        page_title = page["title"]
+        if "watchers" in page.keys():
+            page_watchers = page["watchers"]
+        else:
+            page_watchers = 0
+    page_revisions = 0
+    for revisions_result in revisions_results:
+        for page_id, page in revisions_result["query"]["pages"].items():
+            page_revisions += len(page["revisions"])
+    data[page_title] = {
+        "watchers": page_watchers,
+        "revisions": page_revisions
+    }
+    i += 1
+with open(RES_DIR + "data-out", "w") as f:
+    f.write(json.dumps(data, indent=4, ensure_ascii=False))
+# Make histograms
+watcher_revisions = histogram.Histogram("../res/warev.svg",
+                                        title="Nombre de watchers par rapport \
+                                        au nombre de révisions")
+watcher_contributers = histogram.Histogram("../res/waco.svg",
+                                           title="Nombre de watchers par \
+                                           rapport au nombre de \
+                                           contributeurs")
author	Minijackson	2014-10-23 20:43:03 +0200
committer	Minijackson	2014-10-23 20:43:03 +0200
commit	9539c83b76244e21a562143fc1c22c3000968e5d (patch)
tree	2518598983a38ad02733bc7f0ecbaa750987caa3 /src
parent	25682dfbad10996b2e23ccbb3fef87636d919c03 (diff)
download	wikistats-histogram.tar.gz

diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index baabb36..831ea06 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py
@@ -57,7 +57,6 @@ class Downloader():
57	limit = 0	57	limit = 0
58		58
59	temp_result = self.download(endpoint, data_type, params, temp_limit)	59	temp_result = self.download(endpoint, data_type, params, temp_limit)
60	print(list(temp_result[0].keys()))
61	if "query-continue" in temp_result[0].keys():	60	if "query-continue" in temp_result[0].keys():
62	params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name]	61	params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name]
63	else:	62	else:


diff --git a/src/histogram/__init__.py b/src/histogram/__init__.py new file mode 100644 index 0000000..a01612f --- /dev/null +++ b/src/histogram/__init__.py
@@ -0,0 +1,19 @@
		1	"""
		2	Module used to generate the histogram
		3	"""
		4	import pygal
		5
		6
		7	class Histogram():
		8	"""Class used to generate an Histogram"""
		9	def __init__(self, outfile, title="", data=""):
		10	self.outfile = outfile
		11	self.title = title
		12	self.data = data
		13	self.hist = pygal.Histogram()
		14
		15	def feed(self, data):
		16	self.data = data
		17
		18	def generate(self):
		19	pass


diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..304e089 --- /dev/null +++ b/src/main.py
@@ -0,0 +1,68 @@
		1	#!/usr/bin/env python
		2	# −*− coding: UTF−8 −*−
		3
		4	import downloader
		5	import histogram
		6	import json
		7
		8	# Constants
		9	RES_DIR = "../res/"
		10
		11	# Gather data
		12	api = downloader.WikimediaAPI()
		13	down = downloader.Downloader()
		14
		15	endpoint, geosearch_params = api.get_pages_around()
		16	geosearch_results = down.download(endpoint, "geosearch",
		17	geosearch_params, limit=50)
		18
		19	pages_title = []
		20	for result_fragment in geosearch_results:
		21	for page_properties in result_fragment["query"]["geosearch"]:
		22	pages_title.append(page_properties["title"])
		23
		24	with open(RES_DIR + "Pages", "w") as f:
		25	f.write('\n'.join(pages_title))
		26
		27	data_count = len(pages_title)
		28	data = {}
		29	i = 1
		30
		31	for page in pages_title:
		32	print("\rGathering data, please wait: " + str(100*i/data_count) + "%", end="")
		33	endpoint, watchers_params = api.get_watchers(page)
		34	endpoint, revisions_params = api.get_revisions(page)
		35	watchers_results = down.download(endpoint, "info",
		36	watchers_params, limit=500)
		37	revisions_results = down.download(endpoint, "revisions",
		38	revisions_params, limit=100000)
		39	for page_id, page in watchers_results[0]["query"]["pages"].items():
		40	page_title = page["title"]
		41	if "watchers" in page.keys():
		42	page_watchers = page["watchers"]
		43	else:
		44	page_watchers = 0
		45
		46	page_revisions = 0
		47	for revisions_result in revisions_results:
		48	for page_id, page in revisions_result["query"]["pages"].items():
		49	page_revisions += len(page["revisions"])
		50
		51	data[page_title] = {
		52	"watchers": page_watchers,
		53	"revisions": page_revisions
		54	}
		55	i += 1
		56
		57	with open(RES_DIR + "data-out", "w") as f:
		58	f.write(json.dumps(data, indent=4, ensure_ascii=False))
		59
		60	# Make histograms
		61	watcher_revisions = histogram.Histogram("../res/warev.svg",
		62	title="Nombre de watchers par rapport \
		63	au nombre de révisions")
		64
		65	watcher_contributers = histogram.Histogram("../res/waco.svg",
		66	title="Nombre de watchers par \
		67	rapport au nombre de \
		68	contributeurs")