From 25682dfbad10996b2e23ccbb3fef87636d919c03 Mon Sep 17 00:00:00 2001 From: Minijackson Date: Thu, 23 Oct 2014 18:50:44 +0200 Subject: Removing interpreter + Adding watchers + revisions + geosearch --- src/downloader/__init__.py | 71 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) (limited to 'src/downloader/__init__.py') diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index 3c7706f..baabb36 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py @@ -20,12 +20,13 @@ class Downloader(): self.data_types = { "revisions": "rv", "recentchanges": "rc", - "contributors": "pc" + "contributors": "pc", + "geosearch": "gs", } def download(self, endpoint, data_type, params={}, limit=0): """ - Download the given URL with POST parameters and return the source code + Download the given URL with GET parameters and return the source code with rccontinue support. If limit equals 0, then it suppose that the limit is given whithin the paramaters """ @@ -56,11 +57,12 @@ class Downloader(): limit = 0 temp_result = self.download(endpoint, data_type, params, temp_limit) + print(list(temp_result[0].keys())) if "query-continue" in temp_result[0].keys(): params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] else: limit = 0 - results.append(temp_result) + results.append(temp_result[0]) return results def download_in_file(self, url, output_file_path): @@ -163,3 +165,66 @@ class WikimediaAPI(): "titles": page, } return self.endpoint, url_params + + def get_watchers(self, page="Main_Page"): + """ + Get the url corresponding to the watchers of a given page. 
+ (https://www.mediawiki.org/wiki/API:Properties#info_.2F_in) + + Use the 'page' parameter to specify the Wikipedia page(s) + """ + + url_params = { + "action": "query", + "prop": "info", + "format": self.return_format, + "inprop": "watchers", + "titles": page, + } + return self.endpoint, url_params + + def get_revisions(self, page="Main_Page"): + """ + Get the url corresponding to the list of revisions of a given page. + (https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv) + + Use the 'page' parameter to specify the Wikipedia page(s) + + The namespace is used to restrict the results to a certain level. It + can be (Main) which is the default one, "Wikipedia", "File" or + others. It will be converted to an int corresponding to the pcnamespace + parameter. See https://meta.wikimedia.org/wiki/Help:Namespace + """ + + url_params = { + "action": "query", + "prop": "revisions", + "format": self.return_format, + "rvprop": "ids", + "titles": page, + } + return self.endpoint, url_params + + def get_pages_around(self, location="48.8567|2.3508", + radius=10000, namespace="(Main)"): + """ + Get the url corresponding to the pages around coordinates, which by + default are the coordinates of Paris. + (https://www.mediawiki.org/wiki/Extension:GeoData#list.3Dgeosearch) + + Use the 'location' parameter to specify the coordinates + + The namespace is used to restrict the results to a certain level. It + can be (Main) which is the default one, "Wikipedia", "File" or + others. It will be converted to an int corresponding to the pcnamespace + parameter. See https://meta.wikimedia.org/wiki/Help:Namespace + """ + + url_params = { + "action": "query", + "list": "geosearch", + "format": self.return_format, + "gscoord": location, + "gsradius": radius, + } + return self.endpoint, url_params -- cgit v1.2.3