summary refs log tree commit diff
path: root/src/downloader
diff options
context:
space:
mode:
author Minijackson 2014-10-23 18:50:44 +0200
committer Minijackson 2014-10-23 18:50:44 +0200
commit 25682dfbad10996b2e23ccbb3fef87636d919c03 (patch)
tree 07712c97be51c2963c443dfa3d88afefcf5ae771 /src/downloader
parent ceb12bbde074e7de52619508a8bc3ebacf0b4f85 (diff)
download wikistats-25682dfbad10996b2e23ccbb3fef87636d919c03.tar.gz
Removing interpreter + Adding watchers + revisions + geosearch
Diffstat (limited to 'src/downloader')
-rw-r--r-- src/downloader/__init__.py 71
1 files changed, 68 insertions, 3 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py
index 3c7706f..baabb36 100644
--- a/src/downloader/__init__.py
+++ b/src/downloader/__init__.py
@@ -20,12 +20,13 @@ class Downloader():
20 self.data_types = { 20 self.data_types = {
21 "revisions": "rv", 21 "revisions": "rv",
22 "recentchanges": "rc", 22 "recentchanges": "rc",
23 "contributors": "pc" 23 "contributors": "pc",
24 "geosearch": "gs",
24 } 25 }
25 26
26 def download(self, endpoint, data_type, params={}, limit=0): 27 def download(self, endpoint, data_type, params={}, limit=0):
27 """ 28 """
28 Download the given URL with POST parameters and return the source code 29 Download the given URL with GET parameters and return the source code
29 with rccontinue support. If limit equals 0, then it is assumed that the 30 with rccontinue support. If limit equals 0, then it is assumed that the
30 limit is given within the parameters 31 limit is given within the parameters
31 """ 32 """
@@ -56,11 +57,12 @@ class Downloader():
56 limit = 0 57 limit = 0
57 58
58 temp_result = self.download(endpoint, data_type, params, temp_limit) 59 temp_result = self.download(endpoint, data_type, params, temp_limit)
60 print(list(temp_result[0].keys()))
59 if "query-continue" in temp_result[0].keys(): 61 if "query-continue" in temp_result[0].keys():
60 params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] 62 params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name]
61 else: 63 else:
62 limit = 0 64 limit = 0
63 results.append(temp_result) 65 results.append(temp_result[0])
64 return results 66 return results
65 67
66 def download_in_file(self, url, output_file_path): 68 def download_in_file(self, url, output_file_path):
@@ -163,3 +165,66 @@ class WikimediaAPI():
163 "titles": page, 165 "titles": page,
164 } 166 }
165 return self.endpoint, url_params 167 return self.endpoint, url_params
168
169 def get_watchers(self, page="Main_Page"):
170 """
171 Get the url corresponding to the watchers information of a given page.
172 (https://www.mediawiki.org/wiki/API:Properties#info_.2F_in)
173
174 Use the 'page' parameter to specify the Wikipedia page(s)
175 """
176
177 url_params = {
178 "action": "query",
179 "prop": "info",
180 "format": self.return_format,
181 "inprop": "watchers",
182 "titles": page,
183 }
184 return self.endpoint, url_params
185
186 def get_revisions(self, page="Main_Page"):
187 """
188 Get the url corresponding to the list of revisions of a given page.
189 (https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv)
190
191 Use the 'page' parameter to specify the Wikipedia page(s)
192
193 The namespace is used to restrict the results to a certain level. It
194 can be (Main) which is the default one, "Wikipedia", "File" or
195 others. It will be converted to an int corresponding to the pcnamespace
196 parameter. See https://meta.wikimedia.org/wiki/Help:Namespace
197 """
198
199 url_params = {
200 "action": "query",
201 "prop": "revisions",
202 "format": self.return_format,
203 "rvprop": "ids",
204 "titles": page,
205 }
206 return self.endpoint, url_params
207
208 def get_pages_around(self, location="48.8567|2.3508",
209 radius=10000, namespace="(Main)"):
210 """
211 Get the url corresponding to the pages around coordinates which by
212 default are the coordinates of Paris.
213 (https://www.mediawiki.org/wiki/Extension:GeoData#list.3Dgeosearch)
214
215 Use the 'location' parameter to specify the coordinates
216
217 The namespace is used to restrict the results to a certain level. It
218 can be (Main) which is the default one, "Wikipedia", "File" or
219 others. It will be converted to an int corresponding to the pcnamespace
220 parameter. See https://meta.wikimedia.org/wiki/Help:Namespace
221 """
222
223 url_params = {
224 "action": "query",
225 "list": "geosearch",
226 "format": self.return_format,
227 "gscoord": location,
228 "gsradius": radius,
229 }
230 return self.endpoint, url_params