diff options
author | Minijackson | 2014-10-23 18:50:44 +0200 |
---|---|---|
committer | Minijackson | 2014-10-23 18:50:44 +0200 |
commit | 25682dfbad10996b2e23ccbb3fef87636d919c03 (patch) | |
tree | 07712c97be51c2963c443dfa3d88afefcf5ae771 /src/downloader | |
parent | ceb12bbde074e7de52619508a8bc3ebacf0b4f85 (diff) | |
download | wikistats-25682dfbad10996b2e23ccbb3fef87636d919c03.tar.gz |
Removing interpreter + Adding watchers + revisions + geosearch
Diffstat (limited to 'src/downloader')
-rw-r--r-- | src/downloader/__init__.py | 71 |
1 files changed, 68 insertions, 3 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index 3c7706f..baabb36 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py | |||
@@ -20,12 +20,13 @@ class Downloader(): | |||
20 | self.data_types = { | 20 | self.data_types = { |
21 | "revisions": "rv", | 21 | "revisions": "rv", |
22 | "recentchanges": "rc", | 22 | "recentchanges": "rc", |
23 | "contributors": "pc" | 23 | "contributors": "pc", |
24 | "geosearch": "gs", | ||
24 | } | 25 | } |
25 | 26 | ||
26 | def download(self, endpoint, data_type, params={}, limit=0): | 27 | def download(self, endpoint, data_type, params={}, limit=0): |
27 | """ | 28 | """ |
28 | Download the given URL with POST parameters and return the source code | 29 | Download the given URL with GET parameters and return the source code |
29 | with rccontinue support. If limit equals 0, then it supposes that the | 30 | with rccontinue support. If limit equals 0, then it supposes that the |
30 | limit is given within the parameters | 31 | limit is given within the parameters |
31 | """ | 32 | """ |
@@ -56,11 +57,12 @@ class Downloader(): | |||
56 | limit = 0 | 57 | limit = 0 |
57 | 58 | ||
58 | temp_result = self.download(endpoint, data_type, params, temp_limit) | 59 | temp_result = self.download(endpoint, data_type, params, temp_limit) |
60 | print(list(temp_result[0].keys())) | ||
59 | if "query-continue" in temp_result[0].keys(): | 61 | if "query-continue" in temp_result[0].keys(): |
60 | params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] | 62 | params[continue_name] = temp_result[0]["query-continue"][data_type][continue_name] |
61 | else: | 63 | else: |
62 | limit = 0 | 64 | limit = 0 |
63 | results.append(temp_result) | 65 | results.append(temp_result[0]) |
64 | return results | 66 | return results |
65 | 67 | ||
66 | def download_in_file(self, url, output_file_path): | 68 | def download_in_file(self, url, output_file_path): |
@@ -163,3 +165,66 @@ class WikimediaAPI(): | |||
163 | "titles": page, | 165 | "titles": page, |
164 | } | 166 | } |
165 | return self.endpoint, url_params | 167 | return self.endpoint, url_params |
168 | |||
169 | def get_watchers(self, page="Main_Page"): | ||
170 | """ | ||
171 | Get the url corresponding to the number of watchers of a given page. | ||
172 | (https://www.mediawiki.org/wiki/API:Properties#info_.2F_in) | ||
173 | |||
174 | Use the 'page' parameter to specify the Wikipedia page(s) | ||
175 | """ | ||
176 | |||
177 | url_params = { | ||
178 | "action": "query", | ||
179 | "prop": "info", | ||
180 | "format": self.return_format, | ||
181 | "inprop": "watchers", | ||
182 | "titles": page, | ||
183 | } | ||
184 | return self.endpoint, url_params | ||
185 | |||
186 | def get_revisions(self, page="Main_Page"): | ||
187 | """ | ||
188 | Get the url corresponding to the list of revisions of a given page. | ||
189 | (https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv) | ||
190 | |||
191 | Use the 'page' parameter to specify the Wikipedia page(s) | ||
192 | |||
193 | The namespace is used to restrict the results to a certain level. It | ||
194 | can be (Main) which is the default one, "Wikipedia", "File" or | ||
195 | others. It will be converted to an int corresponding to the pcnamespace | ||
196 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace | ||
197 | """ | ||
198 | |||
199 | url_params = { | ||
200 | "action": "query", | ||
201 | "prop": "revisions", | ||
202 | "format": self.return_format, | ||
203 | "rvprop": "ids", | ||
204 | "titles": page, | ||
205 | } | ||
206 | return self.endpoint, url_params | ||
207 | |||
208 | def get_pages_around(self, location="48.8567|2.3508", | ||
209 | radius=10000, namespace="(Main)"): | ||
210 | """ | ||
211 | Get the url corresponding to the pages around given coordinates, which | ||
212 | by default are the coordinates of Paris. | ||
213 | (https://www.mediawiki.org/wiki/Extension:GeoData#list.3Dgeosearch) | ||
214 | |||
215 | Use the 'location' parameter to specify the coordinates | ||
216 | |||
217 | The namespace is used to restrict the results to a certain level. It | ||
218 | can be (Main) which is the default one, "Wikipedia", "File" or | ||
219 | others. It will be converted to an int corresponding to the pcnamespace | ||
220 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace | ||
221 | """ | ||
222 | |||
223 | url_params = { | ||
224 | "action": "query", | ||
225 | "list": "geosearch", | ||
226 | "format": self.return_format, | ||
227 | "gscoord": location, | ||
228 | "gsradius": radius, | ||
229 | } | ||
230 | return self.endpoint, url_params | ||