diff options
author | Pacien TRAN-GIRARD | 2014-10-22 10:46:11 +0200 |
---|---|---|
committer | Pacien TRAN-GIRARD | 2014-10-22 10:46:11 +0200 |
commit | 413f7faa5b3235ea0e00b68132b7e92917831dee (patch) | |
tree | c2269ef3ad8fc801a0234b61f68f487aadd34dda /src/downloader | |
parent | 046384051a6fd2aeab692263e9d533d363e45572 (diff) | |
parent | 1ae5575c25242c538d30bd303092f99b9e78b716 (diff) | |
download | wikistats-413f7faa5b3235ea0e00b68132b7e92917831dee.tar.gz |
Merge remote-tracking branch 'origin/master'
Resolved conflicts:
src/downloader/__init__.py
Diffstat (limited to 'src/downloader')
-rw-r--r-- | src/downloader/__init__.py | 108 |
1 file changed, 39 insertions, 69 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index f256ddc..15fba41 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py | |||
@@ -11,7 +11,6 @@ import urllib.parse | |||
11 | 11 | ||
12 | class Downloader(): | 12 | class Downloader(): |
13 | """Class used to download a given webpage considering system proxy""" | 13 | """Class used to download a given webpage considering system proxy""" |
14 | |||
15 | def __init__(self): | 14 | def __init__(self): |
16 | """ Downloader class constructor """ | 15 | """ Downloader class constructor """ |
17 | self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) | 16 | self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) |
@@ -32,39 +31,6 @@ class WikimediaAPI(): | |||
32 | """ | 31 | """ |
33 | Class used to generate wikimedia API urls for several uses | 32 | Class used to generate wikimedia API urls for several uses |
34 | """ | 33 | """ |
35 | |||
36 | RCNAMESPACES_CODES = { | ||
37 | "(Main)": "0", | ||
38 | "Talk": "1", | ||
39 | "User talk": "2", | ||
40 | "Wikipedia": "3", | ||
41 | "Wikipedia talk": "4", | ||
42 | "File": "5", | ||
43 | "File talk": "6", | ||
44 | "MediaWiki": "7", | ||
45 | "MediaWiki talk": "8", | ||
46 | "Template": "9", | ||
47 | "Template talk": "10", | ||
48 | "Help": "11", | ||
49 | "Help talk": "12", | ||
50 | "Category": "13", | ||
51 | "Category talk": "14", | ||
52 | # Custom Wikipedia namespaces | ||
53 | "Portal": "100", | ||
54 | "Portal talk": "101", | ||
55 | "Book": "108", | ||
56 | "Book talk": "109", | ||
57 | "Draft": "118", | ||
58 | "Draft talk": "119", | ||
59 | "Education Program": "446", | ||
60 | "Education Program talk": "447", | ||
61 | "TimedText": "710", | ||
62 | "TimedText talk": "711", | ||
63 | "Module": "828", | ||
64 | "Module talk": "829", | ||
65 | "Topic": "2600" | ||
66 | } | ||
67 | |||
68 | def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", | 34 | def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", |
69 | return_format="json"): | 35 | return_format="json"): |
70 | """ | 36 | """ |
@@ -80,18 +46,6 @@ class WikimediaAPI(): | |||
80 | self.endpoint = endpoint | 46 | self.endpoint = endpoint |
81 | self.return_format = return_format | 47 | self.return_format = return_format |
82 | 48 | ||
83 | def gen_query_url(self, parms): | ||
84 | """ | ||
85 | Generate the query URL. | ||
86 | |||
87 | :param parms: URL parameters dict | ||
88 | :return: query URL | ||
89 | """ | ||
90 | parms["action"] = "query" | ||
91 | parms["format"] = self.return_format | ||
92 | parms_str = urllib.parse.urlencode(parms) | ||
93 | return urllib.parse.urljoin(self.endpoint, "?" + parms_str) | ||
94 | |||
95 | def get_recent_changes(self, namespace="(Main)"): | 49 | def get_recent_changes(self, namespace="(Main)"): |
96 | """ | 50 | """ |
97 | Get the url corresponding to the latest changes made to the wiki. | 51 | Get the url corresponding to the latest changes made to the wiki. |
@@ -102,27 +56,43 @@ class WikimediaAPI(): | |||
102 | others. It will be converted to an int corresponding to the rcnamespace | 56 | others. It will be converted to an int corresponding to the rcnamespace |
103 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace | 57 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace |
104 | """ | 58 | """ |
105 | return self.gen_query_url({ | 59 | rcnamespaces = { |
60 | "(Main)": "0", | ||
61 | "Talk": "1", | ||
62 | "User talk": "2", | ||
63 | "Wikipedia": "3", | ||
64 | "Wikipedia talk": "4", | ||
65 | "File": "5", | ||
66 | "File talk": "6", | ||
67 | "MediaWiki": "7", | ||
68 | "MediaWiki talk": "8", | ||
69 | "Template": "9", | ||
70 | "Template talk": "10", | ||
71 | "Help": "11", | ||
72 | "Help talk": "12", | ||
73 | "Category": "13", | ||
74 | "Category talk": "14", | ||
75 | # Custom Wikipedia namespaces | ||
76 | "Portal": "100", | ||
77 | "Portal talk": "101", | ||
78 | "Book": "108", | ||
79 | "Book talk": "109", | ||
80 | "Draft": "118", | ||
81 | "Draft talk": "119", | ||
82 | "Education Program": "446", | ||
83 | "Education Program talk": "447", | ||
84 | "TimedText": "710", | ||
85 | "TimedText talk": "711", | ||
86 | "Module": "828", | ||
87 | "Module talk": "829", | ||
88 | "Topic": "2600" | ||
89 | } | ||
90 | |||
91 | url_params = { | ||
92 | "action": "query", | ||
106 | "list": "recentchanges", | 93 | "list": "recentchanges", |
107 | "rcnamespace": WikimediaAPI.RCNAMESPACES_CODES[namespace], | 94 | "format": self.return_format, |
108 | }) | 95 | "rcnamespace": rcnamespaces[namespace], |
109 | 96 | } | |
110 | def get_geo_pages(self, lat, long, radius, limit=500, namespace="(Main)"): | 97 | url_params_str = urllib.parse.urlencode(url_params) |
111 | """ | 98 | return urllib.parse.urljoin(self.endpoint, "?" + url_params_str) |
112 | Generate the query URL performing a geographic search of articles | ||
113 | located near the given coordinates. | ||
114 | |||
115 | :param lat: Latitude | ||
116 | :param long: Longitude | ||
117 | :param radius: radius (in m) to look inside | ||
118 | :param limit: maximum number of results (max 500) | ||
119 | :param namespace: namespace to restrict the search in | ||
120 | :return: | ||
121 | """ | ||
122 | return self.gen_query_url({ | ||
123 | "list": "geosearch", | ||
124 | "gscoord": "%f|%f" % (lat, long), | ||
125 | "gsradius": "%f" % radius, | ||
126 | "gsnamespace": WikimediaAPI.RCNAMESPACES_CODES[namespace], | ||
127 | "gslimit": limit, | ||
128 | }) | ||