diff options
Diffstat (limited to 'src/downloader')
-rw-r--r-- | src/downloader/__init__.py | 88 |
1 files changed, 49 insertions, 39 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index 15fba41..3a02ebe 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py | |||
@@ -11,6 +11,7 @@ import urllib.parse | |||
11 | 11 | ||
12 | class Downloader(): | 12 | class Downloader(): |
13 | """Class used to download a given webpage considering system proxy""" | 13 | """Class used to download a given webpage considering system proxy""" |
14 | |||
14 | def __init__(self): | 15 | def __init__(self): |
15 | """ Downloader class constructor """ | 16 | """ Downloader class constructor """ |
16 | self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) | 17 | self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) |
@@ -31,6 +32,39 @@ class WikimediaAPI(): | |||
31 | """ | 32 | """ |
32 | Class used to generate wikimedia API urls for several uses | 33 | Class used to generate wikimedia API urls for several uses |
33 | """ | 34 | """ |
35 | |||
# Maps a human-readable namespace name to the numeric namespace id (as a
# string) expected by the API's ``rcnamespace`` parameter.
# Built-in MediaWiki namespaces come in subject/talk pairs: the subject
# namespace has an even id and its talk namespace the following odd id.
RCNAMESPACES_CODES = {
    "(Main)": "0",
    "Talk": "1",
    "User": "2",
    "User talk": "3",
    "Wikipedia": "4",
    "Wikipedia talk": "5",
    "File": "6",
    "File talk": "7",
    "MediaWiki": "8",
    "MediaWiki talk": "9",
    "Template": "10",
    "Template talk": "11",
    "Help": "12",
    "Help talk": "13",
    "Category": "14",
    "Category talk": "15",
    # Custom Wikipedia namespaces
    "Portal": "100",
    "Portal talk": "101",
    "Book": "108",
    "Book talk": "109",
    "Draft": "118",
    "Draft talk": "119",
    "Education Program": "446",
    "Education Program talk": "447",
    "TimedText": "710",
    "TimedText talk": "711",
    "Module": "828",
    "Module talk": "829",
    "Topic": "2600",
}
67 | |||
34 | def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", | 68 | def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", |
35 | return_format="json"): | 69 | return_format="json"): |
36 | """ | 70 | """ |
@@ -46,6 +80,18 @@ class WikimediaAPI(): | |||
46 | self.endpoint = endpoint | 80 | self.endpoint = endpoint |
47 | self.return_format = return_format | 81 | self.return_format = return_format |
48 | 82 | ||
def gen_query_url(self, parms):
    """
    Generate the query URL.

    The ``action`` and ``format`` parameters are always set here
    (``action`` to ``"query"``, ``format`` to ``self.return_format``),
    overriding any value supplied for them in ``parms``.

    :param parms: URL parameters dict; it is left unmodified
    :return: query URL built from ``self.endpoint`` and the URL-encoded
        parameters
    """
    # Work on a copy so the caller's dict is not mutated as a side effect.
    query = dict(parms)
    query["action"] = "query"
    query["format"] = self.return_format
    query_str = urllib.parse.urlencode(query)
    # Joining a bare "?query" reference keeps the endpoint's scheme, host
    # and path and only replaces its query string.
    return urllib.parse.urljoin(self.endpoint, "?" + query_str)
94 | |||
49 | def get_recent_changes(self, namespace="(Main)"): | 95 | def get_recent_changes(self, namespace="(Main)"): |
50 | """ | 96 | """ |
51 | Get the url corresponding to the latest changes made to the wiki. | 97 | Get the url corresponding to the latest changes made to the wiki. |
@@ -56,43 +102,7 @@ class WikimediaAPI(): | |||
56 | others. It will be converted to an int corresponding to the rcnamespace | 102 | others. It will be converted to an int corresponding to the rcnamespace |
57 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace | 103 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace |
58 | """ | 104 | """ |
59 | rcnamespaces = { | 105 | return self.gen_query_url({ |
60 | "(Main)": "0", | ||
61 | "Talk": "1", | ||
62 | "User talk": "2", | ||
63 | "Wikipedia": "3", | ||
64 | "Wikipedia talk": "4", | ||
65 | "File": "5", | ||
66 | "File talk": "6", | ||
67 | "MediaWiki": "7", | ||
68 | "MediaWiki talk": "8", | ||
69 | "Template": "9", | ||
70 | "Template talk": "10", | ||
71 | "Help": "11", | ||
72 | "Help talk": "12", | ||
73 | "Category": "13", | ||
74 | "Category talk": "14", | ||
75 | # Custom Wikipedia namespaces | ||
76 | "Portal": "100", | ||
77 | "Portal talk": "101", | ||
78 | "Book": "108", | ||
79 | "Book talk": "109", | ||
80 | "Draft": "118", | ||
81 | "Draft talk": "119", | ||
82 | "Education Program": "446", | ||
83 | "Education Program talk": "447", | ||
84 | "TimedText": "710", | ||
85 | "TimedText talk": "711", | ||
86 | "Module": "828", | ||
87 | "Module talk": "829", | ||
88 | "Topic": "2600" | ||
89 | } | ||
90 | |||
91 | url_params = { | ||
92 | "action": "query", | ||
93 | "list": "recentchanges", | 106 | "list": "recentchanges", |
94 | "format": self.return_format, | 107 | "rcnamespace": WikimediaAPI.RCNAMESPACES_CODES[namespace], |
95 | "rcnamespace": rcnamespaces[namespace], | 108 | }) |
96 | } | ||
97 | url_params_str = urllib.parse.urlencode(url_params) | ||
98 | return urllib.parse.urljoin(self.endpoint, "?" + url_params_str) | ||