summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/downloader/__init__.py 88
1 file changed, 49 insertions, 39 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py
index 15fba41..3a02ebe 100644
--- a/src/downloader/__init__.py
+++ b/src/downloader/__init__.py
@@ -11,6 +11,7 @@ import urllib.parse
11 11
12class Downloader(): 12class Downloader():
13 """Class used to download a given webpage considering system proxy""" 13 """Class used to download a given webpage considering system proxy"""
14
14 def __init__(self): 15 def __init__(self):
15 """ Downloader class constructor """ 16 """ Downloader class constructor """
16 self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) 17 self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies())
@@ -31,6 +32,39 @@ class WikimediaAPI():
31 """ 32 """
32 Class used to generate wikimedia API urls for several uses 33 Class used to generate wikimedia API urls for several uses
33 """ 34 """
35
# Maps a human-readable namespace name to the numeric namespace id,
# kept as a string because it is passed as the "rcnamespace" URL
# parameter. Ids follow the MediaWiki built-in numbering: subject
# namespaces are even and their talk namespaces are the following odd
# number. See https://www.mediawiki.org/wiki/Manual:Namespace
RCNAMESPACES_CODES = {
    "(Main)": "0",
    "Talk": "1",
    # "User" was previously missing, which shifted every id from
    # "User talk" through "Category talk" down by one.
    "User": "2",
    "User talk": "3",
    "Wikipedia": "4",
    "Wikipedia talk": "5",
    "File": "6",
    "File talk": "7",
    "MediaWiki": "8",
    "MediaWiki talk": "9",
    "Template": "10",
    "Template talk": "11",
    "Help": "12",
    "Help talk": "13",
    "Category": "14",
    "Category talk": "15",
    # Custom Wikipedia namespaces
    "Portal": "100",
    "Portal talk": "101",
    "Book": "108",
    "Book talk": "109",
    "Draft": "118",
    "Draft talk": "119",
    "Education Program": "446",
    "Education Program talk": "447",
    "TimedText": "710",
    "TimedText talk": "711",
    "Module": "828",
    "Module talk": "829",
    "Topic": "2600",
}
67
34 def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", 68 def __init__(self, endpoint="http://en.wikipedia.org/w/api.php",
35 return_format="json"): 69 return_format="json"):
36 """ 70 """
@@ -46,6 +80,18 @@ class WikimediaAPI():
46 self.endpoint = endpoint 80 self.endpoint = endpoint
47 self.return_format = return_format 81 self.return_format = return_format
48 82
def gen_query_url(self, parms):
    """
    Generate the query URL.

    The "action" and "format" entries are always set by this method,
    overriding any value supplied under those keys. The mapping passed
    in by the caller is left unmodified.

    :param parms: URL parameters dict
    :return: query URL
    """
    # Work on a copy: writing "action"/"format" directly into ``parms``
    # would leak those keys into the caller's dict.
    query = dict(parms)
    query["action"] = "query"
    query["format"] = self.return_format
    query_str = urllib.parse.urlencode(query)
    # Joining with a bare "?query" reference keeps the endpoint's scheme,
    # host and path and sets the query string.
    return urllib.parse.urljoin(self.endpoint, "?" + query_str)
94
49 def get_recent_changes(self, namespace="(Main)"): 95 def get_recent_changes(self, namespace="(Main)"):
50 """ 96 """
51 Get the url corresponding to the latest changes made to the wiki. 97 Get the url corresponding to the latest changes made to the wiki.
@@ -56,43 +102,7 @@ class WikimediaAPI():
56 others. It will be converted to an int corresponding to the rcnamespace 102 others. It will be converted to an int corresponding to the rcnamespace
57 parameter. See https://meta.wikimedia.org/wiki/Help:Namespace 103 parameter. See https://meta.wikimedia.org/wiki/Help:Namespace
58 """ 104 """
59 rcnamespaces = { 105 return self.gen_query_url({
60 "(Main)": "0",
61 "Talk": "1",
62 "User talk": "2",
63 "Wikipedia": "3",
64 "Wikipedia talk": "4",
65 "File": "5",
66 "File talk": "6",
67 "MediaWiki": "7",
68 "MediaWiki talk": "8",
69 "Template": "9",
70 "Template talk": "10",
71 "Help": "11",
72 "Help talk": "12",
73 "Category": "13",
74 "Category talk": "14",
75 # Custom Wikipedia namespaces
76 "Portal": "100",
77 "Portal talk": "101",
78 "Book": "108",
79 "Book talk": "109",
80 "Draft": "118",
81 "Draft talk": "119",
82 "Education Program": "446",
83 "Education Program talk": "447",
84 "TimedText": "710",
85 "TimedText talk": "711",
86 "Module": "828",
87 "Module talk": "829",
88 "Topic": "2600"
89 }
90
91 url_params = {
92 "action": "query",
93 "list": "recentchanges", 106 "list": "recentchanges",
94 "format": self.return_format, 107 "rcnamespace": WikimediaAPI.RCNAMESPACES_CODES[namespace],
95 "rcnamespace": rcnamespaces[namespace], 108 })
96 }
97 url_params_str = urllib.parse.urlencode(url_params)
98 return urllib.parse.urljoin(self.endpoint, "?" + url_params_str)