diff options
-rw-r--r-- | src/downloader/__init__.py | 64 |
1 files changed, 50 insertions, 14 deletions
diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py
index cfcf20a..4dd6169 100644
--- a/src/downloader/__init__.py
+++ b/src/downloader/__init__.py | |||
@@ -6,15 +6,13 @@ program, including manipulation of the wikimedia API. | |||
6 | """ | 6 | """ |
7 | 7 | ||
8 | import urllib.request | 8 | import urllib.request |
9 | # For system proxy | ||
10 | import os | ||
11 | 9 | ||
12 | 10 | ||
13 | class Downloader(): | 11 | class Downloader(): |
14 | """Class used to download a given webpage considering system proxy""" | 12 | """Class used to download a given webpage considering system proxy""" |
15 | def __init__(self): | 13 | def __init__(self): |
16 | self.proxy_address = os.environ.get("HTTP_Proxy") | 14 | """ Downloader class constructor """ |
17 | self.proxy = urllib.request.ProxyHandler({'http': self.proxy_address}) | 15 | self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) |
18 | self.opener = urllib.request.build_opener(self.proxy) | 16 | self.opener = urllib.request.build_opener(self.proxy) |
19 | urllib.request.install_opener(self.opener) | 17 | urllib.request.install_opener(self.opener) |
20 | 18 | ||
@@ -31,13 +29,19 @@ class Downloader(): | |||
31 | class WikimediaAPI(): | 29 | class WikimediaAPI(): |
32 | """ | 30 | """ |
33 | Class used to generate wikimedia API urls for several uses | 31 | Class used to generate wikimedia API urls for several uses |
34 | |||
35 | The endpoint for this project should be "http://en.wikipedia.org/w/api.php" | ||
36 | but can be other wiki api endpoint made with the Wikimedia software. | ||
37 | The return_format can be one of json, php, wddx, xml, yaml, raw, txt, dbg, | ||
38 | dump or none. | ||
39 | """ | 32 | """ |
40 | def __init__(self, endpoint, return_format): | 33 | def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", |
34 | return_format="json"): | ||
35 | """ | ||
36 | WikimediaAPI class constructor | ||
37 | |||
38 | The endpoint for this project should be | ||
39 | "http://en.wikipedia.org/w/api.php" but it can be any other wiki | ||
40 | api endpoint made with the Wikimedia software. | ||
41 | |||
42 | The return_format can be one of json, php, wddx, xml, yaml, raw, txt, | ||
43 | dbg, dump or none. | ||
44 | """ | ||
41 | self.endpoint = endpoint | 45 | self.endpoint = endpoint |
42 | self.return_format = return_format | 46 | self.return_format = return_format |
43 | 47 | ||
@@ -47,8 +51,40 @@ class WikimediaAPI(): | |||
47 | (https://www.mediawiki.org/wiki/API:Recentchanges) | 51 | (https://www.mediawiki.org/wiki/API:Recentchanges) |
48 | 52 | ||
49 | The namespace is used to restrict the results to a certain level. It | 53 | The namespace is used to restrict the results to a certain level. It |
50 | can be "(Main)" which is the default one, "Wikipedia", "File" or | 54 | can be (Main) which is the default one, "Wikipedia", "File" or |
51 | others. See https://meta.wikimedia.org/wiki/Help:Namespace | 55 | others. It will be converted to an int corresponding to the rcnamespace |
56 | parameter. See https://meta.wikimedia.org/wiki/Help:Namespace | ||
52 | """ | 57 | """ |
53 | return self.base_url + "?action=query&list=recentchanges&format="\ | 58 | rcnamespaces = { |
54 | + self.return_format + "&namespace=" + namespace | 59 | "(Main)": "0", |
60 | "Talk": "1", | ||
61 | "User talk": "2", | ||
62 | "Wikipedia": "3", | ||
63 | "Wikipedia talk": "4", | ||
64 | "File": "5", | ||
65 | "File talk": "6", | ||
66 | "MediaWiki": "7", | ||
67 | "MediaWiki talk": "8", | ||
68 | "Template": "9", | ||
69 | "Template talk": "10", | ||
70 | "Help": "11", | ||
71 | "Help talk": "12", | ||
72 | "Category": "13", | ||
73 | "Category talk": "14", | ||
74 | # Custom Wikipedia namespaces | ||
75 | "Portal": "100", | ||
76 | "Portal talk": "101", | ||
77 | "Book": "108", | ||
78 | "Book talk": "109", | ||
79 | "Draft": "118", | ||
80 | "Draft talk": "119", | ||
81 | "Education Program": "446", | ||
82 | "Education Program talk": "447", | ||
83 | "TimedText": "710", | ||
84 | "TimedText talk": "711", | ||
85 | "Module": "828", | ||
86 | "Module talk": "829", | ||
87 | "Topic": "2600" | ||
88 | } | ||
89 | return self.endpoint + "?action=query&list=recentchanges&format="\ | ||
90 | + self.return_format + "&namespace=" + rcnamespaces[namespace] | ||