From 362f22b854c2c2e74fc19fd4840dfb5392d0198b Mon Sep 17 00:00:00 2001 From: Pacien TRAN-GIRARD Date: Wed, 22 Oct 2014 10:19:43 +0200 Subject: Generate query URL in a function --- src/downloader/__init__.py | 88 ++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 39 deletions(-) (limited to 'src/downloader/__init__.py') diff --git a/src/downloader/__init__.py b/src/downloader/__init__.py index 15fba41..3a02ebe 100644 --- a/src/downloader/__init__.py +++ b/src/downloader/__init__.py @@ -11,6 +11,7 @@ import urllib.parse class Downloader(): """Class used to download a given webpage considering system proxy""" + def __init__(self): """ Downloader class constructor """ self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies()) @@ -31,6 +32,39 @@ class WikimediaAPI(): """ Class used to generate wikimedia API urls for several uses """ + + RCNAMESPACES_CODES = { + "(Main)": "0", + "Talk": "1", + "User talk": "2", + "Wikipedia": "3", + "Wikipedia talk": "4", + "File": "5", + "File talk": "6", + "MediaWiki": "7", + "MediaWiki talk": "8", + "Template": "9", + "Template talk": "10", + "Help": "11", + "Help talk": "12", + "Category": "13", + "Category talk": "14", + # Custom Wikipedia namespaces + "Portal": "100", + "Portal talk": "101", + "Book": "108", + "Book talk": "109", + "Draft": "118", + "Draft talk": "119", + "Education Program": "446", + "Education Program talk": "447", + "TimedText": "710", + "TimedText talk": "711", + "Module": "828", + "Module talk": "829", + "Topic": "2600" + } + def __init__(self, endpoint="http://en.wikipedia.org/w/api.php", return_format="json"): """ @@ -46,6 +80,18 @@ class WikimediaAPI(): self.endpoint = endpoint self.return_format = return_format + def gen_query_url(self, parms): + """ + Generate the query URL. + + :param parms: URL parameters dict + :return: query URL + """ + parms["action"] = "query" + parms["format"] = self.return_format + parms_str = urllib.parse.urlencode(parms) + return urllib.parse.urljoin(self.endpoint, "?" + parms_str) + def get_recent_changes(self, namespace="(Main)"): """ Get the url corresponding to the latest changes made to the wiki. @@ -56,43 +102,7 @@ class WikimediaAPI(): others. It will be converted to an int corresponding to the rcnamespace parameter. See https://meta.wikimedia.org/wiki/Help:Namespace """ - rcnamespaces = { - "(Main)": "0", - "Talk": "1", - "User talk": "2", - "Wikipedia": "3", - "Wikipedia talk": "4", - "File": "5", - "File talk": "6", - "MediaWiki": "7", - "MediaWiki talk": "8", - "Template": "9", - "Template talk": "10", - "Help": "11", - "Help talk": "12", - "Category": "13", - "Category talk": "14", - # Custom Wikipedia namespaces - "Portal": "100", - "Portal talk": "101", - "Book": "108", - "Book talk": "109", - "Draft": "118", - "Draft talk": "119", - "Education Program": "446", - "Education Program talk": "447", - "TimedText": "710", - "TimedText talk": "711", - "Module": "828", - "Module talk": "829", - "Topic": "2600" - } - - url_params = { - "action": "query", + return self.gen_query_url({ "list": "recentchanges", - "format": self.return_format, - "rcnamespace": rcnamespaces[namespace], - } - url_params_str = urllib.parse.urlencode(url_params) - return urllib.parse.urljoin(self.endpoint, "?" + url_params_str) + "rcnamespace": WikimediaAPI.RCNAMESPACES_CODES[namespace], + }) -- cgit v1.2.3