"""
Init file of the downloader module.

The downloader module is used to take care of the downloading part of the
program, including manipulation of the wikimedia API.
"""

import urllib.request


class Downloader():
    """Class used to download a given webpage considering system proxy"""

    def __init__(self):
        """
        Downloader class constructor

        Builds a URL opener that goes through the proxies reported by
        ``urllib.request.getproxies()`` and installs it as the process-wide
        default opener.
        """
        self.proxy = urllib.request.ProxyHandler(urllib.request.getproxies())
        self.opener = urllib.request.build_opener(self.proxy)
        # Kept for backward compatibility: code calling
        # urllib.request.urlopen() directly also goes through the proxy.
        urllib.request.install_opener(self.opener)

    def download(self, url):
        """
        Download the given URL and return the source code

        The response body is decoded as UTF-8 and returned as a string.
        Errors raised by urllib while opening the URL, and
        UnicodeDecodeError for a body that is not valid UTF-8, propagate to
        the caller.
        """
        # The context manager closes the response (and its connection) even
        # if reading or decoding fails.
        with self.opener.open(url) as response:
            return response.read().decode("utf8")

    def download_in_file(self, url, output_file_path):
        """
        Download the given URL and write to the given file

        The file is created or overwritten, and written as UTF-8.
        """
        # Download first so that a failed request does not leave an empty,
        # truncated file behind.
        content = self.download(url)
        # Explicit encoding: the platform default may be unable to represent
        # every character of the downloaded text.
        with open(output_file_path, "w", encoding="utf8") as output_file:
            output_file.write(content)


class WikimediaAPI():
    """
    Class used to generate wikimedia API urls for several uses
    """

    # Namespace name -> namespace id (as a string, ready to be put in a URL).
    # See https://meta.wikimedia.org/wiki/Help:Namespace
    _RC_NAMESPACES = {
        "(Main)": "0",
        "Talk": "1",
        "User": "2",
        "User talk": "3",
        "Wikipedia": "4",
        "Wikipedia talk": "5",
        "File": "6",
        "File talk": "7",
        "MediaWiki": "8",
        "MediaWiki talk": "9",
        "Template": "10",
        "Template talk": "11",
        "Help": "12",
        "Help talk": "13",
        "Category": "14",
        "Category talk": "15",
        # Custom Wikipedia namespaces
        "Portal": "100",
        "Portal talk": "101",
        "Book": "108",
        "Book talk": "109",
        "Draft": "118",
        "Draft talk": "119",
        "Education Program": "446",
        "Education Program talk": "447",
        "TimedText": "710",
        "TimedText talk": "711",
        "Module": "828",
        "Module talk": "829",
        "Topic": "2600",
    }

    def __init__(self, endpoint="http://en.wikipedia.org/w/api.php",
                 return_format="json"):
        """
        WikimediaAPI class constructor

        The endpoint for this project should be
        "http://en.wikipedia.org/w/api.php" but it can be any other wiki api
        endpoint made with the Wikimedia software.

        The return_format can be one of json, php, wddx, xml, yaml, raw, txt,
        dbg, dump or none.
        """
        self.endpoint = endpoint
        self.return_format = return_format

    def get_recent_changes(self, namespace="(Main)"):
        """
        Get the url corresponding to the latest changes made to the wiki.
        (https://www.mediawiki.org/wiki/API:Recentchanges)

        The namespace is used to restrict the results to a certain level. It
        can be (Main) which is the default one, "Wikipedia", "File" or others.
        It will be converted to an int corresponding to the rcnamespace
        parameter.
        See https://meta.wikimedia.org/wiki/Help:Namespace

        Raises KeyError if the namespace name is not a known one.
        """
        namespace_id = self._RC_NAMESPACES[namespace]
        # The recentchanges list module reads its parameters with the "rc"
        # prefix, hence "rcnamespace".
        return (self.endpoint
                + "?action=query&list=recentchanges&format="
                + self.return_format
                + "&rcnamespace=" + namespace_id)