# Reconstructed from the patch for commit
# 0decc784b1427b6c188a8e81d9e9344faa7c9a46 (Minijackson, 2014-10-15),
# subject: Moving Downloader and WikimediaAPI classes into __init__.py
# file + Adding .gitignore.
# That commit creates src/downloader/__init__.py (this module) and deletes
# src/downloader/downloader.py and src/downloader/wikimedia.py, which held
# the same two classes.
"""
Init file of the downloader module.

The downloader module is used to take care of the downloading part of the
program, including manipulation of the wikimedia API.
"""

import urllib.request
# For system proxy
import os


class Downloader:
    """Download web pages, taking the system proxy into account.

    Attributes:
        proxy_address: Value of the ``HTTP_Proxy`` environment variable, or
            ``None`` when it is not set.
        proxy: The ``urllib.request.ProxyHandler`` built from that setting.
        opener: The opener built around ``proxy``. It is also installed as
            the process-wide default opener of ``urllib.request``.
    """

    def __init__(self):
        self.proxy_address = os.environ.get("HTTP_Proxy")
        if self.proxy_address:
            self.proxy = urllib.request.ProxyHandler(
                {'http': self.proxy_address})
        else:
            # Mapping 'http' to None registers a handler that fails on every
            # http request, because it tries to parse None as a proxy URL.
            # With no argument, ProxyHandler detects the proxies itself.
            self.proxy = urllib.request.ProxyHandler()
        self.opener = urllib.request.build_opener(self.proxy)
        urllib.request.install_opener(self.opener)

    def download(self, url):
        """Download the given URL and return its content.

        Args:
            url: Address of the resource to fetch.

        Returns:
            The response body decoded as UTF-8 text.
        """
        # Go through this instance's opener (rather than the global one) and
        # close the response as soon as it has been read.
        with self.opener.open(url) as response:
            return response.read().decode("utf8")

    def download_in_file(self, url, output_file_path):
        """Download the given URL and write its content to the given file.

        Args:
            url: Address of the resource to fetch.
            output_file_path: Path of the file to create or overwrite.
        """
        # Fetch first, so that a failed download does not leave an empty,
        # truncated file behind.
        content = self.download(url)
        # The text was decoded from UTF-8; write it back with the same
        # encoding instead of the platform default.
        with open(output_file_path, "w", encoding="utf8") as output_file:
            output_file.write(content)


class WikimediaAPI:
    """
    Class used to generate wikimedia API urls for several uses.

    The endpoint for this project should be
    "http://en.wikipedia.org/w/api.php", but it can be the API endpoint of
    any other wiki made with the Wikimedia software.
    The return_format can be one of json, php, wddx, xml, yaml, raw, txt,
    dbg, dump or none.
    """

    def __init__(self, endpoint, return_format):
        self.endpoint = endpoint
        self.return_format = return_format

    def get_recent_changes(self, namespace="(Main)"):
        """
        Get the url corresponding to the latest changes made to the wiki.
        (https://www.mediawiki.org/wiki/API:Recentchanges)

        The namespace is used to restrict the results to a certain level. It
        can be "(Main)" which is the default one, "Wikipedia", "File" or
        others. See https://meta.wikimedia.org/wiki/Help:Namespace

        Returns:
            The request url, as a string. Nothing is downloaded here.
        """
        # NOTE(review): the namespace is appended verbatim (not URL-encoded)
        # under the key "namespace"; the API page linked above appears to
        # name this filter "rcnamespace" with numeric ids -- confirm.
        return (self.endpoint
                + "?action=query&list=recentchanges&format="
                + self.return_format + "&namespace=" + namespace)