# Origin: commit 1dde37d17ee8649b124ccbf4a2ab5962811916b5 by Minijackson
# (Fri, 10 Oct 2014), "Adding beginning of downloader class and module",
# which created src/downloader/downloader.py.  Per that commit message, the
# downloader module takes care of the downloading part of the program,
# including manipulation of the wikimedia API, so Downloader is not meant to
# remain the only class in this module.
"""
Module used to download a given webpage with several options related to the
wikimedia API
"""

import urllib.request
# For system proxy
import os


class Downloader():
    """Class used to download a given webpage considering system proxy.

    Attributes:
        proxy_address: URL of the HTTP proxy in use, or None when no proxy
            is configured.
        proxy: the urllib ProxyHandler built from proxy_address.
        opener: the urllib opener built around that handler; it is also
            installed as the process-wide default opener.
    """

    def __init__(self):
        """Build an opener that honours the configured HTTP proxy, if any."""
        # "HTTP_Proxy" is the variable this class has always read, so it
        # keeps priority; otherwise ask urllib for the system configuration.
        address = os.environ.get("HTTP_Proxy")
        if not address:
            address = urllib.request.getproxies().get("http")
        self.proxy_address = address or None

        # An explicit {'http': None} mapping makes every http:// request fail
        # with a TypeError while urllib parses the proxy URL, so pass an
        # empty mapping (direct connection) when no proxy is configured.
        proxies = {"http": self.proxy_address} if self.proxy_address else {}
        self.proxy = urllib.request.ProxyHandler(proxies)
        self.opener = urllib.request.build_opener(self.proxy)
        # Kept so that plain urllib.request.urlopen() calls made elsewhere
        # keep going through the same opener.
        urllib.request.install_opener(self.opener)

    def download(self, url):
        """Download the given URL and return the source code.

        The response body is decoded as UTF-8; a UnicodeDecodeError is raised
        if it is not valid UTF-8.  Errors raised by urllib while opening the
        URL are propagated unchanged.
        """
        # The context manager closes the response even if read/decode fails.
        with self.opener.open(url) as response:
            return response.read().decode("utf8")

    def download_in_file(self, url, output_file_path):
        """Download the given URL and write it to the given file as UTF-8."""
        # Download before opening the file so that a failed download does not
        # leave an empty or truncated output file behind.
        content = self.download(url)
        # Explicit encoding: the locale default may be unable to encode the
        # downloaded text.
        with open(output_file_path, "w", encoding="utf8") as output_file:
            output_file.write(content)