diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/downloader/downloader.py | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/src/downloader/downloader.py b/src/downloader/downloader.py new file mode 100644 index 0000000..740caf8 --- /dev/null +++ b/src/downloader/downloader.py | |||
@@ -0,0 +1,26 @@ | |||
1 | """ | ||
2 | Module used to download a given webpage with several options related to the | ||
3 | wikimedia API | ||
4 | """ | ||
5 | |||
6 | import urllib.request | ||
7 | # For system proxy | ||
8 | import os | ||
9 | |||
10 | |||
class Downloader:
    """Download web pages, routing HTTP requests through the system proxy.

    Attributes:
        proxy_address: Value of the ``HTTP_Proxy`` environment variable, or
            ``None`` when it is not set.
        proxy: The ``urllib.request.ProxyHandler`` used by ``opener``.
        opener: The opener used by :meth:`download`; it is also installed as
            urllib's global default opener.
    """

    def __init__(self):
        self.proxy_address = os.environ.get("HTTP_Proxy")
        if self.proxy_address:
            self.proxy = urllib.request.ProxyHandler(
                {'http': self.proxy_address}
            )
        else:
            # A ``None`` proxy URL makes every http:// request fail inside
            # urllib's proxy parsing, so fall back to urllib's own proxy
            # detection when the variable is not set.
            self.proxy = urllib.request.ProxyHandler()
        self.opener = urllib.request.build_opener(self.proxy)
        # Kept for backward compatibility: code calling
        # ``urllib.request.urlopen`` directly keeps using this opener.
        urllib.request.install_opener(self.opener)

    def download(self, url):
        """Download the given URL and return its content as text.

        Args:
            url: URL string (or ``urllib.request.Request``) to fetch.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            urllib.error.URLError: If the URL cannot be fetched.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        # Use this instance's opener (not the global one, which other code
        # may replace) and close the response once it has been read.
        with self.opener.open(url) as response:
            return response.read().decode("utf8")

    def download_in_file(self, url, output_file_path):
        """Download the given URL and write its content to the given file.

        Args:
            url: URL string (or ``urllib.request.Request``) to fetch.
            output_file_path: Path of the file to create or overwrite; it is
                written as UTF-8 text.
        """
        # Download first so that a failed request does not truncate an
        # existing file or leave an empty one behind.
        content = self.download(url)
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(content)