diff options
author | Minijackson | 2014-10-10 15:39:09 +0200 |
---|---|---|
committer | Minijackson | 2014-10-10 15:39:09 +0200 |
commit | 1dde37d17ee8649b124ccbf4a2ab5962811916b5 (patch) | |
tree | 315d8c5ef905e04cb8fd8c9ea59b80dd133da1b6 /src/downloader | |
parent | ac9450dc36159782518484247c241c83c8de91f6 (diff) | |
download | wikistats-1dde37d17ee8649b124ccbf4a2ab5962811916b5.tar.gz |
Adding beginning of downloader class and module
The downloader module takes care of the downloading part of the
program, including manipulation of the Wikimedia API; therefore, the
Downloader class won't be the only one in the downloader module.
Diffstat (limited to 'src/downloader')
-rw-r--r-- | src/downloader/downloader.py | 26 |
1 files changed, 26 insertions, 0 deletions
diff --git a/src/downloader/downloader.py b/src/downloader/downloader.py new file mode 100644 index 0000000..740caf8 --- /dev/null +++ b/src/downloader/downloader.py | |||
@@ -0,0 +1,26 @@ | |||
1 | """ | ||
2 | Module used to download a given webpage with several options related to the | ||
3 | wikimedia API | ||
4 | """ | ||
5 | |||
6 | import urllib.request | ||
7 | # For system proxy | ||
8 | import os | ||
9 | |||
10 | |||
class Downloader:
    """Download web pages, honouring the system HTTP proxy.

    Attributes:
        proxy_address: Value of the ``HTTP_Proxy`` environment variable, or
            ``None`` when the variable is not set.
        proxy: The ``urllib.request.ProxyHandler`` given to ``opener``.
        opener: The ``OpenerDirector`` used for every download.
    """

    def __init__(self):
        """Build the opener and install it as urllib's global default."""
        self.proxy_address = os.environ.get("HTTP_Proxy")
        if self.proxy_address:
            self.proxy = urllib.request.ProxyHandler(
                {'http': self.proxy_address})
        else:
            # A None (or empty) proxy URL in the mapping makes every
            # http:// request fail inside urllib's proxy parsing, so let
            # ProxyHandler detect the system proxy settings by itself.
            self.proxy = urllib.request.ProxyHandler()
        self.opener = urllib.request.build_opener(self.proxy)
        # Still installed globally, so plain urllib.request.urlopen() calls
        # go through this opener as well.
        urllib.request.install_opener(self.opener)

    def download(self, url):
        """Download the given URL and return its content as text.

        Args:
            url: The URL to fetch.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            urllib.error.URLError: If the URL cannot be opened.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        # The context manager closes the response even if read/decode fails.
        with self.opener.open(url) as response:
            return response.read().decode("utf8")

    def download_in_file(self, url, output_file_path):
        """Download the given URL and write its content to a file.

        Args:
            url: The URL to fetch.
            output_file_path: Path of the file to create or overwrite; it is
                written as UTF-8 text.

        Raises:
            urllib.error.URLError: If the URL cannot be opened.
            UnicodeDecodeError: If the body is not valid UTF-8.
            OSError: If the output file cannot be written.
        """
        # Fetch first: a failed download must not truncate an existing file.
        content = self.download(url)
        # Explicit encoding so the result does not depend on the locale.
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(content)