-rw-r--r--  .gitignore                                                              |  3
-rw-r--r--  src/downloader/__init__.py (renamed from src/downloader/wikimedia.py)   | 27
-rw-r--r--  src/downloader/downloader.py                                            | 26
3 files changed, 29 insertions, 27 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4b8079b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+*~
+*.swp
diff --git a/src/downloader/wikimedia.py b/src/downloader/__init__.py
index 1905d0a..cfcf20a 100644
--- a/src/downloader/wikimedia.py
+++ b/src/downloader/__init__.py
@@ -1,7 +1,32 @@
 """
-Module used to generate wikimedia API urls for several uses
+Init file of the downloader module.
+
+The downloader module is used to take care of the downloading part of the
+program, including manipulation of the wikimedia API.
 """
 
+import urllib.request
+# For system proxy
+import os
+
+
+class Downloader():
+    """Class used to download a given webpage considering system proxy"""
+    def __init__(self):
+        self.proxy_address = os.environ.get("HTTP_Proxy")
+        self.proxy = urllib.request.ProxyHandler({'http': self.proxy_address})
+        self.opener = urllib.request.build_opener(self.proxy)
+        urllib.request.install_opener(self.opener)
+
+    def download(self, url):
+        """ Download the given URL and return the source code """
+        return urllib.request.urlopen(url).read().decode("utf8")
+
+    def download_in_file(self, url, output_file_path):
+        """ Download the given URL and write to the given file """
+        with open(output_file_path, "w") as output_file:
+            output_file.write(self.download(url))
+
 
 class WikimediaAPI():
     """
diff --git a/src/downloader/downloader.py b/src/downloader/downloader.py
deleted file mode 100644
index 740caf8..0000000
--- a/src/downloader/downloader.py
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-Module used to download a given webpage with several options related to the
-wikimedia API
-"""
-
-import urllib.request
-# For system proxy
-import os
-
-
-class Downloader():
-    """Class used to download a given webpage considering system proxy"""
-    def __init__(self):
-        self.proxy_address = os.environ.get("HTTP_Proxy")
-        self.proxy = urllib.request.ProxyHandler({'http': self.proxy_address})
-        self.opener = urllib.request.build_opener(self.proxy)
-        urllib.request.install_opener(self.opener)
-
-    def download(self, url):
-        """ Download the given URL and return the source code """
-        return urllib.request.urlopen(url).read().decode("utf8")
-
-    def download_in_file(self, url, output_file_path):
-        """ Download the given URL and write to the given file """
-        with open(output_file_path, "w") as output_file:
-            output_file.write(self.download(url))
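
The deleted file's Downloader class is identical to the one added to __init__.py above, so the commit is effectively a move rather than a removal. Any caller code (hypothetical, not shown in this diff) that imported the old module would change roughly like this:

    # before the commit
    from downloader.downloader import Downloader
    # after the commit
    from downloader import Downloader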