-
Notifications
You must be signed in to change notification settings - Fork 0
/
flickrDownloader.py
74 lines (65 loc) · 2.46 KB
/
flickrDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from flickrapi import FlickrAPI
import requests
import os
import sys
import time
def get_urls(image_tag, max_amount, key, secret):
    """Collect image URLs from a Flickr search using the flickrapi library.

    The search uses ``image_tag`` both as free text and as a tag, sorted by
    relevance, and requests the ``url_o`` extra for every result.

    Args:
        image_tag: Search term, passed as both ``text`` and ``tags``.
        max_amount: Maximum number of search results to inspect. Results that
            carry no ``url_o`` value still count toward this limit, so fewer
            than ``max_amount`` URLs may be returned.
        key: Flickr API key.
        secret: Flickr API secret.

    Returns:
        A list with the ``url_o`` value of every inspected result that had
        one, in the order the results were returned.
    """
    urls = []
    if max_amount <= 0:
        # Nothing was requested, so do not contact the API at all.
        print("Done fetching urls, fetched {} urls out of {}".format(len(urls), max_amount))
        return urls

    flickr = FlickrAPI(key, secret)
    images = flickr.walk(text=image_tag, tag_mode='all', tags=image_tag,
                         extras='url_o', per_page=50, sort='relevance')
    for count, image in enumerate(images, start=1):
        print("Fetching url for image number {}".format(count))
        try:
            url = image.get('url_o')
        except Exception:
            # Best effort: one malformed result must not abort the whole run.
            # Unlike a bare ``except``, this lets Ctrl-C through.
            print("Url for image number {} could not be fetched".format(count))
        else:
            if url is not None:
                urls.append(url)
            else:
                print("Url for image number {} returned None".format(count))
        if count >= max_amount:
            # Stop here instead of pulling one more result from the iterator
            # just to discover that the limit has been reached.
            break

    # Printed on every exit path, including when the search runs out of
    # results before max_amount is reached.
    print("Done fetching urls, fetched {} urls out of {}".format(len(urls), max_amount))
    return urls
def download_images(urls, output_folder, delay=5.0, timeout=30.0):
    """Download every URL in ``urls`` into ``output_folder``.

    Each file is named after the last ``/``-separated component of its URL.
    URLs whose target file already exists are skipped. A failure on one URL
    is reported and the remaining URLs are still processed.

    Args:
        urls: List of image URLs to download.
        output_folder: Directory to write into; created (including missing
            parent directories) if it does not exist.
        delay: Seconds to wait after each successful download, to avoid being
            rate limited or temporarily blocked.
        timeout: Timeout in seconds passed to ``requests.get``.
    """
    os.makedirs(output_folder, exist_ok=True)
    total = len(urls)
    print("Starting download of {} files".format(total))
    for number, url in enumerate(urls, start=1):
        if not isinstance(url, str):
            # Cannot derive a file name from this entry; report and move on.
            print("Failed to download url number {}".format(number))
            continue
        path_to_write = os.path.join(output_folder, url.split("/")[-1])
        if os.path.exists(path_to_write):
            print("Skipped {} because it already exists".format(number))
            continue
        try:
            response = requests.get(url, timeout=timeout)
            # Turn HTTP error statuses into exceptions so an error page is
            # never saved under an image file name.
            response.raise_for_status()
            # Fetch the whole body before creating the file: a failed
            # download must not leave an empty file behind that later runs
            # would skip as "already exists".
            data = response.content
            with open(path_to_write, 'wb') as outfile:
                outfile.write(data)
        except (requests.RequestException, OSError):
            print("Failed to download url number {}".format(number))
            continue
        print("Done downloading {} of {}".format(number, total))
        time.sleep(delay)
    print("Done.")
def get_keys_from_file(path):
    """Read the Flickr API key and secret from a text file.

    The first line of the file is the key and the second line is the secret.
    Surrounding whitespace is stripped from both; further lines are ignored.

    Args:
        path: Path of the text file holding the two values.

    Returns:
        A ``(key, secret)`` tuple of strings.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the file has fewer than two lines, or the key or the
            secret line is blank.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as f:
        lines = f.readlines()
    if len(lines) < 2:
        raise ValueError(
            "{} must contain the API key on line 1 and the secret on line 2".format(path)
        )
    key = lines[0].strip()
    secret = lines[1].strip()
    if not key or not secret:
        raise ValueError("{} contains an empty API key or secret".format(path))
    return key, secret
def main(argv=None):
    """Command-line entry point: look up image URLs and download them.

    Expects three arguments: the search tag, the maximum number of search
    results to inspect, and the output folder. The API key and secret are
    read from ``./keys.txt``.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]`` when omitted.

    Raises:
        SystemExit: If fewer than three arguments are given or the second
            argument is not an integer.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        sys.exit("Usage: flickrDownloader.py <tag> <max_amount> <output_folder>")
    tag = args[0]
    try:
        max_amount = int(args[1])
    except ValueError:
        sys.exit("max_amount must be an integer, got {!r}".format(args[1]))
    out = args[2]
    key, secret = get_keys_from_file("./keys.txt")
    urls = get_urls(tag, max_amount, key, secret)
    download_images(urls, out)
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()