1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
import os
import threading
import traceback
from contextlib import closing

import requests
# HTTP headers sent with every download request.  The User-Agent imitates a
# desktop Chrome browser (presumably because the server rejects the default
# python-requests agent -- not verifiable from this file).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/57.0.2987.133 Safari/537.36'
    )
}

# Directory the downloaded files are written to.
out_dir = './out'
# Number of worker threads started by the driver code at the end of the file.
thread_num = 4

# Create the output directory up front.  Attempting the creation and then
# inspecting the failure avoids the check-then-create race of
# "if not exists: mkdir", and refuses to continue when the path exists but
# is not a directory (every later download would fail on it anyway).
try:
    os.makedirs(out_dir)
except OSError:
    if not os.path.isdir(out_dir):
        raise
def download(img_url, img_name, timeout=30):
    """Fetch ``img_url`` and store it as ``img_name`` inside ``out_dir``.

    Nothing is downloaded when the target file already exists, so an
    interrupted run can simply be restarted.  The body is streamed to a
    temporary ``.part`` file and renamed into place only after the last
    chunk has been written; a failed transfer therefore never leaves a
    truncated file that the existence check would mistake for a finished
    download on the next run.

    Args:
        img_url: Address of the resource to fetch.
        img_name: File name, relative to ``out_dir``, to save it under.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without a timeout a stalled server would block the calling
            thread indefinitely.

    Returns:
        None.  Responses with a non-2xx status, or whose Content-Length
        header is missing or zero, are reported on stdout and skipped.

    Raises:
        Errors from the HTTP request or the file system are not caught
        here; they propagate to the caller.
    """
    target_path = os.path.join(out_dir, img_name)
    if os.path.isfile(target_path):
        return

    with closing(requests.get(img_url, stream=True, headers=headers,
                              timeout=timeout)) as r:
        rc = r.status_code
        if not 200 <= rc <= 299:
            # Single parenthesised argument: prints identically on
            # Python 2 and Python 3.
            print('returnCode%s\t%s' % (rc, img_url))
            return

        # A missing header falls back to '0', so responses without a
        # Content-Length (e.g. chunked transfers) are skipped as empty too.
        content_length = int(r.headers.get('content-length', '0'))
        if content_length == 0:
            print('size0\t%s' % img_url)
            return

        part_path = target_path + '.part'
        completed = False
        try:
            with open(part_path, 'wb') as f:
                for data in r.iter_content(1024):
                    f.write(data)
            # Publish the file under its final name only once it is whole.
            os.rename(part_path, target_path)
            completed = True
        finally:
            # On any failure drop the partial file instead of leaving it.
            if not completed and os.path.exists(part_path):
                os.remove(part_path)
def get_imgurl_generate(start=1, stop=161,
                        url_template="http://mp3.aikeu.com/15626/{}.mp3",
                        name_template="{}.mp3"):
    """Yield ``(url, file_name)`` pairs for a numbered series of files.

    Called without arguments it produces the original series: numbers 1
    through 160 under ``http://mp3.aikeu.com/15626/``.

    Args:
        start: First number of the series (inclusive).
        stop: End of the series (exclusive), as for ``range``.
        url_template: ``str.format`` template receiving the number and
            producing the download URL.
        name_template: ``str.format`` template receiving the number and
            producing the local file name.

    Yields:
        Tuples ``(url, file_name)``, one per number in ``range(start, stop)``.
    """
    for index in range(start, stop):
        yield (url_template.format(index), name_template.format(index))
# Guards the URL generator shared by all worker threads: loop() holds this
# lock around every next() call so only one thread advances it at a time.
lock = threading.Lock()
def loop(imgs):
    """Worker body: pull ``(url, name)`` pairs from ``imgs`` and download them.

    Several threads run this function against the same iterator, so each
    ``next()`` call is made while holding the module-level ``lock``.  The
    worker stops when the iterator is exhausted.

    A failing download does not stop the worker: the URL is reported on
    stdout as ``exceptfail`` and the traceback is written to stderr so the
    cause is not lost.  Only ``Exception`` subclasses are handled this way;
    ``SystemExit`` and ``KeyboardInterrupt`` are allowed to propagate.

    Args:
        imgs: Iterator yielding ``(img_url, img_name)`` tuples, shared with
            the other worker threads.
    """
    worker_name = threading.current_thread().name
    print('thread %s is running...' % worker_name)

    while True:
        try:
            # Only the fetch of the next item is serialised; the download
            # itself runs outside the lock so workers overlap.
            with lock:
                img_url, img_name = next(imgs)
        except StopIteration:
            break

        try:
            download(img_url, img_name)
        except Exception:
            print('exceptfail\t%s' % img_url)
            traceback.print_exc()

    print('thread %s is end...' % worker_name)
# One generator instance is handed to every worker, so each (url, name)
# pair is consumed by exactly one thread.
img_gen = get_imgurl_generate()

# Start thread_num workers named LoopThread0, LoopThread1, ...
for worker_index in range(thread_num):
    worker = threading.Thread(
        target=loop,
        name='LoopThread%s' % worker_index,
        args=(img_gen,)
    )
    worker.start()
|