# coding: utf-8
import functools
import logging
import os
import Queue
import random
import re
import threading
import time
import traceback
from threading import Thread

from selenium import webdriver
# Module-wide lock shared by the cookie threads: it is held while
# tmall_cookie.txt is read or appended to, and while the list of cookies
# pending a save is mutated.
lock = threading.Lock()
def async(f): """异步装饰器""" def wrapper(*args, **kwargs): thr = Thread(target=f, args=args, kwargs=kwargs) thr.start() return wrapper
class TmallCookie(object): def init(self): # cookie 队列 self.cookie_queue = Queue.Queue() self.cookie_list = list() self.load_cookie() self.parse_cookie() self.save_cookie()
def load_cookie(self):
"""加载本地已保存的 cookie"""
lock.acquire()
with open("tmall_cookie.txt", "r") as f:
cookie_list = f.readlines()
lock.release()
for i in cookie_list:
self.cookie_queue.put(i.strip())
@async
def parse_cookie(self):
"""
请求 cookie,并将 cookie 保存至 cookie 列表
:return:
"""
urls = ['https://detail.tmall.com/item.htm?id=562345301295',
'https://detail.tmall.com/item.htm?id=553941537843',
'https://detail.tmall.com/item.htm?id=558646979307',
'https://list.tmall.com/search_product.htm?spm=a221t.1812074.2005984841.8.44d84208RXceJT&q=%B9%E2%C3'
'%E6%CE%C4%D0%D8&from=.list.pc_1_searchbutton&acm=lb-zebra-7777-1443323.1003.4.1158540&type=p&scm=100'
'3.4.lb-zebra-7777-1443323.OTHER_14748278648600_1158540',
'https://list.tmall.com/spu_detail.htm?fmtab=sp&cat=50105508&spuid=877471268&suid=4e5fd39570486fdf2a'
'9b3077572be7ab&rn=1e0abfcf6995e918ab6c7bc00d6e9be2'
]
option = webdriver.ChromeOptions()
option.add_argument('disable-infobars')
option.add_argument('disable-gpu')
option.add_argument('--headless')
option.add_argument("--no-sandbox")
# option.add_argument("window-size=1024,768")
while True:
if self.cookie_queue.qsize() < 20000:
try:
# driver = webdriver.Chrome('C:\\chromedriver.exe', chrome_options=option)
driver = webdriver.Chrome('./chromedriver', chrome_options=option)
driver.set_page_load_timeout(120)
url = random.choice(urls)
driver.get(url)
time.sleep(5)
try:
cookies = driver.get_cookies()
cookie_string = []
for cookie_info in cookies:
cookie_string.append(u'%s=%s' % (cookie_info.get(u'name'), cookie_info.get(u'value')))
cookie_string = '; '.join(cookie_string)
driver.close()
driver.quit()
except Exception as e:
pass
try:
_tb_token_ = re.findall("(_tb_token_=.*?;)", cookie_string)[0]
t = re.findall("(t=[a-z0-9]+)", cookie_string)[0]
cna = re.findall("(cna=.*?;)", cookie_string)[0]
cookie2 = re.findall("(cookie2=.*?;)", cookie_string)[0].replace(";", "")
cookie = _tb_token_ + " " + t + "; " + cna + " " + cookie2
try:
enc = re.findall("(enc=.*?;)", cookie_string)[0]
cookie = _tb_token_ + " " + t + "; " + cna + " " + enc + " " + cookie2
except:
pass
print cookie
self.cookie_queue.put(cookie)
lock.acquire()
self.cookie_list.append(cookie)
lock.release()
except Exception as e:
pass
except Exception as e:
print traceback.format_exc()
else:
time.sleep(300)
@async
def save_cookie(self):
"""
清空之前的 cookie 文件,将当天抓取的 cookie 保存至文件
:return:
"""
while True:
time.sleep(1)
if len(self.cookie_list) > 10:
lock.acquire()
# with open("tmall_cookie.txt", "w") as f1:
# f1.truncate()
# time.sleep(5)
with open("tmall_cookie.txt", "a") as f2:
for cookie in self.cookie_list:
f2.write(cookie)
f2.write("\n")
self.cookie_list = []
lock.release()
def get_cookie(self):
"""
获取一个 cookie
:return: tmall cookie
"""
while True:
try:
cookie = self.cookie_queue.get(timeout=5)
break
except Exception as e:
logging.warning("Get cookie error: %s" % e)
time.sleep(5)
if self.cookie_queue.qsize() <= 5000:
self.cookie_queue.put(cookie)
return cookie
# Script entry point. The guard read ``name == 'main'`` (dunder underscores
# lost), which raises NameError as soon as the module is executed.
if __name__ == '__main__':
    # Constructing the pool loads saved cookies and starts its worker threads.
    cookie = TmallCookie()
    # Example consumer loop:
    # while True:
    #     print(cookie.get_cookie())
    #     time.sleep(2)
1
a7a2 2018-06-11 13:31:04 +08:00
按照经验,这不是你代码的问题,而是你用的 webdriver 库与 chromedriver 之间的问题,在 macOS 下同样存在。
可以尝试自己管理进程,例如调用 kill 之类的命令来结束它。
2
1109599636 2018-07-07 17:31:43 +08:00
我以前写的时候,是换成了火狐(Firefox)的驱动……