大家好,又见面了,我是你们的朋友全栈君。
import re
import requests
def getHTMLText(url):
try:
headers={
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
'cookie': 'cna=NQPPFYICwyoCAW8OdtofQqLm; tracknick=tb31551803; tg=0; enc=rmQMsTLREKmqi0wCPiaesTYWq1FQEkZvJR9RxYQe31E%2B8H%2Bgsg29O3QjlWBYNsk5B4hWdVnoyl9FmqezxIlA0A%3D%3D; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; t=8b87e0b54eedb63ec79bf6e58120539b; uc3=lg2=V32FPkk%2Fw0dUvg%3D%3D&nk2=F5RGNw0oDsOWmg%3D%3D&vt3=F8dBxdsbDKQbUlBPQ8E%3D&id2=Vy0Qmj0GGZVBZA%3D%3D; lgc=tb31551803; uc4=id4=0%40VXqe5nOGv2p3QUXK2yfB2w2AQ2Rw&nk4=0%40FY4NAqkeavMG4lO%2Fk5N%2Fb5R2Zw6o; _cc_=URm48syIZQ%3D%3D; mt=ci=118_1; JSESSIONID=790849CCFE2C514ABA531615EB3CA8E8; l=cBOcsO94QbHsxX8kBOCNquI8LPbOSIRAkuPRwCcXi_5dc6L_6w_OoSyK1Fp62jWdtfTB4JuaUM29-etkiKy06Pt-g3fP.; isg=BJ2dqUHwn35VGntR9LG1laqSrHmXutEM-IpwbF9i2fQjFr1IJwrh3GuEQQoQmOnE',
}
r=requests.get(url,timeout=30,headers=headers)
r.raise_for_status()
r.encoding=r.apparent_encoding
return r.text
except:
return""
def parsePage(ilt,html):
try:
plt=re.findall(r'\"view_price\":\"\d+\.\d*\"',html)
tlt=re.findall(r'\"raw_title\":\".*?\"',html)
for i in range(len(plt)):
price=eval(plt[i].split('\"')[3])
title=tlt[i].split('\"')[3]
ilt.append([title,price])
except:
print("解析出错")
def printGoodsList(ilt):
tplt="{:^4}\t{:^8}\t{:^16}"
print(tplt.format("序号","价格","商品名称"))
count=0
for g in ilt:
count=count+1
print(tplt.format(count,g[0],g[1]))
if __name__ == '__main__' :
goods="书包"
depth=2
start_url="https://s.taobao.com/search?q="+goods
infoList=[]
for i in range(depth):
try:
url=start_url+"$S="+str(44*i)
html=getHTMLText(url)
parsePage(infoList,html)
except:
continue
printGoodsList(infoList)
发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/150125.html原文链接:https://javaforall.cn
【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛
【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...