淘宝——手机

来源:互联网 发布:.net程序员自我评价 编辑:程序博客网 时间:2024/04/28 04:26


import re,requests,os,json,time
from bs4 import BeautifulSoup
# Browser-like request headers sent with every HTTP call made through `s`.
h = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36',
}

# A single shared session used for all requests in this script.
s = requests.Session()

# Directory that receives one comment file per item; create it on first run.
if not os.path.isdir('d://mtao'):
    os.mkdir('d://mtao')

# Mobile search API URL (query q=%E6%89%8B%E6%9C%BA, 20 results per page);
# `{0}` is the result-page number.
_search_url_template = 'https://s.m.taobao.com/search?event_submit_do_new_search_auction=1&_input_charset=utf-8&topSearch=1&atype=b&searchfrom=1&action=home%3Aredirect_app_action&from=1&q=%E6%89%8B%E6%9C%BA&sst=1&n=20&buying=buyitnow&m=api4h5&abtest=21&wlsort=21&style=list&closeModues=nav%2Cselecthot%2Conesearch&page={0}'

# Search-result pages 1 through 100.
urls = list(map(_search_url_template.format, range(1, 101)))
def comment_info(item_id):
    """Download the review pages for one item and append them to its comment file.

    Pages are requested from the rate-list endpoint starting at page 1. Paging
    stops at the first response shorter than 10000 bytes (used here as the
    "no more reviews" signal) or after page 9999. For every page kept, a list
    of dicts holding the review date, the purchased SKU and the review text is
    appended, in its ``str()`` form, to ``d://mtao//<item_id>.txt``.

    Args:
        item_id: Item identifier substituted into the ``itemId`` query
            parameter and used as the output file name.

    Raises:
        requests.RequestException: If a page request fails or does not
            complete within the 6-second timeout.
        ValueError: If the text between the first ``{`` and the last ``}`` of
            a response is not valid JSON.
        KeyError: If the decoded payload or one of its entries lacks an
            expected key (``rateList``, ``rateDate``, ``auctionSku``,
            ``rateContent``).
    """
    # NOTE(review): spuId and sellerId are fixed values for every item;
    # presumably the endpoint tolerates that -- confirm.
    page_url = 'https://rate.tmall.com/list_detail_rate.htm?itemId={0}&spuId=566150620&sellerId=2616970884&order=3&currentPage={1}&append=0&content=1&tagId=&posi=&picture='

    # Build each page URL only when it is needed instead of materialising
    # all 9999 of them before the first request.
    for page in range(1, 10000):
        p_link = page_url.format(item_id, page)
        print(p_link)

        # A timeout keeps one stalled connection from hanging the whole run;
        # 6 seconds matches the search-page requests elsewhere in this script.
        data = s.get(p_link, headers=h, timeout=6).content
        if len(data) < 10000:
            break

        text = data.decode('gbk', 'ignore')
        # Keep only the outermost {...} span so any wrapper text around the
        # JSON object is discarded before parsing.
        payload = json.loads(text[text.find('{'):text.rfind('}') + 1])

        reviews = [
            {
                '评论时间': rate['rateDate'],
                '物品类型': rate['auctionSku'],
                '评论': rate['rateContent'],
            }
            for rate in payload['rateList']
        ]

        # Append mode: each page adds its own list to the item's file.
        with open('d://mtao//%s.txt' % item_id, 'a', errors='replace') as f:
            f.write(str(reviews))
        print('-----评论------')


# Walk every search-result page, append each listed item to the listing file,
# then fetch that item's reviews unless its comment file is already on disk.
for url in urls:
    data = s.get(url, headers=h, timeout=6).content.decode('utf-8')
    j = json.loads(data)
    items = j['listItem']
    for i in items:
        # Copy only the fields that are recorded, in a fixed order.
        item = {
            key: i[key]
            for key in (
                'act',
                'title',
                'area',
                'item_id',
                'location',
                'nick',
                'originalPrice',
                'price',
            )
        }
        with open('d://mtaobao.text', 'a', errors='ignore') as w:
            w.write(str(item) + '\n')

        t = 'd://mtao//%s.txt' % item['item_id']
        # The comment output is a regular file, so test with isfile();
        # isdir() is never true for it and the skip below would never run.
        if os.path.isfile(t):
            print('已存在')
            continue
        comment_info(item['item_id'])





0 0
原创粉丝点击