某徒步旅游网站python爬虫小练习

来源:互联网 发布:java集合框架中抽象类 编辑:程序博客网 时间:2024/05/17 09:04

yxk周边游网站python爬虫小练习(跨页面)

代码很简单,关键有两点:一是打开 CSV 文件时要指定 encoding="utf_8_sig",否则写出的中文会乱码;二是注意 writerow() 的参数是一个 list(对应 CSV 中的一行)。

import warningswarnings.filterwarnings("ignore")
import csv from urllib.request import urlopenfrom bs4 import BeautifulSoupcsvFile=open("C:\\Users\\dell-pc\\Documents\\Python Scripts\\Untitled Folder\\uxiake.csv","wt",newline="",encoding="utf_8_sig")writer=csv.writer(csvFile)try:    for i in range(9):        num=str(i+1)        html=urlopen("http://www.youxiake.com/around?place_id=2&days=0&month=0&holiday=0&price=0&tag=0&class_id=0&gts=&gte=&p="+num)        bs0bj=BeautifulSoup(html)        total=bs0bj.find("div",{"class":"m-bd-con current"}).findAll("div",{"class":"act-item clearfix"})        for i in total:            putin=[]            head=i.find("h3").find("a").get_text()            style=i.find("h3").find("span").get_text()            place=i.find("div",{"class":"z-subMsg"}).findAll("p")[0].get_text()            days=i.find("span",{"class":"days"}).get_text()            venue=i.find("span",{"class":"venue"}).get_text()            putin.append([head,style,place,days,venue])            for i in putin:                writer.writerow(i)finally:    csvFile.close()