Python之抓取网页元素

来源:互联网 发布:仓库出入库软件 编辑:程序博客网 时间:2024/05/22 00:28
# Scrape the Walmart China Hubei store page and print, in order: every store
# name, every store address, and then the contact cells, which are labelled
# alternately as a phone number and a travel route.
import urllib.request

from bs4 import BeautifulSoup

url = "http://www.wal-martchina.com/walmart/store/14_hubei.htm"
user_agent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
# Bound the network wait so a stalled server cannot hang the script forever.
timeout_seconds = 30

request = urllib.request.Request(url)
# Send a browser-like User-Agent header with the request.
request.add_header("User-Agent", user_agent)

# The context manager closes the HTTP response even if parsing raises.
with urllib.request.urlopen(request, timeout=timeout_seconds) as content:
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed; the page is decoded as GB18030.
    soup = BeautifulSoup(content, "html.parser", from_encoding="gb18030")

# Store names
shop_names = soup.find_all('td', {"class": "xl714445"})
# Addresses
addresses = soup.find_all('td', {"class": "xl684445"})
# Contact cells (printed alternately as phone number and travel route)
phones = soup.find_all('td', {"class": "xl744445"})

for shop in shop_names:
    print("店铺名称:" + shop.text.strip())
print("----------------------------------------------")

for address in addresses:
    print("店铺地址:" + address.text.strip())

# Even-indexed cells are printed as the phone number, odd-indexed cells as the
# travel route; a separator line follows each phone/route pair.
for index, phone in enumerate(phones):
    if index % 2 == 0:
        print("联系电话:" + phone.text.strip())
    else:
        print("交通路线:" + phone.text.strip())
        print('---------------------------------------------------')
原创粉丝点击