Python:BeautifulSoup 网页 table 抓取实例
来源:互联网 发布:sor文件打开软件 编辑:程序博客网 时间:2024/06/04 17:59
从 http://www.lottery.gov.cn/ 抓取 体彩 排列5 历史数据
get_pl5.py
# -*- coding: utf-8 -*-
"""Dump the first HTML table of the lottery history page to ``pl5.txt``.

The script downloads ``URL``, parses it with BeautifulSoup, takes the first
``<table>`` on the page and writes the text of every ``<td>`` in its
``<tbody>`` to ``pl5.txt``.  Each cell is encoded as cp936 and followed by
a ``!`` separator; each table row ends with a newline.

This is Python 2 code (``urllib2`` and the BeautifulSoup 3 ``findAll`` API).
"""
import os
import sys
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup

# Sports lottery "Pailie 5" (plw) draw-history page.
URL = "http://www.lottery.gov.cn/historykj/history.jspx?_ltype=plw"

page = urllib2.urlopen(URL)
try:
    soup = BeautifulSoup(page)
finally:
    # Release the HTTP response even if parsing fails.
    page.close()

tables = soup.findAll('table')
tab = tables[0]  # raises IndexError if the page contains no <table>

# Open the output only after the table has been located, and let the
# context manager close (and flush) it; the original never closed the file.
with open("pl5.txt", "w") as fp:
    for tr in tab.tbody.findAll('tr'):
        for td in tr.findAll('td'):
            # Cells are written as cp936 bytes, each terminated by '!'.
            text = td.getText().encode('cp936') + '!'
            fp.write(text)
        # NOTE(review): the source was collapsed onto one line; the newline
        # is assumed to be written once per row, not once per cell.
        fp.write('\n')
阅读全文