Python爬取并分析网页【基本版】

来源:互联网 发布:mac 全屏切换快捷键 编辑:程序博客网 时间:2024/05/14 16:07
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Fetch a listing page, pair up its link texts and append them to a file.

The page at ``URL`` is downloaded and decoded as GBK, every anchor matched
by ``LINK_SELECTOR`` is collected, and consecutive anchors are written to
``OUTPUT_PATH`` as ``first,second`` lines terminated by CRLF.

The anchors are assumed to alternate "book name" / "chapter title"; that is
how the pairing below treats them, it is not verified against the page.

The code runs unchanged on Python 2 and Python 3.
"""
import codecs
import contextlib
import urllib

try:
    # Python 3 moved urlopen into the urllib.request submodule.
    from urllib.request import urlopen
except ImportError:
    # Python 2 exposes it directly on the urllib module.
    urlopen = urllib.urlopen

URL = 'http://www.7dsw.com/toplastupdate/1.html'
PAGE_ENCODING = 'gbk'
LINK_SELECTOR = '#newscontent ul a'
OUTPUT_PATH = 'intimate.txt'


def fetch_page(url=URL, encoding=PAGE_ENCODING):
    """Download *url* and return its body decoded with *encoding*.

    The response object is always closed, even if reading or decoding
    raises.
    """
    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with contextlib.closing(urlopen(url)) as resp:
        return resp.read().decode(encoding)


def pair_titles(texts):
    """Group a flat sequence of link texts into ``(first, second)`` tuples.

    Items 0 and 1 form the first pair, items 2 and 3 the second, and so on.
    A trailing item without a partner is dropped instead of raising
    ``IndexError``.  ``None`` entries (elements without text) become empty
    strings so that they can still be written out.
    """
    cleaned = [text if text is not None else u'' for text in texts]
    # zip() stops at the shorter slice, which discards an unpaired last item.
    return list(zip(cleaned[0::2], cleaned[1::2]))


def parse_page(page, selector=LINK_SELECTOR):
    """Return the paired link texts found in the HTML string *page*."""
    # Imported here so that the helpers that do not need pyquery remain
    # usable when this third-party package is not installed.
    from pyquery import PyQuery as pq

    doc = pq(page)
    return pair_titles([anchor.text for anchor in doc(selector)])


def save_pairs(pairs, path=OUTPUT_PATH):
    """Append each pair to *path* as a UTF-8 ``first,second`` CRLF line."""
    # Append mode: repeated runs accumulate lines rather than overwrite.
    with codecs.open(path, 'a', 'utf-8') as fp:
        for first, second in pairs:
            fp.write(first)
            fp.write(",")
            fp.write(second)
            fp.write("\r\n")


def main():
    """Run the fetch, parse and save steps, reporting progress on stdout."""
    # print() with one parenthesised string prints the same text under
    # both the Python 2 print statement and the Python 3 print function.
    print('fetch page...')
    page = fetch_page()

    print('parse page...')
    pairs = parse_page(page)

    print('save data...')
    save_pairs(pairs)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击