网页取数据bs4

来源:互联网 发布:javascript有用吗 编辑:程序博客网 时间:2024/05/20 08:00
# -*- coding: utf-8 -*-
"""Parse a saved HTML table with BeautifulSoup and print its first rows.

The script reads a local HTML file, collects three columns from every row of
the first ``<tbody>``, and prints them as a small aligned table.
"""
from bs4 import BeautifulSoup


def file2soup(ffile):
    """Parse the HTML file at *ffile* and return the BeautifulSoup tree.

    The file is opened read-only in binary mode; the raw bytes are handed to
    BeautifulSoup (lxml parser) with UTF-8 as the first encoding to try.

    Args:
        ffile: Path of the HTML file to read.

    Returns:
        The parsed ``BeautifulSoup`` object.

    Raises:
        OSError: If the file cannot be opened.
    """
    # 'rb' instead of 'r+b': parsing never writes, so do not demand write
    # access.  The encoding is passed to BeautifulSoup itself; assigning an
    # ``encoding`` attribute on a binary file object has no effect.
    with open(ffile, 'rb') as f:
        return BeautifulSoup(f, 'lxml', from_encoding='utf-8')


def soup2list(soup, ulist):
    """Append ``[td[0], td[1], td[3]]`` strings of each table row to *ulist*.

    Only ``<tr>`` elements that are direct children of the first ``<tbody>``
    are considered, so whitespace text nodes between rows are ignored.  Rows
    with fewer than four ``<td>`` cells (e.g. header rows) are skipped.

    Args:
        soup: Parsed document to search.
        ulist: List that receives one three-item list per row; it is
            modified in place.

    Returns:
        The same *ulist* object.  It is returned unchanged when the document
        has no ``<tbody>``.
    """
    tbody = soup.find('tbody')
    if tbody is None:
        return ulist

    wanted = (0, 1, 3)  # indexes of the <td> cells to keep from each row
    for row in tbody.find_all('tr', recursive=False):
        cells = row.find_all('td')
        if len(cells) <= max(wanted):
            continue
        # ``.string`` is None when a cell holds more than one child node.
        ulist.append([cells[k].string for k in wanted])
    return ulist


url = 'http://bj.58.com/pinpaigongyu/pn/{ppp}/?minprice=2000_4000'  # not used below
ffile = 'd://best.txt'
ulist = []
soup = file2soup(ffile)
ulist = soup2list(soup, ulist)
print(ulist)

# Format spec layout: '{:' + fill character + alignment + width + '}'
row_format = '{:0>3}\t  {:+<15}\t  {: >5}\t'
print(row_format.format('排名', '校名', '总分'))
# Slice instead of indexing range(11) so a short table does not raise IndexError.
for u in ulist[:11]:
    # None cannot take a string format spec, so print it as an empty field.
    print(row_format.format(*('' if v is None else v for v in u)))