混乱的学习历程 list。爬取58同城二手房,判断列表内字符串位置用(列表名).index(字符串名)

来源:互联网 发布:保洁派单软件 编辑:程序博客网 时间:2024/04/30 05:34
import os
import re
import urllib.request

# Browser-like User-Agent sent with every request.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 '
              'Core/1.53.2372.400 QQBrowser/9.5.10548.400')

# Captures the alt text of each lazily-loaded listing image.
NAME_RE = re.compile(r'img lazy_src=.*?alt="(.*?)"')

# Captures five numeric fields following each "class='pri" marker.
# NOTE(review): the original variable was called `zongjia`; the fields are
# presumably price / year / room counts / area -- confirm against the page.
PRICE_RE = re.compile(r"class='pri.*?(\d{2,3}.\d|\d{2,3})\D*?(\d\d\d\d)\D*?(\d).*?(\d)\D*?(\d{2,4}.\d|\d{2,3})")


def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request carries the browser-like ``USER_AGENT`` header.  The
    connection is closed as soon as the body has been read.

    Raises whatever ``urllib.request.urlopen`` raises (e.g.
    ``urllib.error.URLError``) when the request fails.
    """
    req = urllib.request.Request(url)
    # The header name must not contain trailing whitespace, and the Request
    # object (not the bare URL) must be opened for the header to be sent.
    req.add_header('User-Agent', USER_AGENT)
    with urllib.request.urlopen(req) as response:
        return response.read()


def extract_listings(html):
    """Return ``[(title, details), ...]`` parsed from one listing page.

    *title* is the text captured by ``NAME_RE``; *details* is the tuple of
    five strings captured by ``PRICE_RE`` at the same position on the page,
    or ``None`` when the page yielded fewer detail rows than titles.

    Titles and detail rows are paired by position, so listings that share
    the same title still get their own detail row.
    """
    titles = NAME_RE.findall(html)
    details = PRICE_RE.findall(html)  # run once per page, not once per title
    return [
        (title, details[pos] if pos < len(details) else None)
        for pos, title in enumerate(titles)
    ]


def main():
    """Print the title and detail row of every listing on pages 1-4."""
    for page in range(1, 5):
        url = "http://jdz.58.com/ershoufang/pn" + str(page) + "/"
        print(url)
        html = url_open(url).decode("utf-8")
        for title, details in extract_listings(html):
            print(title)
            if details is not None:
                print(details)


if __name__ == "__main__":
    main()
0 0
原创粉丝点击