Python 爬虫简单使用

来源:互联网 发布:疯狂java讲义最新版pdf 编辑:程序博客网 时间:2024/06/06 17:02

昨天实在无聊,顺手写了个爬虫玩。把某个网站的妹子图给扒完了。还是。。咳咳。。注意身体啊。

"""Simple crawler: scrape jandan.net's ooxx listing pages and download every
.jpg/.gif image found in <img> tags to numbered files in the working directory."""
import requests
import re
import urllib.request
from bs4 import BeautifulSoup

# Collected scheme-less image URLs (e.g. "host/path/pic.jpg"), filled by paiong().
urlj = []
urlg = []


def paiong(page):
    """Scrape one ooxx listing page and collect its image URLs.

    page -- 1-based page number appended to the listing URL.

    Appends every matched .jpg URL to the module-level `urlj` list and every
    matched .gif URL to `urlg` (side effect; returns None).
    """
    session = requests.session()  # renamed: `int` shadowed the builtin
    url = "http://jandan.net/ooxx/page-" + str(page)
    r = session.get(url=url)
    soup = BeautifulSoup(r.text, 'lxml')  # parser: lxml
    for tag in soup.find_all('img'):
        print(tag)
        oo = str(tag)
        # Raw strings so backslash escapes reach the regex engine untouched.
        jpg = re.findall(r'\/\/+(.*jpg)', oo)
        gif = re.findall(r'\/\/(.*?gif)', oo)
        # BUG FIX: the original tested `jpg != None or jpg != ''`, which is
        # always True (findall returns a list) — a truthiness test suffices.
        if jpg:
            urlj.extend(jpg)
        # BUG FIX: the original condition tested `jpg` here instead of `gif`.
        if gif:
            urlg.extend(gif)
        print('\n')


def _download_all(urls, suffix, start):
    """Download each URL in `urls` to '<index><suffix>'; return the next index.

    urls   -- iterable of scheme-less URLs ("http://" is prepended).
    suffix -- file extension including the dot, e.g. '.jpg'.
    start  -- first numeric filename index to use.
    """
    num = start
    for target in urls:
        # BUG FIX: the original never closed its file handles; `with` closes
        # each file even if the download raises.
        with open(str(num) + suffix, 'wb') as f:
            f.write(urllib.request.urlopen('http://' + str(target)).read())
        num += 1
    return num


if __name__ == "__main__":
    for k in range(1, 160):
        paiong(k)
    numj = _download_all(urlj, '.jpg', 0)
    # BUG FIX: the original second loop iterated `urlj` again, saving jpg
    # bytes under .gif names and never fetching the gifs; use `urlg`.
    _download_all(urlg, '.gif', numj)



原创粉丝点击