python3爬取糗事百科实例

来源：互联网 | 发布：brooks跑鞋矩阵 | 编辑：程序博客网 | 时间：2024/06/09 20:14

完整代码：

# -*- coding: utf-8 -*-
"""Scrape the text of "hot" posts from qiushibaike.com into a text file.

Run as a script: it fetches a fixed number of listing pages, extracts the
text of each post and writes all of them, one per line, to
``F:\\qiubai\\qiubai.txt``.
"""
import os
import shutil
from time import sleep

import requests
from bs4 import BeautifulSoup


def get_qb(page_num):
    """Collect post texts from the first ``page_num`` "hot" listing pages.

    Args:
        page_num: Number of listing pages to fetch, starting at page 1.

    Returns:
        A list of stripped post strings, in the order they were found.

    Raises:
        requests.RequestException: If a page cannot be fetched (including
            when the request times out).
    """
    list_qs = []
    url = 'http://www.qiushibaike.com/hot'
    for page in range(1, page_num + 1):
        cnt = 0
        qbhot = '{0}/page/{1}'.format(url, page)
        print("开始获取第{0}页糗事...".format(page))
        # A timeout keeps a stalled connection from hanging the whole run.
        r = requests.get(qbhot, timeout=10)
        soup = BeautifulSoup(r.text, "lxml")
        for tag in soup.find_all("div", attrs={"class": 'content'}):
            # The post text is read from the second child node; skip
            # containers that do not have one instead of raising IndexError.
            if len(tag.contents) < 2:
                continue
            text = tag.contents[1].string
            if text is not None:
                list_qs.append(text.strip())
                cnt += 1
        print('{0}条'.format(cnt))
        # Pause briefly after every fourth page to go easy on the server.
        if page % 4 == 0:
            sleep(1)
    return list_qs


def main():
    """Scrape 13 pages and write the posts to ``F:\\qiubai\\qiubai.txt``.

    Any existing file or directory at the output directory path is removed
    first, so each run starts from a clean directory.
    """
    page_num = 13
    out_dir = "F:\\qiubai\\"
    if os.path.isfile(out_dir):
        os.remove(out_dir)
    if os.path.isdir(out_dir):
        # Best-effort removal: errors are ignored (ignore_errors=True).
        shutil.rmtree(out_dir, True)
    # exist_ok covers the case where the best-effort removal above left the
    # directory in place; makedirs also creates missing parent directories.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, "qiubai.txt")
    ls_qs = get_qb(page_num)
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(ls_qs))
    print("完成!")


if __name__ == "__main__":
    main()

结果：
这里写图片描述

这里写图片描述

阅读全文

0 0