跟着鬼哥学爬虫-8-python微信-3-集合进去糗事百科!

来源:互联网 发布:java 数组接收 split 编辑:程序博客网 时间:2024/06/07 02:37

 跟着鬼哥学爬虫-8-python微信-3-集合进去糗事百科!


还是直接上代码吧:这次把前面文章中用到的糗事百科爬虫集成到微信机器人里!


代码会判断聊天信息中是否包含'来个段子'。无论是个人聊天还是群聊,只要消息中包含这几个字,处理函数 return 的内容就会被当作自动回复发送出去,也就是自动回复一条随机的段子。


大家可以直接翻看以前的文章查看糗百的段子获取过程。


# -*- coding: utf-8 -*-
import itchat
import sys
import random
from bs4 import BeautifulSoup
import urllib2
import urllib
import re
from tuling import get_response


# Reply to private (one-to-one) chat messages.
@itchat.msg_register('Text')
def text_reply(msg):
    """Answer a private text message that asks for a joke.

    Returns the joke text from getContent() when the message contains the
    trigger phrase; otherwise falls through and returns None.
    """
    if u'来个段子' in msg['Text']:
        return getContent(1)


# Automatically reply to group chat messages.
@itchat.msg_register('Text', isGroupChat = True)
def group_reply(msg):
    """Log a group text message and answer it when it asks for a joke.

    Returns the joke text from getContent() when the message contains the
    trigger phrase; otherwise falls through and returns None.
    """
    # Single-argument print(...) behaves exactly like the print statement
    # on Python 2, which this script targets (urllib2, reload(sys)).
    print(msg['ActualNickName'] + '--' + msg['Text'])
    if u'来个段子' in msg['Text']:
        return getContent(1)


# Fetch one random joke from Qiushibaike.
def getContent(n):
    """Return the text of one randomly chosen joke from a random listing page.

    n is accepted for backward compatibility only: as in the original code,
    the page number is always re-drawn at random from 1..200, so the value
    passed by the caller is ignored.

    Returns None when the fetched page contains no "div.content span"
    elements, instead of raising IndexError. The handlers above return that
    None to their caller unchanged, just as they do when the trigger phrase
    is absent. Network errors raised by urllib2 are not caught here.
    """
    page = random.randint(1, 200)
    url = 'http://www.qiushibaike.com/text/page/' + str(page) + '/'
    # Alternative listing: 'http://www.qiushibaike.com/8hr/page/' + str(page) + '/'
    print(url)

    # Browser-like headers (including a captured session cookie) sent with
    # every request.
    heads = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'http://www.qiushibaike.com/',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cookie': '_xsrf=2|db27040e|6b4ed8d9536590d4ec5d2064cc2bef4f|1474364551; _qqq_uuid_="2|1:0|10:1474364551|10:_qqq_uuid_|56:MzBlNWFkOGE3MWEyMzc1MWIxMTE3MDBlZjM2M2RkZWQxYzU5YTg1Yw==|1dd2a4f4ceacad26b5da9cc295d2965226ea25ee73289855cf032629c4992698"; Hm_lvt_2670efbdd59c7e3ed3749b458cafaa37=1474364592; Hm_lpvt_2670efbdd59c7e3ed3749b458cafaa37=1474364595; _ga=GA1.2.1125329542.1474364596'
    }

    request = urllib2.Request(url, headers=heads)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    soup = BeautifulSoup(html, "lxml")
    jokes = [span.text for span in soup.select("div.content span")]
    if not jokes:
        return None

    # random.choice covers every collected joke (including the first one) and
    # cannot index past the end of a short page, unlike the previous
    # hard-coded jokes[random.randint(1, 19)].
    return random.choice(jokes)


if __name__ == "__main__":
    # Python 2 only: make utf-8 the default codec for implicit str/unicode
    # conversions, e.g. when concatenating and printing chat text.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    itchat.auto_login(True, enableCmdQR = True)
    itchat.run()







0 0