新闻更新时通过微信提醒(以SJTU奖学金信息为例)

来源:互联网 发布:fast迅捷网络fwr200 编辑:程序博客网 时间:2024/06/06 15:37

需要:Python,easy_install,pip,wxpy
Python为脚本的编写与运行环境。easy_install为Python的包管理工具,用于安装pip(是安装pip的前置需求)。再使用pip安装wxpy库,该库用于通过Python控制微信。wxpy库可在GitHub上搜索获取。

# -*- coding: UTF-8 -*-
# author: 不饮者
# function: crawl SJTU scholarship news. Inform you when news are updated.
# details: crawl every 5 minutes, maximum runtime is 5 hours.
# !!!WARNING!!!WARNING!!!WARNING!!!WARNING!!!WARNING!!!WARNING!!!
# This program uses Web Wechat. Per the author's warning, your phone Wechat
# stops receiving messages while the web session is logged in.
"""Watch the SJTU scholarship news list and report new entries via WeChat.

Every POLL_INTERVAL seconds the news page is downloaded, the newest MAX_NEWS
(title, link) pairs are extracted and compared with the titles stored in
NEWS_FILE by the previous poll. Entries listed ahead of the previously newest
title are sent to the WeChat "file helper" chat, then NEWS_FILE is rewritten.

NEWS_FILE layout: 2 * MAX_NEWS lines, alternating title / link, UTF-8, with no
trailing newline.
"""
import errno
import io
import re
import sys
import time
import urllib  # not used below; retained from the original import list

try:  # Python 2 module layout
    import urllib2
    from urllib2 import URLError
except ImportError:  # Python 3 moved these into urllib.request / urllib.error
    import urllib.request as urllib2
    from urllib.error import URLError

NEWS_URL = r"http://xsb.seiee.sjtu.edu.cn/xsb/list/611-1-20.htm"  # SJTU scholarship webpage
SITE_ROOT = 'http://xsb.seiee.sjtu.edu.cn'
NEWS_FILE = r'...\newsList.txt'  # news information file address
MAX_NEWS = 20        # entries tracked per poll
MAX_POLLS = 60       # 60 polls * 5 minutes = maximum runtime of 5 hours
POLL_INTERVAL = 300  # seconds between polls

# Compiled once; each match yields (relative link, title).
NEWS_PATTERN = re.compile(
    'style="overflow-x:hidden;" href="(.*?)" title="(.*?)" target', re.S)


def parse_news(content, limit=MAX_NEWS):
    """Return at most *limit* (title, absolute link) pairs found in *content*.

    *content* is the already-decoded HTML text of the news list page. The
    result is built from scratch on every call, so consecutive polls never
    see entries left over from an earlier one.
    """
    entries = []
    for href, title in NEWS_PATTERN.findall(content)[:limit]:
        entries.append((title, SITE_ROOT + href))
    return entries


def fetch_news():
    """Download the news page and return its parsed (title, link) pairs.

    Raises URLError when the page cannot be retrieved.
    """
    response = urllib2.urlopen(urllib2.Request(NEWS_URL))
    try:
        content = response.read().decode('utf-8')
    finally:
        response.close()
    return parse_news(content)


def load_old_lines(path):
    """Return the lines stored in *path*, or [] when the file does not exist.

    The file is decoded as 'utf-8-sig' so that a leading byte-order mark is
    dropped. NOTE(review): the original script mentioned an "empty character"
    sometimes appearing at the start of the file; presumably that is a BOM
    added by a text editor -- confirm.
    """
    try:
        with io.open(path, 'r', encoding='utf-8-sig') as stored:
            return stored.read().split(u'\n')
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        # A missing file is handled like a malformed one: it gets rebuilt.
        return []


def find_new_entries(entries, old_first_title):
    """Return the entries listed ahead of *old_first_title*.

    Scanning stops at the first entry whose title equals the previously
    newest title. If that title is no longer listed, every entry is returned.
    """
    fresh = []
    for title, href in entries:
        # The [1:] comparison is the original tolerance for one stray leading
        # character in the stored title; kept for backward compatibility.
        if title == old_first_title or title == old_first_title[1:]:
            break
        fresh.append((title, href))
    return fresh


def build_messages(entries, old_lines):
    """Return the WeChat messages to send for this poll, in sending order.

    When *old_lines* has the expected 2 * MAX_NEWS lines, two messages are
    produced per new entry (a banner, then "title link"); the list is empty
    when nothing changed. Otherwise the stored file is considered unusable
    and the result announces the rebuild plus a preview of the first three
    current entries.
    """
    if len(old_lines) == 2 * MAX_NEWS:
        messages = []
        for title, href in find_new_entries(entries, old_lines[0]):
            messages.append('Update, Scholarship News!')
            messages.append(title + ' ' + href)
        return messages
    preview = u''
    for title, href in entries[:3]:
        preview += (title + ' ' + href + '\n')
    return [
        'Original txt file, not correctly prepared.',
        'Txt file, rebuilt.',
        'First three news, now:',
        preview,
    ]


def save_entries(path, entries):
    """Overwrite *path* with alternating title / link lines (UTF-8)."""
    lines = []
    for title, href in entries:
        lines.append(title)
        lines.append(href)
    # The context manager closes the file even if the write fails.
    with io.open(path, 'w', encoding='utf-8') as stored:
        stored.write(u'\n'.join(lines))


def check_once(bot):
    """Run one poll: fetch, notify through *bot*, then persist the listing."""
    entries = fetch_news()
    if len(entries) < MAX_NEWS:
        # An incomplete listing would corrupt the stored file and the
        # comparison, so leave everything untouched until the next poll.
        print('Only %d of %d expected news entries were parsed; check skipped'
              % (len(entries), MAX_NEWS))
        return
    for message in build_messages(entries, load_old_lines(NEWS_FILE)):
        bot.file_helper.send(message)
    save_entries(NEWS_FILE, entries)


def main():
    """Log in to Web WeChat and poll the news page MAX_POLLS times.

    A URLError ends the watcher after printing the error's code / reason.
    """
    if sys.version_info[0] == 2:
        # prepare for Chinese character reading & writing (Python 2 only)
        reload(sys)  # noqa: F821 -- builtin on Python 2
        sys.setdefaultencoding('utf-8')

    # Imported here so the helpers above can be imported without wxpy.
    from wxpy import Bot

    try:
        # connect Web WeChat
        bot = Bot()
        bot.file_helper.send('Hello.')
        bot.file_helper.send('Wechat robot, me.')
        bot.file_helper.send('Informed when sholarship news updated, you.')
        bot.file_helper.send('Scholarship Watcher, started.')

        for poll_count in range(1, MAX_POLLS + 1):
            check_once(bot)
            # set interval behaviours
            print('Target webpage has been crawled for %d times' % poll_count)
            time.sleep(POLL_INTERVAL)
    except URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)


if __name__ == '__main__':
    main()
阅读全文
0 0