Python 抓取中国天气网天气数据

来源:互联网 发布:中国重工业 知乎 编辑:程序博客网 时间:2024/05/02 13:16

         众所周知,中国天气网提供了一个公共天气预报API接口,但不知道为什么数据一直停在3月4号不再更新。最近做一个天气方面的APP,需要用到一些天气数据,而360的接口公司不让用,只好自己写一个Python脚本放在数据中心。


先发三个尚还能用的接口,据说万年历有未来7天预报的api。

http://weather.com.cn/data/zs/101280601.html  各种各样的指数

http://weather.com.cn/data/cityinfo/101280601.html   简洁天气信息

http://weather.com.cn/data/sk/101280601.html   实况天气


获取未来15天温度

#coding=utf-8
# Scrape the weather.com.cn forecast page for city 101280601, write the
# result to Weather.html as a JSON document, and insert today's low/high
# temperature into the MySQL table `record`.
# Written for Python 2 (urllib.urlopen, MySQLdb, byte-string literals).
import urllib
import re
import MySQLdb
import ConfigParser
import datetime
import time

# Chinese weekday names indexed by time.strftime("%w") (0 = Sunday).
WEEKDAYS = ['星期日', '星期一', '星期二', '星期三', '星期四', '星期五', '星期六']

# Take one clock snapshot so year/month/day/weekday cannot straddle midnight.
today = time.localtime()
y = time.strftime("%Y", today)
m = time.strftime("%m", today)
d = time.strftime("%d", today)
now = y + '-' + m + '-' + d
week = WEEKDAYS[int(time.strftime("%w", today))]
todaylow = 99
todayhigh = 99
print(now)


def getHtml(url):
    """Download *url* and return the raw response body."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getRel(html, reg):
    """Return all matches of the regex *reg* in *html* (re.findall result)."""
    mre = re.compile(reg)
    return re.findall(mre, html)


html = getHtml("http://weather.com.cn/weather/101280601.shtml")
reg1 = r'<p class="tem tem\d{1}">\s<span>(.+)</span><i>°C</i>'  # temperature
reg2 = r'</span>\s</em>\s<i>(.+)</i>'  # wind
reg3 = r'<p class="wea">(.+)</p>\s<p class="tem tem\d">'  # weather description
reg4 = r'<section class="mask ct">\s.+\s.+\s.+7d1"><b>(.+)</b>'  # feeling
reg5 = r'<section class="mask ct">\s.+\s.+\s.+7d1"><b>.+</b>(.+)</aside>'  # clothing suggestion

temputer = getRel(html, reg1)
# The script treats the matches as alternating high, low values.
todayhigh = temputer[0]
todaylow = temputer[1]
# Fold each (high, low) pair into one "high℃~low℃" string.  An odd number of
# matches raises IndexError here, exactly as the original pairing loop did.
temputer = [
    temputer[k] + "℃~" + temputer[k + 1] + "℃"
    for k in range(0, len(temputer), 2)
]

wind = getRel(html, reg2)
index = getRel(html, reg3)
feeling = getRel(html, reg4)
suggest = getRel(html, reg5)
mlist = temputer + index + wind + feeling + suggest
print(mlist)

# Output keys, position-matched against mlist.
_mlist = [
    'temp1', 'temp2', 'temp3', 'temp4', 'temp5', 'temp6', 'temp7',
    'weather1', 'weather2', 'weather3', 'weather4', 'weather5', 'weather6', 'weather7',
    'wind1', 'wind2', 'wind3', 'wind4', 'wind5', 'wind6', 'wind7',
    'index', 'index48_d',
]

# Build the whole document in memory first: if mlist is shorter than _mlist
# the IndexError is raised before Weather.html has been truncated.
# NOTE: values are inserted verbatim, so a '"' in scraped text would yield
# invalid JSON.
fields = ['"' + _mlist[i] + '":"' + mlist[i] + '"' for i in range(len(_mlist))]
payload = (
    '{"weatherinfo":{"city":"深圳","city_en":"shenzhen","date_y":"'
    + y + '年' + m + '月' + d + '日","week":"' + week + '",'
    + ','.join(fields)
    + '}}'
)
with open(r'Weather.html', 'w') as f:
    f.write(payload)

# NOTE(review): database credentials are hard-coded; consider moving them to
# configuration before deploying.
conn = None
try:
    conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                           db='weather', charset='utf8')
    cur = conn.cursor()
    # '-1' is the value stored in the fourth column by this script.
    val = [now, todaylow, todayhigh, '-1']
    print(val)
    cur.execute('insert into record values(%s,%s,%s,%s)', val)
    conn.commit()
    cur.close()
except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
else:
    # Report success only when the insert actually committed.
    print('Update and inser Database Success')
finally:
    if conn is not None:
        conn.close()


获取实时空气值(从Pm.in抓取)

#coding=utf-8
# Scrape the pm25.in page for Shenzhen, write the readings to
# AirCondition.html as a JSON array, and store the second scraped value in
# the pm2_5 column of today's row in the MySQL table `record`.
# Written for Python 2 (urllib.urlopen, MySQLdb, byte-string literals).
import urllib
import re
import MySQLdb
import time
import datetime

# Take one clock snapshot so year/month/day cannot straddle midnight.
today = time.localtime()
y = time.strftime("%Y", today)
m = time.strftime("%m", today)
d = time.strftime("%d", today)
now = y + '-' + m + '-' + d
pm = '-1'
print(now)


def getHtml(url):
    """Download *url* and return the raw response body."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getRel(html, reg):
    """Return all matches of the regex *reg* in *html* (re.findall result)."""
    mre = re.compile(reg)
    return re.findall(mre, html)


html = getHtml("http://www.pm25.in/shenzhen")
# NOTE: inside the character class the '|' is a literal pipe, not alternation.
reg1 = r'<div class.*="value">\s+([\d|.]+)\s+</div>'  # air values
reg2 = r'<div class="level">\s+<h4>\s+(.+)\s+</h4>'  # quality level
reg3 = r'<p>建议采取的措施:\s+(.+)\s+</p>'  # suggested measures

val = getRel(html, reg1)
index = getRel(html, reg2)
suggest = getRel(html, reg3)
mlist = val + index + suggest
# The second scraped value is the one written to the pm2_5 column.
pm = str(mlist[1])
print(mlist)

# Output keys, position-matched against mlist.
_mlist = ['aqi', 'pm2_5_24h', 'PM10/1h', 'CO/1h', 'NO2/1h', 'O3/1h', 'O3/8h',
          'SO2/1h', 'quality', 'suggest']
print(_mlist)

# All entries except the last two are written unquoted (numeric readings);
# the last two are written as quoted strings, each exactly once.
# Build the document in memory first so an IndexError cannot leave a
# half-written AirCondition.html behind.
count = len(mlist)
parts = ['"' + _mlist[i] + '":' + mlist[i] for i in range(count - 2)]
parts.append('"' + _mlist[count - 2] + '":"' + mlist[count - 2] + '"')
parts.append('"' + _mlist[count - 1] + '":"' + mlist[count - 1] + '"')
payload = '[{' + ','.join(parts) + '}]'
with open(r'AirCondition.html', 'w') as f:
    f.write(payload)

print("%s %s" % (now, pm))

# NOTE(review): database credentials are hard-coded; consider moving them to
# configuration before deploying.
conn = None
try:
    conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                           db='weather', charset='utf8')
    cur = conn.cursor()
    # pm comes from scraped HTML, so let the driver quote the values instead
    # of interpolating them into the statement text.
    sql = "UPDATE record SET pm2_5 = %s WHERE date = %s"
    params = (pm, now)
    print("%s %r" % (sql, params))
    cur.execute(sql, params)
    conn.commit()
    cur.close()
except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
else:
    # Report success only when the update actually committed.
    print('Air update and inser Database Success')
finally:
    if conn is not None:
        conn.close()


然后写个线程,每隔半个小时更新一下,最后用SimpleHTTPServer运行就能访问了

# -*- coding: utf-8 -*-
# Minimal static file server built on the Python 2 standard library.
# It listens on every interface at PORT and blocks forever handling requests.
import SimpleHTTPServer
import SocketServer

PORT = 80
Handler = SimpleHTTPServer.SimpleHTTPRequestHandler

# An empty host string binds to all local addresses.
bind_address = ("", PORT)
httpd = SocketServer.TCPServer(bind_address, Handler)

print("serving at port %s" % PORT)
httpd.serve_forever()





PS:运行两天后发现中国天气网有个大坑,见下图,那就是晚上抓取的数据木有白天的温度,也就是最高温度。

     


果断转移阵地,从2345拿数据。

#coding=utf-8
# Fetch the 2345.com forecast page for Shenzhen and print the temperature
# values matched by reg1.  Written for Python 2 (urllib.urlopen).
import urllib
import re
import ConfigParser
import datetime
import time


def getHtml(url):
    """Download *url* and return the raw response body."""
    return urllib.urlopen(url).read()


def getRel(html, reg):
    """Return all matches of the regex *reg* in *html*."""
    pattern = re.compile(reg)
    return pattern.findall(html)


html = getHtml("http://tianqi.2345.com/shenzhen/59493.htm")

reg1 = r'<font class="[a-z]{3,4}">(-?\w+).{0,2}</font>'  # temperature
# reg2..reg5 are defined but not used anywhere in this snippet.
reg2 = r'</span>\s</em>\s<i>(.+)</i>'  # wind
reg3 = r'<p class="wea">(.+)</p>\s<p class="tem tem\d">'  # weather description
reg4 = r'<section class="mask ct">\s.+\s.+\s.+7d1"><b>(.+)</b>'  # feeling
reg5 = r'<section class="mask ct">\s.+\s.+\s.+7d1"><b>.+</b>(.+)</aside>'  # clothing suggestion

temputer = getRel(html, reg1)
print(temputer)




0 0
原创粉丝点击