Python 爬取网页数据并存入数据库

来源:互联网 发布:php员工管理系统源码 编辑:程序博客网 时间:2024/05/17 04:06
# -*- coding: utf-8 -*-
"""Fetch hourly air-quality XML per region and store it in MySQL.

For every region id in ``regiondIds`` the script downloads
``urlPrefix + <id> + '.xml'``, walks every ``<Pointer>`` element of the
document and inserts one row per element into the ``DATARETEST`` table.

Written for Python 2: the download uses ``urllib.urlopen``.

Example of the kind of row the script produces (TimePoint omitted):

    INSERT INTO DATARETEST(Province, City, Area,
        PositionName, StationCode, Latitude, Longitude, AQI, Quality,
        CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2)
    VALUES ('Hebei', 'Shijiazhuang', 'Some county', 'Station', 'a001',
        212, 212, 21, 'yes', 12, 21, 21, 21, 21, 21, 21, 12)
"""
import contextlib
import datetime
import urllib
import xml.dom.minidom

# Captured once at start-up; every row inserted by this run shares it.
TimePoint = datetime.datetime.now()

urlPrefix = 'http://121.28.49.85:8080/datas/hour/'
regiondIds = ['130000']

# Text the script treats as "no reading"; such values are stored as 0.
MISSING_MARKER = '--'

# Pollutant columns, in the order they appear in INSERT_SQL.
POLLUTANT_COLUMNS = (
    'CO', 'NO2', 'O3', 'O3_8h', 'PM10', 'PM2_5', 'PM2_5_24h', 'SO2',
)

# Parameterized insert: 18 columns, 18 placeholders, same order as the
# tuple returned by pointer_row().
INSERT_SQL = (
    'INSERT INTO DATARETEST(Province, City, Area, '
    'PositionName, StationCode, Latitude, Longitude, AQI, Quality, '
    'CO, NO2, O3, O3_8h, PM10, PM2_5, PM2_5_24h, SO2, TimePoint) '
    'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, '
    '%s, %s, %s, %s, %s, %s, %s, %s, %s)'
)


def element_text(node, tag):
    """Return the text of the first ``<tag>`` descendant of *node*.

    Returns None when there is no such element or when it has no child
    node (e.g. ``<Color/>``), instead of raising IndexError or
    AttributeError.
    """
    matches = node.getElementsByTagName(tag)
    if not matches or matches[0].firstChild is None:
        return None
    return matches[0].firstChild.nodeValue


def pointer_row(pointer):
    """Build the INSERT_SQL parameter tuple for one ``<Pointer>`` element.

    Text fields are UTF-8 encoded; a missing or empty element becomes
    None (SQL NULL). A missing pollutant reading, or one equal to
    MISSING_MARKER, is stored as 0.
    """
    def encoded(tag):
        text = element_text(pointer, tag)
        return text if text is None else text.encode("utf-8")

    # NOTE(review): the original loop over <Poll> elements never used the
    # loop variable and assigned every pollutant from the FIRST <Value>
    # under the Pointer, so all eight pollutant columns receive the same
    # number. That behaviour is preserved here because the feed's
    # per-pollutant layout cannot be confirmed from this script; map each
    # <Poll> to its own column once the schema is verified.
    reading = None
    if pointer.getElementsByTagName('Poll'):
        reading = element_text(pointer, 'Value')
    if reading is None or reading == MISSING_MARKER:
        reading = 0

    station_fields = (
        '河北省',             # Province is hard-coded
        encoded('City'),
        encoded('Region'),    # stored in the Area column
        encoded('Name'),      # stored in the PositionName column
        # NOTE(review): StationCode is filled from <Color>, as in the
        # original script; confirm that this is intended.
        encoded('Color'),
        encoded('CLat'),
        encoded('CLng'),
        encoded('AQI'),
        encoded('Level'),     # stored in the Quality column
    )
    return station_fields + (reading,) * len(POLLUTANT_COLUMNS) + (TimePoint,)


def main():
    """Download every configured region and insert its rows into MySQL.

    Commits once per region. The connection is closed only after all
    regions are processed (the original closed it inside the region
    loop, which breaks as soon as more than one region id is listed),
    and it is closed even if a download, parse or insert fails.
    """
    # Imported here so the parsing helpers above can be loaded on a
    # machine that has no MySQL driver installed.
    import MySQLdb

    db = MySQLdb.Connect(host="localhost", user="root", passwd="root",
                         db="pythondb", charset="utf8")
    try:
        cursor = db.cursor()
        for regiondId in regiondIds:
            fullUrl = urlPrefix + regiondId + '.xml'
            # closing() releases the HTTP response once it has been read.
            with contextlib.closing(urllib.urlopen(fullUrl)) as response:
                data = response.read()
            dom = xml.dom.minidom.parseString(data)
            pointers = dom.documentElement.getElementsByTagName("Pointer")
            for pointer in pointers:
                cursor.execute(INSERT_SQL, pointer_row(pointer))
            db.commit()
    finally:
        db.close()


if __name__ == "__main__":
    main()

0 0
原创粉丝点击