python爬虫获取全国天气信息

来源:互联网 发布:电子线路仿真软件 编辑:程序博客网 时间:2024/04/28 05:51

了解了一些 Python 爬虫的知识后,我试着抓取一些自己感兴趣的信息,首先是从中国天气网(www.weather.com.cn)上爬取全国各城市的天气信息。话不多说,直接贴代码。

# coding: utf-8
'''
Created on 2016-07-05

@author: FangLiang
'''
import re
import urllib.request
from collections import OrderedDict


class GetWeather():
    '''Download the regional text-forecast pages of www.weather.com.cn and
    dump one formatted line per city to stdout and to a text file.

    Attributes:
        url_list: page URLs fetched by getData(), in order.
        city_weather_dict: OrderedDict mapping a city name to the list of
            4-tuples captured for that city (one tuple per matching table
            row, in page order).  Filled by getData().
    '''

    def __init__(self):
        self.url_list = [
            # North China
            r'http://www.weather.com.cn/textFC/hb.shtml',
            # Northeast China
            r'http://www.weather.com.cn/textFC/db.shtml',
            # East China
            r'http://www.weather.com.cn/textFC/hd.shtml',
            # Central China
            r'http://www.weather.com.cn/textFC/hz.shtml',
            # South China
            r'http://www.weather.com.cn/textFC/hn.shtml',
            # Northwest China
            r'http://www.weather.com.cn/textFC/xb.shtml',
            # Southwest China
            r'http://www.weather.com.cn/textFC/xn.shtml',
            # Hong Kong / Macau / Taiwan
            r'http://www.weather.com.cn/textFC/gat.shtml',
        ]
        self.city_weather_dict = OrderedDict()
        # One table row: city link text followed by four cells identified by
        # their width attributes.  Compiled without re.S, so a row only
        # matches when it sits on a single line of the HTML.
        self.re_city_weather_str = r'<tr>.*?width="96".*?target="_blank">(.*?)</a>.*?width="82">(.*?)</td>.*?width="86">(.*?)</td>.*?width="97">(.*?)</td>.*?width="85">(.*?)</td>'
        # The eight <li> labels that follow the "day_tabs" marker.
        self.re_days_str = r'day_tabs.*?<li.*?>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>.*?<li>(.*?)</li>'
        self.re_city_weather = re.compile(self.re_city_weather_str)
        self.re_days = re.compile(self.re_days_str, re.S)

    @staticmethod
    def _format_header(days):
        '''Build the header line from one tuple of day-tab labels.'''
        parts = ['城市'.ljust(4, ' ')]
        for position, day in enumerate(days):
            # The first label gets a narrower column than the others.
            parts.append(day.ljust(18 if position == 0 else 24, ' '))
        return ''.join(parts)

    @staticmethod
    def _format_city(city_name, rows):
        '''Build the output line for one city from its captured rows.

        The first row supplies the "current" fields (its 3rd and 4th
        captures); every further row is rendered as a day/night forecast.
        '''
        first = rows[0]
        parts = [
            city_name.ljust(4, ' '),
            '实时天气:', first[2].ljust(4, ' '),
            '  实时温度:', first[3].ljust(4, ' '),
        ]
        for weather in rows[1:]:
            parts.extend([
                '  白天:', weather[0].ljust(4, ' '),
                '  夜晚:', weather[2].ljust(4, ' '),
                '  气温:', weather[1].ljust(4, ' '),
                '~', weather[3].ljust(4, ' '),
            ])
        return ''.join(parts)

    def getData(self, output_path=r'd:\data.txt', encoding='utf-8', timeout=30):
        '''Fetch every URL in url_list, print the results and save them.

        The header line (day labels) is taken from the first page only.
        After each page is parsed, one line is emitted for every city that
        appeared on that page, so each city is written once instead of being
        repeated for every later region.

        Args:
            output_path: file to write; defaults to the path the script
                originally hard-coded.
            encoding: text encoding of the output file.  Set explicitly so
                writing Chinese text does not depend on the locale default.
            timeout: seconds passed to urlopen() for each request.

        Raises:
            urllib.error.URLError: a page could not be fetched.
            OSError: the output file could not be opened or written.
            UnicodeDecodeError: a page is not valid UTF-8.
        '''
        # Start from a clean slate so a second call does not double entries.
        self.city_weather_dict.clear()

        # "with" guarantees the file is closed even if a download fails.
        with open(output_path, 'wt', encoding=encoding) as f:
            for page_index, url in enumerate(self.url_list):
                with urllib.request.urlopen(url, timeout=timeout) as page:
                    html = page.read().decode('utf-8')

                if page_index == 0:
                    for days in self.re_days.findall(html):
                        day_str = self._format_header(days)
                        print(day_str)
                        f.write(day_str + '\n')

                # Remember which cities this page mentioned (in first-seen
                # order) so only those are emitted for this page.
                page_cities = OrderedDict()
                for city_weather in self.re_city_weather.findall(html):
                    city_name = city_weather[0]
                    self.city_weather_dict.setdefault(city_name, []).append(city_weather[1:])
                    page_cities[city_name] = None

                for city_name in page_cities:
                    weather_str = self._format_city(
                        city_name, self.city_weather_dict[city_name])
                    print(weather_str)
                    f.write(weather_str + '\n')


if __name__ == '__main__':
    getWeather = GetWeather()
    getWeather.getData()

输出效果图如下:
这里写图片描述

需要注意的是:用 ljust 对齐含有中文的字符串时,填充字符需要使用全角空格;中文字符的显示宽度约为半角字符的两倍,用半角空格填充会导致各列无法对齐。

0 0
原创粉丝点击