R语言抓取pm2.5数据绘制全国pm2.5分布图
来源:互联网 发布:域名污染 gfw 编辑:程序博客网 时间:2024/04/27 21:21
library(rworldmap)
library(XML)
###Set date like yyyy-mm-dd
StartDate = format(Sys.time() - 86400 ,"%Y-%m-%d")
EndDate = format(Sys.time() - 86400 ,"%Y-%m-%d")
Start = Sys.time()
cat("Obtaining data ...\n")
website = paste("http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=",StartDate,"&enddate=",EndDate,"&page=",1,sep = "")
pmweb = readLines(website,encoding="UTF-8")
pmweb = paste(pmweb,collapse = "")
pmhtml = htmlParse(pmweb,encoding="UTF-8")
totalPages = as.numeric(xmlValue(getNodeSet(pmhtml,"//font[@color='#004e98']")[[2]]))
cat("\tThere are",totalPages-1,"pages need to be downloaded ...\n")
pmtotal = getNodeSet(pmhtml,"//table/tr/td")
pmvalue = sapply(X=pmtotal,FUN=xmlValue)
pmvalue = pmvalue[-1:(-match(x = "首要污染物",table = pmvalue))]
pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = "记录总数",x = pmvalue)]-1)]
pm = matrix(pmvalue,byrow = T,ncol = 6)
pm = data.frame(city=pm[,2],date=pm[,3],pm=as.numeric(pm[,4]),type=pm[,6],degree=pm[,5])
if(totalPages>=2)
{
for(page in 2:totalPages )
{
cat("Obtaining data for page",page,"...\n")
website = paste("http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=",StartDate,"&enddate=",EndDate,"&page=",page,sep = "")
pmweb = readLines(website,encoding="UTF-8")
pmweb = paste(pmweb,collapse = "")
pmhtml = htmlParse(pmweb,encoding="UTF-8")
pmtotal = getNodeSet(pmhtml,"//table/tr/td")
pmvalue = sapply(X=pmtotal,FUN=xmlValue)
pmvalue = pmvalue[-1:(-match(x = "首要污染物",table = pmvalue))]
pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = "记录总数",x = pmvalue)]-1)]
pm.new = matrix(pmvalue,byrow = T,ncol = 6)
pm.new = data.frame(city=pm.new[,2],date=pm.new[,3],pm=as.numeric(pm.new[,4]),type=pm.new[,6],degree=pm.new[,5])
pm = rbind(pm,pm.new)
}
}
if(version$os=="mingw32")
{
ChinaLocation = read.csv(file = "China.Cities.Location.Win.csv")
}else
ChinaLocation = read.csv(file = "China.Cities.Location.Linux.csv")
pmCityLocation = ChinaLocation[match(x = pm[,1],table = ChinaLocation[,1]),]
pmCity = cbind(pm,pmCityLocation)
pmCity = pmCity[complete.cases(pmCity),]
pm.max = max(pmCity$pm)
red = seq(from=0,to=1,length.out=pm.max)
green = seq(from=1,to=0,length.out=pm.max)
blue = seq(from=0,to=0,length.out=pm.max)
col = rgb(red=red,green=green,blue=blue,alpha=0.5)
cex = pmCity$pm/100 + 5
pmCity = data.frame(pmCity,color=col[pmCity$pm])
mapBubbles(dF = pmCity,
oceanCol = "skyblue",
borderCol = "grey",
landCol = "lightgreen",
nameX = "GPSLontitude",
nameY = "GPSLatitude",
nameZSize = "pm",
mapRegion = "China",
nameZColour = "color",
addColourLegend = F)
title(main=paste(StartDate,"-",EndDate,"全国",nrow(pmCity),"个主要城市PM2.5分布情况",sep=""))
title(sub=paste("数据来源:","中华人民共和国环境保护部数据中心 drawed by R author:Conda",sep=""),col.sub="grey")
Stop = Sys.time()
Cost = Stop - Start
library(XML)
###Set date like yyyy-mm-dd
StartDate = format(Sys.time() - 86400 ,"%Y-%m-%d")
EndDate = format(Sys.time() - 86400 ,"%Y-%m-%d")
Start = Sys.time()
cat("Obtaining data ...\n")
website = paste("http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=",StartDate,"&enddate=",EndDate,"&page=",1,sep = "")
pmweb = readLines(website,encoding="UTF-8")
pmweb = paste(pmweb,collapse = "")
pmhtml = htmlParse(pmweb,encoding="UTF-8")
totalPages = as.numeric(xmlValue(getNodeSet(pmhtml,"//font[@color='#004e98']")[[2]]))
cat("\tThere are",totalPages-1,"pages need to be downloaded ...\n")
pmtotal = getNodeSet(pmhtml,"//table/tr/td")
pmvalue = sapply(X=pmtotal,FUN=xmlValue)
pmvalue = pmvalue[-1:(-match(x = "首要污染物",table = pmvalue))]
pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = "记录总数",x = pmvalue)]-1)]
pm = matrix(pmvalue,byrow = T,ncol = 6)
pm = data.frame(city=pm[,2],date=pm[,3],pm=as.numeric(pm[,4]),type=pm[,6],degree=pm[,5])
if(totalPages>=2)
{
for(page in 2:totalPages )
{
cat("Obtaining data for page",page,"...\n")
website = paste("http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=",StartDate,"&enddate=",EndDate,"&page=",page,sep = "")
pmweb = readLines(website,encoding="UTF-8")
pmweb = paste(pmweb,collapse = "")
pmhtml = htmlParse(pmweb,encoding="UTF-8")
pmtotal = getNodeSet(pmhtml,"//table/tr/td")
pmvalue = sapply(X=pmtotal,FUN=xmlValue)
pmvalue = pmvalue[-1:(-match(x = "首要污染物",table = pmvalue))]
pmvalue = pmvalue[1:((1:length(pmvalue))[grepl(pattern = "记录总数",x = pmvalue)]-1)]
pm.new = matrix(pmvalue,byrow = T,ncol = 6)
pm.new = data.frame(city=pm.new[,2],date=pm.new[,3],pm=as.numeric(pm.new[,4]),type=pm.new[,6],degree=pm.new[,5])
pm = rbind(pm,pm.new)
}
}
if(version$os=="mingw32")
{
ChinaLocation = read.csv(file = "China.Cities.Location.Win.csv")
}else
ChinaLocation = read.csv(file = "China.Cities.Location.Linux.csv")
pmCityLocation = ChinaLocation[match(x = pm[,1],table = ChinaLocation[,1]),]
pmCity = cbind(pm,pmCityLocation)
pmCity = pmCity[complete.cases(pmCity),]
pm.max = max(pmCity$pm)
red = seq(from=0,to=1,length.out=pm.max)
green = seq(from=1,to=0,length.out=pm.max)
blue = seq(from=0,to=0,length.out=pm.max)
col = rgb(red=red,green=green,blue=blue,alpha=0.5)
cex = pmCity$pm/100 + 5
pmCity = data.frame(pmCity,color=col[pmCity$pm])
mapBubbles(dF = pmCity,
oceanCol = "skyblue",
borderCol = "grey",
landCol = "lightgreen",
nameX = "GPSLontitude",
nameY = "GPSLatitude",
nameZSize = "pm",
mapRegion = "China",
nameZColour = "color",
addColourLegend = F)
title(main=paste(StartDate,"-",EndDate,"全国",nrow(pmCity),"个主要城市PM2.5分布情况",sep=""))
title(sub=paste("数据来源:","中华人民共和国环境保护部数据中心 drawed by R author:Conda",sep=""),col.sub="grey")
Stop = Sys.time()
Cost = Stop - Start
cat(paste("\n\n\nIt cost",Cost,"minutes.\n"))
转载自 http://blog.sina.com.cn/s/blog_403aa80a0101gp11.html
0 0
- R语言抓取pm2.5数据绘制全国pm2.5分布图
- 全球PM2.5分布图
- python爬虫抓取全国pm2.5的空气质量(2015.12.21版)
- Windows 8 全国pm2.5查看隐私声明
- 城市空气pm2.5数据调用实现
- 如何应对pm2.5
- HDU5182 PM2.5
- hdu 5182 PM2.5
- 【HDOJ】5182 ( PM2.5 )
- hdu 5182 PM2.5
- hdu 5182 PM2.5
- PM2.5传感器
- ML2016-PM2.5 Prediction
- PM2.5检测
- pm2
- pm2
- pm2
- PM2
- Android中的ListView及其优化(待续)
- <streams.h>头文件打不开
- 模拟实现Spring IoC功能
- Quartz.Net定时任务框架,实现灵活的定时任务
- Unity Manual之Rigidbody 刚体
- R语言抓取pm2.5数据绘制全国pm2.5分布图
- IO流笔记01
- Linux学习笔记:系统启动引导过程
- leetcode 130: Surrounded Regions
- ListView的工作原理及优化
- Unity_角色控制详解
- Oracle清理回收站的方法
- TYVJ 1330 乳草的入侵
- /sys/class/gpio 文件接口操作IO端口(s3c2440)