# R language: car data visualization (R语言_car_可视化)
#
# Source: Internet | Posted: java怎么以分数形式 | Editor: 程序博客网 | Time: 2024/05/22 14:05
library(ggplot2)
library(plyr)
library(reshape2)
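# NOTE: a ggplot() chain may be split across lines, but every continued line
# must END with `+`. A line that STARTS with `+` is parsed as a separate
# statement, so the layers on it are never added to the plot.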
# Load the vehicles data set and explore its structure.
# Strings are kept as character vectors; TRUE/FALSE are spelled out because
# T/F are ordinary variables that can be reassigned.
# NOTE(review): the absolute path is machine-specific -- adjust before running.
vehicles <- read.csv("F:/Data/exe/vehicles.csv", stringsAsFactors = FALSE)
head(vehicles)

# Small demo: a character vector, its factor version, and a string comparison.
colors <- c("green", "red", "yellow", "blue")
colors_factors <- factor(colors)
colors[1] < colors[2]

# Basic shape of the data.
nrow(vehicles)
ncol(vehicles)
names(vehicles)

# Model years covered (the author observed 35 distinct years).
length(unique(vehicles[, "year"]))
first_year <- min(vehicles[, "year"])
last_year <- max(vehicles[, "year"])
# Check whether every year in the range has data.
length(unique(vehicles$year))

# Primary fuel types.
head(vehicles$fuelType1)
table(vehicles$fuelType1)

# Transmission: treat empty strings as missing.
vehicles$trany[vehicles$trany == ""] <- NA
head(vehicles$trany)
head(substr(vehicles$trany, 1, 4))
# If the first four characters are "Auto" the car is "Auto", otherwise
# "Manual"; rows where trany is NA stay NA.
vehicles$trany2 <- ifelse(substr(vehicles$trany, 1, 4) == "Auto", "Auto", "Manual")
head(vehicles$trany2)
# Turn the new variable into a factor.
vehicles$trany2 <- as.factor(vehicles$trany2)

# Frequency tables and cross-tabulations.
table(vehicles$trany2)
table(vehicles$sCharger)
table(vehicles$year)
table(vehicles$trany2, vehicles$fuelType1)
with(vehicles, table(sCharger, year))

# Column classes.
class(vehicles$sCharger)
class(vehicles$trany2)
class(vehicles$tCharger)
# The author observed that R read the "T" flag in tCharger as logical TRUE.
unique(vehicles$tCharger)
# Per-year averages of combined, highway and city MPG across all vehicles.
mpgByYr <- ddply(
  vehicles, "year", summarise,
  avgMPG = mean(comb08),
  avgHghy = mean(highway08),
  avgCity = mean(city08)
)
head(mpgByYr)
# Scatter plot of average combined MPG by year. (geom_smooth() would add a
# smoothed conditional mean with a shaded band; it is not used in this plot.)
ggplot(mpgByYr, aes(year, avgMPG)) +
  geom_point() +
  labs(x = "Year", y = "Average MPG", title = "All cars")
# Keep only pure gasoline cars: a gasoline primary fuel, no secondary fuel,
# and not a hybrid.
gasCars <- subset(
  vehicles,
  fuelType1 %in% c("Regular Gasoline", "Premium Gasoline", "Midgrade Gasoline") &
    fuelType2 == "" &
    atvType != "Hybrid"
)
# Average combined MPG per year for gasoline cars.
mpgByYr_Gas <- ddply(gasCars, ~year, summarise, avgMPG = mean(comb08))
# Each continued line ends with `+` so the labels and title belong to the plot;
# a line beginning with `+` would be evaluated as a separate statement.
ggplot(mpgByYr_Gas, aes(year, avgMPG)) +
  geom_point() +
  geom_smooth() +
  xlab("Year") +
  ylab("Average MPG") +
  ggtitle("Gasoline cars")
# Engine displacement: the author observed class "numeric" / type "double",
# so no conversion is needed.
class(gasCars$displ)
typeof(gasCars$displ)
# gasCars$displ <- as.numeric(gasCars$displ)

# Combined MPG against displacement.
ggplot(gasCars, aes(displ, comb08)) + geom_point() + geom_smooth()
# The relationship is monotonically decreasing: cars with smaller engines are
# more fuel efficient.
# Next, check whether more small cars were built in recent years; if so, that
# could explain the recent large improvement in fuel efficiency.
avgCarSize <- ddply(gasCars, ~year, summarise, avgDispl = mean(displ))
# Each continued line ends with `+` so the axis labels are applied to the plot.
# NOTE(review): "(1)" in the y label is presumably meant to be "(l)" for
# litres -- confirm before changing the label text.
ggplot(avgCarSize, aes(year, avgDispl)) +
  geom_point() +
  geom_smooth() +
  xlab("Year") +
  ylab("Average engine displacement(1)")
# Average displacement and average combined MPG per year, side by side.
byYear <- ddply(gasCars, ~year, summarise, avgDispl = mean(displ), avgMPG = mean(comb08))
head(byYear)
# ggplot can facet: show average MPG and average displacement by year in the
# same figure but on separate panels.
# The two columns of byYear (avgMPG, avgDispl) are melted into a single
# `value` column in byYear2, with `variable` recording the source column.
byYear2 <- melt(byYear, id.vars = "year")
# Relabel by NAME rather than by position: melt() orders the levels by column
# order (avgDispl, avgMPG), so a positional `levels<-` would swap the labels.
byYear2$variable <- factor(
  byYear2$variable,
  levels = c("avgMPG", "avgDispl"),
  labels = c("Average MPG", "Avg engine displacement")
)
head(byYear2)
tail(byYear2)
# byYear2 has one row per (year, variable) pair.
nrow(byYear)
nrow(byYear2)
ggplot(byYear2, aes(year, value)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~variable, ncol = 1, scales = "free_y") +
  xlab("Year") +
  ylab("")
# Cylinder count: the author observed type and class "integer".
typeof(vehicles$cylinders)
class(vehicles$cylinders)
# Restrict to four-cylinder gasoline cars.
gasCars4 <- subset(gasCars, cylinders == 4)

# Box plots of combined MPG per year, one panel per transmission type.
# factor(year) makes each year in the data its own category on the x axis.
ggplot(gasCars4, aes(factor(year), comb08)) +
  geom_boxplot() +
  facet_wrap(~trany2, ncol = 1) +
  theme(axis.text = element_text(angle = 45)) +
  xlab("Year") +
  ylab("MPG")

# Share of each transmission type per year; the dashed line marks 50%.
# The y-axis label is passed as `y =`: labs() takes aesthetic names, so
# `ylab =` would not label the y axis.
ggplot(gasCars4, aes(factor(year), fill = factor(trany2))) +
  geom_bar(position = "fill") +
  labs(x = "Year", y = "Proportion of cars", fill = "Transmission") +
  theme(axis.text.x = element_text(angle = 45)) +
  geom_hline(yintercept = 0.5, linetype = 2)
# Count the distinct makes offering four-cylinder gasoline cars in each year.
carsMake <- ddply(
  gasCars4, "year", summarise,
  numberOfMakes = length(unique(make))
)
# Total number of distinct makes over all years, then the latest yearly counts.
length(unique(gasCars4$make))
tail(carsMake)
ggplot(carsMake, aes(year, numberOfMakes)) +
  geom_point() +
  xlab("Year") +
  ylab("Number of available makes") +
  labs(title = "Four cylinder cars")
# Makes available in each year.
# ddply takes a data frame and returns a data frame; dlply takes a data frame
# and returns a list. Here the function is applied to the `make` column of
# each per-year chunk, giving one vector of makes per year.
uniqMakes <- dlply(gasCars4, ~year, function(x) unique(x$make))
# Makes that built four-cylinder cars in every year: the intersection of the
# per-year vectors.
commonMakes <- Reduce(intersect, uniqMakes)
# Keep only the cars from those makes (fixes the `gasCas4` typo, which raised
# an "object not found" error).
carCommonMakes4 <- subset(gasCars4, make %in% commonMakes)
# Average combined MPG per year and make, one panel per make.
avgMPG_commonMakes <- ddply(carCommonMakes4, ~year + make, summarise, avgMPG = mean(comb08))
ggplot(avgMPG_commonMakes, aes(year, avgMPG)) +
  geom_line() +
  facet_wrap(~make, nrow = 3)
head(avgMPG_commonMakes)
# (web page footer from the original post: 原创粉丝点击)