R语言培训-第二期

来源:互联网 发布:php 命名空间 编辑:程序博客网 时间:2024/04/28 13:00

内容概况

•            重命名

•            类型转换

•            缺失值处理

•            创建新变量 删除变量

•            排序

•            合并(行/列) 筛选

•            vlookup

•            透视表

# R training, session 2: basic data-frame manipulation.
#
# Walks through renaming, type conversion, missing-value handling, creating
# and dropping columns, sorting, merging, filtering, a vlookup-style lookup
# and pivot-table style summaries. The script is meant to be run top to
# bottom; statements that are expected to fail are wrapped in try() so that
# source()-ing the file does not stop at them.
#
# Inputs (read from the working directory): "节假日分析.csv" and "节假日.csv".

# install.packages("lubridate")
library(lubridate)

# Workspace setup ----
# Either "/" or "\\" can be used as the path separator.
setwd("D:\\Documents\\work\\R培训\\第二期")
getwd()

# Reading data ----
holiday_analysis_data <- read.csv("节假日分析.csv", stringsAsFactors = FALSE)
# No stringsAsFactors argument here: on R < 4.0 text columns are read as
# factors, which the type-conversion section below uses as an example.
holiday_base_table <- read.csv("节假日.csv")

# Data set:   holiday_analysis_data
# Column:     holiday_analysis_data$产品名称, holiday_analysis_data[1],
#             holiday_analysis_data[, 1]
# Row:        holiday_analysis_data[2, ]
# Value:      holiday_analysis_data[2, 2]
# Structure:  str()
# Summary:    summary()
# Help:       ?fun, ?fun(), help(fun)
head(holiday_analysis_data, 3)
tail(holiday_analysis_data, 10)
str(holiday_analysis_data)
summary(holiday_analysis_data)

# Renaming ----
# rename() names()
names(holiday_analysis_data)
names(holiday_analysis_data)[4] <- "cash"
names(holiday_analysis_data) <- c("product_id", "product_name", "date", "cash")

# Type conversion ----
# Types:   character, numeric, Date, factor
# Test:    is.character() is.numeric() is.data.frame() is.factor() typeof()
# Convert: as.character() as.numeric() as.data.frame() as.factor() as.Date()
str(holiday_analysis_data)

# factor
str(holiday_base_table)
typeof(c(1, 2, 3, 4))
sum(c(1, 2, 3, 4))
# sum() is not defined for factors, so this call raises an error; try()
# prints the error and lets the script continue.
try(sum(as.factor(c(1, 2, 3, 4))))

# Date: "2015-12-01" is the single canonical representation.
# Input dates come in many layouts: 2015/12/01, 2015-12-01, 20151201,
#                                   12/01/2015, 12/01/15, 2015年12月1日
# %Y 2015; %y 15; %m 12; %d 01, etc. See p. 73 of the reference book
# (presumably "R in Action" -- confirm).
as.Date("2015/12/01")
as.Date("2015-12-01")
as.Date("20151201", "%Y%m%d")
# Month-first input is not parsed as intended without an explicit format,
# so the format string is required here.
as.Date("12/01/2015", "%m/%d/%Y")
as.Date("12/01/15", "%m/%d/%y")
as.Date("2015年12月1日", "%Y年%m月%d日")
Sys.Date()

# character, numeric
str(holiday_analysis_data)
typeof(holiday_analysis_data$product_id)
holiday_analysis_data$product_id <-
  as.character(holiday_analysis_data$product_id)
holiday_analysis_data$date <- as.Date(holiday_analysis_data$date)
# Values that cannot be parsed as numbers become NA (with a coercion
# warning); they are replaced in the next section.
holiday_analysis_data$cash <- as.numeric(holiday_analysis_data$cash)

# Missing values ----
# is.na(): NA means "not available"
y <- c(1, 2, 3, 4, NA)
is.na(y)
y[is.na(y)] <- 0
holiday_analysis_data$cash[is.na(holiday_analysis_data$cash)] <- 0

# Creating and deleting variables ----
# Three ways to create: 1. arithmetic  2. a function  3. conditional assignment

# 1. arithmetic
holiday_analysis_data$cash_fake <- holiday_analysis_data$cash * 2

# 2. a function; year() and month() only work on date values
holiday_analysis_data$year <- year(holiday_analysis_data$date)
holiday_analysis_data$month <- month(holiday_analysis_data$date)

# 3. conditional assignment
holiday_analysis_data$level[holiday_analysis_data$cash <= 1000] <- "低收入"
holiday_analysis_data$level[
  holiday_analysis_data$cash <= 5000 & holiday_analysis_data$cash > 1000
] <- "中等收入"
holiday_analysis_data$level[holiday_analysis_data$cash > 5000] <- "高收入"

# Logical / comparison operators: & | > >= < <= == (comparison, not
# assignment) !=   (R has no <> operator; use != for "not equal".)

# Assigning NULL removes the column.
holiday_analysis_data$cash_fake <- NULL

# Sorting ----
# order() sort() rank()
order(c(2, 3, 5, 1, 0)) # indices that sort the vector, min to max
sort(c(2, 3, 5, 1, 0))  # values sorted ascending
rank(c(2, 3, 5, 1, 0))  # rank of each element
order(holiday_analysis_data$cash)
# Negating the key sorts in descending order.
holiday_analysis_data_order <-
  holiday_analysis_data[order(-holiday_analysis_data$cash), ]

# Combining data ----
# Columns: merge(), cbind().  Rows: rbind().
head(holiday_base_table)
str(holiday_base_table)
holiday_base_table$date <- as.Date(holiday_base_table$date)
# merge() performs an inner join by default: only dates present in both
# tables are kept. Pass all.x = TRUE for a left join.
data_merge <- merge(holiday_analysis_data, holiday_base_table, by = "date")
str(data_merge)
str(holiday_analysis_data)

# cbind() needs the same number of rows on both sides; the original notes
# mark this call as failing, so it is wrapped in try().
try(data_cbind <- cbind(holiday_analysis_data, holiday_base_table))
data_cbind <- cbind(holiday_analysis_data[1:10, ], holiday_base_table[1:10, ])

# rbind()
data1_rbind <- holiday_base_table[1:10, ]
data2_rbind <- holiday_base_table[32:41, ]
data1_rbind
data2_rbind
rbind(data1_rbind, data2_rbind)

# Filtering ----
# subset(), which()
town_data <- subset(data_merge, product_name == "南浔古镇")
town_2015_data <- subset(data_merge, product_name == "南浔古镇" & year == 2015)
town_cash_data <- subset(data_merge, cash < 100 | cash > 10000)

# The same two filters written with which().
town_data <- data_merge[which(data_merge$product_name == "南浔古镇"), ]
town_2015_data <- data_merge[
  which(data_merge$product_name == "南浔古镇" & data_merge$year == 2015),
]

# vlookup ----
holiday_base_table
holiday_analysis_data
# Pattern: A$new_col <- B[match(A$key, B$key), <column of B>]
# match() returns, for each date in the analysis data, the row of the base
# table with the same date (NA if there is none); column 2 of that row is
# copied into the new column.
holiday_analysis_data$holiday <- holiday_base_table[
  match(holiday_analysis_data$date, holiday_base_table$date),
  2
]

# Pivot tables ----
# Counts: table().  Sums / means: aggregate().
str(holiday_analysis_data)
table(holiday_analysis_data$product_name)
table(holiday_analysis_data$product_name, holiday_analysis_data$level)
table(
  holiday_analysis_data$product_name,
  holiday_analysis_data$level,
  holiday_analysis_data$year
)
# aggregate(values, by = list(grouping), FUN)
aggregate(
  town_2015_data$cash,
  by = list(town_2015_data$holiday),
  FUN = mean
)


0 0
原创粉丝点击