用R数据处理

来源:互联网 发布:有声广告制作软件 编辑:程序博客网 时间:2024/06/05 16:53
# Data cleaning ----
# Read the raw export. The first line of the file is skipped and the first
# five columns are named by hand below.
mydata <- read.table(
  "C:/Users/admin/Desktop/data.txt",
  header = FALSE,
  stringsAsFactors = FALSE,
  skip = 1
)
names(mydata)[1:5] <- c("ID", "time1", "time2", "type", "record")

# Install tidyr only when it is missing instead of reinstalling on every run.
# The former update.packages("tidyr") call was dropped: its first argument is
# a library location, not a package name, so it never updated tidyr.
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(tidyr)

# Add a combined "time" column ("<time1> <time2>"). The result has to be
# assigned, otherwise the new column is thrown away. time1 and time2 are kept
# (remove = FALSE) because later steps of this script still select time2.
mydata <- tidyr::unite(
  mydata, "time", time1, time2,
  sep = " ", remove = FALSE
)

# Show the IDs that occur more than once (an empty vector means none).
print(mydata$ID[duplicated(mydata$ID)])

# Drop every record whose type is 4, then re-check the IDs.
newdata <- mydata[mydata$type != 4, ]
print(newdata$ID[duplicated(newdata$ID)])
# Daily mean blood glucose ----
# Mean for 2017/06/02: the script treats rows 1 through the row holding
# ID 4992 of newdata as that day's readings.
locate1 <- which(newdata$ID == 4992)

# which() returns zero or several positions when the ID is missing or
# duplicated; either case would make the row range below meaningless.
if (length(locate1) != 1L) {
  stop(
    "expected exactly one row with ID 4992 in newdata, found ",
    length(locate1),
    call. = FALSE
  )
}

num1 <- locate1
sum1 <- sum(newdata$record[seq_len(num1)])
ave1 <- sum1 / num1
print(ave1)
# Daily means for 2017/06/03 through 2017/06/30 ----
# id[k] is the ID of the last reading of day k; id[1] closes 2017/06/02 and
# each later entry closes the following day.
id <- c(
  4992, 5199, 5359, 5538, 5704, 5875, 6054, 6221, 6391, 6608,
  6839, 7086, 7318, 7515, 7672, 7860, 8037, 8245, 8465, 8645,
  8808, 8978, 9176, 9356, 9530, 9701, 9855, 10015, 10072
)

# Row of each day's last reading in newdata. This is looked up for every
# entry, including the first, instead of hard-coding the first boundary row.
locate <- match(id, newdata$ID)
if (anyNA(locate)) {
  stop(
    "boundary ID(s) not found in newdata: ",
    paste(id[is.na(locate)], collapse = ", "),
    call. = FALSE
  )
}
if (any(diff(locate) <= 0)) {
  stop("boundary IDs must appear in increasing row order", call. = FALSE)
}

# Day k covers the rows after the previous boundary up to its own boundary;
# the first day starts at row 1.
first_row <- c(1L, head(locate, -1L) + 1L)
num <- locate - first_row + 1L

# Per-day total and mean of the readings. The total is called `total` so
# that it does not shadow base::sum.
total <- vapply(
  seq_along(id),
  function(day) sum(newdata$record[first_row[day]:locate[day]]),
  numeric(1)
)
ave <- total / num

# Print the means of days 2..29 (2017/06/03 onwards), one per line; the
# first day's mean is reported separately by the preceding section.
for (i in seq_along(id)[-1]) {
  print(ave[i])
}
# High-glucose periods for three sample days ----
# Original author's note: no automatic method had been worked out yet, so the
# periods are read off the plots below.

# Plot one day's readings in recorded order and return the data invisibly.
#   day_data: data frame with columns time2 and record
#   main:     plot title
plot_day_records <- function(day_data, main = NULL) {
  n_rows <- nrow(day_data)
  if (n_rows == 0) {
    warning("no readings in the requested ID range; nothing plotted",
            call. = FALSE)
    return(invisible(day_data))
  }

  if (is.numeric(day_data$time2)) {
    plot(day_data$time2, day_data$record, type = "b",
         xlab = "time2", ylab = "record", main = main)
  } else {
    # time2 is read with stringsAsFactors = FALSE, so it is typically text,
    # which plot() cannot place on a numeric axis. Plot the readings against
    # their position and label a handful of ticks with the time strings.
    position <- seq_len(n_rows)
    plot(position, day_data$record, type = "b", xaxt = "n",
         xlab = "time2", ylab = "record", main = main)
    ticks <- unique(round(seq(1, n_rows, length.out = min(n_rows, 8L))))
    axis(1, at = ticks, labels = day_data$time2[ticks])
  }

  # Reference line at 9, the level the closing note of this section uses
  # for high readings.
  abline(h = 9, lty = 2)
  invisible(day_data)
}

# Readings of 2017/06/02
data1 <- subset(newdata, ID >= 4858 & ID <= 4992, select = c(time2, record))
plot_day_records(data1, main = "2017/06/02")

# Readings of 2017/06/03
data2 <- subset(newdata, ID >= 4993 & ID <= 5199, select = c(time2, record))
plot_day_records(data2, main = "2017/06/03")

# Readings of 2017/06/04
data3 <- subset(newdata, ID >= 5200 & ID <= 5359, select = c(time2, record))
plot_day_records(data3, main = "2017/06/04")

# The periods in which the glucose value exceeds 9 can be read off the plots.









原创粉丝点击