R语言 实例操作2

来源:互联网 发布:snmpv3 trap java 编辑:程序博客网 时间:2024/05/16 11:48

for循环、assign()函数、paste()函数、sapply()函数、自编简单函数

setwd("D:/FDU/mine/mydata")options("scipen"=100, "digits"=4)dat = read.csv( file='D:/FDU/mine/mydata/data.csv', header = T)dat <- within(dat, {  WAVE <- as.factor(WAVE)  GENDER <- as.factor(GENDER)  edu <- as.factor(edu)  smoke <- as.factor(smoke)  drink <- as.factor(drink)})###该数据库是针对同一人群七年随访的数据,20个变量。################将income和index变量分组的函数fun1 <- function(x){  q <- quantile(x$income, probs = c(1/3, 2/3), na.rm = T, names = F) ###计算三分位数  p <- quantile(x$index, probs = c(1/3, 2/3), na.rm = T, names = F)  x <- within(x, {    incomel <- NA    incomel[income < q[1]] <- "low income"    incomel[income >= q[1] & income < q[2]] <- "middle income"    incomel[income >= q[2]] <- "high income"    indexl <- NA    indexl[index < p[1]] <- "low urbanization"    indexl[index >= p[1] & index < p[2]] <- "middle urbanization"    indexl[index >= p[2]] <- "high urbanization"    incomel <- as.factor(incomel)    indexl <- as.factor(indexl)  })}##将七年的数据拆成七个数据框,后续还会用到。对income和index分别按照三分位数分组后再合并for(j in c(1993, 1997, 2000, 2004, 2006, 2009, 2011)){  assign(paste("y", j, sep = ""), fun1(subset(dat, WAVE == j)))}datn <- rbind(y1993, y1997)datn <- rbind(datn, y2000)datn <- rbind(datn, y2004)datn <- rbind(datn, y2006)datn <- rbind(datn, y2009)datn <- rbind(datn, y2011)#############################连续性变量描述性分析####连续性变量为数据框的第5, 8, 9, 10, 15, 17, 19列,共7个变量。##对x数据框的7个变量计算中位数fun2 <- function(x){  var <- c(5, 8, 9, 10, 15, 17, 19)  media <- sapply(x[, var[1:7]], median, na.rm = T)  return(round(media, 2))}##对x数据框的7个变量计算四分位数fun3 <- function(x){  var <- c(5, 8, 9, 10, 15, 17, 19)  Q <- sapply(x[, var[1:7]], quantile, probs = c(0.25, 0.75), names = F, na.rm = T)  return(round(Q, 2))}##输出形式########**缺失值NA怎么让它显示为空格而不是NA???paste函数的用法**fun4 <- function(x){  rslt <- paste(fun2(x), "(", fun3(x)[1, ], ",", fun3(x)[2, ], ")")  return(rslt)}col <- c(fun4(y1993), fun4(y1997), fun4(y2000), fun4(y2004), fun4(y2006), fun4(y2009), fun4(y2011))##Kruskal-Wallis test及p值输出形式fun5 <- function(x){  kru <- kruskal.test(datn[, x] ~ datn$WAVE)  if (kru[[3]] < 0.0001) print(paste("<", 0.001, sep = ""))  else print(round(kru[[3]], 3))}col2 <- c(fun5(8), fun5(9), fun5(10), fun5(15), fun5(17), fun5(19))##输出矩阵m1 <- matrix(nrow = 7, ncol = 8)m1[, 1:7] <- matrix(col, byrow = F) m1[2:7, 8] <- matrix(col2, byrow = F)##############################分类变量描述性分析,6个变量##计算七年第i列分类变量的频数和比例,每个变量的结果以矩阵形式输出for(i in c(4, 7, 13, 14, 21, 22)){  assign(paste("n", i, sep = ""), table(datn[, i], datn[, 3]))  assign(paste("prop", i, sep = ""), prop.table(table(datn[, i], datn[, 3]), 2))}##对6个分类变量进行卡方检验, 结果为列表list <- list(n4, n7, n13, n14, n21, n22)chi <- sapply(list, chisq.test)##输出形式fun6 <- function(x, y){  rslt <- paste(x, "(", round(y, digits = 2), ")")  return(rslt)}fun7 <- function(x){  if (chi[[3, x]] < 0.0001) print(paste("<", 0.001, sep = ""))  else print(round(chi[[3, x]], 3))}##矩阵输出m2 <- matrix(nrow = 21, ncol = 8)m2[1, 1:7] <- c(5457, 7577, 7715, 7603, 7494, 7380, 6456)m2[2:3, 1:7] <- matrix(fun6(n4, prop4), byrow = F) m2[5:7, 1:7] <- matrix(fun6(n7, prop7), byrow = F)m2[9:10, 1:7] <- matrix(fun6(n13, prop13), byrow = F)m2[12:13, 1:7] <- matrix(fun6(n14, prop14), byrow = F)m2[15:17, 1:7] <- matrix(fun6(n21, prop21), byrow = F)m2[19:21, 1:7] <- matrix(fun6(n22, prop22), byrow = F)m2[4, 8] <- fun7(2)m2[8, 8] <- fun7(3)m2[11, 8] <- fun7(4)m2[14, 8] <- fun7(5)m2[18, 8] <- fun7(6)m <- rbind(m2, m1)