R中data.table与plyr的使用

来源:互联网 发布:正规淘宝刷平台有哪些 编辑:程序博客网 时间:2024/06/05 05:00
# Generate example data ----
# Fixed seed so the random draws in x are reproducible.
set.seed(1234)
smalldat <- data.frame(
  group1 = rep(1:2, each = 5),
  group2 = rep(c("a", "b"), times = 5),
  x = rnorm(10)
)

# Convert the data.frame to a data.table
library(data.table)
smalldat <- data.table(smalldat)

# Next step: add the aggregated variables to the raw data
# Add aggregated columns to the raw data ----
# `:=` adds the column to smalldat in place; combined with `by`, every row
# receives the mean of x for its own group.
smalldat[, aggGroup1 := mean(x), by = group1]

# Aggregate by two grouping variables
smalldat[, aggGroup1.2 := mean(x), by = list(group1, group2)]
# The plyr package offers similar functionality ----
library(plyr)

# Create aggregated data: one row per group holding the group mean of x
aggdat1 <- ddply(
  smalldat, .(group1), summarize,
  aggGroup1plyr = mean(x)
)
aggdat12 <- ddply(
  smalldat, .(group1, group2), summarize,
  aggGroup1.1plyr = mean(x)
)

# Next step: join the aggregates back into the data
# Join the aggregates back onto the raw data (similar to merge()) ----
smalldat <- join(smalldat, aggdat1, by = "group1")
smalldat <- join(smalldat, aggdat12, by = c("group1", "group2"))

# Print data
smalldat
# Remove duplicate records based on the given fields ----
# Set keys - this sorts the data based on these values
setkeyv(smalldat, c("group1", "group2"))

# Keep one observation per key combination (the variable x is also removed).
# `by = key(smalldat)` is passed explicitly: since data.table 1.9.8, unique()
# compares all columns by default rather than the key columns, and x differs
# on every row, so the default would not drop anything.
uniqdat <- subset(unique(smalldat, by = key(smalldat)), select = -x)

# Print data
uniqdat

0 0