Benchmark study: Computing group summaries¶
The following makes use of the packages data.table, dplyr, memisc, rbenchmark. You may need to
install them from CRAN using the code
install.packages(c("data.table","dplyr","memisc","rbenchmark"))
if you want to run this on your computer. (The packages are already installed on the notebook container, however.)
library(data.table)
library(memisc)
library(dplyr)
library(rbenchmark)
load("BData.RData")
load("SData.RData")
grouped_summary_benchmark_1 <- benchmark(
aggregate =
aggregate(X1~a+b,data=BDataF, FUN=mean),
`with + tapply` =
with(BDataF,tapply(X1,list(a,b),mean)),
data.table =
BDataT[,mean(X1),by=.(a,b)],
`group_by + summarize` =
BDTbl %>% group_by(a,b) %>% summarize(mean(X1)),
`select + group_by + summarize` =
BDTbl %>% select(X1,a,b) %>% group_by(a,b) %>%
summarize(mean(X1)),
withGroups =
with(Groups(BDataF,~a+b),mean(X1)),
columns = c("test","user.self","relative"),
replications = 100,
order = NULL,
relative = "user.self"
)
grouped_summary_benchmark_2 <- benchmark(
aggregate =
aggregate(X1~a+b,data=SDataF, FUN=mean),
`with + tapply` =
with(SDataF,tapply(X1,list(a,b),mean)),
data.table =
SDataT[,mean(X1),by=.(a,b)],
`group_by + summarize` =
SDTbl %>% group_by(a,b) %>% summarize(mean(X1)),
`select + group_by + summarize` =
SDTbl %>% select(X1,a,b) %>% group_by(a,b) %>%
summarize(mean(X1)),
withGroups =
with(Groups(SDataF,~a+b),mean(X1)),
columns = c("test","user.self","relative"),
replications = 100,
order = NULL,
relative = "user.self"
)
save(grouped_summary_benchmark_1,
grouped_summary_benchmark_2,
"grouped-summary-benchmark.RData")
- R file: benchmark-group-summaries.R
- Rmarkdown file: benchmark-group-summaries.Rmd
- Jupyter notebook file: benchmark-group-summaries.ipynb
- Interactive version of the Jupyter notebook (shuts down after 60s):
- Interactive version of the Jupyter notebook (sign in required):