Below is my solution, which accommodates an indeterminate number of data columns. Many thanks to @Col for suggesting the join
approach and for the setNames example;
the latest version satisfies all my requirements. While it is not as easy to read as the group_map
solution, the performance gain is worth it for me.
library(data.table)
# Reproducible example data ----
set.seed(1)
times <- 1e5  # number of rows
cols <- 4     # number of data columns

# Draw all values with a single rnorm() call and fill the matrix column by
# column; as.data.frame() names the resulting columns V1..V4.
df3 <- as.data.frame(matrix(rnorm(n = times * cols, mean = 5), ncol = cols))

# Put the group id in front: 1,000 groups of 100 consecutive rows each.
df3 <- cbind(grp = rep(seq_len(1e3), each = 100), df3)

# setDT() converts df3 in place, so DT refers to the same table as df3.
DT <- setDT(df3)
setnames(DT, old = paste0("V", 1:4), new = c("A", "B", "C", "X"))
#' Locate, per group, the first row where each data column falls below half
#' of a group-level reference value
#'
#' For every data column except `X`, the reference is the column's maximum
#' within the group. For `X`, the reference is the mean of those per-group
#' maxima.
#'
#' @param DT1 A data.table with a `grp` column, an `X` column and any number
#'   of further data columns. `DT1` itself is not modified.
#' @return A data.table with one row per `grp`, containing `grp` and one
#'   column per data column. Each value is the within-group position of the
#'   first row whose value is below half of its reference, or `NA` when no
#'   row in the group qualifies.
DT_way2 <- function(DT1) {
  data_cols <- setdiff(colnames(DT1), "grp")
  normal_data_cols <- setdiff(data_cols, "X")

  # Per-group maxima of the ordinary data columns (one row per group).
  sum_DT <- DT1[, lapply(.SD, max), .SDcols = normal_data_cols, by = grp]

  # Reference for X: row-wise mean of the maxima. sum_DT already holds one
  # row per group, so one vectorised rowMeans() call over the whole table
  # replaces a separate call for every group.
  sum_DT[, X := rowMeans(.SD), .SDcols = normal_data_cols]

  # Join the references back onto the data and evaluate j once per group
  # (by = .EACHI). Inside j, a bare column name refers to DT1 and the "i."
  # prefix refers to the matching sum_DT row. The lambda argument is called
  # col_name rather than x so that a data column named `x` is not shadowed
  # by the argument when it is looked up through get().
  DT1[
    sum_DT,
    on = "grp",
    by = .EACHI,
    j = lapply(
      setNames(data_cols, data_cols),
      function(col_name) {
        match(TRUE, get(col_name) < 0.5 * get(paste0("i.", col_name)))
      }
    )
  ]
}
# Run the function on the example data; the printed result is shown below
# (one row per grp, one column per data column).
DT_way2(DT)
#> grp A B C X
#> 1: 1 14 16 9 11
#> 2: 2 34 4 21 1
#> 3: 3 5 11 8 3
#> 4: 4 24 17 1 3
#> 5: 5 3 16 12 10
#> ---
#> 996: 996 2 9 5 5
#> 997: 997 6 21 18 17
#> 998: 998 3 3 13 19
#> 999: 999 1 4 5 9
#> 1000: 1000 1 2 2 13