I have the following data:
num_var_1 <- rnorm(1000, 10, 1)
num_var_2 <- rnorm(1000, 10, 5)
num_var_3 <- rnorm(1000, 10, 10)
num_var_4 <- rnorm(1000, 10, 10)
num_var_5 <- rnorm(1000, 10, 10)
factor_1 <- c("A","B", "C")
factor_2 <- c("AA","BB", "CC")
factor_3 <- c("AAA","BBB", "CCC", "DDD")
factor_4 <- c("AAAA","BBBB", "CCCC", "DDDD", "EEEE")
factor_5 <- c("AAAAA","BBBBB", "CCCCC", "DDDDD", "EEEEE", "FFFFFF")
factor_var_1 <- as.factor(sample(factor_1, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.2)))
factor_var_2 <- as.factor(sample(factor_2, 1000, replace=TRUE, prob=c(0.5, 0.3, 0.2)))
factor_var_3 <- as.factor(sample(factor_3, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.2, 0.1)))
factor_var_4 <- as.factor(sample(factor_4, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
factor_var_5 <- as.factor(sample(factor_4, 1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
id = 1:1000
my_data = data.frame(id,num_var_1, num_var_2, num_var_3, num_var_4, num_var_5, factor_var_1, factor_var_2, factor_var_3, factor_var_4, factor_var_5)
I also have another data frame that stores "conditions that can be used to select rows from my_data" - each row acts as a set of conditions:
res2 <- structure(list(num_var_1 = c(13.0594020922422, NA, 11.4862445222133,
9.61245560356254, 9.09042162012824, 11.2655306773389, 8.17775208814389,
10.3368491267469, NA, NA), num_var_2 = c(NA, 15.5787995721324,
0.0478277041848836, 18.2224315520234, NA, 7.96911807784013, NA,
8.05245674365955, 4.27070199562321, NA), num_var_3 = c(32.5008410879472,
40.8727408367019, NA, 46.8308850184899, 0.356266194135838, 20.6076522107065,
-22.1119350917156, NA, 47.4219529058096, 47.7495176181095), num_var_4 = c(-10.5323504910751,
NA, -21.9504025438917, NA, -8.07098853672244, 36.6252011463549,
-10.729434726805, NA, NA, 14.5063601192342), num_var_5 = c(25.736287930052,
4.7650948325899, NA, -8.57811306991066, 3.36041241809802, -14.8050577655299,
-18.0775264320681, NA, NA, NA), factor_var_1 = c("A B", "B B",
NA, NA, "C C C", "A B", "B C", "B B C", "A B", NA), factor_var_2 = c("AA AA",
"AA CC CC", "AA CC", "AA CC CC", "BB", "AA AA", "CC", NA, NA,
"AA AA CC"), factor_var_3 = c(NA, NA, "AAA AAA CCC", NA, "AAA AAA AAA AAA",
"AAA DDD DDD", NA, NA, "BBB CCC", NA), factor_var_4 = c("AAAA EEEE",
"AAAA AAAA", NA, NA, NA, "BBBB", "AAAA AAAA BBBB DDDD DDDD",
"AAAA CCCC", "AAAA AAAA", "AAAA BBBB"), factor_var_5 = c("AAAA BBBB CCCC DDDD",
NA, NA, NA, "AAAA AAAA", NA, NA, "BBBB", "AAAA AAAA AAAA EEEE",
NA)), class = "data.frame", row.names = c(NA, -10L))
num_var_1 num_var_2 num_var_3 num_var_4 num_var_5 factor_var_1 factor_var_2 factor_var_3 factor_var_4 factor_var_5
1 13.059402 NA 32.5008411 -10.532350 25.736288 A B AA AA <NA> AAAA EEEE AAAA BBBB CCCC DDDD
2 NA 15.5787996 40.8727408 NA 4.765095 B B AA CC CC <NA> AAAA AAAA <NA>
3 11.486245 0.0478277 NA -21.950403 NA <NA> AA CC AAA AAA CCC <NA> <NA>
4 9.612456 18.2224316 46.8308850 NA -8.578113 <NA> AA CC CC <NA> <NA> <NA>
5 9.090422 NA 0.3562662 -8.070989 3.360412 C C C BB AAA AAA AAA AAA <NA> AAAA AAAA
6 11.265531 7.9691181 20.6076522 36.625201 -14.805058 A B AA AA AAA DDD DDD BBBB <NA>
7 8.177752 NA -22.1119351 -10.729435 -18.077526 B C CC <NA> AAAA AAAA BBBB DDDD DDDD <NA>
8 10.336849 8.0524567 NA NA NA B B C <NA> <NA> AAAA CCCC BBBB
9 NA 4.2707020 47.4219529 NA NA A B <NA> BBB CCC AAAA AAAA AAAA AAAA AAAA EEEE
10 NA NA 47.7495176 14.506360 NA <NA> AA AA CC <NA> AAAA BBBB <NA>
I would like to (directly) select rows from "my_data" using the "conditions" from the 10th row of "res2" - in theory, this should look something like this:
# code 1
final_result <- my_data[which(my_data$num_var_1 < res2[10,1] & my_data$num_var_2 < res2[10,2] & my_data$num_var_3 < res2[10,3] & my_data$num_var_4 < res2[10,4] & my_data$num_var_5 < res2[10,5] & my_data$factor_var_1 == res2[10,6] & my_data$factor_var_2 == res2[10,7] & my_data$factor_var_3 == res2[10,8] & my_data$factor_var_4 == res2[10,9] & my_data$factor_var_5 == res2[10,10]), ]
However, the above code does not work.
I was able to write this code by explicitly specifying the conditions:
#code 2
final_result <- my_data[which(my_data$num_var_2 < 4.66 & my_data$num_var_3 < 22.33 & my_data$factor_var_1 %in% c( "B", "C") & my_data$factor_var_2 %in% c( "AA", "BB") & my_data$factor_var_3 %in% c( "AAA", "DDD") & my_data$factor_var_4 %in% c( "AAAA", "CCCC", "EEEE" )), ]
But is there a way to write "code 1" in the same format as "code 2"?
Thanks!