Hi,
I am trying to filter a dataframe based on numeric values across all columns in R followed by counting the number of values passing the filtering cut-off. I usually do it manually on each column (eg. Stim 1), however I am interested to apply to all the columns in the dataframe. I have provided the example datasets (Input and output/expected results). Is there a way to obtain the expected result?
Thank you,
Toufiq
# Input data
dput(Input_Data)
structure(list(`Stim 1` = c(0.883643926, 0.248614376, 0.518091486,
0.535221236, 0.415450436, -0.940323826, -0.723796576, -0.824290276,
NA, -0.806255146, -0.747521326, NA, 3.20247786, 1.10402434, 1.005757776
), `Stim 2` = c(1.005757776, 1.005757776, 4.51601548, 3, 7.78620408,
-0.706674058, -0.572657338, -0.686018538, -0.514713298, -0.532390248,
-0.462136378, -0.512892468, 1, 1.5, 2.5), `Stim 3` = c(7.798089,
9.2058061, 5.5408169, 1.52159119, 2.63042701, NA, 1.3857699,
-0.152939869, -0.050295909, -0.337659179, -0.058902499, -0.072916919,
-0.410700949, -0.079817359, -0.313859499), `Stim 4` = c(1.41324408,
9.6038562, 1.71087962, 2.95921938, 4.82199712, 3.17140358, 1.15931318,
NA, 1.58997338, 4.76858598, NA, -0.002674678, -0.235496858, 0.065630452,
-0.175745228), `Stim 5` = c(-0.167945369, 1.41324408, 1.41324408,
0.741171721, 2.494610191, -0.532343489, -0.358607189, -0.442774239,
-0.103589789, 0.213156301, -0.022826199, -0.096645979, 1.215920941,
3.377354481, 0.033402621)), class = "data.frame", row.names = c("Feature_1",
"Feature_2", "Feature_3", "Feature_4", "Feature_5", "Feature_6",
"Feature_7", "Feature_8", "Feature_9", "Feature_10", "Feature_11",
"Feature_12", "Feature_13", "Feature_14", "Feature_15"))
#> Stim 1 Stim 2 Stim 3 Stim 4 Stim 5
#> Feature_1 0.8836439 1.0057578 7.79808900 1.413244080 -0.16794537
#> Feature_2 0.2486144 1.0057578 9.20580610 9.603856200 1.41324408
#> Feature_3 0.5180915 4.5160155 5.54081690 1.710879620 1.41324408
#> Feature_4 0.5352212 3.0000000 1.52159119 2.959219380 0.74117172
#> Feature_5 0.4154504 7.7862041 2.63042701 4.821997120 2.49461019
#> Feature_6 -0.9403238 -0.7066741 NA 3.171403580 -0.53234349
#> Feature_7 -0.7237966 -0.5726573 1.38576990 1.159313180 -0.35860719
#> Feature_8 -0.8242903 -0.6860185 -0.15293987 NA -0.44277424
#> Feature_9 NA -0.5147133 -0.05029591 1.589973380 -0.10358979
#> Feature_10 -0.8062551 -0.5323902 -0.33765918 4.768585980 0.21315630
#> Feature_11 -0.7475213 -0.4621364 -0.05890250 NA -0.02282620
#> Feature_12 NA -0.5128925 -0.07291692 -0.002674678 -0.09664598
#> Feature_13 3.2024779 1.0000000 -0.41070095 -0.235496858 1.21592094
#> Feature_14 1.1040243 1.5000000 -0.07981736 0.065630452 3.37735448
#> Feature_15 1.0057578 2.5000000 -0.31385950 -0.175745228 0.03340262
# Now, filter the data on all columns based on >= 1 | <= 0.5
dput(Output_Data_1)
structure(list(`Stim 1` = c(NA, 0.248614376, NA, NA, 0.415450436,
-0.940323826, -0.723796576, -0.824290276, NA, -0.806255146, -0.747521326,
NA, 3.20247786, 1.10402434, 1.005757776), `Stim 2` = c(1.005757776,
1.005757776, 4.51601548, 3, 7.78620408, -0.706674058, -0.572657338,
-0.686018538, -0.514713298, -0.532390248, -0.462136378, -0.512892468,
1, 1.5, 2.5), `Stim 3` = c(7.798089, 9.2058061, 5.5408169, 1.52159119,
2.63042701, NA, 1.3857699, -0.152939869, -0.050295909, -0.337659179,
-0.058902499, -0.072916919, -0.410700949, -0.079817359, -0.313859499
), `Stim 4` = c(1.41324408, 9.6038562, 1.71087962, 2.95921938,
4.82199712, 3.17140358, 1.15931318, NA, 1.58997338, 4.76858598,
NA, -0.002674678, -0.235496858, 0.065630452, -0.175745228), `Stim 5` = c(-0.167945369,
1.41324408, 1.41324408, NA, 2.494610191, -0.532343489, -0.358607189,
-0.442774239, -0.103589789, 0.213156301, -0.022826199, -0.096645979,
1.215920941, 3.377354481, 0.033402621)), class = "data.frame", row.names = c("Feature_1",
"Feature_2", "Feature_3", "Feature_4", "Feature_5", "Feature_6",
"Feature_7", "Feature_8", "Feature_9", "Feature_10", "Feature_11",
"Feature_12", "Feature_13", "Feature_14", "Feature_15"))
#> Stim 1 Stim 2 Stim 3 Stim 4 Stim 5
#> Feature_1 NA 1.0057578 7.79808900 1.413244080 -0.16794537
#> Feature_2 0.2486144 1.0057578 9.20580610 9.603856200 1.41324408
#> Feature_3 NA 4.5160155 5.54081690 1.710879620 1.41324408
#> Feature_4 NA 3.0000000 1.52159119 2.959219380 NA
#> Feature_5 0.4154504 7.7862041 2.63042701 4.821997120 2.49461019
#> Feature_6 -0.9403238 -0.7066741 NA 3.171403580 -0.53234349
#> Feature_7 -0.7237966 -0.5726573 1.38576990 1.159313180 -0.35860719
#> Feature_8 -0.8242903 -0.6860185 -0.15293987 NA -0.44277424
#> Feature_9 NA -0.5147133 -0.05029591 1.589973380 -0.10358979
#> Feature_10 -0.8062551 -0.5323902 -0.33765918 4.768585980 0.21315630
#> Feature_11 -0.7475213 -0.4621364 -0.05890250 NA -0.02282620
#> Feature_12 NA -0.5128925 -0.07291692 -0.002674678 -0.09664598
#> Feature_13 3.2024779 1.0000000 -0.41070095 -0.235496858 1.21592094
#> Feature_14 1.1040243 1.5000000 -0.07981736 0.065630452 3.37735448
#> Feature_15 1.0057578 2.5000000 -0.31385950 -0.175745228 0.03340262
# count the number of values passing the filtering cut-off
dput(Output_Data_2)
structure(list(`Stim 1` = 10L, `Stim 2` = 15L, `Stim 3` = 14L,
`Stim 4` = 13L, `Stim 5` = 14L), class = "data.frame", row.names = "Counts")
#> Stim 1 Stim 2 Stim 3 Stim 4 Stim 5
#> Counts 10 15 14 13 14
# Filter on one column at a time ( time consuming step)
library(tidyverse)
df_filter <-filter(Input_Data, `Stim 1` >= 1 | `Stim 1` <= 0.5)
dim(df_filter)
Created on 2022-02-21 by the reprex package (v2.0.1)