Hi,
I am trying to filter a data.frame in R by applying a cut-off to the values
in all columns, then print (subset) the values that pass the cut-off and count them. Specifically, I am interested in keeping the values that are <= 0.9
and then counting, for each column, the number of values passing the cut-off. I have provided an example dataset and my function below.
dput(Data)
structure(list(`S 1` = c(0.883643926, 0.248614376, 0.518091486,
0.535221236, 0.415450436, -0.940323826, -0.723796576, -0.824290276,
NA, -0.806255146, -0.747521326, NA, 3.20247786, 1.10402434, 1.005757776
), `S 2` = c(1.005757776, 1.005757776, 4.51601548, 3, 7.78620408,
-0.706674058, -0.572657338, -0.686018538, -0.514713298, -0.532390248,
-0.462136378, -0.512892468, 1, 1.5, 2.5), `S 3` = c(7.798089,
9.2058061, 5.5408169, 1.52159119, 2.63042701, NA, 1.3857699,
-0.152939869, -0.050295909, -0.337659179, -0.058902499, -0.072916919,
-0.410700949, -0.079817359, -0.313859499), `S 4` = c(1.41324408,
9.6038562, 1.71087962, 2.95921938, 4.82199712, 3.17140358, 1.15931318,
NA, 1.58997338, 4.76858598, NA, -0.002674678, -0.235496858, 0.065630452,
-0.175745228), `S 5` = c(-0.167945369, 1.41324408, 1.41324408,
0.741171721, 2.494610191, -0.532343489, -0.358607189, -0.442774239,
-0.103589789, 0.213156301, -0.022826199, -0.096645979, 1.215920941,
3.377354481, 0.033402621)), class = "data.frame", row.names = c("Entity_1",
"Entity_2", "Entity_3", "Entity_4", "Entity_5", "Entity_6",
"Entity_7", "Entity_8", "Entity_9", "Entity_10", "Entity_11",
"Entity_12", "Entity_13", "Entity_14", "Entity_15"))
#> S 1 S 2 S 3 S 4 S 5
#> Entity_1 0.8836439 1.0057578 7.79808900 1.413244080 -0.16794537
#> Entity_2 0.2486144 1.0057578 9.20580610 9.603856200 1.41324408
#> Entity_3 0.5180915 4.5160155 5.54081690 1.710879620 1.41324408
#> Entity_4 0.5352212 3.0000000 1.52159119 2.959219380 0.74117172
#> Entity_5 0.4154504 7.7862041 2.63042701 4.821997120 2.49461019
#> Entity_6 -0.9403238 -0.7066741 NA 3.171403580 -0.53234349
#> Entity_7 -0.7237966 -0.5726573 1.38576990 1.159313180 -0.35860719
#> Entity_8 -0.8242903 -0.6860185 -0.15293987 NA -0.44277424
#> Entity_9 NA -0.5147133 -0.05029591 1.589973380 -0.10358979
#> Entity_10 -0.8062551 -0.5323902 -0.33765918 4.768585980 0.21315630
#> Entity_11 -0.7475213 -0.4621364 -0.05890250 NA -0.02282620
#> Entity_12 NA -0.5128925 -0.07291692 -0.002674678 -0.09664598
#> Entity_13 3.2024779 1.0000000 -0.41070095 -0.235496858 1.21592094
#> Entity_14 1.1040243 1.5000000 -0.07981736 0.065630452 3.37735448
#> Entity_15 1.0057578 2.5000000 -0.31385950 -0.175745228 0.03340262
#' Count the non-missing values at or below a cut-off
#'
#' @param xx A numeric vector (for example, one column of a data frame).
#' @param cutoff Inclusive upper threshold; defaults to 0.9.
#' @return An integer: the number of non-NA elements of `xx` that are
#'   `<= cutoff`.
my_filter <- function(xx, cutoff = 0.9) {
  # The comparison already yields a logical vector, so it can be summed
  # directly; na.rm = TRUE discards the NA results that missing inputs produce.
  sum(xx <= cutoff, na.rm = TRUE)
}
# Apply on all columns with "S" in the column name
library(tidyverse)
# Summarise every selected column with my_filter() and label the single
# result row "counts". Note: contains() ignores case by default, so this
# selects any column whose name contains "s" or "S".
df_filter <- Data %>%
  summarise(across(contains("S"), my_filter)) %>%
  as.data.frame(row.names = "counts")
dput(df_filter)
structure(list(`S 1` = 10L, `S 2` = 7L, `S 3` = 8L, `S 4` = 4L,
`S 5` = 10L), class = "data.frame", row.names = "counts")
#> S 1 S 2 S 3 S 4 S 5
#> counts 10 7 8 4 10
# Print/subset all the values from the table passing the cut-off
What function or formula should I use to print/save the values that pass the cut-off?
Created on 2022-06-02 by the reprex package (v2.0.1)
Thank you,
Toufiq