filter var and then calculate like

I have this base

# A tibble: 15 x 5
      id A     B     C     vars 
   <int> <lgl> <lgl> <lgl> <chr>
 1     1 FALSE TRUE  FALSE team5
 2     2 TRUE  FALSE TRUE  team9
 3     3 FALSE TRUE  FALSE team7
 4     4 FALSE FALSE FALSE team2
 5     5 TRUE  TRUE  TRUE  team8
 6     6 FALSE TRUE  FALSE team2
 7     7 TRUE  TRUE  FALSE team2
 8     8 FALSE FALSE FALSE team6
 9     9 TRUE  TRUE  TRUE  team6
10    10 FALSE FALSE FALSE team7
11    11 TRUE  FALSE TRUE  team6
12    12 TRUE  TRUE  FALSE team7
13    13 FALSE TRUE  TRUE  team9
14    14 TRUE  FALSE TRUE  team1
15    15 FALSE FALSE FALSE team4

and I need to make a loop where I filter by variables A,B and C and then calculate how many unique values of vars are present after filtering.

For example, filtering by variable A would look like this

# A tibble: 7 x 5
     id A     B     C     vars 
  <int> <lgl> <lgl> <lgl> <chr>
1     2 TRUE  FALSE TRUE  team9
2     5 TRUE  TRUE  TRUE  team8
3     7 TRUE  TRUE  FALSE team2
4     9 TRUE  TRUE  TRUE  team6
5    11 TRUE  FALSE TRUE  team6
6    12 TRUE  TRUE  FALSE team7
7    14 TRUE  FALSE TRUE  team1

and the answer is 75% (8 teams in total and 6 are presents filtering by A)

In the case of B the answer is 75% and in C it is 50%.

I need to do it in loop and that it is applicable to bigger bases. That's why I put this tibble as an example.

Is this what you want?

library(tidyverse)

set.seed(1)
df <- tibble(
  a = sample(0:1, size = 10, replace = T),
  b = sample(0:1, size = 10, replace = T),
  c = sample(0:1, size = 10, replace = T)) %>% 
  mutate_all(as.logical) %>% 
  mutate(id = 1:10,
         vars = c("team1","team2","team3","team4","team5","team6","team7","team8","team9","team10"))


df_long <- df %>% 
  pivot_longer(cols = c("a", "b", "c")) 

tot <- length(unique(df_long$vars))
ft <- unique(df_long$name)

for(i in ft){
  
  a <- df_long %>% 
    filter(name == i & value == T) %>% 
    unique() %>% 
    count() %>% 
    mutate(`%` = n/tot*100)
  
  print(a)

}

Doesn't work. Doesn't calculate the unique teams.
For example, when I filter by A the answer is 75% (6/8)/

> # A tibble: 7 x 5
>      id A     B     C     vars 
>   <int> <lgl> <lgl> <lgl> <chr>
> 1     2 TRUE  FALSE TRUE  team9
> 2     5 TRUE  TRUE  TRUE  team8
> 3     7 TRUE  TRUE  FALSE team2
> 4     9 TRUE  TRUE  TRUE  team6
> 5    11 TRUE  FALSE TRUE  team6
> 6    12 TRUE  TRUE  FALSE team7
> 7    14 TRUE  FALSE TRUE  team1

But the loops returns this:

# A tibble: 1 x 2
      n   `%`
  <int> <dbl>
1     7  87.5

Now should works:

library(tidyverse)

df <- tribble(
  ~id, ~A,     ~B,     ~C,     ~vars,
  
  1, FALSE, TRUE,  FALSE, "team5",
  2, TRUE,  FALSE, TRUE,  "team9",
  3, FALSE,TRUE,  FALSE, "team7",
  4, FALSE, FALSE, FALSE,"team2",
  5, TRUE,  TRUE,  TRUE,  "team8",
  6, FALSE, TRUE,  FALSE, "team2",
  7, TRUE,  TRUE,  FALSE,"team2",
  8, FALSE, FALSE, FALSE, "team6",
  9, TRUE,  TRUE,  TRUE,  "team6",
  10, FALSE, FALSE, FALSE, "team7",
  11, TRUE,  FALSE, TRUE,  "team6",
  12, TRUE,  TRUE,  FALSE, "team7",
  13, FALSE, TRUE,  TRUE,  "team9",
  14, TRUE,  FALSE, TRUE,  "team1",
  15, FALSE, FALSE, FALSE, "team4",
)


df_long <- df %>% 
  pivot_longer(cols = c("A", "B", "C")) 

tot <- length(unique(df_long$vars))
ft <- unique(df_long$name)

for(i in ft){
  
  a <- df_long %>% 
    filter(name == i & value == T) %>% 
    select(vars) %>% 
    unique() %>% 
    count() %>% 
    mutate(`%` = n/tot*100,
           team = i) %>% 
    relocate(team)
  
  print(a)
  
}

I fix it doing this:

df_long <- df %>% 
  pivot_longer(cols = c("A", "B", "C"))  %>%
  select(-1)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.