How to create a new variable depending on a condition across two or more variables?

What is the appropriate tidyverse approach for creating a new logical variable that is TRUE when two or more of a set of existing logical variables are TRUE?

# package library 
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.2
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'tidyr' was built under R version 4.2.2
#> Warning: package 'readr' was built under R version 4.2.2
#> Warning: package 'purrr' was built under R version 4.2.2
#> Warning: package 'dplyr' was built under R version 4.2.3
#> Warning: package 'stringr' was built under R version 4.2.2
#> Warning: package 'forcats' was built under R version 4.2.2
#> Warning: package 'lubridate' was built under R version 4.2.2

# sample data
# Fix the RNG state so the simulated flags are reproducible.
set.seed(2)

# create
# Build a 25-row tibble: a character id plus five random logical columns.
# Columns are drawn in order (var_1 first), so the RNG stream is consumed
# exactly as it would be by five back-to-back sample() calls.
sample_data <- local({
  n_rows <- 25
  draw_flags <- function() {
    sample(x = c(TRUE, FALSE), size = n_rows, replace = TRUE)
  }
  tibble(
    id = as.character(seq_len(n_rows)),
    var_1 = draw_flags(),
    var_2 = draw_flags(),
    var_3 = draw_flags(),
    var_4 = draw_flags(),
    var_5 = draw_flags()
  )
})

# view
sample_data
#> # A tibble: 25 × 6
#>    id    var_1 var_2 var_3 var_4 var_5
#>    <chr> <lgl> <lgl> <lgl> <lgl> <lgl>
#>  1 1     TRUE  TRUE  FALSE FALSE TRUE 
#>  2 2     TRUE  FALSE TRUE  TRUE  FALSE
#>  3 3     FALSE TRUE  FALSE FALSE TRUE 
#>  4 4     FALSE FALSE TRUE  FALSE TRUE 
#>  5 5     FALSE FALSE TRUE  FALSE FALSE
#>  6 6     FALSE TRUE  TRUE  FALSE TRUE 
#>  7 7     TRUE  FALSE FALSE TRUE  FALSE
#>  8 8     TRUE  TRUE  TRUE  FALSE TRUE 
#>  9 9     TRUE  FALSE TRUE  TRUE  FALSE
#> 10 10    FALSE FALSE TRUE  FALSE TRUE 
#> # ℹ 15 more rows

# create new variable that is TRUE when two or more of var_1:var_5 are TRUE
# Count only the var_* columns: comparing the whole tibble to TRUE would also
# scan `id` and any logical columns added later (including two_True itself
# when this line is re-run), which can inflate the count.
sample_data$two_True <- rowSums(
  sample_data[startsWith(names(sample_data), "var_")]
) >= 2

# view
sample_data
#> # A tibble: 25 × 7
#>    id    var_1 var_2 var_3 var_4 var_5 two_True
#>    <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>   
#>  1 1     TRUE  TRUE  FALSE FALSE TRUE  TRUE    
#>  2 2     TRUE  FALSE TRUE  TRUE  FALSE TRUE    
#>  3 3     FALSE TRUE  FALSE FALSE TRUE  TRUE    
#>  4 4     FALSE FALSE TRUE  FALSE TRUE  TRUE    
#>  5 5     FALSE FALSE TRUE  FALSE FALSE FALSE   
#>  6 6     FALSE TRUE  TRUE  FALSE TRUE  TRUE    
#>  7 7     TRUE  FALSE FALSE TRUE  FALSE TRUE    
#>  8 8     TRUE  TRUE  TRUE  FALSE TRUE  TRUE    
#>  9 9     TRUE  FALSE TRUE  TRUE  FALSE TRUE    
#> 10 10    FALSE FALSE TRUE  FALSE TRUE  TRUE    
#> # ℹ 15 more rows

# create new variable where if any of var_1:var_5 are true, then TRUE
sample_data <- sample_data %>%
  # if_any() combines the selected columns within each row on its own, so no
  # rowwise()/ungroup() round trip is needed; `x %in% TRUE` is the vectorised
  # counterpart of isTRUE() (an NA counts as not TRUE)
  mutate(
    any_true = if_any(.cols = starts_with("var_"), .fns = \(x) x %in% TRUE)
  )
  # but how to detect if two or more are true?
  
# view
sample_data
#> # A tibble: 25 × 8
#>    id    var_1 var_2 var_3 var_4 var_5 two_True any_true
#>    <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>    <lgl>   
#>  1 1     TRUE  TRUE  FALSE FALSE TRUE  TRUE     TRUE    
#>  2 2     TRUE  FALSE TRUE  TRUE  FALSE TRUE     TRUE    
#>  3 3     FALSE TRUE  FALSE FALSE TRUE  TRUE     TRUE    
#>  4 4     FALSE FALSE TRUE  FALSE TRUE  TRUE     TRUE    
#>  5 5     FALSE FALSE TRUE  FALSE FALSE FALSE    TRUE    
#>  6 6     FALSE TRUE  TRUE  FALSE TRUE  TRUE     TRUE    
#>  7 7     TRUE  FALSE FALSE TRUE  FALSE TRUE     TRUE    
#>  8 8     TRUE  TRUE  TRUE  FALSE TRUE  TRUE     TRUE    
#>  9 9     TRUE  FALSE TRUE  TRUE  FALSE TRUE     TRUE    
#> 10 10    FALSE FALSE TRUE  FALSE TRUE  TRUE     TRUE    
#> # ℹ 15 more rows

Created on 2023-06-14 with reprex v2.0.2

# Simulated data: a character id plus five random logical columns, 25 rows.
# No seed is set here, so the drawn values differ from run to run.
d <- local({
  n_rows <- 25
  draw_flags <- function() {
    sample(x = c(TRUE, FALSE), size = n_rows, replace = TRUE)
  }
  data.frame(
    id = as.character(seq_len(n_rows)),
    var_1 = draw_flags(),
    var_2 = draw_flags(),
    var_3 = draw_flags(),
    var_4 = draw_flags(),
    var_5 = draw_flags()
  )
})

# two_true marks the rows in which exactly two of the var_* columns are TRUE
# (rows with three or more TRUE values stay FALSE). Replace `== 2` with
# `>= 2` to flag "two or more" instead.
d$two_true <- unname(rowSums(d[, setdiff(names(d), "id")]) == 2)
d
#>    id var_1 var_2 var_3 var_4 var_5 two_true
#> 1   1  TRUE  TRUE FALSE  TRUE FALSE    FALSE
#> 2   2 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 3   3  TRUE  TRUE FALSE FALSE FALSE     TRUE
#> 4   4 FALSE  TRUE FALSE FALSE  TRUE     TRUE
#> 5   5  TRUE  TRUE  TRUE  TRUE FALSE    FALSE
#> 6   6  TRUE  TRUE  TRUE  TRUE FALSE    FALSE
#> 7   7 FALSE  TRUE FALSE  TRUE FALSE     TRUE
#> 8   8 FALSE FALSE  TRUE FALSE FALSE    FALSE
#> 9   9  TRUE FALSE  TRUE FALSE  TRUE    FALSE
#> 10 10 FALSE FALSE FALSE  TRUE FALSE    FALSE
#> 11 11 FALSE  TRUE FALSE  TRUE FALSE     TRUE
#> 12 12 FALSE FALSE FALSE FALSE  TRUE    FALSE
#> 13 13 FALSE FALSE  TRUE FALSE  TRUE     TRUE
#> 14 14  TRUE FALSE  TRUE  TRUE FALSE    FALSE
#> 15 15  TRUE FALSE  TRUE FALSE  TRUE    FALSE
#> 16 16 FALSE FALSE FALSE FALSE  TRUE    FALSE
#> 17 17 FALSE FALSE  TRUE FALSE  TRUE     TRUE
#> 18 18  TRUE  TRUE FALSE FALSE  TRUE    FALSE
#> 19 19  TRUE FALSE FALSE FALSE  TRUE     TRUE
#> 20 20  TRUE FALSE FALSE FALSE  TRUE     TRUE
#> 21 21 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 22 22 FALSE FALSE FALSE  TRUE FALSE    FALSE
#> 23 23 FALSE  TRUE  TRUE FALSE FALSE     TRUE
#> 24 24 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 25 25  TRUE FALSE FALSE FALSE FALSE    FALSE
d
#>    id var_1 var_2 var_3 var_4 var_5 two_true
#> 1   1  TRUE  TRUE FALSE  TRUE FALSE    FALSE
#> 2   2 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 3   3  TRUE  TRUE FALSE FALSE FALSE     TRUE
#> 4   4 FALSE  TRUE FALSE FALSE  TRUE     TRUE
#> 5   5  TRUE  TRUE  TRUE  TRUE FALSE    FALSE
#> 6   6  TRUE  TRUE  TRUE  TRUE FALSE    FALSE
#> 7   7 FALSE  TRUE FALSE  TRUE FALSE     TRUE
#> 8   8 FALSE FALSE  TRUE FALSE FALSE    FALSE
#> 9   9  TRUE FALSE  TRUE FALSE  TRUE    FALSE
#> 10 10 FALSE FALSE FALSE  TRUE FALSE    FALSE
#> 11 11 FALSE  TRUE FALSE  TRUE FALSE     TRUE
#> 12 12 FALSE FALSE FALSE FALSE  TRUE    FALSE
#> 13 13 FALSE FALSE  TRUE FALSE  TRUE     TRUE
#> 14 14  TRUE FALSE  TRUE  TRUE FALSE    FALSE
#> 15 15  TRUE FALSE  TRUE FALSE  TRUE    FALSE
#> 16 16 FALSE FALSE FALSE FALSE  TRUE    FALSE
#> 17 17 FALSE FALSE  TRUE FALSE  TRUE     TRUE
#> 18 18  TRUE  TRUE FALSE FALSE  TRUE    FALSE
#> 19 19  TRUE FALSE FALSE FALSE  TRUE     TRUE
#> 20 20  TRUE FALSE FALSE FALSE  TRUE     TRUE
#> 21 21 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 22 22 FALSE FALSE FALSE  TRUE FALSE    FALSE
#> 23 23 FALSE  TRUE  TRUE FALSE FALSE     TRUE
#> 24 24 FALSE FALSE  TRUE  TRUE  TRUE    FALSE
#> 25 25  TRUE FALSE FALSE FALSE FALSE    FALSE

Created on 2023-06-14 with reprex v2.0.2

1 Like

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.