What is the appropriate tidyverse way to create a new variable that is TRUE when two or more of several logical variables are TRUE?
# package library
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.2
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'tidyr' was built under R version 4.2.2
#> Warning: package 'readr' was built under R version 4.2.2
#> Warning: package 'purrr' was built under R version 4.2.2
#> Warning: package 'dplyr' was built under R version 4.2.3
#> Warning: package 'stringr' was built under R version 4.2.2
#> Warning: package 'forcats' was built under R version 4.2.2
#> Warning: package 'lubridate' was built under R version 4.2.2
# Example data ----
# Seed the RNG so the random flags below are the same on every run.
set.seed(2)
# 25 rows: a character id ("1".."25") plus five logical columns, each drawn
# independently (with replacement) from TRUE/FALSE.
sample_data <- tibble(
  id = as.character(seq_len(25)),
  var_1 = sample(c(TRUE, FALSE), 25, replace = TRUE),
  var_2 = sample(c(TRUE, FALSE), 25, replace = TRUE),
  var_3 = sample(c(TRUE, FALSE), 25, replace = TRUE),
  var_4 = sample(c(TRUE, FALSE), 25, replace = TRUE),
  var_5 = sample(c(TRUE, FALSE), 25, replace = TRUE)
)
# Print the tibble to inspect it.
sample_data
#> # A tibble: 25 × 6
#> id var_1 var_2 var_3 var_4 var_5
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl>
#> 1 1 TRUE TRUE FALSE FALSE TRUE
#> 2 2 TRUE FALSE TRUE TRUE FALSE
#> 3 3 FALSE TRUE FALSE FALSE TRUE
#> 4 4 FALSE FALSE TRUE FALSE TRUE
#> 5 5 FALSE FALSE TRUE FALSE FALSE
#> 6 6 FALSE TRUE TRUE FALSE TRUE
#> 7 7 TRUE FALSE FALSE TRUE FALSE
#> 8 8 TRUE TRUE TRUE FALSE TRUE
#> 9 9 TRUE FALSE TRUE TRUE FALSE
#> 10 10 FALSE FALSE TRUE FALSE TRUE
#> # ℹ 15 more rows
# Flag rows where at least two of var_1:var_5 are TRUE.
# Only the var_* columns are counted. Selecting every column (as
# `sample_data[, ]` does) also compares `id` and any other logical column in
# the table (for example a derived flag such as any_true) against TRUE, so the
# result would depend on which columns happen to exist when this step runs.
# Logical columns sum as 0/1, so rowSums() is the number of TRUE values per
# row. The sample data contain no NA; a row containing NA would get an NA
# count and therefore an NA flag.
sample_data <- sample_data %>%
  mutate(two_True = rowSums(pick(starts_with("var_"))) > 1)
# view
sample_data
#> # A tibble: 25 × 7
#> id var_1 var_2 var_3 var_4 var_5 two_True
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
#> 1 1 TRUE TRUE FALSE FALSE TRUE TRUE
#> 2 2 TRUE FALSE TRUE TRUE FALSE TRUE
#> 3 3 FALSE TRUE FALSE FALSE TRUE TRUE
#> 4 4 FALSE FALSE TRUE FALSE TRUE TRUE
#> 5 5 FALSE FALSE TRUE FALSE FALSE FALSE
#> 6 6 FALSE TRUE TRUE FALSE TRUE TRUE
#> 7 7 TRUE FALSE FALSE TRUE FALSE TRUE
#> 8 8 TRUE TRUE TRUE FALSE TRUE TRUE
#> 9 9 TRUE FALSE TRUE TRUE FALSE TRUE
#> 10 10 FALSE FALSE TRUE FALSE TRUE TRUE
#> # ℹ 15 more rows
# Flag rows where at least one of var_1:var_5 is TRUE.
# if_any() already operates on whole columns and combines the per-column
# results row by row, so no rowwise()/ungroup() round trip is needed.
# `x %in% TRUE` is the vectorised counterpart of isTRUE(): TRUE stays TRUE,
# while FALSE and NA both become FALSE.
sample_data <- sample_data %>%
  mutate(
    any_true = if_any(
      .cols = starts_with("var_"),
      .fns = function(x) x %in% TRUE
    )
  )
# but how to detect if two or more are true?
# -> count the TRUE values per row instead of testing for any, e.g.
#    rowSums(pick(starts_with("var_"))) >= 2 inside mutate()
# view
sample_data
#> # A tibble: 25 × 8
#> id var_1 var_2 var_3 var_4 var_5 two_True any_true
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
#> 1 1 TRUE TRUE FALSE FALSE TRUE TRUE TRUE
#> 2 2 TRUE FALSE TRUE TRUE FALSE TRUE TRUE
#> 3 3 FALSE TRUE FALSE FALSE TRUE TRUE TRUE
#> 4 4 FALSE FALSE TRUE FALSE TRUE TRUE TRUE
#> 5 5 FALSE FALSE TRUE FALSE FALSE FALSE TRUE
#> 6 6 FALSE TRUE TRUE FALSE TRUE TRUE TRUE
#> 7 7 TRUE FALSE FALSE TRUE FALSE TRUE TRUE
#> 8 8 TRUE TRUE TRUE FALSE TRUE TRUE TRUE
#> 9 9 TRUE FALSE TRUE TRUE FALSE TRUE TRUE
#> 10 10 FALSE FALSE TRUE FALSE TRUE TRUE TRUE
#> # ℹ 15 more rows
Created on 2023-06-14 with reprex v2.0.2