Conditional column based on a series of columns

Hi RStudio community, 
I would like to create two new column given below dataset if any of you can help me. Thanks
# Create column A= "Yes"  if any of the rows (Trt1-Trt4) are not missing, otherwise A= "No"
# Create column B= "Yes" if any of the rows (Trt1-Trt4) contains D, otherwise B= "No" if not missing.

data <- data.frame(Id = c(1L,	1L,	1L,	1L,	1L,	2L,	2L,	2L,	2L,	3L,	3L,	3L,	3L,	3L,	4L,	4L,	4L,	4L,	4L),
                   date1 = c("2020-01-20",	"2020-02-20",	"2020-03-20",	"2020-04-20",	"2020-05-10",	"2020-01-20",	"2020-02-15",	"2020-03-20",	"2020-04-15",	"2020-05-10",	"2020-07-15",	"2020-08-15",	"2020-10-25",	"2020-11-10",	"2020-04-10",	"2020-04-10",	"2020-08-15",	"2020-10-25",	"2020-10-27"),
                   date2 = c("2020-01-20",	"2020-03-25",	"2020-03-20",	"2020-05-15",	"2020-06-12",	"2020-02-20",	"2020-03-20",	"2020-04-22",	"2020-05-15",	"2020-06-12",	"2020-08-20",	"2020-09-22",	"2020-11-15",	"2021-01-12",	"2020-05-12",	"2020-08-20",	"2020-09-22",	"2020-11-15",	"2020-10-29"),
                   date3 = c("2020-02-20",	"2020-04-25",	"2020-03-29",	"2020-06-15",	"2020-06-22",	"2020-03-20",	"2020-04-20",	"2020-05-22",	"2020-06-15",	"2020-06-22",	"2020-08-30",	"2020-09-27",	"2020-11-19",	"2021-01-28",	"2020-05-27",	"2020-08-26",	"2020-09-28",	"2020-11-18",	"2020-111-27"),
                   date4 = c("2020-03-20",	"2020-05-25",	"2020-03-30",	"2020-07-15",	"2020-06-29",	"2020-03-25",	"2020-04-27",	"2020-05-28",	"2020-06-19",	"2020-06-28",	"2020-09-30",	"2020-10-27",	"2020-11-29",	"2021-01-28",	"2020-06-27",	"2020-09-26",	"2020-09-28",	"2020-12-18",	"2020-111-28"),
                   Trt1 = factor(c("A",	"B",	"C",	"A",	"D",	" ",	"B",	"C",	"D",	"A",	"B",	" ",	"C",	"D",	"D",	"B",	"C",	"C",	"D")),
                   Trt2 = factor(c("A",	"D",	"A",	" ",	"D",	" ",	"A",	" ",	"A",	"B",	"B",	" ",	"A",	"A",	"A",	"C",	"A",	"A",	"B")),
                   Trt3 = factor(c("A",	"C",	"B",	"B",	"D",	" ",	"C",	"A",	"B",	"D",	"B",	" ",	"C",	"B",	"D",	"B",	"B",	"C",	"D")),
                   Trt4 = factor(c("C",	"A",	"B",	"A",	"D",	" ",	" ",	"C",	"A",	" ",	"B",	" ",	"A",	"D",	"A",	"C",	"C",	"A",	"B")),
                   stringsAsFactors = FALSE)

Is this what you need?

library(dplyr)
data <- data.frame(Id = c(1L,   1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L),
                   date1 = c("2020-01-20",  "2020-02-20",   "2020-03-20",   "2020-04-20",   "2020-05-10",   "2020-01-20",   "2020-02-15",   "2020-03-20",   "2020-04-15",   "2020-05-10",   "2020-07-15",   "2020-08-15",   "2020-10-25",   "2020-11-10",   "2020-04-10",   "2020-04-10",   "2020-08-15",   "2020-10-25",   "2020-10-27"),
                   date2 = c("2020-01-20",  "2020-03-25",   "2020-03-20",   "2020-05-15",   "2020-06-12",   "2020-02-20",   "2020-03-20",   "2020-04-22",   "2020-05-15",   "2020-06-12",   "2020-08-20",   "2020-09-22",   "2020-11-15",   "2021-01-12",   "2020-05-12",   "2020-08-20",   "2020-09-22",   "2020-11-15",   "2020-10-29"),
                   date3 = c("2020-02-20",  "2020-04-25",   "2020-03-29",   "2020-06-15",   "2020-06-22",   "2020-03-20",   "2020-04-20",   "2020-05-22",   "2020-06-15",   "2020-06-22",   "2020-08-30",   "2020-09-27",   "2020-11-19",   "2021-01-28",   "2020-05-27",   "2020-08-26",   "2020-09-28",   "2020-11-18",   "2020-111-27"),
                   date4 = c("2020-03-20",  "2020-05-25",   "2020-03-30",   "2020-07-15",   "2020-06-29",   "2020-03-25",   "2020-04-27",   "2020-05-28",   "2020-06-19",   "2020-06-28",   "2020-09-30",   "2020-10-27",   "2020-11-29",   "2021-01-28",   "2020-06-27",   "2020-09-26",   "2020-09-28",   "2020-12-18",   "2020-111-28"),
                   Trt1 = factor(c("A", "B",    "C",    "A",    "D",    " ",    "B",    "C",    "D",    "A",    "B",    " ",    "C",    "D",    "D",    "B",    "C",    "C",    "D")),
                   Trt2 = factor(c("A", "D",    "A",    " ",    "D",    " ",    "A",    " ",    "A",    "B",    "B",    " ",    "A",    "A",    "A",    "C",    "A",    "A",    "B")),
                   Trt3 = factor(c("A", "C",    "B",    "B",    "D",    " ",    "C",    "A",    "B",    "D",    "B",    " ",    "C",    "B",    "D",    "B",    "B",    "C",    "D")),
                   Trt4 = factor(c("C", "A",    "B",    "A",    "D",    " ",    " ",    "C",    "A",    " ",    "B",    " ",    "A",    "D",    "A",    "C",    "C",    "A",    "B")),
                   stringsAsFactors = FALSE)

data2 <- data |> rowwise() |> 
  mutate(A = ifelse(any(c_across(Trt1:Trt4) != " "), "Yes", "No"),
         B = ifelse(any(c_across(Trt1:Trt4) == "D"), "Yes", "No"))
data2
#> # A tibble: 19 × 11
#> # Rowwise: 
#>       Id date1      date2      date3   date4 Trt1  Trt2  Trt3  Trt4  A     B    
#>    <int> <chr>      <chr>      <chr>   <chr> <fct> <fct> <fct> <fct> <chr> <chr>
#>  1     1 2020-01-20 2020-01-20 2020-0… 2020… "A"   "A"   "A"   "C"   Yes   No   
#>  2     1 2020-02-20 2020-03-25 2020-0… 2020… "B"   "D"   "C"   "A"   Yes   Yes  
#>  3     1 2020-03-20 2020-03-20 2020-0… 2020… "C"   "A"   "B"   "B"   Yes   No   
#>  4     1 2020-04-20 2020-05-15 2020-0… 2020… "A"   " "   "B"   "A"   Yes   No   
#>  5     1 2020-05-10 2020-06-12 2020-0… 2020… "D"   "D"   "D"   "D"   Yes   Yes  
#>  6     2 2020-01-20 2020-02-20 2020-0… 2020… " "   " "   " "   " "   No    No   
#>  7     2 2020-02-15 2020-03-20 2020-0… 2020… "B"   "A"   "C"   " "   Yes   No   
#>  8     2 2020-03-20 2020-04-22 2020-0… 2020… "C"   " "   "A"   "C"   Yes   No   
#>  9     2 2020-04-15 2020-05-15 2020-0… 2020… "D"   "A"   "B"   "A"   Yes   Yes  
#> 10     3 2020-05-10 2020-06-12 2020-0… 2020… "A"   "B"   "D"   " "   Yes   Yes  
#> 11     3 2020-07-15 2020-08-20 2020-0… 2020… "B"   "B"   "B"   "B"   Yes   No   
#> 12     3 2020-08-15 2020-09-22 2020-0… 2020… " "   " "   " "   " "   No    No   
#> 13     3 2020-10-25 2020-11-15 2020-1… 2020… "C"   "A"   "C"   "A"   Yes   No   
#> 14     3 2020-11-10 2021-01-12 2021-0… 2021… "D"   "A"   "B"   "D"   Yes   Yes  
#> 15     4 2020-04-10 2020-05-12 2020-0… 2020… "D"   "A"   "D"   "A"   Yes   Yes  
#> 16     4 2020-04-10 2020-08-20 2020-0… 2020… "B"   "C"   "B"   "C"   Yes   No   
#> 17     4 2020-08-15 2020-09-22 2020-0… 2020… "C"   "A"   "B"   "C"   Yes   No   
#> 18     4 2020-10-25 2020-11-15 2020-1… 2020… "C"   "A"   "C"   "A"   Yes   No   
#> 19     4 2020-10-27 2020-10-29 2020-1… 2020… "D"   "B"   "D"   "B"   Yes   Yes

Created on 2023-02-13 with reprex v2.0.2

Thank you very much for quick response and support. If you please only add the missing statement for column B. For example row 6 and 12 should be NA. I mean B=NA if the rows for Trt1:Trt4 are missing or blank. Thank you again.

data2 <- data |> rowwise() |> 
  mutate(A = ifelse(any(c_across(Trt1:Trt4) != " "), "Yes", "No"),
          B = case_when(
            any(c_across(Trt1:Trt4) == "D") ~ "Yes",
            all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
            TRUE ~ "No"
         ))

That is great for column B, I want same for column A ,and add a new column C=Yes if (Trt1-Trt4) contains D or C, otherwise C=No and missing or blank=NA. Appreciate all your help. Thanks

Please try using what I have written as a template for meeting the additional requirements. If you get stuck, post the code you have written and we can go from there.

1 Like

I would like to run the below code, but no luck with running it. Missing or blanks for all columns should be NA, but in column A , (Trt1-Trt4) all row missing coded as No . Also I would like to have a new column C=Yes if (Trt1-Trt4) contains D or C. Appreciate any help. Thanks

data3 <- data |> rowwise() |> 
  mutate(A = ifelse(any(c_across(Trt1:Trt4) != " "), "Yes", all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
                    TRUE ~ "No"),
         B = case_when(
           any(c_across(Trt1:Trt4) == "D") ~ "Yes",
           all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
           TRUE ~ "No"
         ),
         C = case_when(
           any(c_across(Trt1:Trt4) == c("D"|"C") ~ "Yes",
           all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
           TRUE ~ "No"
         ))

Does this give you what you want?

library(dplyr)
data <- data.frame(Id = c(1L,   1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L),
                   date1 = c("2020-01-20",  "2020-02-20",   "2020-03-20",   "2020-04-20",   "2020-05-10",   "2020-01-20",   "2020-02-15",   "2020-03-20",   "2020-04-15",   "2020-05-10",   "2020-07-15",   "2020-08-15",   "2020-10-25",   "2020-11-10",   "2020-04-10",   "2020-04-10",   "2020-08-15",   "2020-10-25",   "2020-10-27"),
                   date2 = c("2020-01-20",  "2020-03-25",   "2020-03-20",   "2020-05-15",   "2020-06-12",   "2020-02-20",   "2020-03-20",   "2020-04-22",   "2020-05-15",   "2020-06-12",   "2020-08-20",   "2020-09-22",   "2020-11-15",   "2021-01-12",   "2020-05-12",   "2020-08-20",   "2020-09-22",   "2020-11-15",   "2020-10-29"),
                   date3 = c("2020-02-20",  "2020-04-25",   "2020-03-29",   "2020-06-15",   "2020-06-22",   "2020-03-20",   "2020-04-20",   "2020-05-22",   "2020-06-15",   "2020-06-22",   "2020-08-30",   "2020-09-27",   "2020-11-19",   "2021-01-28",   "2020-05-27",   "2020-08-26",   "2020-09-28",   "2020-11-18",   "2020-111-27"),
                   date4 = c("2020-03-20",  "2020-05-25",   "2020-03-30",   "2020-07-15",   "2020-06-29",   "2020-03-25",   "2020-04-27",   "2020-05-28",   "2020-06-19",   "2020-06-28",   "2020-09-30",   "2020-10-27",   "2020-11-29",   "2021-01-28",   "2020-06-27",   "2020-09-26",   "2020-09-28",   "2020-12-18",   "2020-111-28"),
                   Trt1 = factor(c("A", "B",    "C",    "A",    "D",    " ",    "B",    "C",    "D",    "A",    "B",    " ",    "C",    "D",    "D",    "B",    "C",    "C",    "D")),
                   Trt2 = factor(c("A", "D",    "A",    " ",    "D",    " ",    "A",    " ",    "A",    "B",    "B",    " ",    "A",    "A",    "A",    "C",    "A",    "A",    "B")),
                   Trt3 = factor(c("A", "C",    "B",    "B",    "D",    " ",    "C",    "A",    "B",    "D",    "B",    " ",    "C",    "B",    "D",    "B",    "B",    "C",    "D")),
                   Trt4 = factor(c("C", "A",    "B",    "A",    "D",    " ",    " ",    "C",    "A",    " ",    "B",    " ",    "A",    "D",    "A",    "C",    "C",    "A",    "B")),
                   stringsAsFactors = FALSE)

data3 <- data |> rowwise() |> 
  mutate(A = case_when(any(c_across(Trt1:Trt4) != " ")~ "Yes", 
                      TRUE ~ NA_character_),
         B = case_when(
           any(c_across(Trt1:Trt4) == "D") ~ "Yes",
           all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
           TRUE ~ "No"
         ),
         C = case_when(
           any(c_across(Trt1:Trt4) %in% c("D","C")) ~ "Yes",
               all(c_across(Trt1:Trt4) == " ") ~ NA_character_,
               TRUE ~ "No"
           ))

Hi FJCC. I highly appreciate your time and help. Yes, it is great. My last question in this posting is how to address NA instead of blanks if we have in the same dataset below. I tried the below code, but I did not get the expected result. Thanks

data <- data.frame(Id = c(1L,	1L,	1L,	1L,	1L,	2L,	2L,	2L,	2L,	3L,	3L,	3L,	3L,	3L,	4L,	4L,	4L,	4L,	4L),
                   date1 = c("2020-01-20",	"2020-02-20",	"2020-03-20",	"2020-04-20",	"2020-05-10",	"2020-01-20",	"2020-02-15",	"2020-03-20",	"2020-04-15",	"2020-05-10",	"2020-07-15",	"2020-08-15",	"2020-10-25",	"2020-11-10",	"2020-04-10",	"2020-04-10",	"2020-08-15",	"2020-10-25",	"2020-10-27"),
                   date2 = c("2020-01-20",	"2020-03-25",	"2020-03-20",	"2020-05-15",	"2020-06-12",	"2020-02-20",	"2020-03-20",	"2020-04-22",	"2020-05-15",	"2020-06-12",	"2020-08-20",	"2020-09-22",	"2020-11-15",	"2021-01-12",	"2020-05-12",	"2020-08-20",	"2020-09-22",	"2020-11-15",	"2020-10-29"),
                   date3 = c("2020-02-20",	"2020-04-25",	"2020-03-29",	"2020-06-15",	"2020-06-22",	"2020-03-20",	"2020-04-20",	"2020-05-22",	"2020-06-15",	"2020-06-22",	"2020-08-30",	"2020-09-27",	"2020-11-19",	"2021-01-28",	"2020-05-27",	"2020-08-26",	"2020-09-28",	"2020-11-18",	"2020-111-27"),
                   date4 = c("2020-03-20",	"2020-05-25",	"2020-03-30",	"2020-07-15",	"2020-06-29",	"2020-03-25",	"2020-04-27",	"2020-05-28",	"2020-06-19",	"2020-06-28",	"2020-09-30",	"2020-10-27",	"2020-11-29",	"2021-01-28",	"2020-06-27",	"2020-09-26",	"2020-09-28",	"2020-12-18",	"2020-111-28"),
                   Trt1 = factor(c("A",	"B",	"C",	"A",	"D",	NA,	"B",	"C",	"D",	"A",	"B",	NA,	"C",	"D",	"D",	"B",	"C",	"C",	"D")),
                   Trt2 = factor(c("A",	"D",	"A",	NA,	"D",	NA,	"A",	NA,	"A",	"B",	"B",	NA,	"A",	"A",	"A",	"C",	"A",	"A",	"B")),
                   Trt3 = factor(c("A",	"C",	"B",	"B",	"D",	NA,	"C",	"A",	"B",	"D",	"B",	NA,	"C",	"B",	"D",	"B",	"B",	"C",	"D")),
                   Trt4 = factor(c("C",	"A",	"B",	"A",	"D",	NA,	NA,	"C",	"A",	NA,	"B",	NA,	"A",	"D",	"A",	"C",	"C",	"A",	"B")),
                   stringsAsFactors = FALSE)
# How to address NA in the dataset:
data2 <- data |> rowwise() |> 
  mutate(A = case_when(any(c_across(Trt1:Trt4) != NA)~ "Yes", 
                       TRUE ~ NA_character_),
         B = case_when(
           any(c_across(Trt1:Trt4) == "D") ~ "Yes",
           all(c_across(Trt1:Trt4) == NA) ~ NA_character_,
           TRUE ~ "No"
         ),
         C = case_when(
           any(c_across(Trt1:Trt4) %in% c("D","C")) ~ "Yes",
           all(c_across(Trt1:Trt4) == NA) ~ NA_character_,
           TRUE ~ "No"
         ))

Functions like all(), any(), and if() are looking for a TRUE value. Comparisons using == will not return TRUE when used with NA; they will return NA. To test for NA, use the is.na() function.

x <- NA
x
[1] NA
x == NA
[1] NA
is.na(x)
[1] TRUE

Ok, what is the best way to address this NA issue. Can we run something like below code. Thanks

data2 <- data |> rowwise() |> 
  mutate(A = case_when(any(c_across(!is.na(Trt1:Trt4)))~ "Yes", 
         B = case_when(
           any(c_across(Trt1:Trt4) == "D") ~ "Yes",
           all(c_across(!is.na(Trt1:Trt4))) ~ "No",
         ),
         C = case_when(
           any(c_across(Trt1:Trt4) %in% c("D","C")) ~ "Yes",
           all(c_across(!is.na(Trt1:Trt4))) ~ "No"
         ))

c_across() returns a vector when you give it column names and you should then pass that vector to the function that calculates the quantity for that row. So, use

!is.na(c_across(Trt1:Trt4))

instead of

c_across(!is.na(Trt1:Trt4))

Ok, Please check if you can help me with running this code. Thanks

data2 <- data |> rowwise() |> 
  mutate(A = case_when((!is.na(c_across(Trt1:Trt4)))~ "Yes", 
                        ~ NA_character_),
         B = case_when(
           any(c_across(Trt1:Trt4) == "D") ~ "Yes",
           all((!is.na(c_across(Trt1:Trt4)))) ~ NA_character_,
            ~ "No"
         ),
         C = case_when(
           any(c_across(Trt1:Trt4) %in% c("D","C")) ~ "Yes",
           all((!is.na(c_across(Trt1:Trt4)))) ~ NA_character_,
            ~ "No"
         ))

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.