Hello!
Is there a faster solution for these 2 questions without using row-wise? It worked well on the subset of the sample, but using the complete sample ( ~ 10 million rows) it's been loading for 3H +.
Thank you!
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.2
df <- data.frame(flag1 = c(FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE),
flag2 = c(FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE),
flag3 = c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE))
dfNew <- df |> rowwise() |>
mutate(AnyTrue = sum(c_across(flag1:flag3)) > 0,
OneTrue = sum(c_across(flag1:flag3)) == 1,
TwoTrue = sum(c_across(flag1:flag3)) == 2,
MoreThanTwo = sum(c_across(flag1:flag3)) > 2)
dfNew
library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.2
df<- tribble(~A,~B,~C,~D,
"I123","I121","I1908","I129",
"I128","I123","I124","I109",
"I126","I1855","I129","I183",
"I121","I163","F121","I8773",
"I123","I129","I1563","I121",
"I129","I1665","I128", "F843",
"X","Y","Z","ZZ")
df <- df |> mutate(Row=row_number())
Long <- df |> pivot_longer(cols = A:D,names_to = "name")
Long <- Long |> group_by(Row) |>
summarize(C1 = any(str_detect(value,"I123|I128")),
C2 = any(str_detect(value,"I121")),
C3 = any(str_detect(value,"I129"))) |>
rowwise() |>
mutate(WHICH=which(c_across(C1:C3))[1])
FINAL <- inner_join(df,Long,by="Row")
FINAL