unify unique values for several groups of columns automatically

juandmaz · January 25, 2024, 12:19am

If I have this data.base:

id x_1   x_2   x_3   y_1   y_2   y_3   z_1   z_2   z_3   e_1   e_2   e_3   h_1   h_2   h_3   g_1   g_2  
  <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1     1 a     NA    NA    c     NA    NA    a     NA    NA    b     NA    NA    d     NA    NA    NA    NA   
2     2 a     a     NA    b     NA    NA    a     b     NA    c     c     NA    a     a     NA    a     NA   
3     3 d     d     d     a     a     a     c     c     c     d     d     d     b     c     a     NA    d    
4     4 c     NA    NA    NA    NA    NA    d     NA    NA    a     NA    NA    a     NA    NA    a     NA   
5     5 c     c     c     d     d     NA    b     a     c     a     a     a     b     b     b     b     b    
6     6 b     b     NA    a     a     NA    a     a     NA    b     b     NA    c     c     NA    NA    b

and I need to create for all columns except for id and x_1,x_2 and x_3, a column that encompasses the unique values of the variables, how can I do it?
This works

df %>%
    mutate(g = coalesce(g_1, g_2, g_3),
           z= coalesce(z_1, z_2, z_3),
           e= coalesce(e_1, e_2, e_3),
           h= coalesce(h_1, h_2, h_3),
           y= coalesce(y_1, y_2, y_3)) %>%
    select(id, contains("x"),g,z,e,h,y)

  id x_1   x_2   x_3   g     z     e     h     y    
  <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1     1 a     NA    NA    NA    a     b     d     c    
2     2 a     a     NA    a     a     c     a     b    
3     3 d     d     d     d     c     d     b     a    
4     4 c     NA    NA    a     d     a     a     NA   
5     5 c     c     c     b     b     a     b     d    
6     6 b     b     NA    b     a     b     c     a

Can it be done automatically or do I have to do each group of variables one by one?
this is a small example, I have no problem to do it with 5 variables but I really have to do it for 50 variables

nirgrahamuk · January 25, 2024, 10:13am

library(tidyverse)

(small_df <- data.frame(
  g_1 = rep(NA, 3),
  g_2 = c(rep(NA, 2), 1),
  g_3 = 3:1,
  h_1 = rep(NA, 3),
  h_2 = c(rep(NA, 2), 1),
  h_3 = 4:2
))

# what you do

mutate(small_df,
  g = coalesce(g_1, g_2, g_3),
  h = coalesce(h_1, h_2, h_3)
)

# what you could do
(vargroups <- enframe(names(small_df)) |>
  tidyr::separate_wider_delim(
    cols = "value",
    delim = "_",
    names = c("prefix", "suffix"),
    cols_remove = FALSE
  ) |> summarise(clist = list(value),
                 .by = prefix) |> 
    deframe())


(to_add <- imap_dfc(vargroups,\(x,y)transmute(small_df,
                         {{y}}:=coalesce(!!!syms(x)))))

(result <- bind_cols(small_df,to_add))

system · March 7, 2024, 10:14am

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.