Can't join 2 dfs like I need it

I have this 2 databases:

df1

# A tibble: 15 x 6
      id match  var1  var2  var3 var4 
   <int> <chr> <int> <int> <int> <chr>
 1     1 B         5    10     1 T    
 2     2 B         3     5     8 F    
 3     3 B         7     7     8 NA   
 4     4 A        10     5     8 NA   
 5     5 B         9     2     3 NA   
 6     6 B         7     4     1 NA   
 7     7 B         7    10     2 NA   
 8     8 B         7     5     2 NA   
 9     9 A         4     5     7 NA   
10    10 A         8     2     2 NA   
11    11 A         2     3     1 NA   
12    12 B         4     6     8 NA   
13    13 B         2     8     5 NA   
14    14 A         3     8     9 NA   
15    15 B         1     3     1 NA
df2

# A tibble: 15 x 3
      id color var8 
   <int> <chr> <chr>
 1     1 F     A    
 2     2 F     B    
 3     3 F     A    
 4     4 F     B    
 5     5 T     B    
 6     6 F     A    
 7     7 T     B    
 8     8 F     A    
 9     9 T     B    
10    10 T     A    
11    11 F     B    
12    12 T     A    
13    13 F     A    
14    14 F     A    
15    15 T     B

I want to fill in the values of var4 for df1 for observations that have match=="A" and in turn add a new variable that comes from 'color' if this var has a value in df2.

I do this:

df3<- df1 %>%
  filter(match=="A") %>%
  left_join(df2[,c("id", "color")]) %>%
  mutate(var4= if_else(color =="T", "Si", "No")) %>%
  mutate(bike= if_else(var4=="Si", "bici", "auto"))

And I have this:

# A tibble: 5 x 7
     id match  var1  var2  var3 var4  bike 
  <int> <chr> <int> <int> <int> <chr> <chr>
1     4 A        10     5     8 No    auto 
2     9 A         4     5     7 Si    bici 
3    10 A         8     2     2 Si    bici 
4    11 A         2     3     1 No    auto 
5    14 A         3     8     9 No    auto

So I want for these ids to fill var4 information in df1 and also bring bike to df1.

My desired output is this:

# A tibble: 15 x 7
      id match  var1  var2  var3 var4  bike 
   <int> <chr> <int> <int> <int> <chr> <lgl>
 1     1 B         5    10     1 T     NA   
 2     2 B         3     5     8 F     NA   
 3     3 B         7     7     8 NA    NA   
 4     4 A        10     5     8 No    auto   
 5     5 B         9     2     3 NA    NA   
 6     6 B         7     4     1 NA    NA   
 7     7 B         7    10     2 NA    NA   
 8     8 B         7     5     2 NA    NA   
 9     9 A         4     5     7 Si    bici   
10    10 A         8     2     2 Si    bici   
11    11 A         2     3     1 No    bici   
12    12 B         4     6     8 NA    NA   
13    13 B         2     8     5 NA    NA   
14    14 A         3     8     9 No    bici   
15    15 B         1     3     1 NA    NA

How can I do it?

is this what you want?

library(tidyverse)

df1<-tribble(~id,~match,~var1,~var2,~var3,~var4,
1,'B',5,10,1,T,
2,'B',3,5,8,F,
3,'B',7,7,8,NA_real_,
4,'A',10,5,8,NA_real_,
5,'B',9,2,3,NA_real_,
6,'B',7,4,1,NA_real_,
7,'B',7,10,2,NA_real_,
8,'B',7,5,2,NA_real_,
9,'A',4,5,7,NA_real_,
10,'A',8,2,2,NA_real_,
11,'A',2,3,1,NA_real_,
12,'B',4,6,8,NA_real_,
13,'B',2,8,5,NA_real_,
14,'A',3,8,9,NA_real_,
15,'B',1,3,1,NA_real_,)

df2<-tribble(~id,~color,~var8,
1,F,'A',
2,F,'B',
3,F,'A',
4,F,'B',
5,T,'B',
6,F,'A',
7,T,'B',
8,F,'A',
9,T,'B',
10,T,'A',
11,F,'B',
12,T,'A',
13,F,'A',
14,F,'A',
15,T,'B')


df1 %>%
  left_join(df2[,c("id", "color")]) %>%
  mutate(var4= case_when(color == T & match=="A" ~"Si", 
                         T ~"No")) %>%
  mutate(bike= case_when(var4=="Si" & match=="A"~ "bici", 
                         var4=="No" & match=="A"~ "auto"))


ciao, Filippo

This doesn't work. The output that returns with your code is this:

# A tibble: 15 x 8
      id match  var1  var2  var3 var4  color bike 
   <dbl> <chr> <dbl> <dbl> <dbl> <chr> <lgl> <chr>
 1     1 B         5    10     1 No    FALSE NA   
 2     2 B         3     5     8 No    FALSE NA   
 3     3 B         7     7     8 No    FALSE NA   
 4     4 A        10     5     8 No    FALSE auto 
 5     5 B         9     2     3 No    TRUE  NA   
 6     6 B         7     4     1 No    FALSE NA   
 7     7 B         7    10     2 No    TRUE  NA   
 8     8 B         7     5     2 No    FALSE NA   
 9     9 A         4     5     7 Si    TRUE  bici 
10    10 A         8     2     2 Si    TRUE  bici 
11    11 A         2     3     1 No    FALSE auto 
12    12 B         4     6     8 No    TRUE  NA   
13    13 B         2     8     5 No    FALSE NA   
14    14 A         3     8     9 No    FALSE auto 
15    15 B         1     3     1 No    TRUE  NA

It isn't my desired output. Look that the 2 rows in my output are T and F and here there are "No" and also almost every values in var4 are "No" in your output and in my desired ouput there are NA.

Can you share your df using dput(head(YOURDF,15))

library(tidyverse)

df1<-tribble(~id,~match,~var1,~var2,~var3,~var4,
             1,'B',5,10,1,"T",
             2,'B',3,5,8,"F",
             3,'B',7,7,8,NA,
             4,'A',10,5,8,NA,
             5,'B',9,2,3,NA,
             6,'B',7,4,1,NA,
             7,'B',7,10,2,NA,
             8,'B',7,5,2,NA,
             9,'A',4,5,7,NA,
             10,'A',8,2,2,NA,
             11,'A',2,3,1,NA,
             12,'B',4,6,8,NA,
             13,'B',2,8,5,NA,
             14,'A',3,8,9,NA,
             15,'B',1,3,1,NA)

df2<-tribble(~id,~color,~var8,
             1,"F",'A',
             2,"F",'B',
             3,"F",'A',
             4,"F",'B',
             5,"T",'B',
             6,"F",'A',
             7,"T",'B',
             8,"F",'A',
             9,"T",'B',
             10,"T",'A',
             11,"F",'B',
             12,"T",'A',
             13,"F",'A',
             14,"F",'A',
             15,"T",'B')

df3<- df1 %>%
  filter(match=="A") %>%
  left_join(df2[,c("id", "color")]) %>%
  mutate(var4= if_else(color =="T", "Si", "No")) %>%
  mutate(bike= if_else(var4=="Si", "bici", "auto")) %>% select(-color)

dplyr::rows_update(x=df1 %>% mutate(bike=NA_character_),
                   y=df3,
                   by = "id")
# A tibble: 15 x 7
      id match  var1  var2  var3 var4  bike 
   <dbl> <chr> <dbl> <dbl> <dbl> <chr> <chr>
 1     1 B         5    10     1 T     NA   
 2     2 B         3     5     8 F     NA   
 3     3 B         7     7     8 NA    NA   
 4     4 A        10     5     8 No    auto 
 5     5 B         9     2     3 NA    NA   
 6     6 B         7     4     1 NA    NA   
 7     7 B         7    10     2 NA    NA   
 8     8 B         7     5     2 NA    NA   
 9     9 A         4     5     7 Si    bici 
10    10 A         8     2     2 Si    bici 
11    11 A         2     3     1 No    auto 
12    12 B         4     6     8 NA    NA   
13    13 B         2     8     5 NA    NA   
14    14 A         3     8     9 No    auto 
15    15 B         1     3     1 NA    NA
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.