I have 2 data tables. One has lots of variables for many groups. Each observation has a unique ID. The other contains only a few variables for a few groups. It has the same IDs as the first table. The variables in the second table are present in the first but are always missing for the groups in the second. I need to replace the missing values in table 1 with those in table 2. There needs to be some sort of join but I can not figure out the correct way to do it. In the example below df1 + df2 need to become df3.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df1 <- structure(list(id = 1:10,
group = c("a", "a", "a", "b", "b",
"c", "c", "c", "d", "d"),
var1 = c(NA, NA, NA, NA, NA, 58L,
NA, 23L, NA, 30L),
var2 = c(NA, NA, NA, NA, NA, 84L,
NA, 89L, NA, 91L),
var3 = c("O", "X", "H", "S", "T",
"E", "U", "L", "I", "B"),
var4 = c("t", "v", "u", "p", "s",
"m", "k", "f", "e", "g")),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
df2 <- structure(list(id = 1:5,
group = c("a", "a", "a", "b", "b"),
var1 = c(2L, 8L, 46L, 14L, 92L),
var2 = c(18L, 15L, 2L, 63L, 28L)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame"))
df3 <- structure(list(id = 1:10,
group = c("a", "a", "a", "b", "b",
"c", "c", "c", "d", "d"),
var1 = c(2L, 8L, 46L, 14L, 92L, 58L,
NA, 23L, NA, 30L),
var2 = c(18L, 15L, 2L, 63L, 28L, 84L,
NA, 89L, NA, 91L),
var3 = c("O", "X", "H", "S", "T",
"E", "U", "L", "I", "B"),
var4 = c("t", "v", "u", "p", "s",
"m", "k", "f", "e", "g")),
row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
df1
#> # A tibble: 10 x 6
#> id group var1 var2 var3 var4
#> <int> <chr> <int> <int> <chr> <chr>
#> 1 1 a NA NA O t
#> 2 2 a NA NA X v
#> 3 3 a NA NA H u
#> 4 4 b NA NA S p
#> 5 5 b NA NA T s
#> 6 6 c 58 84 E m
#> 7 7 c NA NA U k
#> 8 8 c 23 89 L f
#> 9 9 d NA NA I e
#> 10 10 d 30 91 B g
df2
#> # A tibble: 5 x 4
#> id group var1 var2
#> <int> <chr> <int> <int>
#> 1 1 a 2 18
#> 2 2 a 8 15
#> 3 3 a 46 2
#> 4 4 b 14 63
#> 5 5 b 92 28
df3
#> # A tibble: 10 x 6
#> id group var1 var2 var3 var4
#> <int> <chr> <int> <int> <chr> <chr>
#> 1 1 a 2 18 O t
#> 2 2 a 8 15 X v
#> 3 3 a 46 2 H u
#> 4 4 b 14 63 S p
#> 5 5 b 92 28 T s
#> 6 6 c 58 84 E m
#> 7 7 c NA NA U k
#> 8 8 c 23 89 L f
#> 9 9 d NA NA I e
#> 10 10 d 30 91 B g