Add rows to data frame if "missing" scenario occurs

person week wage
a 1 10
a 2 11
b 1 11
b 2 15
c 1 17
c 2 18

How can I add rows for the person "d" if there are no data available to him? That is, when my data frame does not have any data for "d", I want to have rows for "d" and assign zero as wage. Here is my desired output when "d" is missing:

person week wage
a 1 10
a 2 11
b 1 11
b 2 15
c 1 17
c 2 18
d 1 0
d 2 0
library(dplyr)
# When person "d" is missing
df1 <- tibble(
  person = rep(c("a", "b", "c"), each = 2),
  week = rep(1:2, times = 3),
  wage = c(10, 11, 11, 15, 17, 18)
)

# When person "d" is not missing (code will not do anything)
df2 <- tibble(
  person = rep(c("a", "b", "c", "d"), each = 2),
  week = rep(1:2, times = 4),
  wage = c(10, 11, 11, 15, 17, 18, 12, 11)
)

Is this the sort of thing you want to do?

library(tidyverse)
#> Warning: package 'tibble' was built under R version 4.1.2
df1 <- tibble(
  person = rep(c("a", "b", "c"), each = 2),
  week = rep(1:2, times = 3),
  wage = c(10, 11, 11, 15, 17, 18)
)
People <- c("a", "b", "c", "d")
UniWeeks <- unique(df1$week)
MyFunc <- function(p, DF, WEEKS) {
  if (!p %in% DF$person ) {
    tmp <- data.frame(person = p, week = WEEKS, wage = 0)
    rbind(DF, tmp)
  }
}
df2 <- map_dfr(People, MyFunc, DF = df1, WEEKS = UniWeeks)
df2
#> # A tibble: 8 x 3
#>   person  week  wage
#>   <chr>  <int> <dbl>
#> 1 a          1    10
#> 2 a          2    11
#> 3 b          1    11
#> 4 b          2    15
#> 5 c          1    17
#> 6 c          2    18
#> 7 d          1     0
#> 8 d          2     0

Created on 2022-03-11 by the reprex package (v2.0.1)

Thank you! It is indeed what I want to do. However, some issues remain:

  • Suppose more than one person is missing e.g. "c" and "d". In this case, when I apply your code it does not generate the desired output.

  • When data is complete i.e. none of the people is missing, the code returns an empty data frame. However, I want it to return the actual data.

library(tidyverse)
# new data -----
df1 <- tibble(
  person = rep(c("a", "b"), each = 2),
  week = rep(1:2, times = 2),
  wage = c(10, 11, 11, 15)
)
df1
#> # A tibble: 4 x 3
#>   person  week  wage
#>   <chr>  <int> <dbl>
#> 1 a          1    10
#> 2 a          2    11
#> 3 b          1    11
#> 4 b          2    15
# code that works when only one missing----
People <- c("a", "b", "c", "d")
UniWeeks <- unique(df1$week)
MyFunc <- function(p, DF, WEEKS) {
  if (!p %in% DF$person ) {
    tmp <- data.frame(person = p, week = WEEKS, wage = 0)
    rbind(DF, tmp)
  }
}
map_dfr(People, MyFunc, DF = df1, WEEKS = UniWeeks)
#> # A tibble: 12 x 3
#>    person  week  wage
#>    <chr>  <int> <dbl>
#>  1 a          1    10
#>  2 a          2    11
#>  3 b          1    11
#>  4 b          2    15
#>  5 c          1     0
#>  6 c          2     0
#>  7 a          1    10
#>  8 a          2    11
#>  9 b          1    11
#> 10 b          2    15
#> 11 d          1     0
#> 12 d          2     0

Use factor levels to specify which persons should be present in the data frame and then use tidyr::complete().

library(tidyverse)

df1 <- tibble(
    person = rep(c("a", "b", "c"), each = 2),
    week = rep(1:2, times = 3),
    wage = c(10, 11, 11, 15, 17, 18)
)

df1 %>% 
    mutate(person = factor(person, levels = c("a", "b", "c", "d"))) %>% 
    complete(person, week, fill = list(wage = 0))
#> # A tibble: 8 × 3
#>   person  week  wage
#>   <fct>  <int> <dbl>
#> 1 a          1    10
#> 2 a          2    11
#> 3 b          1    11
#> 4 b          2    15
#> 5 c          1    17
#> 6 c          2    18
#> 7 d          1     0
#> 8 d          2     0

Created on 2022-03-12 by the reprex package (v2.0.1)

4 Likes

Or like this (you always have to specify the levels you want to work on and @andresrcs method is a nice one):

suppressPackageStartupMessages(
  suppressWarnings(
    {library(tibble)
     library(dplyr)
    }
  )
)
# your data
df1 <- tibble(
  person = rep(c("a", "b"), each = 2),
  week = rep(1:2, times = 2),
  wage = c(10, 11, 11, 15)
)
# all possible combinations (knowing we have also week3 and persons c and d)
df0 <-expand.grid(
  person=c("a", "b", "c", "d"),
  week=c(1,2,3),
  wage=0
)
# combine 
rbind(df1,df0) %>%
  group_by(person,week) %>%
  summarise(wage=sum(wage),.groups = "drop")
#> # A tibble: 12 x 3
#>    person  week  wage
#>    <chr>  <dbl> <dbl>
#>  1 a          1    10
#>  2 a          2    11
#>  3 a          3     0
#>  4 b          1    11
#>  5 b          2    15
#>  6 b          3     0
#>  7 c          1     0
#>  8 c          2     0
#>  9 c          3     0
#> 10 d          1     0
#> 11 d          2     0
#> 12 d          3     0
Created on 2022-03-12 by the reprex package (v2.0.1)
1 Like

Many thanks to all of you! I have learned some cool tricks!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.