Management of dates (break down the dates)

Dear All,

I am working a study on sick leave using register data. From the register, I got only starting dates and end dates of sick leaves for each individual. But the dates are not broken down year by year. For instance , for person A, there are only data for start date (1-may-2016) and end date (14-feb-2018).
So, I would like to know how I can split out the dates year by year (ie. 1/5/16 to14/2/18 will be divided into 1/5/16-31/12/16, 1/1/2017-31/12/17, 1/1/18-14/2/18) in order to calculate total number of sick leaves for each year.

The example data created for the question is as follow;

sick_leave <- tribble(
  ~id,~from,~to, 
  1, "1/1/2018", "3/10/2020",
  2, "1/1/2016", "1/1/2021", 
  3, "2/1/2018", "2/6/2018",
  3, "2/7/2018", "31/12/2018",
  4, "2/10/2018", "2/2/2019",
  4, "31/12/2019", "1/1/2021",
  5, "2/10/2017", "20/5/2018",
  6, "2/3/2021", "31/12/2021",
  7, "1/1/2016", "5/6/2016"
) %>% mutate(from=dmy(from),to=dmy(to))

Using the indicated packages you could do the following:

library(tibble)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)

sick_leave <- tribble(
  ~id,~from,~to, 
  1, "1/1/2018", "3/10/2020",
  2, "1/1/2016", "1/1/2021", 
  3, "2/1/2018", "2/6/2018",
  3, "2/7/2018", "31/12/2018",
  4, "2/10/2018", "2/2/2019",
  4, "31/12/2019", "1/1/2021",
  5, "2/10/2017", "20/5/2018",
  6, "2/3/2021", "31/12/2021",
  7, "1/1/2016", "5/6/2016"
) %>% mutate(from=dmy(from),to=dmy(to)) 

sick_leave %>%
  rowwise() %>%
  mutate(
    sick_interval=lubridate::interval(from,to) ,
    sick_years=list(seq(year(from),year(to)))
    ) %>%
  unnest_longer(sick_years) %>% 
  mutate(sick_years_s = dmy(paste0('01/01/',sick_years)),
         sick_years_e = dmy(paste0('31/12/',sick_years)), 
         sick_years_i = lubridate::interval(sick_years_s,sick_years_e), 
         sick_interval_year=intersect(sick_interval,sick_years_i),
         from_year = date(int_start(sick_interval_year)),
         to_year   = date(int_end(sick_interval_year)) 
         ) %>%
  select(id,from,to,from_year,to_year) 
#> # A tibble: 20 x 5
#>       id from       to         from_year  to_year   
#>    <dbl> <date>     <date>     <date>     <date>    
#>  1     1 2018-01-01 2020-10-03 2018-01-01 2018-12-31
#>  2     1 2018-01-01 2020-10-03 2019-01-01 2019-12-31
#>  3     1 2018-01-01 2020-10-03 2020-01-01 2020-10-03
#>  4     2 2016-01-01 2021-01-01 2016-01-01 2016-12-31
#>  5     2 2016-01-01 2021-01-01 2017-01-01 2017-12-31
#>  6     2 2016-01-01 2021-01-01 2018-01-01 2018-12-31
#>  7     2 2016-01-01 2021-01-01 2019-01-01 2019-12-31
#>  8     2 2016-01-01 2021-01-01 2020-01-01 2020-12-31
#>  9     2 2016-01-01 2021-01-01 2021-01-01 2021-01-01
#> 10     3 2018-01-02 2018-06-02 2018-01-02 2018-06-02
#> 11     3 2018-07-02 2018-12-31 2018-07-02 2018-12-31
#> 12     4 2018-10-02 2019-02-02 2018-10-02 2018-12-31
#> 13     4 2018-10-02 2019-02-02 2019-01-01 2019-02-02
#> 14     4 2019-12-31 2021-01-01 2019-12-31 2019-12-31
#> 15     4 2019-12-31 2021-01-01 2020-01-01 2020-12-31
#> 16     4 2019-12-31 2021-01-01 2021-01-01 2021-01-01
#> 17     5 2017-10-02 2018-05-20 2017-10-02 2017-12-31
#> 18     5 2017-10-02 2018-05-20 2018-01-01 2018-05-20
#> 19     6 2021-03-02 2021-12-31 2021-03-02 2021-12-31
#> 20     7 2016-01-01 2016-06-05 2016-01-01 2016-06-05

Created on 2020-08-17 by the reprex package (v0.3.0)

1 Like

Another approach using tidyr::uncount().

library(dplyr, warn.conflicts = FALSE)
library(tidyr)
library(lubridate, warn.conflicts = FALSE)

sick_leave <- tribble(
  ~id, ~from, ~to,
  1, "1/1/2018", "3/10/2020",
  2, "1/1/2016", "1/1/2021",
  3, "2/1/2018", "2/6/2018",
  3, "2/7/2018", "31/12/2018",
  4, "2/10/2018", "2/2/2019",
  4, "31/12/2019", "1/1/2021",
  5, "2/10/2017", "20/5/2018",
  6, "2/3/2021", "31/12/2021",
  7, "1/1/2016", "5/6/2016"
)

sick_leave %>%
  mutate(from = dmy(from), to = dmy(to), years = year(to) - year(from) + 1) %>%
  uncount(years) %>%
  group_by(id, from, to) %>%
  mutate(year_helper = seq(first(year(from)), last(year(to)), by = 1)) %>%
  ungroup() %>%
  mutate(
    new_from = if_else(from > make_date(year_helper, 1, 1),
      from, make_date(year_helper, 1, 1)
    ),
    new_to = if_else(to < make_date(year_helper, 12, 31),
      to, make_date(year_helper, 12, 31)
    ),
    interval = new_to - new_from + 1
  )
#> # A tibble: 20 x 7
#>       id from       to         year_helper new_from   new_to     interval
#>    <dbl> <date>     <date>           <dbl> <date>     <date>     <drtn>  
#>  1     1 2018-01-01 2020-10-03        2018 2018-01-01 2018-12-31 365 days
#>  2     1 2018-01-01 2020-10-03        2019 2019-01-01 2019-12-31 365 days
#>  3     1 2018-01-01 2020-10-03        2020 2020-01-01 2020-10-03 277 days
#>  4     2 2016-01-01 2021-01-01        2016 2016-01-01 2016-12-31 366 days
#>  5     2 2016-01-01 2021-01-01        2017 2017-01-01 2017-12-31 365 days
#>  6     2 2016-01-01 2021-01-01        2018 2018-01-01 2018-12-31 365 days
#>  7     2 2016-01-01 2021-01-01        2019 2019-01-01 2019-12-31 365 days
#>  8     2 2016-01-01 2021-01-01        2020 2020-01-01 2020-12-31 366 days
#>  9     2 2016-01-01 2021-01-01        2021 2021-01-01 2021-01-01   1 days
#> 10     3 2018-01-02 2018-06-02        2018 2018-01-02 2018-06-02 152 days
#> 11     3 2018-07-02 2018-12-31        2018 2018-07-02 2018-12-31 183 days
#> 12     4 2018-10-02 2019-02-02        2018 2018-10-02 2018-12-31  91 days
#> 13     4 2018-10-02 2019-02-02        2019 2019-01-01 2019-02-02  33 days
#> 14     4 2019-12-31 2021-01-01        2019 2019-12-31 2019-12-31   1 days
#> 15     4 2019-12-31 2021-01-01        2020 2020-01-01 2020-12-31 366 days
#> 16     4 2019-12-31 2021-01-01        2021 2021-01-01 2021-01-01   1 days
#> 17     5 2017-10-02 2018-05-20        2017 2017-10-02 2017-12-31  91 days
#> 18     5 2017-10-02 2018-05-20        2018 2018-01-01 2018-05-20 140 days
#> 19     6 2021-03-02 2021-12-31        2021 2021-03-02 2021-12-31 305 days
#> 20     7 2016-01-01 2016-06-05        2016 2016-01-01 2016-06-05 157 days

Created on 2020-08-17 by the reprex package (v0.3.0)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.