COVID incidence weekly to daily

Hi there, I need to split the weekly COVID incidence data to daily by calculating average from the weekly data.
The goal is to calculate Re (effective reproduction number). COVID is reported now as weekly. Repex as pasted below. Hope you will help me in this. Thank you.

dates and I columns are orginal reported data. dates2 and I_2 are desired outcome columns.

covid.incidence <- tibble::tribble(
                           ~dates,    ~I,      ~dates2, ~I_2,
                     "24/03/2023", 4467L, "18/03/2023", 638L,
                     "31/03/2023", 5225L, "19/03/2023", 638L,
                               NA,    NA, "20/03/2023", 638L,
                               NA,    NA, "21/03/2023", 638L,
                               NA,    NA, "22/03/2023", 638L,
                               NA,    NA, "23/03/2023", 638L,
                               NA,    NA, "24/03/2023", 638L,
                               NA,    NA, "25/03/2023", 746L,
                               NA,    NA, "26/03/2023", 746L,
                               NA,    NA, "27/03/2023", 746L,
                               NA,    NA, "28/03/2023", 746L,
                               NA,    NA, "29/03/2023", 746L,
                               NA,    NA, "30/03/2023", 746L,
                               NA,    NA, "31/03/2023", 746L
                     )
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

covid.incidence <- tibble::tribble(
  ~dates,    ~I,      ~dates2, ~I_2,
  "24/03/2023", 4467L, "18/03/2023", 638L,
  "31/03/2023", 5225L, "19/03/2023", 638L,
  NA,    NA, "20/03/2023", 638L,
  NA,    NA, "21/03/2023", 638L,
  NA,    NA, "22/03/2023", 638L,
  NA,    NA, "23/03/2023", 638L,
  NA,    NA, "24/03/2023", 638L,
  NA,    NA, "25/03/2023", 746L,
  NA,    NA, "26/03/2023", 746L,
  NA,    NA, "27/03/2023", 746L,
  NA,    NA, "28/03/2023", 746L,
  NA,    NA, "29/03/2023", 746L,
  NA,    NA, "30/03/2023", 746L,
  NA,    NA, "31/03/2023", 746L
)
head(covid.incidence,3)
#> # A tibble: 3 × 4
#>   dates          I dates2       I_2
#>   <chr>      <int> <chr>      <int>
#> 1 24/03/2023  4467 18/03/2023   638
#> 2 31/03/2023  5225 19/03/2023   638
#> 3 <NA>          NA 20/03/2023   638

# convert character date representations to
# date objects; format can be restored for
# purposes of tables and plots afterwards

covid.incidence$dates <- dmy(covid.incidence$dates)
covid.incidence$dates2 <- dmy(covid.incidence$dates2)
head(covid.incidence,3)
#> # A tibble: 3 × 4
#>   dates          I dates2       I_2
#>   <date>     <int> <date>     <int>
#> 1 2023-03-24  4467 2023-03-18   638
#> 2 2023-03-31  5225 2023-03-19   638
#> 3 NA            NA 2023-03-20   638

# given
weeklies <- covid.incidence[1:2,1:2]
# produce
covid.incidence[1:14,3:4]
#> # A tibble: 14 × 2
#>    dates2       I_2
#>    <date>     <int>
#>  1 2023-03-18   638
#>  2 2023-03-19   638
#>  3 2023-03-20   638
#>  4 2023-03-21   638
#>  5 2023-03-22   638
#>  6 2023-03-23   638
#>  7 2023-03-24   638
#>  8 2023-03-25   746
#>  9 2023-03-26   746
#> 10 2023-03-27   746
#> 11 2023-03-28   746
#> 12 2023-03-29   746
#> 13 2023-03-30   746
#> 14 2023-03-31   746
# where
covid.incidence[1,4] == unique(round(covid.incidence[1,2]/7,))
#>       I_2
#> [1,] TRUE

# receiver object
the_dailies <- data.frame(dates2 = structure(numeric(0), class = "Date"), 
                I_2 = integer(0))

# functions
make_week <- function(x) return(covid.incidence$dates[x] + 1:7)
make_mean <- function(x) {
  the_mean = rep(covid.incidence$I[x]/7,7)
  return(round(the_mean,0))
}


compose <- function(x) {
  d = data.frame(dates2 = make_week(x),
                 I_2 = make_mean(x))
  return(d)
}

for(i in seq_along(weeklies$dates)) the_dailies = rbind(the_dailies,compose(i))

the_dailies 
#>        dates2 I_2
#> 1  2023-03-25 638
#> 2  2023-03-26 638
#> 3  2023-03-27 638
#> 4  2023-03-28 638
#> 5  2023-03-29 638
#> 6  2023-03-30 638
#> 7  2023-03-31 638
#> 8  2023-04-01 746
#> 9  2023-04-02 746
#> 10 2023-04-03 746
#> 11 2023-04-04 746
#> 12 2023-04-05 746
#> 13 2023-04-06 746
#> 14 2023-04-07 746

Created on 2023-04-04 with reprex v2.0.2

1 Like

Just for fun, here is the answer received from with the OP as a prompt from ChatGPT Mar 23 Version

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)

covid.incidence <- tibble::tribble(
  ~dates,    ~I,      ~dates2, ~I_2,
  "24/03/2023", 4467L, "18/03/2023", 638L,
  "31/03/2023", 5225L, "19/03/2023", 638L,
  NA,    NA, "20/03/2023", 638L,
  NA,    NA, "21/03/2023", 638L,
  NA,    NA, "22/03/2023", 638L,
  NA,    NA, "23/03/2023", 638L,
  NA,    NA, "24/03/2023", 638L,
  NA,    NA, "25/03/2023", 746L,
  NA,    NA, "26/03/2023", 746L,
  NA,    NA, "27/03/2023", 746L,
  NA,    NA, "28/03/2023", 746L,
  NA,    NA, "29/03/2023", 746L,
  NA,    NA, "30/03/2023", 746L,
  NA,    NA, "31/03/2023", 746L
)

# Convert dates to date format
covid.incidence$dates <- as.Date(covid.incidence$dates, "%d/%m/%Y")

# Create a new column for the week start date
covid.incidence <- covid.incidence %>%
  mutate(week_start = as.Date(dates - weekdays(dates) + 1))
#> Error in `mutate()`:
#> ℹ In argument: `week_start = as.Date(dates - weekdays(dates) + 1)`.
#> Caused by error in `unclass(as.Date(e1)) - e2`:
#> ! non-numeric argument to binary operator
#> Backtrace:
#>      ▆
#>   1. ├─covid.incidence %>% ...
#>   2. ├─dplyr::mutate(...)
#>   3. ├─dplyr:::mutate.data.frame(., week_start = as.Date(dates - weekdays(dates) + 1))
#>   4. │ └─dplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#>   5. │   ├─base::withCallingHandlers(...)
#>   6. │   └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#>   7. │     └─mask$eval_all_mutate(quo)
#>   8. │       └─dplyr (local) eval()
#>   9. ├─base::as.Date(dates - weekdays(dates) + 1)
#>  10. ├─base::`-.Date`(dates, weekdays(dates))
#>  11. │ └─base::.Date(unclass(as.Date(e1)) - e2)
#>  12. └─base::.handleSimpleError(...)
#>  13.   └─dplyr (local) h(simpleError(msg, call))
#>  14.     └─rlang::abort(message, class = error_class, parent = parent, call = error_call)

# Group by week start date and calculate the average daily incidence
daily.incidence <- covid.incidence %>%
  group_by(week_start) %>%
  summarize(daily_incidence = sum(I, na.rm = TRUE)/7) %>%
  ungroup()
#> Error in `group_by()`:
#> ! Must group by variables found in `.data`.
#> ✖ Column `week_start` is not found.
#> Backtrace:
#>     ▆
#>  1. ├─... %>% ungroup()
#>  2. ├─dplyr::ungroup(.)
#>  3. ├─dplyr::summarize(., daily_incidence = sum(I, na.rm = TRUE)/7)
#>  4. ├─dplyr::group_by(., week_start)
#>  5. └─dplyr:::group_by.data.frame(., week_start)
#>  6.   └─dplyr::group_by_prepare(.data, ..., .add = .add, error_call = current_env())
#>  7.     └─rlang::abort(bullets, call = error_call)

# Create a sequence of dates from the start of the first week to the end of the last week
date_seq <- seq(min(daily.incidence$week_start), max(daily.incidence$week_start) + 6, by = "day")
#> Error in seq(min(daily.incidence$week_start), max(daily.incidence$week_start) + : object 'daily.incidence' not found

# Create a new tibble with the daily incidence and corresponding dates
daily_covid <- tibble(dates2 = date_seq,
                      I_2 = rep(daily.incidence$daily_incidence, each = 7))
#> Error in eval_tidy(xs[[j]], mask): object 'date_seq' not found

# Format the dates
daily_covid$dates2 <- format(daily_covid$dates2, "%d/%m/%Y")
#> Error in format(daily_covid$dates2, "%d/%m/%Y"): object 'daily_covid' not found

# Join the daily incidence tibble with the original tibble to get the desired outcome
covid.incidence <- covid.incidence %>%
  select(-I, -dates2) %>%
  left_join(daily_covid, by = "dates") %>%
  select(dates, I_2, dates2)
#> Error in is.data.frame(y): object 'daily_covid' not found
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.