Happy Saturday everyone!
I have this weekly sale data and want to create a daily timeseries. The missing date would be given the same values using na.locf
.
Question: can I assign the start date of week #1 to January 1st of the year and go from there?
Thank you!
library(zoo)
library(tidyverse)
library(lubridate)
DF <- structure(list(Year = c(1999, 2000), Wk01 = c(2.421, 3.39), Wk02 = c(3.518,
2.591), Wk03 = c(4.539, 2.211), Wk04 = c(3.251, 3.487), Wk05 = c(4.05,
3.908), Wk06 = c(4.129, 3.714), Wk07 = c(3.967, 3.481), Wk08 = c(1.317,
3.939), Wk09 = c(3.507, 3.77), Wk10 = c(3.235, 3.347), Wk11 = c(3.856,
4.607), Wk12 = c(4.9, 4.871), Wk13 = c(3.534, 6.704), Wk14 = c(3.673,
7.236), Wk15 = c(3.631, 6.559), Wk16 = c(4.789, 6.037), Wk17 = c(6.597,
7.838), Wk18 = c(4.542, 5.466), Wk19 = c(4.4, 5.692), Wk20 = c(5.935,
5.403), Wk21 = c(5.937, 5.56), Wk22 = c(11.755, 6.933), Wk23 = c(11.755,
11.005), Wk24 = c(10.484, 13.286), Wk25 = c(12.688, 10.646),
Wk26 = c(13.811, 12.303), Wk27 = c(29.954, 11.826), Wk28 = c(28.48,
8.961), Wk29 = c(37.812, 7.795), Wk30 = c(20.841, 7.813),
Wk31 = c(13.835, 5.609), Wk32 = c(15.241, 6.538), Wk33 = c(20.373,
6.046), Wk34 = c(11.184, 4.521), Wk35 = c(9.519, 5.489),
Wk36 = c(8.578, 7.567), Wk37 = c(7.154, 5.377), Wk38 = c(6.025,
5.541), Wk39 = c(6.655, 5.213), Wk40 = c(5.827, 5.072), Wk41 = c(5.841,
5.046), Wk42 = c(5.823, 4.59), Wk43 = c(5.607, 4.423), Wk44 = c(5.394,
4.27), Wk45 = c(5.359, 3.959), Wk46 = c(5.177, 3.621), Wk47 = c(5.976,
3.704), Wk48 = c(6.358, 3.242), Wk49 = c(5.025, 3.143), Wk50 = c(4.339,
1.786), Wk51 = c(4.228, 1.442), Wk52 = c(4.872, 3.225)), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))
DF
#> # A tibble: 2 x 53
#> Year Wk01 Wk02 Wk03 Wk04 Wk05 Wk06 Wk07 Wk08 Wk09 Wk10 Wk11
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1999 2.42 3.52 4.54 3.25 4.05 4.13 3.97 1.32 3.51 3.24 3.86
#> 2 2000 3.39 2.59 2.21 3.49 3.91 3.71 3.48 3.94 3.77 3.35 4.61
#> # ... with 41 more variables: Wk12 <dbl>, Wk13 <dbl>, Wk14 <dbl>,
#> # Wk15 <dbl>, Wk16 <dbl>, Wk17 <dbl>, Wk18 <dbl>, Wk19 <dbl>,
#> # Wk20 <dbl>, Wk21 <dbl>, Wk22 <dbl>, Wk23 <dbl>, Wk24 <dbl>,
#> # Wk25 <dbl>, Wk26 <dbl>, Wk27 <dbl>, Wk28 <dbl>, Wk29 <dbl>,
#> # Wk30 <dbl>, Wk31 <dbl>, Wk32 <dbl>, Wk33 <dbl>, Wk34 <dbl>,
#> # Wk35 <dbl>, Wk36 <dbl>, Wk37 <dbl>, Wk38 <dbl>, Wk39 <dbl>,
#> # Wk40 <dbl>, Wk41 <dbl>, Wk42 <dbl>, Wk43 <dbl>, Wk44 <dbl>,
#> # Wk45 <dbl>, Wk46 <dbl>, Wk47 <dbl>, Wk48 <dbl>, Wk49 <dbl>,
#> # Wk50 <dbl>, Wk51 <dbl>, Wk52 <dbl>
DF_lg <- DF %>%
gather(key = "WeekNr", value = "Sale", -Year) %>%
separate(WeekNr, into = c("dummy", "Week"),
sep = "k",
remove = FALSE) %>%
mutate(Date = as.Date(paste(Year, Week, 1, sep = "-"), "%Y-%U-%u")) %>%
arrange(Year) %>%
select(Date, Year, Week, Sale)
DF_lg
#> # A tibble: 104 x 4
#> Date Year Week Sale
#> <date> <dbl> <chr> <dbl>
#> 1 1999-01-04 1999 01 2.42
#> 2 1999-01-11 1999 02 3.52
#> 3 1999-01-18 1999 03 4.54
#> 4 1999-01-25 1999 04 3.25
#> 5 1999-02-01 1999 05 4.05
#> 6 1999-02-08 1999 06 4.13
#> 7 1999-02-15 1999 07 3.97
#> 8 1999-02-22 1999 08 1.32
#> 9 1999-03-01 1999 09 3.51
#> 10 1999-03-08 1999 10 3.24
#> # ... with 94 more rows
# create a daily date data frame
start_date <- as.Date("1999-01-01")
end_date <- as.Date("2000-12-31")
daily_vec <- tibble(Date = seq(start_date,
end_date,
by = 'day'))
# merge then do interpolation
DF_daily <- DF_lg %>%
right_join(daily_vec, by = 'Date') %>%
na.locf(na.rm = FALSE)
DF_daily
#> # A tibble: 731 x 4
#> Date Year Week Sale
#> <date> <dbl> <chr> <dbl>
#> 1 1999-01-01 NA <NA> NA
#> 2 1999-01-02 NA <NA> NA
#> 3 1999-01-03 NA <NA> NA
#> 4 1999-01-04 1999 01 2.42
#> 5 1999-01-05 1999 01 2.42
#> 6 1999-01-06 1999 01 2.42
#> 7 1999-01-07 1999 01 2.42
#> 8 1999-01-08 1999 01 2.42
#> 9 1999-01-09 1999 01 2.42
#> 10 1999-01-10 1999 01 2.42
#> # ... with 721 more rows