Below is my code so far, using sample time series data.
My questions are:
- How do I apply `last_fit()` to the result of `tune_grid()`?
- How do I apply the resulting best model to new data (say `df_test`, some imaginary `df_test2`, `df_train` itself, or any of the CV folds) that has only `yearr`, `monthh`, etc. but no `y`, in order to predict `y`?
My rough guesses for both are sketched right below the questions; the full reprex follows.
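For the first question, this is the sequence I imagine, using the objects defined in the reprex below (only a sketch; I'm not sure `select_best()` / `finalize_workflow()` / `last_fit()` is the intended way to go from `tune_grid()` to a final fit):

# Sketch for question 1 (guess): pick the best hyperparameters, finalize the
# workflow, then let last_fit() refit on df_train and evaluate on df_test.
best_params = tune_enet |>
  select_best(metric = "rmse")
wf_final = wf_enet |>
  finalize_workflow(best_params)
fit_final = wf_final |>
  last_fit(df_split)
fit_final |>
  collect_metrics()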
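For the second question, something like this is what I have in mind (`df_test2` is an imaginary feature-only data frame; again just a guess that `extract_workflow()` plus `predict()` is the right route):

# Sketch for question 2 (guess): pull the fitted workflow out of the
# last_fit() result and predict on data that has only the predictors
# (yearr, monthh, quarterr, semesterr, ydayy) and no y column.
fitted_wf = fit_final |>
  extract_workflow()
predict(fitted_wf, new_data = df_test)
# predict(fitted_wf, new_data = df_test2)  # imaginary new feature-only data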
#rm(list=ls())
# Libs
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(glmnet))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(stacks))
suppressPackageStartupMessages(library(recipes))
suppressPackageStartupMessages(library(workflows))
suppressPackageStartupMessages(library(tune))
suppressPackageStartupMessages(library(yardstick))
# Data
df = read_csv('https://github.com/andrew-couch/Tidy-Tuesday/raw/master/Season%202/Data/example_retail_sales.csv', show_col_types = FALSE)
# Dates feature engineering
dff = df |>
  mutate(yearr = year(ds),
         monthh = month(ds),
         quarterr = quarter(ds),
         semesterr = semester(ds),
         ydayy = yday(ds)) |>
  select(-ds)
# Sample
df_split = initial_time_split(dff)
df_train = training(df_split)
df_test = testing(df_split)
folds = vfold_cv(df_train)
# Elastic net
spec_enet = linear_reg(mode = 'regression', penalty = tune(), mixture = tune()) |>
  set_engine('glmnet')
# Recipes
rec_df = df_train |>
  recipe(y ~ .)
# IGNORED
holidays <- c("AllSouls", "AshWednesday", "ChristmasEve", "Easter",
              "ChristmasDay", "GoodFriday", "NewYearsDay", "PalmSunday")
rec_generic = rec_df |>
  #step_dummy(all_nominal()) |>
  #step_zv(all_numeric(), all_outcomes()) |>
  #step_normalize(all_numeric(), -all_outcomes()) |>
  #update_role(datee, new_role = 'ID') #|> # CHANGE
  #step_zv(all_predictors())
  step_holiday(ds, holidays = holidays) |> # NEED DATEE; ds was dropped from dff above, so this recipe is left unused
  step_rm(ds)
# IGNORED
# Controls
metric = metric_set(rmse)
#grid_ctrl = control_stack_grid()
#res_ctrl = control_stack_resamples()
# Workflow
wf_enet = workflow() |>
  add_model(spec_enet) |>
  add_recipe(rec_df)
# 30 candidate (penalty, mixture) pairs, taken row-wise (not a crossed grid)
enet_grid = tibble(penalty = 10^seq(-4, -1, length.out = 30),
                   mixture = seq(from = 0, to = 1, by = (1/29)))
# Tune enet hyperparameters
tune_enet = wf_enet |>
  tune_grid(folds,
            grid = enet_grid,
            control = control_grid(save_pred = TRUE),
            metrics = metric_set(rmse))
enet_best = tune_enet |>
  show_best(metric = "rmse")
#autoplot(tune_enet)
tune_enet |>
  collect_predictions()
#> # A tibble: 6,570 x 7
#> id .pred .row penalty mixture y .config
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <chr>
#> 1 Fold01 197658. 14 0.0001 0 150087 Preprocessor1_Model01
#> 2 Fold01 201480. 27 0.0001 0 192319 Preprocessor1_Model01
#> 3 Fold01 232054. 35 0.0001 0 202520 Preprocessor1_Model01
#> 4 Fold01 205302. 40 0.0001 0 196039 Preprocessor1_Model01
#> 5 Fold01 197658. 50 0.0001 0 192380 Preprocessor1_Model01
#> 6 Fold01 228232. 58 0.0001 0 222663 Preprocessor1_Model01
#> 7 Fold01 224410. 69 0.0001 0 222807 Preprocessor1_Model01
#> 8 Fold01 193837. 73 0.0001 0 207853 Preprocessor1_Model01
#> 9 Fold01 209123. 77 0.0001 0 245027 Preprocessor1_Model01
#> 10 Fold01 197658. 86 0.0001 0 220650 Preprocessor1_Model01
#> # ... with 6,560 more rows
Created on 2022-01-10 by the reprex package (v2.0.1)