Any example code beyond the documentation would be great.
It seems that `extract_fit_engine()` is the way to go. Below is a reprex. In the simple version I can get the coefficients from `glmnet` through `tidymodels`, but I've been stuck trying to get the column coefficients once resampling, tuning, etc. are involved -- I've been unable to configure a correct `workflow()` to use with `pull_workflow_fit()` or `extract_fit_engine()` (not shown below).
# for reprex
library(reprex)
library(plyr)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(stringr)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
library(glmnet)
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
#> Loaded glmnet 4.1-1
# Use GMT as the session time zone.
Sys.setenv(TZ = "GMT")
# A large positive `scipen` biases printing towards fixed (non-scientific)
# notation.
options(scipen = 99999)
#rm(list=ls())
# Synthesise time series data.
# Random year/month/day triples are pasted together and parsed into dates,
# calendar features are derived from those dates, and the response `yy` is a
# noisy linear function of year, month and week.
df <- data.frame(
  yearr = sample(2015:2021, size = 2000, replace = TRUE),
  monthh = sample(1:12, size = 2000, replace = TRUE),
  dayy = sample(1:29, size = 2000, replace = TRUE)
) |>
  mutate(
    datee = ymd(paste(yearr, monthh, dayy)),
    weekk = week(datee),
    quarterr = quarter(datee),
    semesterr = semester(datee),
    doyy = yday(datee),
    yy = (130 * yearr) + 2 * (monthh + weekk) +
      sample(0:100, size = 2000, replace = TRUE),
    dummyy = round(sample(0:1, size = 2000, replace = TRUE))
  ) |>
  # Day 29 is not a valid date in every month/year drawn above; such rows
  # fail to parse, come back as NA, and are dropped here.
  filter(!is.na(datee)) |>
  # Oldest first, then number the rows in that order.
  arrange(datee) |>
  mutate(ii = row_number())
#> Warning: 3 failed to parse.
####################
# SIMPLE VERSION
####################
# Set up necessary elastic net objects:
# a glmnet-backed linear regression with fixed penalty and mixture.
spec_enet <- set_engine(
  linear_reg(mode = "regression", penalty = 0, mixture = 0.6),
  "glmnet"
)

# Trained recipe: `yy` against every other column, with the raw date removed.
rec_generic <- recipe(df, yy ~ .) |>
  step_rm(datee) |> # For analysis
  prep()

# Save date variable from step_rm
time_only <- do.call("c", select(df, datee))

# Processed version of the data the recipe was prepped on.
df_baked <- bake(rec_generic, new_data = NULL)

# Easy way to get coefficients
fit_enet <- fit(spec_enet, yy ~ ., data = df_baked)

# Column coefficients
tidy_enet <- tidy(fit_enet)
tidy_enet # coefficients table AKA 'feature weights' or 'betas'
#> # A tibble: 10 x 3
#> term estimate penalty
#> <chr> <dbl> <dbl>
#> 1 (Intercept) 153877. 0
#> 2 yearr 53.7 0
#> 3 monthh 1.86 0
#> 4 dayy 0 0
#> 5 weekk 0.406 0
#> 6 quarterr 1.27 0
#> 7 semesterr 0 0
#> 8 doyy 0.00750 0
#> 9 dummyy 0 0
#> 10 ii 0.263 0
###################################
# WITH RESAMPLES, GRID SEARCH, ETC.
###################################
# For tune_grid parameters: same glmnet model, but penalty and mixture are
# left as tune() placeholders to be filled in from the grid.
spec_enet_tune <- set_engine(
  linear_reg(mode = "regression", penalty = tune(), mixture = tune()),
  "glmnet"
)

# rec_iteration is without prep()
rec_iteration <- recipe(df_baked, yy ~ .) |>
  step_zv(doyy)

# Ten Monte Carlo resamples with a 3/4 proportion.
# NOTE(review): the resamples are drawn from `df` (which still contains
# `datee`) while the recipe template above is `df_baked` -- confirm this
# mismatch is intended.
folds <- mc_cv(df, prop = 3 / 4, times = 10)

# Only RMSE is collected.
metric <- metric_set(rmse)

# Full 5 x 5 grid of candidate penalty and mixture values.
grid_pen_mix <- expand_grid(
  penalty = seq(from = 0, to = 100, by = 25),
  mixture = seq(from = 0, to = 1, by = 0.25)
)
# Goes under ctrl
#' Summarise the glmnet regularisation path of a fitted workflow.
#'
#' Intended to be passed as `extract` to `control_grid()`.
#'
#' @param x A fitted workflow object, as supplied by `tune_grid()`.
#' @return A tibble with one row per value on the glmnet path: `penalty`
#'   (the engine's `lambda`) and `num_vars` (the engine's `df`, i.e. the
#'   number of model terms at that penalty).
glmnet_vars <- function(x) {
  # `x` will be a workflow object.
  # extract_fit_engine() has no workflow method on older installations
  # ("no applicable method for 'extract_fit_engine'"), which turns every
  # extract into a try-error. Fall back to the older accessor in that case;
  # upgrading workflows/hardhat makes the fallback unnecessary.
  # https://tune.tidymodels.org/reference/extract-tune.html
  mod <- tryCatch(
    extract_fit_engine(x),
    error = function(err) pull_workflow_fit(x)$fit
  )
  # `df` is the number of model terms for each penalty value
  tibble(penalty = mod$lambda, num_vars = mod$df)
}
# Have tune_grid() run glmnet_vars() on each fitted model and log progress.
ctrl <- control_grid(verbose = TRUE, extract = glmnet_vars)

# The model spec and the recipe are handed to tune_grid() directly; no
# workflow() object is built by hand here.
tune_attempt2 <- tune_grid(
  object = spec_enet_tune,
  preprocessor = rec_iteration,
  resamples = folds, # way above
  grid = grid_pen_mix,
  metrics = metric,
  control = ctrl
) # Where do I put the workflow?
#> i Resample01: preprocessor 1/1
#> v Resample01: preprocessor 1/1
...
#> v Resample10: preprocessor 1/1, model 5/5
#> i Resample10: preprocessor 1/1, model 5/5 (predictions)
# Extracts for the first resample. Select the column by name rather than by
# position so this keeps working if the column order of the results changes.
tune_attempt2[[".extracts"]][[1]]
#> # A tibble: 25 x 4
#> penalty mixture .extracts .config
#> <dbl> <dbl> <list> <chr>
#> 1 100 0 <try-errr [1]> Preprocessor1_Model01
#> 2 100 0 <try-errr [1]> Preprocessor1_Model02
#> 3 100 0 <try-errr [1]> Preprocessor1_Model03
#> 4 100 0 <try-errr [1]> Preprocessor1_Model04
#> 5 100 0 <try-errr [1]> Preprocessor1_Model05
#> 6 100 0.25 <try-errr [1]> Preprocessor1_Model06
#> 7 100 0.25 <try-errr [1]> Preprocessor1_Model07
#> 8 100 0.25 <try-errr [1]> Preprocessor1_Model08
#> 9 100 0.25 <try-errr [1]> Preprocessor1_Model09
#> 10 100 0.25 <try-errr [1]> Preprocessor1_Model10
#> # ... with 15 more rows
# Error in UseMethod("extract_fit_engine"): no applicable method for 'extract_fit_engine' for 'workflow'
# How do I get glmnet/linear_reg object from tune_grid?
Created on 2021-09-07 by the reprex package (v2.0.0)
I looked through the posts and answers below and read the documentation numerous times, but I've been struggling for some weeks to work out how to achieve this.