I have what I think is weird behavior where if I do a step_dummy() BEFORE a step_normalize() (the recommended order from use_glmnet(), the model fitting errs out with the "Warning all models failed. See the .notes column" error. But, if I instead do step_normalize() and then step_dummy(), it works just fine. I have a reproducible example below ... sorry it's sort of long because I couldn't reproduce it with different data.
library(tidyverse) # for graphing and data cleaning
library(tidymodels) # for modeling
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
library(naniar) # for analyzing missing values
library(vip) # for variable importance plots
#>
#> Attaching package: 'vip'
#> The following object is masked from 'package:utils':
#>
#> vi
hotels <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-11/hotels.csv')
#> Rows: 119390 Columns: 32
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (13): hotel, arrival_date_month, meal, country, market_segment, distrib...
#> dbl (18): is_canceled, lead_time, arrival_date_year, arrival_date_week_numb...
#> date (1): reservation_status_date
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
hotels_mod <- hotels %>%
mutate(is_canceled = as.factor(is_canceled)) %>%
mutate(across(where(is.character), as.factor)) %>%
select(-arrival_date_year,
-reservation_status,
-reservation_status_date) %>%
add_n_miss() %>%
filter(n_miss_all == 0) %>%
select(-n_miss_all)
set.seed(494)
# Randomly assigns 50% of the data to training.
hotels_split <- initial_split(hotels_mod,
prop = .5,
strata = is_canceled)
hotels_training <- training(hotels_split)
hotels_testing <- testing(hotels_split)
hotel_recipe <- recipe(is_canceled ~ .,
data = hotels_training) %>%
step_mutate_at(children, babies, previous_cancellations,
fn = ~ as.numeric(. > 0)) %>%
step_mutate_at(agent, company,
fn = ~ as.numeric(. == "NULL")) %>%
step_mutate(country,
country_grp = fct_lump_n(country, n = 5)) %>%
step_rm(country) %>%
## If I put this step_dummy() AFTER step_normalize(), it runs fine
step_dummy(all_nominal(),
-all_outcomes()) %>%
step_normalize(all_predictors(),
-all_nominal(),
-all_outcomes())
hotel_lasso_mod <-
# Define a lasso model
logistic_reg(mixture = 1) %>%
# Set the engine to "glmnet"
set_engine("glmnet") %>%
# The parameters we will tune.
set_args(penalty = tune()) %>%
# Use "regression"
set_mode("classification")
hotel_lasso_wf <-
# Set up the workflow
workflow() %>%
# Add the recipe
add_recipe(hotel_recipe) %>%
# Add the modeling
add_model(hotel_lasso_mod)
set.seed(494) # for reproducibility
#5-fold cv
hotel_cv <- vfold_cv(hotels_training, v = 5)
# potential penalty parameters
penalty_grid <- grid_regular(penalty(),
levels = 10)
hotel_lasso_tune <-
hotel_lasso_wf %>%
tune_grid(
resamples = hotel_cv,
grid = penalty_grid
)
#> x Fold1: preprocessor 1/1, model 1/1: Error in lognet(xd, is.sparse, ix, jx, y, w...
#> x Fold2: preprocessor 1/1, model 1/1: Error in lognet(xd, is.sparse, ix, jx, y, w...
#> x Fold3: preprocessor 1/1, model 1/1: Error in lognet(xd, is.sparse, ix, jx, y, w...
#> x Fold4: preprocessor 1/1, model 1/1: Error in lognet(xd, is.sparse, ix, jx, y, w...
#> x Fold5: preprocessor 1/1, model 1/1: Error in lognet(xd, is.sparse, ix, jx, y, w...
#> Warning: All models failed. See the `.notes` column.
Created on 2021-09-09 by the reprex package (v2.0.0)