I am trying to recreate the example from the two-stage interaction effect selection using lasso from the Feature Engineering and Selection book FES by adopting the code that can be found here: 07_Detecting_Interaction_Effects/7_04_The_Brute-Force_Approach_to_Identifying_Predictive_Interactions/ames_glmnet.R.
However, I cannot get it quite right most probably because the step_interact does not accept the variable interaction_subset which contains the formulae of all the interactions as i get this error:
B | error: Error in `step_interact()`:
Caused by error in `map()`:
ℹ In index: 38.
Caused by error in `purrr::reduce()`:
! Must supply `.init` when `.x` is empty.
There were issues with some computations A: x10 B: x10
As an aside, during the second phase do I only use the main effects and their interactions or should i refit the model with all the variables and the interactions of the main effects?
Here is the full code:
library(caret)
library(glmnet)
library(tidymodels)
library(AmesHousing)
library(gridExtra)
library(stringr)
# ------------------------------------------------------------------------------
ames <- make_ames()
set.seed(955)
ames_split <- initial_split(ames)
ames_train <- training(ames_split)
set.seed(24873)
ames_folds <- vfold_cv(ames_train)
lasso_spec <-
linear_reg(penalty = tune(), mixture = 1) %>%
set_mode("regression") %>%
set_engine("glmnet")
main_rec <-
recipe(Sale_Price ~ Bldg_Type + Neighborhood + Year_Built +
Gr_Liv_Area + Full_Bath + Year_Sold + Lot_Area +
Central_Air + Longitude + Latitude + MS_SubClass +
Alley + Lot_Frontage + Pool_Area + Garage_Finish +
Foundation + Land_Contour + Roof_Style,
data = ames_train) %>%
step_log(Sale_Price, base = 10) %>%
step_BoxCox(Lot_Area, Gr_Liv_Area, Lot_Frontage) %>%
step_other(Neighborhood, threshold = 0.05) %>%
step_dummy(all_nominal()) %>%
step_zv(all_predictors()) %>%
step_bs(Longitude, Latitude, options = list(df = 5)) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors())
lasso_workflow <- workflow() %>%
add_recipe(main_rec) %>%
add_model(lasso_spec)
lasso_params<- grid_regular(penalty(), levels = 6)
lasso_res <- tune_grid(
lasso_workflow,
resamples = ames_folds,
grid = lasso_params
)
lasso_res %>%
show_best("rmse")
best_penalty <- select_best(lasso_res)
lasso_final <- finalize_workflow(lasso_workflow, best_penalty)
lasso_final_fit <- fit(lasso_final, data = ames_train)
tidy(lasso_final_fit) %>% filter(estimate >0)
interaction_subset <- t(combn(as.character(tidy(lasso_final_fit) %>% filter(estimate >0, term != "(Intercept)")%>%pull(term)), 2))
colnames(interaction_subset) <- c("var1", "var2")
interaction_subset <-
interaction_subset %>%
as_tibble() %>%
mutate(
term =
paste0(
"starts_with('",
var1,
"'):starts_with('",
var2,
"')"
)
) %>%
pull(term) %>%
paste(collapse = "+")
interaction_subset <- paste("~", interaction_subset)
interaction_subset <- as.formula(interaction_subset)
two_stage_rec <-
recipe(Sale_Price ~ Bldg_Type + Neighborhood + Year_Built + Gr_Liv_Area + Full_Bath + Year_Sold + Lot_Area +
Central_Air + Longitude + Latitude + MS_SubClass + Alley + Lot_Frontage + Pool_Area + Garage_Finish +
Foundation + Land_Contour + Roof_Style, data = ames_train) %>%
step_log(Sale_Price, base = 10) %>%
step_BoxCox(Lot_Area, Gr_Liv_Area, Lot_Frontage) %>%
step_other(Neighborhood, threshold = 0.05) %>%
step_dummy(all_nominal()) %>%
step_interact(interaction_subset) %>%
step_zv(all_predictors()) %>%
step_bs(Longitude, Latitude, options = list(df = 5)) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors())
lasso_workflow_updated <- workflow() %>%
add_recipe(two_stage_rec) %>%
add_model(lasso_spec)
lasso_res_updated <- tune_grid(
lasso_workflow_updated,
resamples = ames_folds,
grid = lasso_params
)