Dear all,
In this example, I need to tune one hyperparameter in the recipe and others in the model. Apparently, the code runs without problems; however, the rank_results()
function does not work on the object returned by workflow_map()
.
Is there a more consistent way to specify which hyperparameters belong to the model and which belong to the recipe that I am not considering in the code?
Apparently, the tuning was successful:
> tunagem
# A workflow set/tibble: 2 × 4
wflow_id info option result
<chr> <list> <list> <list>
1 modelo_lm <tibble [1 × 4]> <opts[4]> <tune[+]>
2 modelo_rf <tibble [1 × 4]> <opts[4]> <tune[+]>
I believe the problem is that the hyperparameters are not well specified and not clearly defined for each of the models.
library(tidymodels)
library(parallelly)
tidymodels::tidymodels_prefer()
cores <- parallelly::availableCores(logical = FALSE)
cl <- parallel::makeForkCluster(cores)
doParallel::registerDoParallel(cl)
random_dados <- function(n = 1L, sigma2 = 2){
x <- runif(n = n, 8, 18)
y <- 45 * tanh(x/1.7 - 7) + 57 + rnorm(n = n, mean = 0, sd = sigma2^2)
tibble(x = x, y = y)
}
# Gerando dados -----------------------------------------------------------
set.seed(123)
dados <- random_dados(n = 2e3, sigma2 = 2)
# Realizando hold-out -----------------------------------------------------
dados_split <- initial_split(dados, prop = 0.8, strata = y)
treino <- training(dados_split)
teste <- testing(dados_split)
# 5-fold cross-validation -------------------------------------------------
cv <- vfold_cv(treino, v = 5, strata = y, repeats = 5)
# Definindo alguns modelos para comparar ----------------------------------
modelo_linear <- linear_reg() %>%
set_engine("lm")
modelo_rf <- rand_forest(min_n = tune(), trees = tune(), mtry = tune()) %>%
set_engine("ranger") %>%
set_mode("regression")
# Definindo receita -------------------------------------------------------
receita <- recipe(y ~ ., data = treino) |>
step_scale(all_predictors()) |>
step_center(all_predictors()) |>
step_poly(x, degree = tune("p"))
# Parâmetros da receita ---------------------------------------------------
parametros_receita <-
receita |>
hardhat::extract_parameter_set_dials() |>
update(
p = degree_int(c(1L, 15L))
)
# Parâmetros do modelo ----------------------------------------------------
parametros_modelo <-
modelo_rf |>
hardhat::extract_parameter_set_dials() |>
update(
trees = trees(c(1L, 1000L)),
min_n = min_n(c(2L, 10L)),
mtry = finalize(mtry(), treino)
)
# parametros <- dplyr::bind_rows(parametros_receita, parametros_modelo)
parametros <- dials::parameters(
list(
p = degree_int(c(1L, 25L)),
trees = trees(c(1L, 1000L)),
min_n = min_n(c(2L, 10L)),
mtry = finalize(mtry(), treino)
)
)
# Definindo workflow ------------------------------------------------------
wf <-
workflow_set(
preproc = list("modelo" = receita),
models = list(
lm = modelo_linear,
rf = modelo_rf
)
) |>
option_add(param_info = parametros_receita, id = "modelo_lm") |>
option_add(param_info = parametros_modelo, id = "modelo_rf")
# Tunando -----------------------------------------------------------------
ctrl <-
control_grid(
save_pred = TRUE,
save_workflow = TRUE,
parallel_over = "everything"
)
tunagem <-
wf |>
workflow_map(
fn = "tune_grid",
grid = 5,
resamples = cv,
control = ctrl,
param_info = parametros,
seed = 123
)
foreach::registerDoSEQ()
parallel::stopCluster(cl)