I want to tune the xgboost model using bayesian optimization by tidymodels but when defining the range of hyperparameter values there is a problem. Anyone can help me?? I want specific hyperparameter combinations so i use expand.grid() function
library(visdat)
library(tidyverse)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
#> Warning: package 'dials' was built under R version 4.1.3
#> Warning: package 'parsnip' was built under R version 4.1.3
library(patchwork)
library(readxl)
library(ranger)
library(reprex)
#> Warning: package 'reprex' was built under R version 4.1.3
data <- mtcars
data$cyl <- as.factor(data$cyl)
data$vs <- as.factor(data$vs)
data$am <- as.factor(data$am)
data$gear <- as.factor(data$gear)
data$carb <- as.factor(data$carb)
set.seed(123)
data.training <- data
xgboost_recipe <-
recipe(formula = mpg ~ ., data = data.training) %>%
step_novel(all_nominal(), -all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%
step_zv(all_predictors())
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
loss_reduction = tune(), sample_size = tune(), mtry = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
# define hyperparameter range
params <- expand.grid(
trees = seq(500,1000,by=200),
min_n = seq(10,40, by=10),
tree_depth = seq(6L, 10L, by=2L),
learn_rate = seq(0.1,1, by=0.5),
loss_reduction = seq(0.1,1, by=0.5),
sample_prop = seq(0.1,1, by=0.5), mtry=seq(2,7, by=1))
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
# resampling
set.seed(123)
resampling <- vfold_cv(data = data.training, v = 10, strata = mpg)
#> Warning: The number of observations in each quantile is below the recommended threshold of 20.
#> * Stratification will use 1 breaks instead.
#> Warning: Too little data to stratify.
#> * Resampling will be unstratified.
# Tune use bayesian optimization
doParallel::registerDoParallel()
set.seed(456)
res <-
tune_bayes(xgboost_workflow,
iter = 6,
resamples = resampling,
param_info = params,
metrics = metric_set(mae),
control = control_bayes(verbose = TRUE,
save_pred = TRUE)
)
#> ! There are 1728 tuning parameters and 5 grid points were requested. This is
#> likely to cause numerical issues in the first few search iterations.
#> Error in UseMethod("grid_latin_hypercube"): no applicable method for 'grid_latin_hypercube' applied to an object of class "data.frame"
Created on 2022-06-18 by the reprex package (v2.0.1)