Hi everyone,
I'm having some issues with the show_best() and select_best() functions when trying to do hyperparameter tuning with a custom metric.
My metric is quadratic weighted kappa. I'm able to get results from collect_metrics, but when I try select_best(metric = <custom_metric_name>), I get
Error in `.filter_perf_metrics()`:
! No results are available. Please use `collect_metrics()` to see if there were any issues.
The results in my collect_metrics table show the .metric as "kap", so I've tried that too: select_best(metric = "kap"), but I get another error:
Error in `select_best()`:
! "kap" was not in the metric set. Please choose from: "quad_kap".
Here is my full reprex. Does anyone know how I can retrieve my best result?
set.seed(123)
library(tidymodels)
packageVersion('tidymodels')
#> [1] '1.4.1'
packageVersion('yardstick')
#> [1] '1.3.2'
packageVersion('tune')
#> [1] '2.0.1'
penguins = penguins |>
mutate(
across(where(is.character), as.factor)
) |>
drop_na()
penguins_split = initial_split(
penguins, prop = 0.8, strata = species
)
penguins_train = training(penguins_split)
penguins_test = testing(penguins_split)
# Lasso
multinom_model = multinom_reg(penalty = tune(), mixture = 1) |>
set_engine("glmnet")
multinom_recipe = recipe(species ~ ., data = penguins_train) |>
step_dummy(all_nominal_predictors(), one_hot = TRUE) |>
step_zv(all_predictors()) |>
step_normalize(all_predictors())
multinom_recipe |>
prep() |>
bake(new_data = penguins_train)
#> # A tibble: 265 × 10
#> bill_length_mm bill_depth_mm flipper_length_mm body_mass_g species
#> <dbl> <dbl> <dbl> <dbl> <fct>
#> 1 -0.695 0.414 -0.439 -1.22 Adelie
#> 2 -1.37 1.06 -0.583 -0.968 Adelie
#> 3 -0.956 0.314 -1.45 -0.748 Adelie
#> 4 -0.900 1.21 -0.439 0.574 Adelie
#> 5 -0.546 0.214 -1.37 -1.28 Adelie
#> 6 -1.01 2.02 -0.727 -0.528 Adelie
#> 7 -1.76 1.97 -0.224 0.228 Adelie
#> 8 -0.993 0.914 -0.439 -0.968 Adelie
#> 9 -0.285 1.76 -0.296 0.354 Adelie
#> 10 -1.79 0.614 -1.23 -1.13 Adelie
#> # ℹ 255 more rows
#> # ℹ 5 more variables: island_Biscoe <dbl>, island_Dream <dbl>,
#> # island_Torgersen <dbl>, sex_female <dbl>, sex_male <dbl>
( multinom_work = workflow() |>
add_model(multinom_model) |>
add_recipe(multinom_recipe)
)
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: multinom_reg()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> • step_dummy()
#> • step_zv()
#> • step_normalize()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Multinomial Regression Model Specification (classification)
#>
#> Main Arguments:
#> penalty = tune()
#> mixture = 1
#>
#> Computational engine: glmnet
( penguins_vf = rsample::vfold_cv(penguins_train, v=5, strata = species) )
#> # 5-fold cross-validation using stratification
#> # A tibble: 5 × 2
#> splits id
#> <list> <chr>
#> 1 <split [211/54]> Fold1
#> 2 <split [212/53]> Fold2
#> 3 <split [212/53]> Fold3
#> 4 <split [212/53]> Fold4
#> 5 <split [213/52]> Fold5
quad_kap = function(data, truth, estimate, na_rm = TRUE, ...) {
kap(
data = data,
truth = !!rlang::enquo(truth),
estimate = !!rlang::enquo(estimate),
weighting = "quadratic",
na_rm = na_rm,
...
)
}
quad_kap = new_class_metric(quad_kap, "maximize")
( multinom_grid = multinom_work |>
tune_grid(
penguins_vf,
control = control_grid(save_pred = TRUE),
metrics = metric_set(quad_kap)
)
)
#> # Tuning results
#> # 5-fold cross-validation using stratification
#> # A tibble: 5 × 5
#> splits id .metrics .notes .predictions
#> <list> <chr> <list> <list> <list>
#> 1 <split [211/54]> Fold1 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [540 × 5]>
#> 2 <split [212/53]> Fold2 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 3 <split [212/53]> Fold3 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 4 <split [212/53]> Fold4 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 5 <split [213/52]> Fold5 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [520 × 5]>
multinom_grid |>
collect_metrics()
#> # A tibble: 10 × 7
#> penalty .metric .estimator mean n std_err .config
#> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
#> 1 1.05e-10 kap multiclass 1 5 0 pre0_mod01_post0
#> 2 1.44e- 9 kap multiclass 1 5 0 pre0_mod02_post0
#> 3 1.40e- 8 kap multiclass 1 5 0 pre0_mod03_post0
#> 4 1.81e- 7 kap multiclass 1 5 0 pre0_mod04_post0
#> 5 1.45e- 6 kap multiclass 1 5 0 pre0_mod05_post0
#> 6 1.80e- 5 kap multiclass 1 5 0 pre0_mod06_post0
#> 7 1.71e- 4 kap multiclass 1 5 0 pre0_mod07_post0
#> 8 1.61e- 3 kap multiclass 1 5 0 pre0_mod08_post0
#> 9 1.70e- 2 kap multiclass 0.995 5 0.00480 pre0_mod09_post0
#> 10 6.15e- 1 kap multiclass 0 5 0 pre0_mod10_post0
multinom_grid |>
select_best(metric = "quad_kap")
#> Error in `.filter_perf_metrics()`:
#> ! No results are available. Please use `collect_metrics()` to see if
#> there were any issues.