select_best fails with custom metric_set

Hi everyone,

I'm having some issues with the show_best() and select_best() functions when trying to do hyperparameter tuning with a custom metric.

My metric is quadratic weighted kappa. I'm able to get results from collect_metrics, but when I try select_best(metric = <custom_metric_name>), I get

Error in `.filter_perf_metrics()`:
! No results are available. Please use `collect_metrics()` to see if there were any issues.

The results in my collect_metrics table show the .metric as "kap", so I've tried that too: select_best(metric = "kap"), but I get another error:

Error in `select_best()`:
! "kap" was not in the metric set. Please choose from: "quad_kap".

Here is my full reprex. Does anyone know how I can retrieve my best result?

set.seed(123)
library(tidymodels)

packageVersion('tidymodels')
#> [1] '1.4.1'
packageVersion('yardstick')
#> [1] '1.3.2'
packageVersion('tune')
#> [1] '2.0.1'

penguins = penguins |>
  mutate(
    across(where(is.character), as.factor)
  ) |>
  drop_na()

penguins_split = initial_split(
  penguins, prop = 0.8, strata = species
)

penguins_train = training(penguins_split)
penguins_test = testing(penguins_split)

# Lasso
multinom_model = multinom_reg(penalty = tune(), mixture = 1) |>
  set_engine("glmnet")

multinom_recipe = recipe(species ~ ., data = penguins_train) |>
  step_dummy(all_nominal_predictors(), one_hot = TRUE) |>
  step_zv(all_predictors()) |>
  step_normalize(all_predictors())

multinom_recipe |>
  prep() |>
  bake(new_data = penguins_train)
#> # A tibble: 265 × 10
#>    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g species
#>             <dbl>         <dbl>             <dbl>       <dbl> <fct>  
#>  1         -0.695         0.414            -0.439      -1.22  Adelie 
#>  2         -1.37          1.06             -0.583      -0.968 Adelie 
#>  3         -0.956         0.314            -1.45       -0.748 Adelie 
#>  4         -0.900         1.21             -0.439       0.574 Adelie 
#>  5         -0.546         0.214            -1.37       -1.28  Adelie 
#>  6         -1.01          2.02             -0.727      -0.528 Adelie 
#>  7         -1.76          1.97             -0.224       0.228 Adelie 
#>  8         -0.993         0.914            -0.439      -0.968 Adelie 
#>  9         -0.285         1.76             -0.296       0.354 Adelie 
#> 10         -1.79          0.614            -1.23       -1.13  Adelie 
#> # ℹ 255 more rows
#> # ℹ 5 more variables: island_Biscoe <dbl>, island_Dream <dbl>,
#> #   island_Torgersen <dbl>, sex_female <dbl>, sex_male <dbl>

( multinom_work = workflow() |>
    add_model(multinom_model) |>
    add_recipe(multinom_recipe) 
)
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: multinom_reg()
#> 
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#> 
#> • step_dummy()
#> • step_zv()
#> • step_normalize()
#> 
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Multinomial Regression Model Specification (classification)
#> 
#> Main Arguments:
#>   penalty = tune()
#>   mixture = 1
#> 
#> Computational engine: glmnet

( penguins_vf = rsample::vfold_cv(penguins_train, v=5, strata = species) )
#> #  5-fold cross-validation using stratification 
#> # A tibble: 5 × 2
#>   splits           id   
#>   <list>           <chr>
#> 1 <split [211/54]> Fold1
#> 2 <split [212/53]> Fold2
#> 3 <split [212/53]> Fold3
#> 4 <split [212/53]> Fold4
#> 5 <split [213/52]> Fold5

quad_kap = function(data, truth, estimate, na_rm = TRUE, ...) {
  kap(
    data = data,
    truth = !!rlang::enquo(truth),
    estimate = !!rlang::enquo(estimate),
    weighting = "quadratic",
    na_rm = na_rm,
    ...
  )
}
quad_kap = new_class_metric(quad_kap, "maximize")

( multinom_grid = multinom_work |>
    tune_grid(
      penguins_vf,
      control = control_grid(save_pred = TRUE),
      metrics = metric_set(quad_kap)
    )
)
#> # Tuning results
#> # 5-fold cross-validation using stratification 
#> # A tibble: 5 × 5
#>   splits           id    .metrics          .notes           .predictions      
#>   <list>           <chr> <list>            <list>           <list>            
#> 1 <split [211/54]> Fold1 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [540 × 5]>
#> 2 <split [212/53]> Fold2 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 3 <split [212/53]> Fold3 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 4 <split [212/53]> Fold4 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [530 × 5]>
#> 5 <split [213/52]> Fold5 <tibble [10 × 5]> <tibble [0 × 4]> <tibble [520 × 5]>

multinom_grid |>
  collect_metrics()
#> # A tibble: 10 × 7
#>     penalty .metric .estimator  mean     n std_err .config         
#>       <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>           
#>  1 1.05e-10 kap     multiclass 1         5 0       pre0_mod01_post0
#>  2 1.44e- 9 kap     multiclass 1         5 0       pre0_mod02_post0
#>  3 1.40e- 8 kap     multiclass 1         5 0       pre0_mod03_post0
#>  4 1.81e- 7 kap     multiclass 1         5 0       pre0_mod04_post0
#>  5 1.45e- 6 kap     multiclass 1         5 0       pre0_mod05_post0
#>  6 1.80e- 5 kap     multiclass 1         5 0       pre0_mod06_post0
#>  7 1.71e- 4 kap     multiclass 1         5 0       pre0_mod07_post0
#>  8 1.61e- 3 kap     multiclass 1         5 0       pre0_mod08_post0
#>  9 1.70e- 2 kap     multiclass 0.995     5 0.00480 pre0_mod09_post0
#> 10 6.15e- 1 kap     multiclass 0         5 0       pre0_mod10_post0

multinom_grid |>
  select_best(metric = "quad_kap")
#> Error in `.filter_perf_metrics()`:
#> ! No results are available. Please use `collect_metrics()` to see if
#>   there were any issues.