OK, I have figured this out. It had to do with the order of the class-probability columns passed to roc_curve(): they must be supplied in the same order as the factor levels of the truth column.
Here is a reproducible example.
# Attach the tidymodels meta-package, which supplies the pipe, dplyr verbs,
# rsample, recipes, parsnip, workflows, tune, dials and yardstick used below.
library(tidymodels)

# Example data: iris, with the outcome `Species` stored as a factor.
dfr <- iris %>% mutate(Species = factor(Species))

# Stratified 70/30 train/test split. The proportion argument of
# initial_split() is `prop`; the original `prob` is not a recognised argument,
# so the requested 70% was never applied.
data_split <- initial_split(dfr, prop = 0.70, strata = Species)
data_train <- training(data_split)
data_test <- testing(data_split)

# 10-fold cross-validation on the training set, stratified by the outcome.
data_tune_cv <- vfold_cv(data_train, v = 10, repeats = 1, strata = Species)

# Preprocessing recipe: predict Species from every other column.
recipe <- data_train %>% recipe(Species ~ .)
# Random-forest classification spec; mtry, min_n and trees are marked for
# tuning. The ranger engine also records impurity-based variable importance.
rf_spec <- rand_forest(mtry = tune(), min_n = tune(), trees = tune()) %>%
  set_mode("classification") %>%
  set_engine(
    "ranger",
    # Leave one core free, but never request fewer than one thread:
    # detectCores() may return NA, or 1 on a single-core machine.
    num.threads = max(1L, parallel::detectCores() - 1L, na.rm = TRUE),
    importance = "impurity"
  )
# Bundle the preprocessing recipe and the model spec into one workflow.
# The model must be added explicitly; without add_model() the pipe ran
# straight into the next assignment and the workflow had no model to tune.
rf_wf <- workflow() %>%
  add_recipe(recipe) %>%
  add_model(rf_spec)

# Metrics to compute during resampling.
rf_metrics <- metric_set(
  roc_auc, accuracy, bal_accuracy, f_meas, kap, mcc, precision, recall
)
# Latin-hypercube grid of 20 candidates. The upper bound of mtry depends on
# the number of columns, so it is finalized against the training data.
rf_grid <- grid_latin_hypercube(
  mtry = finalize(mtry(), data_train),
  min_n(),
  trees(),
  size = 20
)

# Tune over the CV folds, computing the metric set defined as `rf_metrics`
# (it was previously built but never passed to the tuner).
rf_tune <- tune_grid(
  rf_wf,
  resamples = data_tune_cv,
  grid = rf_grid,
  metrics = rf_metrics
)

# Keep the hyper-parameter combination with the best ROC AUC.
rf_best <- select_best(rf_tune, metric = "roc_auc")
# Refit the finalized workflow on every CV fold, keeping the held-out
# predictions, and collect them into one data frame with the truth column
# (Species) and one .pred_<class> probability column per class.
rf_training_pred <- rf_wf %>%
  finalize_workflow(rf_best) %>%
  fit_resamples(data_tune_cv, control = control_resamples(save_pred = TRUE)) %>%
  collect_predictions()

# incorrect roc
# The probability columns are NOT in the order of levels(Species)
# (setosa, versicolor, virginica), so versicolor and virginica are
# scored against each other's probabilities.
rf_training_pred %>%
  roc_curve(
    truth = Species,
    .pred_setosa, .pred_virginica, .pred_versicolor
  ) %>%
  autoplot()

# correct roc
# The probability columns follow the factor-level order of the truth column.
rf_training_pred %>%
  roc_curve(
    truth = Species,
    .pred_setosa, .pred_versicolor, .pred_virginica
  ) %>%
  autoplot()
Created on 2022-09-22 by the reprex package (v2.0.1)
