library(tidyverse)
library(tidymodels)
library(discrim)
library(themis)
# Load the data. read_csv() never returns factor columns, while the
# classification model and step_upsample() used later need a factor outcome,
# so Pass is converted explicitly.
# NOTE(review): as.factor() sorts the levels, and yardstick treats the first
# level as the event for sens/spec/roc_auc by default -- confirm that this is
# the intended positive class.
df <- read_csv("df.csv") %>%
  mutate(Pass = as.factor(Pass))

# Seed the RNG so the stratified train/test split is reproducible.
set.seed(1)
split <- initial_split(data = df, strata = Pass)
train <- training(split)
test <- testing(split)
# Preprocessing recipe. It is left un-prepped on purpose: a workflow estimates
# (preps) the recipe itself on whatever data it is fitted to.
df_recipe <- recipes::recipe(Pass ~ ., data = train) %>%
  # Balance the outcome classes by replicating minority-class rows.
  step_upsample(Pass) %>%
  # Filter near-zero-variance predictors BEFORE scaling: normalizing a
  # constant column divides by a standard deviation of 0 and yields NaN.
  recipes::step_nzv(all_predictors()) %>%
  # Centre and scale numeric predictors only; step_normalize() errors on
  # non-numeric columns.
  recipes::step_normalize(all_numeric_predictors())
# Resampling scheme: 5-fold cross-validation repeated 5 times, stratified on
# the outcome. The seed fixes the fold assignment.
set.seed(1)
cv <- vfold_cv(
  data = train,
  v = 5,
  repeats = 5,
  strata = Pass
)
# Model specification: k-nearest-neighbours classifier fitted with the kknn
# engine, using the engine's default hyperparameters.
knnmodel <- set_mode(
  set_engine(nearest_neighbor(), "kknn"),
  "classification"
)
# Bundle the preprocessing recipe and the model specification into a single
# workflow object.
knnwf <- add_model(
  add_recipe(workflow(), df_recipe),
  knnmodel
)
# Fit the workflow on every resample and score it on the held-out part.
# Predictions are kept (save_pred = TRUE) so they can be inspected afterwards.
set.seed(1)
knnres <- tune::fit_resamples(
  knnwf,
  resamples = cv,
  metrics = metric_set(roc_auc, sens, spec, accuracy),
  control = control_resamples(save_pred = TRUE, verbose = TRUE)
)
# Inspect the held-out predictions saved during resampling. This pipeline must
# end here: a trailing `%>%` would pipe into the assignment below and fail.
knnres %>%
  collect_predictions()

# Final workflow: same recipe and model specification as used for resampling.
final_wf <- workflow() %>%
  add_recipe(df_recipe) %>%
  add_model(knnmodel)

# last_fit() fits the workflow on the training portion of `split` and
# evaluates it on the testing portion.
finalres <- final_wf %>%
  last_fit(split)
Questions:
- Do I need to create a separate prepped recipe, such as
prep_df_recipe <- prep(df_recipe)
in order to use it in a workflow?
- Do I need to pass preprocessed training data to vfold_cv?
- Do I need to prep my test dataset as well before calling last_fit?
Thank you!