Hi All,
First post here, so apologies if I have missed anything.
I am trying to combine cross-validation, recipe roles, and hyperparameter tuning, but so far I run into errors when running the tuning step. I create a unique ID column because my real dataset contains data that I would like to assign a non-predictor role (and thus exclude from modelling) while still keeping it within the data frame. Any advice would be appreciated.
Thanks in advance!
library(tidymodels)
library(ids)
# Draw a 1,000-row sample of ggplot2's diamonds data.
# The seed is set *before* sampling; without it every run draws different
# rows and the example cannot be reproduced.
set.seed(12345)
diamonds <- ggplot2::diamonds
diamonds <- sample_n(diamonds, 1000)

# NOTE: despite the `ln_` prefix this is a base-10 log (log10), not a
# natural log.
diamonds$ln_price <- log10(diamonds$price)

# Modelling frame: drop the raw price (the outcome is `ln_price`), make sure
# the categorical columns are factors, and attach a per-row identifier that
# should travel with the data without being used as a predictor.
# NOTE(review): ids::random_id() may draw from openssl rather than R's RNG,
# in which case the IDs still differ between runs despite the seed - confirm.
diamonds_sub <- diamonds %>%
  select(-price) %>%
  mutate(
    cut = as.factor(cut),
    color = as.factor(color),
    clarity = as.factor(clarity),
    # n() is the row count of the piped data, so the ID vector always
    # matches the frame it is added to.
    unique_id = ids::random_id(n = n(), bytes = 4)
  )
# Seeded 70/30 partition of the prepared data into training and test sets.
set.seed(12345)
split <- diamonds_sub %>%
  initial_split(prop = 0.7)
train_data <- split %>% training()
test_data <- split %>% testing()
# Preprocessing recipe for modelling `ln_price` from all other columns.
#
# `unique_id` is given a custom role so it stays in the data but is not a
# predictor. Changing the role alone is not enough: type-based selectors such
# as all_nominal() / all_numeric() match columns regardless of role, so every
# step below explicitly excludes the outcome and the ID column where needed.
diamond_recipe <- recipe(ln_price ~ ., data = train_data) %>%
  update_role(unique_id, new_role = "unique_id") %>%
  # Keep the outcome out of the correlation filter; otherwise `ln_price`
  # itself is a candidate for removal when it correlates with a predictor.
  step_corr(all_numeric(), -all_outcomes()) %>%
  # Exclude the ID column: dummy-encoding it would create one column per row
  # and produce unseen levels in every assessment fold.
  step_dummy(all_nominal(), -all_outcomes(), -has_role("unique_id")) %>%
  step_zv(all_numeric(), -all_outcomes()) %>%
  step_normalize(all_numeric(), -all_outcomes())
# 5-fold cross-validation resamples built from the training set only.
diamond_folds <- train_data %>%
  vfold_cv(v = 5)
# Example model: lasso (mixture = 1) linear regression on the glmnet engine,
# with the penalty left open for tuning.
lasso_spec <-
  linear_reg(penalty = tune(), mixture = 1) %>%
  set_mode("regression") %>%
  set_engine("glmnet")

# Bundle the recipe and the model specification into a single workflow.
lasso_wf <-
  workflow() %>%
  add_recipe(diamond_recipe) %>%
  add_model(lasso_spec)

# Tunable parameters of the workflow, passed on to the tuner.
lasso_params <- lasso_wf %>% parameters()
# Bayesian tuning of the workflow over the CV folds, scored by RMSE.
# Intent: "unique_id" should be left out of the validation.
lasso_tuned <- tune_bayes(
  lasso_wf,
  resamples = diamond_folds,
  param_info = lasso_params,
  iter = 3,
  metrics = metric_set(rmse)
)