I am trying to fit multilevel models using tidymodels. When I fit the models individually I do not have any issues, but when I combine them in a workflow set I get these errors.
I have seen posts about similar errors and tried to update my code accordingly, but it still doesn't work.
When I look at the training and validation sets after the recipe has been applied, I do find all the columns in the data frame, so I am not sure why the error persists. When I use non-multilevel algorithms, I do not get this error.
I'm hoping someone might be able to help with this error:
pacman::p_load(labelled,forcats,rstanarm,tidymodels,dplyr,parsnip,baguette,future,finetune,rules,rsample,
multilevelmod,ranger,earth,readr,stacks)
# Register a multisession future plan with six workers.
plan(strategy = multisession, workers = 6)

# Load the objects stored in the remote .RData file into the workspace
# (the script below expects it to provide a data frame called `dental`).
load(file = url("http://alecri.github.io/downloads/data/dental.RData"))
# Reshape `dental` to long format: each `y*` column becomes its own row, with
# the column name stored in `measurement` and the value in `distance`.
dental_long <- dental %>%
  pivot_longer(
    cols = starts_with("y"),
    names_to = "measurement",
    values_to = "distance"
  ) %>%
  mutate(
    # Numeric age parsed out of the original column name.
    age = parse_number(measurement),
    # Human-readable label, with levels in order of first appearance.
    measurement = fct_inorder(paste("Measure at age", age)),
    # Factor version of the subject identifier (used for grouping later on).
    s_id = as.factor(id)
  ) %>%
  set_variable_labels(
    age = "Age of the child at measurement",
    measurement = "Label for time measurement",
    distance = "Measurement"
  ) %>%
  # Only distance, age, s_id and the remaining covariates are kept.
  select(-c(measurement, id))
# Split the data into a training set and a held-out set, grouping on s_id so
# that all rows of a subject end up on the same side of the split.
set.seed(11)
splitsx <- dental_long %>%
  group_initial_split(group = s_id, prop = 0.8)
dental_train <- splitsx %>% training()
dental_val <- splitsx %>% testing()

# Grouped cross-validation folds on the training set: 3 folds, 3 repeats,
# again grouped on s_id.
foldsx <- dental_train %>%
  group_vfold_cv(group = s_id, v = 3, repeats = 3)
# Simple recipe for modelling: distance is the outcome and every other column
# starts as a predictor; s_id is then moved to the "id" role, so it is no
# longer selected by the *_predictors() helpers.
simple_recipex <- recipe(distance ~ ., data = dental_train) %>%
  update_role(s_id, new_role = "id")

# Recipe with polynomial terms. s_id is again moved to the "id" role.
# NOTE(review): because s_id is not a predictor here, it is presumably not
# passed on to the model engine -- confirm before using this recipe with a
# model formula that refers to s_id, e.g. `(1 | s_id)`.
poly_recipex <- recipe(distance ~ ., data = dental_train) %>%
  update_role(s_id, new_role = "id") %>%
  step_scale(all_numeric_predictors()) %>%
  # Replace each numeric predictor by its degree-2 polynomial terms.
  # `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
  step_poly(all_numeric_predictors(), degree = 2, keep_original_cols = FALSE) %>%
  step_dummy(all_nominal_predictors()) %>%
  # Interactions among the numeric predictors present at this point, which
  # includes the dummy columns created in the previous step.
  step_interact(~ all_numeric_predictors():all_numeric_predictors())
# df <- prep(poly_recipex) %>% bake(dental_train)
# Recipes for the multilevel engines. Unlike the recipes that use
# update_role(), s_id keeps its default predictor role here.
mixed_basic_recipex <- recipe(distance ~ ., data = dental_train)

mixed_poly_recipex <- recipe(distance ~ ., data = dental_train) %>%
  step_scale(all_numeric_predictors()) %>%
  # Replace each numeric predictor by its degree-2 polynomial terms.
  # `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
  step_poly(all_numeric_predictors(), degree = 2, keep_original_cols = FALSE) %>%
  # Dummy-encode the nominal predictors, but leave s_id as a single factor
  # column so it can serve as the grouping variable.
  step_dummy(all_nominal_predictors() & !matches("s_id"))
# Interaction step currently disabled:
#   step_interact(~ all_numeric_predictors():all_numeric_predictors())
# df1 <- prep(mixed_poly_recipex) %>% bake(dental_train)
# Mixed-model specifications: linear regression fitted with the "lmer" engine
# and with the "stan_glmer" engine.
lmer_specx <- linear_reg(mode = "regression") %>%
  set_engine("lmer")

bayes_specx <- linear_reg(mode = "regression") %>%
  set_engine("stan_glmer")
# Workflow set crossing the mixed polynomial recipe with both multilevel
# model specifications. With the names used here the workflow ids are
# "mixed_poly_bayesMixed" and "mixed_poly_lmmixed".
#
# The preprocessor must be `mixed_poly_recipex`, not `poly_recipex`:
#   * `poly_recipex` moves s_id to the "id" role, so s_id is not among the
#     predictors handed to the engine, while the model formulas used for
#     these workflows contain `(1 | s_id)`;
#   * those model formulas are built from the columns produced by
#     `mixed_poly_recipex`, whereas `poly_recipex` adds an interaction step
#     and therefore yields a different set of columns.
fullx <- workflow_set(
  preproc = list(mixed_poly = mixed_poly_recipex),
  models = list(bayesMixed = bayes_specx, lmmixed = lmer_specx)
)
# Control object for Bayesian tuning: keep the out-of-sample predictions and
# the workflow, log progress, parallelise over "everything", and use a
# no-improvement limit of 20.
bayes_ctrl <- control_bayes(
  verbose = TRUE,
  no_improve = 20,
  save_pred = TRUE,
  save_workflow = TRUE,
  parallel_over = "everything"
)

# Metric set made up of RMSE and R-squared.
rmse_res <- metric_set(rmse, rsq)
# Bake both mixed recipes on the training data to find out which columns
# they produce.
basicbkx <- bake(prep(mixed_basic_recipex), dental_train)
polybkx <- bake(prep(mixed_poly_recipex), dental_train)

# Model formulas: every baked column except the outcome and s_id enters as a
# fixed-effect term, followed by a random intercept for s_id.
polyform <- reformulate(
  termlabels = c(
    setdiff(colnames(polybkx), c("distance", "s_id")),
    "-s_id + (1 | s_id)"
  ),
  response = "distance"
)
basicform <- reformulate(
  termlabels = c(
    setdiff(colnames(basicbkx), c("distance", "s_id")),
    "-s_id + (1 | s_id)"
  ),
  response = "distance"
)
# Attach the random-effects model formula to each workflow in the set by
# re-adding its model specification together with `polyform`.
# (Updates for 'basic_bayesMixed' / 'basic_lmmixed' using `basicform` are
# currently disabled.)
all_wfx1 <- update_workflow_model(
  fullx,
  id = "mixed_poly_bayesMixed",
  spec = bayes_specx,
  formula = polyform
)
all_wfx1 <- update_workflow_model(
  all_wfx1,
  id = "mixed_poly_lmmixed",
  spec = lmer_specx,
  formula = polyform
)
# Run every workflow in the set over the grouped resamples.
# NOTE(review): neither model specification contains tune() placeholders, so
# workflow_map() is expected to fall back to fit_resamples() for these
# workflows -- confirm against the verbose log.
test_results <- all_wfx1 %>%
  workflow_map(
    "tune_bayes",
    seed = 10,
    resamples = foldsx,
    # Pass the metric set defined earlier in the script explicitly; it was
    # created but never handed to the resampling call.
    metrics = rmse_res,
    control = bayes_ctrl
  )