Error in eval(predvars, data, env): object 's_id' not found

I am trying to fit multilevel models using tidymodels. When I fit individual models, I do not have issues but when I combine them in workflow set, I get these errors.

I have seen posts with similar error and tried to update my code but it doesn't seem to work still.

When I look at the training and validation sets after the recipe has been applied, I do find all columns in the dataframe. Not sure why the error persists. When I use non multilevel algorithms,I do not have this error.

I'm hoping someone might be able to help with this error:

pacman::p_load(labelled,forcats,rstanarm,tidymodels,dplyr,parsnip,baguette,future,finetune,rules,rsample,
               multilevelmod,ranger,earth,readr,stacks)

plan(multisession, workers=6)

load(url("http://alecri.github.io/downloads/data/dental.RData"))


dental_long <- pivot_longer(dental, cols = starts_with("y"), 
                            names_to = "measurement", values_to = "distance") %>% 
  mutate(
    age = parse_number(measurement),
    measurement = fct_inorder(paste("Measure at age", age)),
    s_id=as.factor(id)
  ) %>% 
  set_variable_labels(
    age = "Age of the child at measurement",
    measurement = "Label for time measurement",
    distance = "Measurement"
  ) %>% select(-measurement,-id)

#split data into train, validation and test sets
set.seed(11)
splitsx      <- group_initial_split(dental_long, group = s_id,prop = 0.8)

dental_train <- training(splitsx)
dental_val <- testing(splitsx)

#Create cross-validation folds
foldsx <- group_vfold_cv(dental_train, v = 3,group = s_id,repeats = 3)

#Create simple recipe for modelling
simple_recipex <- recipe(distance ~ ., data = dental_train) %>% 
  update_role(s_id,new_role = 'id')

#Recipe with polynomial terms
poly_recipex <- recipe(distance ~ ., data = dental_train) %>% 
  update_role(s_id,new_role = 'id') %>%
  step_scale(all_numeric_predictors()) %>%
  step_poly(all_numeric_predictors(),degree = 2,keep_original_cols = F) %>%
  step_dummy(all_nominal_predictors()) %>% 
  step_interact(~all_numeric_predictors():all_numeric_predictors())

# df <- prep(poly_recipex) %>% bake(dental_train)

mixed_basic_recipex <- recipe(distance ~ ., data = dental_train)

mixed_poly_recipex <- recipe(distance ~ ., data = dental_train) %>% 
  step_scale(all_numeric_predictors()) %>%
  step_poly(all_numeric_predictors(),degree = 2,keep_original_cols = F) %>%
  step_dummy(all_nominal_predictors() & !matches('s_id')) #%>% 
  # step_interact(~all_numeric_predictors():all_numeric_predictors() )


# df1 <- prep(mixed_poly_recipex) %>% bake(dental_train)

#mixed model
lmer_specx <-
  linear_reg() %>%
  set_mode("regression") %>%
  set_engine("lmer")

bayes_specx <- linear_reg() %>%
  set_mode("regression") %>%
  set_engine("stan_glmer")


fullx <- 
  workflow_set(
    preproc = list( mixed_poly = poly_recipex
    ), 
    models = list(bayesMixed = bayes_specx,lmmixed = lmer_specx
                  
    )
  )

#settings for bayes tuning and metrics
bayes_ctrl <-
  control_bayes(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE,
    verbose = TRUE,
    no_improve = 20
  )
rmse_res <- metric_set(rmse,rsq)

basicbkx <- prep(mixed_basic_recipex) %>% bake(dental_train)
polybkx <- prep(mixed_poly_recipex) %>% bake(dental_train)

polyform <- reformulate(c(setdiff(colnames(polybkx), c("distance",'s_id')),'-s_id + (1 | s_id)'), 
                        response="distance")
basicform <- reformulate(c(setdiff(colnames(basicbkx), c("distance",'s_id')),'-s_id + (1 | s_id)'), 
                         response="distance")

all_wfx1 <- fullx %>% 
  # update_workflow_model(id='basic_bayesMixed',spec=bayes_specx,
  #                       formula = basicform) %>% 
  update_workflow_model(id='mixed_poly_bayesMixed',spec=bayes_specx,
                        formula = polyform) %>%
  # update_workflow_model(id='basic_lmmixed',spec=lmer_specx,
  #                       formula = basicform) %>%
  update_workflow_model(id='mixed_poly_lmmixed',spec=lmer_specx,
                        formula = polyform)

test_results <-
  all_wfx1 %>%
  workflow_map(
    "tune_bayes",
    seed = 10,
    resamples = foldsx,
    control = bayes_ctrl
  )

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.