I'm working with some traffic data and I'm trying to evaluate which model would best fit the data set. I believe there is some basic step I'm missing, because I've already restricted myself to a maximum of 3 predictors and set up dummy variables, centering and scaling.
I'm trying to determine which model best describes accident severity (`collision_severity`) based on three other predictors (all factors): `type_of_collision`, `pcf_violation_category` and `chp_vehicle_type_at_fault`.
Please find below a reprex of my attempt:
# NOTE(review): these rows are the arguments of a row-wise tibble definition
# (presumably `tibble::tribble(`); the opening call, its assignment target and
# the closing parenthesis are not visible in this excerpt. The printed output
# below reports 97 rows while only 10 are listed here, and the later
# "object 'df_ml' not found" error suggests the result was never assigned to
# `df_ml` - confirm and restore before running.
~collision_severity, ~type_of_collision, ~pcf_violation_category, ~chp_vehicle_type_at_fault,
"property damage only", "sideswipe", "unsafe starting or backing", "passenger car, station",
"property damage only", "rear end", "following too closely", "passenger car, station",
"property damage only", "broadside", "traffic signals and signs", "passenger car, station",
"property damage only", "hit object", "speeding", "passenger car, station",
"property damage only", "sideswipe", "improper turning", "mini-vans",
"property damage only", "sideswipe", "automobile right of way", "passenger car, station",
"property damage only", "hit object", "dui", "pickups & panels",
"property damage only", "sideswipe", "unsafe lane change", "two axle truck",
"pain", "rear end", "speeding", "passenger car, station",
"property damage only", "sideswipe", "improper turning", "motorcycle",
#> # A tibble: 97 x 4
#> collision_severity type_of_collision pcf_violation_cate~ chp_vehicle_type_~
#> <chr> <chr> <chr> <chr>
#> 1 property damage only sideswipe unsafe starting or~ passenger car, st~
#> 2 property damage only rear end following too clos~ passenger car, st~
#> 3 property damage only broadside traffic signals an~ passenger car, st~
#> 4 property damage only hit object speeding passenger car, st~
#> 5 property damage only sideswipe improper turning mini-vans
#> 6 property damage only sideswipe automobile right o~ passenger car, st~
#> 7 property damage only hit object dui pickups & panels
#> 8 property damage only sideswipe unsafe lane change two axle truck
#> 9 pain rear end speeding passenger car, st~
#> 10 property damage only sideswipe improper turning motorcycle
#> # ... with 87 more rows
# Splitting data into train and test data sets.
# The split is random, so the seed is fixed to make the reprex reproducible.
# Rows are stratified on the outcome so both partitions keep a similar
# collision_severity distribution.
set.seed(123)
dfml_split <- rsample::initial_split(
  data = df_ml,
  strata = collision_severity
)
#> Error in eval_select_impl(NULL, .vars, expr(c(!!!dots)), include = .include, : object 'df_ml' not found
# Pull the training and the testing partitions out of the split object.
dfml_train <- rsample::training(x = dfml_split)
#> Error in analysis(x): object 'dfml_split' not found
dfml_test <- rsample::testing(x = dfml_split)
#> Error in assessment(x): object 'dfml_split' not found
# Creating folds:
# Two-fold cross-validation on the training partition, stratified on the
# outcome column.
dfml_folds <- rsample::vfold_cv(
  data = dfml_train,
  v = 2,
  strata = collision_severity
)
#> Error in eval_select_impl(NULL, .vars, expr(c(!!!dots)), include = .include, : object 'dfml_train' not found
# Creating Recipes:
# recipes is attached here because the pipelines below rely on `%>%` and on
# the recipes selector helpers; the reprex output reported
# 'could not find function "%>%"'.
library(recipes)

# Recipe 1 - response: collision_severity; predictor: type_of_collision.
# The recipe is declared on the training partition so the held-out test rows
# play no part in preprocessing.
dfml_recipe <-
  recipes::recipe(
    collision_severity ~ type_of_collision,
    data = dfml_train
  ) %>%
  recipes::step_dummy(recipes::all_nominal_predictors()) %>%
  recipes::step_center(recipes::all_numeric_predictors()) %>%
  # The original pipeline ended in a dangling `%>%`; the scaling step is
  # restored to match the other two recipes (centering followed by scaling).
  recipes::step_scale(recipes::all_numeric_predictors())
#> Error in recipes::recipe(collision_severity ~ type_of_collision, data = dfml_test) %>% : could not find function "%>%"
# Recipe 2 - response: collision_severity;
# predictors: type_of_collision + pcf_violation_category.
# Declared on the training partition so the held-out test rows play no part
# in preprocessing.
dfml_recipe2 <-
  recipes::recipe(
    collision_severity ~ type_of_collision + pcf_violation_category,
    data = dfml_train
  ) %>%
  recipes::step_dummy(recipes::all_nominal_predictors()) %>%
  # all_numeric_predictors() already excludes the outcome, so the extra
  # `-all_outcomes()` exclusion is not needed.
  recipes::step_center(recipes::all_numeric_predictors()) %>%
  recipes::step_scale(recipes::all_numeric_predictors())
#> Error in recipes::recipe(collision_severity ~ type_of_collision + pcf_violation_category, : could not find function "%>%"
# Recipe 3 - response: collision_severity;
# predictors: type_of_collision + pcf_violation_category +
# chp_vehicle_type_at_fault.
# The `recipes::recipe(` opener was missing, which left the formula and the
# closing parenthesis dangling; it is restored here. The recipe is declared on
# the training partition so the held-out test rows play no part in
# preprocessing.
dfml_recipe3 <-
  recipes::recipe(
    collision_severity ~ type_of_collision + pcf_violation_category +
      chp_vehicle_type_at_fault,
    data = dfml_train
  ) %>%
  recipes::step_dummy(recipes::all_nominal_predictors()) %>%
  # all_numeric_predictors() already excludes the outcome, so the extra
  # `-all_outcomes()` exclusion is not needed.
  recipes::step_center(recipes::all_numeric_predictors()) %>%
  recipes::step_scale(recipes::all_numeric_predictors())
#> Error in recipes::recipe(collision_severity ~ type_of_collision + pcf_violation_category + : could not find function "%>%"
# Review of my recipes:
#> Error in eval(expr, envir, enclos): object 'dfml_recipe' not found
#> Error in eval(expr, envir, enclos): object 'dfml_recipe2' not found
#> Error in eval(expr, envir, enclos): object 'dfml_recipe3' not found
# Setting Model Specs:
# Logistic regression on the "glm" engine; the engine call is restored from
# the reprex error output for this statement, since the pipeline was left
# ending in a dangling `%>%`.
# NOTE(review): logistic_reg() models a two-class outcome - confirm that
# collision_severity has exactly two levels, otherwise consider
# parsnip::multinom_reg().
log_reg <- parsnip::logistic_reg() %>%
  parsnip::set_engine("glm")
#> Error in parsnip::logistic_reg() %>% parsnip::set_engine("glm"): could not find function "%>%"
# Linear discriminant analysis classifier.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so parsnip's default engine for discrim_linear() is spelled
# out - TODO confirm the intended engine.
lda_spec <- discrim::discrim_linear() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("MASS")
#> Error in discrim::discrim_linear() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# Bagged decision trees classifier.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so the default engine for bag_tree() is spelled out - TODO
# confirm the intended engine.
tree_spec <- baguette::bag_tree() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("rpart")
#> Error in baguette::bag_tree() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# K-nearest-neighbours classifier on the "kknn" engine.
# The pipeline ended in a dangling `%>%`; nearest_neighbor() has no mode until
# one is set, so the classification mode used by every other spec in this
# script is added - TODO confirm this was the dropped step.
knn_spec <- parsnip::nearest_neighbor() %>%
  parsnip::set_engine("kknn") %>%
  parsnip::set_mode("classification")
#> Error in parsnip::nearest_neighbor() %>% parsnip::set_engine("kknn") %>% : could not find function "%>%"
# Single decision tree classifier.
# NOTE(review): the name `bays_spec` suggests a (naive) Bayes model, but the
# spec is a decision tree - confirm which model was intended.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so parsnip's default engine for decision_tree() is spelled
# out - TODO confirm the intended engine.
bays_spec <- parsnip::decision_tree() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("rpart")
#> Error in parsnip::decision_tree() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# Polynomial-kernel support vector machine classifier.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so parsnip's default engine for svm_poly() is spelled out -
# TODO confirm the intended engine.
svm_spec <- parsnip::svm_poly() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("kernlab")
#> Error in parsnip::svm_poly() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# Random forest classifier.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so parsnip's default engine for rand_forest() is spelled out -
# TODO confirm the intended engine.
rf_spec <- parsnip::rand_forest() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("ranger")
#> Error in parsnip::rand_forest() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# Gradient-boosted trees classifier.
# The pipeline ended in a dangling `%>%`; the engine line is not visible in
# this excerpt, so parsnip's default engine for boost_tree() is spelled out -
# TODO confirm the intended engine.
xgboost_spec <- parsnip::boost_tree() %>%
  parsnip::set_mode("classification") %>%
  parsnip::set_engine("xgboost")
#> Error in parsnip::boost_tree() %>% parsnip::set_mode("classification") %>% : could not find function "%>%"
# Workflow:
# Build a workflow set that crosses every recipe with every model spec
# (cross = TRUE), giving one workflow per recipe/model combination.
# The call was left without a `models` argument and without its closing
# parenthesis. NOTE(review): the original model list is not visible in this
# excerpt; every spec defined in this script is listed except `lda_spec`,
# which was already commented out - confirm the intended selection.
dfml_wfset <- workflowsets::workflow_set(
  preproc = list(dfml_recipe, dfml_recipe2, dfml_recipe3),
  models = list(
    log_reg,
    # lda_spec,
    tree_spec,
    knn_spec,
    bays_spec,
    svm_spec,
    rf_spec,
    xgboost_spec
  ),
  cross = TRUE
)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
#> Error in workflowsets::workflow_set(list(dfml_recipe, dfml_recipe2, dfml_recipe3), : object 'dfml_recipe' not found
#> Error in eval(expr, envir, enclos): object 'dfml_wfset' not found
# Processing:
# doParallel::registerDoParallel()
# Fit every workflow in the set on the cross-validation folds and score it
# with sensitivity, specificity and Cohen's kappa.
# The call was left without the workflow set, without `fn`, and without its
# closing parenthesis. None of the specs in this script carries tuning
# parameters, so resampled fitting is requested explicitly - TODO confirm the
# intended `fn`.
# The metric helpers are namespace-qualified because yardstick is not
# attached anywhere in this script.
dfml_rs <- workflowsets::workflow_map(
  dfml_wfset,
  fn = "fit_resamples",
  resamples = dfml_folds,
  metrics = yardstick::metric_set(
    yardstick::sensitivity,
    yardstick::specificity,
    yardstick::kap
  )
)
#> Error in rlang::list2(...): object 'dfml_folds' not found
#> Execution stopped; returning current results
#> Error in rlang::is_list(x): object 'dfml_wfset' not found
#> Error in eval(expr, envir, enclos): object 'dfml_rs' not found
