Hi guys, I have follow Julia blog to learn how to use tidymodel, in predict board games topic I have used tune_race_anova to fit model but it run out error.
I try to find the reason by type in show_notes(.Last.tune.result) and it said: Error in map(., str_replace_all, " ", "_"): could not find function "map" which is quite weird because I have install purrr and stringr package from the beginning.
This is the first time I try to create a reprex and ask for help in R community so if I make the reprex a little bit ugly or wrong some how please kindly advice me further. Thanks
#import data
library(tidymodels)
library(tidyverse)
ratings <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/ratings.csv")
#> Rows: 21831 Columns: 10
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (3): name, url, thumbnail
#> dbl (7): num, id, year, rank, average, bayes_average, users_rated
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
details <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-01-25/details.csv")
#> Rows: 21631 Columns: 23
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (10): primary, description, boardgamecategory, boardgamemechanic, boardg...
#> dbl (13): num, id, yearpublished, minplayers, maxplayers, playingtime, minpl...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ratings_joined <-
ratings %>%
left_join(details, by = "id")
#Fitting model
set.seed(123)
game_split <-
ratings_joined %>%
select(name, average, matches("min|max"), boardgamecategory) %>%
na.omit() %>%
initial_split(strata = average)
game_train <- training(game_split)
game_test <- testing(game_split)
set.seed(234)
game_folds <- vfold_cv(game_train, strata = average)
game_folds
#> # 10-fold cross-validation using stratification
#> # A tibble: 10 × 2
#> splits id
#> <list> <chr>
#> 1 <split [14407/1602]> Fold01
#> 2 <split [14407/1602]> Fold02
#> 3 <split [14407/1602]> Fold03
#> 4 <split [14408/1601]> Fold04
#> 5 <split [14408/1601]> Fold05
#> 6 <split [14408/1601]> Fold06
#> 7 <split [14408/1601]> Fold07
#> 8 <split [14408/1601]> Fold08
#> 9 <split [14410/1599]> Fold09
#> 10 <split [14410/1599]> Fold10
library(textrecipes)
split_category <- function(x) {
x %>%
str_split(", ") %>%
map(str_remove_all, "[:punct:]") %>%
map(str_squish) %>%
map(str_to_lower) %>%
map(str_replace_all, " ", "_")
}
game_rec <-
recipe(average ~ ., data = game_train) %>%
update_role(name, new_role = "id") %>%
step_tokenize(boardgamecategory, custom_token = split_category) %>%
step_tokenfilter(boardgamecategory, max_tokens = 30) %>%
step_tf(boardgamecategory)
game_prep <- prep(game_rec)
bake(game_prep, new_data = NULL) %>% skimr::skim()
Name | Piped data |
Number of rows | 16009 |
Number of columns | 37 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 36 |
________________________ | |
Group variables | None |
Data summary
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
name | 0 | 1 | FALSE | 15781 | Rob: 6, Cha: 4, Ali: 3, Aro: 3 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
minplayers | 0 | 1 | 2.01 | 0.69 | 0.00 | 2.00 | 2.00 | 2.00 | 10.00 | ▇▁▁▁▁ |
maxplayers | 0 | 1 | 5.73 | 16.43 | 0.00 | 4.00 | 4.00 | 6.00 | 999.00 | ▇▁▁▁▁ |
minplaytime | 0 | 1 | 66.25 | 517.90 | 0.00 | 20.00 | 30.00 | 60.00 | 60000.00 | ▇▁▁▁▁ |
maxplaytime | 0 | 1 | 93.98 | 609.41 | 0.00 | 25.00 | 45.00 | 90.00 | 60000.00 | ▇▁▁▁▁ |
minage | 0 | 1 | 9.64 | 3.65 | 0.00 | 8.00 | 10.00 | 12.00 | 25.00 | ▂▇▆▁▁ |
average | 0 | 1 | 6.42 | 0.93 | 1.31 | 5.83 | 6.45 | 7.04 | 9.57 | ▁▁▆▇▁ |
tf_boardgamecategory_abstract_strategy | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_action_dexterity | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_adventure | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_ancient | 0 | 1 | 0.03 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_animals | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_bluffing | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_card_game | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
tf_boardgamecategory_childrens_game | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_deduction | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_dice | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_economic | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_exploration | 0 | 1 | 0.04 | 0.20 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_fantasy | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_fighting | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_horror | 0 | 1 | 0.03 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_humor | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_medieval | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_miniatures | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_movies_tv_radio_theme | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_nautical | 0 | 1 | 0.03 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_negotiation | 0 | 1 | 0.03 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_party_game | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_print_play | 0 | 1 | 0.03 | 0.16 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_puzzle | 0 | 1 | 0.03 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_racing | 0 | 1 | 0.03 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_realtime | 0 | 1 | 0.04 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_science_fiction | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_trivia | 0 | 1 | 0.03 | 0.16 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
tf_boardgamecategory_wargame | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
tf_boardgamecategory_world_war_ii | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
xgb_spec <-
boost_tree(
trees = tune(),
mtry = tune(),
min_n = tune(),
learn_rate = 0.01
) %>%
set_engine("xgboost") %>%
set_mode("regression")
xgb_wf <- workflow(game_rec, xgb_spec)
xgb_wf
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: boost_tree()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> • step_tokenize()
#> • step_tokenfilter()
#> • step_tf()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> Boosted Tree Model Specification (regression)
#>
#> Main Arguments:
#> mtry = tune()
#> trees = tune()
#> min_n = tune()
#> learn_rate = 0.01
#>
#> Computational engine: xgboost
library(finetune)
doParallel::registerDoParallel()
set.seed(234)
xgb_game_rs <-
tune_race_anova(
xgb_wf,
game_folds,
grid = 20
)
#> i Creating pre-processing data to finalize unknown parameter: mtry
#> Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
#> information.
#> Error in `test_parameters_gls()`:
#> ! There were no valid metrics for the ANOVA model.
xgb_game_rs
#> Error in eval(expr, envir, enclos): object 'xgb_game_rs' not found
Created on 2022-07-27 by the reprex package (v2.0.1)