Hi, I just want to confirm: will the importance weights be passed to the
`weight` option of `xgb.train()`? I see no `weight` in my result.
# Model-spec helper: marks `trees` for tuning, then applies `mode` via
# set_mode() and `engine` via set_engine(), forwarding `...` to set_engine().
# NOTE(review): the line that opens the model call (presumably `boost_tree(`,
# given the "Model: boost_tree()" printout further down) and the closing `}`
# are missing from this paste -- restore them from the original reprex.
boost_tree_spec <- function(engine = "xgboost", mode = "classification", ...) {
trees = tune()
) %>%
set_mode(mode) %>%
set_engine(engine, ...)
# Recipe helper: uses `signal ~ .` as the model formula, assigns `symbol` the
# "info" role, and derives a day-of-week ("dow") feature from `date` without
# keeping the original `date` column.
# NOTE(review): the chain ends on a dangling `%>%`; at least one further step
# (the fitted workflow printout lists step_dummy()) and the closing `}` appear
# to be missing from this paste.
xgboost_recipe <- function(data) {
data %>%
recipes::recipe(signal ~ ., data = data) %>%
update_role(symbol, new_role = "info") %>%
step_date(date, features = "dow", keep_original_cols = FALSE) %>%
# Engine settings used further down as the `nthread`, `tree_method` and
# `max_bin` arguments of the tuning workflow and of the final workflow.
tune_nthread <- 0
final_nthread <- tune_nthread

tune_tree_method <- "hist"
final_tree_method <- "hist"

tune_max_bin <- 256
final_max_bin <- 512
# Simulated data set for the reprex: random dates drawn from 1999-01-01 to
# 2000-01-01, an integer feature `f1`, a `symbol` label, an `importance`
# column wrapped with importance_weights(), and the outcome `signal`.
# NOTE(review): the `sample(` call for `date` is cut off (its remaining
# arguments and closing parenthesis are missing) and `tibble(` is never
# closed -- lines were lost from this paste.
data <- tibble(
date = sample(
seq(as.Date("1999/01/01"), as.Date("2000/01/01"), by = "day"),
f1 = sample(0:1000, 100),
symbol = sample(c("s1", "s2", "s3", "s4"), 100, replace = TRUE),
importance = importance_weights(sample(0:100, 100)),
signal = sample(c("open", "close"), 100, replace = TRUE)
data %>%
#> Rows: 100
#> Columns: 5
#> $ date <date> 1999-01-25, 1999-03-06, 1999-06-24, 1999-10-23, 1999-08-19…
#> $ f1 <int> 314, 814, 983, 888, 620, 750, 913, 297, 439, 155, 460, 698,…
#> $ symbol <chr> "s1", "s4", "s3", "s2", "s4", "s3", "s2", "s2", "s2", "s2",…
#> $ importance <imp_wts> 50, 0, 89, 74, 98, 82, 65, 31, 75, 32, 100, 52, 35, 71,…
#> $ signal <chr> "open", "close", "open", "open", "open", "close", "close", …
# Train/test split of `data`, stratified on the outcome `signal`.
split <- data %>%
initial_split(strata = signal)
# Training portion of the split, reused below as the resampling source.
# NOTE(review): the chain ends on a dangling `%>%`; the last step (presumably
# a print call, given the tibble output that follows) is missing.
tune_data <- split %>%
training() %>%
#> # A tibble: 74 × 5
#> date f1 symbol importance signal
#> <date> <int> <chr> <imp_wts> <chr>
#> 1 1999-03-06 814 s4 0 close
#> 2 1999-08-02 750 s3 82 close
#> 3 1999-01-26 913 s2 65 close
#> 4 1999-11-02 297 s2 31 close
#> 5 1999-07-26 460 s1 100 close
#> 6 1999-01-23 338 s2 71 close
#> 7 1999-12-18 465 s3 39 close
#> 8 1999-02-17 294 s2 64 close
#> 9 1999-11-26 749 s4 7 close
#> 10 1999-09-24 333 s1 37 close
#> # … with 64 more rows
# Tuning run: a workflow combining the recipe built on the training data with
# `importance` as case weights, evaluated on 2-fold CV of `tune_data`
# (stratified on `signal`) with a grid of size 5 and roc_auc as the metric.
# NOTE(review): several lines are missing from this paste -- the call that
# receives nthread/tree_method/max_bin (presumably
# `add_model(boost_tree_spec(`), the call that receives resamples/grid/control
# (presumably `tune_grid(`), and the closing parentheses/commas of
# control_grid() and of the outer call. Restore from the original reprex.
tune_result <- workflow() %>%
nthread = tune_nthread,
tree_method = tune_tree_method,
max_bin = tune_max_bin
) %>%
add_recipe(xgboost_recipe(training(split))) %>%
add_case_weights(importance) %>%
resamples = vfold_cv(tune_data, v = 2, strata = signal),
grid = 5,
control = control_grid(
verbose = TRUE
metrics = metric_set(roc_auc)
#> i Fold1: preprocessor 1/1
#> ✓ Fold1: preprocessor 1/1
#> i Fold1: preprocessor 1/1, model 1/1
#> ✓ Fold1: preprocessor 1/1, model 1/1
#> i Fold1: preprocessor 1/1, model 1/1 (predictions)
#> i Fold2: preprocessor 1/1
#> ✓ Fold2: preprocessor 1/1
#> i Fold2: preprocessor 1/1, model 1/1
#> ✓ Fold2: preprocessor 1/1, model 1/1
#> i Fold2: preprocessor 1/1, model 1/1 (predictions)
# Selects parameters from the tuning result and builds the final workflow with
# the `final_*` engine settings, the same recipe, and `importance` as case
# weights.
# NOTE(review): the selection step after `tune_result %>%` is missing
# (presumably select_best()), as is the call that receives
# nthread/tree_method/max_bin (presumably `add_model(boost_tree_spec(`) and
# whatever followed the trailing `%>%` after add_case_weights() (presumably a
# finalize step using `best_params`). Restore from the original reprex.
best_params <- tune_result %>%
final_workflow <- workflow() %>%
nthread = final_nthread,
tree_method = final_tree_method,
max_bin = final_max_bin
) %>%
add_recipe(xgboost_recipe(training(split))) %>%
add_case_weights(importance) %>%
# Fits the final workflow on the training portion of `split`, evaluates it on
# the held-out portion via last_fit(), and collects the resulting metrics.
# NOTE(review): trailing `%>%` -- the final step (presumably a print) is
# missing from this paste.
final_workflow %>%
last_fit(split) %>%
collect_metrics() %>%
#> # A tibble: 2 × 4
#> .metric .estimator .estimate .config
#> <chr> <chr> <dbl> <chr>
#> 1 accuracy binary 0.5 Preprocessor1_Model1
#> 2 roc_auc binary 0.542 Preprocessor1_Model1
# Fits the final workflow directly on the training data so the underlying
# xgb.Booster can be inspected.
# NOTE(review): trailing `%>%` -- the final step is missing from this paste.
# NOTE(review): as far as I can tell, xgb.train() itself has no `weight`
# argument; observation weights travel inside the xgb.DMatrix passed as
# `data` (here `x$data`), so they would not show up in the printed call even
# when they are applied. TODO confirm against the parsnip/xgboost docs, e.g.
# by inspecting the DMatrix weight info.
model <- final_workflow %>%
fit(training(split)) %>%
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: boost_tree()
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 2 Recipe Steps
#> • step_date()
#> • step_dummy()
#> ── Case Weights ────────────────────────────────────────────────────────────────
#> importance
#> ── Model ───────────────────────────────────────────────────────────────────────
#> ##### xgb.Booster
#> raw: 959.3 Kb
#> call:
#> xgboost::xgb.train(params = list(eta = 0.3, max_depth = 6, gamma = 0,
#> colsample_bytree = 1, colsample_bynode = 1, min_child_weight = 1,
#> subsample = 1, objective = "binary:logistic"), data = x$data,
#> nrounds = 1019L, watchlist = x$watchlist, verbose = 0, nthread = 0,
#> tree_method = "hist", max_bin = 512)
#> params (as set within xgb.train):
#> eta = "0.3", max_depth = "6", gamma = "0", colsample_bytree = "1", colsample_bynode = "1", min_child_weight = "1", subsample = "1", objective = "binary:logistic", nthread = "0", tree_method = "hist", max_bin = "512", validate_parameters = "TRUE"
#> xgb.attributes:
#> niter
#> callbacks:
#> cb.evaluation.log()
#> # of features: 7
#> niter: 1019
#> nfeatures : 7
#> evaluation_log:
#> iter training_logloss
#> 1 0.564311269
#> 2 0.496895742
#> ---
#> 1018 0.001333734
#> 1019 0.001333456
Created on 2022-07-16 by the reprex package (v2.0.1)
Session info
#> ──────────────────────────────────────────────────────────────────────────────