While using tidymodels and stacks, I got the exact same error as FatBertLee when trying to predict:
Error: Can't subset columns that don't exist.
x Location 3 doesn't exist.
i There are only 2 columns.
Run `rlang::last_error()` to see where the error occurred.
Sample from train data:
structure(list(id = c(24, 269, 316, 382, 424, 505, 551, 572,
716, 794, 848, 971, 1016, 1076, 1133, 1189, 1234, 1312, 1365,
1532), ticker = c("ABEV3", "ALPA4", "ALSC3", "ALUP11", "AMAR3",
"ANIM3", "ARTR3", "ARZZ3", "BBRK3", "BEEF3", "BEMA3", "BPHA3",
"BRAP4", "BRFS3", "BRKM5", "BRML3", "BRPR3", "BTOW3", "BVMF3",
"CCRO3"), data = structure(c(16525, 16525, 16525, 16525, 16525,
16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525, 16525,
16525, 16525, 16525, 16525, 16525, 16525), class = "Date"), quarter = c(2015.1,
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1,
2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1, 2015.1,
2015.1, 2015.1, 2015.1), ret3m = c(0.1648611076, 0.4624908206,
0.0662460568, 0.1028571429, -0.0282563749, -0.5321228611, -0.3796526055,
-0.0484496124, 0.1234567901, -0.2246835443, 0.0394890001, -0.7213114754,
-0.2236070381, 0.0180122226, -0.3681792074, 0.0992861778, 0.2915019763,
-0.0927694407, 0.1747368421, 0.1073369565), lret = c(0.0245911872,
-0.1918554545, -0.2690459849, -0.1322073384, -0.293577729, -0.5385714286,
-0.0052356021, -0.1671799162, -0.2081447964, 0.1805309735, 0.2878354861,
-0.1428571429, -0.206195547, 0.1062529384, 0.2842835131, -0.280110117,
0.0209923664, -0.2422233554, -0.0283757382, -0.1956378057), alvo = structure(c(2L,
3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 3L, 2L,
3L, 2L, 3L), .Label = c("buy", "keep", "sell"), class = "factor"),
p_l = c(22.7777943671, 17.013232239, 14.3857944343, 5.3290052757,
81.6607210863, 7.0093163352, 6.2590675835, 19.1883018103,
-20.310374125, -1.2175821514, 9.4653309443, -0.4779179664,
-4.6062594326, 22.7418227837, 16.3991693792, 18.9767524219,
16.2272067076, -32.6503400222, 20.0746301096, 24.3704146153
), vpa = c(2.9345399772, 4.4559425807, 11.9759963667, 19.1280966065,
6.3576040791, 7.9663703125, 6.3450343162, 6.6939288694, 2.9627168318,
-0.1624250847, 8.7869794656, 3.0582252548, 26.7151370268,
17.5274286146, 4.3957933828, 21.0059526254, 20.4579281623,
11.8652171379, 10.8462018391, 2.3057105308), lpa = c(0.8099994101,
0.5754344538, 1.1747700189, 3.6216890398, 0.1726656318, 2.1713957927,
1.1982615461, 1.2794253625, -0.1344140676, -6.0365536664,
1.0269054571, -1.4228383275, -2.2990454956, 2.7834180488,
0.67076568, 0.8926711812, 0.8054374505, -0.6110196704, 0.5559255607,
0.6688437705), roe = c(27.6022619, 12.9138659965, 9.8093718716,
18.9338704957, 2.7158915475, 27.2570280759, 18.8850286135,
19.1132201654, -4.5368516535, 3716.51563365, 11.6866718663,
-46.5249682066, -8.6057784141, 15.880355927, 15.2592631543,
4.249610561, 3.937043107, -5.1496712056, 5.1255321353, 29.0081413769
), payout = c(1.0195292727, 0.931375658, 0.270250729, 0.3893211108,
0.633884108, 0.2684079991, 0.4913832489, 0.4968062245, -5.7744104024,
0, 0.6504980525, 0, -0.7461901921, 0.340716886, 0.9037266218,
0.5255796718, 7.4942170294, 0, 0.7291198294, 1.1855942496
), dy12m = c(0.0447696477, 0.0547441923, 0.018785944, 0.0730569948,
0.0077624113, 0.0382930355, 0.0785074202, 0.0258910991, 0.2843084212,
0, 0.0687242798, 0, 0.1619948253, 0.0149819515, 0.0551080729,
0.0276959756, 0.4618303793, 0, 0.0363204615, 0.0486489158
), p_vpa = c(6.2871864562, 2.197066013, 1.4111560727, 1.0089869576,
2.2178166216, 1.9105313214, 1.1820267041, 3.667502371, 0.9214515443,
-45.2516310079, 1.1061821685, 0.2223511819, 0.39640448, 3.6114824023,
2.5023924107, 0.8064380751, 0.6388721231, 1.6813851587, 1.0289316173,
7.0694043257), ativo_circulante = c(19241017000, 2349169000,
458771000, 2256679000, 1935689000, 372126000, 1329203000,
668561000, 172137000, 4902444000, 280288000, 874442000, 944722000,
17774588000, 15339781000, 1190542000, 898780000, 3609719000,
3118127000, 3384242000), liq_corr = c(0.8839175747, 1.8308613158,
1.2004998037, 1.6443639188, 2.2566264077, 2.1538559489, 0.7579286675,
3.0155975841, 3.430185521, 2.038108597, 2.4949751204, 0.6588875853,
0.7662929788, 1.8775702843, 0.9387753091, 1.3318633571, 1.9499653954,
1.4177171092, 1.4075117803, 0.6784915709), divida_bruta = c(2691043000,
573308000, 1376110000, 4124207000, 1175307000, 121178000,
5855332000, 98138000, 0, 6395461000, 76266000, 814095000,
1199600000, 12721903000, 23126794000, 5363491000, 4338561000,
2302677000, 1982951000, 11538724000), quant_on = c(15713667000,
241609000, 159060920, 461243596, 185532000, 82865593, 344444000,
88682000, 184936000, 178002062, 50923870, 363051086, 122171000,
851501628, 451668652, 462653000, 298228000, 255484410, 1801392256,
1765587200), ibov3m = c(0.0543782982, 0.0543782982, 0.0543782982,
0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982,
0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982,
0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982, 0.0543782982,
0.0543782982, 0.0543782982), volume3m = c(194265323.885,
7227171.01639, 5639329.77049, 2949280.65574, 1908696.86885,
14730050.7541, 8171198.16393, 2894382.19672, 2480344.04918,
6642555.04918, 1400969.86885, 1363838.03279, 23186487.6721,
145235386.705, 31063969.0984, 39373887.7049, 31119377.2951,
9243986.29508, 134097842.066, 89693737.7049), volat3m = c(0.204997689,
0.3147291039, 0.3302444855, 0.3489084169, 0.3506794611, 0.9888233707,
0.702416568, 0.4027960265, 0.4375357642, 0.4431475177, 0.3459458746,
0.5925460944, 0.5092286168, 0.2317206403, 0.6327772099, 0.4529299407,
0.3838324421, 0.5354411807, 0.4211022767, 0.4499589031)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
Code used:
pacman::p_load(tidyverse, tidymodels, xgboost, nnet, caret, stacks)
# Shared preprocessing recipe ----
# Model `alvo` from every other column of `train`, then move the
# identifier, date and return columns to the "ID" role so they stay in the
# data but are no longer treated as predictors.
# The `recipe()` call must be closed before piping into `update_role()`;
# otherwise `update_role()` is applied to `train` inside the `data =`
# argument and the statement never terminates.
rec <-
  recipe(alvo ~ ., data = train) %>%
  update_role(
    id, ticker, data, quarter, ret3m, lret, ibov3m, volat3m,
    new_role = "ID"
  )
# Base workflow holding only the shared recipe `rec`.
wflow <- add_recipe(workflow(), rec)

# Control object from stacks' helper; passed as `control` to tune_grid().
ctrl_grid <- control_stack_grid()
# xgboost model ----
# Boosted-tree classification spec on the xgboost engine: 500 trees fixed,
# `min_n` and `mtry` marked for tuning.
xb_spec <- set_mode(
  set_engine(
    boost_tree(mtry = tune(), min_n = tune(), trees = 500),
    "xgboost"
  ),
  "classification"
)

# Add the model spec to the base workflow.
xb_wflow <- add_model(wflow, xb_spec)

# Set the RNG kind and seed immediately before tuning.
RNGkind("L'Ecuyer-CMRG")
set.seed(1090943296)

# Tune with a grid of size 10 over `cv_folds` (defined outside this excerpt).
xb_res <- xb_wflow %>%
  tune_grid(
    resamples = cv_folds,
    grid = 10,
    control = ctrl_grid
  )
# nnet model ----
# Neural-network classification spec on the nnet engine; hidden units,
# penalty and epochs are all marked for tuning.
nn_spec <- set_mode(
  set_engine(
    mlp(epochs = tune(), penalty = tune(), hidden_units = tune()),
    "nnet"
  ),
  "classification"
)

# Extend the shared recipe with a normalization step over all predictors.
nnet_rec <- step_normalize(rec, all_predictors())

# Fresh workflow combining the nnet spec with its own recipe.
nn_wflow <- add_recipe(add_model(workflow(), nn_spec), nnet_rec)

# Same RNG kind and seed as used for the xgboost tuning run.
RNGkind("L'Ecuyer-CMRG")
set.seed(1090943296)

# Tune with a grid of size 10 over `cv_folds` (defined outside this excerpt).
nn_res <- nn_wflow %>%
  tune_grid(
    resamples = cv_folds,
    grid = 10,
    control = ctrl_grid
  )
# stack model ----
# Collect both tuning results as candidates in one data stack, blend their
# predictions, then fit the member models.
stack_model <- fit_members(
  blend_predictions(
    stacks() %>%
      add_candidates(xb_res) %>%
      add_candidates(nn_res)
  )
)

# predict ----
# Predictions of type "prob" for `test` (defined outside this excerpt).
stack_pred <- stack_model %>%
  predict(test, type = "prob")