## The problem
I'm having trouble parallelizing my `fit_resamples` pipeline using `future`; the call fails with the error shown below.
```r
# Reproduction script: resample a glmnet-engine linear_reg() specification
# over grouped cross-validation splits while a 4-worker multisession plan
# is active.
# NOTE(review): `my_data` is not defined in this snippet; it must already
# exist in the session before this code is run.
library(tidymodels)
library(furrr)
# presumably plan() and multisession come from the future package, attached
# as a dependency of furrr -- confirm
plan(multisession, workers = 4)
# Print the dimensions and the in-memory size of the input data for context.
print(dim(my_data))
print(lobstr::obj_size(my_data))
# Grouped v-fold CV with v = 5 and repeats = 4; `group` is presumably a
# column of `my_data` identifying the groups -- verify against the data.
splits <- rsample::group_vfold_cv(my_data, v = 5, repeats = 4, group = group)
# Fit the model specification on the resamples.
fit_resamples(
# Model spec: glmnet engine with mixture = 1 and penalty = 1.
linear_reg(engine = "glmnet", mixture = 1, penalty = 1),
# Formula: `ga` is the response; all other columns except `group` are terms.
ga ~ . - group,
splits
)
```
This results in:
```text
[1] 54 962
693.65 kB
Error in getGlobalsAndPackages(expr, envir = globals_envir, globals = globals, :
The total size of the 38 globals exported for future expression (‘{; lapply(seq_along(...future.x_ii), FUN = function(jj) {; ...future.x_jj <- ...future.x_ii[[jj]]; {; {; NULL; ...; }, error = identity); }); }’) is 40.60 GiB.. This exceeds the maximum allowed size of 500.00 MiB (option 'future.globals.maxSize'). The three largest globals are ‘fit’ (2.72 GiB of class ‘function’), ‘fn_tune_grid_loop_iter’ (2.71 GiB of class ‘function’) and ‘predict_model’ (2.71 GiB of class ‘function’)
16.
stop(msg) at
globals.R#365
15.
getGlobalsAndPackages(expr, envir = globals_envir, globals = globals,
packages = packages) at
dofuture_OP.R#451
14.
doFuture2(foreach, expr, envir = parent.frame(), data = NULL) at
dofuture_OP.R#170
13.
for_each %op% {
fn_tune_grid_loop_iter_safely(fn_tune_grid_loop_iter = fn_tune_grid_loop_iter,
split = split, grid_info = grid_info, workflow = workflow,
metrics = metrics, control = control, eval_time = eval_time, ...
12.
withCallingHandlers(expr, packageStartupMessage = function(c) tryInvokeRestart("muffleMessage"))
11.
suppressPackageStartupMessages(for_each %op% {
fn_tune_grid_loop_iter_safely(fn_tune_grid_loop_iter = fn_tune_grid_loop_iter,
split = split, grid_info = grid_info, workflow = workflow,
metrics = metrics, control = control, eval_time = eval_time, ... at
grid_code_paths.R#200
10.
rlang::with_env(rlang::env_clone(rlang::current_env()), {
if (is_future) {
for_each <- foreach::foreach(split = splits, seed = seeds,
.options.future = list(seed = NULL, packages = packages)) ... at
grid_code_paths.R#179
9.
tune_grid_loop_impl(fn_tune_grid_loop_iter = fn_tune_grid_loop_iter,
resamples = resamples, grid = grid, workflow = workflow,
metrics = metrics, control = control, eval_time = eval_time,
rng = rng, parallel_over = parallel_over) at
grid_code_paths.R#54
8.
fn_tune_grid_loop(resamples, grid, workflow, metrics, control,
eval_time, rng) at
grid_code_paths.R#15
7.
tune_grid_loop(resamples = resamples, grid = grid, workflow = workflow,
metrics = metrics, eval_time = eval_time, control = control,
rng = rng) at
tune_grid.R#355
6.
tune_grid_workflow(workflow = workflow, resamples = resamples,
grid = grid, metrics = metrics, eval_time = eval_time, pset = pset,
control = control, rng = rng, call = call) at
resample.R#143
5.
resample_workflow(workflow = object, resamples = resamples, metrics = metrics,
eval_time = eval_time, control = control, rng = TRUE) at
resample.R#120
4.
fit_resamples.workflow(wflow, resamples = resamples, metrics = metrics,
eval_time = eval_time, control = control) at
resample.R#58
3.
fit_resamples(wflow, resamples = resamples, metrics = metrics,
eval_time = eval_time, control = control) at
resample.R#98
2.
fit_resamples.model_spec(linear_reg(engine = "glmnet", mixture = 1,
penalty = 1), ga ~ . - group, splits) at
resample.R#58
1.
fit_resamples(linear_reg(engine = "glmnet", mixture = 1, penalty = 1),
ga ~ . - group, splits)
```
Is the issue on my side here? Sorry, but I don't quite understand how I can prevent these global objects from being exported in this case.