I am trying to combine the results from many different bootstrap runs into one data frame/tibble, where each set of bootstrap results is computed with a different value of some parameter. A silly example that works, but is less than pretty, is below:
library(tidyverse)
#> Registered S3 method overwritten by 'rvest':
#> method from
#> read_xml.response xml2
library(rsample)
# Just a silly example. My function runs a regression and returns the estimated
# elasticity
# Share of rows in `df` whose `carb` value lies strictly below `cut_off`.
test_fun <- function(df, cut_off) {
  below_cut_off <- df$carb < cut_off
  mean(below_cut_off)
}
# Statistic function for one bootstrap split: evaluates test_fun() on the
# split's analysis data and returns a one-row tibble with the columns
# `term`, `estimate` and `std.error`. Extra arguments in `...` are forwarded
# to test_fun().
fit_fun <- function(split, ...) {
  resampled <- analysis(split)
  ratio <- test_fun(resampled, ...)
  tibble(term = "ratio", estimate = ratio, std.error = NA_real_)
}
# Bootstrap percentile interval of the ratio for each cut-off value.
# Results are collected in a preallocated list rather than in dynamically
# named variables created with assign(), so they can be row-bound in a single
# call without listing every object by hand.
cut_offs <- 1:9
ratio_bt <- vector("list", length(cut_offs))
for (i in seq_along(cut_offs)) {
  z <- cut_offs[[i]]
  # Re-seed on every iteration so that each cut-off starts from the same
  # random-number state.
  set.seed(2)
  ratio_bt[[i]] <-
    bootstraps(mtcars, times = 2000, apparent = TRUE) %>%
    mutate(ratio = map(splits, ~ fit_fun(.x, cut_off = z))) %>%
    int_pctl(ratio) %>%
    mutate(cut_off = z)
}
bind_rows(ratio_bt)
#> # A tibble: 9 x 7
#> term .lower .estimate .upper .alpha .method cut_off
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <int>
#> 1 ratio 0 0 0 0.05 percentile 1
#> 2 ratio 0.0938 0.219 0.375 0.05 percentile 2
#> 3 ratio 0.375 0.534 0.719 0.05 percentile 3
#> 4 ratio 0.438 0.626 0.781 0.05 percentile 4
#> 5 ratio 0.844 0.937 1 0.05 percentile 5
#> 6 ratio 0.844 0.937 1 0.05 percentile 6
#> 7 ratio 0.906 0.970 1 0.05 percentile 7
#> 8 ratio 0.906 0.970 1 0.05 percentile 8
#> 9 ratio 1 1 1 0.05 percentile 9
Created on 2020-03-10 by the reprex package (v0.2.1)
I was thinking that I could use map_df() instead of the loop, but the following gives me the error shown below:
# Version that does not work
library(tidyverse)
#> Registered S3 method overwritten by 'rvest':
#> method from
#> read_xml.response xml2
library(rsample)
# Share of rows in `df` whose `carb` value lies strictly below `cut_off`.
test_fun <- function(df, cut_off) {
mean((df$carb < cut_off))
}
# Statistic function for one bootstrap split: evaluates test_fun() on the
# split's analysis data and returns a one-row tibble with the columns
# `term`, `estimate` and `std.error`. Extra arguments in `...` are forwarded
# to test_fun().
fit_fun <- function(split, ...) {
  resampled <- analysis(split)
  ratio <- test_fun(resampled, ...)
  tibble(term = "ratio", estimate = ratio, std.error = NA_real_)
}
# Intended: one bootstrap percentile interval per cut-off value, with the
# per-cut-off results row-bound by map_df().
# NOTE(review): inside the `~` lambda below, `.y` is purrr's pronoun for the
# lambda's own second argument, so it shadows the outer function's `.y`.
# map() passes the lambda only one argument, which is presumably what raises
# "the ... list contains fewer than 2 elements". Giving the outer parameter a
# different name (e.g. `cut_off`) should avoid the clash - confirm.
map_df(1:9, function(.y) {
# Same seed on every call, so each cut-off starts from the same RNG state
set.seed(2)
m <- bootstraps(mtcars, times = 2000, apparent = TRUE) %>%
mutate(ratio = map(splits, ~ fit_fun(.x, cut_off = .y))) %>%
int_pctl(ratio) %>%
mutate(cut_off = .y)
return(m)
})
#> Error in mean((df$carb < cut_off)): the ... list contains fewer than 2 elements
Created on 2020-03-10 by the reprex package (v0.2.1)
I suspect that the problem lies in the combination of multiple map() calls, because the following works fine:
# This works
library(tidyverse)
#> Registered S3 method overwritten by 'rvest':
#> method from
#> read_xml.response xml2
library(rsample)
# Share of rows in `df` whose `carb` value lies strictly below `cut_off`.
test_fun <- function(df, cut_off) {
  below_cut_off <- df$carb < cut_off
  mean(below_cut_off)
}
# One row per cut-off: the ratio computed on the full mtcars data plus the
# cut-off that produced it. `.y` is only used directly in this function; there
# is no nested `~` lambda here.
map_df(1:9, function(.y) {
  ratio_df <- data.frame(ratio = test_fun(mtcars, cut_off = .y))
  mutate(ratio_df, cut_off = .y)
})
#> ratio cut_off
#> 1 0.00000 1
#> 2 0.21875 2
#> 3 0.53125 3
#> 4 0.62500 4
#> 5 0.93750 5
#> 6 0.93750 6
#> 7 0.96875 7
#> 8 0.96875 8
#> 9 1.00000 9
Created on 2020-03-10 by the reprex package (v0.2.1)
Suggestions for how to loop over the cut-off values using the tidyverse would be greatly appreciated.
Claus