I want to do repeated resampling with replacement, which can be done like the generic example below, but I'm wondering if there is a more elegant solution than the one applied in the lapply()
-step?
# Load libraries ----------------------------------------------------------
library("tidyverse")
# Define example data -----------------------------------------------------
n <- 1000
my_data <- tibble(
x = rnorm(n),
y = rnorm(n),
group = sample(LETTERS[1:4],
size = n,
replace = TRUE),
class = sample(c(0, 1),
size = n,
replace = TRUE)
)
# Resample data once ------------------------------------------------------
set.seed(859948)
my_data %>%
group_by(group, class) %>%
sample_n(100,
replace = TRUE) %>%
ungroup %>%
group_by(group, class) %>%
summarise(value = mean(x) - mean(y))
# Resample data multiple times --------------------------------------------
set.seed(859948)
my_resamples <- lapply(1:1000,
function(i){
my_data %>%
group_by(group, class) %>%
sample_n(100,
replace = TRUE) %>%
ungroup %>%
group_by(group, class) %>%
summarise(value = mean(x) - mean(y)) })
my_resamples <- my_resamples %>%
bind_rows
# Visualise distributions -------------------------------------------------
my_resamples %>%
ggplot(aes(x = value,
y = group,
fill = factor(class))) +
geom_boxplot() +
theme_minimal()
Yielding