OK, that is much appreciated. One more favor: how should I handle NA values in those columns? When I run the code on the columns with missing values, I get the message below. Thank you again.
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = df1, statistic = col_mean, R = 1000, col = 1)
Bootstrap Statistics :
WARNING: All values of t1* are NA
I tried to include na.rm=TRUE in the function, but got the error below.
# Define a function to calculate the column means.
# This is the attempted NA-aware version: it is meant to return the mean of
# column `col` of `data`, restricted to the resampled rows `indices`.
col_mean <- function(data, indices, col) {
# NOTE(review): na.rm=TRUE is written as a second argument to return(), not
# inside the mean() call, so it never reaches mean(). R rejects the
# two-argument return() with "multi-argument returns are not permitted".
return(mean(data[indices, col]), na.rm=TRUE)
}
# Bootstrap col_mean on column 1 of df1 with R = 1000 replicates.
boot(df1, col_mean, R = 1000, col = 1)
Error in return(mean(data[indices, col]), na.rm = TRUE) :
multi-argument returns are not permitted
# Alternatively, how can I run boot on this dataset, which has missing values in its columns?
# Build the 100 x 4 example matrix column by column. Each column holds a run
# of zeros, then a run of ones, then a run of NAs; the three run lengths are
# given in that order and add up to 100 rows per column.
m <- cbind(
  X1 = rep(c(0, 1, NA), times = c(28, 62, 10)),
  X2 = rep(c(0, 1, NA), times = c(37, 58, 5)),
  X3 = rep(c(0, 1, NA), times = c(34, 55, 11)),
  X4 = rep(c(0, 1, NA), times = c(28, 66, 6))
)
# Bootstrap statistic: the mean of one column over the resampled rows.
#
# Arguments:
#   data    - matrix or data frame with one observation per row.
#   indices - row indices of the current bootstrap resample (boot() supplies
#             these as the second argument of the statistic).
#   col     - index or name of the column to average.
#   na.rm   - if TRUE (the default), missing values are dropped before
#             averaging, so each replicate is the mean of the non-missing
#             values that were resampled. Without this, a single NA in the
#             resample makes the replicate NA.
#
# Returns a single numeric value. It is NaN when na.rm is TRUE and every
# resampled value is missing.
col_mean <- function(data, indices, col, na.rm = TRUE) {
  mean(data[indices, col], na.rm = na.rm)
}
# Bootstrap every column of `m` and tabulate the point estimate together with
# its percentile confidence interval.
# Requires boot() and boot.ci() from the boot package to be available, plus
# the matrix `m` and the statistic `col_mean` defined earlier in the script.

# Set the seed for reproducibility
set.seed(123)
level <- 0.95   # confidence level of the reported intervals
n_boot <- 1000  # bootstrap replicates per column
col_ids <- seq_len(ncol(m))

# Perform bootstrap sampling for each column. The columns are processed in
# order, so the random number stream is consumed in the same sequence as
# separate boot() calls for column 1, 2, ... would consume it.
boot_results <- lapply(
  col_ids,
  function(j) boot(m, col_mean, R = n_boot, col = j)
)

# Calculate the percentile confidence interval for each column
conf_intervals <- lapply(boot_results, boot.ci, conf = level, type = "perc")

# One row per column of `m`. The `percent` component of a boot.ci result holds
# the lower endpoint in position 4 and the upper endpoint in position 5.
report <- data.frame(
  var = colnames(m),
  est = vapply(boot_results, function(b) b$t0, numeric(1)),
  level = rep(level, length(col_ids)),
  lower = vapply(conf_intervals, function(ci) ci$percent[4], numeric(1)),
  upper = vapply(conf_intervals, function(ci) ci$percent[5], numeric(1))
)
report