Hallo there,
I am trying to run the following simulation to check the performance characteristics i.e. type I error for the t test, welch t t-test and of a two step procedure, where first I run a test of variance homogeneity with all F-Test, Levene's default and Levene's mean, and the non parametric Fligner-Killeen Test and then based on the test decision I run a t test (in case of variance homogeneity) or a Welch Test otherwise. I have written the following code, but it doesnt run smoothly. Are there any suggestions for making better? Thanks.
################################################################################
# Simulation: Comparing Students T-Test with Welch T Test #
################################################################################
library(tidyverse)
library(car) # Load the 'car' package
#------------------------- Parameter definitions -------------------------------
n_sim <- 100
n_obs <- c(20, 40, 120, 200, 500)
var_ratios <- c(1, 2, 4)
mu_control <- 45 # Mean for control group
mu_treatment <- mu_control # Equal means for type 1 error
sd_control <- 10 # Standard deviation for control group
gamma <- 1 # Skewness parameter for skewed normal distribution
parasim <- expand.grid(n_sim = n_sim, n_obs = n_obs, variance_ratio = var_ratios)
#------------------------- Functions -------------------------------
Rejection <- function(pval, n = n_sim, alpha) {
1/n * sum(pval <= alpha)
}
#------------------------- Starting simulations --------------------------------
start.time.1 <- Sys.time()
# Setting outcome variables
Two.Stage.Procedure_Rej <- c()
T_Rej <- c()
W_Rej <- c()
# Setting seed
set.seed(21032024)
# Starting loop
for (row in 1:dim(parasim)[1]) {
p_TwoStage_sk <- rep(NA, parasim$n_sim[row])
p_TwoStage_Norm <- rep(NA, parasim$n_sim[row])
p_T_sk <- rep(NA, parasim$n_sim[row])
p_T_Norm <- rep(NA, parasim$n_sim[row])
p_W_sk <- rep(NA, parasim$n_sim[row])
p_W_Norm <- rep(NA, parasim$n_sim[row])
sd_treatment <- sd_control * sqrt(parasim$variance_ratio[row]) # Standard deviation for treatment group
for (i in 1:parasim$n_sim[row]) {
### Data generation
# indicator denoting assignment of treatment
X_i <- c(rep(1, parasim$n_obs[row] / 2), rep(0, parasim$n_obs[row] / 2))
# Define function to generate skewed normal data
rskewnorm <- function(n, mu, sigma, gamma) {
x <- rnorm(n, mean = mu, sd = sigma)
y <- x + gamma * (x^2 - 1)
return(y)
}
# Skewed normal distribution
Skewed_control <- rskewnorm(n = parasim$n_obs[row] / 2, mu = mu_control, sigma = sd_control, gamma = gamma)
Skewed_treatment <- rskewnorm(n = parasim$n_obs[row] / 2, mu = mu_treatment, sigma = sd_treatment, gamma = gamma)
Skewed <- c(Skewed_control, Skewed_treatment)
# Normal distribution
Norm_control <- rnorm(n = parasim$n_obs[row] / 2, mean = mu_control, sd = sd_control)
Norm_treatment <- rnorm(n = parasim$n_obs[row] / 2, mean = mu_treatment, sd = sd_treatment)
Norm <- c(Norm_control, Norm_treatment)
### Analysis Two Stage Procedure
# Skewed normal distribution
if( (var.test(Skewed[which(X_i == 0)])$p.value <= 0.05) |
(var.test(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
}
if (( leveneTest(Skewed[which(X_i == 0)])$p.value <= 0.05) |
(leveneTest(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
if ((leveneTest(Skewed[which(X_i == 0)], center=mu_control)$p.value <= 0.05) |
(leveneTest(Skewed[which(X_i == 1)], center=mu_treatment)$p.value <= 0.05) ) {
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
if ((fligner.test(Skewed[which(X_i == 0)])$p.value <= 0.05) |
(leveneTest(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
# Normal distribution
# Normal distribution
if( (var.test(Norm[which(X_i == 0)])$p.value <= 0.05) |
(var.test(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
}
if ((leveneTest(Norm[which(X_i == 0)])$p.value <= 0.05) |
(leveneTest(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
if ((leveneTest(Norm[which(X_i == 0)], center=mu_control)$p.value <= 0.05) |
(leveneTest(Norm[which(X_i == 1)], center=mu_treatment)$p.value <= 0.05) ) {
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
if ((fligner.test(Norm[which(X_i == 0)])$p.value <= 0.05) |
(leveneTest(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value
} else{
p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
### Analysis T-Test
# Skewed normal distribution
p_T_sk[i] <- t.test(Skewed ~ X_i)$p.value
# Normal distribution
p_T_Norm[i] <- t.test(Norm ~ X_i)$p.value
### Analysis Welch T-Test
# Skewed normal distribution
p_W_sk[i] <- t.test(Skewed ~ X_i, var.equal = FALSE)$p.value
# Normal distribution
p_W_Norm[i] <- t.test(Norm ~ X_i, var.equal = FALSE)$p.value
}
Two.Stage.Procedure_Rej <- c(Two.Stage.Procedure_Rej,
Rejection(pval = p_TwoStage_sk, n = parasim$n_sim[row], alpha = 0.05),
Rejection(pval = p_TwoStage_Norm, n = parasim$n_sim[row], alpha = 0.05))
T_Rej <- c(T_Rej,
Rejection(pval = p_T_sk, n = parasim$n_sim[row], alpha = 0.05),
Rejection(pval = p_T_Norm, n = parasim$n_sim[row], alpha = 0.05))
W_Rej <- c(W_Rej,
Rejection(pval = p_W_sk, n = parasim$n_sim[row], alpha = 0.05),
Rejection(pval = p_W_Norm, n = parasim$n_sim[row], alpha = 0.05))
}