Simulation Study in R Testing of Variance

D0l0m1tes · March 23, 2024, 1:50pm

Hallo there,

I am trying to run the following simulation to check the performance characteristics i.e. type I error for the t test, welch t t-test and of a two step procedure, where first I run a test of variance homogeneity with all F-Test, Levene's default and Levene's mean, and the non parametric Fligner-Killeen Test and then based on the test decision I run a t test (in case of variance homogeneity) or a Welch Test otherwise. I have written the following code, but it doesnt run smoothly. Are there any suggestions for making better? Thanks.

################################################################################
#           Simulation: Comparing Students T-Test with Welch T Test              #
################################################################################
library(tidyverse)
library(car)  # Load the 'car' package

#------------------------- Parameter definitions -------------------------------
n_sim <- 100
n_obs <- c(20, 40, 120, 200, 500)
var_ratios <- c(1, 2, 4)
mu_control <- 45  # Mean for control group
mu_treatment <- mu_control  # Equal means for type 1 error
sd_control <- 10  # Standard deviation for control group
gamma <- 1  # Skewness parameter for skewed normal distribution

parasim <- expand.grid(n_sim = n_sim, n_obs = n_obs, variance_ratio = var_ratios)

#------------------------- Functions -------------------------------
Rejection <- function(pval, n = n_sim, alpha) {
  1/n * sum(pval <= alpha)
}

#------------------------- Starting simulations --------------------------------
start.time.1 <- Sys.time()

# Setting outcome variables
Two.Stage.Procedure_Rej <- c()
T_Rej <- c()
W_Rej <- c()

# Setting seed
set.seed(21032024)

# Starting loop
for (row in 1:dim(parasim)[1]) {
  p_TwoStage_sk <- rep(NA, parasim$n_sim[row])
  p_TwoStage_Norm <- rep(NA, parasim$n_sim[row])
  
  p_T_sk <- rep(NA, parasim$n_sim[row])
  p_T_Norm <- rep(NA, parasim$n_sim[row])
  
  p_W_sk <- rep(NA, parasim$n_sim[row])
  p_W_Norm <- rep(NA, parasim$n_sim[row])
  
  sd_treatment <- sd_control * sqrt(parasim$variance_ratio[row])  # Standard deviation for treatment group
  
  for (i in 1:parasim$n_sim[row]) {
    ### Data generation
    # indicator denoting assignment of treatment
    X_i <- c(rep(1, parasim$n_obs[row] / 2), rep(0, parasim$n_obs[row] / 2))
    
    # Define function to generate skewed normal data
    rskewnorm <- function(n, mu, sigma, gamma) {
      x <- rnorm(n, mean = mu, sd = sigma)
      y <- x + gamma * (x^2 - 1)
      return(y)
    }
    
    # Skewed normal distribution
    Skewed_control <- rskewnorm(n = parasim$n_obs[row] / 2, mu = mu_control, sigma = sd_control, gamma = gamma)
    Skewed_treatment <- rskewnorm(n = parasim$n_obs[row] / 2, mu = mu_treatment, sigma = sd_treatment, gamma = gamma)
    Skewed <- c(Skewed_control, Skewed_treatment)
    
    # Normal distribution
    Norm_control <- rnorm(n = parasim$n_obs[row] / 2, mean = mu_control, sd = sd_control)
    Norm_treatment <- rnorm(n = parasim$n_obs[row] / 2, mean = mu_treatment, sd = sd_treatment)
    Norm <- c(Norm_control, Norm_treatment)
    
    ### Analysis Two Stage Procedure
        # Skewed normal distribution
    if( (var.test(Skewed[which(X_i == 0)])$p.value <= 0.05) | 
        (var.test(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
    }
    if (( leveneTest(Skewed[which(X_i == 0)])$p.value <= 0.05) |
        (leveneTest(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
      
    if ((leveneTest(Skewed[which(X_i == 0)], center=mu_control)$p.value <= 0.05) |
        (leveneTest(Skewed[which(X_i == 1)], center=mu_treatment)$p.value <= 0.05) ) {
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
    if ((fligner.test(Skewed[which(X_i == 0)])$p.value <= 0.05) |
        (leveneTest(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value
    
   # Normal distribution
     # Normal distribution
    if( (var.test(Norm[which(X_i == 0)])$p.value <= 0.05) | 
        (var.test(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value
    } else{
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
    }
    if ((leveneTest(Norm[which(X_i == 0)])$p.value <= 0.05) |
        (leveneTest(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
      
    if ((leveneTest(Norm[which(X_i == 0)], center=mu_control)$p.value <= 0.05) |
        (leveneTest(Norm[which(X_i == 1)], center=mu_treatment)$p.value <= 0.05) ) {
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
      
    if ((fligner.test(Norm[which(X_i == 0)])$p.value <= 0.05) |
        (leveneTest(Norm[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Norm[i] <- t.test(Norm ~ X_i)$p.value
    
    ### Analysis T-Test
    # Skewed normal distribution
    p_T_sk[i] <- t.test(Skewed ~ X_i)$p.value
    
    # Normal distribution
    p_T_Norm[i] <- t.test(Norm ~ X_i)$p.value
    
    ### Analysis Welch T-Test
    # Skewed normal distribution
    p_W_sk[i] <- t.test(Skewed ~ X_i, var.equal = FALSE)$p.value
    
    # Normal distribution
    p_W_Norm[i] <- t.test(Norm ~ X_i, var.equal = FALSE)$p.value
  }
  
  Two.Stage.Procedure_Rej <- c(Two.Stage.Procedure_Rej,
                               Rejection(pval = p_TwoStage_sk, n = parasim$n_sim[row], alpha = 0.05),
                               Rejection(pval = p_TwoStage_Norm, n = parasim$n_sim[row], alpha = 0.05))
  
  T_Rej <- c(T_Rej,
             Rejection(pval = p_T_sk, n = parasim$n_sim[row], alpha = 0.05),
             Rejection(pval = p_T_Norm, n = parasim$n_sim[row], alpha = 0.05))
  
  W_Rej <- c(W_Rej,
             Rejection(pval = p_W_sk, n = parasim$n_sim[row], alpha = 0.05),
             Rejection(pval = p_W_Norm, n = parasim$n_sim[row], alpha = 0.05))
}

dromano · March 23, 2024, 2:17pm

Hi,

Could you say more? What isn't smooth about how it runs and what would you prefer it do?

D0l0m1tes · March 23, 2024, 2:49pm

Hi so I keep getting many different errors, when running it. I would appreciate some help, if possible. to resolve them.

dromano · March 23, 2024, 2:54pm

I'm sure folks here will do their best to help.

What is the first error you get, and could you post the code that triggers the error and the error itself here, as a code block, like this?

``` r
[<-- paste code and error output here]
```

D0l0m1tes · March 23, 2024, 2:57pm

### Analysis Two Stage Procedure
     # Skewed normal distribution
    if( (var.test(Skewed[which(X_i == 0)])$p.value <= 0.05) | 
        (var.test(Skewed[which(X_i == 1)])$p.value <= 0.05) ) {
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i, var.equal=FALSE)$p.value 
    } else{
      p_TwoStage_Skewed[i] <- t.test(Skewed ~ X_i)$p.value


Error in var.test.default(Skewed[which(X_i == 0)]) : 
  argument "y" is missing, with no default
2.
var.test.default(Skewed[which(X_i == 0)])
1.
var.test(Skewed[which(X_i == 0)])

startz · March 23, 2024, 4:24pm

Does var.test require more than one argument?

dromano · March 23, 2024, 5:22pm

Thanks, could you also edit your original post so that all the code is in a single code block?

dromano · March 23, 2024, 6:57pm

Thanks for the edit to the original post.

Which distributions did you intend to compare here?

system · April 13, 2024, 6:58pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.