How do I store the results of my while loop within a simulation function in pre-defined results matrix

Renren · December 25, 2024, 7:08pm

Hello,

I am running a power simulation similar to the one described in this paper using the following code: https://www.sciencedirect.com/science/article/abs/pii/S1551714421003992

# Load necessary library
install.packages('MASS')
install.packages('ggplot2')
install.packages('reshape2')
install.packages("gridExtra")
install.packages("ggbreak")

library(MASS)
library(ggplot2)
library(reshape2)
library(gridExtra)
library(ggbreak)
library(dplyr)

# Set study input data
outcomes            <- c('mJOA', 'Fast Tap', 'Hold', 'Type', 'Walk')
n_outcomes          <- length(outcomes)
mJOA_BL_mean        <- 12.0  # table 1, 'riluzole' group, CSM trial
mJOA_BL_sd          <-  1.4  # table 1, 'riluzole' group, CSM trial
taps_BL_mean        <-  0.18 # EMPOWER (unpublished)
taps_BL_sd          <-  0.19 # EMPOWER (unpublished)
type_BL_mean        <-  1.59 # EMPOWER (unpublished)
type_BL_sd          <-  0.53 # EMPOWER (unpublished)
hold_BL_mean        <- 50.5  # EMPOWER (unpublished)
hold_BL_sd          <- 38.2  # EMPOWER (unpublished)
walk_BL_mean        <- 60.5  # EMPOWER (unpublished)
walk_BL_sd          <- 27.1  # EMPOWER (unpublished)

timepoints          <- c('3mo','6mo','12mo')
n_timepoints        <- length(timepoints)
mJOA_effect_sizes   <- c(1.25, 2.5, 2.5) # figure 2, CSM trial

# Set study assumptions
n1 <- 181                       # sample size in control arm, protocol
n2 <-  n1                       # sample size in treatment arm, protocol

# Set simulation parameters
set.seed(389)                        # fix seed
alpha        <- 0.05                 # alpha
beta         <- 0.80                 # beta
simunum      <- 10000                # number of simulations
correlations <- c(0.2, 0.5, 0.8)     # correlation across time
n_correls    <- length(correlations)

# Initialize covariance matrix and results storage
Sigma  <- array(0, dim=rep(n_timepoints, 2))
result <- matrix(NA, nrow=n_correls*n_timepoints, ncol=6)
colnames(result) <- c(
  'rho',
  'endpoint',
  'test',
  'power',
  'n_critical',
  ''
)

#' Define simulation function
#' 
#' Arguments
#' @o_mean         : Baseline mean for given outcome measure
#' @o_sd           : Baseline sd for given outcome measure
#' @o_effect_sizes : Effect sizes at given timepoints
#' @N              : Sample size of an arm in the trial
#' 
#' Returns
#' @result         : List with summary data ("result) and
#'                   simulated values (
#'                      "C0_mean", "C0_sd", "C0_ci.lb", "C0_ci.ub",
#'                      "I0_mean", "I0_sd,  "I0_ci.lb", "I0_ci.ub",
#'                      "p"
#'                   ).
sim <- function(o_mean, o_sd, o_effect_sizes, N, test=factor(c("t","ANCOVA"))) {
  
  # Validate test argument
  if (!(test %in% c("t", "ANCOVA"))) {
    stop("Invalid 'test' argument. Please choose either 't' or 'ANCOVA'.")
  }
  
  # Begin simulation by setting correlation across time points
  for (k in 1:n_correls) {
    rho <- correlations[k]
    
    # Build covariance matrix for the given correlation
    for (i in 1:n_timepoints) { for (j in 1:n_timepoints) {
        Sigma[i, j] <- ifelse(i==j, o_sd^2, rho*o_sd^2)
    }}
    
    
    # Initialise empirical computation of critical sample size by:
    # - Simulating 10,000 trials of size 'N-m'
    # - Computing power
    # - Incrementing 'm' by 1 if power falls below beta
    # - Rinse and repeat until power falls below beta
    
    # Notes:
    # - Multiple primary stopping criteria: ANY time point <80%; not ALL <80%
    # - Without P-value correction for multiple comparisons
    # - The 10,000x 'N-m-1' trials are NOT A SUBSET of the 10,000x 'N-m' trials
    #   (ie, the 'N-m-1' trials are re-generated, NOT sampled w/o rep from 'N-m')

    
    m <- 0
    if        (test=="t")      { power1 <- rep(1,3) }
    else if   (test=="ANCOVA") { power2 <- rep(1,3) }
    
    while (
      if      (test=="t")      { all(power1>beta) }
      else if (test=="ANCOVA") { all(power2>beta) }
    ) {
      
      # Initialise empty objects to store summary stats for each simu
      C0_mean  <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control means
      I0_mean  <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment means
      C0_sd    <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control sd's
      I0_sd    <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment sd's
      C0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_lb
      I0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_lb
      C0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_ub
      I0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_ub
      p        <- matrix(NA, nrow=simunum, ncol=n_timepoints) # p values
      
      for (simu in 1:simunum) {
        
        # Simulate all endpoints of a trial
        mu_C0 <- rep(o_mean, n_timepoints)    # control arm intercepts
        mu_I0 <- mu_C0 + o_effect_sizes       # treatment arm intercepts
        
        C0    <- mvrnorm(N-m, mu_C0, Sigma)   # control arm simulated outcomes
        I0    <- mvrnorm(N-m, mu_I0, Sigma)   # treatment arm simulated outcomes
        
        for (j in 1:n_timepoints) {
          
          # Method 1: T-score
          if (test=="t") {
            df        <- (N-m) + (N-m) - 2
            S_pooled  <- ( sd(C0[,j])^2*(N-m-1) + sd(I0[,j])^2*(N-m-1) ) / df
            sd_pooled <- sqrt(S_pooled)
            diff      <- mean(C0[,j]) - mean(I0[,j])
            t         <- diff / sqrt( sd_pooled*(1/(N-m)+1/(N-m)) )
            P         <- pt(-abs(t), df) + (1 - pt(abs(t), df))
          }
          
          # Method 2: Linear model (ANCOVA)
          else if (test=="ANCOVA") {
            I0.bl     <- mvrnorm(N-m, o_mean, o_sd)
            C0.bl     <- mvrnorm(N-m, o_mean, o_sd)
            
            endpoint  <- c( I0[,j],    C0[,j]    )
            baseline  <- c( I0.bl[,1], C0.bl[,1] )
            group     <- factor(rep( c("Treatment","Control"), each=(N-m)) )
            model     <- lm(endpoint ~ baseline + group)
            P         <- anova(model)["group", "Pr(>F)"]
          }
          
          # Store trial stats
          C0_mean[simu, j]   <- mean( C0[,j] )
          I0_mean[simu, j]   <- mean( I0[,j] )
          C0_sd[simu, j]     <- sd( C0[,j] )
          I0_sd[simu, j]     <- sd( I0[,j] )
          C0_ci.lb[simu, j]  <- quantile( C0[,j], probs=0.025 )
          I0_ci.lb[simu, j]  <- quantile( I0[,j], probs=0.025 )
          C0_ci.ub[simu, j]  <- quantile( C0[,j], probs=0.975 )
          I0_ci.ub[simu, j]  <- quantile( I0[,j], probs=0.975 )
          p[simu, j]         <- P
        }
        
      }
      
      # Calculate power
      print(N-m)
      if (test=="t") {
        # Multiple primary endpoints
        power1 <- colSums(p < alpha) / simunum
        if (all(power1>beta)) { m <- m+1 ; power <- power1 }
        print(power1)
      }
      
      else if (test=="ANCOVA") {
        # Multiple primary endpoints
        power2 <- colSums(p < alpha) / simunum
        if (all(power2>beta)) { m <- m+1 ; power <- power2 }
        print(power2)
      }
      
    }
    
    
    # Store global simulation stats
    for (j in 1:n_timepoints) {
      result[3*(k-1)+j,] <- c(
        rho,
        timepoints[j],
        test,
        power[j],
        N-m+1,
        NA
      )
    }
  }
  
  result = as.data.frame(result)
  print(result)
  return(list(
    "result"           = result,
    "C0_mean"          = C0_mean,
    "C0_sd"            = C0_sd,
    "C0_ci.lb"         = C0_ci.lb,
    "C0_ci.ub"         = C0_ci.ub,
    "I0_mean"          = I0_mean,
    "I0_sd"            = I0_sd,
    "I0_ci.lb"         = I0_ci.lb,
    "I0_ci.ub"         = I0_ci.ub,
    "p"                = p
  ))
}

# Method 1: Extrapolation by correlation
mJOA_taps_cor       <- -0.40             # table 2, doi:10.2196/52832
mJOA_type_cor       <-  0.37             # table 2, doi:10.2196/52832
mJOA_hold_cor       <-  0.64             # table 2, doi:10.2196/52832
mJOA_walk_cor       <-  0.35             # table 2, doi:10.2196/52832

taps_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_taps_cor * taps_BL_mean
type_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_type_cor * type_BL_mean
hold_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_hold_cor * hold_BL_mean
walk_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_walk_cor * walk_BL_mean

mJOA_result1 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, n1)
taps_result1 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes, n1)
type_result1 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes, n1)
hold_result1 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes, n1)
walk_result1 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, n1)

# Method 2: Extrapolation by estimation
taps_effect_sizes <- c( -0.16, -0.31, -0.31 ) # EMPOWER (unpublished)
type_effect_sizes <- c(  0.29,  0.57,  0.57 ) # EMPOWER (unpublished)
hold_effect_sizes <- c(  37.7,  75.4,  75.4 ) # EMPOWER (unpublished)
walk_effect_sizes <- c(  8.99,  18.0,  18.0 ) # EMPOWER (unpublished)

# T-score: Head-on comparisons not adjusting for baseline or repeated measures
mJOA_result2 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, 18, "t")       # Nc=17
taps_result2 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes, 83, "t")       # Nc=81-82
type_result2 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes, 86, "t")       # Nc=84-85
hold_result2 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes,  4, "t")       # Nc=3
walk_result2 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, 18, "t")       # Nc=16-17

# ANCOVA: Head-on comparisons adjusting for baseline but not repeated measures
mJOA_result3 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes,  25, "ANCOVA") # Nc=22
taps_result3 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes,  25, "ANCOVA") # Nc=24
type_result3 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes,  55, "ANCOVA") # Nc=54-55
hold_result3 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes,  20, "ANCOVA") # Nc=18
walk_result3 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, 146, "ANCOVA") # Nc=144

I would like to now do the following: add modifications to the following code so that all the power1 or power2 values (depending on whether test == "t" or "ANCOVA") up until beta exceeds 0.8 (the while loop is broken) are stored as a list in a column called 'power_values' in the 'result' dataframe (along with all the other existing outputs). I would also like to store the correspondig sample size (N-m) values as a list in a column called 'sample_sizes' in the result dataframe. At the moment, the simulation function only stores the last power and corresponding sample size, but I would like all the powers and sample sizes to be stored as lists or vectors in 2 additional columns called 'power_values' and 'sample_sizes', respectively. I would like to then use the values within the columns to make a figure of each power plotted against the corresponding sample size value as in Figure 1 of the above study.

I am struggling to manipulate the simulation function to do this and would very much appreciate any suggestions/help to achieve my intended goal.

Thank you in advance.

system · March 25, 2025, 7:09pm

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.