Hello,
I am running a power simulation similar to the one described in this paper using the following code: https://www.sciencedirect.com/science/article/abs/pii/S1551714421003992
# Load necessary library
install.packages('MASS')
install.packages('ggplot2')
install.packages('reshape2')
install.packages("gridExtra")
install.packages("ggbreak")
library(MASS)
library(ggplot2)
library(reshape2)
library(gridExtra)
library(ggbreak)
library(dplyr)
# Set study input data
outcomes <- c('mJOA', 'Fast Tap', 'Hold', 'Type', 'Walk')
n_outcomes <- length(outcomes)
mJOA_BL_mean <- 12.0 # table 1, 'riluzole' group, CSM trial
mJOA_BL_sd <- 1.4 # table 1, 'riluzole' group, CSM trial
taps_BL_mean <- 0.18 # EMPOWER (unpublished)
taps_BL_sd <- 0.19 # EMPOWER (unpublished)
type_BL_mean <- 1.59 # EMPOWER (unpublished)
type_BL_sd <- 0.53 # EMPOWER (unpublished)
hold_BL_mean <- 50.5 # EMPOWER (unpublished)
hold_BL_sd <- 38.2 # EMPOWER (unpublished)
walk_BL_mean <- 60.5 # EMPOWER (unpublished)
walk_BL_sd <- 27.1 # EMPOWER (unpublished)
timepoints <- c('3mo','6mo','12mo')
n_timepoints <- length(timepoints)
mJOA_effect_sizes <- c(1.25, 2.5, 2.5) # figure 2, CSM trial
# Set study assumptions
n1 <- 181 # sample size in control arm, protocol
n2 <- n1 # sample size in treatment arm, protocol
# Set simulation parameters
set.seed(389) # fix seed
alpha <- 0.05 # alpha
beta <- 0.80 # beta
simunum <- 10000 # number of simulations
correlations <- c(0.2, 0.5, 0.8) # correlation across time
n_correls <- length(correlations)
# Initialize covariance matrix and results storage
Sigma <- array(0, dim=rep(n_timepoints, 2))
result <- matrix(NA, nrow=n_correls*n_timepoints, ncol=6)
colnames(result) <- c(
'rho',
'endpoint',
'test',
'power',
'n_critical',
''
)
#' Define simulation function
#'
#' Arguments
#' @o_mean : Baseline mean for given outcome measure
#' @o_sd : Baseline sd for given outcome measure
#' @o_effect_sizes : Effect sizes at given timepoints
#' @N : Sample size of an arm in the trial
#'
#' Returns
#' @result : List with summary data ("result) and
#' simulated values (
#' "C0_mean", "C0_sd", "C0_ci.lb", "C0_ci.ub",
#' "I0_mean", "I0_sd, "I0_ci.lb", "I0_ci.ub",
#' "p"
#' ).
sim <- function(o_mean, o_sd, o_effect_sizes, N, test=factor(c("t","ANCOVA"))) {
# Validate test argument
if (!(test %in% c("t", "ANCOVA"))) {
stop("Invalid 'test' argument. Please choose either 't' or 'ANCOVA'.")
}
# Begin simulation by setting correlation across time points
for (k in 1:n_correls) {
rho <- correlations[k]
# Build covariance matrix for the given correlation
for (i in 1:n_timepoints) { for (j in 1:n_timepoints) {
Sigma[i, j] <- ifelse(i==j, o_sd^2, rho*o_sd^2)
}}
# Initialise empirical computation of critical sample size by:
# - Simulating 10,000 trials of size 'N-m'
# - Computing power
# - Incrementing 'm' by 1 if power falls below beta
# - Rinse and repeat until power falls below beta
# Notes:
# - Multiple primary stopping criteria: ANY time point <80%; not ALL <80%
# - Without P-value correction for multiple comparisons
# - The 10,000x 'N-m-1' trials are NOT A SUBSET of the 10,000x 'N-m' trials
# (ie, the 'N-m-1' trials are re-generated, NOT sampled w/o rep from 'N-m')
m <- 0
if (test=="t") { power1 <- rep(1,3) }
else if (test=="ANCOVA") { power2 <- rep(1,3) }
while (
if (test=="t") { all(power1>beta) }
else if (test=="ANCOVA") { all(power2>beta) }
) {
# Initialise empty objects to store summary stats for each simu
C0_mean <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control means
I0_mean <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment means
C0_sd <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control sd's
I0_sd <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment sd's
C0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_lb
I0_ci.lb <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_lb
C0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # control CI_ub
I0_ci.ub <- matrix(NA, nrow=simunum, ncol=n_timepoints) # treatment CI_ub
p <- matrix(NA, nrow=simunum, ncol=n_timepoints) # p values
for (simu in 1:simunum) {
# Simulate all endpoints of a trial
mu_C0 <- rep(o_mean, n_timepoints) # control arm intercepts
mu_I0 <- mu_C0 + o_effect_sizes # treatment arm intercepts
C0 <- mvrnorm(N-m, mu_C0, Sigma) # control arm simulated outcomes
I0 <- mvrnorm(N-m, mu_I0, Sigma) # treatment arm simulated outcomes
for (j in 1:n_timepoints) {
# Method 1: T-score
if (test=="t") {
df <- (N-m) + (N-m) - 2
S_pooled <- ( sd(C0[,j])^2*(N-m-1) + sd(I0[,j])^2*(N-m-1) ) / df
sd_pooled <- sqrt(S_pooled)
diff <- mean(C0[,j]) - mean(I0[,j])
t <- diff / sqrt( sd_pooled*(1/(N-m)+1/(N-m)) )
P <- pt(-abs(t), df) + (1 - pt(abs(t), df))
}
# Method 2: Linear model (ANCOVA)
else if (test=="ANCOVA") {
I0.bl <- mvrnorm(N-m, o_mean, o_sd)
C0.bl <- mvrnorm(N-m, o_mean, o_sd)
endpoint <- c( I0[,j], C0[,j] )
baseline <- c( I0.bl[,1], C0.bl[,1] )
group <- factor(rep( c("Treatment","Control"), each=(N-m)) )
model <- lm(endpoint ~ baseline + group)
P <- anova(model)["group", "Pr(>F)"]
}
# Store trial stats
C0_mean[simu, j] <- mean( C0[,j] )
I0_mean[simu, j] <- mean( I0[,j] )
C0_sd[simu, j] <- sd( C0[,j] )
I0_sd[simu, j] <- sd( I0[,j] )
C0_ci.lb[simu, j] <- quantile( C0[,j], probs=0.025 )
I0_ci.lb[simu, j] <- quantile( I0[,j], probs=0.025 )
C0_ci.ub[simu, j] <- quantile( C0[,j], probs=0.975 )
I0_ci.ub[simu, j] <- quantile( I0[,j], probs=0.975 )
p[simu, j] <- P
}
}
# Calculate power
print(N-m)
if (test=="t") {
# Multiple primary endpoints
power1 <- colSums(p < alpha) / simunum
if (all(power1>beta)) { m <- m+1 ; power <- power1 }
print(power1)
}
else if (test=="ANCOVA") {
# Multiple primary endpoints
power2 <- colSums(p < alpha) / simunum
if (all(power2>beta)) { m <- m+1 ; power <- power2 }
print(power2)
}
}
# Store global simulation stats
for (j in 1:n_timepoints) {
result[3*(k-1)+j,] <- c(
rho,
timepoints[j],
test,
power[j],
N-m+1,
NA
)
}
}
result = as.data.frame(result)
print(result)
return(list(
"result" = result,
"C0_mean" = C0_mean,
"C0_sd" = C0_sd,
"C0_ci.lb" = C0_ci.lb,
"C0_ci.ub" = C0_ci.ub,
"I0_mean" = I0_mean,
"I0_sd" = I0_sd,
"I0_ci.lb" = I0_ci.lb,
"I0_ci.ub" = I0_ci.ub,
"p" = p
))
}
# Method 1: Extrapolation by correlation
mJOA_taps_cor <- -0.40 # table 2, doi:10.2196/52832
mJOA_type_cor <- 0.37 # table 2, doi:10.2196/52832
mJOA_hold_cor <- 0.64 # table 2, doi:10.2196/52832
mJOA_walk_cor <- 0.35 # table 2, doi:10.2196/52832
taps_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_taps_cor * taps_BL_mean
type_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_type_cor * type_BL_mean
hold_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_hold_cor * hold_BL_mean
walk_effect_sizes <- mJOA_effect_sizes / mJOA_BL_mean * mJOA_walk_cor * walk_BL_mean
mJOA_result1 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, n1)
taps_result1 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes, n1)
type_result1 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes, n1)
hold_result1 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes, n1)
walk_result1 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, n1)
# Method 2: Extrapolation by estimation
taps_effect_sizes <- c( -0.16, -0.31, -0.31 ) # EMPOWER (unpublished)
type_effect_sizes <- c( 0.29, 0.57, 0.57 ) # EMPOWER (unpublished)
hold_effect_sizes <- c( 37.7, 75.4, 75.4 ) # EMPOWER (unpublished)
walk_effect_sizes <- c( 8.99, 18.0, 18.0 ) # EMPOWER (unpublished)
# T-score: Head-on comparisons not adjusting for baseline or repeated measures
mJOA_result2 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, 18, "t") # Nc=17
taps_result2 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes, 83, "t") # Nc=81-82
type_result2 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes, 86, "t") # Nc=84-85
hold_result2 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes, 4, "t") # Nc=3
walk_result2 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, 18, "t") # Nc=16-17
# ANCOVA: Head-on comparisons adjusting for baseline but not repeated measures
mJOA_result3 <- sim(mJOA_BL_mean, mJOA_BL_sd, mJOA_effect_sizes, 25, "ANCOVA") # Nc=22
taps_result3 <- sim(taps_BL_mean, taps_BL_sd, taps_effect_sizes, 25, "ANCOVA") # Nc=24
type_result3 <- sim(type_BL_mean, type_BL_sd, type_effect_sizes, 55, "ANCOVA") # Nc=54-55
hold_result3 <- sim(hold_BL_mean, hold_BL_sd, hold_effect_sizes, 20, "ANCOVA") # Nc=18
walk_result3 <- sim(walk_BL_mean, walk_BL_sd, walk_effect_sizes, 146, "ANCOVA") # Nc=144
I would like to now do the following: add modifications to the following code so that all the power1 or power2 values (depending on whether test == "t" or "ANCOVA") up until beta exceeds 0.8 (the while loop is broken) are stored as a list in a column called 'power_values' in the 'result' dataframe (along with all the other existing outputs). I would also like to store the correspondig sample size (N-m) values as a list in a column called 'sample_sizes' in the result dataframe. At the moment, the simulation function only stores the last power and corresponding sample size, but I would like all the powers and sample sizes to be stored as lists or vectors in 2 additional columns called 'power_values' and 'sample_sizes', respectively. I would like to then use the values within the columns to make a figure of each power plotted against the corresponding sample size value as in Figure 1 of the above study.
I am struggling to manipulate the simulation function to do this and would very much appreciate any suggestions/help to achieve my intended goal.
Thank you in advance.