I have used the mixed_model function from GLMMadaptive to run a model with fixed effects and a random effect to account for clustering. I have extracted the ICC using icc() from performance. I want to estimate the ICC confidence intervals.

I know that other functions are available to do this that are compatible with lme4 models, but they don't seem to apply to GLMMadaptive models. (note that I have been advised to use GLMMadaptive rather than lme4 due to the differences in approximation methods.)

In the meantime, I have used simulate() to try to estimate ICC. This involved simulating new data from my model, re-fitting a new model to simulated data, and extracting parameters of interest. It looks like my code to update the model (newfit = update(m, newy ~ .) is not working and it produces warnings. When I explore the model output, the model output/estimates are the same as the original model and the ICC confidence interval output are just the ICC value.

In summary, my questions are:

- Is simulate() the right approach to estimate ICC confidence intervals from my model (or is there another specific function that will estimate the parameters)?
- If simulate() is the right approach, how can I fix my code to produce meaningful/correct estimates?

packages

library(magrittr)

library(dplyr)

#>

#> Attaching package: 'dplyr'

#> The following objects are masked from 'package:stats':

#>

#> filter, lag

#> The following objects are masked from 'package:base':

#>

#> intersect, setdiff, setequal, union

library(GLMMadaptive)

library(performance)

library(reprex)

#Create dataset

set.seed(432)

dummy_vec <- rbinom(400, 1, prob = 0.3) # Create dummy vector

head(dummy_vec) # Print dummy vector

#> [1] 0 0 1 0 1 1

dummy_mat <- matrix(dummy_vec, ncol = 4) # Convert vector to matrix

data <- as.data.frame(dummy_mat) # Convert matrix to data frame

cols<-c("2","3","4")

data %<>%

mutate_each_(funs(factor(.)),cols) # Convert columns to factors

#> Warning: `mutate_each_()`

was deprecated in dplyr 0.7.0.

#> Please use `across()`

instead.

#> This warning is displayed once every 8 hours.

#> Call `lifecycle::last_lifecycle_warnings()`

to see where this warning was generated.

#> Warning: `funs()`

was deprecated in dplyr 0.8.0.

#> Please use a list of either functions or lambdas:

#>

#> # Simple named list:

#> list(mean = mean, median = median)

#>

#> # Auto named with `tibble::lst()`

:

#> tibble::lst(mean, median)

#>

#> # Using lambdas

#> list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

#> This warning is displayed once every 8 hours.

#> Call `lifecycle::last_lifecycle_warnings()`

to see where this warning was generated.

str(data)

#> 'data.frame': 100 obs. of 4 variables:

#> V1: int 0 0 1 0 1 1 1 0 0 1 ...
#> V2: Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 2 2 1 ...

#> V3: Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
#> V4: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...

names(data)[1] <- "distress" # Rename column names

names(data)[2] <- "mhhx"

names(data)[3] <- "gender"

names(data)[4] <- "exposure"

data$school<-sample(100, size = nrow(data), replace = TRUE) #Add new column

head(data) # Visualise data

#> distress mhhx gender exposure school

#> 1 0 0 0 0 4

#> 2 0 1 1 0 20

#> 3 1 1 0 0 21

#> 4 0 1 1 1 49

#> 5 1 0 1 0 94

#> 6 1 0 0 0 18

str(data)

#> 'data.frame': 100 obs. of 5 variables:

#> distress: int 0 0 1 0 1 1 1 0 0 1 ...
#> mhhx : Factor w/ 2 levels "0","1": 1 2 2 2 1 1 1 2 2 1 ...

#> gender : Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
#> exposure: Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...

#> $ school : int 4 20 21 49 94 18 45 23 38 53 ...

#Run mixed model

m<-mixed_model(fixed=distress~mhhx+gender+exposure,random=~1|school,data=data, family="binomial")

summary(m)

#>

#> Call:

#> mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1 |

#> school, data = data, family = "binomial")

#>

#> Data Descriptives:

#> Number of Observations: 100

#> Number of Groups: 67

#>

#> Model:

#> family: binomial

#> link: logit

#>

#> Fit statistics:

#> log.Lik AIC BIC

#> -62.2027 134.4054 145.4289

#>

#> Random effects covariance matrix:

#> StdDev

#> (Intercept) 0.1314838

#>

#> Fixed effects:

#> Estimate Std.Err z-value p-value

#> (Intercept) -0.3308 0.3136 -1.0547 0.291551

#> mhhx1 -0.9077 0.5221 -1.7387 0.082086

#> gender1 -0.3253 0.4960 -0.6559 0.511905

#> exposure1 -0.0532 0.4749 -0.1121 0.910769

#>

#> Integration:

#> method: adaptive Gauss-Hermite quadrature rule

#> quadrature points: 11

#>

#> Optimization:

#> method: hybrid EM and quasi-Newton

#> converged: TRUE

#Extract icc

icc(m)

#> # Intraclass Correlation Coefficient

#>

#> Adjusted ICC: 0.005

#> Conditional ICC: 0.005

#Simulate new data from model and fit new model to simulated data

nsim = 10

results = rep(NA, nsim)

for (isim in 1:nsim) {

newy=GLMMadaptive::simulate(m)

newfit = update(m, newy ~ .)

results[isim] = icc(newfit)[[1]]

}

#> Warning in mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1

#> | : unknown names in control: formula

head(newy) # Visualise data

#> [,1]

#> [1,] 0

#> [2,] 1

#> [3,] 0

#> [4,] 0

#> [5,] 0

#> [6,] 0

str(newy)

#> num [1:100, 1] 0 1 0 0 0 0 0 0 1 0 ...

all(newy == m$data$distress) # Checking whether simulated data different to original data

#> [1] FALSE

summary(newfit) # Checking model (estimates are the same as m)

#>

#> Call:

#> mixed_model(fixed = distress ~ mhhx + gender + exposure, random = ~1 |

#> school, data = data, family = "binomial", formula = newy ~

#> mhhx + gender + exposure)

#>

#> Data Descriptives:

#> Number of Observations: 100

#> Number of Groups: 67

#>

#> Model:

#> family: binomial

#> link: logit

#>

#> Fit statistics:

#> log.Lik AIC BIC

#> -62.2027 134.4054 145.4289

#>

#> Random effects covariance matrix:

#> StdDev

#> (Intercept) 0.1314838

#>

#> Fixed effects:

#> Estimate Std.Err z-value p-value

#> (Intercept) -0.3308 0.3136 -1.0547 0.291551

#> mhhx1 -0.9077 0.5221 -1.7387 0.082086

#> gender1 -0.3253 0.4960 -0.6559 0.511905

#> exposure1 -0.0532 0.4749 -0.1121 0.910769

#>

#> Integration:

#> method: adaptive Gauss-Hermite quadrature rule

#> quadrature points: 11

#>

#> Optimization:

#> method: hybrid EM and quasi-Newton

#> converged: TRUE

#Extract paramaters from refitted model

icc(newfit)

#> # Intraclass Correlation Coefficient

#>

#> Adjusted ICC: 0.005

#> Conditional ICC: 0.005

quantile(results, c(0.025, 0.975))

#> 2.5% 97.5%

#> 0.005227451 0.005227451