As @jrkrideau notes, a reprex
attracts more answers, but since I'm familiar with {tableone}, I'm running a special today for newcomers.
There are two general approaches to handling NA values in a dataset: they can be removed before the data are passed to table1() or another table-formatting function, or they can be left in place for the function itself to account for.
To remove them beforehand:
# Load {tableone} and the pbc data set that ships with {survival}.
library(tableone)
data(pbc, package = "survival")
dim(pbc)
#> [1] 418 20
# Flag the rows that contain no missing values; a share of complete
# rows below 1 shows that some NAs exist.
is_complete <- complete.cases(pbc)
mean(is_complete)
#> [1] 0.6602871
# Keep only the complete rows, i.e. discard every row in which
# any NA appears.
d <- pbc[is_complete, ]
dim(d)
#> [1] 276 20
Created on 2023-02-22 with reprex v2.0.2
Whether the reduction in observations is acceptable is a separate question.
Alternatively, {tableone} does the work of adjusting for NA values and applying a consistent number of decimal places automatically:
# Load {tableone} and the pbc data set that ships with {survival}.
library(tableone)
data(pbc, package = "survival")
dim(pbc)
#> [1] 418 20
# Flag the rows that contain no missing values; a share of complete
# rows below 1 shows that some NAs exist.
is_complete <- complete.cases(pbc)
mean(is_complete)
#> [1] 0.6602871
# Complete-case copy, discarding rows in which any NA appears.
# Note: `d` is not used below; the table is built from the full `pbc`,
# NAs included.
d <- pbc[is_complete, ]
dim(d)
#> [1] 276 20
# Variables to summarise; each becomes one row of the table.
vars <- c(
  "time", "status", "age", "sex", "ascites", "hepato",
  "spiders", "edema", "bili", "chol", "albumin",
  "copper", "alk.phos", "ast", "trig", "platelet",
  "protime", "stage"
)
# Build the summary table, stratified by `trt`.
t1 <- CreateTableOne(vars = vars, strata = "trt", data = pbc)
t1
#> Stratified by trt
#> 1 2 p test
#> n 158 154
#> time (mean (SD)) 2015.62 (1094.12) 1996.86 (1155.93) 0.883
#> status (mean (SD)) 0.89 (0.96) 0.84 (0.96) 0.657
#> age (mean (SD)) 51.42 (11.01) 48.58 (9.96) 0.018
#> sex = f (%) 137 (86.7) 139 (90.3) 0.421
#> ascites (mean (SD)) 0.09 (0.29) 0.06 (0.25) 0.434
#> hepato (mean (SD)) 0.46 (0.50) 0.56 (0.50) 0.069
#> spiders (mean (SD)) 0.28 (0.45) 0.29 (0.46) 0.886
#> edema (mean (SD)) 0.11 (0.28) 0.11 (0.27) 0.828
#> bili (mean (SD)) 2.87 (3.63) 3.65 (5.28) 0.131
#> chol (mean (SD)) 365.01 (209.54) 373.88 (252.48) 0.748
#> albumin (mean (SD)) 3.52 (0.44) 3.52 (0.40) 0.874
#> copper (mean (SD)) 97.64 (90.59) 97.65 (80.49) 0.999
#> alk.phos (mean (SD)) 2021.30 (2183.44) 1943.01 (2101.69) 0.747
#> ast (mean (SD)) 120.21 (54.52) 124.97 (58.93) 0.460
#> trig (mean (SD)) 124.14 (71.54) 125.25 (58.52) 0.886
#> platelet (mean (SD)) 258.75 (100.32) 265.20 (90.73) 0.555
#> protime (mean (SD)) 10.65 (0.85) 10.80 (1.14) 0.197
#> stage (mean (SD)) 2.97 (0.94) 3.09 (0.81) 0.243
# Detailed per-stratum summary; its `miss` and `p.miss` columns report
# how many values are missing for each variable.
summary(t1)
#>
#> ### Summary of continuous variables ###
#>
#> trt: 1
#> n miss p.miss mean sd median p25 p75 min max skew kurt
#> time 158 0 0.0 2e+03 1e+03 1895 1e+03 2632 41.0 4556 0.41 -0.4
#> status 158 0 0.0 9e-01 1e+00 0 0e+00 2 0.0 2 0.23 -1.9
#> age 158 0 0.0 5e+01 1e+01 52 4e+01 59 26.3 78 0.06 -0.5
#> ascites 158 0 0.0 9e-02 3e-01 0 0e+00 0 0.0 1 2.92 6.6
#> hepato 158 0 0.0 5e-01 5e-01 0 0e+00 1 0.0 1 0.15 -2.0
#> spiders 158 0 0.0 3e-01 5e-01 0 0e+00 1 0.0 1 0.96 -1.1
#> edema 158 0 0.0 1e-01 3e-01 0 0e+00 0 0.0 1 2.36 4.4
#> bili 158 0 0.0 3e+00 4e+00 1 8e-01 3 0.3 20 2.67 7.6
#> chol 158 18 11.4 4e+02 2e+02 316 2e+02 417 127.0 1712 3.83 20.2
#> albumin 158 0 0.0 4e+00 4e-01 4 3e+00 4 2.1 5 -0.40 0.3
#> copper 158 1 0.6 1e+02 9e+01 73 4e+01 121 9.0 588 2.50 8.2
#> alk.phos 158 0 0.0 2e+03 2e+03 1214 8e+02 2028 369.0 11552 2.71 7.4
#> ast 158 0 0.0 1e+02 5e+01 112 8e+01 152 26.4 338 1.09 1.6
#> trig 158 19 12.0 1e+02 7e+01 106 8e+01 146 33.0 598 2.95 14.3
#> platelet 158 2 1.3 3e+02 1e+02 255 2e+02 322 62.0 563 0.50 0.2
#> protime 158 0 0.0 1e+01 9e-01 11 1e+01 11 9.0 14 1.10 1.6
#> stage 158 0 0.0 3e+00 9e-01 3 2e+00 4 1.0 4 -0.51 -0.7
#> ------------------------------------------------------------
#> trt: 2
#> n miss p.miss mean sd median p25 p75 min max skew kurt
#> time 154 0 0.0 2e+03 1e+03 1811 1e+03 2771 51.0 4523 0.4 -0.7
#> status 154 0 0.0 8e-01 1e+00 0 0e+00 2 0.0 2 0.3 -1.8
#> age 154 0 0.0 5e+01 1e+01 48 4e+01 56 30.6 75 0.2 -0.5
#> ascites 154 0 0.0 6e-02 2e-01 0 0e+00 0 0.0 1 3.6 10.9
#> hepato 154 0 0.0 6e-01 5e-01 1 0e+00 1 0.0 1 -0.3 -2.0
#> spiders 154 0 0.0 3e-01 5e-01 0 0e+00 1 0.0 1 0.9 -1.2
#> edema 154 0 0.0 1e-01 3e-01 0 0e+00 0 0.0 1 2.5 5.0
#> bili 154 0 0.0 4e+00 5e+00 1 7e-01 4 0.3 28 2.7 7.3
#> chol 154 10 6.5 4e+02 3e+02 304 3e+02 377 120.0 1775 3.1 11.1
#> albumin 154 0 0.0 4e+00 4e-01 4 3e+00 4 2.0 4 -0.8 2.0
#> copper 154 1 0.6 1e+02 8e+01 73 4e+01 139 4.0 558 2.0 6.6
#> alk.phos 154 0 0.0 2e+03 2e+03 1283 9e+02 1950 289.0 13862 3.3 12.8
#> ast 154 0 0.0 1e+02 6e+01 117 8e+01 152 28.4 457 1.7 6.3
#> trig 154 11 7.1 1e+02 6e+01 113 8e+01 155 44.0 432 1.7 5.5
#> platelet 154 2 1.3 3e+02 9e+01 260 2e+02 322 71.0 487 0.2 -0.3
#> protime 154 0 0.0 1e+01 1e+00 11 1e+01 11 9.2 17 1.9 6.4
#> stage 154 0 0.0 3e+00 8e-01 3 3e+00 4 1.0 4 -0.5 -0.6
#>
#> p-values
#> pNormal pNonNormal
#> time 0.88304691 0.82661809
#> status 0.65704319 0.65269257
#> age 0.01767247 0.01962155
#> ascites 0.43435214 0.43346814
#> hepato 0.06941257 0.06948076
#> spiders 0.88579825 0.88552572
#> edema 0.82771239 0.73580382
#> bili 0.13093942 0.84168460
#> chol 0.74799072 0.54433899
#> albumin 0.87388074 0.95045176
#> copper 0.99915849 0.71745444
#> alk.phos 0.74726165 0.81198200
#> ast 0.45969842 0.45892358
#> trig 0.88604213 0.36980434
#> platelet 0.55451136 0.45482564
#> protime 0.19714026 0.58802048
#> stage 0.24295457 0.38796031
#>
#> Standardize mean differences
#> 1 vs 2
#> time 0.0166658751
#> status 0.0503271899
#> age 0.2702619258
#> ascites 0.0887209135
#> hepato 0.2063295634
#> spiders 0.0162760113
#> edema 0.0246664452
#> bili 0.1710905651
#> chol 0.0382210537
#> albumin 0.0180021838
#> copper 0.0001200022
#> alk.phos 0.0365323630
#> ast 0.0837836058
#> trig 0.0170615337
#> platelet 0.0674763888
#> protime 0.1460939117
#> stage 0.1325941306
#>
#> =======================================================================================
#>
#> ### Summary of categorical variables ###
#>
#> trt: 1
#> var n miss p.miss level freq percent cum.percent
#> sex 158 0 0.0 m 21 13.3 13.3
#> f 137 86.7 100.0
#>
#> ------------------------------------------------------------
#> trt: 2
#> var n miss p.miss level freq percent cum.percent
#> sex 154 0 0.0 m 15 9.7 9.7
#> f 139 90.3 100.0
#>
#>
#> p-values
#> pApprox pExact
#> sex 0.4212261 0.3774323
#>
#> Standardize mean differences
#> 1 vs 2
#> sex 0.1114116
table1 from {tidyr} won't work—it is not a table-formatting function but a pre-set example dataset, so calling it just prints that data, which uses a different formatting convention.