# Aim: run multiple t-tests and collect the results as one tidy output
# The 1st example below works. The second doesn't. Why not? What am I missing here?
# Create an example data frame with 3 numeric variables and 1 factor variable.
#
# The seed is fixed so that every run produces the same random data.
# NOTE: the "#>" console output shown further down in this file was captured
# from an earlier run without a seed, so its exact numbers differ from a
# fresh run of this code.
set.seed(333)
a <- rnorm(20, 10, 1)
b <- rnorm(20, 15, 2)
c <- rnorm(20, 20, 3)
grp <- rep(c("m", "y"), 10)
# stringsAsFactors = TRUE makes `grp` a factor column on every R version
# (since R 4.0 the default would leave it as character).
test_data <- data.frame(a, b, c, grp, stringsAsFactors = TRUE)
# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(test_data)
}
############################################
## long version
# Two-sample t-test of column `a` between the two `grp` levels.
t.test(a ~ grp, data = test_data)
#>
#> Welch Two Sample t-test
#>
#> data: a by grp
#> t = 0.39411, df = 16.653, p-value = 0.6985
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -0.7368916 1.0747718
#> sample estimates:
#> mean in group m mean in group y
#> 10.028952 9.860012
# Two-sample t-test of column `b` between the two `grp` levels.
t.test(b ~ grp, data = test_data)
#>
#> Welch Two Sample t-test
#>
#> data: b by grp
#> t = 0.13049, df = 17.674, p-value = 0.8977
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -1.868864 2.116044
#> sample estimates:
#> mean in group m mean in group y
#> 15.43087 15.30728
# Two-sample t-test of column `c` between the two `grp` levels.
t.test(c ~ grp, data = test_data)
#>
#> Welch Two Sample t-test
#>
#> data: c by grp
#> t = -0.19228, df = 16.137, p-value = 0.8499
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -3.949589 3.292280
#> sample estimates:
#> mean in group m mean in group y
#> 20.37787 20.70653
#I would like to have the outputs like this
# mean in group m mean in group y p-value
# 9.747412 9.878820 0.6944
# 15.12936 16.49533 0.07798
# 20.39531 20.20168 0.9027
######################################################
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 3.6.1
#> Warning: package 'tidyr' was built under R version 3.6.1
#> Warning: package 'dplyr' was built under R version 3.6.1
# Run a t-test of every numeric column of `test_data` between the levels of
# `grp` and stack the tidied results, one row per column; `.id = "vars"`
# stores the column name in a `vars` column.
#
# select_if() keeps only the numeric columns, so `grp` is no longer part of
# the piped data. A bare `grp` inside the formula would therefore be looked up
# outside the data and only resolve because a top-level vector of that name
# happens to exist. Referring to test_data$grp makes the grouping variable
# explicit and independent of what is lying around in the workspace.
res <- test_data %>%
  select_if(is.numeric) %>%
  map_df(~ broom::tidy(t.test(.x ~ test_data$grp)), .id = "vars")
res
#> # A tibble: 3 x 11
#> vars estimate estimate1 estimate2 statistic p.value parameter conf.low
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 a 0.169 10.0 9.86 0.394 0.698 16.7 -0.737
#> 2 b 0.124 15.4 15.3 0.130 0.898 17.7 -1.87
#> 3 c -0.329 20.4 20.7 -0.192 0.850 16.1 -3.95
#> # ... with 3 more variables: conf.high <dbl>, method <chr>,
#> # alternative <chr>
# Inspect the structure of test_data to confirm it is a data frame.
utils::str(test_data)
#> 'data.frame': 20 obs. of 4 variables:
#> $ a : num 11.64 10.65 11.17 10.83 9.83 ...
#> $ b : num 14.6 17.4 16.6 14.3 16.1 ...
#> $ c : num 14.6 17.7 25 22.1 20.5 ...
#> $ grp: Factor w/ 2 levels "m","y": 1 2 1 2 1 2 1 2 1 2 ...
#The above works
### But when using my test data it doesn't. What am I missing here?
library(readxl)
# Load the user's own data from an Excel workbook. In the captured run below
# the file could not be found at this path, so the call failed and `ttestdata`
# was never created.
ttestdata <- read_excel("ttestdata.xlsx")
#> Error: `path` does not exist: 'ttestdata.xlsx'
# Every error from here down is a knock-on effect of the failed read above:
# `ttestdata` does not exist, so `tdat` is never created either.
tdat <- data.frame(ttestdata) ## makes a data.frame
#> Error in data.frame(ttestdata): object 'ttestdata' not found
# NOTE(review): the formula method of t.test() may coerce the grouping
# variable itself, in which case this conversion is optional -- confirm.
tdat$sex <- as.factor(tdat$sex) ## t test requires that sex is not a chr but a factor
#> Error in is.factor(x): object 'tdat' not found
# NOTE(review): View() is intended for interactive use; consider guarding it
# with interactive() if this script is ever run non-interactively.
View(tdat)
#> Error in as.data.frame(x): object 'tdat' not found
# Print the structure and a per-column summary of the data.
str(tdat)
#> Error in str(tdat): object 'tdat' not found
summary(tdat)
#> Error in summary(tdat): object 'tdat' not found
####
# Run a t-test of every numeric column of `tdat` between the levels of `sex`
# and stack the tidied results, one row per column; `.id = "vars"` stores the
# column name in a `vars` column.
#
# select_if() keeps only the numeric columns, so the factor `sex` is dropped
# from the piped data. A bare `sex` in the formula would then be looked up
# outside the data, where no such object exists (unlike `grp` in the first
# example, which also exists as a top-level vector), and t.test() would fail
# with "object 'sex' not found". Referring to tdat$sex avoids that.
#
# The "#>" errors kept below were captured when `tdat` itself was missing
# because the Excel file could not be read; they are unrelated to the formula.
mw <- tdat %>%
  select_if(is.numeric) %>%
  map_df(~ broom::tidy(t.test(.x ~ tdat$sex)), .id = "vars")
#> Error in eval(lhs, parent, parent): object 'tdat' not found
mw
#> Error in eval(expr, envir, enclos): object 'mw' not found
Hi @bbewkram,
are you sure your `tdat` variable exists and is as expected? There seem to be a lot of errors upstream from your `map_df` call.
1 Like
Specifically, this is your first error:
ttestdata <- read_excel("ttestdata.xlsx")
#> Error: `path` does not exist: 'ttestdata.xlsx'
Make sure you have the correct file path.
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.