map_df misunderstanding

bbewkram · October 11, 2019, 10:21am

# Aim: run multiple t-tests and collect the results in a tidy output.
# The 1st example below works. The second doesn't. Why not? What am I missing here?
# Create a data frame with 3 numeric variables & 1 factor variable

# Build a toy data set: three numeric columns (a, b, c) and one two-level
# grouping column (grp) whose values alternate m, y, m, y, ... over 20 rows.
# The seed was previously a commented-out typo ("set seed"), so every run
# produced different numbers. NOTE: the outputs printed later in this post
# appear to come from an unseeded run and will not match the seeded values.
set.seed(333)
a <- rnorm(20, mean = 10, sd = 1)
b <- rnorm(20, mean = 15, sd = 2)
c <- rnorm(20, mean = 20, sd = 3)
grp <- rep(c("m", "y"), times = 10)
# stringsAsFactors = TRUE keeps grp a factor on R >= 4.0 too, which is what
# the str() output further down shows.
test_data <- data.frame(a, b, c, grp, stringsAsFactors = TRUE)
# View() opens a GUI data viewer, so only call it in an interactive session.
if (interactive()) {
  View(test_data)
}
############################################
## long version
# One Welch two-sample t-test per numeric column, written out by hand.
# The data frame is supplied through the `data` argument of the formula
# interface, so both the response and grp are looked up in test_data.
t.test(a ~ grp, data = test_data)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  a by grp
#> t = 0.39411, df = 16.653, p-value = 0.6985
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -0.7368916  1.0747718
#> sample estimates:
#> mean in group m mean in group y 
#>       10.028952        9.860012
t.test(b ~ grp, data = test_data)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  b by grp
#> t = 0.13049, df = 17.674, p-value = 0.8977
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -1.868864  2.116044
#> sample estimates:
#> mean in group m mean in group y 
#>        15.43087        15.30728
t.test(c ~ grp, data = test_data)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  c by grp
#> t = -0.19228, df = 16.137, p-value = 0.8499
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -3.949589  3.292280
#> sample estimates:
#> mean in group m mean in group y 
#>        20.37787        20.70653

#I would like to have the outputs like this

# mean in group m mean in group y  p-value
# 9.747412        9.878820         0.6944
# 15.12936        16.49533         0.07798 
# 20.39531        20.20168         0.9027
######################################################
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 3.6.1
#> Warning: package 'tidyr' was built under R version 3.6.1
#> Warning: package 'dplyr' was built under R version 3.6.1

# Run a Welch t-test of every numeric column against the grouping column and
# stack the tidied results, one row per variable (its name goes into `vars`).
# The grouping variable is referenced as test_data$grp rather than a bare
# `grp`: select_if() has already dropped the factor column, so a bare name
# only resolves when a global vector of the same name happens to exist.
res <- test_data %>%
  select_if(is.numeric) %>%
  map_df(~ broom::tidy(t.test(.x ~ test_data$grp)), .id = "vars")
res
#> # A tibble: 3 x 11
#>   vars  estimate estimate1 estimate2 statistic p.value parameter conf.low
#>   <chr>    <dbl>     <dbl>     <dbl>     <dbl>   <dbl>     <dbl>    <dbl>
#> 1 a        0.169      10.0      9.86     0.394   0.698      16.7   -0.737
#> 2 b        0.124      15.4     15.3      0.130   0.898      17.7   -1.87 
#> 3 c       -0.329      20.4     20.7     -0.192   0.850      16.1   -3.95 
#> # ... with 3 more variables: conf.high <dbl>, method <chr>,
#> #   alternative <chr>

str(test_data) # is a data frame
#> 'data.frame':    20 obs. of  4 variables:
#>  $ a  : num  11.64 10.65 11.17 10.83 9.83 ...
#>  $ b  : num  14.6 17.4 16.6 14.3 16.1 ...
#>  $ c  : num  14.6 17.7 25 22.1 20.5 ...
#>  $ grp: Factor w/ 2 levels "m","y": 1 2 1 2 1 2 1 2 1 2 ...
#The above works
### But when using my test data it doesn't. What am I missing here?
library(readxl)
# Load the real data from an Excel workbook. The path is relative to the
# current working directory; the reprex output below shows that the file was
# not found there, and every later error follows from this failed read.
ttestdata <- read_excel("ttestdata.xlsx")
#> Error: `path` does not exist: 'ttestdata.xlsx'
# Convert the result of read_excel() into a base data.frame.
tdat <- data.frame(ttestdata)  ##  makes a data.frame
#> Error in data.frame(ttestdata): object 'ttestdata' not found
# Store the grouping column sex as a factor (per the author, it arrives as chr).
tdat$sex <- as.factor(tdat$sex) ## t test requires that sex is not a chr but a factor
#> Error in is.factor(x): object 'tdat' not found
# Interactive / printed inspection of the prepared data.
View(tdat)
#> Error in as.data.frame(x): object 'tdat' not found
str(tdat)
#> Error in str(tdat): object 'tdat' not found
summary(tdat)
#> Error in summary(tdat): object 'tdat' not found
####
# Same t-test pipeline for the Excel data, one tidied row per numeric column.
# Unlike `grp` in the toy example, this script never creates a free-standing
# `sex` vector: sex exists only as a column of tdat, and select_if() drops it
# (it is a factor) before map_df() runs. A bare `sex` in the formula therefore
# cannot be found, so the grouping column is referenced as tdat$sex.
mw <- tdat %>%
  select_if(is.numeric) %>%
  map_df(~ broom::tidy(t.test(.x ~ tdat$sex)), .id = "vars")
#> Error in eval(lhs, parent, parent): object 'tdat' not found
mw
#> Error in eval(expr, envir, enclos): object 'mw' not found

valeri · October 11, 2019, 10:28am

Hi @bbewkram,

are you sure your tdat variable exists and is as expected? There seem to be a lot of errors upstream from your map_df call.

nwerth · October 11, 2019, 2:01pm

Specifically, this is your first error:

ttestdata <- read_excel("ttestdata.xlsx")
#> Error: `path` does not exist: 'ttestdata.xlsx'

Make sure you have the correct file path.

system · November 1, 2019, 2:01pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.