See the FAQ: How to do a minimal reproducible example (reprex)
for beginners. It's not difficult to convert the table to a data frame, but the unnecessary effort is an impediment to receiving answers.
The following example has been derived from the post "ANOVA in R" (Stats and R), using your data, and should be studied alongside that post.
suppressPackageStartupMessages({
library(car)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(multcomp)
library(palmerpenguins)
library(patchwork)
})
# Example data: 168 Log measurements, 42 per Parent group, each Parent
# group being split into four Lines. The three columns are positional, so
# the Log values below are laid out in the same order as the Line labels.
dat <- data.frame(
  Parent = factor(
    rep(c("Mock", "Pst_I", "Pst_II", "Pst_III"), each = 42)
  ),
  Line = factor(
    rep(
      c(
        "M1", "M2", "M3", "M4",
        "Pst_I_1", "Pst_I_2", "Pst_I_3", "Pst_I_4",
        "Pst_II_1", "Pst_II_2", "Pst_II_3", "Pst_II_4",
        "Pst_III_1", "Pst_III_2", "Pst_III_3", "Pst_III_4"
      ),
      times = c(
        11, 8, 13, 10,
        11, 11, 10, 10,
        11, 11, 10, 10,
        11, 11, 10, 10
      )
    )
  ),
  Log = c(
    # M1
    8.055155783, 6.173051289, 6.838547899, 7.051532265, 6.890318194,
    6.341863188, 5.959270051, 6.815049617, 6.082606218, 6.864741933,
    6.795590584,
    # M2
    7.984846313, 6.250502269, 6.401850846, 7.043015595, 6.494560588,
    6.714226935, 6.792170872, 6.795590584,
    # M3
    5.599318096, 7.43209122, 6.795590584, 7.211245462, 7.969408029,
    6.507725058, 5.693530593, 6.670651848, 6.850727571, 6.781576222,
    6.733121216, 5.909212475, 6.099987271,
    # M4
    5.730938813, 7.864741933, 6.930274078, 6.480546227, 6.83574596,
    6.193530593, 6.221983018, 7.030731499, 6.225759587, 6.369621852,
    # Pst_I_1
    7.773276591, 6.670651848, 6.409212475, 7.43209122, 5.617316922,
    5.082606218, 5.983422391, 5.693530593, 6.255456413, 4.554847554,
    6.933893282,
    # Pst_I_2
    4.35587755, 4.724256832, 7.556180064, 6.723879513, 5.642893184,
    5.714226935, 6.189143631, 6.821166845, 5.714226935, 5.795047036,
    6.933893282,
    # Pst_I_3
    6.901017267, 7.834331315, 5.316286927, 6.631061225, 5.253817558,
    6.746285685, 5.971681843, 6.631061225, 7.403095247, 7.005155238,
    # Pst_I_4
    6.532334069, 5.505155238, 5.082606218, 6.522536291, 6.582606218,
    5.567624606, 6.381819071, 7.568591856, 5.99779361, 4.573741835,
    # Pst_II_1
    7.706695062, 7.001242569, 3.209275221, 5.011272466, 6.155670236,
    6.43209122, 5.218139604, 6.067624606, 5.306185234, 5.847064568,
    3.161078569,
    # Pst_II_2
    4.76783192, 5.779832058, 5.247135578, 6.145075586, 5.645075586,
    6.741985599, 6.145075586, 6.821166845, 7.179516231, 6.583328838,
    5.535953274,
    # Pst_II_3
    7.178936398, 4.806185234, 5.272711839, 7.014019622, 5.722743605,
    4.904332556, 6.93209122, 5.909212475, 7.679516231, 6.917109608,
    # Pst_II_4
    6.02013685, 3.058760223, 6.284452387, 6.214226935, 6.182392395,
    3.698680571, 6.155670236, 6.051532265, 6.218139604, 5.816779883,
    # Pst_III_1
    7.182392395, 7.05257374, 4.757730227, 5.952787563, 6.155670236,
    5.905971411, 6.52013685, 6.821166845, 5.883636214, 5.903673535,
    4.573741835,
    # Pst_III_2
    5.118301719, 4.503411074, 5.645075586, 6.838547899, 6.043015595,
    2.758967537, 5.559727473, 5.901017267, 5.766594611, 5.6269695,
    6.272711839,
    # Pst_III_3
    6.210242471, 6.034151212, 2.786726201, 6.255456413, 3.068861916,
    6.184666209, 6.353603736, 4.59662058, 6.903095247, 5.335181207,
    # Pst_III_4
    6.682849067, 7.599987271, 5.743715865, 6.582606218, 5.561330043,
    5.645075586, 6.480546227, 3.640511289, 6.608182479, 5.816779883
  )
)
# Optional (left disabled): restrict the data to a single parent group to
# simplify the illustration.
# mocks <- dat[which(dat[,1] == "Mock"),]

# Quick overview: group sizes for both factors and the spread of Log.
summary(dat)
#> Parent Line Log
#> Mock :42 M3 : 13 Min. :2.759
#> Pst_I :42 M1 : 11 1st Qu.:5.681
#> Pst_II :42 Pst_I_1 : 11 Median :6.212
#> Pst_III:42 Pst_I_2 : 11 Mean :6.117
#> Pst_II_1: 11 3rd Qu.:6.800
#> Pst_II_2: 11 Max. :8.055
#> (Other) :100
# Exploratory plots of Log by Parent group: raw points, boxplots and a
# dot plot, shown side by side (patchwork).
#
# theme_minimal() is a complete theme, so it has to come *before*
# theme(legend.position = "none"); in the opposite order the complete theme
# resets the legend position and the legend is drawn after all.
p1 <- ggplot(dat) +
  aes(x = Parent, y = Log, color = Parent) +
  geom_jitter() +
  theme_minimal() +
  theme(legend.position = "none")
p2 <- ggplot(dat) +
  aes(x = Parent, y = Log, color = Parent) +
  geom_boxplot() +
  theme_minimal() +
  theme(legend.position = "none")
# binwidth is expressed in units of Log (observed range roughly 2.8 to 8.1),
# so it must be a small fraction of that range; adjust to taste.
p3 <- ggplot(dat) +
  aes(Log, fill = Parent) +
  geom_dotplot(method = "histodot", binwidth = 0.2) +
  theme_minimal() +
  theme(legend.position = "none")
# Combine the three panels into one figure.
p1 + p2 + p3
# Fit the one-way ANOVA of Log on Parent and keep its residuals in a data
# frame so they can be plotted with ggplot2.
res_aov <- aov(formula = Log ~ Parent, data = dat)
resids <- data.frame(.resid = res_aov$residuals)

# Normality check, graphical: histogram of the residuals ...
p4 <- ggplot(data = resids, mapping = aes(x = .resid)) +
  geom_histogram(colour = "black", fill = "grey") +
  theme_minimal()
# ... and normal QQ-plot with its reference line.
p5 <- ggplot(data = resids, mapping = aes(sample = .resid)) +
  stat_qq() +
  stat_qq_line() +
  theme_minimal()
# Show both diagnostics side by side.
p4 + p5
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normality check, formal: Shapiro-Wilk test on the residuals.
shapiro.test(res_aov$residuals)
#>
#> Shapiro-Wilk normality test
#>
#> data: res_aov$residuals
#> W = 0.95189, p-value = 1.663e-05

# Homogeneity of variances across Parent groups: Levene's test.
leveneTest(y = Log ~ Parent, data = dat)
#> Levene's Test for Homogeneity of Variance (center = median)
#> Df F value Pr(>F)
#> group 3 2.6809 0.04866 *
#> 164
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean and standard deviation of Log per Parent group, base R version
# (values rounded to two decimals).
aggregate(
  Log ~ Parent,
  data = dat,
  FUN = function(x) {
    round(c(mean = mean(x), sd = sd(x)), 2)
  }
)
#> Parent Log.mean Log.sd
#> 1 Mock 6.68 0.60
#> 2 Pst_I 6.20 0.91
#> 3 Pst_II 5.85 1.12
#> 4 Pst_III 5.73 1.12

# The same per-group summary with dplyr (unrounded, returned as a tibble).
dat %>%
  group_by(Parent) %>%
  summarise(
    mean = mean(Log, na.rm = TRUE),
    sd = sd(Log, na.rm = TRUE)
  )
#> # A tibble: 4 x 3
#> Parent mean sd
#> <fct> <dbl> <dbl>
#> 1 Mock 6.68 0.596
#> 2 Pst_I 6.20 0.907
#> 3 Pst_II 5.85 1.12
#> 4 Pst_III 5.73 1.12
# One-way comparison of group means under the equal-variance assumption.
oneway.test(formula = Log ~ Parent, data = dat, var.equal = TRUE)
#>
#> One-way analysis of means
#>
#> data: Log and Parent
#> F = 8.2319, num df = 3, denom df = 164, p-value = 3.899e-05

# The same model fitted with aov(); the fit is kept in res_aov because the
# post-hoc tests further down are built on it.
res_aov <- aov(formula = Log ~ Parent, data = dat)
summary(res_aov)
#> Df Sum Sq Mean Sq F value Pr(>F)
#> Parent 3 22.67 7.558 8.232 3.9e-05 ***
#> Residuals 164 150.57 0.918
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Variant that does not assume equal variances across groups.
oneway.test(formula = Log ~ Parent, data = dat, var.equal = FALSE)
#>
#> One-way analysis of means (not assuming equal variances)
#>
#> data: Log and Parent
#> F = 11.43, num df = 3.00, denom df = 87.72, p-value = 2.13e-06
# Post-hoc step 1: all pairwise comparisons between Parent groups
# (Tukey contrasts) via multcomp, based on the fitted ANOVA.
post_test <- glht(model = res_aov, linfct = mcp(Parent = "Tukey"))
summary(post_test)
#>
#> Simultaneous Tests for General Linear Hypotheses
#>
#> Multiple Comparisons of Means: Tukey Contrasts
#>
#>
#> Fit: aov(formula = Log ~ Parent, data = dat)
#>
#> Linear Hypotheses:
#> Estimate Std. Error t value Pr(>|t|)
#> Pst_I - Mock == 0 -0.4737 0.2091 -2.266 0.110
#> Pst_II - Mock == 0 -0.8271 0.2091 -3.956 <0.001 ***
#> Pst_III - Mock == 0 -0.9440 0.2091 -4.515 <0.001 ***
#> Pst_II - Pst_I == 0 -0.3533 0.2091 -1.690 0.332
#> Pst_III - Pst_I == 0 -0.4703 0.2091 -2.249 0.115
#> Pst_III - Pst_II == 0 -0.1170 0.2091 -0.559 0.944
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> (Adjusted p values reported -- single-step method)

# Enlarge the left margin before plotting the confidence intervals of the
# contrasts (the contrast names are drawn on that side).
par(mar = c(3, 8, 3, 3))
plot(post_test)

# The same pairwise comparisons with base R's TukeyHSD(), printed and then
# plotted.
TukeyHSD(res_aov)
#> Tukey multiple comparisons of means
#> 95% family-wise confidence level
#>
#> Fit: aov(formula = Log ~ Parent, data = dat)
#>
#> $Parent
#> diff lwr upr p adj
#> Pst_I-Mock -0.4737345 -1.0164396 0.06897055 0.1104074
#> Pst_II-Mock -0.8270620 -1.3697671 -0.28435691 0.0006507
#> Pst_III-Mock -0.9440129 -1.4867179 -0.40130778 0.0000707
#> Pst_II-Pst_I -0.3533275 -0.8960325 0.18937762 0.3323298
#> Pst_III-Pst_I -0.4702783 -1.0129834 0.07242675 0.1145360
#> Pst_III-Pst_II -0.1169509 -0.6596560 0.42575421 0.9438792
plot(TukeyHSD(res_aov))
# Dunnett's test: compare each treatment group against the control only.
# The grouping factor of the fitted model is `Parent`, so that is the name
# mcp() must be given; a name that is not a term of the model makes glht()
# stop with an error and leaves the previous (Tukey) `post_test` in place.
# "Mock" is the first level of `Parent`, which is the group the other
# levels are compared against in the contrasts below.
post_test <- glht(res_aov,
  linfct = mcp(Parent = "Dunnett")
)
summary(post_test)
# Expected output (the adjusted p-values may differ slightly in the last
# digits from run to run):
#>
#> Simultaneous Tests for General Linear Hypotheses
#>
#> Multiple Comparisons of Means: Dunnett Contrasts
#>
#>
#> Fit: aov(formula = Log ~ Parent, data = dat)
#>
#> Linear Hypotheses:
#> Estimate Std. Error t value Pr(>|t|)
#> Pst_I - Mock == 0 -0.4737 0.2091 -2.266 0.0647 .
#> Pst_II - Mock == 0 -0.8271 0.2091 -3.956 <0.001 ***
#> Pst_III - Mock == 0 -0.9440 0.2091 -4.515 <0.001 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> (Adjusted p values reported -- single-step method)

# Widen the left margin so the contrast labels fit, then plot the
# confidence intervals of the three comparisons.
par(mar = c(3, 8, 3, 3))
plot(post_test)
# Make the reference group explicit: put "Mock" first among the levels of
# Parent, then refit the ANOVA on the releveled data.
dat[["Parent"]] <- relevel(x = dat[["Parent"]], ref = "Mock")
res_aov2 <- aov(formula = Log ~ Parent, data = dat)

# Dunnett's test on the refitted model: each Pst group versus Mock.
post_test <- glht(model = res_aov2, linfct = mcp(Parent = "Dunnett"))
summary(post_test)
#>
#> Simultaneous Tests for General Linear Hypotheses
#>
#> Multiple Comparisons of Means: Dunnett Contrasts
#>
#>
#> Fit: aov(formula = Log ~ Parent, data = dat)
#>
#> Linear Hypotheses:
#> Estimate Std. Error t value Pr(>|t|)
#> Pst_I - Mock == 0 -0.4737 0.2091 -2.266 0.0647 .
#> Pst_II - Mock == 0 -0.8271 0.2091 -3.956 <0.001 ***
#> Pst_III - Mock == 0 -0.9440 0.2091 -4.515 <0.001 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> (Adjusted p values reported -- single-step method)

# Wider left margin for the contrast labels, then the interval plot.
par(mar = c(3, 8, 3, 3))
plot(post_test)
# All pairwise t-tests between Parent groups (pooled SD), with the p-values
# adjusted by Holm's method.
pairwise.t.test(x = dat$Log, g = dat$Parent, p.adjust.method = "holm")
#>
#> Pairwise comparisons using t tests with pooled SD
#>
#> data: dat$Log and dat$Parent
#>
#> Mock Pst_I Pst_II
#> Pst_I 0.09911 - -
#> Pst_II 0.00057 0.18592 -
#> Pst_III 7.2e-05 0.09911 0.57670
#>
#> P value adjustment method: holm
# Boxplots (ggpubr) annotated with the global test and with p-values for a
# chosen set of pairwise comparisons. One plot is printed for every
# combination of a grouping column (x) and a tested column (y).
x <- which(names(dat) == "Parent") # column index of the grouping variable
y <- which(
  names(dat) == "Log" # column index of the variable(s) to test
)
method1 <- "anova" # one of "anova" or "kruskal.test"
method2 <- "t.test" # one of "wilcox.test" or "t.test"
my_comparisons <- list(
  c("Mock", "Pst_I"),
  c("Pst_I", "Pst_II"),
  c("Pst_II", "Pst_III")
) # comparisons for post-hoc tests
for (i in y) {
  for (j in x) {
    # Resolve the column names once; ggboxplot() takes them as strings.
    group_name <- names(dat)[j]
    response_name <- names(dat)[i]
    p <- ggboxplot(dat,
      x = group_name, y = response_name,
      color = group_name,
      legend = "none",
      palette = "npg",
      add = "jitter"
    )
    print(
      p +
        # Global test label, placed at the height of the largest observation.
        # after_stat() replaces the deprecated ..var.. notation for
        # referring to variables computed by the stat.
        stat_compare_means(
          aes(label = paste0(
            after_stat(method), ", p-value = ", after_stat(p.format)
          )),
          method = method1,
          label.y = max(dat[[i]], na.rm = TRUE)
        ) +
        # Pairwise brackets: remove if the p-value of the ANOVA or
        # Kruskal-Wallis test is >= alpha.
        stat_compare_means(
          comparisons = my_comparisons,
          method = method2,
          label = "p.format"
        )
    )
  }
}