Hi everyone,
I’m working with a dataset that includes bacterial counts and metadata (diet parameters), and I’m trying to generate a correlation matrix. I have a couple of issues I haven’t been able to solve:
- Excluding self-correlations: I want to exclude correlations where a variable is compared with itself (e.g., `weight` vs `weight`). How can I make sure these are not displayed in my final plot?
- Formatting the matrix: I want the correlation matrix to have bacteria as the rows and metadata as the columns. Currently, I’m struggling to set it up this way.
library(tidyverse)
library(corrplot)
#> corrplot 0.92 loaded
library(RColorBrewer)
# Per-sample metadata (BMI and energy intake), assembled column by column.
metadata <- data.frame(
  SampleID = sprintf("P%02d", 1:10),
  BMI = c(20.27, 21.7, 15.3, 31.9, 25.6, 19.68, 19.6, 22.7, 23.3, 39.2),
  Kilocalories.intake = c(890, 774, 731, 508, 1094, 1230, 1170, 893, 838, 625)
)
# Per-sample bacterial read counts (integer), assembled column by column.
bacteria <- tibble::tibble(
  SampleID = sprintf("P%02d", 1:10),
  Actinomycetota = c(196L, 208L, 6L, 5L, 11L, 8L, 22L, 74L, 15L, 47L),
  Bacteroidota = c(
    13333L, 22731L, 33610L, 27634L, 31627L,
    21651L, 24244L, 21368L, 26575L, 77969L
  )
)
# Join metadata and bacterial counts on the shared sample identifier
datos_combinados <- merge(x = metadata, y = bacteria, by = "SampleID")
# Pearson correlations between all remaining columns; the first column
# (SampleID) is dropped before calling cor()
correlaciones <- cor(x = datos_combinados[, -1], method = "pearson")
# Display the correlation matrix
print(correlaciones)
#> BMI Kilocalories.intake Actinomycetota Bacteroidota
#> BMI 1.0000000 -0.5325944 -0.1400981 0.7549547
#> Kilocalories.intake -0.5325944 1.0000000 -0.1224807 -0.4147559
#> Actinomycetota -0.1400981 -0.1224807 1.0000000 -0.2731836
#> Bacteroidota 0.7549547 -0.4147559 -0.2731836 1.0000000
# Significance test for every pair of columns (p-values plus 95% CI bounds).
# NOTE(review): cor.mtest() is documented to take the sample-level data matrix
# (samples in rows, variables in columns), but here it receives the correlation
# matrix itself, so the p-values and confidence intervals printed below appear
# to be derived from the 4 rows of `correlaciones` rather than the 10 samples.
# TODO confirm and pass `datos_combinados[, -1]` instead.
testRes = cor.mtest(correlaciones, conf.level = 0.95)
print(testRes)
#> $p
#> BMI Kilocalories.intake Actinomycetota Bacteroidota
#> BMI 0.00000000 0.1192133 0.5766032 0.04860413
#> Kilocalories.intake 0.11921330 0.0000000 0.9607037 0.21386722
#> Actinomycetota 0.57660325 0.9607037 0.0000000 0.42969023
#> Bacteroidota 0.04860413 0.2138672 0.4296902 0.00000000
#>
#> $lowCI
#> BMI Kilocalories.intake Actinomycetota Bacteroidota
#> BMI 1.0000000 -0.9974877 -0.9840522 -0.1131811
#> Kilocalories.intake -0.9974877 1.0000000 -0.9639767 -0.9952595
#> Actinomycetota -0.9840522 -0.9639767 1.0000000 -0.9891995
#> Bacteroidota -0.1131811 -0.9952595 -0.9891995 1.0000000
#>
#> $uppCI
#> BMI Kilocalories.intake Actinomycetota Bacteroidota
#> BMI 1.0000000 0.5231729 0.9066085 0.9990120
#> Kilocalories.intake 0.5231729 1.0000000 0.9579706 0.7156826
#> Actinomycetota 0.9066085 0.9579706 1.0000000 0.8647761
#> Bacteroidota 0.9990120 0.7156826 0.8647761 1.0000000
# Show the structure of the list returned by cor.mtest()
# (elements p, lowCI and uppCI, each a named 4 x 4 numeric matrix)
str(testRes)
#> List of 3
#> $ p : num [1:4, 1:4] 0 0.1192 0.5766 0.0486 0.1192 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
#> $ lowCI: num [1:4, 1:4] 1 -0.997 -0.984 -0.113 -0.997 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
#> $ uppCI: num [1:4, 1:4] 1 0.523 0.907 0.999 0.523 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
#> .. ..$ : chr [1:4] "BMI" "Kilocalories.intake" "Actinomycetota" "Bacteroidota"
# Upper triangle only, diagonal hidden; significance labels at the 0.05 level
corrplot(
  corr = correlaciones,
  method = "color",
  type = "upper",
  order = "AOE",
  diag = FALSE,
  p.mat = testRes$p,
  sig.level = 0.05,
  insig = "label_sig",
  pch.cex = 0.9,
  pch.col = "black"
)
# Full square matrix, diagonal hidden; significance labels at the 0.05 level
corrplot(
  corr = correlaciones,
  method = "color",
  order = "AOE",
  diag = FALSE,
  p.mat = testRes$p,
  sig.level = 0.05,
  insig = "label_sig",
  pch.cex = 0.9,
  pch.col = "black"
)
# Correlations and significance tests, displayed as bacteria (rows) x
# metadata (columns).

# Variable names of each group, taken from the two input tables
vars_metadata <- setdiff(names(metadata), "SampleID")
vars_bacteria <- setdiff(names(bacteria), "SampleID")
datos_numericos <- datos_combinados[, c(vars_metadata, vars_bacteria)]

# Pearson correlation between every pair of numeric columns
correlaciones <- cor(datos_numericos, method = "pearson")

# Significance test: cor.mtest() must receive the sample-level data (one row
# per sample), not the correlation matrix, otherwise the p-values are computed
# from the rows of the correlation matrix instead of the samples.
testRes <- cor.mtest(datos_numericos, conf.level = 0.95)

# Keep only the bacteria-vs-metadata block. Rows and columns are disjoint sets
# of variables, so no variable is ever compared with itself.
cor_bact_meta <- correlaciones[vars_bacteria, vars_metadata, drop = FALSE]
p_bact_meta <- testRes$p[vars_bacteria, vars_metadata, drop = FALSE]

# Rectangular plot: `type`, `diag` and `order = "AOE"` are left at their
# defaults because they only make sense for a square, symmetric matrix.
corrplot(
  cor_bact_meta,
  p.mat = p_bact_meta,
  method = "color",
  sig.level = 0.05,
  insig = "label_sig",
  pch.cex = 0.9,
  pch.col = "black"
)
Created on 2024-09-30 with reprex v2.1.1
Of course, I have more data, but for this forum I pasted just three columns of each table.