How to exclude self-correlations and format correlation matrix with bacteria as rows and metadata (parameters) as columns?

Hi everyone,

I’m working with a dataset that includes bacterial counts and metadata (diet parameters), and I’m trying to generate a correlation matrix. I have a couple of issues I haven’t been able to solve:

  1. Excluding self-correlations: I want to exclude correlations where the same variable is being compared (e.g., weight vs weight). How can I make sure these are not displayed in my final plot?
  2. Formatting the matrix: I want the correlation matrix to have bacteria as the rows and metadata as the columns. Currently, I’m struggling to set it up this way.
library(tidyverse)
library(corrplot)
#> corrplot 0.92 loaded
library(RColorBrewer)


# Per-sample metadata: one row per sample, keyed by SampleID ("P01".."P10").
metadata <- data.frame(
  SampleID = sprintf("P%02d", seq_len(10)),
  BMI = c(20.27, 21.7, 15.3, 31.9, 25.6, 19.68, 19.6, 22.7, 23.3, 39.2),
  Kilocalories.intake = c(890, 774, 731, 508, 1094, 1230, 1170, 893, 838, 625)
)

# Per-sample bacterial counts (integer), keyed by SampleID ("P01".."P10").
bacteria <- tibble::tibble(
  SampleID = sprintf("P%02d", seq_len(10)),
  Actinomycetota = c(196L, 208L, 6L, 5L, 11L, 8L, 22L, 74L, 15L, 47L),
  Bacteroidota = c(
    13333L, 22731L, 33610L, 27634L, 31627L,
    21651L, 24244L, 21368L, 26575L, 77969L
  )
)

# Join metadata and bacterial counts on the shared sample identifier.
datos_combinados <- merge(x = metadata, y = bacteria, by = "SampleID")

# Pearson correlation between every pair of numeric variables; the first
# column (SampleID) is dropped because it is not numeric.
correlaciones <- cor(datos_combinados[-1], method = "pearson")

# Show the correlation matrix
print(correlaciones)
#>                            BMI Kilocalories.intake Actinomycetota Bacteroidota
#> BMI                  1.0000000          -0.5325944     -0.1400981    0.7549547
#> Kilocalories.intake -0.5325944           1.0000000     -0.1224807   -0.4147559
#> Actinomycetota      -0.1400981          -0.1224807      1.0000000   -0.2731836
#> Bacteroidota         0.7549547          -0.4147559     -0.2731836    1.0000000


# Significance tests for every pairwise correlation.
# cor.mtest() must receive the raw observations (one row per sample, one
# column per variable), not the correlation matrix: it runs cor.test() on each
# pair of columns, so passing `correlaciones` would treat the 4 rows of the
# correlation matrix as if they were samples instead of using the 10 real
# samples. SampleID (first column) is dropped because it is not numeric.
testRes <- cor.mtest(datos_combinados[, -1], conf.level = 0.95)

# testRes is a list with the p-value matrix ($p) and the lower/upper
# confidence-interval matrices ($lowCI, $uppCI).
print(testRes)
str(testRes)
#> (printed output omitted: re-run the reprex to regenerate the p-value and
#> confidence-interval matrices computed from the raw data)


# Upper triangle only, diagonal hidden; significant cells (p < 0.05) are
# marked with a black label.
corrplot(
  correlaciones,
  method = "color",
  type = "upper",
  order = "AOE",
  diag = FALSE,
  p.mat = testRes$p,
  sig.level = 0.05,
  insig = "label_sig",
  pch.cex = 0.9,
  pch.col = "black"
)


# Full square matrix, diagonal hidden; same significance marking as above.
corrplot(
  correlaciones,
  method = "color",
  order = "AOE",
  diag = FALSE,
  p.mat = testRes$p,
  sig.level = 0.05,
  insig = "label_sig",
  pch.cex = 0.9,
  pch.col = "black"
)


# Correlation computation and significance test
# Pearson correlations between all numeric variables (SampleID column dropped).
correlaciones <- cor(datos_combinados[, -1], method = "pearson")

# Significance test: cor.mtest() needs the raw observations (samples in rows,
# variables in columns), not the correlation matrix, otherwise the p-values
# are computed from the rows of the correlation matrix instead of the samples.
testRes <- cor.mtest(datos_combinados[, -1], conf.level = 0.95)

# Plot the upper triangle without the diagonal; cells with p < 0.05 are
# marked with a black significance label.
corrplot(correlaciones, p.mat = testRes$p, method = "color", diag = FALSE,
         type = "upper", sig.level = 0.05, pch.cex = 0.9,
         insig = "label_sig", pch.col = "black", order = "AOE")

Created on 2024-09-30 with reprex v2.1.1

Of course I have more data, but for this forum I pasted just three columns from each table.