I have been stuck on how to make a publication worthy co-occurance network. My sequencing data had originally been processed through QIIME2, resulting in a species table, here and the metadata corresponding to the samples.
Up to this point I have only been able to run basic code to clean up the formatting of the species table and create a species abundance table. I have no idea if the following code makes sense or has been done correctly, but I have no idea where to go from here, other than to use igraph and gephi, and spearmans correlation.
I believe my next step would be to create specific networks for each of my variables. (i.e., UTC at V3 stage in bulk soil), followed by computing spearmans correlation, finding significant differences and creating the graph for output to gephi. correct me if im wrong.
This is what I have so far:
setwd("~/Desktop/OAData-moved/Coocurance")
library(tidyverse)
#Read in the table
species_table <- read_tsv("table-L7.tsv", skip = 1) %>%
rename(Taxon = `#ASV ID`)
#> Rows: 662 Columns: 65
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (1): #ASV ID
#> dbl (64): R5-BG3-1, R5-BG3-2, R5-BG3-3, R5-BG3-4, R5-BU-1, R5-BU-2, R5-BU-3,...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Extract taxonomic levels as columns
tax_levels <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
species_table_parsed <- species_table %>%
mutate(
Kingdom = str_extract(Taxon, "k__[^;]+") %>% str_remove("k__"),
Phylum = str_extract(Taxon, "p__[^;]+") %>% str_remove("p__"),
Class = str_extract(Taxon, "c__[^;]+") %>% str_remove("c__"),
Order = str_extract(Taxon, "o__[^;]+") %>% str_remove("o__"),
Family = str_extract(Taxon, "f__[^;]+") %>% str_remove("f__"),
Genus = str_extract(Taxon, "g__[^;]+") %>% str_remove("g__"),
Species = str_extract(Taxon, "s__[^;]+") %>% str_remove("s__")
) %>%
mutate(across(all_of(tax_levels), ~replace_na(., "")))
# Function to pick best available taxon per row
species_table_parsed <- species_table_parsed %>%
rowwise() %>%
mutate(
BestTaxon = coalesce(
if_else(Species != "", Species, NA_character_),
if_else(Genus != "", Genus, NA_character_),
if_else(Family != "", Family, NA_character_),
if_else(Order != "", Order, NA_character_),
if_else(Class != "", Class, NA_character_),
if_else(Phylum != "", Phylum, NA_character_),
if_else(Kingdom != "", Kingdom, NA_character_),
NA_character_
)
) %>%
ungroup()
# Optionally replace underscores with spaces for readability
species_table_parsed <- species_table_parsed %>%
mutate(BestTaxon = str_replace_all(BestTaxon, "_", " "))
# Sum all abundances by Besttaxon
species_abundances <- species_table_parsed %>%
select(-Taxon, -Kingdom, -Phylum, -Class, -Order, -Family, -Genus, -Species) %>%
group_by(BestTaxon) %>%
summarise(across(everything(), sum)) %>%
column_to_rownames("BestTaxon")
#make dataframe and transpose
species_abundances_t <- as.data.frame(t(species_abundances))
species_abundances_t$SampleID <- rownames(species_abundances_t)
#bring in metadata
metadata <- read_tsv("sample_metadata.tsv")
#> Rows: 64 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: "\t"
#> chr (5): SampleID, Stage, Treatment, Soiltype, Barcode
#> num (1): Reads
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#merge metadata and species abundances
merged_data <- left_join(metadata, species_abundances_t, by = "SampleID")
#remove reads and barcodes
merged_data <- merged_data %>%
select(-Barcode, -Reads)
Created on 2025-07-30 with reprex v2.1.1