Here is my code. It doesnt work. Please fix the relating clinical traits part. Here is the dataset: GEO Accession viewer
The dataset is the first supplementary file. The clinical trait data is the series matrix file.
Here is my code:
Install required packages
install.packages(c('WGCNA', 'matrixStats'))
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("preprocessCore")
BiocManager::install("edgeR")
BiocManager::install("impute")
Load the required packages
library(WGCNA)
library(edgeR)
library(matrixStats)
library(impute)
library(preprocessCore)
Set the working directory
setwd("/Users/ayanpatel/LumiereProject")
Display the current working directory
getwd()
The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
Read in the dataset
Data <- read.csv("GSE194313_FPKM.matrix.csv", header = TRUE, sep = ",")
Preprocessing steps
rownames(Data) <- Data[,"GeneID"]
Data <- Data[, -c(1:3)]
Data <- Data[rowSums(as.matrix(Data)) != 0, ]
Take a quick look at the data set
dim(Data)
names(Data)
Additional preprocessing steps
Data <- DGEList(Data)
Data <- calcNormFactors(Data, method = "TMM")
Data <- cpm(Data)
Continue with the rest of your code
powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
sft <- pickSoftThreshold(Data, powerVector = powers, verbose = 5)
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 <- 0.9
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2",
type = "n", main = "Scale independence")
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2])
Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
xlab = "Soft Threshold (power)", ylab = "Mean Connectivity",
type = "n", main = "Mean connectivity")
text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, cex = cex1, col = "red")
Saving Results
moduleLabels <- net$colors
moduleColors <- labels2colors(net$colors)
MEs <- net$MEs
geneTree <- net$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree, file = "part1.RData")
Network Construction and Module Detection
net <- blockwiseModules(t(Data), power = 9,
TOMType = "unsigned", minModuleSize = 30,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE, saveTOMFileBase = "lunghemorrhageTOM",
verbose = 3)
Visualizing the network and module colors
sizeGrWindow(12, 9)
Convert labels to colors for plotting
mergedColors <- labels2colors(net$colors)
Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
Saving Results
moduleLabels <- net$colors
moduleColors <- labels2colors(net$colors)
MEs <- net$MEs
geneTree <- net$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree, file = "part2.RData")
Relating modules to external clinical traits and identifying important genes
Load the expression and trait data saved in the first part
load("part1.RData")
Load network data saved in the second part
load("part2.RData")
Define numbers of genes and samples
nGenes <- ncol(Data)
nSamples <- nrow(Data)
Recalculate MEs with color labels
MEs0 <- moduleEigengenes(Data, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)
moduleTraitCor <- cor(MEs, datTraits, use = "p")
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
sizeGrWindow(10, 6)
Display the correlations and their p-values
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3))
Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
zlim = c(-1, 1),
main = "Module-trait relationships")
Define the weight variable containing the weight column of datTraits
weight <- as.data.frame(datTraits$weight_g)
names(weight) <- "weight"
Names (colors) of the modules
modNames <- substring(names(MEs), 3)
geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))
MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
names(geneModuleMembership) <- paste("MM", modNames, sep = "")
names(MMPvalue) <- paste("p.MM", modNames, sep = "")
geneTraitSignificance <- as.data.frame(cor(datExpr, weight, use = "p"))
GSPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
names(geneTraitSignificance) <- paste("GS.", names(weight), sep = "")
names(GSPvalue) <- paste("p.GS.", names(weight), sep = "")
module <- "brown"
column <- match(module, modNames)
moduleGenes <- moduleColors == module
sizeGrWindow(7, 7)
par(mfrow = c(1, 1))
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = "Module membership vs. gene significance\n",
cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
Load the gene annotation data
annot <- read.csv("GeneAnnotation.csv")
dim(annot)
names(annot)
probes <- names(datExpr)
probes2annot <- match(probes, annot$substanceBXH)
The following is the number of probes without annotation
sum(is.na(probes2annot))
Should return 0
Create the starting data frame
geneInfo0 <- data.frame(substanceBXH = probes,
geneSymbol = annot$gene_symbol[probes2annot],
LocusLinkID = annot$LocusLinkID[probes2annot],
moduleColor = moduleColors,
geneTraitSignificance,
GSPvalue)
Order modules by their significance for weight
modOrder <- order(-abs(cor(MEs, weight, use = "p")))
Add module membership information in the chosen order
for (mod in 1:ncol(geneModuleMembership)) {
oldNames <- names(geneInfo0)
geneInfo0 <- data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]],
MMPvalue[, modOrder[mod]])
names(geneInfo0) <- c(oldNames, paste("MM.", modNames[modOrder[mod]], sep = ""),
paste("p.MM.", modNames[modOrder[mod]], sep = ""))
}
Order the genes in the geneInfo variable first by module color, then by geneTraitSignificance
geneOrder <- order(geneInfo0$moduleColor, -abs(geneInfo0$GS.weight))
geneInfo <- geneInfo0[geneOrder, ]
write.csv(geneInfo, file = "geneAnnotation.csv")