I used this code to compare all genes to each other. The code function, but the main problem is that the execution is slow. Is there anyway to make this code faster? There are four nested loop but i don't understand how to make this code in parallel.
Id_GeneNameTwoGenes <- Id_GeneName3[1:10,]
cl <- makeCluster(detectCores()-1)
registerDoParallel(cl)
system.time({
n <- nrow(Id_GeneNameTwoGenes)
clusterExport(cl,"n")
nms <- rownames(Id_GeneNameTwoGenes)
clusterExport(cl,"nms")
V1 <- rep(nms[1:(n-1)],seq(from=n-1, to = 1, by = -1))
clusterExport(cl,"V1")
V2 <- unlist(parLapply(cl,1:(n-1), function(i)(nms[(i+1):n])))
clusterExport(cl,"V2")
weight <- unlist(lapply(1:(n-1), function(i) (
sapply((i+1):n, function(j) {
rowx <- Id_GeneNameTwoGenes[i,colnames(Id_GeneNameTwoGenes[i,which(Id_GeneNameTwoGenes[i,] == 1 & colnames(Id_GeneNameTwoGenes) != "ENTREZID")])]
rowy <- Id_GeneNameTwoGenes[j,colnames(Id_GeneNameTwoGenes[j,which(Id_GeneNameTwoGenes[j,] == 1 & colnames(Id_GeneNameTwoGenes) != "ENTREZID")])]
weight2 <- unlist(lapply(1:ncol(rowx), function(k) (
sapply(1:ncol(rowy), function(w) {
sh <- distance.matrix[colnames(rowx[k]),colnames(rowy[w])]
if ( sh != "Inf")
sh
else
NA
})
)))
mean(weight2, na.rm = TRUE)
})
)))
similarity.matrix <- data.frame(source=Id_GeneNameTwoGenes[V1,1],dest=Id_GeneNameTwoGenes[V2,1],weight=weight)
q <- quantile(unlist(similarity.matrix), probs = c(.25, .5, .75))
filter.matrix.25 <- similarity.matrix[which(similarity.matrix$weight >= q[1]),]
filter.matrix.50 <- similarity.matrix[which(similarity.matrix$weight >= q[2]),]
filter.matrix.75 <- similarity.matrix[which(similarity.matrix$weight >= q[3]),]
})
registerDoSEQ()