dot_sankey plot

Hi, I have trouble with dot_sankey plot and need help with my script.

this is my data:

structure(list(Pathway = c("Alpha-amino acid biosynthetic proc. ", 
"Carboxylic acid biosynthetic proc. ", "Organic acid biosynthetic proc. ", 
"Small molecule biosynthetic proc. ", "Carboxylic acid metabolic proc. ", 
"Oxoacid metabolic proc. ", "Organic acid metabolic proc. ", 
"Small molecule metabolic proc. ", "Cellular catabolic proc. ", 
"Catabolic proc. "), GeneRatio = c(0.133333333333333, 0.0668693009118541, 
0.0664652567975831, 0.0558375634517767, 0.0427263479145473, 0.0426164519326065, 
0.0418287937743191, 0.0377066115702479, 0.0324729392173189, 0.0299465240641711
), FDR = c(3.8517556300416, 4.94174664623985, 4.94174664623985, 
5.91926984684473, 5.20765687373531, 5.20765687373531, 5.19008290917224, 
7.78847331776209, 5.91926984684473, 5.20765687373531), GeneID = c("ASNS/CTH/ASS1/PSAT1/CBS/ATP2B4/OAT/PHGDH/MTHFD1/PLOD3", 
"ASNS/CTH/ASS1/ACLY/PSAT1/OXSM/CBS/NA/CASP1/ATP2B4/MTHFD1/KYNU/OLAH/AKR1C3/OAT/PHGDH/PLOD3/HSD17B12/FABP5/ERLIN2/PRMT3/CBR1", 
"ASNS/CTH/ASS1/ACLY/PSAT1/OXSM/CBS/NA/CASP1/ATP2B4/MTHFD1/KYNU/OLAH/AKR1C3/OAT/PHGDH/PLOD3/HSD17B12/FABP5/ERLIN2/PRMT3/CBR1", 
"ASNS/COQ9/COQ5/CTH/COQ6/ASS1/ACLY/PSAT1/OXSM/CBS/FDXR/PC/NA/CASP1/ATP2B4/MTHFD1/KYNU/ERLIN2/OLAH/AKR1C3/OAT/PHGDH/PLOD3/ALDOC/HSD17B12/ADK/FABP5/PTPN2/PRMT3/SIRT5/AACS/ACSS3/CBR1", 
"ACAA1/OAT/ASNS/ARG2/QPRT/FAH/ALDOC/KYNU/CTH/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/PC/AKR1C3/NA/DLD/CASP1/ATP2B4/MTHFD1/ALDH1L2/OLAH/FOXK1/ACAD9/GPX1/LYPLA2/AACS/PHGDH/PLOD3/SLC27A1/HSD17B12/FABP5/GFPT1/NUDT19/ERLIN2/PRMT3/GSS/CBR1", 
"ACAA1/OAT/ASNS/ARG2/QPRT/FAH/ALDOC/KYNU/CTH/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/PC/AKR1C3/NA/DLD/CASP1/ATP2B4/MTHFD1/ALDH1L2/OLAH/FOXK1/ACAD9/GPX1/LYPLA2/AACS/PHGDH/PLOD3/SLC27A1/HSD17B12/FABP5/GFPT1/NUDT19/ERLIN2/PRMT3/GSS/ABHD14B/CBR1", 
"ACAA1/OAT/ASNS/ARG2/QPRT/FAH/ALDOC/KYNU/CTH/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/PC/AKR1C3/NA/DLD/CASP1/ATP2B4/MTHFD1/ALDH1L2/OLAH/FOXK1/ACAD9/GPX1/LYPLA2/AACS/PHGDH/PLOD3/SLC27A1/HSD17B12/FABP5/GFPT1/NUDT19/ERLIN2/PRMT3/GSS/ABHD14B/CBR1", 
"ACAA1/OAT/ASNS/ARG2/COQ9/MTHFD1/QPRT/FAH/NAMPT/ALDOC/COQ5/KYNU/CTH/COQ6/NUDT10/PPCS/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/FLAD1/FDXR/DCXR/PC/IMPDH2/FUCA1/CA13/AKR1C3/NUDT11/GFPT1/NA/DLD/SNX17/CASP1/AIFM2/ATP2B4/DGUOK/ALDH1L2/UCK2/ERLIN2/OLAH/CBR1/FOXK1/ACAD9/GPX1/BAD/LYPLA2/MAT2B/SCARB1/AACS/PHGDH/PLOD3/HDLBP/CAT/SLC27A1/ HSD17B12/ADK/FABP5/PTPN2/NUDT19/BRAT1/PRMT3/SIRT5/DIP2A/ERH/GSS/ACSS3/ABHD14B/NEIL2", 
"ACAA1/OAT/NEDD4/ARG2/DIS3/ACHE/HMOX1/PYGL/TIMP1/QPRT/FAH/UBE2S/SKP1/KYNU/CLU/CAT/NUDT10/PSME3/HSD17B4/STAM/PCYOX1L/CLGN/FBXL18/AGL/ERAP1/FBXO22/TMUB2/GUSB/UBE2L3/LONP1/NUDT11/PSMB10/FIS1/GPX1/NA/HGS/LYPLA2/ATP2B4/GPC1/FAP/DLD/PON2/WFS1/RBM24/OPTN/RBM38/HSPBP1/GSTM3/ALDH1L2/TGFB1I1/ERLIN2/DFFA/CBS/FOXK1/BCL2/TBK1/AKR1C3/SCARB1/SRPX/UCHL3/IDH1/RCN3/EPHX1/NUDT19/STX12/UFM1/ITGB4/CPTP/METTL3/FUCA1/SNX5/BAD/HAX1/PRKCA/NEIL2/MTMR14", 
"LYPLA2/ACAA1/OAT/NEDD4/ARG2/DIS3/ACHE/HMOX1/PYGL/TIMP1/QPRT/FAH/UBE2S/ALDOC/SKP1/KYNU/CLU/CAT/NUDT10/PSME3/HSD17B4/STAM/PCYOX1L/CLGN/FBXL18/AGL/ERAP1/FBXO22/TMUB2/GUSB/FUCA1/UBE2L3/LONP1/NUDT11/PSMB10/FIS1/GPX1/NA/SNX17/HGS/ATP2B4/GPC1/FAP/DLD/PON2/WFS1/RBM24/UCHL3/OPTN/RBM38/HSPBP1/GSTM3/ALDH1L2/TGFB1I1/ERLIN2/DFFA/CBS/FOXK1/BCL2/TBK1/AKR1C3/HSD17B11/BAD/FYN/SCARB1/SRPX/CDK4/IDH1/RCN3/EPHX1/NUDT19/STX12/UFM1/ITGB4/CPTP/METTL3/SNX5/NAGLU/HAX1/PRKCA/NEIL2/MTMR14"
), Count = c(10L, 22L, 22L, 33L, 42L, 43L, 43L, 73L, 78L, 84L
)), class = "data.frame", row.names = c(NA, -10L))
                               Pathway  GeneRatio      FDR
1 Alpha-amino acid biosynthetic proc.  0.13333333 3.851756
2  Carboxylic acid biosynthetic proc.  0.06686930 4.941747
3     Organic acid biosynthetic proc.  0.06646526 4.941747
4   Small molecule biosynthetic proc.  0.05583756 5.919270
5     Carboxylic acid metabolic proc.  0.04272635 5.207657
6             Oxoacid metabolic proc.  0.04261645 5.207657
                                                                                                                                                                                                                                                GeneID
1                                                                                                                                                                                                ASNS/CTH/ASS1/PSAT1/CBS/ATP2B4/OAT/PHGDH/MTHFD1/PLOD3
2                                                                                                                           ASNS/CTH/ASS1/ACLY/PSAT1/OXSM/CBS/NA/CASP1/ATP2B4/MTHFD1/KYNU/OLAH/AKR1C3/OAT/PHGDH/PLOD3/HSD17B12/FABP5/ERLIN2/PRMT3/CBR1
3                                                                                                                           ASNS/CTH/ASS1/ACLY/PSAT1/OXSM/CBS/NA/CASP1/ATP2B4/MTHFD1/KYNU/OLAH/AKR1C3/OAT/PHGDH/PLOD3/HSD17B12/FABP5/ERLIN2/PRMT3/CBR1
4                                                                   ASNS/COQ9/COQ5/CTH/COQ6/ASS1/ACLY/PSAT1/OXSM/CBS/FDXR/PC/NA/CASP1/ATP2B4/MTHFD1/KYNU/ERLIN2/OLAH/AKR1C3/OAT/PHGDH/PLOD3/ALDOC/HSD17B12/ADK/FABP5/PTPN2/PRMT3/SIRT5/AACS/ACSS3/CBR1
5         ACAA1/OAT/ASNS/ARG2/QPRT/FAH/ALDOC/KYNU/CTH/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/PC/AKR1C3/NA/DLD/CASP1/ATP2B4/MTHFD1/ALDH1L2/OLAH/FOXK1/ACAD9/GPX1/LYPLA2/AACS/PHGDH/PLOD3/SLC27A1/HSD17B12/FABP5/GFPT1/NUDT19/ERLIN2/PRMT3/GSS/CBR1
6 ACAA1/OAT/ASNS/ARG2/QPRT/FAH/ALDOC/KYNU/CTH/ASS1/ACLY/HSD17B4/PSAT1/IDH1/EPHX1/OXSM/CBS/PC/AKR1C3/NA/DLD/CASP1/ATP2B4/MTHFD1/ALDH1L2/OLAH/FOXK1/ACAD9/GPX1/LYPLA2/AACS/PHGDH/PLOD3/SLC27A1/HSD17B12/FABP5/GFPT1/NUDT19/ERLIN2/PRMT3/GSS/ABHD14B/CBR1
  Count
1    10
2    22
3    22
4    33
5    42
6    43

and this is my code:
install.packages("ggplot2")
install.packages("dplyr")
install.packages("ggalluvial") # For Sankey-like plots
install.packages("tidyr") # For unnest function

library(dplyr) # For data manipulation
library(tidyr) # For separate_rows function

data <- read.csv(file = "C:/Users/Sofia/Downloads/data.csv", sep=";")

sankey_dot <- function(data,
sankey.text.size = 3,
sankey.x.axis.text.position = 1,
sankey.x.axis.text.size = 12,
molecule.label = "GeneID",
dot.color = "Count",
dot.position = 6,
dot.label = "Pathway",
dot.text.size = 12,
dot.scale = 0.72,
dot.x = 0.59,
dot.y = -0.173,
dot.width = 0.48,
dot.height = 1.33,
...) {

Data collation after KEGG or GO enrichment analysis

data3 <- data %>%
dplyr::select("GeneID", "Pathway", "FDR", "Count", "FoldEnrichment") %>%
dplyr::distinct() %>%
dplyr::arrange(FDR, descreasing = TRUE)
data3$FoldEnrichment <- data3$FoldEnrichment %>% round(digits = 2)

bubble chart

if (dot.label == "Pathway") {
p1 <- ggplot(
data = data3,
aes(
x = FoldEnrichment,
y = stats::reorder(Pathway, FoldEnrichment)
)
) +
geom_point(aes(size = Count, color = -log10(FDR))) +
scale_size_continuous(range = c(2, 6)) +
theme_bw() +
theme(
axis.text.x = element_text(
size = "Count",
colour = "black", vjust = 1
),
axis.text.y = element_text(
size = "Count",
colour = "black", hjust = 1
)
) +
scale_colour_distiller(palette = dot.color, direction = -1) +
labs(x = "FoldEnrichment", y = "") +
theme_bw() +
theme(
axis.title = element_text(size = 12),
axis.text = element_text(size = 11),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8)
)
} else if (dot.label == "GeneID") {
p1 <- ggplot(
data = data3,
aes(
x = FoldEnrichment,
y = stats::reorder(GeneID, FoldEnrichment)
)
) +
geom_point(aes(size = Count, color = -log10(FDR))) +
scale_size_continuous(range = c(2, 6)) +
theme_bw() +
theme(
axis.text.x = element_text(
size = "Count",
colour = "black", vjust = 1
),
axis.text.y = element_text(
size = "Count",
colour = "black", hjust = 1
)
) +
scale_colour_distiller(palette = dot.color, direction = -1) +
labs(x = "FoldEnrichment", y = "") +
theme_bw() +
theme(
axis.title = element_text(size = 12),
axis.text = element_text(size = 11),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 8)
)
}

data_sankey <- data %>%
dplyr::arrange(FoldEnrichment, descreasing = FALSE) %>%
dplyr::select(herb, molecule_id, target, dot.label) %>%
as.data.frame() %>%
dplyr::distinct()

df <- data_sankey %>% ggsankey::make_long(colnames(data_sankey))

if (dot.label == "Description" && molecule.label == "molecule_id") {
df$node <- factor(df$node, levels = c(
data_sankey$Description %>% unique(),
data_sankey$target %>% unique(),
data_sankey$molecule_id %>% unique(),
data_sankey$herb %>% unique()
))
} else if (dot.lable == "ID" && molecule.lable == "molecule_id") {
df$node <- factor(df$node, levels = c(
data_sankey$ID %>% unique(),
data_sankey$target %>% unique(),
data_sankey$molecule_id %>% unique(),
data_sankey$herb %>% unique()
))
} else if (dot.lable == "Description" && molecule.lable == "molecule") {
df$node <- factor(df$node, levels = c(
data_sankey$Description %>% unique(),
data_sankey$target %>% unique(),
data_sankey$molecule_id %>% unique(),
data_sankey$herb %>% unique()
))
} else if (dot.lable == "ID" && molecule.lable == "molecule") {
df$node <- factor(df$node, levels = c(
data_sankey$ID %>% unique(),
data_sankey$target %>% unique(),
data_sankey$molecule %>% unique(),
data_sankey$herb %>% unique()
))
}

color settings

mycol <- cols4all::c4a("rainbow_wh_rd", length(unique(df$node)))
mycol2 <- sample(mycol, length(mycol))

Sankey diagram

p2 <- ggplot(df, aes(
x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = node,
label = node
)) +
ggsankey::geom_sankey(
flow.alpha = 0.5,
node.color = NA,
show.legend = FALSE
) +
ggsankey::geom_sankey_text(
size = sankey.text.size,
color = "black",
hjust = 1,
position = position_nudge(x = -0.05)
) +
ggsankey::theme_sankey(base_size = 20, base_family = "sans") +
scale_fill_manual(values = mycol2) +
theme(axis.title = element_blank()) +
theme(axis.text.x = element_text(
size = sankey.x.axis.text.size,
hjust = sankey.x.axis.text.position,
vjust = 10, colour = "black"
))
p3 <- p2 + theme(plot.margin = unit(c(0, dot.position, 0, 0), units = "cm"))
p4 <- cowplot::ggdraw() +
cowplot::draw_plot(p3) +
cowplot::draw_plot(p1,
scale = dot.scale,
x = dot.x, y = dot.y,
width = dot.width,
height = dot.height
)
return(p4)
}

Can anyone help me to have an input like this?

thank you

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.