Sankey using NetworkD3

newtoRguy · September 18, 2024, 10:58pm

Hi R coders,
I'm no R expert.
I am using NetworkD3 to create a sankey diagram. The codes run perfectly well but i'd like to add nodes percentages. Denominator would be the total of a column (month) and numerator would be the number in each node of the month. I tried everything but it doesn't seem to work.
Would really appreciate your help. My codes can be found below.

### install R packages
# install.packages("dplyr","tidyr","readr",sjmisc","sjlabelled",
"networkD3", "ggplot2", "lubridate", "RColorBrewer", "extrafont")

### Load R packages
library(dplyr)
library(tibble)
library(lubridate)
library(sjlabelled)
library(tidyr)
library(networkD3)
library(readr)
library(sjmisc)
library(ggplot2)
library(RColorBrewer)
library(extrafont)
library(stringr)
library(haven)
library (htmlwidgets)
library(timevis)
library(lifecycle)

setwd("/_PHSR/DQ_FDA_OUD/PI/Sankey")

glimpse(rename_all13months_9mo)

raw_data2 <- rename_all13months_9mo %>% 
  gather(key, value, -ENROLID, -newmonth1) %>%  
  unite(new.col, c(key, newmonth1)) %>%   
  spread(new.col, value) 


character <- c("A")
character_1 <- paste0(character, sep= '_1')

sankey_flow_addnum <-raw_data2 %>%
  mutate(newtreat_M1=case_when(newtreat_M1 != ""~paste0(newtreat_M1,sep='_01')),
         newtreat_M2=case_when(newtreat_M2 != ""~paste0(newtreat_M2, sep='_02')),
         newtreat_M3=case_when(newtreat_M3 != ""~paste0(newtreat_M3, sep='_03')),
         newtreat_M4=case_when(newtreat_M4 != ""~paste0(newtreat_M4, sep='_04')),
         newtreat_M5=case_when(newtreat_M5 != ""~paste0(newtreat_M5, sep='_05')),
         newtreat_M6=case_when(newtreat_M6 != ""~paste0(newtreat_M6, sep='_06')),
         newtreat_M7=case_when(newtreat_M7 != ""~paste0(newtreat_M7, sep='_07')),
         newtreat_M8=case_when(newtreat_M8 != ""~paste0(newtreat_M8, sep='_08')),
         newtreat_M9=case_when(newtreat_M9 != ""~paste0(newtreat_M9, sep='_09')),
         newtreat_M10=case_when(newtreat_M10 != ""~paste0(newtreat_M10, sep='_10')))

sankey_flow_freq <- sankey_flow_addnum %>%
  
  group_by(newtreat_M1,newtreat_M2,newtreat_M3, newtreat_M4, newtreat_M5, newtreat_M6, newtreat_M7, newtreat_M8, newtreat_M9, newtreat_M10) %>%
  summarise(n=n()) %>%
  ungroup()


fil_data <- sankey_flow_freq[sankey_flow_freq$newtreat_M1 == "BUP_01", ]
print(fil_data)


Newdata_11more <- fil_data[fil_data$n >= 11, ]


newtreat_M1_M2 <- Newdata_11more %>%
  select(IN=1, Out=2,11)
newtreat_M2_M3 <- Newdata_11more %>%
  select(IN=2, Out=3,11)
newtreat_M3_M4 <- Newdata_11more %>%
  select(IN=3, Out=4,11)
newtreat_M4_M5 <- Newdata_11more %>%
  select(IN=4, Out=5,11)
newtreat_M5_M6 <- Newdata_11more %>%
  select(IN=5, Out=6,11)
newtreat_M6_M7 <- Newdata_11more %>%
  select(IN=6, Out=7,11)
newtreat_M7_M8 <- Newdata_11more %>%
  select(IN=7, Out=8,11)
newtreat_M8_M9 <- Newdata_11more %>%
  select(IN=8, Out=9,11)
newtreat_M9_M10 <- Newdata_11more %>%
  select(IN=9, Out=10,11)


Sankey_plot_data <- rbind(newtreat_M1_M2, newtreat_M2_M3, newtreat_M3_M4, newtreat_M4_M5, newtreat_M5_M6, newtreat_M6_M7, newtreat_M7_M8, newtreat_M8_M9, newtreat_M9_M10)
group_by(IN,Out) %>%
  summarise(Freq=sum(n)) %>%
  ungroup()

houshki <- Sankey_plot_data[Sankey_plot_data$IN != "Dropout_02" & Sankey_plot_data$IN != "Dropout_03" & Sankey_plot_data$IN != "Dropout_04" & Sankey_plot_data$IN != "Dropout_05" & Sankey_plot_data$IN != "Dropout_06" & Sankey_plot_data$IN != "Dropout_07" & Sankey_plot_data$IN != "Dropout_08" & Sankey_plot_data$IN != "Dropout_09" & Sankey_plot_data$IN != "Dropout_10" & Sankey_plot_data$IN != "Dropout_11" & Sankey_plot_data$IN != "Dropout_12" & Sankey_plot_data$IN != "Dropout_13" & Sankey_plot_data$IN != "Dropout_14", ]


nodes <- houshki %>%
  select(IN, Out) %>%
  pivot_longer(c("IN", "Out"), names_to = "col_name",
               values_to="name_match") %>%
  select(-1) %>% distinct() %>%
  mutate(name=str_sub(name_match, end=-4))

nodes <- data.frame(nodes)

sankey_plot_id <- houshki  %>%
  mutate(IDIn =match(IN, nodes$name_match)-1,
         IDout= match(Out, nodes$name_match)-1)

sankey_plot_id <- data.frame(sankey_plot_id)


newda <- sankey_plot_id %>%
  mutate(nam= case_when(IN == "BHV_01" ~ "BHV",
                        IN == "BHV_02" ~ "BHV",
                        IN == "BHV_03" ~ "BHV",
                        IN == "BHV_04" ~ "BHV",
                        IN == "BHV_05" ~ "BHV",
                        IN == "BHV_06" ~ "BHV",
                        IN == "BHV_07" ~ "BHV",
                        IN == "BHV_08" ~ "BHV",
                        IN == "BHV_09" ~ "BHV",
                        IN == "BHV_10" ~ "BHV",
                        IN == "BHV_11" ~ "BHV",
                        IN == "BHV_12" ~ "BHV",
                        IN == "BHV_13" ~ "BHV",
                        IN == "BUP,BHV_01" ~ "BUP,BHV",
                        IN == "BUP,BHV_02" ~ "BUP,BHV",
                        IN == "BUP,BHV_03" ~ "BUP,BHV",
                        IN == "BUP,BHV_04" ~ "BUP,BHV",
                        IN == "BUP,BHV_05" ~ "BUP,BHV",
                        IN == "BUP,BHV_06" ~ "BUP,BHV",
                        IN == "BUP,BHV_07" ~ "BUP,BHV",
                        IN == "BUP,BHV_08" ~ "BUP,BHV",
                        IN == "BUP,BHV_09" ~ "BUP,BHV",
                        IN == "BUP,BHV_10" ~ "BUP,BHV",
                        IN == "BUP,BHV_11" ~ "BUP,BHV",
                        IN == "BUP,BHV_12" ~ "BUP,BHV",
                        IN == "BUP,BHV_13" ~ "BUP,BHV",
                        IN == "BUP_01" ~ "BUP",
                        IN == "BUP_02" ~ "BUP",
                        IN == "BUP_03" ~ "BUP",
                        IN == "BUP_04" ~ "BUP",
                        IN == "BUP_05" ~ "BUP",
                        IN == "BUP_06" ~ "BUP",
                        IN == "BUP_07" ~ "BUP",
                        IN == "BUP_08" ~ "BUP",
                        IN == "BUP_09" ~ "BUP",
                        IN == "BUP_10" ~ "BUP",
                        IN == "BUP_11" ~ "BUP",
                        IN == "BUP_12" ~ "BUP",
                        IN == "BUP_13" ~ "BUP",
                        IN == "No treatment_01" ~ "No treatment",
                        IN == "No treatment_02" ~ "No treatment",
                        IN == "No treatment_03" ~ "No treatment",
                        IN == "No treatment_04" ~ "No treatment",
                        IN == "No treatment_05" ~ "No treatment",
                        IN == "No treatment_06" ~ "No treatment",
                        IN == "No treatment_07" ~ "No treatment",
                        IN == "No treatment_08" ~ "No treatment",
                        IN == "No treatment_09" ~ "No treatment",
                        IN == "No treatment_10" ~ "No treatment",
                        IN == "No treatment_11" ~ "No treatment",
                        IN == "No treatment_12" ~ "No treatment",
                        IN == "No treatment_13" ~ "No treatment",
                        IN == "NTX_01" ~ "NTX",
                        IN == "NTX_02" ~ "NTX",
                        IN == "NTX_03" ~ "NTX",
                        IN == "NTX_04" ~ "NTX",
                        IN == "NTX_05" ~ "NTX",
                        IN == "NTX_06" ~ "NTX",
                        IN == "NTX_07" ~ "NTX",
                        IN == "NTX_08" ~ "NTX",
                        IN == "NTX_09" ~ "NTX",
                        IN == "NTX_10" ~ "NTX",
                        IN == "NTX_11" ~ "NTX",
                        IN == "NTX_12" ~ "NTX",
                        IN == "NTX_13" ~ "NTX",
                        IN == "NTX,BHV_01" ~ "NTX,BHV",
                        IN == "NTX,BHV_02" ~ "NTX,BHV",
                        IN == "NTX,BHV_03" ~ "NTX,BHV",
                        IN == "NTX,BHV_04" ~ "NTX,BHV",
                        IN == "NTX,BHV_05" ~ "NTX,BHV",
                        IN == "NTX,BHV_06" ~ "NTX,BHV",
                        IN == "NTX,BHV_07" ~ "NTX,BHV",
                        IN == "NTX,BHV_08" ~ "NTX,BHV",
                        IN == "NTX,BHV_09" ~ "NTX,BHV",
                        IN == "NTX,BHV_10" ~ "NTX,BHV",
                        IN == "NTX,BHV_11" ~ "NTX,BHV",
                        IN == "NTX,BHV_12" ~ "NTX,BHV",
                        IN == "NTX,BHV_13" ~ "NTX,BHV",
                        IN == "dropout_01" ~ "dropout",
                        IN == "dropout_02" ~ "dropout",
                        IN == "dropout_03" ~ "dropout",
                        IN == "dropout_04" ~ "dropout",
                        IN == "dropout_05" ~ "dropout",
                        IN == "dropout_06" ~ "dropout",
                        IN == "dropout_07" ~ "dropout",
                        IN == "dropout_08" ~ "dropout",
                        IN == "dropout_09" ~ "dropout",
                        IN == "dropout_10" ~ "dropout",
                        IN == "dropout_11" ~ "dropout",
                        IN == "dropout_12" ~ "dropout",
                        IN == "dropout_13" ~ "dropout",
                        IN == "MTD+/-BHV_01" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_02" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_03" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_04" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_05" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_06" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_07" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_08" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_09" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_10" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_11" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_12" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_13" ~ "MTD+/-BHV",
                        IN == "Other_01" ~ "Other",
                        IN == "Other_02" ~ "Other",
                        IN == "Other_03" ~ "Other",
                        IN == "Other_04" ~ "Other",
                        IN == "Other_05" ~ "Other",
                        IN == "Other_06" ~ "Other",
                        IN == "Other_07" ~ "Other",
                        IN == "Other_08" ~ "Other",
                        IN == "Other_09" ~ "Other",
                        IN == "Other_10" ~ "Other",
                        IN == "Other_11" ~ "Other",
                        IN == "Other_12" ~ "Other",
                        IN == "Other_13" ~ "Other"
                        
  ))


glimpse(newda)


sankeyNetwork(Links = newda, Nodes = nodes,
              Source = "IDIn", Target = "IDout",
              Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15, 
              nodePadding = 25, LinkGroup = "nam",
              sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
                    Source = "IDIn", Target = "IDout",
                    Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15, 
                    nodePadding = 25, LinkGroup = "nam",
                    sinksRight = FALSE)






p <- htmlwidgets::onRender(sn, '
  function(el) { 
    var cols_x = this.sankey.nodes().map(d => d.x).filter((v, i, a) => a.indexOf(v) === i).sort(function(a, b){return a - b});
       var labels = ["Assignment period, n=601,347", "Month 1, n=595,757", "Month 2, n=590,899", "Month 3, n=577,097" ]

    cols_x.forEach((d, i) => {
      d3.select(el).select("svg")
        .append("text")
        .attr("x", d)
        .attr("y", 12)
        .text(labels[i]);
    })
  }
')

onRender(
  p,
  '
  function(el, x) {
    d3.selectAll(".node text").attr("text-anchor", "begin").attr("x", 20);
  }
  '
)

cjyetman · September 19, 2024, 2:18am

Are you trying to calculate the percentages? It's hard to see exactly what your data looks like, but it seems like it might look like this, and if so, you could calculate the percentages like this...

sankey_plot_id <- 
  tibble::tribble(
  ~IN,               ~Out,              ~Freq,
  "BUP_01",          "No_treatment_02", 4,
  "BUP_01",          "BUP_02",          4,
  "BUP_01",          "BHV_02",          4,
  "BUP_01",          "Dropout_02",      1,
  "No_treatment_02", "No_treatment_03", 1,
  "No_treatment_02", "Dropout_03",      1,
  "No_treatment_02", "BUP_03",          1,
  "No_treatment_02", "BHV_03",          1,
  "BUP_02",          "No_treatment_03", 1,
  "BUP_02",          "Dropout_03",      1,
  "BUP_02",          "BUP_03",          1,
  "BUP_02",          "BHV_03",          1,
  "BHV_02",          "No_treatment_03", 1,
  "BHV_02",          "Dropout_03",      1,
  "BHV_02",          "BUP_03",          1,
  "BHV_02",          "BHV_03",          1,
  "BUP_03",          "Dropout_04",      3,
  "No_treatment_03", "Dropout_04",      3,
  "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(sankey_plot_id$IN, sankey_plot_id$Out)))
nodes$group <- sub("_[0-9]$", "", nodes$node)

sankey_plot_id$IDIn <- match(sankey_plot_id$IN, nodes$node) - 1
sankey_plot_id$IDout <- match(sankey_plot_id$Out, nodes$node) - 1

sankey_plot_id
#> # A tibble: 19 × 5
#>    IN              Out              Freq  IDIn IDout
#>    <chr>           <chr>           <dbl> <dbl> <dbl>
#>  1 BUP_01          No_treatment_02     4     0     1
#>  2 BUP_01          BUP_02              4     0     2
#>  3 BUP_01          BHV_02              4     0     3
#>  4 BUP_01          Dropout_02          1     0     7
#>  5 No_treatment_02 No_treatment_03     1     1     5
#>  6 No_treatment_02 Dropout_03          1     1     8
#>  7 No_treatment_02 BUP_03              1     1     4
#>  8 No_treatment_02 BHV_03              1     1     6
#>  9 BUP_02          No_treatment_03     1     2     5
#> 10 BUP_02          Dropout_03          1     2     8
#> 11 BUP_02          BUP_03              1     2     4
#> 12 BUP_02          BHV_03              1     2     6
#> 13 BHV_02          No_treatment_03     1     3     5
#> 14 BHV_02          Dropout_03          1     3     8
#> 15 BHV_02          BUP_03              1     3     4
#> 16 BHV_02          BHV_03              1     3     6
#> 17 BUP_03          Dropout_04          3     4     9
#> 18 No_treatment_03 Dropout_04          3     5     9
#> 19 BHV_03          Dropout_04          3     6     9

library(dplyr)

sankey_plot_id %>% 
  summarise(total = sum(Freq), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)
#> # A tibble: 7 × 5
#>   IN              total month month_ttl percentage
#>   <chr>           <dbl> <chr>     <dbl>      <dbl>
#> 1 BUP_01             13 1            13      1    
#> 2 No_treatment_02     4 2            12      0.333
#> 3 BUP_02              4 2            12      0.333
#> 4 BHV_02              4 2            12      0.333
#> 5 BUP_03              3 3             9      0.333
#> 6 No_treatment_03     3 3             9      0.333
#> 7 BHV_03              3 3             9      0.333

newtoRguy · September 19, 2024, 2:25am

Hi! Thanks for your help. Here's what "newda" looks like... For column 1, the % should be 100% since there is only 1 group. For group 2, the denominator should be the sum of all nodes in column 2, and the numerator the number of people in the node... I still dont know how to do that.

cjyetman · September 19, 2024, 2:57am

that's what I showed you already , in the percentage column

sankey_plot_id %>% 
  summarise(total = sum(Freq), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)
#> # A tibble: 7 × 5
#>   IN              total month month_ttl percentage
#>   <chr>           <dbl> <chr>     <dbl>      <dbl>
#> 1 BUP_01             13 1            13      1    
#> 2 No_treatment_02     4 2            12      0.333
#> 3 BUP_02              4 2            12      0.333
#> 4 BHV_02              4 2            12      0.333
#> 5 BUP_03              3 3             9      0.333
#> 6 No_treatment_03     3 3             9      0.333
#> 7 BHV_03              3 3             9      0.333

cjyetman · September 19, 2024, 3:01am

or, given the structure of newda...

newda %>% 
  summarise(total = sum(n), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)

newtoRguy · September 19, 2024, 3:02am

Thanks!

Yes, this code gave me the correct calculation. I need to integrate them into the sankey... How do i do that? Here's what i have.

glimpse(newda)

newda %>%
summarise(total = sum(n), .by = IN) %>%
mutate(month = sub(".*([0-9])$", "\1", IN)) %>%
mutate(month_ttl = sum(total), .by = month) %>%
mutate(percentage = total / month_ttl * 100)

sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

p <- htmlwidgets::onRender(sn, '
function(el) {
var cols_x = this.sankey.nodes().map(d => d.x).filter((v, i, a) => a.indexOf(v) === i).sort(function(a, b){return a - b});
var labels = ["Assignment period, n=601,347", "Month 1, n=595,757", "Month 2, n=590,899", "Month 3, n=577,097" ]

cols_x.forEach((d, i) => {
  d3.select(el).select("svg")
    .append("text")
    .attr("x", d)
    .attr("y", 12)
    .text(labels[i]);
})

}
')

onRender(
p,
'
function(el, x) {
d3.selectAll(".node text").attr("text-anchor", "begin").attr("x", 20);
}
'
)

cjyetman · September 19, 2024, 3:17am

library(dplyr)

newda <- 
  tibble::tribble(
    ~IN,               ~Out,              ~n,
    "BUP_01",          "No_treatment_02", 4,
    "BUP_01",          "BUP_02",          4,
    "BUP_01",          "BHV_02",          4,
    "BUP_01",          "Dropout_02",      1,
    "No_treatment_02", "No_treatment_03", 1,
    "No_treatment_02", "Dropout_03",      1,
    "No_treatment_02", "BUP_03",          1,
    "No_treatment_02", "BHV_03",          1,
    "BUP_02",          "No_treatment_03", 1,
    "BUP_02",          "Dropout_03",      1,
    "BUP_02",          "BUP_03",          1,
    "BUP_02",          "BHV_03",          1,
    "BHV_02",          "No_treatment_03", 1,
    "BHV_02",          "Dropout_03",      1,
    "BHV_02",          "BUP_03",          1,
    "BHV_02",          "BHV_03",          1,
    "BUP_03",          "Dropout_04",      3,
    "No_treatment_03", "Dropout_04",      3,
    "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]$", "", newda$IN)

percentages <-
  newda %>% 
  summarise(total = sum(n), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl) %>% 
  select(IN, percentage)

nodes <-
  nodes %>% 
  left_join(percentages, by = c(node = "IN")) %>% 
  mutate(node_name = sub("_[0-9]{2}$", "", node)) %>% 
  mutate(node_label = trimws(paste(node_name, " ", ifelse(is.na(percentage), "", percentage))))

networkD3::sankeyNetwork(
  Links = newda,
  Nodes = nodes,
  Source = "IDIn",
  Target = "IDout",
  Value = "n",
  NodeID ="node_label",
  LinkGroup = "nam",
  sinksRight = FALSE
)
#> Links is a tbl_df. Converting to a plain data frame.

^{Created on 2024-09-19 with reprex v2.1.1}

newtoRguy · September 19, 2024, 3:29am

Thanks! It worked, but with one problem. I need to keep a percentage for the "dropout" group to show. It should have its own numerator, contribute to the denominator, and the denominator would be that of the column its in. I guess the reason it doesn't show is because it's not in the "IN". Thanks again for your help.

cjyetman · September 19, 2024, 10:22am

library(dplyr)

newda <- 
  tibble::tribble(
    ~IN,               ~Out,              ~n,
    "BUP_01",          "No_treatment_02", 4,
    "BUP_01",          "BUP_02",          4,
    "BUP_01",          "BHV_02",          4,
    "BUP_01",          "Dropout_02",      1,
    "No_treatment_02", "No_treatment_03", 1,
    "No_treatment_02", "Dropout_03",      1,
    "No_treatment_02", "BUP_03",          1,
    "No_treatment_02", "BHV_03",          1,
    "BUP_02",          "No_treatment_03", 1,
    "BUP_02",          "Dropout_03",      1,
    "BUP_02",          "BUP_03",          1,
    "BUP_02",          "BHV_03",          1,
    "BHV_02",          "No_treatment_03", 1,
    "BHV_02",          "Dropout_03",      1,
    "BHV_02",          "BUP_03",          1,
    "BHV_02",          "BHV_03",          1,
    "BUP_03",          "Dropout_04",      3,
    "No_treatment_03", "Dropout_04",      3,
    "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]{2}$", "", newda$IN)

percentages <-
  newda %>% 
  select(IN, Out, n) %>% 
  tidyr::pivot_longer(cols = c(IN, Out), names_to = "type", values_to = "node") %>% 
  summarise(total = sum(n), .by = c("node", "type")) %>% 
  mutate(month = as.numeric(sub(".*([0-9])$", "\\1", node))) %>% 
  slice_head(n = 1, by = c(month, node)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl) %>% 
  select(node, percentage)

nodes <-
  nodes %>% 
  left_join(percentages, by = "node") %>% 
  mutate(node_name = sub("_[0-9]{2}$", "", node)) %>% 
  mutate(node_label = trimws(paste(node_name, " ", ifelse(is.na(percentage), "", percentage))))

networkD3::sankeyNetwork(
  Links = newda,
  Nodes = nodes,
  Source = "IDIn",
  Target = "IDout",
  Value = "n",
  NodeID ="node_label",
  LinkGroup = "nam",
  sinksRight = FALSE
)

newtoRguy · September 19, 2024, 5:41pm

Thank you for your amazing help, it finally worked out. One problem is that there is a "space" between the word and the comma in the sankey, and i want them to be attached (e.g. BUP, 100% NOT BUP , 100%). I tried making adjustments but the colors of the links are changing and no longer that of the node before it. Here's the code. Thanks again.

newda <- sankey_plot_id %>%
mutate(nam= case_when(IN == "BHV_01" ~ "BHV",
IN == "BHV_02" ~ "BHV",
IN == "BHV_03" ~ "BHV",
IN == "BHV_04" ~ "BHV",
IN == "BHV_05" ~ "BHV",
IN == "BHV_06" ~ "BHV",
IN == "BHV_07" ~ "BHV",
IN == "BHV_08" ~ "BHV",
IN == "BHV_09" ~ "BHV",
IN == "BHV_10" ~ "BHV",
IN == "BHV_11" ~ "BHV",
IN == "BHV_12" ~ "BHV",
IN == "BHV_13" ~ "BHV",
IN == "BUP,BHV_01" ~ "BUP,BHV",
IN == "BUP,BHV_02" ~ "BUP,BHV",
IN == "BUP,BHV_03" ~ "BUP,BHV",
IN == "BUP,BHV_04" ~ "BUP,BHV",
IN == "BUP,BHV_05" ~ "BUP,BHV",
IN == "BUP,BHV_06" ~ "BUP,BHV",
IN == "BUP,BHV_07" ~ "BUP,BHV",
IN == "BUP,BHV_08" ~ "BUP,BHV",
IN == "BUP,BHV_09" ~ "BUP,BHV",
IN == "BUP,BHV_10" ~ "BUP,BHV",
IN == "BUP,BHV_11" ~ "BUP,BHV",
IN == "BUP,BHV_12" ~ "BUP,BHV",
IN == "BUP,BHV_13" ~ "BUP,BHV",
IN == "BUP_01" ~ "BUP",
IN == "BUP_02" ~ "BUP",
IN == "BUP_03" ~ "BUP",
IN == "BUP_04" ~ "BUP",
IN == "BUP_05" ~ "BUP",
IN == "BUP_06" ~ "BUP",
IN == "BUP_07" ~ "BUP",
IN == "BUP_08" ~ "BUP",
IN == "BUP_09" ~ "BUP",
IN == "BUP_10" ~ "BUP",
IN == "BUP_11" ~ "BUP",
IN == "BUP_12" ~ "BUP",
IN == "BUP_13" ~ "BUP",
IN == "No treatment_01" ~ "No treatment",
IN == "No treatment_02" ~ "No treatment",
IN == "No treatment_03" ~ "No treatment",
IN == "No treatment_04" ~ "No treatment",
IN == "No treatment_05" ~ "No treatment",
IN == "No treatment_06" ~ "No treatment",
IN == "No treatment_07" ~ "No treatment",
IN == "No treatment_08" ~ "No treatment",
IN == "No treatment_09" ~ "No treatment",
IN == "No treatment_10" ~ "No treatment",
IN == "No treatment_11" ~ "No treatment",
IN == "No treatment_12" ~ "No treatment",
IN == "No treatment_13" ~ "No treatment",
IN == "NTX_01" ~ "NTX",
IN == "NTX_02" ~ "NTX",
IN == "NTX_03" ~ "NTX",
IN == "NTX_04" ~ "NTX",
IN == "NTX_05" ~ "NTX",
IN == "NTX_06" ~ "NTX",
IN == "NTX_07" ~ "NTX",
IN == "NTX_08" ~ "NTX",
IN == "NTX_09" ~ "NTX",
IN == "NTX_10" ~ "NTX",
IN == "NTX_11" ~ "NTX",
IN == "NTX_12" ~ "NTX",
IN == "NTX_13" ~ "NTX",
IN == "NTX,BHV_01" ~ "NTX,BHV",
IN == "NTX,BHV_02" ~ "NTX,BHV",
IN == "NTX,BHV_03" ~ "NTX,BHV",
IN == "NTX,BHV_04" ~ "NTX,BHV",
IN == "NTX,BHV_05" ~ "NTX,BHV",
IN == "NTX,BHV_06" ~ "NTX,BHV",
IN == "NTX,BHV_07" ~ "NTX,BHV",
IN == "NTX,BHV_08" ~ "NTX,BHV",
IN == "NTX,BHV_09" ~ "NTX,BHV",
IN == "NTX,BHV_10" ~ "NTX,BHV",
IN == "NTX,BHV_11" ~ "NTX,BHV",
IN == "NTX,BHV_12" ~ "NTX,BHV",
IN == "NTX,BHV_13" ~ "NTX,BHV",
IN == "dropout_01" ~ "dropout",
IN == "dropout_02" ~ "dropout",
IN == "dropout_03" ~ "dropout",
IN == "dropout_04" ~ "dropout",
IN == "dropout_05" ~ "dropout",
IN == "dropout_06" ~ "dropout",
IN == "dropout_07" ~ "dropout",
IN == "dropout_08" ~ "dropout",
IN == "dropout_09" ~ "dropout",
IN == "dropout_10" ~ "dropout",
IN == "dropout_11" ~ "dropout",
IN == "dropout_12" ~ "dropout",
IN == "dropout_13" ~ "dropout",
IN == "MTD+/-BHV_01" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_02" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_03" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_04" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_05" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_06" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_07" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_08" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_09" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_10" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_11" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_12" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_13" ~ "MTD+/-BHV",
IN == "Other_01" ~ "Other",
IN == "Other_02" ~ "Other",
IN == "Other_03" ~ "Other",
IN == "Other_04" ~ "Other",
IN == "Other_05" ~ "Other",
IN == "Other_06" ~ "Other",
IN == "Other_07" ~ "Other",
IN == "Other_08" ~ "Other",
IN == "Other_09" ~ "Other",
IN == "Other_10" ~ "Other",
IN == "Other_11" ~ "Other",
IN == "Other_12" ~ "Other",
IN == "Other_13" ~ "Other"

))

glimpse(newda)

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]{2}$", "", newda$IN)

percentages <-
newda %>%
select(IN, Out, n) %>%
tidyr::pivot_longer(cols = c(IN, Out), names_to = "type", values_to = "node") %>%
summarise(total = sum(n), .by = c("node", "type")) %>%
mutate(month = as.numeric(sub(".*([0-9])$", "\1", node))) %>%
slice_head(n = 1, by = c(month, node)) %>%
mutate(month_ttl = sum(total), .by = month) %>%
mutate(percentage = round((total / month_ttl) * 100, 1)) %>% # Multiply by 100 and round to 1 decimal point
select(node, percentage)

nodes <-
nodes %>%
left_join(percentages, by = "node") %>%
mutate(node_name = sub("_[0-9]{2}$", "", node)) %>%
mutate(node_label = trimws(paste(node_name,",",ifelse(is.na(percentage), "", paste(percentage, "%", sep = "")))))

sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="node_label", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

cjyetman · September 19, 2024, 7:13pm

change...

to...

mutate(node_label = trimws(paste0(node_name, ",", ifelse(is.na(percentage), "",

(note: paste0() instead of paste()

newtoRguy · September 19, 2024, 8:12pm

Thanks... When I did this the link colors changes and are no longer reflecting that of the previous node.

cjyetman · September 19, 2024, 9:06pm

Link color can be controlled by passing the name of the column in your links data frame that contains each links color group name to tne LinkGroup argument. Check the package documentation e.g. sankeyNetwork function - RDocumentation

newtoRguy · September 19, 2024, 9:24pm

Awesome. Thank you for your great help!!

system · September 26, 2024, 9:24pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.