Sankey using NetworkD3

Hi R coders,
I'm no R expert.
I am using NetworkD3 to create a sankey diagram. The codes run perfectly well but i'd like to add nodes percentages. Denominator would be the total of a column (month) and numerator would be the number in each node of the month. I tried everything but it doesn't seem to work.
Would really appreciate your help. My codes can be found below.

### install R packages
# install.packages("dplyr","tidyr","readr",sjmisc","sjlabelled",
"networkD3", "ggplot2", "lubridate", "RColorBrewer", "extrafont")

### Load R packages
library(dplyr)
library(tibble)
library(lubridate)
library(sjlabelled)
library(tidyr)
library(networkD3)
library(readr)
library(sjmisc)
library(ggplot2)
library(RColorBrewer)
library(extrafont)
library(stringr)
library(haven)
library (htmlwidgets)
library(timevis)
library(lifecycle)

setwd("/_PHSR/DQ_FDA_OUD/PI/Sankey")

glimpse(rename_all13months_9mo)

raw_data2 <- rename_all13months_9mo %>% 
  gather(key, value, -ENROLID, -newmonth1) %>%  
  unite(new.col, c(key, newmonth1)) %>%   
  spread(new.col, value) 


character <- c("A")
character_1 <- paste0(character, sep= '_1')

sankey_flow_addnum <-raw_data2 %>%
  mutate(newtreat_M1=case_when(newtreat_M1 != ""~paste0(newtreat_M1,sep='_01')),
         newtreat_M2=case_when(newtreat_M2 != ""~paste0(newtreat_M2, sep='_02')),
         newtreat_M3=case_when(newtreat_M3 != ""~paste0(newtreat_M3, sep='_03')),
         newtreat_M4=case_when(newtreat_M4 != ""~paste0(newtreat_M4, sep='_04')),
         newtreat_M5=case_when(newtreat_M5 != ""~paste0(newtreat_M5, sep='_05')),
         newtreat_M6=case_when(newtreat_M6 != ""~paste0(newtreat_M6, sep='_06')),
         newtreat_M7=case_when(newtreat_M7 != ""~paste0(newtreat_M7, sep='_07')),
         newtreat_M8=case_when(newtreat_M8 != ""~paste0(newtreat_M8, sep='_08')),
         newtreat_M9=case_when(newtreat_M9 != ""~paste0(newtreat_M9, sep='_09')),
         newtreat_M10=case_when(newtreat_M10 != ""~paste0(newtreat_M10, sep='_10')))

sankey_flow_freq <- sankey_flow_addnum %>%
  
  group_by(newtreat_M1,newtreat_M2,newtreat_M3, newtreat_M4, newtreat_M5, newtreat_M6, newtreat_M7, newtreat_M8, newtreat_M9, newtreat_M10) %>%
  summarise(n=n()) %>%
  ungroup()


fil_data <- sankey_flow_freq[sankey_flow_freq$newtreat_M1 == "BUP_01", ]
print(fil_data)


Newdata_11more <- fil_data[fil_data$n >= 11, ]


newtreat_M1_M2 <- Newdata_11more %>%
  select(IN=1, Out=2,11)
newtreat_M2_M3 <- Newdata_11more %>%
  select(IN=2, Out=3,11)
newtreat_M3_M4 <- Newdata_11more %>%
  select(IN=3, Out=4,11)
newtreat_M4_M5 <- Newdata_11more %>%
  select(IN=4, Out=5,11)
newtreat_M5_M6 <- Newdata_11more %>%
  select(IN=5, Out=6,11)
newtreat_M6_M7 <- Newdata_11more %>%
  select(IN=6, Out=7,11)
newtreat_M7_M8 <- Newdata_11more %>%
  select(IN=7, Out=8,11)
newtreat_M8_M9 <- Newdata_11more %>%
  select(IN=8, Out=9,11)
newtreat_M9_M10 <- Newdata_11more %>%
  select(IN=9, Out=10,11)


Sankey_plot_data <- rbind(newtreat_M1_M2, newtreat_M2_M3, newtreat_M3_M4, newtreat_M4_M5, newtreat_M5_M6, newtreat_M6_M7, newtreat_M7_M8, newtreat_M8_M9, newtreat_M9_M10)
group_by(IN,Out) %>%
  summarise(Freq=sum(n)) %>%
  ungroup()

houshki <- Sankey_plot_data[Sankey_plot_data$IN != "Dropout_02" & Sankey_plot_data$IN != "Dropout_03" & Sankey_plot_data$IN != "Dropout_04" & Sankey_plot_data$IN != "Dropout_05" & Sankey_plot_data$IN != "Dropout_06" & Sankey_plot_data$IN != "Dropout_07" & Sankey_plot_data$IN != "Dropout_08" & Sankey_plot_data$IN != "Dropout_09" & Sankey_plot_data$IN != "Dropout_10" & Sankey_plot_data$IN != "Dropout_11" & Sankey_plot_data$IN != "Dropout_12" & Sankey_plot_data$IN != "Dropout_13" & Sankey_plot_data$IN != "Dropout_14", ]


nodes <- houshki %>%
  select(IN, Out) %>%
  pivot_longer(c("IN", "Out"), names_to = "col_name",
               values_to="name_match") %>%
  select(-1) %>% distinct() %>%
  mutate(name=str_sub(name_match, end=-4))

nodes <- data.frame(nodes)

sankey_plot_id <- houshki  %>%
  mutate(IDIn =match(IN, nodes$name_match)-1,
         IDout= match(Out, nodes$name_match)-1)

sankey_plot_id <- data.frame(sankey_plot_id)


newda <- sankey_plot_id %>%
  mutate(nam= case_when(IN == "BHV_01" ~ "BHV",
                        IN == "BHV_02" ~ "BHV",
                        IN == "BHV_03" ~ "BHV",
                        IN == "BHV_04" ~ "BHV",
                        IN == "BHV_05" ~ "BHV",
                        IN == "BHV_06" ~ "BHV",
                        IN == "BHV_07" ~ "BHV",
                        IN == "BHV_08" ~ "BHV",
                        IN == "BHV_09" ~ "BHV",
                        IN == "BHV_10" ~ "BHV",
                        IN == "BHV_11" ~ "BHV",
                        IN == "BHV_12" ~ "BHV",
                        IN == "BHV_13" ~ "BHV",
                        IN == "BUP,BHV_01" ~ "BUP,BHV",
                        IN == "BUP,BHV_02" ~ "BUP,BHV",
                        IN == "BUP,BHV_03" ~ "BUP,BHV",
                        IN == "BUP,BHV_04" ~ "BUP,BHV",
                        IN == "BUP,BHV_05" ~ "BUP,BHV",
                        IN == "BUP,BHV_06" ~ "BUP,BHV",
                        IN == "BUP,BHV_07" ~ "BUP,BHV",
                        IN == "BUP,BHV_08" ~ "BUP,BHV",
                        IN == "BUP,BHV_09" ~ "BUP,BHV",
                        IN == "BUP,BHV_10" ~ "BUP,BHV",
                        IN == "BUP,BHV_11" ~ "BUP,BHV",
                        IN == "BUP,BHV_12" ~ "BUP,BHV",
                        IN == "BUP,BHV_13" ~ "BUP,BHV",
                        IN == "BUP_01" ~ "BUP",
                        IN == "BUP_02" ~ "BUP",
                        IN == "BUP_03" ~ "BUP",
                        IN == "BUP_04" ~ "BUP",
                        IN == "BUP_05" ~ "BUP",
                        IN == "BUP_06" ~ "BUP",
                        IN == "BUP_07" ~ "BUP",
                        IN == "BUP_08" ~ "BUP",
                        IN == "BUP_09" ~ "BUP",
                        IN == "BUP_10" ~ "BUP",
                        IN == "BUP_11" ~ "BUP",
                        IN == "BUP_12" ~ "BUP",
                        IN == "BUP_13" ~ "BUP",
                        IN == "No treatment_01" ~ "No treatment",
                        IN == "No treatment_02" ~ "No treatment",
                        IN == "No treatment_03" ~ "No treatment",
                        IN == "No treatment_04" ~ "No treatment",
                        IN == "No treatment_05" ~ "No treatment",
                        IN == "No treatment_06" ~ "No treatment",
                        IN == "No treatment_07" ~ "No treatment",
                        IN == "No treatment_08" ~ "No treatment",
                        IN == "No treatment_09" ~ "No treatment",
                        IN == "No treatment_10" ~ "No treatment",
                        IN == "No treatment_11" ~ "No treatment",
                        IN == "No treatment_12" ~ "No treatment",
                        IN == "No treatment_13" ~ "No treatment",
                        IN == "NTX_01" ~ "NTX",
                        IN == "NTX_02" ~ "NTX",
                        IN == "NTX_03" ~ "NTX",
                        IN == "NTX_04" ~ "NTX",
                        IN == "NTX_05" ~ "NTX",
                        IN == "NTX_06" ~ "NTX",
                        IN == "NTX_07" ~ "NTX",
                        IN == "NTX_08" ~ "NTX",
                        IN == "NTX_09" ~ "NTX",
                        IN == "NTX_10" ~ "NTX",
                        IN == "NTX_11" ~ "NTX",
                        IN == "NTX_12" ~ "NTX",
                        IN == "NTX_13" ~ "NTX",
                        IN == "NTX,BHV_01" ~ "NTX,BHV",
                        IN == "NTX,BHV_02" ~ "NTX,BHV",
                        IN == "NTX,BHV_03" ~ "NTX,BHV",
                        IN == "NTX,BHV_04" ~ "NTX,BHV",
                        IN == "NTX,BHV_05" ~ "NTX,BHV",
                        IN == "NTX,BHV_06" ~ "NTX,BHV",
                        IN == "NTX,BHV_07" ~ "NTX,BHV",
                        IN == "NTX,BHV_08" ~ "NTX,BHV",
                        IN == "NTX,BHV_09" ~ "NTX,BHV",
                        IN == "NTX,BHV_10" ~ "NTX,BHV",
                        IN == "NTX,BHV_11" ~ "NTX,BHV",
                        IN == "NTX,BHV_12" ~ "NTX,BHV",
                        IN == "NTX,BHV_13" ~ "NTX,BHV",
                        IN == "dropout_01" ~ "dropout",
                        IN == "dropout_02" ~ "dropout",
                        IN == "dropout_03" ~ "dropout",
                        IN == "dropout_04" ~ "dropout",
                        IN == "dropout_05" ~ "dropout",
                        IN == "dropout_06" ~ "dropout",
                        IN == "dropout_07" ~ "dropout",
                        IN == "dropout_08" ~ "dropout",
                        IN == "dropout_09" ~ "dropout",
                        IN == "dropout_10" ~ "dropout",
                        IN == "dropout_11" ~ "dropout",
                        IN == "dropout_12" ~ "dropout",
                        IN == "dropout_13" ~ "dropout",
                        IN == "MTD+/-BHV_01" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_02" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_03" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_04" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_05" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_06" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_07" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_08" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_09" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_10" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_11" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_12" ~ "MTD+/-BHV",
                        IN == "MTD+/-BHV_13" ~ "MTD+/-BHV",
                        IN == "Other_01" ~ "Other",
                        IN == "Other_02" ~ "Other",
                        IN == "Other_03" ~ "Other",
                        IN == "Other_04" ~ "Other",
                        IN == "Other_05" ~ "Other",
                        IN == "Other_06" ~ "Other",
                        IN == "Other_07" ~ "Other",
                        IN == "Other_08" ~ "Other",
                        IN == "Other_09" ~ "Other",
                        IN == "Other_10" ~ "Other",
                        IN == "Other_11" ~ "Other",
                        IN == "Other_12" ~ "Other",
                        IN == "Other_13" ~ "Other"
                        
  ))


glimpse(newda)


sankeyNetwork(Links = newda, Nodes = nodes,
              Source = "IDIn", Target = "IDout",
              Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15, 
              nodePadding = 25, LinkGroup = "nam",
              sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
                    Source = "IDIn", Target = "IDout",
                    Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15, 
                    nodePadding = 25, LinkGroup = "nam",
                    sinksRight = FALSE)






p <- htmlwidgets::onRender(sn, '
  function(el) { 
    var cols_x = this.sankey.nodes().map(d => d.x).filter((v, i, a) => a.indexOf(v) === i).sort(function(a, b){return a - b});
       var labels = ["Assignment period, n=601,347", "Month 1, n=595,757", "Month 2, n=590,899", "Month 3, n=577,097" ]

    cols_x.forEach((d, i) => {
      d3.select(el).select("svg")
        .append("text")
        .attr("x", d)
        .attr("y", 12)
        .text(labels[i]);
    })
  }
')

onRender(
  p,
  '
  function(el, x) {
    d3.selectAll(".node text").attr("text-anchor", "begin").attr("x", 20);
  }
  '
)

Are you trying to calculate the percentages? It's hard to see exactly what your data looks like, but it seems like it might look like this, and if so, you could calculate the percentages like this...

sankey_plot_id <- 
  tibble::tribble(
  ~IN,               ~Out,              ~Freq,
  "BUP_01",          "No_treatment_02", 4,
  "BUP_01",          "BUP_02",          4,
  "BUP_01",          "BHV_02",          4,
  "BUP_01",          "Dropout_02",      1,
  "No_treatment_02", "No_treatment_03", 1,
  "No_treatment_02", "Dropout_03",      1,
  "No_treatment_02", "BUP_03",          1,
  "No_treatment_02", "BHV_03",          1,
  "BUP_02",          "No_treatment_03", 1,
  "BUP_02",          "Dropout_03",      1,
  "BUP_02",          "BUP_03",          1,
  "BUP_02",          "BHV_03",          1,
  "BHV_02",          "No_treatment_03", 1,
  "BHV_02",          "Dropout_03",      1,
  "BHV_02",          "BUP_03",          1,
  "BHV_02",          "BHV_03",          1,
  "BUP_03",          "Dropout_04",      3,
  "No_treatment_03", "Dropout_04",      3,
  "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(sankey_plot_id$IN, sankey_plot_id$Out)))
nodes$group <- sub("_[0-9]$", "", nodes$node)

sankey_plot_id$IDIn <- match(sankey_plot_id$IN, nodes$node) - 1
sankey_plot_id$IDout <- match(sankey_plot_id$Out, nodes$node) - 1

sankey_plot_id
#> # A tibble: 19 × 5
#>    IN              Out              Freq  IDIn IDout
#>    <chr>           <chr>           <dbl> <dbl> <dbl>
#>  1 BUP_01          No_treatment_02     4     0     1
#>  2 BUP_01          BUP_02              4     0     2
#>  3 BUP_01          BHV_02              4     0     3
#>  4 BUP_01          Dropout_02          1     0     7
#>  5 No_treatment_02 No_treatment_03     1     1     5
#>  6 No_treatment_02 Dropout_03          1     1     8
#>  7 No_treatment_02 BUP_03              1     1     4
#>  8 No_treatment_02 BHV_03              1     1     6
#>  9 BUP_02          No_treatment_03     1     2     5
#> 10 BUP_02          Dropout_03          1     2     8
#> 11 BUP_02          BUP_03              1     2     4
#> 12 BUP_02          BHV_03              1     2     6
#> 13 BHV_02          No_treatment_03     1     3     5
#> 14 BHV_02          Dropout_03          1     3     8
#> 15 BHV_02          BUP_03              1     3     4
#> 16 BHV_02          BHV_03              1     3     6
#> 17 BUP_03          Dropout_04          3     4     9
#> 18 No_treatment_03 Dropout_04          3     5     9
#> 19 BHV_03          Dropout_04          3     6     9

library(dplyr)

sankey_plot_id %>% 
  summarise(total = sum(Freq), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)
#> # A tibble: 7 × 5
#>   IN              total month month_ttl percentage
#>   <chr>           <dbl> <chr>     <dbl>      <dbl>
#> 1 BUP_01             13 1            13      1    
#> 2 No_treatment_02     4 2            12      0.333
#> 3 BUP_02              4 2            12      0.333
#> 4 BHV_02              4 2            12      0.333
#> 5 BUP_03              3 3             9      0.333
#> 6 No_treatment_03     3 3             9      0.333
#> 7 BHV_03              3 3             9      0.333

Hi! Thanks for your help. Here's what "newda" looks like... For column 1, the % should be 100% since there is only 1 group. For group 2, the denominator should be the sum of all nodes in column 2, and the numerator the number of people in the node... I still dont know how to do that.

image

that's what I showed you already , in the percentage column

sankey_plot_id %>% 
  summarise(total = sum(Freq), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)
#> # A tibble: 7 × 5
#>   IN              total month month_ttl percentage
#>   <chr>           <dbl> <chr>     <dbl>      <dbl>
#> 1 BUP_01             13 1            13      1    
#> 2 No_treatment_02     4 2            12      0.333
#> 3 BUP_02              4 2            12      0.333
#> 4 BHV_02              4 2            12      0.333
#> 5 BUP_03              3 3             9      0.333
#> 6 No_treatment_03     3 3             9      0.333
#> 7 BHV_03              3 3             9      0.333

or, given the structure of newda...

newda %>% 
  summarise(total = sum(n), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl)

Thanks!

Yes, this code gave me the correct calculation. I need to integrate them into the sankey... How do i do that? Here's what i have.

glimpse(newda)

newda %>%
summarise(total = sum(n), .by = IN) %>%
mutate(month = sub(".*([0-9])$", "\1", IN)) %>%
mutate(month_ttl = sum(total), .by = month) %>%
mutate(percentage = total / month_ttl * 100)

sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

p <- htmlwidgets::onRender(sn, '
function(el) {
var cols_x = this.sankey.nodes().map(d => d.x).filter((v, i, a) => a.indexOf(v) === i).sort(function(a, b){return a - b});
var labels = ["Assignment period, n=601,347", "Month 1, n=595,757", "Month 2, n=590,899", "Month 3, n=577,097" ]

cols_x.forEach((d, i) => {
  d3.select(el).select("svg")
    .append("text")
    .attr("x", d)
    .attr("y", 12)
    .text(labels[i]);
})

}
')

onRender(
p,
'
function(el, x) {
d3.selectAll(".node text").attr("text-anchor", "begin").attr("x", 20);
}
'
)

library(dplyr)

newda <- 
  tibble::tribble(
    ~IN,               ~Out,              ~n,
    "BUP_01",          "No_treatment_02", 4,
    "BUP_01",          "BUP_02",          4,
    "BUP_01",          "BHV_02",          4,
    "BUP_01",          "Dropout_02",      1,
    "No_treatment_02", "No_treatment_03", 1,
    "No_treatment_02", "Dropout_03",      1,
    "No_treatment_02", "BUP_03",          1,
    "No_treatment_02", "BHV_03",          1,
    "BUP_02",          "No_treatment_03", 1,
    "BUP_02",          "Dropout_03",      1,
    "BUP_02",          "BUP_03",          1,
    "BUP_02",          "BHV_03",          1,
    "BHV_02",          "No_treatment_03", 1,
    "BHV_02",          "Dropout_03",      1,
    "BHV_02",          "BUP_03",          1,
    "BHV_02",          "BHV_03",          1,
    "BUP_03",          "Dropout_04",      3,
    "No_treatment_03", "Dropout_04",      3,
    "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]$", "", newda$IN)

percentages <-
  newda %>% 
  summarise(total = sum(n), .by = IN) %>% 
  mutate(month = sub(".*([0-9])$", "\\1", IN)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl) %>% 
  select(IN, percentage)

nodes <-
  nodes %>% 
  left_join(percentages, by = c(node = "IN")) %>% 
  mutate(node_name = sub("_[0-9]{2}$", "", node)) %>% 
  mutate(node_label = trimws(paste(node_name, " ", ifelse(is.na(percentage), "", percentage))))

networkD3::sankeyNetwork(
  Links = newda,
  Nodes = nodes,
  Source = "IDIn",
  Target = "IDout",
  Value = "n",
  NodeID ="node_label",
  LinkGroup = "nam",
  sinksRight = FALSE
)
#> Links is a tbl_df. Converting to a plain data frame.

Created on 2024-09-19 with reprex v2.1.1

Thanks! It worked, but with one problem. I need to keep a percentage for the "dropout" group to show. It should have its own numerator, contribute to the denominator, and the denominator would be that of the column its in. I guess the reason it doesn't show is because it's not in the "IN". Thanks again for your help.

library(dplyr)

newda <- 
  tibble::tribble(
    ~IN,               ~Out,              ~n,
    "BUP_01",          "No_treatment_02", 4,
    "BUP_01",          "BUP_02",          4,
    "BUP_01",          "BHV_02",          4,
    "BUP_01",          "Dropout_02",      1,
    "No_treatment_02", "No_treatment_03", 1,
    "No_treatment_02", "Dropout_03",      1,
    "No_treatment_02", "BUP_03",          1,
    "No_treatment_02", "BHV_03",          1,
    "BUP_02",          "No_treatment_03", 1,
    "BUP_02",          "Dropout_03",      1,
    "BUP_02",          "BUP_03",          1,
    "BUP_02",          "BHV_03",          1,
    "BHV_02",          "No_treatment_03", 1,
    "BHV_02",          "Dropout_03",      1,
    "BHV_02",          "BUP_03",          1,
    "BHV_02",          "BHV_03",          1,
    "BUP_03",          "Dropout_04",      3,
    "No_treatment_03", "Dropout_04",      3,
    "BHV_03",          "Dropout_04",      3
  )

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]{2}$", "", newda$IN)

percentages <-
  newda %>% 
  select(IN, Out, n) %>% 
  tidyr::pivot_longer(cols = c(IN, Out), names_to = "type", values_to = "node") %>% 
  summarise(total = sum(n), .by = c("node", "type")) %>% 
  mutate(month = as.numeric(sub(".*([0-9])$", "\\1", node))) %>% 
  slice_head(n = 1, by = c(month, node)) %>% 
  mutate(month_ttl = sum(total), .by = month) %>% 
  mutate(percentage = total / month_ttl) %>% 
  select(node, percentage)

nodes <-
  nodes %>% 
  left_join(percentages, by = "node") %>% 
  mutate(node_name = sub("_[0-9]{2}$", "", node)) %>% 
  mutate(node_label = trimws(paste(node_name, " ", ifelse(is.na(percentage), "", percentage))))

networkD3::sankeyNetwork(
  Links = newda,
  Nodes = nodes,
  Source = "IDIn",
  Target = "IDout",
  Value = "n",
  NodeID ="node_label",
  LinkGroup = "nam",
  sinksRight = FALSE
)

1 Like

Thank you for your amazing help, it finally worked out. One problem is that there is a "space" between the word and the comma in the sankey, and i want them to be attached (e.g. BUP, 100% NOT BUP , 100%). I tried making adjustments but the colors of the links are changing and no longer that of the node before it. Here's the code. Thanks again.

newda <- sankey_plot_id %>%
mutate(nam= case_when(IN == "BHV_01" ~ "BHV",
IN == "BHV_02" ~ "BHV",
IN == "BHV_03" ~ "BHV",
IN == "BHV_04" ~ "BHV",
IN == "BHV_05" ~ "BHV",
IN == "BHV_06" ~ "BHV",
IN == "BHV_07" ~ "BHV",
IN == "BHV_08" ~ "BHV",
IN == "BHV_09" ~ "BHV",
IN == "BHV_10" ~ "BHV",
IN == "BHV_11" ~ "BHV",
IN == "BHV_12" ~ "BHV",
IN == "BHV_13" ~ "BHV",
IN == "BUP,BHV_01" ~ "BUP,BHV",
IN == "BUP,BHV_02" ~ "BUP,BHV",
IN == "BUP,BHV_03" ~ "BUP,BHV",
IN == "BUP,BHV_04" ~ "BUP,BHV",
IN == "BUP,BHV_05" ~ "BUP,BHV",
IN == "BUP,BHV_06" ~ "BUP,BHV",
IN == "BUP,BHV_07" ~ "BUP,BHV",
IN == "BUP,BHV_08" ~ "BUP,BHV",
IN == "BUP,BHV_09" ~ "BUP,BHV",
IN == "BUP,BHV_10" ~ "BUP,BHV",
IN == "BUP,BHV_11" ~ "BUP,BHV",
IN == "BUP,BHV_12" ~ "BUP,BHV",
IN == "BUP,BHV_13" ~ "BUP,BHV",
IN == "BUP_01" ~ "BUP",
IN == "BUP_02" ~ "BUP",
IN == "BUP_03" ~ "BUP",
IN == "BUP_04" ~ "BUP",
IN == "BUP_05" ~ "BUP",
IN == "BUP_06" ~ "BUP",
IN == "BUP_07" ~ "BUP",
IN == "BUP_08" ~ "BUP",
IN == "BUP_09" ~ "BUP",
IN == "BUP_10" ~ "BUP",
IN == "BUP_11" ~ "BUP",
IN == "BUP_12" ~ "BUP",
IN == "BUP_13" ~ "BUP",
IN == "No treatment_01" ~ "No treatment",
IN == "No treatment_02" ~ "No treatment",
IN == "No treatment_03" ~ "No treatment",
IN == "No treatment_04" ~ "No treatment",
IN == "No treatment_05" ~ "No treatment",
IN == "No treatment_06" ~ "No treatment",
IN == "No treatment_07" ~ "No treatment",
IN == "No treatment_08" ~ "No treatment",
IN == "No treatment_09" ~ "No treatment",
IN == "No treatment_10" ~ "No treatment",
IN == "No treatment_11" ~ "No treatment",
IN == "No treatment_12" ~ "No treatment",
IN == "No treatment_13" ~ "No treatment",
IN == "NTX_01" ~ "NTX",
IN == "NTX_02" ~ "NTX",
IN == "NTX_03" ~ "NTX",
IN == "NTX_04" ~ "NTX",
IN == "NTX_05" ~ "NTX",
IN == "NTX_06" ~ "NTX",
IN == "NTX_07" ~ "NTX",
IN == "NTX_08" ~ "NTX",
IN == "NTX_09" ~ "NTX",
IN == "NTX_10" ~ "NTX",
IN == "NTX_11" ~ "NTX",
IN == "NTX_12" ~ "NTX",
IN == "NTX_13" ~ "NTX",
IN == "NTX,BHV_01" ~ "NTX,BHV",
IN == "NTX,BHV_02" ~ "NTX,BHV",
IN == "NTX,BHV_03" ~ "NTX,BHV",
IN == "NTX,BHV_04" ~ "NTX,BHV",
IN == "NTX,BHV_05" ~ "NTX,BHV",
IN == "NTX,BHV_06" ~ "NTX,BHV",
IN == "NTX,BHV_07" ~ "NTX,BHV",
IN == "NTX,BHV_08" ~ "NTX,BHV",
IN == "NTX,BHV_09" ~ "NTX,BHV",
IN == "NTX,BHV_10" ~ "NTX,BHV",
IN == "NTX,BHV_11" ~ "NTX,BHV",
IN == "NTX,BHV_12" ~ "NTX,BHV",
IN == "NTX,BHV_13" ~ "NTX,BHV",
IN == "dropout_01" ~ "dropout",
IN == "dropout_02" ~ "dropout",
IN == "dropout_03" ~ "dropout",
IN == "dropout_04" ~ "dropout",
IN == "dropout_05" ~ "dropout",
IN == "dropout_06" ~ "dropout",
IN == "dropout_07" ~ "dropout",
IN == "dropout_08" ~ "dropout",
IN == "dropout_09" ~ "dropout",
IN == "dropout_10" ~ "dropout",
IN == "dropout_11" ~ "dropout",
IN == "dropout_12" ~ "dropout",
IN == "dropout_13" ~ "dropout",
IN == "MTD+/-BHV_01" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_02" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_03" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_04" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_05" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_06" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_07" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_08" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_09" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_10" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_11" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_12" ~ "MTD+/-BHV",
IN == "MTD+/-BHV_13" ~ "MTD+/-BHV",
IN == "Other_01" ~ "Other",
IN == "Other_02" ~ "Other",
IN == "Other_03" ~ "Other",
IN == "Other_04" ~ "Other",
IN == "Other_05" ~ "Other",
IN == "Other_06" ~ "Other",
IN == "Other_07" ~ "Other",
IN == "Other_08" ~ "Other",
IN == "Other_09" ~ "Other",
IN == "Other_10" ~ "Other",
IN == "Other_11" ~ "Other",
IN == "Other_12" ~ "Other",
IN == "Other_13" ~ "Other"

))

glimpse(newda)

nodes <- data.frame(node = unique(c(newda$IN, newda$Out)))

newda$IDIn <- match(newda$IN, nodes$node) - 1
newda$IDout <- match(newda$Out, nodes$node) - 1
newda$nam <- sub("_[0-9]{2}$", "", newda$IN)

percentages <-
newda %>%
select(IN, Out, n) %>%
tidyr::pivot_longer(cols = c(IN, Out), names_to = "type", values_to = "node") %>%
summarise(total = sum(n), .by = c("node", "type")) %>%
mutate(month = as.numeric(sub(".*([0-9])$", "\1", node))) %>%
slice_head(n = 1, by = c(month, node)) %>%
mutate(month_ttl = sum(total), .by = month) %>%
mutate(percentage = round((total / month_ttl) * 100, 1)) %>% # Multiply by 100 and round to 1 decimal point
select(node, percentage)

nodes <-
nodes %>%
left_join(percentages, by = "node") %>%
mutate(node_name = sub("_[0-9]{2}$", "", node)) %>%
mutate(node_label = trimws(paste(node_name,",",ifelse(is.na(percentage), "", paste(percentage, "%", sep = "")))))

sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="node_label", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

sn <- sankeyNetwork(Links = newda, Nodes = nodes,
Source = "IDIn", Target = "IDout",
Value = "n", NodeID ="name", fontSize = 15, nodeWidth = 15,
nodePadding = 25, LinkGroup = "nam",
sinksRight = FALSE)

change...

to...

mutate(node_label = trimws(paste0(node_name, ",", ifelse(is.na(percentage), "",

(note: paste0() instead of paste()

Thanks... When I did this the link colors changes and are no longer reflecting that of the previous node.

Link color can be controlled by passing the name of the column in your links data frame that contains each links color group name to tne LinkGroup argument. Check the package documentation e.g. sankeyNetwork function - RDocumentation

1 Like

Awesome. Thank you for your great help!!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.