When hovering a node, the label and the value attached to it appears with no spacing. For example, in the screenshot here, you will see that at 6 months, there are 64 participants who are in Stage 3.
But hovering over the node it looks like "Stage 364" because there is no spacing. I am not sure how to change or fix this. The code is pasted below.
# Load the libraries
library(networkD3)
library(tidyverse)
library(dplyr)
library(tidyr)
library(haven)
library(htmltools)
library(htmlwidgets)
library(writexl)
library("readxl")
Mdf<- read_sas("C:/Users/jayla/Dropbox (TFGH Central)/LEDoxy File Sharing with Brian/Mali Main Analysis/Pull 20210913/Final Datasets/mali_table2a_20210913.sas7bdat")
Mdf2 <- spread(Mdf,redcap_event_name, STAGE_ANALYSIS_LEG)
Mdf3 <- Mdf2[,c("Baseline", "6 Months", "12 Months", "24 Months")]
# removing the M18 data from Mali bc it was not collected the same way due to covid
# - we instead want to keep whatever was in month 12 as month 18
# DO NOT REPEAT THIS FOR OTHER SITES!!!
### UPDATE ON 6/23/22 - the table 2a no longer includes the 18M data for Mali - code has been modified to account for this change.
# Mdf4 <- Mdf3[,c("Baseline", "6 Months", "12 Months", "12 Months", "24 Months")]
# colnames(Mdf4) <- c("Baseline", "6 Months", "12 Months", "18 Months", "24 Months")
Mdf_all <- Mdf3
df_toplot <- Mdf_all
### RESTRUCTURE DATA %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# set if want labels or not;
full_label = FALSE;
# Dealing w NAs - DON'T CHANGE ORDER OF THESE STEPS!
# has to be in this order so if missing multiple timepoints for a subject, it goes to earliest.
#summarizes how many are NA per column
df_toplot %>% summarise_all(~ sum(is.na(.)))
# changes any 6M NAs to the value at baseline
df_toplot$`6 Months` <- ifelse(is.na(df_toplot$`6 Months`), df_toplot$Baseline, df_toplot$`6 Months` )
# changes any 12M NAs to the value at 6M
df_toplot$`12 Months` <- ifelse(is.na(df_toplot$`12 Months`), df_toplot$`6 Months`, df_toplot$`12 Months` )
# changes any 18M NAs to the value at 12M
# df_toplot$`18 Months` <- ifelse(is.na(df_toplot$`18 Months`), df_toplot$`12 Months`, df_toplot$`18 Months` )
# changes any 24M NAs to the value at 12M
df_toplot$`24 Months` <- ifelse(is.na(df_toplot$`24 Months`), df_toplot$`12 Months`, df_toplot$`24 Months` )
# df_toplot <- filter(df_toplot,df_toplot$`Baseline`!=".")
# df_toplot <- filter(df_toplot,df_toplot$`6 Months`!=".")
# df_toplot <- filter(df_toplot,df_toplot$`12 Months`!=".")
# df_toplot <- filter(df_toplot,df_toplot$`18 Months`!=".")
# df_toplot <- filter(df_toplot,df_toplot$`24 Months`!=".")
# add word "stage" to the front of every stage value
df_toplot$Baseline <- sub("^","Stage ",df_toplot$Baseline)
df_toplot$`6 Months` <- sub("^","Stage ",df_toplot$`6 Months`)
df_toplot$`12 Months` <- sub("^","Stage ",df_toplot$`12 Months`)
# df_toplot$`18 Months` <- sub("^","Stage ",df_toplot$`18 Months`)
df_toplot$`24 Months` <- sub("^","Stage ",df_toplot$`24 Months`)
# sort by stages, starting w baseline and doing all sequentially (makes final graph look better)
# df_toplot2 <- df_toplot[order()]
# df_toplot <- desc(df_toplot, Baseline, `6 Months`, `12 Months`, `24 Months`)
df_toplot <- arrange(df_toplot, desc(Baseline), desc(`6 Months`), desc(`12 Months`), desc(`24 Months`))
#df_toplot2 <- desc(df_toplot)
# df_toplot <- arrange(df_toplot, `24 Months`,`18 Months`,`12 Months`,`6 Months`, Baseline, )
# Make initial links and nodes structures ------------------------------------------------
# Later we have to make a combined version of these
# Make new data structure where each row is a link
links <-
df_toplot %>%
mutate(row = row_number()) %>% # add a row id
pivot_longer(-row, names_to = "column", values_to = "source") %>% # gather all columns
mutate(column = match(column, names(df_toplot))) %>% # convert col names to col ids
group_by(row) %>%
mutate(target = lead(source, order_by = column)) %>% # get target from following node in row
ungroup()
# Add suffix labels to stages so that they're all different , and only keep source and target
links <-
links %>%
mutate(source = paste0(source, '_', column)) %>%
mutate(target = paste0(target, '_', column + 1)) %>%
select(source, target)
# Create nodes
nodes <- data.frame(name = unique(c(links$source, links$target)))
# Remove suffix label from nodes
nodes$label <- sub('_[0-9]*$', '', nodes$name) # remove column id from node label
nodes$group <- sub('Stage ', '', nodes$label)
# add source_id and target_id
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
# Making collective links and nodes ------------------------------------------------
# make new version of links to use for next steps
links2 <- arrange(links,source_id,target_id)
# initialize loop
i <- 1
n <- 1
new_links <- links[1,]
new_links$source_id <- 9
new_links$value <-0
# Start loop
for (i in 1:nrow(links2))
{
# get start and next that we are dealing with
start_stage = pull(links2[i,3])
next_stage = pull(links2[i,4])
# only start loop if not a repeat
if (start_stage != new_links[n,3] | next_stage !=new_links[n,4])
{
# calculate how many rows follow that path
total_mask <- links2$source_id == start_stage & links2$target_id == next_stage
total <- sum(total_mask)
# create a new row w that info
new_row <- links2[i,]
new_row$value <- total
# put that row into new dataframe
if (i==1) {n=1} else {n=n+1}
new_links[n,] <- new_row
} else { i = i+1 }
}
# make a new version of nodes that has the labels we want
new_nodes <- nodes
for (i in 1:nrow(nodes))
{
# identify which node we are talking about
my_node = nodes$name[i]
# sum all the values of that node in new_links
mask <- new_links$source == my_node
node_total <- new_links$value[mask] %>% sum
new_nodes$label[i] <- paste(nodes$label[i], node_total, sep=": ")
}
# remove the NAs in links and nodes
# from new_nodes
new_nodes <- new_nodes[!new_nodes$name=='NA_6',]
new_nodes <- new_nodes[!new_nodes$name=='NA_5',]
nodes <- nodes[!nodes$name=='NA_6',]
nodes <- nodes[!nodes$name=='NA_5',]
# from new_links
new_links <- new_links[ new_links$target != 'NA_6',]
new_links <- new_links[ new_links$target != 'NA_5',]
### MAKING PLOT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Setting parameters
# define colors
new_links$group <- substr(new_links$source,7,7)
# set colors to whatever you want - this scheme is lcd safe and color blind safe
#node_colors <- 'd3.scaleOrdinal().domain(["0","1","2","3","4","5"]).range(["#01665e","#5ab4ac","#c7eae5","#f6e8c3","#d8b365","#8c510a"])'
node_colors <- 'd3.scaleOrdinal().domain(["0","1","2","3","4","5","6"]).range(["#cb181d","#fc9272","#a50f15","#ef3b2c","#67000d","#fc9272"])'
#Manual Edits made by Jayla outside of R to fix order of nodes and links to our desired order
# write_xlsx(new_nodes, path = "C:/Users/jayla/Downloads/Graphics/newnodesMali.xlsx")
# write_xlsx(new_links, path = "C:/Users/jayla/Downloads/Graphics/newlinksMali.xlsx")
JlinksMali <- read_xlsx("C:/Users/jayla/Downloads/Graphics/newlinksMali_06132023.xlsx", 2)
JnodesMali <- read_xlsx("C:/Users/jayla/Downloads/Graphics/newnodesMali_06132023.xlsx", 3)
# Making plot
sankey <- sankeyNetwork(Links = JlinksMali, Nodes = JnodesMali, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label',
fontSize = 16, nodeWidth=30, nodePadding = 20,
height = 300, width = 900, NodeGroup = 'group', LinkGroup = 'group',
colourScale= node_colors, margin = list(left = 70), iterations = 0)
#If you don't want the data labels, change Nodes= nodes
# Making Title, printing to R, and saving
sankey <- htmlwidgets::prependContent(sankey, htmltools::tags$h1( " Mali - All Participants"))
#style = "text-align: center",
htmlwidgets::onRender(sankey, '
function(el) {
var cols_x = this.sankey.nodes().map(d => d.x).filter((v, i, a) => a.indexOf(v) === i).sort(function(a, b){return a - b});
var labels = ["Baseline", "6 Months", "12 Months", "24 Months"];
cols_x.forEach((d, i) => {
d3.select(el).select("svg")
.append("text")
.attr("x", d)
.attr("y", 12)
.text(labels[i]);
})
}
')
I have made changes to what label shows up on the Sankey Diagram. I have changed it from stating the stage and how many are in that stage, to just the stage for a cleaner appearance: "Stage 3: 64" vs. "Stage 3". The screenshot shows when all stages just say "Stage 1", "Stage 2", etc. When the labels say the stage and number of participants in that stage, such as, "Stage 3:64", then hovering over the node produces "Stage3:6464". Not sure where in the code would fix this issue or what to do.