I am trying to plot a bubble chart in d3.js using a file that is updated every few seconds and I'm having trouble with a few parts of the code.
The whole data-to-visualization pipeline is described below:
- Data is gathered, processed and saved using a dataproc cluster on GCP
- Data is retrieved from GCP and saved to a local machine as a .csv file every 10 seconds
- The .csv file is used to create a d3.js visualization - the visualization is updated using d3's setInterval().
A sample of the data is shown below:
Tag | Count | Date | |
---|---|---|---|
0 | python | 1 | 12/06 18:15:40 |
1 | pandas | 1 | 12/06 18:15:40 |
2 | java | 1 | 12/06 18:15:40 |
3 | html | 1 | 12/06 18:15:40 |
4 | c++ | 1 | 12/06 18:15:40 |
The issues I'm facing are numbered below:
Ideally, I would like to be able to get the data and store just that new data (i.e. overwrite the old file) when saving it on the GCP end. I'm not able to do that because for some reason it seems that something is causing the script to clear out variables in memory (potentially?). I say this because when I run the code below, the csv variable is reset each time. To remedy this, I'm appending all data gathered so that I have all the data.
Currently, it seems that every time that I run the update function, the circles are replotted (which is what is happening - I understand) but I would like to figure out how to get the circles that I already have and transition them so that they show their new values and only draw new circles for the new data. I'm unable to achieve that with my current code even though I've tried many different ways to transition the data.
I was expecting to be able to update the csv variable in memory so that I would not have to reprocess the data every time. I would also prefer not to append the file and pull the complete file each iteration. Furthermore, the every iteration the visuals are a complete wipe of the canvas and replotting rather than a smooth transition.
Here is my code and a couple of pictures of the visualization at different times. Any suggestion would be extremely helpful.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Real-Time Tags Analysis</title>
<script src="https://d3js.org/d3.v4.js"></script>
<script src="//d3js.org/d3-scale-chromatic.v0.3.min.js"></script>
</head>
<body>
<h1>Real-Time Tags Analysis</h1>
<div id="dataviz"></div>
<script>
//defining constant for the visualization
const width = window.innerWidth, height = window.innerHeight, sizeDivisor = 0.1, nodePadding = 2.75, topTags = 12;
//defining svg canvas
let svg = d3.select("body")
.append("svg")
.attr("width", width)
.attr("height", height);
//color palette
let color = d3.scaleSequential().domain([1,200]).interpolator(d3.interpolateViridis);
//simulation instantiation
let simulation = d3.forceSimulation()
.force("forceX", d3.forceX().strength(.1).x(width * .5))
.force("forceY", d3.forceY().strength(.1).y(height * .5))
.force("center", d3.forceCenter().x(width * .5).y(height * .5))
.force("charge", d3.forceManyBody().strength(-15));
//global variables
let csv = {}, tag, temp = {}, oldData;
//initial bubble chart plotting
d3.csv("bigquery_data.csv", function(error, data) {
if (error) throw error;
oldData = data;
//adding new data from file to the csv variable
data.forEach(function(d) {
tag = d['tag'];
if(csv[tag] == undefined) {
csv[tag] = +d['count']
} else {
csv[tag]+= +d['count'];
}
});
//converting the csv variable to d3 entries list for visualization and defining more values
let csvlist = d3.entries(csv);
csvlist.forEach(function(d) {
d.value = +d.value;
d.size = +d.value / sizeDivisor;
d.size < 3 ? d.radius = 3 : d.radius = d.size;
return d;
});
// sort the nodes so that the bigger ones are at the back
csvlist = csvlist.sort(function(a,b) { return b.size - a.size; });
//update the simulation based on the data
simulation
.nodes(csvlist)
.force("collide", d3.forceCollide().strength(0.5).radius(function(d) { return d.radius + nodePadding; }).iterations(5))
.on("tick", function(d){
circle.attr("cx", function(d){ return d.x; })
.attr("cy", function(d){ return d.y; })
text.attr("dx", function(d) { return d.x; })
.attr("dy", function(d) { return d.y; })
});
//setting up nodes
let node = svg.selectAll("circle")
.data(csvlist)
.attr("class", "node")
.enter().append("g")
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended));
//appending circles to nodes
let circle = node.append("circle")
.data(csvlist)
.attr("r", function(d) { return d.radius; })
.attr("fill", function(d) { return color(d.index); })
.attr("cx", function(d){ return d.x; })
.attr("cy", function(d){ return d.y; });
//appending text to nodes
let text = node.append("text")
.attr("class", "text")
.data(csvlist)
.attr("dx", function(d) { return d.x; })
.attr("dy", function(d) { return d.y + d.size/5; })
.text(function(d) { return d.key; })
.style("font-size", function(d) {return d.size/2 > 8 ? d.size/2 : 8; })
.style("font-family", "Helvetica")
.style("text-anchor", "middle");
});
//helper functions for simulation
function dragstarted(d) {
if (!d3.event.active) simulation.alphaTarget(.03).restart();
d.fx = d.x;
d.fy = d.y;
}
function dragged(d) {
d.fx = d3.event.x;
d.fy = d3.event.y;
}
function dragended(d) {
if (!d3.event.active) simulation.alphaTarget(.03);
d.fx = null;
d.fy = null;
}
// update function which is similar to the original plot with conditional exit
function update() {
d3.csv("bigquery_data.csv", function(error, data) {
if (error) throw error;
// exit if no new data
if (data[data.length - 1].date == oldData[oldData.length - 1].date ) {
return;
}
// update for next conditional exit
oldData = data;
// updating the csv variable
data.forEach(function(d) {
tag = d['tag'];
if(csv[tag] == undefined) {
csv[tag] = +d['count'];
} else {
csv[tag] += +d['count'];
}
});
// converting to d3 entries list and adding other required data for visualization
let csvlist = d3.entries(csv);
csvlist.forEach(function(d) {
d.value = +d.value;
d.size = +d.value / sizeDivisor;
d.size < 3 ? d.radius = 3 : d.radius = d.size;
return d;
});
// sort the nodes so that the bigger ones are at the back
csvlist = csvlist.sort(function(a,b) { return b.size - a.size; });
//update the simulation based on the data
simulation
.nodes(csvlist)
.force("collide", d3.forceCollide().strength(0.5).radius(function(d) { return d.radius + nodePadding; }).iterations(1))
.on("tick", function(d) {
circle.attr("cx", function(d){ return d.x; })
.attr("cy", function(d){ return d.y; })
text.attr("dx", function(d) { return d.x; })
.attr("dy", function(d) { return d.y; })
});
let node = svg.selectAll(".node")
.data(csvlist)
.transition()
.duration(100)
.ease(d3.easeLinear);
node.enter().append("g")
.attr("class", "node")
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended));
let circle = node.append("circle")
.data(csvlist)
.attr("r", function(d) { return d.radius; })
.attr("fill", function(d) { return color(d.index); })
.attr("cx", function(d){ return d.x; })
.attr("cy", function(d){ return d.y; });
let text = node.append("text")
.attr("class", "text")
.data(csvlist)
.attr("dx", function(d) { return d.x; })
.attr("dy", function(d) { return d.y + d.size/5; })
.text(function(d) { return d.key; })
.style("font-size", function(d) {return d.size/2 > 8 ? d.size/2 : 8; })
.style("font-family", "Helvetica")
.style("text-anchor", "middle");
});
function dragstarted(d) {
if (!d3.event.active) simulation.alphaTarget(.03).restart();
d.fx = d.x;
d.fy = d.y;
}
function dragged(d) {
d.fx = d3.event.x;
d.fy = d3.event.y;
}
function dragended(d) {
if (!d3.event.active) simulation.alphaTarget(.03);
d.fx = null;
d.fy = null;
}
}
// code to update the visualization every 10seconds
let inter = setInterval(function() {update()}, 10000);
</script>
</body>
</html>