4

I have a database that has got a month full of datasets in 10min intervals. (So a dataset for every 10min)

Now I want to show that data on three graphs: last 24 hours, last 7 days and last 30 days.

The data looks like this:

{ "data" : 278, "date" : ISODate("2016-08-31T01:51:05.315Z") }
{ "data" : 627, "date" : ISODate("2016-08-31T01:51:06.361Z") }
{ "data" : 146, "date" : ISODate("2016-08-31T01:51:07.938Z") }
// etc

For the 24h graph I simply output the data for the last 24h, that's easy.

For the other graphs I thin the data:

const data = {}; //data from database
let newData = [];
const interval = 7; //for 7 days the interval is 7, for 30 days it's 30

for( let i = 0; i < data.length; i += interval ) {
    newData.push( data[ i ] );
};

This works fine but extreme events where data is 0 or differs greatly from the other values average, can be lost depending on what time you search the data. Not thinning out the data however will result in a large sum of data points that are sent over the pipe and have to be processed on the front end. I'd like to avoid that.

Now to my question

How can I reduce the data for a 7 day period while keeping extremes in it? What's the most efficient way here?

Additions: In essence I think I'm trying to simplify a graph to reduce points but keep the overall shape. (If you look at it from a pure image perspective)

Something like an implementation of Douglas–Peucker algorithm in node? enter image description here

Dominik
  • 6,078
  • 8
  • 37
  • 61
  • 1
    Here are a few implementations of the path simplification algorithm in Javascript: http://stackoverflow.com/questions/22512532/d3-js-how-to-simplify-a-complex-path-using-a-custom-algorithm, http://mourner.github.io/simplify-js/, https://gist.github.com/adammiller/826148, https://gist.github.com/rhyolight/2846020 – ConnorsFan Sep 25 '16 at 03:32
  • Thanks those are fantastic resources. I am struggling with this a bit because X for me is a time stamp and always the same distance but I'll have a stab at it on Monday. – Dominik Sep 25 '16 at 11:56

1 Answers1

3

As you mention in the comments, the Ramer-Douglas-Peucker (RDP) algorithm is used to process data points in 2D figures but you want to use it for graph data where X values are fixed. I modified this Javascript implementation of the algorithm provided by M Oehm to consider only the vertical (Y) distance in the calculations.

On the other hand, data smoothing is often suggested to reduce the number of data points in a graph (see this post by csgillespie).

In order to compare the two methods, I made a small test program. The Reset button creates new test data. An algorithm can be selected and applied to obtain a reduced number of points, separated by the specified interval. In the case of the RDP algorithm however, the resulting points are not evenly spaced. To get the same number of points as for the specified interval, I run the calculations iteratively, adjusting the espilon value each time until the correct number of points is reached.

From my tests, the RDP algorithm gives much better results. The only downside is that the spacing between points varies. I don't think that this can be avoided, given that we want to keep the extreme points which are not evenly distributed in the original data.

Here is the code snippet, which is better seen in Full Page mode:

var svgns = 'http://www.w3.org/2000/svg';
var graph = document.getElementById('graph1');
var grpRawData = document.getElementById('grpRawData');
var grpCalculatedData = document.getElementById('grpCalculatedData');

var btnReset = document.getElementById('btnReset');
var cmbMethod = document.getElementById('cmbMethod');
var btnAddCalculated = document.getElementById('btnAddCalculated');
var btnClearCalculated = document.getElementById('btnClearCalculated');

var data = [];
var calculatedCount = 0;
var colors = ['black', 'red', 'green', 'blue', 'orange', 'purple'];

var getPeriod = function () {
    return parseInt(document.getElementById('txtPeriod').value, 10);
};

var clearGroup = function (grp) {
    while (grp.lastChild) {
        grp.removeChild(grp.lastChild);
    }
};

var showPoints = function (grp, pts, markerSize, color) {
    var i, point;
    for (i = 0; i < pts.length; i++) {
        point = pts[i];
        var marker = document.createElementNS(svgns, 'circle');
        marker.setAttributeNS(null, 'cx', point.x);
        marker.setAttributeNS(null, 'cy', point.y);
        marker.setAttributeNS(null, 'r', markerSize);
        marker.setAttributeNS(null, 'fill', color);
        grp.appendChild(marker);
    }
};

// Create and display test data
var showRawData = function () {
    var i, x, y;
    var r = 0;
    data = [];
    for (i = 1; i < 500; i++) {
        x = i;
        r += 15.0 * (Math.random() * Math.random() - 0.25);
        y = 150 + 30 * Math.sin(x / 200) * Math.sin((x - 37) / 61) + 2 * Math.sin((x - 7) / 11) + r;
        data.push({ x: x, y: y });
    }
    showPoints(grpRawData, data, 1, '#888');
};

// Gaussian kernel smoother
var createGaussianKernelData = function () {
    var i, x, y;
    var r = 0;
    var result = [];
    var period = getPeriod();
    for (i = Math.floor(period / 2) ; i < data.length; i += period) {
        x = data[i].x;
        y = gaussianKernel(i);
        result.push({ x: x, y: y });
    }
    return result;
};

var gaussianKernel = function (index) {
    var halfRange = Math.floor(getPeriod() / 2);
    var distance, factor;
    var totalValue = 0;
    var totalFactor = 0;
    for (i = index - halfRange; i <= index + halfRange; i++) {
        if (0 <= i && i < data.length) {
            distance = Math.abs(i - index);
            factor = Math.exp(-Math.pow(distance, 2));
            totalFactor += factor;
            totalValue += data[i].y * factor;
        }
    }
    return totalValue / totalFactor;
};

// Ramer-Douglas-Peucker algorithm
var ramerDouglasPeuckerRecursive = function (pts, first, last, eps) {
    if (first >= last - 1) {
        return [pts[first]];
    }

    var slope = (pts[last].y - pts[first].y) / (pts[last].x - pts[first].x);

    var x0 = pts[first].x;
    var y0 = pts[first].y;

    var iMax = first;
    var max = -1;
    var p, dy;

    // Calculate vertical distance
    for (var i = first + 1; i < last; i++) {
        p = pts[i];
        y = y0 + slope * (p.x - x0);
        dy = Math.abs(p.y - y);

        if (dy > max) {
            max = dy;
            iMax = i;
        }
    }

    if (max < eps) {
        return [pts[first]];
    }

    var p1 = ramerDouglasPeuckerRecursive(pts, first, iMax, eps);
    var p2 = ramerDouglasPeuckerRecursive(pts, iMax, last, eps);

    return p1.concat(p2);
}

var internalRamerDouglasPeucker = function (pts, eps) {
    var p = ramerDouglasPeuckerRecursive(data, 0, pts.length - 1, eps);
    return p.concat([pts[pts.length - 1]]);
}

var createRamerDouglasPeuckerData = function () {
    var finalPointCount = Math.round(data.length / getPeriod());
    var epsilon = getPeriod();
    var pts = internalRamerDouglasPeucker(data, epsilon);
    var iteration = 0;
    // Iterate until the correct number of points is obtained
    while (pts.length != finalPointCount && iteration++ < 20) {
        epsilon *= Math.sqrt(pts.length / finalPointCount);
        pts = internalRamerDouglasPeucker(data, epsilon);
    }
    return pts;
};

// Event handlers
btnReset.addEventListener('click', function () {
    calculatedCount = 0;
    clearGroup(grpRawData);
    clearGroup(grpCalculatedData);
    showRawData();
});

btnClearCalculated.addEventListener('click', function () {
    calculatedCount = 0;
    clearGroup(grpCalculatedData);
});

btnAddCalculated.addEventListener('click', function () {
    switch (cmbMethod.value) {
        case "Gaussian":
            showPoints(grpCalculatedData, createGaussianKernelData(), 2, colors[calculatedCount++]);
            break;
        case "RDP":
            showPoints(grpCalculatedData, createRamerDouglasPeuckerData(), 2, colors[calculatedCount++]);
            return;
    }
});

showRawData();
div
{
    margin-bottom: 6px;
}
<div>
    <button id="btnReset">Reset</button>&nbsp;
    <select id="cmbMethod">
        <option value="RDP">Ramer-Douglas-Peucker</option>
        <option value="Gaussian">Gaussian kernel</option>
    </select>&nbsp;
    <label for="txtPeriod">Interval: </label>
    <input id="txtPeriod" type="text" style="width: 36px;" value="7" />
</div>
<div>
    <button id="btnAddCalculated">Add calculated points</button>
    <button id="btnClearCalculated">Clear calculated points</button>
</div>
<svg id="svg1" width="765" height="450" viewBox="0 0 510 300">
    <g id="graph1" transform="translate(0,300) scale(1,-1)">
        <rect width="500" height="300" stroke="black" fill="#eee"></rect>
        <g id="grpRawData"></g>
        <g id="grpCalculatedData"></g>
    </g>
</svg>
Community
  • 1
  • 1
ConnorsFan
  • 70,558
  • 13
  • 122
  • 146