1

I would like to manually reproduce the method that authors of an article used in their research (DOI: 10.1038/s41598-017-02750-9 (Page 8. top)). It is mentioned as "ACF", so I wrote different functions:

1, a version based on a youtube video (https://youtu.be/ZjaBn93YPWo?t=417) using Alglibs Pearson correlation coefficient function

2, then another version based on the formula that is described in the article mentioned above

3, then another version based on the simplified formula described at an online ACF calculator page (https://planetcalc.com/7908/)

4, then a version based on the longer formula described there (https://planetcalc.com/7908/)

=> Yet, all of these give different output. However, method 3. is consistent with the output coming from the online calculator ran in my browser: https://planetcalc.com/7884/?d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s

My code is here:

#include <iostream>

#define _WIN32_WINNT 0x0500
#include<windows.h>
//#include <cmath>

#include "alglib/alglibinternal.h"
#include "alglib/alglibmisc.h"
#include "alglib/ap.h"
#include "alglib/dataanalysis.h"
#include "alglib/diffequations.h"
#include "alglib/fasttransforms.h"
#include "alglib/integration.h"
#include "alglib/interpolation.h"
#include "alglib/linalg.h"
#include "alglib/optimization.h"
#include "alglib/solvers.h"
#include "alglib/specialfunctions.h"
#include "alglib/statistics.h"
#include "alglib/stdafx.h"

using namespace std;


double* normalize(double* _arr, int _s) {
    double* output = new double[_s];
    double mod = 0.0;

    for (size_t i = 0; i < _s; ++i)
        mod += _arr[i] * _arr[i];

    double mag = sqrt(mod); //TODO: if 0, throw exc
    double mag_inv = 1.0 / mag;

    for (size_t i = 0; i < _s; ++i)
        output[i] = _arr[i] * mag_inv;

    return output;
}

void doACFyoutube(double* _ina, int _s)
// https://youtu.be/ZjaBn93YPWo?t=417   => the most unefficient, but understandable method
{
    double* temp_x;
    double* temp_y;

    double* ACFoutput = new double[_s];
    for(int shift = 0; shift < _s; shift++)
    {
        temp_x = new double[_s-shift];
        temp_y = new double[_s-shift];

        for(int cpy = 0; cpy < _s-shift; cpy++)
        {
            temp_x[cpy] = _ina[cpy];
            temp_y[cpy] = _ina[cpy+shift];
        }

        temp_y = normalize(temp_y, _s-shift); //not sure if needed //TODO: leak

        alglib::real_1d_array temp_x_alglib;
        alglib::real_1d_array temp_y_alglib;

        temp_x_alglib.setcontent(_s-shift, temp_x);
        temp_y_alglib.setcontent(_s-shift, temp_y);

        ACFoutput[shift] = alglib::pearsoncorr2(temp_x_alglib, temp_y_alglib); //Pearson product-moment correlation coefficient

        delete temp_x;
        delete temp_y;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFgoal(double* _ina, int _s)
// DOI: 10.1038/s41598-017-02750-9    => page 8, first equation (my goal is to reproduce this)
{
    double mean = 0; //mean
    for(int a = 0; a < _s; a++ )
      mean += _ina[a];
    mean /= _s;

    double var = 0; //variance
    for(int b = 0; b < _s; b++ )
      var += (_ina[b]-mean)*(_ina[b]-mean);
    var /= _s-1; //needed? (-1) a.k.a. Bessell's correction ?

    double* ACFoutput = new double[_s];
    for(int i = 0; i < _s; i++)
    {
        double temp_sum = 0;

        for(int j = 1; j <= _s-i; j++)
            temp_sum += (_ina[j]-mean)*(_ina[j+i]-mean);

        ACFoutput[i] = (double)1/(((double)_s-(double)i)*var*var) * temp_sum;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFplanetcalcCoarse(double* _ina, int _s)
// https://planetcalc.com/7908/
{
    double mean = 0; //mean
    for(int a = 0; a < _s; a++ )
      mean += _ina[a];
    mean /= _s;

    double* ACFoutput = new double[_s];
    for(int i = 0; i < _s; i++)
    {
        double temp_sum1 = 0;
        double temp_sum2 = 0;

        for(int j = 0; j < _s-i; j++)
            temp_sum1 += (_ina[j]-mean)*(_ina[j+i]-mean);

        for(int k = 0; k < _s; k++)
            temp_sum2 += (_ina[k]-mean)*(_ina[k]-mean);

        ACFoutput[i] = temp_sum1 / temp_sum2;
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

void doACFplanetcalcFine(double* _ina, int _s)
// https://planetcalc.com/7908/     => gives different output than the online calculator script, even though uses the longer formula described there
{
    double* ACFoutput = new double[_s];
    for(int k = 0; k < _s; k++)
    {
        double mean1 = 0;    //mean of first N-k values
        for(int a = 0; a < _s-k; a++ )
          mean1 += _ina[a];
        mean1 /= _s-k;
 //       cout << "\t mean of first N-" << k << " values = " << mean1 << endl;

        double mean2 = 0;    //mean of last N-k values
        for(int a = k; a < _s; a++ )
          mean2 += _ina[a];
        mean2 /= _s-k;
 //       cout << "\t mean of last N-" << k << " values = " << mean2 << endl;


        double temp_sum1 = 0;
        double temp_sum2 = 0;
        double temp_sum3 = 0;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum1 += (_ina[i]-mean1)*(_ina[i+k]-mean2);
 //           cout << "\t\t temp_sum1 (" << i << ") = " << temp_sum1 << endl;
        }
//        cout << "\t temp_sum1 = " << temp_sum1 << endl;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum2 += (_ina[i]-mean2)*(_ina[i]-mean2); //pow2
 //           cout << "\t\t temp_sum2 (" << i << ") = " << temp_sum2 << endl;
        }
 //       cout << "\t temp_sum2 = " << temp_sum2 << endl;

        for(int i = 0; i < _s-k; i++)
        {
            temp_sum3 += (_ina[i+k]-mean2)*(_ina[i+k]-mean2); //pow2
 //           cout << "\t\t temp_sum3 (" << i << ") = " << temp_sum3 << endl;
        }
 //       cout << "\t temp_sum3 = " << temp_sum3 << endl;

        ACFoutput[k] = temp_sum1 / (sqrt(temp_sum2)*sqrt(temp_sum3));
    }

    for(int i=0; i<_s; i++)
        cout << " lag = " << i << "\tACF(lag) = " << ACFoutput[i] << endl;
}

int main()
{
    //fullscreenhez
    HWND hWnd = GetConsoleWindow();
    ShowWindow(hWnd,SW_SHOWMAXIMIZED);

    double ina[15] = {2,3,4,5,4,3,4,5,6,7,6,5,4,3,4}; //15 elem
    for(int x=0; x<15; x++)
        cout << ina[x] << ",";
    cout << endl;

    cout << endl;
    // https://youtu.be/ZjaBn93YPWo?t=417   => the most unefficient, but understandable method
    doACFyoutube(ina, 15);          // ??? result doesn't match any other

    cout << endl;
    // DOI: 10.1038/s41598-017-02750-9    => page 8, first equation (my goal is to reproduce this)
    doACFgoal(ina, 15);             // ??? result doesn't match any other

    cout << endl;
    // https://planetcalc.com/7908/ (simplified formula)
    doACFplanetcalcCoarse(ina, 15); //result equals to the online calculator result: https://planetcalc.com/7884/?_d=.bTkjs.ymyQ8blXMoYiMgIOOmzhhI4fnckel.J5yEDWtV89Gz32Ch0kse2s_

    cout << endl;
    // https://planetcalc.com/7908/ (longer formula)
    doACFplanetcalcFine(ina, 15);   // ??? result doesn't match any other

    return 0;
}

The output looks like this:

Outputs of different ACF functions I wrote

As I do not have the original data they used in the publication, I can only rely on how consistent the output of my program is related to other codes output. But these outputs are different, and I do not know why. Could you please have a look at the code and help me end up in four equal outputs?

(Codeblocks project zipped here: https://drive.google.com/file/d/1s3SeJSiDgk-hiMazp94HfFerL582VG2K/view?usp=sharing)

  • Good question, this (trying to reconcile published results with your own or with each other) is something that happens all the time and it's often difficult. You might want to take this question to stats.stackexchange.com as it's more about definitions than programming, I suspect. – Robert Dodier Mar 31 '21 at 18:46

0 Answers0