When calling a Fortran subroutine from a C++ function, and the C++ function is called inside an OpenMP parallel for construct, the Fortran subroutine returns different values from time to time. It is a blackbox subroutine that should return the same result with the same input (50 arguments). I parallelized the subroutine call in order to run it for hundreds of different input combinations. If I run the program twice and print the results from each subroutine execution, the results are not the same.
Details about the problem :
- The serial version is consistent and working fine, giving the same answer all the time;
- The subroutine does not use pseudo random generated numbers;
- The subroutine calls other subroutines in the same .F90 file;
- There is no nesting and no openmp pragmas or includes inside fortran subroutines;
- If I try to use OpenMP API functions inside Fortran subroutines they return gibberish;
- I am using -fautomatic, -fopenmp and -frecursive when compiling with gfortran (btw I am using gcc 5.2.0) and all subroutines were made RECURSIVE. Everything is compiling and linking fine and the problem does appear when I run the .exe.
- The Fortran subroutines do not access I/O. All variables are passed through arguments. There are no COMMON or SAVED blocks. All subroutines use dummy arguments and output variables are explicitly initialized inside every subroutine;
- I am not using any OpenMP clauses with #pragma omp parallel for.
- The number of discrepancies between results is reduced if the number of threads is smaller than the number of available processors. Binding threads with processors does not solve the problem.
The code is huge, but I have managed to simplify it in an example to illustrate the problem :
//StdAfx.h
#include "other.h"
#include <omp.h>
//...many other includes
//first.cpp
#include "StdAfx.h"
typedef struct
{
float x[51];
float result;
} A;
typedef A *B;
B old=NULL;
int size;
float weight;
int var;
int main()
{
size = 100;
old = new (nothrow) A[size];
long* control=NULL;
control = new long[size];
int kk;
//...
//changing some control[] values to -1
var = 5; weight = 0.7;
//...
#pragma omp parallel for
for (kk=0; kk<=size-1; kk++)
{
if (control[kk]>-1) old[kk].result = calcresult(old[kk].x,kk);
}
...
delete [] old;
oldpop = NULL;
delete [] control;
control = NULL;
}
float calcresult(float *x, int k)
{
int dev=0;
double kresult;
dev = 10;
kresult = othercalcresult(&x[0],k);
kresult += (weight*dev*double(1.0/var));
return(kresult);
}
//other.h
float othercalcresult(float *x, int anyk=0);
//other.cpp
extern "C" {
void _stdcall fdlf_(int VET[93],int *N, double *extresult);
}
double anothercalcresult(float *x, int *iVet)
{
int iN=1;
double extresult=0.0;
//stuff here
//original fortran subroutine has 50 arguments
fdlf_(iVet,&iN,&extresult);
return(extresult);
}
float othercalcresult(float *x, int anyk=0)
{
unsigned int i,ii;
float otherresult=0.0;
int ulimit;
//ulimit = 10;
//iVet is a two dimensional array iVet
int** iVet = new int*[numcenarios_anaprog_local];
for (ii=0; ii<ulimit; ii++) iVet[ii]=new int[93];
//initialize new vector
for (i=0; i<ulimit; i++)
for (ii=0; ii<93; ii++)
iVet[i][ii]=(100*i)+ii;
double* partialresult=NULL;
partialresult= new double[ulimit];
for (int jj=0;jj<ulimit;jj++) partialresult[jj] = 0.0;
//stuff here
for (i=0;i<ulimit;i++) partialresult[i] = anothercalcresult(x,iVet[i])
for (i=0;i<ulimit;i++) otherresult+=float(partialresult[i]);
return(otherresult);
}
//EXT.F90
RECURSIVE SUBROUTINE AUXSUB1(N,VALUE1)
INTEGER N
REAL*8 VALUE1
VALUE1 = 1 / (2 ** N)
RETURN
END SUBROUTINE AUXSUB1
RECURSIVE SUBROUTINE AUXSUB2(N,VALUE2)
INTEGER N
REAL*8 VALUE2
VALUE2 = 1 / (3 ** N)
RETURN
END SUBROUTINE AUXSUB2
RECURSIVE SUBROUTINE FDLF(VET,N,EXTRESULT)
INTEGER VET(93),N
REAL*8 VALUE1, VALUE2, EXTRESULT
VALUE1 = 0.
VALUE2 = 0.
EXTRESULT = 0.0
CALL AUXSUB1(N,VALUE1)
CALL AUXSUB2(N,VALUE2)
DO I=1,93
IF I.LT.47 THEN
EXTRESULT = EXTRESULT + VALUE1
ELSE
EXTRESULT = EXTRESULT + VALUE2
END IF
END DO
EXTRESULT = 1 / EXTRESULT
RETURN
END SUBROUTINE FDLF