I am trying to use omp to accelerate my mexfunction. However, it crashes Matlab every time. I am sorry for the long code. However, I don't know where comes the mistake, so I have to post all of them. The code works well without using omp.
#include <math.h>
#include <vector>
#include <algorithm>
#include "mex.h"
#include <omp.h>
using namespace std;
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
/* Macros for the ouput and input arguments */
#define T1_OUT plhs[0]
#define T2_OUT plhs[1]
#define point_IN prhs[0]
#define PlaneLocation_IN prhs[1]
#define SearchRadius_IN prhs[2]
#define NeighborIndex_IN prhs[3]
#define D_IN prhs[4]
#define pi 3.14159265358979
double *point,*PL,R,*NI,*T1,*T2,*tempPr,*DP2P;
int i,j,k;
int NumOfPoints;
int NumOfNeighbors;
double vector2[3];
double vector1[3];
double cross[3];
double V;
vector<int> idIN;
vector<double> GPStime;
vector<double> LineInBallLength;
// define input pointers.
point = mxGetPr(point_IN);
PL = mxGetPr(PlaneLocation_IN);
R = mxGetScalar(SearchRadius_IN);
NI = mxGetPr(NeighborIndex_IN);
DP2P = mxGetPr(D_IN);
V = 4.0/3.0*pi*R*R*R;
NumOfPoints = mxGetM(point_IN);
NumOfNeighbors = mxGetN(NeighborIndex_IN);
T1_OUT = mxCreateDoubleMatrix(NumOfPoints,1,mxREAL);
T2_OUT = mxCreateDoubleMatrix(NumOfPoints,1,mxREAL);
T1 = mxGetPr(T1_OUT);
T2 = mxGetPr(T2_OUT);
double Dist;
double Coef;
double Noise;
#pragma omp parallel
{
#pragma omp for //if I comment this line and the two lines above, the code works OK.
for(i=0;i<NumOfPoints;i++){
double pointX = point[i+ NumOfPoints];
double pointY = point[i + NumOfPoints *2];
double pointZ = point[i + NumOfPoints * 3];
idIN.clear();
GPStime.clear();
for(j=0;j<NumOfPoints;j++){
vector2[0] = pointX - point[j+ NumOfPoints];
vector2[1] = pointY - point[j + NumOfPoints * 2];
vector2[2] = pointZ - point[j + NumOfPoints * 3];
vector1[0] = pointX - PL[j];
vector1[1] = pointY - PL[j + NumOfPoints];
vector1[2] = pointZ - PL[j + NumOfPoints * 2];
cross[0] = vector2[1]*vector1[2] - vector2[2]*vector1[1];
cross[1] = vector2[2]*vector1[0] - vector2[0]*vector1[2];
cross[2] = vector2[0]*vector1[1] - vector2[1]*vector1[0];
Dist = sqrt(cross[0]*cross[0] + cross[1]*cross[1] + cross[2]*cross[2]) / DP2P[j];
if (Dist <=R){
idIN.push_back(j);
GPStime.push_back(point[j]);
LineInBallLength.push_back(2*sqrt(R*R - Dist*Dist));
}
}
// Calculate Coef
sort(GPStime.begin(),GPStime.end());
GPStime.erase(unique(GPStime.begin(),GPStime.end()),GPStime.end());
Coef = double(GPStime.size()) * 100 / double(idIN.size());
Noise = 0;
for(k=0;k<idIN.size();k++){
Noise = Noise + LineInBallLength[k]*point[4*NumOfPoints+idIN[k]] / V * Coef;
}
T1[i] = Noise;
}
}
return;
}
Also I tried another short example using omp and it works. The computation time is about one fourth of the original time since my computer has four cores. Can anyone help me with this, thanks. By the way, I am very new in writing C code, so any other suggestions about the C code is appreciated.
Update: I tried to revise the omp part as following
#pragma omp parallel for shared(point, DP2P, R, PL) private(pointX,pointY,pointZ,idIN,GPStime,vector1,vector2,cross,Dist, LineInBallLength,Noise)
Right now, matlab will not crash and can get a result. However, this result is not the same as the one without omp. For most of the result values, they are close to the right one (result without omp) and some of them are quite different from the right ones.