I have a Model class that holds data for a model and runs several functions on that data. The details are probably not too important except that it has the following design:
- Variables are stored in the class namespace.
- Variables are initialized and freed by one of the class's methods.
- Variables are used by several other methods.
A MWE of the class appears as follows:
#include <cstdlib>
class Model {
private:
int width;
int height;
int size;
int nshift[8]; //Offset from a focal cell's index to its neighbours
double *restrict h; //Digital elevation model (height)
int *restrict rec; //Index of receiving cell
const int NO_FLOW = -1;
const double SQRT2 = 1.414213562373095048801688724209698078569671875376948;
const double dr[8] = {1,SQRT2,1,SQRT2,1,SQRT2,1,SQRT2};
private:
void GenerateRandomTerrain(){
//srand(std::random_device()());
for(int y=0;y<height;y++)
for(int x=0;x<width;x++){
const int c = y*width+x;
h[c] = rand()/(double)RAND_MAX;
}
}
public:
Model(const int width0, const int height0)
: nshift{-1,-width0-1,-width0,-width0+1,1,width0+1,width0,width0-1}
{
width = width0;
height = height0;
size = width*height;
h = new double[size];
GenerateRandomTerrain();
}
~Model(){
delete[] h;
}
private:
void FindDownstream(){
//! computing receiver array
#pragma acc parallel loop collapse(2) independent present(h,rec,width,height)
for(int y=2;y<height-2;y++)
for(int x=2;x<width-2;x++){
const int c = y*width+x;
//The slope must be greater than zero for there to be downhill flow;
//otherwise, the cell is marekd NO_FLOW
double max_slope = 0;
int max_n = NO_FLOW;
#pragma acc loop seq
for(int n=0;n<8;n++){
double slope = (h[c] - h[c+nshift[n]])/dr[n];
if(slope>max_slope){
max_slope = slope;
max_n = n;
}
}
rec[c] = max_n;
}
}
public:
void run(const int nstep){
rec = new int[size];
#pragma acc enter data copyin(h[0:size],nshift[0:8],height,width,this) create(rec[0:size])
for(int step=0;step<=nstep;step++)
FindDownstream();
#pragma acc exit data copyout(h[0:size]) delete(this,rec)
delete[] rec;
}
};
int main(int argc, char **argv){
Model model(300,300);
model.run(100);
return 0;
}
When I compile with:
pgc++ -acc -ta=tesla,pinned,cc60 -Minfo=accel -fast test.cpp -std=c++11
I get the following warning:
51, Loop without integer trip count will be executed in sequential mode
Complex loop carried dependence of rec->,nshift prevents parallelization
Loop carried dependence of rec-> prevents parallelization
Loop carried backward dependence of rec-> prevents vectorization
Some digging on the internet reveals that a typical cause of this is the potential for pointer aliasing to cause dependencies.
I have tried to use *restrict
and independent
(as shown) to tell the compiler everything is alright, but it ignores me and does not parallelize the loop.
Passing pointers as arguments to the function with appropriate use of restrict
eliminates the error, but I have an aesthetic preference against this design. Alternatively, all the methods, which are each, essentially, a kernel, could be strung together in the run()
function, but again, this is not desirable.
If I use independent
on the inner loop, I get:
PGCC-W-0155-inner loop of tiled/collapsed loop nest should not have another loop directive (actual_code.cpp: 227)
But the loop does appear to parallelize.
I am compiling with PGI 17.9.