0

Good afternoon, I get the following error if I push nx, ny and nz above a certain threshold, for example 100.

Primary job terminated normally, but 1 process returned a non-zero exit code. 
Per user-direction, the job has been aborted.
mpiexec noticed that process rank 0 with PID 0 on node debian exited on signal 11 (Segmentation fault).

If I stay below the thresholds everything works fine.

The piece of code that generates the error is in the following function:

/*
 * dealiasing - pack a subset of U in place, then swap the two packed groups.
 *
 * The index arithmetic treats U as a sequence of slabs of ny*nxd*2 elements,
 * each slab holding ny rows of nxd*2 elements.
 * NOTE(review): the factor 2 is presumably interleaved real/imaginary parts,
 * and nxd/nzd presumably the padded counterparts of nx/nz - confirm.
 *
 * Step 1: from the first nz_left = 1 + (nz-1)/2 slabs, keep the first nx*2
 *         elements of every row and store them contiguously from U[0].
 * Step 2: do the same for slabs (nzd - nz_left + 1) .. (nzd - 1), appending
 *         right after the data written by step 1.
 * Step 3: exchange the two packed groups so that the step-2 data comes first
 *         and the step-1 data follows it.
 *
 * nx, ny, nz  - sizes that determine how much of each row / how many slabs
 *               are kept
 * nxd, nzd    - sizes used to index U on entry (row length nxd*2, nzd slabs)
 * U           - buffer rewritten in place; the loops index it below
 *               nzd*ny*nxd*2 provided nx <= nxd
 */
void dealiasing(int nx, int ny, int nz, int nxd, int nzd, FFT_SCALAR *U) {

// Number of leading slabs handled by the first loop; also the slab count of
// the group that is parked in U_pos further down.
int nz_left = 1+ (nz-1)/2 ;
// reader is the next write position in the packed output (it only ever
// advances by one per copied element). last_index is assigned in both loops
// but never read in the code shown here.
int i, stride_y, stride_z, reader=0, last_index;
// Step 1: stride_z is the offset of the current slab, stride_y the offset of
// the current row inside it; only the first nx*2 elements of each row are kept.
for ( stride_z = 0; stride_z < nz_left*ny*nxd*2; stride_z = stride_z + ny*nxd*2) {
    for ( stride_y = 0; stride_y < ny*nxd*2; stride_y = stride_y + nxd*2) {
        for ( i = 0; i < (nx)*2; i++) {
            U[reader] = U[stride_z + stride_y+i];
            reader++;
        }
    }
    // stride_y holds its loop-exit value ny*nxd*2 here, so this is the offset
    // of the next slab.
    last_index = stride_z + stride_y;
}

// Step 2: same packing for the trailing nz_left-1 slabs, starting at slab
// index nzd - nz_left + 1; reader continues where step 1 stopped.
for ( stride_z = (nzd - nz_left+1)*nxd*ny*2; stride_z < nzd*ny*nxd*2; stride_z = stride_z + ny*nxd*2) {
    for ( stride_y = 0; stride_y < ny*nxd*2; stride_y = stride_y + nxd*2) { 
        for ( i = 0; i < (nx)*2; i++) {
            U[reader] = U[stride_z + stride_y+i];
            reader++;
        }
    }
    last_index = stride_z + stride_y;
}


//Save positive modes
// Step 3: the four statements below swap the two packed groups:
//   1. declare U_pos with 2*nx*ny*(1+(nz-1)/2) elements (the step-1 group),
//   2. copy the step-1 group from the start of U into U_pos,
//   3. move the step-2 group (2*nx*ny*((nz-1)/2) elements) to the start of U,
//   4. copy the saved step-1 group back, directly after it.
// NOTE(review): U_pos is a variable-length array with automatic storage, so
// its size grows with nx*ny*nz; nx=ny=nz=100 already gives
// 2*100*100*50 = 1,000,000 elements. An array this large can exhaust the
// stack on typical implementations - a malloc/free buffer would avoid that.
// NOTE(review): U_pos is declared as double while the byte counts use
// sizeof(FFT_SCALAR); this is only in bounds if FFT_SCALAR is no wider than
// double - confirm.
double U_pos[2*nx*ny*(1+(nz-1)/2)];
memmove(U_pos, U, sizeof(FFT_SCALAR)*2*nx*ny*(1+(nz-1)/2)); 
memmove(U, &U[2*nx*ny*(1+(nz-1)/2)], sizeof(FFT_SCALAR)*2*nx*ny*((nz-1)/2));
memmove(&U[2*nx*ny*((nz-1)/2)], U_pos, sizeof(FFT_SCALAR)*2*nx*ny*(1+(nz-1)/2));

In particular, if I disable the last 4 rows of the function, the code also works for nx, ny, nz higher than 100. The intent of these rows is to move the entries of U so that the ending block of values is remapped to the start and vice versa. FFT_SCALAR is like double.

The function is called after a series of MPI_Gatherv and MPI_Barrier calls, enclosed in the following block:

// Alloc memory for the global output
nfast=nxd;  nmid=ny; nslow=nzd;
FFT_SCALAR *UU, *UV, *VV, *VW, *WW, *UW;
UU = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));
UV = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));
VV = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));
VW = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));
WW = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));
UW = (FFT_SCALAR*) malloc( nfast*nmid*nslow*2* sizeof(FFT_SCALAR));

// Gather all data on rank 0
MPI_Gatherv( u, receive[rank], MPI_DOUBLE, U, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")

MPI_Gatherv( uu, receive[rank], MPI_DOUBLE, UU, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Gatherv( uv, receive[rank], MPI_DOUBLE, UV, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Gatherv( vv, receive[rank], MPI_DOUBLE, VV, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Gatherv( vw, receive[rank], MPI_DOUBLE, VW, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Gatherv( ww, receive[rank], MPI_DOUBLE, WW, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Gatherv( uw, receive[rank], MPI_DOUBLE, UW, scounts, displs, MPI_DOUBLE, 0, MPI_COMM_WORLD); // @suppress("Symbol is not resolved")
MPI_Barrier(MPI_COMM_WORLD); // @suppress("Symbol is not resolved")


 /**************************************** Dealias and Transpose dataset ****************************************/
 if (rank == 0) {
  dealiasing( nx, ny, nz, nxd, nzd, U);

  dealiasing( nx, ny, nz, nxd, nzd, UU);
  dealiasing( nx, ny, nz, nxd, nzd, UV);
  dealiasing( nx, ny, nz, nxd, nzd, VV);
  dealiasing( nx, ny, nz, nxd, nzd, VW);
  dealiasing( nx, ny, nz, nxd, nzd, WW);
  dealiasing( nx, ny, nz, nxd, nzd, UW);
}
Mirco
  • 165
  • 2
  • 13

1 Answer

2

If I rewrite these two lines:

double U_pos[2*nx*ny*(1+(nz-1)/2)];
memmove(U_pos, U, sizeof(FFT_SCALAR)*2*nx*ny*(1+(nz-1)/2)); 

to

#define SOME_SIZE (2*nx*ny*(1+(nz-1)/2))

double U_pos[SOME_SIZE];
memmove(U_pos, U, sizeof(FFT_SCALAR)*SOME_SIZE);

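it becomes easier to see when memory could be overwritten: U_pos holds SOME_SIZE elements of type double, i.e. SOME_SIZE*sizeof(double) bytes, while memmove copies SOME_SIZE*sizeof(FFT_SCALAR) bytes. The copy therefore stays in bounds only if sizeof(FFT_SCALAR) is not larger than sizeof(double); declaring the array as FFT_SCALAR U_pos[SOME_SIZE] would make the two sizes match by construction.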

Another problem could be that U_pos is a VLA (variable-length array) that, in most implementations, will be allocated on the stack. As nx, ny and nz get large, the U_pos array gets very large and you may have a stack overflow.

Support Ukraine
  • 42,271
  • 4
  • 38
  • 63
  • It's likely a stack overflow, as you said, because the arrays are huge. What can I do to avoid it? I've read that allocating the array dynamically could help; is that true? Sorry for the questions, but I'm a newbie and I'm trying to understand where my mistakes are and why – Mirco Jan 22 '19 at 17:13
  • @Mirco yes, dynamic allocation is better for huge arrays. Like: `double *U_pos = malloc(THE_ELEMTS_YOU_NEED * sizeof *U_pos)` – Support Ukraine Jan 22 '19 at 17:25
  • @Mirco Try `double *U_pos = malloc(sizeof(*U_pos) * SOME_SIZE);` Don't forget to do `free(U_pos);` at the end of the function. All the rest should be the same. – Craig Estey Jan 22 '19 at 17:25
  • you can try `ulimit -s` before running your app – Gilles Gouaillardet Jan 23 '19 at 07:23