I have implemented the Mandelbrot set in Cuda. When I input the height and width present in the attached code I get this error by running the cuda-memcheck command. What is it caused by? I believe that it may be due to an overflow error of the index index of the output result vector, but I would not understand why I would have the error in this case, and when I input other values, this does not happen (for example, when i have a height= 16384 * 4 and width=8192 * 4). Thanks everyone for your time.
#include <iostream>
static void
writePPM( int *buf, int width, int height, int Max_Iterations, const char *fn) {
FILE *fp = fopen(fn, "wb");
fprintf(fp, "P6\n");
fprintf(fp, "%d %d\n", width, height);
fprintf(fp, "255\n");
for (int i = 0; i < width*height; ++i) {
// Map the iteration count to colors by just alternating between
// two greys.
char c = (buf[i]== Max_Iterations) ? char(255): 20;
for (int j = 0; j < 3; ++j)
fputc(c, fp);
}
fclose(fp);
printf("Wrote image file %s\n", fn);
}
__device__ static int mandel(float c_re, float c_im, int count) {
float z_re = c_re, z_im = c_im;
int i;
for (i = 0; i < count; ++i) {
if (z_re * z_re + z_im * z_im > 4.f)
break;
float new_re = z_re*z_re - z_im*z_im;
float new_im = 2.f * z_re * z_im;
z_im = c_im + new_im;
z_re = c_re + new_re;
}
return i;
}
__global__ void kernel (float x0, float y0, float x1, float y1,
int width, int height, int maxIterations,
int *output)
{
int w= blockIdx.x*blockDim.x+threadIdx.x;
int h= blockIdx.y*blockDim.y+threadIdx.y;
float dx =(x1 - x0) / width;
float dy =(y1 - y0) / height;
if (h<height && w<width) {
//for (int i = 0; i < width; ++i) {
float x = x0 + w * dx;
float y = y0 + h * dy;
int index= (width*h+w);
output[index] = mandel(x, y, maxIterations);
//}
}
}
int main(){
unsigned int width =16384*8;
unsigned int height=8192*8;
float x0 = -2;
float x1 = 1;
float myy0 = -1;
float myy1 = 1;
uint32_t maxIterations = 1024;
size_t THREADSPERBLOCK = 1024;
size_t THREADSPERBLOCK_X = 256;
size_t THREADSPERBLOCK_Y = THREADSPERBLOCK / THREADSPERBLOCK_X;
int *buf_h= (int *)malloc(width * height * sizeof(unsigned int));
int *buf= (int *) malloc (width *height *sizeof(unsigned int));
int num_blocks_x= (width + THREADSPERBLOCK_X-1)/THREADSPERBLOCK_X;
int num_blocks_y=(height + THREADSPERBLOCK_Y-1)/THREADSPERBLOCK_Y;
cudaEvent_t start, stop;
float streamElapsedTime;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord( start, 0 );
int *buf_d;
int *buff;
cudaMalloc ((void **) & buf_d,width * height * sizeof(unsigned int));
cudaMalloc ((void**) & buff, width *height * sizeof(unsigned int));
cudaMemcpy (buf_d,buf_h,width * height * sizeof(unsigned int),cudaMemcpyHostToDevice);
dim3 gridDims (num_blocks_x,num_blocks_y);
dim3 blockDims(THREADSPERBLOCK_X,THREADSPERBLOCK_Y);
kernel<<<gridDims,blockDims>>>(x0,myy0,x1,myy1,width,height,maxIterations,buf_d);
cudaMemcpy( buf,buf_d, width*height*sizeof(unsigned int), cudaMemcpyDeviceToHost );
writePPM(buf, width, height,maxIterations, "mandelbrot-parallel.ppm");
cudaEventRecord( stop, 0);
cudaEventSynchronize( stop);
cudaEventElapsedTime( &streamElapsedTime, start, stop );
cudaEventDestroy( start);
cudaEventDestroy( stop );
cudaFree(buf_d);
cudaFree(buff);
free(buf_h);
free(buf);
printf("\nCUDA stream elapsed time: %f", streamElapsedTime);
return 0;
}**strong text**
Running with cuda-memcheck I have this error reported several times:
Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2fe) [0x28ccce]
========= Host Frame:./Mandelbrot [0x1d3eb]
========= Host Frame:./Mandelbrot [0x3a63e]
========= Host Frame:./Mandelbrot [0x678c]
========= Host Frame:./Mandelbrot [0x6655]
========= Host Frame:./Mandelbrot [0x66cd]
========= Host Frame:./Mandelbrot [0x63af]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Invalid __global__ write of size 4
========= at 0x00000310 in kernel(float, float, float, float, int, int, int, int*)
========= by thread (176,3,0) in block (92,0,0)
========= Address 0x001972c0 is out of bounds
========= Device Frame:kernel(float, float, float, float, int, int, int, int*) (kernel(float, float, float, float, int, int, int, int*) : 0x310)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2fe) [0x28ccce]
========= Host Frame:./Mandelbrot [0x1d3eb]
========= Host Frame:./Mandelbrot [0x3a63e]
========= Host Frame:./Mandelbrot [0x678c]
========= Host Frame:./Mandelbrot [0x6655]
========= Host Frame:./Mandelbrot [0x66cd]
========= Host Frame:./Mandelbrot [0x63af]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
At the end:
Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaEventSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x4a9a0]
========= Host Frame:./Mandelbrot [0x641a]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaEventElapsedTime.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x4a641]
========= Host Frame:./Mandelbrot [0x6434]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaEventDestroy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x4a7f0]
========= Host Frame:./Mandelbrot [0x6440]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaEventDestroy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x4a7f0]
========= Host Frame:./Mandelbrot [0x644c]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x48350]
========= Host Frame:./Mandelbrot [0x6458]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaFree.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x3a0403]
========= Host Frame:./Mandelbrot [0x48350]
========= Host Frame:./Mandelbrot [0x6464]
========= Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
========= Host Frame:./Mandelbrot [0x5efa]