If this sort of question has been asked I apologize, link me to the thread please!
Anyhow I am new to CUDA (I'm coming from OpenCL) and wanted to try generating an image with it. The relevant CUDA code is:
__global__
void mandlebrot(uint8_t *pixels, size_t pitch, unsigned long width, unsigned long height) {
unsigned block_size = blockDim.x;
uint2 location = {blockIdx.x*block_size, blockIdx.y*block_size};
ulong2 pixel_location = {threadIdx.x, threadIdx.y};
ulong2 real_location = {location.x + pixel_location.x, location.y + pixel_location.y};
if (real_location.x >= width || real_location.y >= height)
return;
uint8_t *row = (uint8_t *)((char *)pixels + real_location.y * pitch);
row[real_location.x * 4+0] = 0;
row[real_location.x * 4+1] = 255;
row[real_location.x * 4+2] = 0;
row[real_location.x * 4+3] = 255;
}
cudaError_t err = cudaSuccess;
#define CUDA_ERR(e) \
if ((err = e) != cudaSuccess) { \
fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n", cudaGetErrorString(err)); \
exit(-1); \
}
int main(void) {
ulong2 dims = {1000, 1000};
unsigned long block_size = 500;
dim3 threads_per_block(block_size, block_size);
dim3 remainders(dims.x % threads_per_block.x, dims.y % threads_per_block.y);
dim3 blocks(dims.x / threads_per_block.x + (remainders.x == 0 ? 0 : 1), dims.y / threads_per_block.y + (remainders.y == 0 ? 0 : 1));
size_t pitch;
uint8_t *pixels, *h_pixels = NULL;
CUDA_ERR(cudaMallocPitch(&pixels, &pitch, dims.x * 4 * sizeof(uint8_t), dims.y));
mandlebrot<<<blocks, threads_per_block>>>(pixels, pitch, dims.x, dims.y);
h_pixels = (uint8_t *)malloc(dims.x * 4 * sizeof(uint8_t) * dims.y);
memset(h_pixels, 0, dims.x * 4 * sizeof(uint8_t) * dims.y);
CUDA_ERR(cudaMemcpy2D(h_pixels, dims.x * 4 * sizeof(uint8_t), pixels, pitch, dims.x, dims.y, cudaMemcpyDeviceToHost));
save_png("out.png", h_pixels, dims.x, dims.y);
CUDA_ERR(cudaFree(pixels));
free(h_pixels);
CUDA_ERR(cudaDeviceReset());
puts("Success");
return 0;
}
The save_png
function is a usual utility function I created for taking a block of data and saving it to a png:
void save_png(const char *filename, uint8_t *buffer, unsigned long width, unsigned long height) {
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
if (!png_ptr) {
std::cerr << "Failed to create png write struct" << std::endl;
return;
}
png_infop info_ptr = png_create_info_struct(png_ptr);
if (!info_ptr) {
std::cerr << "Failed to create info_ptr" << std::endl;
png_destroy_write_struct(&png_ptr, NULL);
return;
}
FILE *fp = fopen(filename, "wb");
if (!fp) {
std::cerr << "Failed to open " << filename << " for writing" << std::endl;
png_destroy_write_struct(&png_ptr, &info_ptr);
return;
}
if (setjmp(png_jmpbuf(png_ptr))) {
png_destroy_write_struct(&png_ptr, &info_ptr);
std::cerr << "Error from libpng!" << std::endl;
return;
}
png_init_io(png_ptr, fp);
png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
png_write_info(png_ptr, info_ptr);
png_byte *row_pnts[height];
size_t i;
for (i = 0; i < height; i++) {
row_pnts[i] = buffer + width * 4 * i;
}
png_write_image(png_ptr, row_pnts);
png_write_end(png_ptr, info_ptr);
png_destroy_write_struct(&png_ptr, &info_ptr);
fclose(fp);
}
Anyways the image that's generated is a weird whiteish strip that's speckled with random colored pixels which can be seen here.
Is there something glaring I did wrong? I tried to follow the introduction documentation on the CUDA site. Otherwise can anyone help me out to fix this? Here I'm simply trying to fill the pixels
buffer with green pixels.
I am using a MBP retina with an NVIDIA GeForce GT 650M discrete graphics card. I can run and paste the output to print_devices
from the cuda sample code if need be.
EDIT: Note no errors or warnings during compilation with the following makefile:
all:
nvcc -c mandlebrot.cu -o mandlebrot.cu.o
nvcc mandlebrot.cu.o -o mandlebrot -lpng
and no errors at runtime.