I've written the following code for generating a Julia set fractal in CUDA C/C++ with some help from online sources. I've been trying for hours now, but I'm unable to figure out as to why this always generates a grey image rather than the one I get when I run the CPU code. I'm new to CUDA C and parallel programming, and I'm currently referring to CUDA by Example by Sanders and Kandrot.
Here is the CPU variant of code, which runs fine with all the necessary imports in VS2013:
/*
References:
[1] http://stackoverflow.com/questions/23711681/generating-custom-color-palette-for-julia-set
[2] http://www.cs.rit.edu/~ncs/color/t_convert.html
*/
#include <stdio.h>
#include <stdlib.h>
#include <complex>
#include <string.h>
#include <IL/il.h>
#include <IL/ilu.h>
#include <time.h>
using namespace std;
#define N 1024
#define SQRT_2 1.4142
#define MAX_ITER 512
void HSVtoRGB( float *r, float *g, float *b, float h, float s, float v );
void saveImage(int width, int height, unsigned char * bitmap, complex<float> seed);
void compute_julia(complex<float> c, unsigned char * image);
int main(int argc, char **argv)
{
complex<float> c(0.285f, 0.01f);
if(argc > 2)
{
c.real(atof(argv[1]));
c.imag(atof(argv[2]));
} else
fprintf(stderr, "Usage: %s <real> <imag>\nWhere <real> and <imag> form the complex seed for the Julia set.\n", argv[0]);
ilInit();
unsigned char *image = new unsigned char[N*N*3]; //RGB image
compute_julia(c, image);
saveImage(N, N, image, c);
delete[] image;
}
void compute_julia(complex<float> c, unsigned char * image)
{
complex<float> z_old(0.0f, 0.0f);
complex<float> z_new(0.0f, 0.0f);
for(int y=0; y<N; y++)
for(int x=0; x<N; x++)
{
z_new.real(4.0f * x / (N) - 2.0f);
z_new.imag(4.0f * y / (N) - 2.0f);
int i;
for(i=0; i<MAX_ITER; i++)
{
z_old.real(z_new.real());
z_old.imag(z_new.imag());
z_new = pow(z_new, 2);
z_new += c;
if(norm(z_new) > 4.0f) break;
}
float brightness = (i<MAX_ITER) ? 1.0f : 0.0f;
float hue = (i % MAX_ITER)/float(MAX_ITER - 1);
hue = (120*sqrtf(hue) + 150);
float r, g, b;
HSVtoRGB(&r, &g, &b, hue, 1.0f, brightness);
image[(x + y*N)*3 + 0] = (unsigned char)(b*255);
image[(x + y*N)*3 + 1] = (unsigned char)(g*255);
image[(x + y*N)*3 + 2] = (unsigned char)(r*255);
}
}
void saveImage(int width, int height, unsigned char * bitmap, complex<float> seed)
{
ILuint imageID = ilGenImage();
ilBindImage(imageID);
ilTexImage(width, height, 1, 3, IL_RGB, IL_UNSIGNED_BYTE, bitmap);
//ilEnable(IL_FILE_OVERWRITE);
char imageName[256];
sprintf(imageName, "Julia %.3f + i%.3f.png", seed.real(), seed.imag());
ilSave(IL_PNG, imageName);
fprintf(stderr, "Image saved as: %s\n", imageName);
}
// r,g,b values are from 0 to 1
// h = [0,360], s = [0,1], v = [0,1]
// if s == 0, then h = -1 (undefined)
void HSVtoRGB( float *r, float *g, float *b, float h, float s, float v )
{
int i;
float f, p, q, t;
if( s == 0 ) {
// achromatic (grey)
*r = *g = *b = v;
return;
}
h /= 60; // sector 0 to 5
i = floor( h );
f = h - i; // factorial part of h
p = v * ( 1 - s );
q = v * ( 1 - s * f );
t = v * ( 1 - s * ( 1 - f ) );
switch( i ) {
case 0:
*r = v;
*g = t;
*b = p;
break;
case 1:
*r = q;
*g = v;
*b = p;
break;
case 2:
*r = p;
*g = v;
*b = t;
break;
case 3:
*r = p;
*g = q;
*b = v;
break;
case 4:
*r = t;
*g = p;
*b = v;
break;
default: // case 5:
*r = v;
*g = p;
*b = q;
break;
}
}
And here is the corresponding GPU version (note that it is pretty unrefined at the moment, I'll do that once I'm able to get basic functionality out of it):
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include <complex>
#include <string.h>
#include <IL/il.h>
#include <IL/ilu.h>
#include <time.h>
/*
References:
[1] http://stackoverflow.com/questions/23711681/generating-custom-color-palette-for-julia-set
[2] http://www.cs.rit.edu/~ncs/color/t_convert.html
*/
using namespace std;
#define N 1024
#define SQRT_2 1.4142
#define MAX_ITER 512
struct cuComplex {
float r;
float i;
__host__ __device__ cuComplex(float a, float b) : r(a), i(b) {}
__host__ __device__ float magnitude2(void) {
return r * r + i * i;
}
__host__ __device__ cuComplex operator*(const cuComplex& a) {
return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
}
__host__ __device__ cuComplex operator+(const cuComplex& a) {
return cuComplex(r + a.r, i + a.i);
}
};
void HSVtoRGB(float *r, float *g, float *b, float h, float s, float v);
void saveImage(int width, int height, unsigned char * bitmap, cuComplex seed);
void compute_julia(complex<float> c, unsigned char * image);
__global__ void compute_julia_gpu(unsigned char* image);
__device__ void HSVtoRGB_GPU(float *r, float *g, float *b, float h, float s, float v);
int main(int argc, char **argv)
{
cuComplex c(-0.8f, 0.156f);
/*
if (argc > 2)
{
c.real(atof(argv[1]));
c.imag(atof(argv[2]));
}*/
fprintf(stderr, "Usage: %s <real> <imag>\nWhere <real> and <imag> form the complex seed for the Julia set.\n", argv[0]);
ilInit();
dim3 grid(N, N);
unsigned char *image = new unsigned char[N*N * 3]; //RGB image
size_t size = sizeof(image);
unsigned char *d_image; //RGB image
cudaMalloc((void **)&d_image, size);
compute_julia_gpu<<<grid, 1>>>(d_image);
cudaMemcpy(image, d_image, size, cudaMemcpyDeviceToHost);
saveImage(N, N, image, c);
cudaFree(d_image);
delete[] image;
}
__global__ void compute_julia_gpu(unsigned char* image) {
/*
complex<float> z_old(0.0f, 0.0f);
complex<float> z_new(0.0f, 0.0f);
complex<float> c(-0.8f, 0.156f);
*/
cuComplex z_old(0.0, 0.0);
cuComplex z_new(0.0, 0.0);
cuComplex c(-0.8f, 0.156f);
int x = blockIdx.x;
int y = blockIdx.y;
z_new.r = (4.0f * x / (N)-2.0f);
z_new.i = (4.0f * y / (N)-2.0f);
int i = 0;
for (i = 0; i<MAX_ITER; i++)
{
z_old.r = z_new.r;
z_old.i = z_new.i;
z_new = (z_new * z_new) + c;
if (z_new.magnitude2() > 4.0f) break;
}
float brightness = (i<MAX_ITER) ? 1.0f : 0.0f;
float hue = (i % MAX_ITER) / float(MAX_ITER - 1);
hue = (120 * sqrtf(hue) + 150);
float r, g, b;
HSVtoRGB_GPU(&r, &g, &b, hue, 1.0f, brightness);
image[(x + y*N) * 3 + 0] = (unsigned char)(b * 255);
image[(x + y*N) * 3 + 1] = (unsigned char)(g * 255);
image[(x + y*N) * 3 + 2] = (unsigned char)(r * 255);
}
void saveImage(int width, int height, unsigned char * bitmap, cuComplex seed)
{
ILuint imageID = ilGenImage();
ilBindImage(imageID);
ilTexImage(width, height, 1, 3, IL_RGB, IL_UNSIGNED_BYTE, bitmap);
//ilEnable(IL_FILE_OVERWRITE);
char imageName[256];
sprintf(imageName, "Julia %.3f + i%.3f.png", seed.r, seed.i);
ilSave(IL_PNG, imageName);
fprintf(stderr, "Image saved as: %s\n", imageName);
}
__device__ void HSVtoRGB_GPU(float *r, float *g, float *b, float h, float s, float v)
{
int i;
float f, p, q, t;
if (s == 0) {
// achromatic (grey)
*r = *g = *b = v;
return;
}
h /= 60; // sector 0 to 5
i = floor(h);
f = h - i; // factorial part of h
p = v * (1 - s);
q = v * (1 - s * f);
t = v * (1 - s * (1 - f));
switch (i) {
case 0:
*r = v;
*g = t;
*b = p;
break;
case 1:
*r = q;
*g = v;
*b = p;
break;
case 2:
*r = p;
*g = v;
*b = t;
break;
case 3:
*r = p;
*g = q;
*b = v;
break;
case 4:
*r = t;
*g = p;
*b = v;
break;
default: // case 5:
*r = v;
*g = p;
*b = q;
break;
}
}
Any help is appreciated, thanks.