1

I want to generate white noise (normal distribution) using CUDA. Below is my attempt.

enter code here

/* Factor each curand_normal() sample is multiplied by in generate_normal_kernel.
   NOTE: the unsuffixed literal is a double, so that expression is evaluated in double. */
#define SCALE 1.0
/* Offset added to each scaled sample in generate_normal_kernel (also a double literal). */
#define SHIFT 0.0
/* Number of thread blocks used for both kernel launches in main. */
#define BLOCKS 64
/* Number of threads per block used for both kernel launches in main. */
#define THREADS 64

/*
 * Initializes one cuRAND generator state per thread.
 *
 * state: device array indexed by the flat global thread id. There is no bounds
 *        check, so it must hold at least gridDim.x * blockDim.x entries.
 */
__global__ void setup_kernel(curandState *state)
{
/* Flat global thread index for a 1-D launch. */
int id = threadIdx.x + blockIdx.x * blockDim.x;
/* Arguments are (seed, subsequence, offset, state): here the seed (7+id) and
   the subsequence (id) both vary per thread, and the offset is 0. */
curand_init(7+id, id, 0, &state[id]);
}

/*
 * Draws normally distributed samples per thread and stores one integer per thread.
 *
 * state:  per-thread generator states, indexed by the flat global thread id.
 * result: per-thread output, indexed the same way. Neither array is bounds-checked.
 */
__global__ void generate_normal_kernel(curandState *state, int *result)
{
/* Flat global thread index for a 1-D launch. */
int id = threadIdx.x + blockIdx.x * blockDim.x;
float x;
/* Work on a local copy of this thread's generator state. */
curandState localState = state[id];
/* Each iteration overwrites x, so only the last of the 100000 samples survives. */
for(int n = 0; n < 100000; n++) {
x = (curand_normal(&localState) * SCALE)+SHIFT;
}
/* Write the advanced generator state back to the array. */
state[id] = localState;
/* The float-to-int cast discards the fractional part of the sample. */
result[id] = (int) x;
}


/*
 * Host driver: allocates host and device buffers, initializes the generator
 * states, runs the generation kernel, and copies the results back to the host.
 *
 * NOTE(review): CUDA_CALL, I_TCS and ITCSAmp are not defined anywhere in the
 * code shown here; they must come from elsewhere for this to compile.
 */
int main(int argc, char *argv[])
{
/* i and total are declared but never used in the code shown. */
int i;
unsigned int total;
curandState *devStates;
int *devResults, *hostResults;
int device;
struct cudaDeviceProp properties;

/* Query the current device and its properties; `properties` is not read afterwards. */
CUDA_CALL(cudaGetDevice(&device));
CUDA_CALL(cudaGetDeviceProperties(&properties,device));

/* Zero-initialized host buffer, one int per launched thread (result is not checked for NULL). */
hostResults = (int *)calloc(THREADS * BLOCKS, sizeof(int));

/* Device buffer for the per-thread results. */
CUDA_CALL(cudaMalloc((void **)&devResults, BLOCKS * THREADS *
sizeof(int)));

/* Clear the device result buffer. */
CUDA_CALL(cudaMemset(devResults, 0, THREADS * BLOCKS *
sizeof(int)));

/* Device buffer for one generator state per launched thread. */
CUDA_CALL(cudaMalloc((void **)&devStates, THREADS * BLOCKS *
sizeof(curandState)));

/* Neither kernel launch is followed by an error check. */
setup_kernel<<<BLOCKS, THREADS>>>(devStates);

generate_normal_kernel<<<BLOCKS, THREADS>>>(devStates, devResults);

/* Copy the per-thread results back to the host. */
CUDA_CALL(cudaMemcpy(hostResults, devResults, BLOCKS * THREADS *
sizeof(int), cudaMemcpyDeviceToHost));


/* hostResults is an int*, so this multiplies by a pointer rather than scaling
   each element; I_TCS and ITCSAmp are not declared in the code shown. */
I_TCS = ITCSAmp*hostResults;


/* Cleanup */
CUDA_CALL(cudaFree(devStates));
CUDA_CALL(cudaFree(devResults));
free(hostResults);
return EXIT_SUCCESS;
}

===============================================================================

But I got the following errors,

error: identifier "CUDA_CALL" is undefined

error: expression must have arithmetic or enum type

error: expression must have arithmetic or enum type

error: expression must have arithmetic or enum type

warning: variable "total" was declared but never referenced

error: identifier "devStates" is undefined

error: identifier "CUDA_CALL" is undefined

error: identifier "devResults" is undefined

error: identifier "hostResults" is undefined

I thought I had defined them already, but obviously it didn't work. If you have any suggestions or know how I might change the code, I would be really thankful for your help!

Vitality
  • 20,705
  • 4
  • 108
  • 146
waynelee1217
  • 113
  • 1
  • 1
  • 9
  • Please show where CUDA_CALL is defined? CUDA_CALL is not a macro provided by the CUDA toolkit. – cklin Mar 31 '14 at 18:22
  • The code you have shown is definitely missing some `#include` compiler directives. If you want help, I would suggest showing the *entire* code you are attempting to compile (surely you are not trying to compile "enter code here", right?), and show the complete command you use to compile, as well as the entire output of the compiler. Then I think someone will be able to help you easily. Right now, the best that can be said is that the code you have shown is missing important pieces. – Robert Crovella Apr 01 '14 at 00:48
  • I'm sorry, Robert. You're right, I missed the #include parts and some other parts of the codes. The reason is I don't have the ownership of the codes so I can't put all the things here. Next time I'll consider this point and avoid this situation. Sorry for that again. – waynelee1217 Apr 03 '14 at 01:06

1 Answers1

2

Please find below compilable, runnable code that generates normally distributed random numbers in CUDA. It is a modification of the code you posted above. Where I changed an instruction, the old version is kept as a comment.

I have replaced CUDA_CALL with gpuErrchk, following the answer to "What is the canonical way to check for errors using the CUDA runtime API?".

I think you misinterpreted the curand_init syntax, so I have fixed the call. Also, the setup_kernel kernel was missing a seed argument, so I have added one.

I have simplified your generate_normal_kernel kernel: I believe the for loop repeatedly calculating x is unneeded.

curand_normal returns floats, not ints, and indeed a normal distribution of integers is undefined. I have changed the relevant variable types accordingly.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <curand.h>
#include <curand_kernel.h>

/* Factor each curand_normal() sample is multiplied by in generate_normal_kernel. */
#define SCALE 1.0f
/* Offset added to each scaled sample in generate_normal_kernel. */
#define SHIFT 0.0f
/* Number of thread blocks used for both kernel launches in main. */
#define BLOCKS 64
/* Number of threads per block used for both kernel launches in main. */
#define THREADS 64

/***********************/
/* CUDA ERROR CHECKING */
/***********************/
/*
 * gpuErrchk(ans): evaluates a CUDA runtime call and reports any failure
 * together with the file and line of the call site. The do { } while (0)
 * wrapper makes the macro expand to a single statement, so it is safe inside
 * an un-braced if/else and requires the usual trailing semicolon.
 */
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/*
 * Prints a diagnostic to stderr when `code` is not cudaSuccess.
 *
 *   code  - status returned by a CUDA runtime call
 *   file  - source file of the call site (normally __FILE__); const because
 *           string literals are not convertible to char* in C++11 and later
 *   line  - source line of the call site (normally __LINE__)
 *   abort - when true (the default), terminate the process using `code` as
 *           the exit status; when false, only the message is printed
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

/*************************/
/* CURAND INITIALIZATION */
/*************************/
/*
 * Initializes one cuRAND generator state per thread.
 *
 *   seed  - seed shared by every thread of the launch
 *   state - device array with one curandState per launched thread
 *
 * Expects a 1-D launch. There is no bounds check, so `state` must hold at
 * least gridDim.x * blockDim.x entries.
 */
__global__ void setup_kernel(unsigned long seed, curandState *state)
{
    // Flat global index of this thread; doubles as its subsequence number.
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    curandState *slot = state + tid;

    // Same seed everywhere, a distinct subsequence per thread, zero offset.
    curand_init(seed, tid, 0, slot);
    // Old version from the question: curand_init(7+id, id, 0, &state[id]);
}

/*****************************************/
/* RANDOM DISTRIBUTION GENERATION KERNEL */
/*****************************************/
/*
 * Writes one normally distributed float per thread: a curand_normal() sample
 * multiplied by SCALE and then offset by SHIFT.
 *
 *   state  - per-thread generator states; each thread advances its own entry
 *            in place
 *   result - per-thread output array
 *
 * Expects a 1-D launch. There is no bounds check, so both arrays must hold at
 * least gridDim.x * blockDim.x entries.
 */
__global__ void generate_normal_kernel(curandState *state, float *result)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Draw directly from this thread's state so the array entry is advanced.
    curandState *slot = state + tid;
    const float sample = curand_normal(slot);

    result[tid] = (sample * SCALE) + SHIFT;
}

/********/
/* MAIN */
/********/
/*
 * Host driver: generates THREADS * BLOCKS normally distributed floats on the
 * GPU, copies them back and prints them, one per line.
 *
 * Returns EXIT_SUCCESS on success and EXIT_FAILURE if the host buffer cannot
 * be allocated. Any CUDA failure is reported by gpuErrchk, which terminates
 * the process.
 */
int main()
{
    float* hostResults = (float*)calloc(THREADS * BLOCKS, sizeof(float));
    if (hostResults == NULL)
    {
        fprintf(stderr, "Failed to allocate host result buffer\n");
        return EXIT_FAILURE;
    }

    float *devResults; gpuErrchk(cudaMalloc((void**)&devResults, BLOCKS * THREADS * sizeof(float)));
    gpuErrchk(cudaMemset(devResults, 0, THREADS * BLOCKS * sizeof(float)));

    curandState *devStates; gpuErrchk(cudaMalloc((void **)&devStates, THREADS * BLOCKS * sizeof(curandState)));

    // Seed from the wall clock so each run produces a different sequence;
    // the cast makes the time_t -> unsigned long conversion explicit.
    setup_kernel<<<BLOCKS, THREADS>>>((unsigned long)time(NULL), devStates);
    // A launch returns no status: peek for launch errors, then synchronize to
    // surface any error raised while the kernel was executing.
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    generate_normal_kernel<<<BLOCKS, THREADS>>>(devStates, devResults);
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaMemcpy(hostResults, devResults, BLOCKS * THREADS * sizeof(float), cudaMemcpyDeviceToHost));

    for (int i=0; i<THREADS*BLOCKS; i++) printf("rand[%i] = %f\n", i, hostResults[i]);

    /* Cleanup */
    gpuErrchk(cudaFree(devStates));
    gpuErrchk(cudaFree(devResults));

    free(hostResults);

    // Wait for a key press so the console output stays visible.
    getchar();

    return EXIT_SUCCESS;
}
Community
  • 1
  • 1
Vitality
  • 20,705
  • 4
  • 108
  • 146