2

In a CUDA application, I need to fill a matrix with random values in an interval between a and b.

I used code already available on the net that uses cuRAND, but I cannot modify it to produce values between a and b.

Code is as follows:

// Fill the array A(nr_rows_A, nr_cols_A) with uniformly distributed random
// numbers on the GPU.
//
// Parameters:
//   A          - destination buffer for nr_rows_A * nr_cols_A floats. It is
//                handed to a generator created with curandCreateGenerator,
//                which writes to device memory, so it must be a device pointer.
//   nr_rows_A  - number of rows of A.
//   nr_cols_A  - number of columns of A.
//
// curandGenerateUniform produces values in (0, 1]: 0.0f is excluded and 1.0f
// is included. Failures are reported on std::cerr because the function
// returns void; A is left untouched if the generator cannot be created.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to generate for a missing buffer or an empty/invalid shape.
    // This also keeps a negative product from turning into a huge size_t.
    if (A == NULL || nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so a large matrix cannot overflow int.
    const size_t count = (size_t)nr_rows_A * (size_t)nr_cols_A;

    // Create a pseudo-random number generator
    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS)
    {
        std::cerr << "GPU_fill_rand: curandCreateGenerator failed (status "
                  << status << ")" << std::endl;
        return;
    }

    // Seed the generator from clock().
    // NOTE(review): clock() reports processor time used by the program, so
    // runs that reach this point equally fast may get the same seed.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    // Fill the array with random numbers on the device
    if (status == CURAND_STATUS_SUCCESS)
        status = curandGenerateUniform(prng, A, count);

    if (status != CURAND_STATUS_SUCCESS)
        std::cerr << "GPU_fill_rand: random generation failed (status "
                  << status << ")" << std::endl;

    // Release the generator; without this every call leaks its state.
    curandDestroyGenerator(prng);
}


// Demo driver: allocates a Height x Width float matrix on the host and on the
// device, prints the zero-initialised host copy, fills the device copy with
// random numbers through GPU_fill_rand, copies it back and prints it.
//
// Returns 0 on success and 1 if the host allocation or any CUDA call failed.
int main(void)
{
    // Variables declaration
    const int Height = 3 ;
    const int Width  = 10 ;
    const int vSize = Height*Width ;
    // Byte count kept in size_t, the type malloc/cudaMalloc/cudaMemcpy take.
    const size_t mSize = sizeof(float)*vSize ;

    float *hst_Mat = (float *)malloc(mSize) ;
    if (hst_Mat == NULL)
    {
        cerr << "Host allocation of " << mSize << " bytes failed" << endl ;
        return 1 ;
    }
    memset(hst_Mat, 0, mSize) ;

    // Each CUDA step runs only if every previous one succeeded, so the first
    // failure is the one that gets reported.
    float *dev_Mat = NULL ;
    cudaError_t status = cudaMalloc((void**)&dev_Mat, mSize) ;
    if (status == cudaSuccess)
        status = cudaMemset(dev_Mat, 0, mSize) ;

    // Print initial matrix
    cout << " * Initial matrix : " << endl << "\t" ;
    for(int i=0 ;i<Height ; i++)
    {
        for(int j=0 ; j<Width ; j++)
            cout << "\t" << hst_Mat[i*Width+j] ;
        cout << endl << "\t" ;
    }
    cout << endl << endl ;

    // Fill the device matrix with random numbers
    if (status == cudaSuccess)
    {
        GPU_fill_rand(dev_Mat, Height, Width) ;
        // Surface any error left behind by the device work issued above.
        status = cudaGetLastError() ;
    }

    // Retrieving data from device
    if (status == cudaSuccess)
        status = cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost) ;

    if (status == cudaSuccess)
    {
        // Print result matrix
        cout << " * Result matrix : " << endl << "     " ;
        for(int i=0 ;i<Height ; i++)
        {
            for(int j=0 ; j<Width ; j++)
                cout << "   " << hst_Mat[i*Width+j] ;
            cout << endl << "     " ;
        }
        cout << endl << endl ;
    }
    else
    {
        cerr << "CUDA error: " << cudaGetErrorString(status) << endl ;
    }

    // FREE MEMORY (cudaFree on a null pointer performs no operation)
    free(hst_Mat) ;
    cudaFree(dev_Mat) ;

    system("pause") ;

    return (status == cudaSuccess) ? 0 : 1 ;
}

But it generates random values in the interval [0, 1].

How to do this?

Cœur
  • 37,241
  • 25
  • 195
  • 267
kmaniche
  • 45
  • 1
  • 6
  • 1
    Just write a simple kernel in which you use `t[k]=(b-a)*t[k]+a`, where `t` is the output of `cuRAND`? – Vitality Jan 14 '14 at 11:54

1 Answers1

2

Try this code

#include <conio.h>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>

#include <curand.h>

using namespace std;

// Integer division of a by b, bumped up by one whenever the division leaves
// a non-zero remainder. Used to compute how many blocks of size b cover a items.
int iDivUp(int a, int b)
{
    const int quotient = a / b;
    const bool hasRemainder = (a % b) != 0;
    return hasRemainder ? quotient + 1 : quotient;
}

// Fill the array A(nr_rows_A, nr_cols_A) with uniformly distributed random
// numbers on the GPU.
//
// Parameters:
//   A          - destination buffer for nr_rows_A * nr_cols_A floats. It is
//                handed to a generator created with curandCreateGenerator,
//                which writes to device memory, so it must be a device pointer.
//   nr_rows_A  - number of rows of A.
//   nr_cols_A  - number of columns of A.
//
// curandGenerateUniform produces values in (0, 1]: 0.0f is excluded and 1.0f
// is included. Failures are reported on std::cerr because the function
// returns void; A is left untouched if the generator cannot be created.
void GPU_fill_rand(float *A, int nr_rows_A, int nr_cols_A)
{
    // Nothing to generate for a missing buffer or an empty/invalid shape.
    // This also keeps a negative product from turning into a huge size_t.
    if (A == NULL || nr_rows_A <= 0 || nr_cols_A <= 0)
        return;

    // Multiply in size_t so a large matrix cannot overflow int.
    const size_t count = (size_t)nr_rows_A * (size_t)nr_cols_A;

    curandGenerator_t prng;
    curandStatus_t status = curandCreateGenerator(&prng, CURAND_RNG_PSEUDO_XORWOW);
    if (status != CURAND_STATUS_SUCCESS)
    {
        std::cerr << "GPU_fill_rand: curandCreateGenerator failed (status "
                  << status << ")" << std::endl;
        return;
    }

    // NOTE(review): clock() reports processor time used by the program, so
    // runs that reach this point equally fast may get the same seed.
    status = curandSetPseudoRandomGeneratorSeed(prng, (unsigned long long) clock());

    if (status == CURAND_STATUS_SUCCESS)
        status = curandGenerateUniform(prng, A, count);

    if (status != CURAND_STATUS_SUCCESS)
        std::cerr << "GPU_fill_rand: random generation failed (status "
                  << status << ")" << std::endl;

    // Release the generator; without this every call leaks its state.
    curandDestroyGenerator(prng);
}


// Rescales every element of A in place with A[i] = (b - a) * A[i] + a, which
// maps an input in the unit interval onto the interval between a and b.
//
// Parameters:
//   A          - array of nr_rows_A * nr_cols_A floats, read and written in place.
//   a, b       - bounds of the target interval.
//   nr_rows_A  - number of rows of A.
//   nr_cols_A  - number of columns of A.
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used).
// Each thread starts at its global index and advances by the total number of
// launched threads, so any grid size covers the whole array. No shared memory.
__global__ void generate_in_a_b(float *A, float a, float b, int nr_rows_A, int nr_cols_A) {

    // Empty or invalid shape: nothing to do. This also keeps a negative
    // dimension from becoming a huge unsigned count.
    if (nr_rows_A <= 0 || nr_cols_A <= 0) return;

    // Index arithmetic in size_t so large matrices cannot overflow int.
    const size_t count  = (size_t)nr_rows_A * (size_t)nr_cols_A;
    const size_t stride = (size_t)blockDim.x * gridDim.x;

    for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x; idx < count; idx += stride)
        A[idx] = (b - a) * A[idx] + a;

}

// Demo driver: fills a Height x Width device matrix with random numbers via
// GPU_fill_rand, rescales them to the interval between a and b with the
// generate_in_a_b kernel, copies the result to the host and prints it.
//
// Returns 0 on success and 1 if the host allocation or any CUDA call failed.
int main(void)
{
    const int Height = 3 ;
    const int Width  = 10 ;
    const int vSize = Height*Width ;
    // Byte count kept in size_t, the type malloc/cudaMalloc/cudaMemcpy take.
    const size_t mSize = sizeof(float)*vSize ;

    float *hst_Mat = (float *)malloc(mSize) ;
    if (hst_Mat == NULL)
    {
        cerr << "Host allocation of " << mSize << " bytes failed" << endl ;
        return 1 ;
    }
    memset(hst_Mat, 0, mSize) ;

    // Each CUDA step runs only if every previous one succeeded, so the first
    // failure is the one that gets reported.
    float *dev_Mat = NULL ;
    cudaError_t status = cudaMalloc((void**)&dev_Mat, mSize) ;
    if (status == cudaSuccess)
        status = cudaMemset(dev_Mat, 0, mSize) ;

    if (status == cudaSuccess)
    {
        GPU_fill_rand(dev_Mat, Height, Width) ;

        // One thread per element, rounded up to whole blocks of 32 threads.
        dim3 threads(32);
        dim3 blocks(iDivUp(vSize, 32));

        float a = 3.f;
        float b = 7.f;

        generate_in_a_b<<<blocks,threads>>>(dev_Mat,a,b,Height,Width);

        // A kernel launch returns nothing; launch errors must be fetched explicitly.
        status = cudaGetLastError() ;
    }

    // cudaMemcpy also reports errors raised while the kernel was executing.
    if (status == cudaSuccess)
        status = cudaMemcpy(hst_Mat, dev_Mat, mSize, cudaMemcpyDeviceToHost) ;

    if (status == cudaSuccess)
    {
        cout << " * Result matrix : " << endl << "     " ;
        for(int i=0 ;i<Height ; i++)
        {
            for(int j=0 ; j<Width ; j++)
                cout << "   " << hst_Mat[i*Width+j] ;
            // End of row: runs once per row, outside the column loop.
            cout << endl << "     " ;
        }
        cout << endl << endl ;
    }
    else
    {
        cerr << "CUDA error: " << cudaGetErrorString(status) << endl ;
    }

    // cudaFree on a null pointer performs no operation.
    free(hst_Mat) ;
    cudaFree(dev_Mat) ;

    system("pause") ;

    return (status == cudaSuccess) ? 0 : 1 ;
}

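It will return uniformly distributed random numbers in the interval [a,b]=[3.f,7.f] (more precisely, `curandGenerateUniform` yields values in (0,1], so `a` itself is excluded and `b` is included).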

Please add CUDA error checking as described in "What is the canonical way to check for errors using the CUDA runtime API?".

Community
  • 1
  • 1
Vitality
  • 20,705
  • 4
  • 108
  • 146
  • This version is only for testing; I will add the error checking in the production one. – kmaniche Jan 14 '14 at 13:09
  • Your answer works correctly, but I'm looking for a solution that uses the **curandGenerator_t** directly. I couldn't find one! – kmaniche Jan 14 '14 at 13:10
  • 1
    @kmaniche: Believe us when we say that you need error checking also during testing and development :) You save yourself a bunch of time by adding error checking early instead of late. – Roger Dahl Jan 16 '14 at 00:28
  • The error checking is now integrated in my source code, by using this post [link](http://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api) – kmaniche Mar 25 '14 at 19:47