I need to generate many random numbers. I have found a lot of documentation on how to generate a single array of random numbers, but I need to generate 4 independent arrays of random numbers: for example, to generate independently each component of a 4-vector (x, y, z, t). With the following code I can generate one array:
#include <iostream>
#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
using namespace std;
// Gives each CUDA thread its own cuRAND generator state to sample from.
//
// Expects a 1D launch. Thread `idx` initialises state[idx] by calling
// curand_init with the shared `seed`, sequence number `idx` and offset 0.
//
//   seed  - seed handed to curand_init by every thread
//   state - device array of generator states, indexed by global thread id
//   n     - number of elements in `state`; threads with idx >= n do nothing.
//           The default is the largest size_t, which no 32-bit thread index
//           reaches on a 64-bit build, so existing two-argument launches behave
//           exactly as before and must size `state` to cover the whole grid.
__global__ void init_rng(unsigned int seed, curandState* state,
                         size_t n = static_cast<size_t>(-1))
{
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Grids are normally rounded up to whole blocks; surplus threads must not
    // touch memory past the end of the array.
    if (idx >= n) {
        return;
    }

    curand_init(seed, idx, 0, &state[idx]);
}
// Draws one value per thread with curand_uniform and stores it in x[idx].
//
// Expects a 1D launch and generator states that were already initialised
// (e.g. by init_rng) for the same thread indices.
//
//   state - device array of generator states, indexed by global thread id;
//           each used entry is advanced and written back
//   x     - output array, indexed by global thread id
//   n     - number of elements in `state` and `x`; threads with idx >= n do
//           nothing. The default is the largest size_t, which no 32-bit thread
//           index reaches on a 64-bit build, so existing two-argument launches
//           behave exactly as before and must size both arrays to the grid.
__global__ void gen_x(curandState *state, float* x,
                      size_t n = static_cast<size_t>(-1))
{
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Grids are normally rounded up to whole blocks; surplus threads must not
    // read or write past the end of the arrays.
    if (idx >= n) {
        return;
    }

    // Work on a local copy of the state and store it back afterwards so the
    // next draw from this thread continues the sequence instead of repeating it.
    curandState localState = state[idx];
    x[idx] = curand_uniform(&localState);
    state[idx] = localState;
}
// Reports a failed CUDA runtime call on stderr.
// Returns 0 when `status` is cudaSuccess and 1 otherwise, so results can be
// chained with `||` to skip later steps after the first failure.
static int cudaFailed(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return 0;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return 1;
}

// Fills a managed array with N uniform random floats, one per thread.
// Returns 0 on success and 1 if any CUDA call or kernel launch failed.
int main(void)
{
    const long int N = 1000000;
    const int threadsPerBlock = 1024;
    const int nBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;

    // The kernels are launched below without an element count, so every thread
    // of the rounded-up grid accesses its own slot. Size the buffers to the
    // grid (>= N) so the threads past N stay inside the allocations; only the
    // first N entries of x are meaningful output.
    const size_t nSlots = static_cast<size_t>(nBlocks) * threadsPerBlock;

    float *x = NULL;
    curandState *d_state = NULL;
    int failed = 0;

    // Unified Memory: accessible from CPU or GPU.
    failed = failed || cudaFailed(
        cudaMallocManaged((void**)&x, nSlots * sizeof(float)),
        "cudaMallocManaged(x)");

    // One generator state per launched thread.
    failed = failed || cudaFailed(
        cudaMalloc((void**)&d_state, nSlots * sizeof(curandState)),
        "cudaMalloc(d_state)");

    if (!failed) {
        // Init the random states.
        init_rng<<<nBlocks, threadsPerBlock>>>(12345, d_state);
        // A launch returns no status itself; bad configurations surface here.
        failed = cudaFailed(cudaGetLastError(), "init_rng launch");
    }

    if (!failed) {
        gen_x<<<nBlocks, threadsPerBlock>>>(d_state, x);
        failed = cudaFailed(cudaGetLastError(), "gen_x launch");
    }

    // Launches are asynchronous: wait for both kernels so that execution errors
    // are reported and x is complete before the program exits.
    failed = failed || cudaFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    // Release the buffers on every path; cudaFree accepts a null pointer.
    failed = cudaFailed(cudaFree(d_state), "cudaFree(d_state)") || failed;
    failed = cudaFailed(cudaFree(x), "cudaFree(x)") || failed;

    return failed ? 1 : 0;
}
How could I also generate other independent random arrays, say "y", "z" and "t", without any correlation between them? Should I create 4 different kernels, one for each array, each initialized with a different seed and reusing the curandState, or is there a way to do it in the same kernel? Thank you!