I am implementing a CUDA kernel that will be useful in my project. In this code sample, every thread finds a combination based on its index. For example,
int numCols = 20;
int strength = 3; // length of combinations
With these inputs, the full list of combinations (in lexicographic order) is:
(0,1,2), (0,1,3), (0,1,4), (0,1,5), ..., (17,18,19)
If the thread index (with 1 block, threadIdx.x) is 2, then that thread calculates the combination at zero-based index 2, which is (0,1,4). The code below contains everything needed to do these calculations.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
using namespace std;
// Returns the binomial coefficient C(n, k): the number of ways to choose
// k items out of n.
//
// Parameters:
//   n - size of the set being chosen from.
//   k - number of items chosen.
// Returns:
//   C(n, k), or 0 when the request is impossible (k < 0 or k > n).
//
// The result is assumed to fit in an int; larger values are truncated by the
// final cast. Callable from both host and device code so that it can be
// verified against a CPU reference.
__host__ __device__ int numCombinations(int n, int k)
{
    // There is no way to choose a negative number of items, or more items
    // than exist.
    if (k < 0 || k > n)
        return 0;

    // C(n, k) == C(n, n - k); use the smaller k to shorten the loop.
    if (k * 2 > n)
        k = n - k;
    if (k == 0)
        return 1;

    // After iteration i the accumulator holds C(n, i), so every division is
    // exact. The product taken just before the division can exceed the int
    // range even when the final value fits, hence the 64-bit accumulator.
    long long result = n;
    for (int i = 2; i <= k; ++i)
    {
        result *= (n - i + 1);
        result /= i;
    }
    return static_cast<int>(result);
}
// Writes into optCom the combination found at position `index` in the
// lexicographically ordered list of all `strength`-element combinations of
// the values 0 .. numCols-1.
//
// Example: numCols = 20, strength = 3, index = 2 yields (0, 1, 4).
//
// Parameters:
//   optCom   - output buffer; the function writes elements 0 .. strength-1.
//   index    - zero-based rank of the wanted combination. The caller is
//              expected to pass 0 <= index < C(numCols, strength); for other
//              values the function still terminates, but the contents of
//              optCom are not a meaningful combination.
//   numCols  - number of distinct values to choose from.
//   strength - number of elements in each combination. Nothing is written
//              when strength <= 0.
__device__ void findCombFromIndex(int *optCom, int index, int numCols, int strength)
{
    if (strength <= 0)
        return;

    optCom[0] = 0;
    int t = 0;
    for (; t < strength - 1; t++)
    {
        // Number of combinations that keep the current value at position t.
        int tempIndex = numCombinations(numCols - optCom[t] - 1, strength - t - 1);

        // Largest value position t can take while leaving room for the
        // remaining positions. A valid index never needs to go beyond it, so
        // this bound only guarantees termination (and a non-zero divisor
        // below) when the index is out of range.
        const int maxValue = numCols - strength + t;

        while (index >= tempIndex && optCom[t] < maxValue)
        {
            // Skip every combination that has the current value at position t.
            index -= tempIndex;
            optCom[t]++;
            // Rescale C(n, k) to C(n - 1, k) without recomputing it from
            // scratch; the product is formed in 64 bits to avoid int overflow.
            tempIndex = static_cast<int>(
                (static_cast<long long>(tempIndex) * (numCols - optCom[t] - strength + t + 1)) /
                (numCols - optCom[t]));
        }
        // The next position starts just above the value chosen here.
        optCom[t + 1] = optCom[t] + 1;
    }

    // Here t == strength - 1. With a single-element combination there is no
    // previous element to offset from, so the element is the index itself.
    if (t == 0)
        optCom[0] = index;
    else
        optCom[t] = optCom[t - 1] + index + 1;
}
// Demo kernel: each thread repeatedly computes the combination whose rank
// equals its global thread index. The result is kept in a per-thread scratch
// buffer and is not written anywhere visible to the host.
//
// Launch layout: 1D grid of 1D blocks. Threads whose index does not name an
// existing combination exit without doing any work.
//
// Parameters:
//   numCols  - number of distinct values to choose from.
//   strength - number of elements in each combination.
__global__ void foo(int numCols, int strength)
{
    if (strength <= 0 || strength > numCols)
        return;

    // Equals threadIdx.x for a single-block launch.
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= numCombinations(numCols, strength))
        return;

    // Size the scratch buffer from the parameter, not a hard-coded 3.
    int *optComb = new int[strength];
    // In-kernel allocation comes from the limited device heap and can fail;
    // a failed allocation is expected to yield a null pointer.
    if (!optComb)
        return;

    const int length = 10000; // number of repetitions of the same computation
    for (int i = 0; i < length; i++)
        findCombFromIndex(optComb, index, numCols, strength);

    // Return the buffer to the device heap so repeated launches do not
    // exhaust it.
    delete[] optComb;
}
// Launches foo with one block of 100 threads and reports any CUDA error.
// Returns 0 on success and 1 if the launch, the kernel execution, or the
// device reset fails.
int main()
{
    int numCols = 20;
    int strength = 3;

    foo<<<1,100>>>(numCols, strength);

    // A kernel launch returns no status itself; configuration errors are
    // picked up here.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Errors raised while the kernel runs (for example a launch timeout)
    // are reported by the next synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(err));
        cudaDeviceReset();
        return 1;
    }

    err = cudaDeviceReset();
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
When I run this code with a small loop length, it works fine. However, when I increase the loop length, the NVIDIA driver stops and gives me the error below.
I know there is no point in calling the same function in a loop several times. Instead of posting the whole project, I just wanted to present a small working example here to understand why this happens.