0

I am implementing a CUDA kernel function that will be used in my project. In this code sample, every thread finds the combination that corresponds to its own index. For example,

int numCols = 20;  
int strength = 3;  // length of combinations

With these inputs, the full list of combinations will be:

(0,1,2), (0,1,3), (0,1,4), (0,1,5), ..., (17,18,19)

If the thread index (with 1 block, threadIdx.x) is 2, then the thread will calculate the combination at zero-based index 2, which is (0,1,4). The code below contains everything needed to do these calculations.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>

using namespace std;

/**
 * Binomial coefficient C(n, k): the number of ways to choose k items out of n.
 *
 * Callable from both host and device code, so the same routine can be used to
 * build a CPU reference for results computed on the GPU.
 *
 * @param n  size of the set being chosen from
 * @param k  number of items chosen
 * @return   C(n, k); 0 when k < 0 or k > n (no such selection exists).
 *           The result is only meaningful when it fits in an int.
 */
__host__ __device__ int numCombinations(int n, int k)
{
    // No valid selection: without this guard, k > n would be turned into a
    // negative k by the symmetry step below and n would be returned.
    if (k < 0 || k > n)
        return 0;

    // C(n, k) == C(n, n-k); iterate over the smaller of the two.
    if (k * 2 > n)
        k = n - k;
    if (k == 0)
        return 1;

    // After step i the accumulator holds exactly C(n, i), so the division is
    // always exact. The product before the division can exceed the int range
    // even when C(n, k) itself fits, hence the 64-bit accumulator.
    long long result = n;
    for (int i = 2; i <= k; ++i)
    {
        result *= (n - i + 1);
        result /= i;
    }
    return static_cast<int>(result);
}

/**
 * Writes into optCom[0 .. strength-1] the combination found at position
 * `index` (zero-based) when all `strength`-element combinations of
 * {0, ..., numCols-1} are listed in lexicographic order.
 *
 * Example: numCols = 20, strength = 3, index = 2 yields (0, 1, 4).
 *
 * @param optCom    output buffer with room for at least `strength` ints
 * @param index     zero-based position; expected to satisfy
 *                  0 <= index < numCombinations(numCols, strength).
 *                  Other values do not produce a valid combination.
 * @param numCols   size of the set the elements are drawn from
 * @param strength  number of elements per combination; nothing is written
 *                  when strength <= 0
 */
__device__ void findCombFromIndex(int *optCom, int index, int numCols, int strength)
{
    // Nothing to produce; also keeps the code below from touching optCom at all.
    if (strength <= 0)
        return;

    optCom[0] = 0;
    int t = 0;
    for (; t < strength - 1; t++)
    {
        // Number of combinations that keep the current value at position t.
        int tempIndex = numCombinations(numCols - optCom[t] - 1, strength - t - 1);

        // Skip whole groups of combinations until `index` falls inside one.
        // The tempIndex > 0 test stops the scan once no combinations remain,
        // so an out-of-range index cannot reach a division by zero below.
        while (tempIndex > 0 && index >= tempIndex)
        {
            index -= tempIndex;
            optCom[t]++;
            // C(n-1, k) = C(n, k) * (n-k) / n, where n = numCols - optCom[t]
            // (after the increment) and k = strength - t - 1. The product is
            // formed in 64 bits because it can exceed the int range.
            tempIndex = static_cast<int>(
                static_cast<long long>(tempIndex) * (numCols - optCom[t] - strength + t + 1)
                / (numCols - optCom[t]));
        }
        // Elements are strictly increasing: the next one starts just above.
        optCom[t + 1] = optCom[t] + 1;
    }

    // The last position already holds its smallest legal value (0 when
    // strength == 1, optCom[t-1] + 1 otherwise); the remaining index is the
    // offset from it. Written this way, strength == 1 never reads optCom[-1].
    optCom[t] += index;
}


/**
 * Demo kernel: each thread repeatedly computes the combination that matches
 * its global thread index.
 *
 * Launch layout: 1D grid of 1D blocks; thread i handles combination index i.
 * Threads whose index is not a valid combination index exit immediately.
 * Uses the device heap for a per-thread scratch buffer of `strength` ints.
 *
 * The repeat count only exists to lengthen the kernel's run time. Per the
 * discussion accompanying this sample, when the same GPU also drives the
 * display a long-running kernel can be stopped by the driver.
 *
 * @param numCols   size of the set the combinations are drawn from
 * @param strength  number of elements per combination
 */
__global__ void foo(int numCols, int strength)
{
    if (strength <= 0)
        return;

    int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= numCombinations(numCols, strength))
        return;   // no combination exists for this thread

    // Sized from the argument rather than a hard-coded 3, so the buffer always
    // matches what findCombFromIndex writes.
    int *optComb = new int[strength];
    if (optComb == NULL)
        return;   // device heap exhausted

    int length = 10000;
    for (int i = 0; i < length; i++)
        findCombFromIndex(optComb, index, numCols, strength);

    // Device-heap allocations outlive the kernel unless released explicitly.
    delete[] optComb;
}

// Reports a failed CUDA runtime call on stderr; returns true on success.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches foo with one block of 100 threads and reports any CUDA error.
// Returns 0 on success, 1 if the launch, the kernel execution, or the device
// reset failed.
int main()
{
    int numCols = 20;
    int strength = 3;

    foo<<<1,100>>>(numCols, strength);

    // A kernel launch returns no status itself: configuration errors are
    // picked up here, execution errors at the synchronize below.
    if (!cudaOk(cudaGetLastError(), "foo launch"))
        return 1;

    if (!cudaOk(cudaDeviceSynchronize(), "foo execution"))
    {
        cudaDeviceReset();
        return 1;
    }

    if (!cudaOk(cudaDeviceReset(), "cudaDeviceReset"))
        return 1;
    return 0;
}

When I run this code with a small loop length, it works fine. However, when I increase the loop length, the NVIDIA driver stops responding and gives me the error below.

enter image description here

I know there is no point in calling the same function in a loop several times. Instead of posting the whole project, I just wanted to present a small working example here in order to understand why this happens.

genclik27
  • 323
  • 1
  • 7
  • 18
  • 1
    The same GPU is being used for both display and computation, so you can't run long computations on it. To do so, you need to increase the GPU's default timeout threshold for computation, or use two different GPUs: one for computation and one for display. – Hemant Gangwar Oct 20 '14 at 12:16
  • @Learner Can you explain in detail how to increase the default timeout and how to use a different GPU for display? I am using Visual Studio. – genclik27 Oct 20 '14 at 12:36
  • 1
    @Learner is right, to workaround this issue you may find [this SO thread](http://stackoverflow.com/questions/6182151/cuda-program-causes-nvidia-driver-to-crash) helpful. I provided an answer there as well and it worked well for me. – Michal Hosala Oct 20 '14 at 12:46
  • @MichalHosala It seems this is going to work fine for my case, but I cannot restart my PC right now. When I do, I'll let you know the result :) Thanks for the answer. – genclik27 Oct 20 '14 at 13:00

0 Answers0