I'm trying to convert the following C code to CUDA. In the code below I have an array of elements [12, 13, 23, 24, 25], and I want output like [123, 234, 235, 245]. The logic should be clear from the code:
#include <stdio.h>
#define N 5
/*
 * Reference (CPU) implementation.
 *
 * For every pair (i, j) with i < j whose values agree in all digits except
 * the units digit, stores array[i]*10 + array[j]%10 in new_array. Each pair
 * owns a fixed slot in a packed upper-triangular layout of (N-1)*N/2 entries,
 * so slots of non-matching pairs keep their initial value 0.
 * Finally prints every slot, one per line.
 */
int main(){
    int array[N] = { 12, 13, 23, 24, 25 };
    int new_array[(N-1)*N/2] = { 0 };
    int i, j;

    for (i = 0; i < N; i++) {
        /* Value with the units digit stripped; invariant across the inner loop. */
        const int tens_i = array[i] - array[i] % 10;

        for (j = i + 1; j < N; j++) {
            if (tens_i == array[j] - array[j] % 10) {
                /* Row i starts at i*N - i*(i+1)/2; (j-(i+1)) is the offset within the row. */
                new_array[i*N + (j-(i+1)) - i*(i+1)/2] = array[i]*10 + array[j]%10;
            }
        }
    }

    for (i = 0; i < (N-1)*N/2; i++)
        printf("new_array[%d] = %d\n", i, new_array[i]);
    return 0;
}
The problem comes when I try to convert it into a CUDA kernel. I get some output, but I don't know how to select only the matching elements and store them in the array:
#include <stdio.h>
#define N 5
/**
 * Pairwise-combine kernel: one thread per (i, j) pair.
 *
 * Thread (threadIdx.x = i, threadIdx.y = j) handles the pair (array_d[i],
 * array_d[j]). When i < j and both values are equal once their units digit
 * is stripped, it writes array_d[i]*10 + array_d[j]%10 into the pair's slot
 * of the packed upper-triangular output.
 *
 * Launch layout: only threadIdx is used, so launch a single block whose
 * x and y extents are each at least N. Threads outside the N x N range exit
 * without touching memory.
 *
 * @param new_array_d  device buffer of (N-1)*N/2 ints; slots of pairs that do
 *                     not match are left untouched, so the caller must
 *                     initialise the buffer beforehand.
 * @param array_d      device buffer of N input ints.
 */
__global__ void kernel(int* new_array_d, int* array_d) {
    const int i = threadIdx.x;
    const int j = threadIdx.y;

    // Bounds guard plus strict upper triangle: only pairs with i < j < N do work.
    if (i >= N || j >= N || j <= i) return;

    const int lhs = array_d[i];
    const int rhs = array_d[j];

    // Combine only values that share every digit except the units digit.
    if (lhs - (lhs % 10) != rhs - (rhs % 10)) return;

    // Row i starts at i*N - i*(i+1)/2; (j-(i+1)) is the offset within the row.
    const int slot = i * N + (j - (i + 1)) - i * (i + 1) / 2;
    new_array_d[slot] = lhs * 10 + (rhs % 10);
}
// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host driver: uploads the input, runs `kernel` with one thread per (i, j)
 * pair, downloads the packed upper-triangular result and prints every slot.
 * Returns 0 on success, 1 if any CUDA call fails.
 */
int main(){
    const size_t inBytes  = N * sizeof(int);
    const size_t outBytes = (N-1)*N/2 * sizeof(int);

    int array[N] = { 12, 13, 23, 24, 25 };
    int new_array[(N-1)*N/2] = { 0 };
    int* array_d = NULL;
    int* new_array_d = NULL;

    bool ok = cudaOk(cudaMalloc((void**)&array_d, inBytes), "cudaMalloc(array_d)")
           && cudaOk(cudaMalloc((void**)&new_array_d, outBytes), "cudaMalloc(new_array_d)")
           && cudaOk(cudaMemcpy(array_d, array, inBytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy(host -> device)")
           // cudaMalloc does not clear memory and the kernel writes only the
           // slots of matching pairs, so zero the output first; otherwise the
           // remaining slots come back as garbage.
           && cudaOk(cudaMemset(new_array_d, 0, outBytes), "cudaMemset(new_array_d)");

    if (ok) {
        // Single block of N x N threads: thread (x, y) handles pair (i = x, j = y).
        dim3 grid(1,1);
        dim3 block(N,N);
        kernel<<<grid,block>>>(new_array_d,array_d);

        // A launch does not return an error itself; query it explicitly. The
        // blocking copy below then surfaces any fault raised while the kernel ran.
        ok = cudaOk(cudaGetLastError(), "kernel launch")
          && cudaOk(cudaMemcpy(new_array, new_array_d, outBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy(device -> host)");
    }

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(new_array_d);
    cudaFree(array_d);

    if (!ok) return 1;

    for (int i = 0; i < (N-1)*N/2; i++) printf("new_array[%d] = %d\n", i, new_array[i]);
    return 0;
}
Should I try a different indexing scheme, or just do it on the CPU? Please help. Thanks in advance.
Please note: I'm trying to combine two elements only if their digits other than the units place are the same. For example, 12 and 13 have 1 in common, so compute 12*10 + (13%10) = 123;
23 and 25 have 2 in common, so compute 23*10 + (25%10) = 235.