I am trying to use opencl for the first time, the goal is to calculate the argmin of each row in an array. Since the operation on each row is independent of the others, I thought this would be easy to put on the graphics card.
I seem to get worse performance using this code than when i just run the code on the cpu with an outer forloop, any help would be appreciated.
Here is the code:
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
int argmin(global double *array, int end)
{
double minimum = array[0];
int index;
for (int j = 0; j < end; j++)
{
if (array[j] < minimum)
{
minimum = array[j];
index = j;
}
}
return index;
}
kernel void execute(global double *dist, global long *res, global double *min_dist)
{
int row_size = 0;
int i = get_global_id(0);
int row_index = i * row_size;
res[i] = argmin(&dist[row_index], row_size);
min_dist[i] = dist[res[i] + row_index];
}