I'm attempting to write a reduction function in cuda (this is an exercise, I know that I'm doing things which have been done better by other people) which takes a binary associative operator and an array and reduces the array using the operator.
I'm having difficulty with how to pass the function. I've written hostOp() as a host based example which works fine.
deviceOp() works for the first statement with an explicit call to fminf(), but when I call the function parameter, there is an illegal memory access error.
#include <iostream>
#include <cstdio>
#include <cmath>
using namespace std; //for brevity
__device__ float g_d_a = 9, g_d_b = 5;
float g_h_a = 9, g_h_b = 5;
template<typename argT, typename funcT>
__global__
void deviceOp(funcT op){
argT result = fminf(g_d_a, g_d_b); //works fine
printf("static function result: %f\n", result);
result = op(g_d_a,g_d_b); //illegal memory access
printf("template function result: %f\n", result);
}
template<typename argT, typename funcT>
void hostOp(funcT op){
argT result = op(g_h_a, g_h_b);
printf("template function result: %f\n", result);
}
int main(int argc, char* argv[]){
hostOp<float>(min<float>); //works fine
deviceOp<float><<<1,1>>>(fminf);
cudaDeviceSynchronize();
cout<<cudaGetErrorString(cudaGetLastError())<<endl;
}
OUTPUT:
host function result: 5.000000
static function result: 5.000000
an illegal memory access was encountered
Assuming I'm not doing something horribly stupid, how should I be passing fminf to deviceOp so that there isn't an illegal memory access?
If I am doing something horribly stupid, what is a better way?