I try to perform a QR factorization on GPU using the cusolver library from CUDA.
I reduced my problem to the example below.
Basically, the few steps are :
- I allocate memory and initialized a [5x3] matrix with 1s on the host,
- I allocate memory and copy the matrix on the device
- I initialize the solver handler with
cusolverDnCreate
- I determine the size of the needed work space with
cusolverDnDgeqrf_bufferSize
- And, finally, try to do the QR factorization with
cusolverDnDgeqrf
Unfortunately, the last command systematically fails by returning a CUSOLVER_STATUS_EXECUTION_FAILED
(int value = 6) and I can't figure out what went wrong!
Here is the faulty code:
#include <cusolverDn.h>
#include <cuda_runtime_api.h>
int main(void)
{
int N = 5, P = 3;
double *hostData;
cudaMallocHost((void **) &hostData, N * sizeof(double));
for (int i = 0; i < N * P; ++i)
hostData[i] = 1.;
double *devData;
cudaMalloc((void**)&devData, N * sizeof(double));
cudaMemcpy((void*)devData, (void*)hostData, N * sizeof(double), cudaMemcpyHostToDevice);
cusolverStatus_t retVal;
cusolverDnHandle_t solverHandle;
retVal = cusolverDnCreate(&solverHandle);
std::cout << "Handler creation : " << retVal << std::endl;
double *devTau, *work;
int szWork;
cudaMalloc((void**)&devTau, P * sizeof(double));
retVal = cusolverDnDgeqrf_bufferSize(solverHandle, N, P, devData, N, &szWork);
std::cout << "Work space sizing : " << retVal << std::endl;
cudaMalloc((void**)&work, szWork * sizeof(double));
int *devInfo;
cudaMalloc((void **)&devInfo, 1);
retVal = cusolverDnDgeqrf(solverHandle, N, P, devData, N, devTau, work, szWork, devInfo); //CUSOLVER_STATUS_EXECUTION_FAILED
std::cout << "QR factorization : " << retVal << std::endl;
int hDevInfo = 0;
cudaMemcpy((void*)devInfo, (void*)&hDevInfo, 1 * sizeof(int), cudaMemcpyDeviceToHost);
std::cout << "Info device : " << hDevInfo << std::endl;
cudaFree(devInfo);
cudaFree(work);
cudaFree(devTau);
cudaFree(devData);
cudaFreeHost(hostData);
cudaDeviceReset();
}
Would you see any obvious error in my code, please let me know! Many thanks.