10

CUDA runtime has a convenience function cudaGetErrorString(cudaError_t error) that translates an error enum into a readable string. cudaGetErrorString is used in the CUDA_SAFE_CALL(someCudaFunction()) macro that many people use for CUDA error handling.

I'm familiarizing myself with cuBLAS now, and I'd like to create a macro similar to CUDA_SAFE_CALL for cuBLAS. To make my macro's printouts useful, I'd like to have something analogous to cudaGetErrorString in cuBLAS.

Is there an equivalent of cudaGetErrorString() in cuBLAS? Or, have any cuBLAS users written a function like this?

solvingPuzzles
  • 8,541
  • 16
  • 69
  • 112

5 Answers

10

In CUDA 5.0, assuming you installed the samples, there is a file ..../samples/common/inc/helper_cuda.h which has the following:

#ifdef CUBLAS_API_H_
/*
 * cuBLAS API errors.
 *
 * Translates a cublasStatus_t value into the spelling of its enumerator.
 * Any value that matches none of the cases below yields "<unknown>".
 * The result always points at a string literal.
 *
 * Only compiled when CUBLAS_API_H_ is defined (presumably by the cuBLAS
 * API header -- confirm against the toolkit in use).
 */
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
/* Expands to a case label that returns the enumerator's own name. */
#define CUBLAS_STATUS_NAME_CASE(status) case status: return #status

    switch (error)
    {
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_SUCCESS);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_NOT_INITIALIZED);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_ALLOC_FAILED);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_INVALID_VALUE);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_ARCH_MISMATCH);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_MAPPING_ERROR);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_EXECUTION_FAILED);
        CUBLAS_STATUS_NAME_CASE(CUBLAS_STATUS_INTERNAL_ERROR);
    }

#undef CUBLAS_STATUS_NAME_CASE

    return "<unknown>";
}
#endif

There is probably something similar in previous versions of the CUDA SDK (Samples). This does not answer the question "is something built in?", if that is what you were asking, but it does answer your question "have any cuBLAS users written a function like this?"

Robert Crovella
  • 143,785
  • 11
  • 213
  • 257
  • 2
    Fantastic! I ended up writing roughly the same code myself in my answer. :) – solvingPuzzles Oct 24 '12 at 01:42
  • 1
    To my knowledge, there is no such function. I would suggest filing a feature request (known as an RFE = request for enhancement) through the NVIDIA bug reporting system. – njuffa Oct 24 '12 at 04:21
2

I'm still curious whether there's a built-in way to get error strings in cuBLAS, but I wrote my own for now.

According to Section 8.1 of the cuBLAS Guide there are only 8 possible cublasStatus_t values in cuBLAS. I printed them out...

/* Print every cuBLAS status enumerator as "NAME = <numeric value>". */
#define PRINT_CUBLAS_STATUS(status) printf(#status " = %d \n", status)
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_SUCCESS);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_NOT_INITIALIZED);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_ALLOC_FAILED);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_INVALID_VALUE);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_ARCH_MISMATCH);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_MAPPING_ERROR);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_EXECUTION_FAILED);
PRINT_CUBLAS_STATUS(CUBLAS_STATUS_INTERNAL_ERROR);
#undef PRINT_CUBLAS_STATUS

The printout:

CUBLAS_STATUS_SUCCESS = 0 
CUBLAS_STATUS_NOT_INITIALIZED = 1 
CUBLAS_STATUS_ALLOC_FAILED = 3 
CUBLAS_STATUS_INVALID_VALUE = 7 
CUBLAS_STATUS_ARCH_MISMATCH = 8 
CUBLAS_STATUS_MAPPING_ERROR = 11 
CUBLAS_STATUS_EXECUTION_FAILED = 13 
CUBLAS_STATUS_INTERNAL_ERROR = 14

My function to get the cuBLAS error string:

/*
 * Return the enumerator name of a cuBLAS status code.
 *
 * The result points at a string literal.  A status that matches none of
 * the cases below produces "unknown error".
 */
const char* cublasGetErrorString(cublasStatus_t status)
{
    /* Fallback used when no case below matches. */
    const char *name = "unknown error";

    switch (status)
    {
        case CUBLAS_STATUS_SUCCESS:
            name = "CUBLAS_STATUS_SUCCESS";
            break;

        case CUBLAS_STATUS_NOT_INITIALIZED:
            name = "CUBLAS_STATUS_NOT_INITIALIZED";
            break;

        case CUBLAS_STATUS_ALLOC_FAILED:
            name = "CUBLAS_STATUS_ALLOC_FAILED";
            break;

        case CUBLAS_STATUS_INVALID_VALUE:
            name = "CUBLAS_STATUS_INVALID_VALUE";
            break;

        case CUBLAS_STATUS_ARCH_MISMATCH:
            name = "CUBLAS_STATUS_ARCH_MISMATCH";
            break;

        case CUBLAS_STATUS_MAPPING_ERROR:
            name = "CUBLAS_STATUS_MAPPING_ERROR";
            break;

        case CUBLAS_STATUS_EXECUTION_FAILED:
            name = "CUBLAS_STATUS_EXECUTION_FAILED";
            break;

        case CUBLAS_STATUS_INTERNAL_ERROR:
            name = "CUBLAS_STATUS_INTERNAL_ERROR";
            break;
    }

    return name;
}
solvingPuzzles
  • 8,541
  • 16
  • 69
  • 112
  • 4
    Your `cublasGetErrorString` is *very* bad programming practice. You shouldn't ever refer to an enum by value, only by name. The whole point of using enumeration is so that the values are abstracted away. NVIDIA could decide to change the values of the enumeration tomorrow and your code will break, whereas [this code](http://stackoverflow.com/a/13041801/681865) will not. – talonmies Oct 24 '12 at 06:40
2

Since CUDA 11.4.2 (September 2021), there are finally cublasGetStatusName and cublasGetStatusString that return the enum name and description, respectively, of a cuBLAS error status. For example:

cublasGetStatusName( CUBLAS_STATUS_NOT_INITIALIZED ) returns "CUBLAS_STATUS_NOT_INITIALIZED".

cublasGetStatusString( CUBLAS_STATUS_NOT_INITIALIZED ) returns "the library was not initialized".

https://docs.nvidia.com/cuda/cublas/index.html#cublasGetStatusName

https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cublas-11.4.2

Incidentally, rocBLAS provides rocblas_status_to_string, and hipBLAS provides hipblasStatusToString.

Mark Gates
  • 452
  • 5
  • 8
  • Nvidia changed their documentation so the release notes don't have historical details anymore. For posterity, here's a link to their archived docs that show these functions were added in 11.4.2 as Mark said: https://docs.nvidia.com/cuda/archive/11.4.2/cuda-toolkit-release-notes/index.html#cublas-11.4.2 – jli Jun 07 '23 at 15:03
1

To complement Mark Gates's answer, since CUDA 11.4.2, we can use cublasGetStatusString and thus write the following macro:

/*
 * Check the status produced by a cuBLAS call.  On anything other than
 * CUBLAS_STATUS_SUCCESS, print the status description together with the
 * source file and line, then terminate with EXIT_FAILURE.
 *
 * The argument is evaluated exactly once and its value is kept in a local.
 * This matters because the argument is normally the cuBLAS call itself:
 * expanding it a second time inside the message would re-run the call on
 * the failure path and could report a status different from the one that
 * was actually tested.
 *
 * Relies on cublasGetStatusString being available.
 */
#define CUBLAS_ERROR(x) do { \
    const cublasStatus_t cublas_error_status_ = (x); \
    if (cublas_error_status_ != CUBLAS_STATUS_SUCCESS) { \
        printf("Error %s at %s:%d\n", \
               cublasGetStatusString(cublas_error_status_), \
               __FILE__, __LINE__); \
        exit(EXIT_FAILURE); \
    } \
} while(0)

It might not always be what you want. Remember that you probably need to free memory and destroy the cuBLAS handle before exiting. You may also not want to exit at all. I believe it to be similar to the macro CUDA_SAFE_CALL(someCudaFunction()) that you mention.

Finally, since CUDA 11.4.2 is fairly new, I would recommend using Robert Crovella's answer instead for better backwards compatibility.

Dimitri Lesnoff
  • 317
  • 1
  • 14
-1

Following the popular gpuErrchk solution from "What is the canonical way to check for errors using the CUDA runtime API?", I have overloaded the function with a cuBLAS alternative so that it also handles cuBLAS errors. You can then wrap your cuBLAS function calls with gpuErrchk as normal.

Judging from the helper_cuda.h file others have mentioned, it would be easy to keep adding overloaded functions for other cuda libraries (cuFFT, etc.). Hope this helps someone. Please tell me if there is a better way!

// Report a failed CUDA runtime call.
//
// code  - status returned by the CUDA runtime call being checked
// file  - source file of the call site (printed in the message)
// line  - source line of the call site (printed in the message)
// abort - when true, terminate the process with `code` as the exit status
//
// Does nothing when `code` is cudaSuccess.
inline void error::gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) const
{
    // Successful calls need no reporting.
    if (code == cudaSuccess)
    {
        return;
    }

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);

    if (abort)
    {
        exit(code);
    }
}

// Report a failed cuBLAS call.
//
// code  - status returned by the cuBLAS call being checked
// file  - source file of the call site (printed in the message)
// line  - source line of the call site (printed in the message)
// abort - when true, terminate the process with `code` as the exit status
//
// Does nothing when `code` is CUBLAS_STATUS_SUCCESS.  Otherwise one line is
// written to stderr.  The line is newline-terminated, like the cudaError_t
// overload's message, so that whatever is printed next starts on a fresh
// line.  A status that is not one of the enumerators listed below is
// reported with its numeric value rather than being dropped silently.
inline void error::gpuAssert(cublasStatus_t code, const char *file, int line, bool abort=true) const
{
    if (code == CUBLAS_STATUS_SUCCESS)
    {
        return;
    }

    // Enumerator name for the message; stays NULL for unrecognised values.
    const char *name = NULL;

    switch (code) {
        case CUBLAS_STATUS_NOT_INITIALIZED:
            name = "CUBLAS_STATUS_NOT_INITIALIZED";
            break;

        case CUBLAS_STATUS_ALLOC_FAILED:
            name = "CUBLAS_STATUS_ALLOC_FAILED";
            break;

        case CUBLAS_STATUS_INVALID_VALUE:
            name = "CUBLAS_STATUS_INVALID_VALUE";
            break;

        case CUBLAS_STATUS_ARCH_MISMATCH:
            name = "CUBLAS_STATUS_ARCH_MISMATCH";
            break;

        case CUBLAS_STATUS_MAPPING_ERROR:
            name = "CUBLAS_STATUS_MAPPING_ERROR";
            break;

        case CUBLAS_STATUS_EXECUTION_FAILED:
            name = "CUBLAS_STATUS_EXECUTION_FAILED";
            break;

        case CUBLAS_STATUS_INTERNAL_ERROR:
            name = "CUBLAS_STATUS_INTERNAL_ERROR";
            break;

        case CUBLAS_STATUS_NOT_SUPPORTED:
            name = "CUBLAS_STATUS_NOT_SUPPORTED";
            break;

        case CUBLAS_STATUS_LICENSE_ERROR:
            name = "CUBLAS_STATUS_LICENSE_ERROR";
            break;

        default:
            // Unrecognised status: handled below using the numeric value.
            break;
    }

    if (name != NULL)
    {
        fprintf(stderr, "cuBLAS Error: %s file: %s line: %d\n", name, file, line);
    }
    else
    {
        fprintf(stderr, "cuBLAS Error: unknown status %d file: %s line: %d\n",
                static_cast<int>(code), file, line);
    }

    if (abort)
    {
        exit(code);
    }
}

/*
 * Check the status returned by a GPU library call, passing the call site's
 * file and line to gpuAssert.
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * a semicolon forms exactly one statement.  With a bare { ... } body, code
 * such as `if (cond) gpuErrchk(call); else ...` fails to compile because the
 * semicolon after the closing brace ends the if statement before the else.
 */
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

Example output:

 ** On entry to SGEMM  parameter number 13 had an illegal value
cuBLAS Error: CUBLAS_STATUS_INVALID_VALUE file: ../src/XX.cu line: 323 Segmentation fault (core dumped)
t0mlane
  • 1
  • 1