I'm trying to do a matrix-matrix multiplication using Cublas but it still not work and I don't figure out the problem. Since it is the first time I use Cublas I'm not sure that I set the right parameter, especially for the leading dimension
For example:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cublas_v2.h"
#include <stdio.h>
void mulWithCuda(double *c, const double *a, const double *b, unsigned int size);
int main(){
const int arraySize = 9;
const double a[12] = { 1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12 };
const double b[arraySize] = { 10, 20, 30, 40, 50, 60, 70, 80, 90 };
double c[12] = { 0 };
mulWithCuda(c, a, b, arraySize);
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 3; j++) {
printf("%lf ", c[i * 3 + j]);
}
printf("\n");
}
return 0;
}
void mulWithCuda(double* c, const double* a, const double* b, unsigned int size){
double *dev_a = 0;
double *dev_b = 0;
double *dev_c = 0;
cudaMalloc((void**)&dev_c, 12 * sizeof(double));
cudaMalloc((void**)&dev_a, size * sizeof(double));
cudaMalloc((void**)&dev_b, 12 * sizeof(double));
cudaMemcpy(dev_a, a, 12 * sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, size * sizeof(double), cudaMemcpyHostToDevice);
cublasHandle_t handle;
cublasCreate(&handle);
double alpha = 1.0;
double beta = 0;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, 4, 3, 3, &alpha, dev_a, 3, dev_b, 3, &beta, dev_c, 3);
cudaMemcpy(c, dev_c, 12 * sizeof(double), cudaMemcpyDeviceToHost);
cublasDestroy(handle);
cudaFree(dev_c);
cudaFree(dev_a);
cudaFree(dev_b);
}
The two matrix used are:
1 2 3
4 5 6
7 8 9
10 11 12
10 20 30
40 50 60
70 80 90
while the output is:
** On entry to DGEMM parameter number 8 had an illegal value
0.000000 0.000000 0.000000
0.000000 0.000000 0.000000
0.000000 0.000000 0.000000
0.000000 0.000000 0.000000