I am quite new at CUDA programming with c++, so sorry for this simple question. I simply cannot figure out where i am going wrong with this. I am trying to do a matrix multiplication. I have found inspiration from several sources so it might be that i have mixed up some different methods. I am trying to multiply two matrixes h_a and h_b. I successfuly generate the two matrixes, but when i allocate the memory for the two matrices, i for some reason lose the values in that matrix, and even after the multiplication all values are zero. Below is the code:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <ctime>
#include <stdio.h>
#include <iostream>
#include <math.h>
using namespace std;
__global__ void MulKernel(int *c, const int *a, const int *b, const int P)
{
float tempsum;
int row = blockIdx.y*blockDim.y + threadIdx.y;
int col = blockIdx.x*blockDim.x + threadIdx.x;
if (row < P && col < P){
for (int i = 0; i < P; i++){
tempsum += a[row*P + i] * b[i*P + col];
}
}
c[row*P + col] = tempsum;
}
int main()
{
srand(time(NULL));
int *pointer;
int N = 16;
int SIZE = N*N;
int *h_a = new int[SIZE];
int *h_b = new int[SIZE];
int *h_c = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
h_a[i] = rand() % 1000;
h_b[i] = rand() % 1000;
}
cout << "First values " << h_a[0] << " " << h_b[0] << endl;
cudaMalloc(&h_a, sizeof(int)*SIZE);
cudaMalloc(&h_b, sizeof(int)*SIZE);
cudaMalloc(&h_c, sizeof(int)*SIZE);
cudaMalloc(&pointer, sizeof(int));
cout << "Second values " << h_a[0] << " " << h_b[0] << endl;
cudaMemcpy(h_a, &h_a, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(h_b, &h_b, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(pointer, &N, sizeof(int), cudaMemcpyHostToDevice);
cout << "Third values " << h_a[0] <<" "<< h_b[0] << endl;
MulKernel <<<1, 256 >>>(h_c, h_a, h_b, N);
cudaMemcpy(h_c, &h_c, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(h_a, &h_a, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(h_b, &h_b, sizeof(int), cudaMemcpyDeviceToHost);
for (int i = 0; i < 5; i++){
cout << h_c[i] << "=" << h_a[i] << h_b[i] << endl;
}
cout << h_c[1] << endl;
cudaFree(h_a);
cudaFree(h_b);
cudaFree(h_c);
return 0;
}
The output in the terminal reads:
First values 454 964
Second values 0 0
Third values 0 0
0=00
0=00
0=00
0=00
0=00
0
Press any key to continue . . .
I hope someone can spot the error(s)
Best regards