I tried to run a simple CUDA program that adds two vectors, but the result is a vector containing only zeros.
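[EDIT] CUDA 11.0 compiles for a GPU with compute capability 5.2 by default, and a binary built that way cannot run on a GPU with a lower compute capability, so the kernel launch fails silently. You can change the target with the -arch= flag when compiling with nvcc (nvcc -arch=sm_50 file.cu in my case). See also the related question "cuda 11 kernel doesn't run".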
It seems like the kernel doesn't do anything, because I also tried storing an integer directly in c[0] and even that had no effect. The program runs on CUDA 11.0 with an M1200 on Ubuntu 20.04.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
// Element-wise vector addition: c[i] = a[i] + b[i].
//
// Launch layout: 1D grid of 1D blocks; each thread handles the single element
// at its global index (blockIdx.x * blockDim.x + threadIdx.x).
// Precondition: the kernel takes no length argument, so the launch must create
// exactly one thread per element (gridDim.x * blockDim.x == element count).
// Any extra thread would read and write past the end of the buffers.
// a, b and c must point to memory that is accessible from the device.
// No shared memory is used.
__global__ void vectorAdd(const int* a, const int* b, int* c){
    // Use the global index so that launches with more than one block address
    // distinct elements instead of every block recomputing the first blockDim.x.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    c[i] = a[i] + b[i];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Adds two small host vectors on the GPU and prints the element count
// followed by each element of the result.
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails;
// in the failure case the error is printed to stderr and no results are printed.
int main() {
    int a[]= {1,2,3,4,5,6,7,8,9};
    int b[]= {1,2,3,4,5,6,7,8,9};
    // Compile-time constant so that c is an ordinary array, not a
    // variable-length array (which standard C++ does not allow).
    const int sa = sizeof(a) / sizeof(int);
    int c[sa] = {0};

    int* cudaA = 0;
    int* cudaB = 0;
    int* cudaC = 0;

    // Short-circuit on the first failure; the pointers that were never
    // allocated stay null, which cudaFree accepts below.
    bool ok = cudaOk(cudaMalloc(&cudaA, sizeof(a)), "cudaMalloc(a)")
           && cudaOk(cudaMalloc(&cudaB, sizeof(b)), "cudaMalloc(b)")
           && cudaOk(cudaMalloc(&cudaC, sizeof(c)), "cudaMalloc(c)")
           && cudaOk(cudaMemcpy(cudaA, a, sizeof(a), cudaMemcpyHostToDevice), "copy of a to device")
           && cudaOk(cudaMemcpy(cudaB, b, sizeof(b), cudaMemcpyHostToDevice), "copy of b to device");

    std::cout << sa << std::endl;

    if (ok) {
        // One block with one thread per element, as vectorAdd requires.
        vectorAdd <<< 1, sa >>> (cudaA, cudaB, cudaC);
        // A kernel launch returns nothing: launch failures (for example a binary
        // built for the wrong GPU architecture) are only visible through
        // cudaGetLastError, and faults during execution through the next sync.
        ok = cudaOk(cudaGetLastError(), "vectorAdd launch")
          && cudaOk(cudaDeviceSynchronize(), "vectorAdd execution")
          && cudaOk(cudaMemcpy(c, cudaC, sizeof(c), cudaMemcpyDeviceToHost), "copy of c to host");
    }

    if (ok) {
        for (int x = 0; x < sa; x++){
            std::cout << c[x]<< std::endl;
        }
    }

    // Release device memory on every path.
    cudaFree(cudaA);
    cudaFree(cudaB);
    cudaFree(cudaC);

    return ok ? 0 : 1;
}
The code is from a video on YouTube.