0

Can the following program be written in CUDA 9.0 using generic vector<> types instead of raw dynamic arrays?

#include <cstdlib>   // std::exit, EXIT_FAILURE
#include <iostream>
#include <vector>

// Element-wise vector addition: c[i] = a[i] + b[i] for i in [0, n).
// Expects a 1D grid of 1D blocks providing at least n total threads;
// a, b, c must be device pointers to at least n floats each.
// Inputs are const __restrict__ so the compiler can route the loads
// through the read-only data cache and reorder them freely.
__global__ void addVectors(const float* __restrict__ a,
                           const float* __restrict__ b,
                           float* __restrict__ c, int n) {
    // Flat global thread index across the 1D grid.
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Bounds check: the grid may overshoot n when n is not a
    // multiple of blockDim.x.
    if (tid < n) {
        c[tid] = a[tid] + b[tid];
    }
}

// Verify a CUDA runtime call succeeded; report and exit on failure.
// Kernel launches do not return an error directly — they are checked
// via cudaGetLastError() immediately after the launch below.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            std::cerr << "CUDA error at " << __FILE__ << ':' << __LINE__   \
                      << ": " << cudaGetErrorString(err_) << '\n';         \
            std::exit(EXIT_FAILURE);                                       \
        }                                                                  \
    } while (0)

int main() {
    const int n = 1024;

    // Host-side storage: std::vector manages host memory automatically;
    // the corresponding device buffers still need explicit
    // cudaMalloc/cudaFree below.
    std::vector<float> a(n), b(n), c(n);

    // Initialize inputs: a[i] = i, b[i] = i*i, so c[i] should equal i + i*i.
    for (int i = 0; i < n; i++) {
        a[i] = static_cast<float>(i);
        b[i] = static_cast<float>(i) * static_cast<float>(i);
    }

    const size_t bytes = static_cast<size_t>(n) * sizeof(float);

    // Allocate device buffers.
    float *d_a = nullptr, *d_b = nullptr, *d_c = nullptr;
    CUDA_CHECK(cudaMalloc(&d_a, bytes));
    CUDA_CHECK(cudaMalloc(&d_b, bytes));
    CUDA_CHECK(cudaMalloc(&d_c, bytes));

    // Copy BOTH input vectors to the device. The original code copied only
    // `a` (the "Similar for b" comment was never implemented), so the
    // kernel read uninitialized device memory through d_b.
    CUDA_CHECK(cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b.data(), bytes, cudaMemcpyHostToDevice));

    // Launch with a ceil-div grid so the program stays correct for any n,
    // instead of the hard-coded <<<1, 1024>>> that silently drops
    // elements past index 1023.
    const int threadsPerBlock = 256;
    const int blocks = (n + threadsPerBlock - 1) / threadsPerBlock;
    addVectors<<<blocks, threadsPerBlock>>>(d_a, d_b, d_c, n);
    CUDA_CHECK(cudaGetLastError());  // catches bad launch configuration

    // Blocking device-to-host copy; also synchronizes with the kernel,
    // so any asynchronous execution error surfaces here.
    CUDA_CHECK(cudaMemcpy(c.data(), d_c, bytes, cudaMemcpyDeviceToHost));

    // Release device memory.
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));

    // Print the first 10 results (expected: 0, 2, 6, 12, 20, ...).
    for (int i = 0; i < 10; i++) {
        std::cout << c[i] << '\n';
    }
    return 0;
}

I want to replace the raw device pointers d_a, d_b, and d_c with generic (template-based) container types.

How can I do that?

user366312
  • 16,949
  • 65
  • 235
  • 452
  • Potentially this is an answer: https://stackoverflow.com/a/71177717/1075282 – Renat Jul 21 '23 at 10:19
  • Not sure why you would want to use ancient CUDA 9.0, but here is a very similar example using the Thrust library coming with the CUDA toolkit: [`examples/saxpy.cu`](https://github.com/NVIDIA/thrust/blob/cuda-9.0/examples/saxpy.cu). I linked the version tagged CUDA 9.0. – paleonix Jul 21 '23 at 10:20
  • @paleonix, *Not sure why you would want to use ancient CUDA 9.0* --- because my lab machine doesn't support higher. – user366312 Jul 21 '23 at 10:22
  • 1
    The point of the answer linked by @Renat is that you don't want to use a `std::vector`-like container in device code because many features like resizing are bad for performance. For managing the device memory from the host, `vector`s are totally fine. – paleonix Jul 21 '23 at 10:25
  • @paleonix, 10 years old machines. – user366312 Jul 21 '23 at 10:36
  • What you are calling generics are templates in C++. See e.g. the [tag:generics] tag description for the distinction. This question should be tagged [tag:templates] instead. – paleonix Jul 21 '23 at 10:42
  • Does this answer your question? [High level GPU programming in C++](https://stackoverflow.com/questions/16438099/high-level-gpu-programming-in-c) – paleonix Jul 21 '23 at 10:53
  • Yes, the code you have shown works fine with CUDA. – Robert Crovella Jul 21 '23 at 14:35

0 Answers0