Is there any way I can call CUDA runtime function calls such as
cudaMemcpy(...);
in a .cpp file, compiled with a regular C++ compiler?
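For reference: the CUDA runtime API is an ordinary C/C++ API, so a .cpp file compiled with g++ can call cudaMemcpy(...) and friends directly, provided it includes cuda_runtime.h and links against libcudart; only the <<< >>> kernel-launch syntax requires nvcc. A minimal sketch of that, assuming a standard install under /usr/local/cuda (the file name is illustrative):

// host_only.cpp -- built with g++ alone, no nvcc
#include <cuda_runtime.h>   // declares cudaMalloc, cudaMemcpy, cudaFree, ...
#include <cstdio>

int main() {
    int host_val = 42, result = 0;
    int *dev_ptr = nullptr;

    // Ordinary runtime API calls; none of this needs nvcc.
    cudaMalloc((void **)&dev_ptr, sizeof(int));
    cudaMemcpy(dev_ptr, &host_val, sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(&result, dev_ptr, sizeof(int), cudaMemcpyDeviceToHost);
    cudaFree(dev_ptr);

    printf("round trip: %d\n", result);
    return 0;
}

Build with something like: g++ host_only.cpp -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart. Kernels themselves still have to live in a .cu file compiled by nvcc, which is what the wrapper pattern below is for.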
EDIT: There was an example here, but it's no longer found; most of the example is copied below.
The caller, in C (but it could be C++):

#include <stdio.h>

/* Declared here, defined in the .cu file. Note that no CUDA headers
   are needed on this side; the caller only sees an ordinary function. */
extern void kernel_wrapper(int *a, int *b);

int main(int argc, char *argv[])
{
    int a = 2;
    int b = 3;

    kernel_wrapper(&a, &b);

    printf("a = %d, b = %d\n", a, b);  /* expect 12 and 6 */
    return 0;
}
The callee (CUDA, compiled with nvcc):

__global__ void kernel(int *a, int *b)
{
    int tx = threadIdx.x;

    switch (tx) {
    case 0:
        *a = *a + 10;
        break;
    case 1:
        *b = *b + 3;
        break;
    default:
        break;
    }
}

// extern "C" gives the wrapper an unmangled name so the C caller can
// link against it; if the caller is compiled as C++, declare it
// extern "C" there too (or drop it on both sides).
extern "C" void kernel_wrapper(int *a, int *b)
{
    int *d_1, *d_2;
    dim3 threads(2, 1);
    dim3 blocks(1, 1);

    cudaMalloc((void **)&d_1, sizeof(int));
    cudaMalloc((void **)&d_2, sizeof(int));

    cudaMemcpy(d_1, a, sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_2, b, sizeof(int), cudaMemcpyHostToDevice);

    // Launch with the device pointers, not the host pointers.
    kernel<<<blocks, threads>>>(d_1, d_2);

    cudaMemcpy(a, d_1, sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(b, d_2, sizeof(int), cudaMemcpyDeviceToHost);

    cudaFree(d_1);
    cudaFree(d_2);
}
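A build matching this split might look like the following (the file names main.c and kernel.cu and the library path are assumptions; adjust them to your setup, and link with g++ so libcudart's C++ dependencies resolve):

nvcc -c kernel.cu          # device code: needs nvcc
gcc  -c main.c             # host side: any C/C++ compiler works
g++  main.o kernel.o -L/usr/local/cuda/lib64 -lcudart -o app

Running ./app should leave a == 12 and b == 6 back in main.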
Similarly to @PreetSangha (who provided a very useful answer), I had some issues when running it as extern ..., so I would just like to add the solution that worked for me (including templated function calls).
This is the code for my example (the full CUDA code is excluded because it is already in @PreetSangha's example); it is meant to give the main idea of how it works. It compiled and ran on a Linux machine; I haven't tried it on Windows yet, but it should be similar. In my scenario I wanted int, float and double, but more templates could be added.
// main.cpp
#include "wrapper.hpp"

int main(int argc, char *argv[]) {
    // runOnGPU expects two input arrays and the matrix dimension n
    int a[4] = {1, 2, 3, 4};
    int b[4] = {5, 6, 7, 8};
    runOnGPU(a, b, 2);   // multiply two 2x2 matrices
    return 0;
}
// cuda.cu
#include "wrapper.hpp"

template <typename T>
__global__ static void matMultCUDA(const T* a, const T* b, T* c, int n) {
    int col = threadIdx.x + blockIdx.x * blockDim.x;
    int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard both the reads and the write, so threads outside the
    // matrix never touch out-of-bounds memory.
    if (col < n && row < n) {
        T value = 0;
        for (int j = 0; j < n; j++) {
            value += a[row * n + j] * b[j * n + col];
        }
        c[row * n + col] = value;
    }
}

bool InitCUDA(bool b) {
    /* CUDA initialization */
}

template <typename T>
float runOnGPU(T* a, T* b, int n) {
    /* Do CUDA things here :D
       (allocate cuda_a/cuda_b/cuda_c, copy a and b over,
       set up dimGrid/dimBlock, copy the result back, ...) */
    matMultCUDA<<<dimGrid, dimBlock>>>(cuda_a, cuda_b, cuda_c, n);
}

// Explicit instantiations: the template definition lives in this .cu
// file, so every T the C++ side may call with must be instantiated
// here, or the linker will not find the symbol.
template float runOnGPU<int>(int* a, int* b, int n);
template float runOnGPU<float>(float* a, float* b, int n);
template float runOnGPU<double>(double* a, double* b, int n);
// wrapper.hpp
#pragma once

bool InitCUDA(bool b);

// Only the declaration is visible to the C++ side; the definition and
// its explicit instantiations live in cuda.cu.
template <typename T>
float runOnGPU(T* a, T* b, int n);
# makefile (each recipe line must be indented with a real tab)
CXX       = g++
CXXFLAGS  = -O3
NVCC      = nvcc
NVCCFLAGS = -O3
LDFLAGS   = -L/usr/local/cuda-11/lib64 -lcudart

OBJS = main.o cuda.o

all: program

program: $(OBJS)
	$(CXX) $(CXXFLAGS) $(OBJS) -o program $(LDFLAGS)

main.o: main.cpp wrapper.hpp
	$(CXX) $(CXXFLAGS) -c main.cpp

cuda.o: cuda.cu wrapper.hpp
	$(NVCC) $(NVCCFLAGS) -c cuda.cu
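With that in place (and assuming CUDA 11 really is installed at the path given in LDFLAGS), building and running is just:

make
./program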