the code files as follow:
a.h
void warperFoo();
a.cu
//---------- a.cu ----------
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "a.h"
__global__ void foo (void) {
printf("calling from kernel foo: %d\n", threadIdx.x);
// bar();
}
void warperFoo() {
printf("calling from warperFoo\n");
dim3 gdim(1,1,1);
dim3 bdim(4,4,4);
foo<<<gdim, bdim>>>();
}
main.cpp
#include <iostream>
#include <cuda_runtime_api.h>
#include "a.h"
using namespace std;
int main() {
warperFoo();
return 0;
}
makefile
.PHONY: clean
all: a.o
g++ -m64 -Wall a.o main.cpp -lcudart -L/usr/local/cuda-11.2/lib64/ -I/usr/local/cuda-11.2/include -lcudadevrt -lcuda
a.o:
nvcc --gpu-architecture=sm_70 -ccbin /usr/bin/gcc -c a.cu
clean:
rm -rf *.o a.out
make output
nvcc --gpu-architecture=sm_70 -ccbin /usr/bin/gcc -c a.cu
g++ -m64 -Wall a.o main.cpp -lcudart -L/usr/local/cuda-11.2/lib64/ -I/usr/local/cuda-11.2/include -lcudadevrt -lcuda
a.out output
calling from warperFoo
i want compile .cu with nvcc first and then compile c++ host code with g++.
it supposed to print "calling from kernel foo"...
SO why kernel didn't output?