Why does cudaMemcpyToSymbol only update the constant memory seen by one of my kernels?
// head.h
#include <stdio.h>
// One-element constant-memory array read by both kernels.
// NOTE(review): this is a definition, not an extern declaration, so every .cu
// file that includes this header gets its own const_mem when built in nvcc's
// default whole-program mode (no -rdc=true). A cudaMemcpyToSymbol issued from
// one .cu file then only updates that file's instance. Sharing one instance
// would need `extern __constant__` here, a single definition in one .cu file,
// and relocatable device code (nvcc -rdc=true) -- confirm against the build.
__constant__ float const_mem[1];
__global__ void k0(); // defined in separate.cu
__global__ void k1(); // defined in main.cu
//separate.cu
#include "head.h"
// Kernel: prints element 0 of const_mem followed by a newline.
// NOTE(review): const_mem here is whatever instance this translation unit sees;
// verify it is the same one the host code writes to.
__global__ void k0() {
    const float value = const_mem[0];
    printf("%f\n", value);
}
//main.cu
#include "head.h"
// Kernel: prints element 0 of const_mem followed by a newline.
__global__ void k1() {
    const float* first = const_mem;
    printf("%f\n", *first);
}
// Returns true when `status` is cudaSuccess; otherwise reports the failing
// step on stderr and returns false.
static bool cudaOk(cudaError_t status, const char* step) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", step, cudaGetErrorString(status));
    return false;
}

// Copies a single float (5.0f) into const_mem, launches k0 and k1 with one
// block of one thread each, then waits for the device to finish.
// Returns 0 on success and 1 if any CUDA runtime call or launch fails.
int main() {
    const float arr[] = {5.0f};

    // NOTE(review): this writes the const_mem instance visible to this
    // translation unit; whether kernels defined in other .cu files see the
    // same instance depends on how const_mem is declared and on the build
    // mode (whole-program vs. -rdc=true) -- confirm.
    if (!cudaOk(cudaMemcpyToSymbol(const_mem, arr, sizeof(arr)), "cudaMemcpyToSymbol")) {
        return 1;
    }

    // Kernel launches do not return an error code; launch failures are
    // reported through cudaGetLastError().
    k0<<<1, 1>>>();
    if (!cudaOk(cudaGetLastError(), "k0 launch")) {
        return 1;
    }

    k1<<<1, 1>>>();
    if (!cudaOk(cudaGetLastError(), "k1 launch")) {
        return 1;
    }

    // Launches are asynchronous and device-side printf output is only flushed
    // at synchronization points. Without this wait the process can exit before
    // the kernels' output ever reaches stdout.
    if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
        return 1;
    }

    return 0;
}
Compilation : nvcc main.cu separate.cu
Output of `sudo nvprof ./a.out`
(running plain `./a.out` prints literally nothing):
0.000000
5.000000
That means the kernel written in the other translation unit is not seeing the value copied into constant memory... but how is that possible?