0

I wrote a program following JackOLantern's answer to "CUDA extern texture declaration", but my output doesn't print the values read through the extern texture declaration. P.S.: how do I add -rdc=true to enable external linkage? Result of the program: https://i.stack.imgur.com/aGh3U.png Thanks for your help! kernel.cu compilation unit

#include <stdio.h>
#include <stdlib.h>

// Definition of the 1D int texture reference. main() binds it to a device buffer with
// cudaBindTexture and kernel1 reads it through tex1Dfetch; the kernel2.cu compilation
// unit refers to this same object via an extern declaration.
texture<int, 1, cudaReadModeElementType> texture_test;

/********************/
/* CUDA ERROR CHECK */
/********************/
// gpuErrchk(call): evaluates a CUDA runtime call and forwards its cudaError_t result,
// together with the call site's file and line, to gpuAssert.
// The do { } while (0) wrapper makes the macro expand to a single statement, so that
// `if (cond) gpuErrchk(x); else ...` compiles; a bare { } block followed by ';' would
// terminate the if and orphan the else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA call on stderr and, by default, terminates the process.
//   code  - status returned by the CUDA runtime call
//   file  - source file of the call site (from __FILE__)
//   line  - source line of the call site (from __LINE__)
//   abort - when true (default), exit using the error code as the process exit status;
//           when false, only the message is printed and execution continues
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

/*************************/
/* LOCAL KERNEL FUNCTION */
/*************************/
// Debug kernel: each thread fetches the texture_test element at its own threadIdx.x
// and prints the thread index together with the fetched value.
// Only threadIdx.x is used as the index (blockIdx is ignored), and no bounds check is
// performed, so the launch must not use more threads per block than the number of
// elements bound to texture_test.
__global__ void kernel1() {
    const unsigned int tid = threadIdx.x;
    const int fetched = tex1Dfetch(texture_test, tid);
    printf("ThreadID = %i; Texture value = %i\n", tid, fetched);
}

// Forward declaration only: kernel2 is defined in the kernel2.cu compilation unit and
// is launched from main() below.
__global__ void kernel2();

/********/
/* MAIN */
/********/
// Fills a host array with 0..N-1, copies it to the device, binds it to texture_test,
// then launches kernel1 (this compilation unit) and kernel2 (declared above, defined
// elsewhere) so that each prints the texture contents. All resources are released
// before returning.
// Returns 0 on success, 1 if the host allocation fails; any CUDA error terminates the
// process through gpuErrchk.
// NOTE(review): cudaBindTexture / texture references are a legacy API that newer CUDA
// toolkits deprecate — confirm the toolkit version this is built with.
int main() {

    const int N = 16;

    // --- Host data allocation and initialization
    int *h_data = (int*)malloc(N * sizeof(int));
    if (h_data == NULL) {
        fprintf(stderr, "Host allocation of %d ints failed\n", N);
        return 1;
    }
    for (int i=0; i<N; i++) h_data[i] = i;

    // --- Device data allocation and host->device memory transfer
    int *d_data; gpuErrchk(cudaMalloc((void**)&d_data, N * sizeof(int)));
    gpuErrchk(cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice));

    gpuErrchk(cudaBindTexture(NULL, texture_test, d_data, N * sizeof(int)));

    // --- One thread per bound element: the kernels index the texture by threadIdx.x
    //     without a bounds check, so the block size is tied to N rather than hard-coded.
    kernel1<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    kernel2<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaUnbindTexture(texture_test));

    // --- Release device and host buffers (previously leaked)
    gpuErrchk(cudaFree(d_data));
    free(h_data);

    return 0;

}

kernel2.cu compilation unit

#include <stdio.h>

// Declaration only (extern): refers to the texture_test texture reference that is defined
// in the kernel.cu compilation unit and bound there before kernel2 is launched.
// NOTE(review): resolving a device symbol across compilation units is expected to need
// relocatable device code (nvcc -rdc=true) — confirm against the build settings.
extern texture<int, 1, cudaReadModeElementType> texture_test;

/**********************************************/
/* DIFFERENT COMPILATION UNIT KERNEL FUNCTION */
/**********************************************/
// Debug kernel living in a different compilation unit from the texture definition:
// each thread fetches the texture_test element at its own threadIdx.x and prints it.
// Only threadIdx.x is used as the index and there is no bounds check, so the launch
// must not use more threads per block than the number of elements bound to the texture.
__global__ void kernel2() {
    const int fetched = tex1Dfetch(texture_test, threadIdx.x);
    printf("Texture value = %i\n", fetched);
}
Community
  • 1
  • 1
  • 1
    I was able to run the code provided by @JackOLantern successfully. It's recommended that you don't include important content in your question via external links - when those links die, the question becomes less useful. It's also a good idea to specify as much as possible about your platform (compile command, GPU, CUDA version, platform linux/windows) *in your question*. Your question is clearer that way. (If you have a cc1.x GPU, for example, you can't use relocatable device code.) – Robert Crovella Mar 09 '15 at 14:26
  • OK, thank you, and I'm sorry about this. I did include the code in my question, but the formatting (such as the body text) came out as a mess; I can't edit it, and since I'm a new member I haven't used the site clearly yet. I will try it. – An Nguyễn Mar 09 '15 at 14:35

1 Answers1

0

P/s: how to add -rdc = true to enable external linkage?

In Nsight VSE, try setting Properties | CUDA C/C++ | Common | "Generate Relocatable Device Code" to "Yes".

nsight vse cuda project configuration page

Here is an nsight VSE documentation page that describes it as well.

Robert Crovella
  • 143,785
  • 11
  • 213
  • 257