1

I am doing a project in cuda.I have allocated pointers in device memory copied the host pointers to the device pointers and called the global function. I am trying to copy back the device pointers into the host pointers and display the result.

But it seems that the cudaMemcpy() is not working since the device pointers which i intended to copy to the host pointers are supposedly not being copied. In the actual code I am trying to print the index and the content of the array that were assigned in the global function. I spent a whole day trying to fix it but still I am unable to.Please help me out here. any sort of help will be greatly appreciated.

#include<cuda.h>
#include<cuda_runtime.h>
#include<iostream>
#include<device_launch_parameters.h>
#define number 4
#define PI 3.141592654
using namespace std;
struct vertex
{
    float x,y,z,h;
    vertex():x(0),y(0),z(0),h(1){};
    vertex(float x, float y, float z, float h=0):x(x),y(y),z(z),h(h){};

};
struct triangle
{
    vertex v1,v2,v3;
    triangle(){
    }
};
__global__ void check(float mat[4][4],vertex *a,float *re,int *index)
{
    int idx = blockIdx.x+ threadIdx.x;
    re[idx] = mat[idx][0];
    re[idx] = mat[idx][1];
    re[idx] = mat[idx][2];
    re[idx] = mat[idx][3];
}
int main()
{   
    triangle t1;

    t1.v1.x = 2;
    t1.v1.y = 1.33512;
    t1.v1.z = 5.849567;

    t1.v2.x = 2;
    t1.v2.y = -1.33512;
    t1.v2.z = 5.849567;

    t1.v3.x = 2;
    t1.v3.y = 0;
    t1.v3.z = 5;

    int index_h[4]={0,0,0,0};
    int index_d[4];
    vertex *check_d;
    vertex *check_h;
    float check_re_d[4];
    float check_re_h[4]={0,0,0,0};
    float translation_check_d[4][4];
    float translation_check_h[number][number] = {{1, 0, 0, -t1.v1.x},
        {0, 1, 0, -t1.v1.y},
        {0, 0, 1, -t1.v1.z},
        {0 ,0 ,0, 1}};

    check_h = new vertex(1,-4,3);
    cudaMalloc((void**)&index_d,4*sizeof(int));
    cudaMalloc((void**)&check_d, sizeof(vertex));
    cudaMalloc((void**)&check_re_d, 4*sizeof(float));
    cudaMemcpy(index_d,index_h,4*sizeof(int),cudaMemcpyHostToDevice);
    cudaMemcpy(check_d,check_h,sizeof(vertex),cudaMemcpyHostToDevice);
    cudaMemcpy(check_re_d,check_re_h,4*sizeof(float),cudaMemcpyHostToDevice);
    size_t dPitch;
    cudaMallocPitch((void**)&translation_check_d,&dPitch,4*sizeof(float),4);
    cudaMemcpy2D(translation_check_d,4*sizeof(float),translation_check_h,4*sizeof(float),4*sizeof(float),4,cudaMemcpyHostToDevice);
    check<<<4,1>>>(translation_check_d,check_d,check_re_d,index_d);
    cudaMemcpy(check_re_h,check_re_d,4*sizeof(float),cudaMemcpyDeviceToHost);
    cudaMemcpy(index_h,index_d,4*sizeof(int),cudaMemcpyDeviceToHost);
    std::cout<<"These are the value"<<"INDEX: "<<index_h[0]<<" x: "<<check_re_h[0]<<"\n";
    std::cout<<"These are the value"<<"INDEX: "<<index_h[1]<<" x: "<<check_re_h[1]<<"\n";
    std::cout<<"These are the value"<<"INDEX: "<<index_h[2]<<" x: "<<check_re_h[2]<<"\n";
    std::cout<<"These are the value"<<"INDEX: "<<index_h[2]<<" x: "<<check_re_h[3]<<"\n";
    cudaFree(check_d);
    cudaFree(check_re_d);
    cudaFree(index_d);
    int a;
    cin>>a;
    return 0;
}
talonmies
  • 70,661
  • 34
  • 192
  • 269
  • 1
    I've already pointed out what [proper cuda error checking is to you](http://stackoverflow.com/questions/19633528/about-cudamemcpy-function-about-whether-both-the-source-and-destination-pointe) Why are you not using it? – Robert Crovella Oct 29 '13 at 04:22
  • i have taken your suggestions.. i have not used null pointers anywhere...there is no error while building the project...only i am not getting the intended output...help me out – Cool Programmer Oct 29 '13 at 04:29
  • ok i will give it a look..previously i happened to overlook it... – Cool Programmer Oct 29 '13 at 04:31
  • 1
    Your kernel `check` is failing to execute properly. if you did cuda error checking you'd discover this. If you run your code with cuda-memcheck you'll discover the reason why your kernel is failing and get some ideas about how to fix it. – Robert Crovella Oct 29 '13 at 04:35
  • read about [proper cuda error checking here](http://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api) – Robert Crovella Oct 29 '13 at 04:36
  • `cudaMallocPitch` and `cudaMemcpy2D` are not designed to transfer double pointer (`**`) arrays. Note that those functions use single-pointer arguments. You should "flatten" your `mat` array. The invalid global read is arising from the attempt to read from that array incorrectly. The question I linked as a duplicate has more description. – Robert Crovella Oct 29 '13 at 04:42
  • i did the error check and it displayed the message like: cannot display the pdb file.. i am looking at the cudaMallocPitch() and cudaMemcpy().thanks – Cool Programmer Oct 29 '13 at 05:40
  • This is the same code as in [CXX0030:error:expression could not be evaluated, visual studio && CUDA](http://stackoverflow.com/questions/19630740/cxx0030errorexpression-could-not-be-evaluated-visual-studio-cuda). I have written an answer for that question. Have a look if it is useful to you. – Vitality Oct 29 '13 at 11:59
  • we are actually project partners and posted separately....thanks a lot for your help.. – Cool Programmer Oct 30 '13 at 06:13

0 Answers0