-3

I am new to CUDA programming and writing a simple CUDA program to do simple calculations but when I am printing a particular float array back on CPU it is giving me segmentation fault. I am stuck at this please help!

#include <cuda.h>
#include <cuComplex.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
#include <math.h>
#include "generate.h"   // it is used to generate vectors X1,X2,Y1,Y2
#include <bits/stdc++.h>

using namespace std;

__global__ void shdce(float *dx1, float *dx2, float *dy1, float *dy2,float *dax, float *dbx, float *day, float *dby, float *dsr, float *deta1, float *deta2)
{
    int ii = threadIdx.x;
    dax[ii] = (dx2[ii] - dx1[ii])/2.0;
    dbx[ii] = (dx2[ii] + dx1[ii])/2.0;

    day[ii] = (dy2[ii] - dy1[ii])/2.0;
    dby[ii] = (dy2[ii] - dy1[ii])/2.0;

    // element normal vector (float)
    dsr[ii] = sqrt(pow(dax[ii],2) + pow(day[ii],2));
    deta1[ii] = (dy2[ii]-dy1[ii])/(2.0*dsr[ii]);
    deta2[ii] = (dx2[ii]-dx1[ii])/(2.0*dsr[ii]);
}

int main()
{
    vector<float> X1, Y1, X2, Y2;
    int size1, size2;
    float *dx1, *dx2, *dy1, *dy2, *dax, *dbx, *day, *dby, *dsr, *deta1, *deta2;

    X1 = generate1();                                         //X1=[0:10:1]
    size1 = X1.size();

    Y1 = generate3(size1);                  //Y1=zeroes (sizeof X1)     

    X2 = generate2();                       //X2=[1:11:1]
    size2 = X2.size();

    Y2 = generate3(size2);                  //Y2=zeroes (sizeof X2)

    float eta2[size1];
    for(int i = 0; i < size1; i++)
    {
        eta2[i]=0.0;
    }
    cudaMalloc( (void**)&dx1, size1 * sizeof(float) );
    cudaMalloc( (void**)&dx2, size2 * sizeof(float) );
    cudaMalloc( (void**)&dy1, size1 * sizeof(float) );
    cudaMalloc( (void**)&dy2, size2 * sizeof(float) );
    cudaMalloc( (void**)&dax, size1 * sizeof(float) );
    cudaMalloc( (void**)&dbx, size1 * sizeof(float) );
    cudaMalloc( (void**)&day, size1 * sizeof(float) );
    cudaMalloc( (void**)&dby, size1 * sizeof(float) );
    cudaMalloc( (void**)&dsr, size1 * sizeof(float) );
    cudaMalloc( (void**)&deta1, size1 * sizeof(float) );
    cudaMalloc( (void**)&deta2, size1 * sizeof(float) );

    cudaMemcpy( dx1, &X1, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dx2, &X2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dy1, &Y1, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( dy2, &Y2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    cudaMemcpy( deta2, &eta2, size1 * sizeof(float), cudaMemcpyHostToDevice );
    dim3 dimBlock( size1, 1 );
    dim3 dimGrid( 1, 1 );
    shdce <<< dimGrid, dimBlock >>> dx1,dx2,dy1,dy2,dax,dbx,day,dby,dsr,deta1,deta2);
    cudaMemcpy( eta2, deta2, size1 * sizeof(float), cudaMemcpyDeviceToHost);

    for(int i=0;i<size1;i++)
    {
        printf("%f \n",deta2[i]);
    }
    return 0;
}
hnefatl
  • 5,860
  • 2
  • 27
  • 49
  • One (probably unrelated) problem is that you use [variable-length arrays](https://en.wikipedia.org/wiki/Variable-length_array). These are not part of C++. Another unrelated problem is that you [should not include ``](http://stackoverflow.com/questions/31816095/why-should-i-not-include-bits-stdc-h). – Some programmer dude Jul 24 '17 at 09:17
  • 2
    This is illegal: `printf("%f \n",deta2[i]);` because `deta2` is allocated on the device. You cannot print that (or access it) from host code. Probably you should change `deta2` to `eta2` in that `printf` statement. And the vector handling is incorrect also as indicated in the answer. – Robert Crovella Jul 24 '17 at 13:24

1 Answers1

1

The problems is very likely with your cudaMemcpy calls:

cudaMemcpy( dx1, &X1, size1 * sizeof(float), cudaMemcpyHostToDevice );

More precisely your use of &X1 (etc). This will not copy the data in the vector, but the contents of the vector object itself.

You should use e.g. &X1[0] to get a pointer to the actual data wrapped by the vector.

Some programmer dude
  • 400,186
  • 35
  • 402
  • 621