
I'm trying to pass my dynamic array of structs to a kernel, but it doesn't work. I get "Segmentation fault (core dumped)".

My code (edited):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct Test {
    unsigned char *array;
};

__global__ void kernel(Test *dev_test) {
}

int main(void) {

    int n = 4;
    int size = 5;
    unsigned char *array[size];
    Test *dev_test;

    //   allocate for host
    Test *test = (Test*)malloc(sizeof(Test)*n);
    for(int i = 0; i < n; i++)
        test[i].array = (unsigned char*)malloc(size);


    //  fill data
    for(int i=0; i<n; i++) {
        unsigned char temp[] = { 'a', 'b', 'c', 'd' , 'e' };
        memcpy(test[i].array, temp, size);
    }

    //  allocate for gpu
    cudaMalloc((void**)&dev_test, n * sizeof(Test));
    for(int i=0; i < n; i++) {
        cudaMalloc((void**)&(array[i]), size * sizeof(unsigned char));
        cudaMemcpy(&(dev_test[i].array), &(array[i]), sizeof(unsigned char *), cudaMemcpyHostToDevice);
    }

    kernel<<<1, 1>>>(dev_test);

    return 0;
}

How should I correctly allocate GPU memory and copy data to it?

Bakus123

2 Answers


You need to allocate memory for the struct member array.

Test *test = (Test*)malloc(sizeof(Test)*n);
for(int i = 0; i < n; i++)
    test[i].array = (unsigned char*)malloc(size);

I would suggest reading this answer to deal with the other issues after this fix.
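
To make that concrete, here is a rough sketch of the complete program once the per-element data is also copied to the device (based on the question's code; the dev_array helper that keeps host-side copies of the device pointers is just one way of doing it):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct Test {
    unsigned char *array;
};

__global__ void kernel(Test *dev_test) {
    // only here to show the device can read the copied data
    printf("device sees: %c\n", dev_test[0].array[0]);
}

int main(void) {
    int n = 4;
    int size = 5;

    //  host structs with host buffers, filled as in the question
    Test *test = (Test*)malloc(sizeof(Test)*n);
    for(int i = 0; i < n; i++) {
        unsigned char temp[] = { 'a', 'b', 'c', 'd', 'e' };
        test[i].array = (unsigned char*)malloc(size);
        memcpy(test[i].array, temp, size);
    }

    //  device array of structs
    Test *dev_test;
    cudaMalloc((void**)&dev_test, n * sizeof(Test));

    //  per struct: allocate a device buffer, copy the host data into it,
    //  then copy that device pointer into the struct member on the device
    unsigned char *dev_array[4];   // host-side copies of the device pointers (n == 4)
    for(int i = 0; i < n; i++) {
        cudaMalloc((void**)&(dev_array[i]), size * sizeof(unsigned char));
        cudaMemcpy(dev_array[i], test[i].array, size, cudaMemcpyHostToDevice);
        cudaMemcpy(&(dev_test[i].array), &(dev_array[i]), sizeof(unsigned char *), cudaMemcpyHostToDevice);
    }

    kernel<<<1, 1>>>(dev_test);
    cudaDeviceSynchronize();

    //  free device buffers and host buffers, then the outer allocations
    for(int i = 0; i < n; i++) {
        cudaFree(dev_array[i]);
        free(test[i].array);
    }
    cudaFree(dev_test);
    free(test);

    return 0;
}

The struct array itself lives in device memory, so dev_test[i].array can only be filled through cudaMemcpy of a device pointer; the data it points to needs its own cudaMalloc and cudaMemcpy.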

haccks
  • And when you've fixed this issue, which is indeed the source of the seg fault (it has nothing to do with CUDA), you'll run into the next problem with this code (if you attempt to use the data in the kernel), which will be a duplicate of what is discussed [here](http://stackoverflow.com/questions/15431365/cudamemcpy-segmentation-fault). – Robert Crovella May 05 '15 at 14:08
  • @RobertCrovella; Agreed. – haccks May 05 '15 at 14:11
  • @RobertCrovella, thanks, I improved my code according to your instructions. But how can I copy data from test[i].array to dev_test[i].array? And what does deallocation of memory look like in this case? – Bakus123 May 05 '15 at 14:59
  • refer to the duplicate question I linked and study the linked examples. You have copied the pointers but not the actual data they point to. If you still need help, ask a new question. This one has already been answered. SO is not a chat room, and if you continue to modify your question as you incorporate the feedback given in the answers, it's going to be very confusing. – Robert Crovella May 05 '15 at 15:25

What is your card? If your card supports compute capability >= 3.0, try the unified memory system to have the same data in host/device memory.

You can have a look here:

It should maybe look like this:

int main(void) {
    int n = 4;
    int size = 5;
    Test *test;

    // one managed allocation for the array of structs
    cudaMallocManaged(&test, n * sizeof(Test));

    unsigned char values[] = { 'a', 'b', 'c', 'd', 'e' };
    for(int i = 0; i < n; i++)
    {
        // one managed buffer per struct, filled directly from the host
        unsigned char* temp;
        cudaMallocManaged(&temp, size * sizeof(unsigned char));
        memcpy(temp, values, sizeof(values));
        test[i].array = temp;
    }

    // unified memory avoids the copy code and the manual deep copy of the objects
    kernel<<<1, 1>>>(test);
    cudaDeviceSynchronize();
    return 0;
}

And I hope you know it, but don't forget to call cudaFree and delete/free on allocated memory. (It is better to use std::vector and use data() to access the raw pointer.)
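
For the managed version above, the matching cleanup could look roughly like this (assuming test[i].array was pointed at the managed buffers, as in the snippet above):

// managed memory is released with cudaFree: inner buffers first, then the struct array
for(int i = 0; i < n; i++)
    cudaFree(test[i].array);
cudaFree(test);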

X3liF