I have an array of pointers to an abstract class A
. These pointers point to objects of class B
,C
and they all derive from A
. The sizes of these derived classes are not the same, so they have a function called size
that returns a size_t
of their size.
MCVE:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <vector>
class A {
public:
float a;
A() {}
__host__ __device__ virtual size_t size() = 0;
};
class B : public A {
public:
float b;
B() {}
__host__ __device__ virtual size_t size() override { return sizeof(*this); }
};
class C : public A {
public:
float c, d;
C() {}
__host__ __device__ virtual size_t size() override { return sizeof(*this); }
};
__global__ void testKernel(A** objects, int numObjects) {
for (int i = 0; i < numObjects; i++) {
printf("%d\n", objects[i]->size());
}
}
int main()
{
std::vector<A*> host_pointers;
host_pointers.push_back(new B());
host_pointers.push_back(new C());
cudaError_t cudaStatus;
std::vector<A*> device_pointers;
for (auto obj : host_pointers) {
A* device_pointer;
cudaStatus = cudaMalloc((void**)&device_pointer, obj->size());
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed for size %d\n", obj->size());
exit(-1);
}
cudaStatus = cudaMemcpy(device_pointer, obj, obj->size(), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed for size %d\n", obj->size());
exit(-1);
}
device_pointers.push_back(device_pointer);
}
///By this point, both objects should have been copied over
///to device memory, and I should have valid pointers to them
A** array_of_device_pointers;
cudaStatus = cudaMalloc((void**)&array_of_device_pointers, device_pointers.size() * sizeof(A*));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed\n");
exit(-1);
}
cudaStatus = cudaMemcpy(array_of_device_pointers, device_pointers.data(), device_pointers.size() * sizeof(A*), cudaMemcpyHostToDevice);
testKernel<<<1, 1>>>(array_of_device_pointers, device_pointers.size());
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "kernel failed, reason: %s\n", cudaGetErrorString(cudaStatus));
}
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSynchronize failed\n");
exit(-1);
}
}
What I want to do: have objects derived from A
in a kernel
How I'm trying to do it: copy the objects to device memory one by one (since their sizes aren't the same), then copy an array of their device pointers to device memory, and then pass a device pointer to that onto a kernel
What I experience: When I run the program with the Nsight debugger, it stops at the line
printf("%d\n", objects[i]->size());
, (I'm guessing) meaning that object[0] is not a valid pointer.
Not sure if it matters with something this simple, but I'm running this on a GPU with compute capability 8.6, though compiling for compute capability 5.2