I want to create an object on the device and allocate it to a pointer available on the host. Is there something I'm doing wrong in here?
__global__ void createAProduction(DeviceProduction* production) {
production = new AProduction();
}
DeviceProduction * devAProduction = NULL;
cudaMalloc(&devAProduction, sizeof(AProduction));
createAProduction<<<1, 1>>>(devAProduction);
deviceProductions["A"] = devAProduction;
Somewhere further in the code I'd like to do sth. like:
BatchOperation ** devBatchOperations;
cudaMalloc((void **) &devBatchOperations, sizeof(BatchOperation *) * operationCount);
Then I populate that pointer array with that:
void DeviceBatchExecutor::execute(vector<BatchOperation> operationsToPerform) {
BatchOperation ** devBatchOperations;
cudaMalloc((void **) &devBatchOperations, sizeof(BatchOperation *) * operationsToPerform.size());
int i = 0;
for(batchOperationIt it = operationsToPerform.begin(); it != operationsToPerform.end(); ++it) {
BatchOperation * devBatchOperation;
cudaMalloc(&devBatchOperation, sizeof(BatchOperation));
cudaMemcpy(&devBatchOperation, &it, sizeof(BatchOperation), cudaMemcpyHostToDevice);
Vertex * devInputNode = it->inputNode->allocateToDevice();
cudaMemcpy(&(devBatchOperation->inputNode), &devInputNode, sizeof(Vertex *), cudaMemcpyDeviceToDevice);
cudaMemcpy(&(devBatchOperation->production), &(it->production), sizeof(Production *), cudaMemcpyDeviceToDevice);
cudaMemcpy(&devBatchOperations[i], &devBatchOperation, sizeof(BatchOperation *), cudaMemcpyDeviceToDevice);
i++;
}
int operationCount = operationsToPerform.size();
executeOperations<<<operationCount, 1>>>(devBatchOperations);
}
where production is a pointer to the device memory holding that created object AProduction. Then I finally invoke processing via
executeOperations<<<operationCount, 1>>>(devBatchOperations);
So I'm relying on virtual method calls. As those DeviceProduction objects were created on the device, there is also a virtual pointer table so it should work. See example here. But it doesn't since the received batch operations seem random... crashes on invocation.
__global__ void executeOperations(BatchOperation ** operation) {
operation[blockIdx.x]->production->apply(operation[blockIdx.x]->inputNode);
}
Batch operation is a struct holding the production to be executed.
struct BatchOperation {
Production * production;
Vertex * inputNode;
Vertex * outputNode;
};