I'm trying to use a cudaGraphNode. I've got everything working, except for passing arguments to it. It doesn't crash, but gives a completely different result. Eg: If I pass 1337, in the kernel is will be 960051513.
Here's my code:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <cuda.h>
#include <iostream>
#include <chrono>
__global__ void TestKernel(int *value)
{
printf("Value: %d\n", *value);
}
void ExecuteTestKernel()
{
int* d_value;
cudaMalloc((void **)&d_value, sizeof(int));
cudaMemset(d_value, 1337, sizeof(int));
cudaGraph_t graph;
cudaGraphCreate(&graph, 0);
cudaGraphNode_t kernelNode;
void *kernelArgs[1] = {(void *)&d_value};
cudaKernelNodeParams kernelNodeParams = {0};
kernelNodeParams.func = (void *)TestKernel;
kernelNodeParams.gridDim = dim3(1, 1, 1);
kernelNodeParams.blockDim = dim3(1, 1, 1);
kernelNodeParams.sharedMemBytes = 0;
kernelNodeParams.kernelParams = kernelArgs;
kernelNodeParams.extra = NULL;
cudaGraphAddKernelNode(&kernelNode, graph, 0, 0, &kernelNodeParams);
cudaError_t e = cudaGetLastError();
if (e != cudaSuccess)
{
printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e));
exit(0);
}
cudaGraphExec_t graphInstance;
cudaGraphInstantiate(&graphInstance, graph, NULL, NULL, 0);
cudaGraphLaunch(graphInstance, 0);
cudaFree(d_value);
}