0

I'm trying to use a cudaGraphNode. I've got everything working, except for passing arguments to it. It doesn't crash, but gives a completely different result. Eg: If I pass 1337, in the kernel is will be 960051513.

Here's my code:

#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include <cuda.h>

#include <iostream>
#include <chrono>

__global__ void TestKernel(int *value)
{
    printf("Value: %d\n", *value);
}

void ExecuteTestKernel()
{
    int* d_value;

    cudaMalloc((void **)&d_value, sizeof(int));
    cudaMemset(d_value, 1337, sizeof(int));

    cudaGraph_t graph;
    cudaGraphCreate(&graph, 0);

    cudaGraphNode_t kernelNode;

    void *kernelArgs[1] = {(void *)&d_value};

    cudaKernelNodeParams kernelNodeParams = {0};
    kernelNodeParams.func = (void *)TestKernel;
    kernelNodeParams.gridDim = dim3(1, 1, 1);
    kernelNodeParams.blockDim = dim3(1, 1, 1);
    kernelNodeParams.sharedMemBytes = 0;
    kernelNodeParams.kernelParams = kernelArgs;
    kernelNodeParams.extra = NULL;

    cudaGraphAddKernelNode(&kernelNode, graph, 0, 0, &kernelNodeParams);

    cudaError_t e = cudaGetLastError();
    if (e != cudaSuccess)
    {
        printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e));
        exit(0);
    }

    cudaGraphExec_t graphInstance;
    cudaGraphInstantiate(&graphInstance, graph, NULL, NULL, 0);

    cudaGraphLaunch(graphInstance, 0);

    cudaFree(d_value);
}
  • 2
    You are using cudaMemset incorrectly. See https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gf7338650f7683c51ee26aadc6973c63a – Abator Abetor Jan 05 '21 at 14:57
  • A dumb mistake on the simplest part. Changing that to a memcpy did the trick. Thanks :) – user3899556 Jan 05 '21 at 15:33

0 Answers0