0

Consider the CUDA graphs driver API function cuGraphNodeFindInClone(). The documentation says that it:

Returns:

CUDA_SUCCESS, CUDA_ERROR_INVALID_VALUE

This seems problematic to me. How can I tell whether the search failed (e.g. because there is no copy of the passed node in the graph), or whether the node or the graph is simply invalid (e.g. nullptr)? Does the second error value signify both? Or could I get a third error value which is simply not mentioned in the documentation?

einpoklum
  • 118,144
  • 57
  • 340
  • 684
  • From the documentation it seems to me that the caller must already know that the node exists (you have to have cloned the graph beforehand and know that the node was part of the graph). The first case (the node does not exist in the cloned graph) should not happen and would be a user error in my opinion. – Abator Abetor Dec 18 '22 at 07:22
  • @AbatorAbetor: Well, you could say that about any API call - the caller should/must already know the inputs are valid; but that's still checked. – einpoklum Dec 18 '22 at 09:17

1 Answer

2

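When using the runtime API (cudaGraphNodeFindInClone), the returned node is set to nullptr if the original node does not exist in the cloned graph. For a nullptr original node or a nullptr cloned graph, the output node is left unmodified. In all three of these cases the call returns the same status (cudaErrorInvalidValue, "invalid argument"), so the error code alone does not distinguish them; only the value of the output node does. The following program demonstrates this: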


#include <iostream>
#include <cassert>

int main(){
    cudaError_t status;

    cudaGraph_t graph;
    status = cudaGraphCreate(&graph, 0);
    assert(status == cudaSuccess);

    cudaGraphNode_t originalNode;
    status = cudaGraphAddEmptyNode(&originalNode, graph, nullptr, 0);
    assert(status == cudaSuccess);

    cudaGraph_t graphclone;
    status = cudaGraphClone(&graphclone, graph);
    assert(status == cudaSuccess);

    cudaGraphNode_t anotherNode;
    status = cudaGraphAddEmptyNode(&anotherNode, graph, nullptr, 0);
    assert(status == cudaSuccess);

    cudaGraphNode_t nodeInClone = (cudaGraphNode_t)7;
    status = cudaGraphNodeFindInClone(&nodeInClone, originalNode, graphclone);
    std::cout << cudaGetErrorString(status) << " " << (void*)nodeInClone << "\n";

    nodeInClone = (cudaGraphNode_t)7;
    status = cudaGraphNodeFindInClone(&nodeInClone, nullptr, graphclone);
    std::cout << cudaGetErrorString(status) << " " << (void*)nodeInClone << "\n";

    nodeInClone = (cudaGraphNode_t)7;
    status = cudaGraphNodeFindInClone(&nodeInClone, originalNode, nullptr);
    std::cout << cudaGetErrorString(status) << " " << (void*)nodeInClone << "\n";

    nodeInClone = (cudaGraphNode_t)7;
    status = cudaGraphNodeFindInClone(&nodeInClone, anotherNode, graphclone);
    std::cout << cudaGetErrorString(status) << " " << (void*)nodeInClone << "\n";
}

On my machine with CUDA 11.8, this prints:

no error 0x555e3cf287c0
invalid argument 0x7
invalid argument 0x7
invalid argument 0
Abator Abetor
  • 2,345
  • 1
  • 10
  • 12