4

I am trying to copy a struct array to the device. I am working with one GPU at the moment, and I have a problem with the cuPrintf function, which I use to debug my code.

My struct definition is as below:

 // One node of the tree. Nodes are stored by value in a flat array that main()
 // copies to the device with a single cudaMemcpy, so the layout must not
 // contain host pointers (it does not: only fixed-size arrays and ints).
 struct Node
 {
        char Key[25];           // NUL-terminated text; FillArray stores "Key"
        char ConsAlterKey[25];  // NUL-terminated text; FillArray stores "ConsAlterKey"
        char MasterKey[3];      // room for 2 characters + NUL; FillArray stores "Mk"
        int VowelDeletion;      // FillArray sets this to 0; meaning not shown here
        char Data[6];           // room for 5 characters + NUL; FillArray stores "Data"
        char MasterData[6];     // room for 5 characters + NUL; FillArray stores "Mdata"
        int Children[35];       // child entries (presumably node indices - confirm); only the first ChildCount are filled
        int ChildCount;         // number of valid entries in Children
 };

For test purposes, I fill the struct array like this:

// Fills all TotalNodeCount elements of NodeArray with identical test strings.
// Each node gets ChildCount = 5, and its first five Children entries are set
// to (node index + slot). Children entries past ChildCount are not written.
//
// NodeArray must point to at least TotalNodeCount writable Node elements.
void FillArray(Node *NodeArray)
{
    for (int nodeIdx = 0; nodeIdx < TotalNodeCount; ++nodeIdx)
    {
        Node &node = NodeArray[nodeIdx];

        // Scalar fields.
        node.VowelDeletion = 0;
        node.ChildCount = 5;

        // Fixed test strings; each literal (plus its NUL) fits its buffer.
        strcpy(node.Key, "Key");
        strcpy(node.ConsAlterKey, "ConsAlterKey");
        strcpy(node.MasterKey, "Mk");
        strcpy(node.Data, "Data");
        strcpy(node.MasterData, "Mdata");

        // Child slot s of node n holds n + s.
        for (int slot = 0; slot < node.ChildCount; ++slot)
        {
            node.Children[slot] = nodeIdx + slot;
        }
    }
}

My main function looks like this:

// Host driver for the cuPrintf experiment.
//
// Builds the test node array on the host, copies it and the token count to the
// device, launches Test with one block of tokenCount threads, prints the
// cuPrintf buffer to stdout, and releases every allocation.
//
// Returns 0 on success and 1 if the host allocation fails. CUDA failures are
// handed to gpuAssert.
int main()
{
    Node *NodeArray = NULL;
    Node *GpuTree = NULL;
    int tokenCount = 0;
    int *tokenCountGPU = NULL;

    NodeArray = (Node *)malloc(sizeof(Node)*(TotalNodeCount));
    if (NodeArray == NULL)
    {
        // Without this check FillArray would write through a null pointer.
        fprintf(stderr, "Host allocation for the node array failed\n");
        return(1);
    }
    FillArray(NodeArray);
    printf("Filling test : %s\n", NodeArray[13].Key);

    gpuAssert( cudaMalloc((void **)&GpuTree, sizeof(Node)*(TotalNodeCount)) );
    gpuAssert( cudaMemcpy(GpuTree, NodeArray, sizeof(Node)*(TotalNodeCount), cudaMemcpyHostToDevice) );

    //test value
    tokenCount = 35;

    gpuAssert( cudaMalloc((void **)&tokenCountGPU, sizeof(int)) );
    gpuAssert( cudaMemcpy(tokenCountGPU, &tokenCount, sizeof(int), cudaMemcpyHostToDevice) );

    // cudaPrintfInit allocates the device-side print buffer; a failure here
    // would otherwise go unnoticed and simply produce no output.
    gpuAssert( cudaPrintfInit() );

    Test <<< 1, tokenCount >>> (GpuTree,tokenCountGPU);
    // A kernel launch returns no status; launch errors are reported through
    // cudaGetLastError(), so query it immediately, before any other runtime
    // call can replace the recorded error.
    gpuAssert( cudaGetLastError() );

    // Copies the device print buffer back and writes it to stdout. According
    // to the cuPrintf readme this call synchronizes with the device first.
    gpuAssert( cudaPrintfDisplay(stdout, true) );
    cudaPrintfEnd();

    // Release device memory first, then the host array.
    gpuAssert( cudaFree(tokenCountGPU) );
    gpuAssert( cudaFree(GpuTree) );
    free(NodeArray);

    return(0);
}

If I write the test function as below:

// Debug kernel: every thread whose index is below *tokenCount prints the Key
// of the node at its own thread index using cuPrintf's "%s".
//
// Expects a 1D block; only threadIdx.x is used, so all blocks print the same
// nodes. Trie and tokenCount must be device pointers.
//
// NOTE: Key is a char[25] member, so the argument reaches cuPrintf as a
// (char *). The cuPrintf readme requires "%s" arguments to be of type
// (const char *); this un-cast argument is what yields the corrupt output.
__global__ void Test(Node *Trie,int *tokenCount)
{
    const unsigned int tid = threadIdx.x;

    // Threads at or beyond the token count have nothing to print.
    if (tid >= *tokenCount)
    {
        return;
    }

    cuPrintf("%s\n",Trie[tid].Key);
}

I get output like this:

Filling test : Key
[0, 0]: <
[0, 1]: ¶☺!
[0, 2]: ì☺!
[0, 3]: Ä☻!
[0, 4]: o♥!
[0, 5]: t♦!
[0, 6]: L♣!
[0, 7]: $♠!
[0, 8]: ü♠!
[0, 9]: Ô!
[0, 10]: !
[0, 11]: "
[0, 12]: \
!
[0, 13]: 4♂!
[0, 14]: ♀♀!
[0, 15]: ä♀!
!0, 16]: ¼
[0, 17]: "♫!
[0, 18]: l☼!
[0, 19]: D►!
[0, 20]: ∟◄!
[0, 21]: ô◄!
[0, 22]: Ì↕!
[0, 23]: ¤‼!
[0, 24]: |¶!
[0, 25]: T§!
[0, 26]: ,▬!
[0, 27]: ♦↨!
[0, 28]: Ü↨!
[0, 29]: ´↑!
[0, 30]: O↓!
[0, 31]: d→!
[0, 32]: <←!
[0, 33]: ¶∟!
[0, 34]: ì∟!

But if I change my test function to this:

// Debug kernel: every thread whose index is below *tokenCount prints the first
// three characters of its node's Key, passing each one to cuPrintf by value
// with "%c" instead of passing the string with "%s".
//
// Expects a 1D block; only threadIdx.x is used. Trie and tokenCount must be
// device pointers.
__global__ void Test(Node *Trie,int *tokenCount)
{
    const unsigned int tid = threadIdx.x;

    // Threads at or beyond the token count have nothing to print.
    if (tid >= *tokenCount)
    {
        return;
    }

    const char *keyChars = Trie[tid].Key;
    cuPrintf("%c%c%c\n", keyChars[0], keyChars[1], keyChars[2]);
}

then I get the correct output:

Filling test : Key
[0, 0]: Key
[0, 1]: Key
[0, 2]: Key
[0, 3]: Key
[0, 4]: Key
[0, 5]: Key
[0, 6]: Key
[0, 7]: Key
[0, 8]: Key
[0, 9]: Key
[0, 10]: Key
[0, 11]: Key
[0, 12]: Key
[0, 13]: Key
[0, 14]: Key
[0, 15]: Key
[0, 16]: Key
[0, 17]: Key
[0, 18]: Key
[0, 19]: Key
[0, 20]: Key
[0, 21]: Key
[0, 22]: Key
[0, 23]: Key
[0, 24]: Key
[0, 25]: Key
[0, 26]: Key
[0, 27]: Key
[0, 28]: Key
[0, 29]: Key
[0, 30]: Key
[0, 31]: Key
[0, 32]: Key
[0, 33]: Key
[0, 34]: Key

So the question is: why do I get corrupt output when I try to print strings using "%s"?


So the problem is solved. It looks like it is caused by cuPrintf's limitations, which I was not aware of. Thank you.

Here is a small test:

// Experiment kernel: passes the same text "Key" to cuPrintf's "%s" through
// five differently-typed arguments to see which ones print correctly.
// Trie and tokenCount are unused here (their uses are commented out below).
__global__ void Test(Node *Trie,int *tokenCount)
{
    const char *Key="Key";      // (const char *): reported output shows "Key"
    char *KeyPointer="Key";     // (char *): reported output shows a garbage character
    char KeyArray[4]="Key";     // char[4]: reported output shows nothing
    // The last two arguments are the same pointer and array cast to
    // (const char *); both are reported to print "Key", matching the cuPrintf
    // readme rule that "%s" arguments must be of type (const char *).
    cuPrintf("Constant : %s - Array :%s  - Pointer : %s - Casting Pointer : %s - Casting Array : %s\n",Key, KeyArray,KeyPointer,(const char *)KeyPointer,(const char *)KeyArray);

    //cuPrintf("%s\n",Trie[threadIdx.x].Key);
    //cuPrintf("%d\n",*tokenCount);

}

Gives the output :

    [0, 0]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 1]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 2]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 3]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 4]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 5]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 6]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 7]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 8]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 9]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 10]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 11]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 12]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 13]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 14]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 15]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 16]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 17]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 18]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 19]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 20]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 21]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 22]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 23]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 24]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 25]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 26]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 27]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 28]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 29]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 30]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 31]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 32]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 33]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
    [0, 34]: Constant : Key - Array :  - Pointer : ♀ - Casting Pointer : Key - Casting Array : Key
meva
  • 77
  • 1
  • 7

3 Answers3

7

Have a look at cuPrintf documentation (a readme is located at C/src/simplePrintf/doc/cuPrintf_readme.htm from the base directory where you installed the SDK):

There are Limitations/Known Issues on the usage of cuPrintf, number 2 answers your question:

Limitations / Known Issues

Currently, the following limitations and restrictions apply to cuPrintf:

  1. Buffer size is rounded up to the nearest factor of 256
  2. Arguments associated with “%s” string format specifiers must be of type (const char *)
  3. To print the value of a (const char *) pointer, it must first be converted to (char *). All (const char *) arguments are interpreted as strings
  4. Non-zero return code does not match standard C printf()
  5. Cannot asynchronously output the printf buffer (i.e. while kernel is running)
  6. Calling cudaPrintfDisplay implicitly issues a cudaDeviceSynchronize()
  7. Restrictions applied by cuPrintfRestrict persist between launches. To clear these from the host-side, you must call cudaPrintfEnd() then cudaPrintfInit() again
  8. cuPrintf output is undefined if multiple modules are loaded into a single context
  9. Compile with “-arch=sm_11” or better when possible. Buffer usage is far more efficient and register use is lower
  10. Supported format specifiers are: “cdiouxXeEfgGaAs”
  11. Behaviour of format specifiers, especially justification/size specifiers, are dependent on the host machine’s implementation of printf
  12. cuPrintf requires applications to be built using the CUDA runtime API

In your case you're not using const char* arguments.

jopasserat
  • 5,721
  • 4
  • 31
  • 50
  • Thanks! - I was banging my head against a wall -- it appeared to be correct, but the standard printf syntax isn't fully supported. – M. Tibbits Jul 06 '11 at 21:16
  • @M. Tibbits: I just checked the implementation and it seems a bit raw :) It would be interesting for OP to test with casts or rewrite the function to accept char* – jopasserat Jul 06 '11 at 21:44
  • It would be. I'm personally in a time crunch on another project -- hence I offered the bounty and 'paused' my effort. – M. Tibbits Jul 06 '11 at 21:56
1

On your most recent update, you need to multiply slenz by sizeof(char) when you're copying. So it should be:

// cudaMemcpy takes a size in bytes: the number of chars (slenz) times sizeof(char).
gpuAssert( cudaMemcpy(strGPU, str, slenz*sizeof(char), cudaMemcpyHostToDevice));
M. Tibbits
  • 8,400
  • 8
  • 44
  • 59
  • How about printing out the `childCount`? Is the gpu able to pull an integer out of the structure? For the record, this all seems bizarre. I don't see any problems in your code above. **Oh, and, could you print out `sizeof(Node)`**? – M. Tibbits Jul 04 '11 at 16:28
  • Sorry for my late answer.Sizeof(Node) is 216. – meva Jul 05 '11 at 09:49
  • And I have updated the code as you said, but I am still getting nothing. Maybe my version of cuPrintf.cu has a problem. Anyway, I don't want to bother you with this newbie question, because I have solved the problem in my code and I don't need cuPrintf for now. In fact, I will post a new question about performance guidance for my current code. I hope you will help me with that :) – meva Jul 05 '11 at 09:57
  • So I had just suggested using `printf`s in the kernel until I realized you need a device with compute capability 2.x (Fermi). Crap. Back to drawing boards... – M. Tibbits Jul 05 '11 at 17:12
  • I find that I learn more when things don't work... I'm still stuck on why the above code doesn't produce the expected results -- hence the bounty -- let's attract better minds! – M. Tibbits Jul 05 '11 at 17:47
  • Ah :) I'm just trying to do a simple project.Kind of an enhanced patricia trie.Thank you. – meva Jul 05 '11 at 17:50
  • And here is the code if anyone wants to run it: http://dl.dropbox.com/u/13011034/codes.zip – meva Jul 05 '11 at 17:51
1

One of the members of your struct is

    char MasterKey[3];

and when you initialize the objects you do

        //strcpy(NodeArray[i].MasterKey,"MasterKey");
        strcpy(NodeArray[i].MasterKey,"Msk"); /* still too large: "Msk" plus its terminating NUL needs 4 bytes, MasterKey[3] holds 3 */

which is a little (!) too much for the available space.

pmg
  • 106,608
  • 13
  • 126
  • 198
  • Now it's MasterData[6]: `strcpy(NodeArray[i].MasterData,"Master");`. What happens, assuming `cuPrintf` behaves the same as `printf`, if you limit the output to 3 characters: `cuPrintf("%3.3s\n", Trie[threadIdx.x].Key)` – pmg Jul 05 '11 at 10:30
  • Done but i still get corrupt output with `cuPrintf("%3.3s\n", Trie[threadIdx.x].Key);` – meva Jul 05 '11 at 10:37