3

As mentioned in the "Shared Memory Array Default Value" question, shared memory is not initialized, i.e. it can contain any value.

#include <stdio.h>

#define BLOCK_SIZE 512

/*
 * Debug kernel: dumps the contents of a statically sized shared-memory
 * array (BLOCK_SIZE ints) without ever writing to it first.
 *
 * Only thread 0 of block 0 prints; every other thread returns immediately.
 * The parameters input, output and len are not used by the body.
 */
__global__ void scan(float *input, float *output, int len) {
    __shared__ int data[BLOCK_SIZE];

    // A single thread of the whole grid does the reporting.
    const bool isReporter = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (!isReporter)
        return;

    printf("Block Number: %d\n", blockIdx.x);

    // Deliberately read the array before anything has been stored in it.
    int slot = 0;
    while (slot < BLOCK_SIZE) {
        printf("DATA[%d] = %d\n", slot, data[slot]);
        ++slot;
    }
}

/*
 * Launches the scan debug kernel on 10 blocks of BLOCK_SIZE threads and
 * waits for it to finish so that the device-side printf output is flushed.
 *
 * Returns 0 on success, 1 if the launch or the kernel execution failed.
 */
int main(int argc, char ** argv) {
    dim3 block(BLOCK_SIZE, 1, 1);
    dim3 grid(10, 1, 1);

    // The kernel does not use its arguments. The third parameter is an int,
    // so pass 0 rather than NULL (NULL is a pointer constant).
    scan<<<grid,block>>>(NULL, NULL, 0);

    // A bad launch configuration is only reported through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Errors raised while the kernel runs surface at the next synchronization.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}

But why does this not seem to hold for this code? I consistently get zeroed shared memory:

DATA[0] = 0
DATA[1] = 0
DATA[2] = 0
DATA[3] = 0
DATA[4] = 0
DATA[5] = 0
DATA[6] = 0
...

I tested with Release and Debug Mode: -O3 -arch=sm_20, -O3 -arch=sm_30 and -arch=sm_30. The result is always the same.

paleonix
  • 2,293
  • 1
  • 13
  • 29
likern
  • 3,744
  • 5
  • 36
  • 47
  • 1
    Did you test it in both release and debug mode? In some projects I have observed that shared memory was initialized to 0 in debug mode but not in release mode, and this was not the case in all projects. This isn't defined behaviour, as @CygnusX1 answered in your linked question. You have to initialize shared memory on your own! – hubs Mar 04 '14 at 13:14
  • If it can contain any value, then it can contain zeros, no? The system may still need to reinitialize memory sometimes to prevent information leaking between processes (security). – zch Mar 04 '14 at 13:17
  • Yes, I tested. With "-arch=sm_30" and "-O3 -arch=sm_30" options, also with "-arch=sm_20". The result is the same - zeroed shared memory. – likern Mar 04 '14 at 13:36
  • Yes, it can contain zeros too, but the strange thing is that there are never any other values in shared memory, which suggests that it is deliberately zeroed. – likern Mar 04 '14 at 13:40
  • 2
    Zero is within the subset of "any value". – Michael Foukarakis Mar 04 '14 at 13:54
  • If you launch more than one wave of blocks you will likely see non-zero values in the second wave. On context switch the shared memory is reset to zero. – Greg Smith Mar 15 '21 at 01:11

1 Answer

13

tl;dr: shared memory is not initialized to 0

I think your conjecture that shared memory is initialized to 0 is questionable. Try the following code, which is a slight modification of yours. Here, I'm calling the kernel twice and altering the values of the data array. The first time the kernel is launched, the "uninitialized" values of data will be all 0's. The second time the kernel is launched, the "uninitialized" values of data will no longer be all 0's: they typically show the values written by the first launch (so every element except DATA[0], which was written as 0, is non-zero).

I think this depends on the fact that shared memory is SRAM, which exhibits data remanence.

#include <stdio.h>

#define BLOCK_SIZE 32

/*
 * Debug kernel: prints every element of a shared-memory array before it
 * has been initialized, then overwrites that element with its own index.
 *
 * Only thread 0 of block 0 does any work; all other threads return at once.
 * The stored values are never read back within the same launch.
 * The parameters input, output and len are not used by the body.
 */
__global__ void scan(float *input, float *output, int len) {

    __shared__ int data[BLOCK_SIZE];

    // A single thread of the whole grid does the reporting.
    const bool isReporter = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (!isReporter)
        return;

    int slot = 0;
    while (slot < BLOCK_SIZE) {
        // Print first: this is the value present before any write in this launch.
        printf("DATA[%d] = %d\n", slot, data[slot]);
        data[slot] = slot;
        ++slot;
    }
}

/*
 * Launches the scan debug kernel twice in a row on 10 blocks of BLOCK_SIZE
 * threads, waits for completion, then blocks on getchar() so the console
 * output stays visible.
 *
 * Returns 0 on success, 1 if a launch or the kernel execution failed.
 */
int main(int argc, char ** argv) {
    dim3 block(BLOCK_SIZE, 1, 1);
    dim3 grid(10, 1, 1);
    int exitCode = 0;

    // The kernel does not use its arguments. The third parameter is an int,
    // so pass 0 rather than NULL (NULL is a pointer constant).
    scan<<<grid,block>>>(NULL, NULL, 0);
    cudaError_t status = cudaGetLastError();   // launch-configuration errors

    if (status == cudaSuccess) {
        // Second launch: prints whatever the shared array holds after the
        // first launch has written to it.
        scan<<<grid,block>>>(NULL, NULL, 0);
        status = cudaGetLastError();
    }

    if (status == cudaSuccess) {
        // Errors raised while the kernels run surface here; this also
        // flushes the device-side printf buffer.
        status = cudaDeviceSynchronize();
    }

    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        exitCode = 1;
    }

    // Wait for a key press so the output can be read before the program exits.
    getchar();
    return exitCode;
}
einpoklum
  • 118,144
  • 57
  • 340
  • 684
Vitality
  • 20,705
  • 4
  • 108
  • 146
  • Yes, you are absolutely right! Thanks for this good explanation, now it is clear why we should initialize shared memory manually! – likern Mar 04 '14 at 14:40