2

I am trying to understand how the "static" modifier works in C, I went looking for its meaning and everything I found seemed a bit vague.

It is a modifier to allow the values of a variable to exist until the end of the program execution.

I understood what it means and its purpose, but beyond this definition I wanted to understand how it works underneath, so I generated the assembly of the C code

/*
 * Allocates a 3-byte buffer, writes the string "y" into it and returns
 * the buffer. The parameter a is never read.
 * NOTE: the pointer returned by malloc() is written through without
 * being checked for NULL.
 */
char    *thing(char *a)
{
    char *b;

    b = malloc(3);

    /* Only two of the three allocated bytes are written: 'y' and the terminator. */
    b[0] = 'y';
    b[1] = '\0';
    return (b);
}

/*
 * Passes the current value of the function-local static pointer a to
 * thing(), stores the pointer thing() returns back into a, and returns it.
 * The parameter fd is never used.
 */
char    *some(int fd)
{
    /* a has static storage duration: it is initialized once, not on every
       call, and keeps the value last assigned to it between calls. */
    static char *a = "happened";
    a = thing(a);
    return (a);
}

I then created a second version of the code in which `a` is a non-static (automatic) variable, and got the two listings below:

/* With static variable */
    /* thing(char *a): calls malloc(3), stores 'y' and a 0 terminator in the
       returned buffer, and returns the buffer pointer in %rax.
       On entry %rdi holds the argument a; it is only spilled, never used. */
    .file   "static_test.c"
    .text
    .globl  thing
    .type   thing, @function
thing:
.LFB6:
    .cfi_startproc
    /* Prologue: save the caller's %rbp and set up a frame pointer. */
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    /* Reserve 32 bytes of stack for the spilled argument and the local b. */
    subq    $32, %rsp
    /* Spill a to -24(%rbp); nothing reads it back. */
    movq    %rdi, -24(%rbp)
    /* b = malloc(3); b lives in the stack slot at -8(%rbp). */
    movl    $3, %edi
    call    malloc@PLT
    movq    %rax, -8(%rbp)
    /* b[0] = 'y' (121 is the ASCII code of 'y'). */
    movq    -8(%rbp), %rax
    movb    $121, (%rax)
    /* b[1] = '\0'. */
    movq    -8(%rbp), %rax
    addq    $1, %rax
    movb    $0, (%rax)
    /* Return value: reload b into %rax. */
    movq    -8(%rbp), %rax
    /* Epilogue: tear down the frame and return. */
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE6:
    .size   thing, .-thing
    /* some(int fd), static version: the variable a is the memory object a.0,
       not a stack slot. There is no instruction here that initializes it;
       the function only loads its current value and stores the new one. */
    .globl  some
    .type   some, @function
some:
.LFB7:
    .cfi_startproc
    /* Prologue: save the caller's %rbp and set up a frame pointer. */
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    /* Only 16 bytes of stack are reserved: just enough for the spilled fd. */
    subq    $16, %rsp
    /* Spill fd to -4(%rbp); nothing reads it back. */
    movl    %edi, -4(%rbp)
    /* Load the current value of a from a.0 (RIP-relative) and pass it to thing(). */
    movq    a.0(%rip), %rax
    movq    %rax, %rdi
    call    thing
    /* a = <result of thing()>: the store goes to a.0 in memory. */
    movq    %rax, a.0(%rip)
    /* Return value: reload a from a.0 into %rax. */
    movq    a.0(%rip), %rax
    /* Epilogue: tear down the frame and return. */
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE7:
    .size   some, .-some
    /* The string literal "happened" is emitted into the read-only data section. */
    .section    .rodata
.LC0:
    .string "happened"
    /* Storage for the static variable a, emitted under the name a.0 (there is
       no .globl for it, so the symbol is local to this object file).
       The "aw" section flags mean allocatable and writable. */
    .section    .data.rel.local,"aw"
    .align 8
    .type   a.0, @object
    .size   a.0, 8
a.0:
    /* 8-byte initial value: the address of .LC0. This is the whole of the
       initializer `= "happened"`; it is data in the object file, not code. */
    .quad   .LC0
    .ident  "GCC: (GNU) 12.1.0"
    .section    .note.GNU-stack,"",@progbits

/* no static variable */
    /* thing(char *a): calls malloc(3), stores 'y' and a 0 terminator in the
       returned buffer, and returns the buffer pointer in %rax.
       On entry %rdi holds the argument a; it is only spilled, never used. */
    .file   "nostatic_test.c"
    .text
    .globl  thing
    .type   thing, @function
thing:
.LFB6:
    .cfi_startproc
    /* Prologue: save the caller's %rbp and set up a frame pointer. */
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    /* Reserve 32 bytes of stack for the spilled argument and the local b. */
    subq    $32, %rsp
    /* Spill a to -24(%rbp); nothing reads it back. */
    movq    %rdi, -24(%rbp)
    /* b = malloc(3); b lives in the stack slot at -8(%rbp). */
    movl    $3, %edi
    call    malloc@PLT
    movq    %rax, -8(%rbp)
    /* b[0] = 'y' (121 is the ASCII code of 'y'). */
    movq    -8(%rbp), %rax
    movb    $121, (%rax)
    /* b[1] = '\0'. */
    movq    -8(%rbp), %rax
    addq    $1, %rax
    movb    $0, (%rax)
    /* Return value: reload b into %rax. */
    movq    -8(%rbp), %rax
    /* Epilogue: tear down the frame and return. */
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE6:
    .size   thing, .-thing
    /* The string literal "happened" is emitted into the read-only data section.
       In this version no data object is emitted for the variable a itself. */
    .section    .rodata
.LC0:
    .string "happened"
    .text
    /* some(int fd), non-static version: the variable a is the stack slot at
       -8(%rbp) and is initialized by code each time the function runs. */
    .globl  some
    .type   some, @function
some:
.LFB7:
    .cfi_startproc
    /* Prologue: save the caller's %rbp and set up a frame pointer. */
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    /* Reserve 32 bytes of stack for the spilled fd and the local a. */
    subq    $32, %rsp
    /* Spill fd to -20(%rbp); nothing reads it back. */
    movl    %edi, -20(%rbp)
    /* a = "happened": compute the address of .LC0 and store it in a's slot. */
    leaq    .LC0(%rip), %rax
    movq    %rax, -8(%rbp)
    /* Load a from the stack and pass it to thing(). */
    movq    -8(%rbp), %rax
    movq    %rax, %rdi
    call    thing
    /* a = <result of thing()>: the store goes to the stack slot. */
    movq    %rax, -8(%rbp)
    /* Return value: reload a into %rax. */
    movq    -8(%rbp), %rax
    /* Epilogue: leave releases the frame, including the slot holding a. */
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE7:
    .size   some, .-some
    .ident  "GCC: (GNU) 12.1.0"
    .section    .note.GNU-stack,"",@progbits

My question is: what is each listing doing, what is the difference between the two, and how does a static variable behave at compile time and at run time?

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
Astaroth
  • 25
  • 4
  • 1
    The only difference you will see in assembly is that `static` variables will live in the `.bss` section and loads/stores will be to global memory instead of the stack. That's about it. – Marco Bonelli Jun 07 '22 at 23:41
  • @MarcoBonelli -plus the load is only done once – pm100 Jun 07 '22 at 23:43
  • 2
    That static variable is initialized in the program image, so lives in `.data`, not `.bss`. – Erik Eidt Jun 07 '22 at 23:45
  • @pm100 hmm... not sure what you mean, the load (or well, in this case the LEA) is always done when the function is called. If the static variable was of type `int` then you would see a load (MOV) every time instead. – Marco Bonelli Jun 07 '22 at 23:46
  • 2
    Using a string is really confusing the point here, since pretty much no matter what, the string literal itself will be in a read-only section. The pointer variable itself being on the stack or not is a lot harder to suss out from this assembly mess. – Carl Norum Jun 07 '22 at 23:52
  • Compiling with optimization disabled also makes everything a mess, and as @Carl said your test case has a lot going on besides the `static` vs. `auto`matic storage. https://godbolt.org/z/YEnT5c8jP should be much clearer to see the difference between a fresh variable for every invocation of a function vs. keeping its value in static storage across invocations. (See also [How to remove "noise" from GCC/clang assembly output?](https://stackoverflow.com/q/38552116)) – Peter Cordes Jun 08 '22 at 00:10
  • 2
    It probably doesn't matter much as this is illustrative code, but (1) both versions of the code leak allocated memory and (2) the argument to `thing()` is unused and could sensibly be omitted. IMO, it is not necessary to dive into assembler to understand the difference between static and automatic variables inside a function. – Jonathan Leffler Jun 08 '22 at 00:10

2 Answers2

4

In the static version, you have a pointer-sized datum reserved in the .data section, which is initialized by the program file/image to refer to the string literal (which is in the .rodata section).  This is the following definition in the listing:

a.0:
   .quad   .LC0

In the non-static version, the variable is "automatic" — a local variable of the function, and is created on function entry and effectively destroyed on function exit.  Because the variable comes into existence each time the function is called, it must be initialized each time.  In the code you're showing (unoptimized) this automatic variable lives on the stack.  (Optimization would improve the code.)

Static variables can be more efficient with respect to such initialization, because it is done once in the program image rather than on every call. Local variables, on the other hand, can live in a CPU register (fast access and no memory use) and, unlike statics, are potentially safe under recursion and multithreading.

As you note, the static variable also lives on after the function returns, because it has global storage and the compiler knows how to access it.  The automatic/stack-based variable, by contrast, is lost after returning from the function — more specifically, the variable is re-created each time the function is called, so the old copy is no longer (realistically) accessible.  You could make a pointer to the automatic variable, but it would be a logic error to use/dereference that pointer after the function exits.

Erik Eidt
  • 23,049
  • 2
  • 29
  • 53
2

This might be a better example. r has local scope, but it will not be located locally on the stack, but either in the .bss or .data section of a program and only initialized one time to zero. After that each call to Rnd32 will update r. The program returns a pseudo random 32 bit unsigned integer on each call in a fixed order, so that it is a repeatable sequence that goes through all 2^32 possible values.

/*
 * Linear congruential generator. Each call advances the function-local
 * static state r to r*1664525 + 1013904223 (wrapping at 32 bits) and
 * returns the new state. r starts at 0 and keeps its value between calls.
 */
uint32_t Rnd32()
{
    static uint32_t r = 0;

    r *= 1664525;       /* multiplier */
    r += 1013904223;    /* increment */
    return r;
}
rcgldr
  • 27,407
  • 3
  • 36
  • 61
  • I was thinking the same thing; just commented with https://godbolt.org/z/YEnT5c8jP as a similar example under the question. But it doesn't have to do anything useful to see the difference in asm, e.g. `return ++r` makes the asm output even easier to follow. – Peter Cordes Jun 08 '22 at 00:12
  • This example shows one of the problems with static variables in a function — there is no way to reset the value of `r` so you can't restart the sequence. The `srand()` function provides a way to restart the PRNG implemented by `rand()`. As illustrative code, that doesn't matter much. – Jonathan Leffler Jun 08 '22 at 00:13
  • @JonathanLeffler - In the case of Windows | Visual Studio, srand() and rand() rely on a per thread global variable. The point of this example was to show a static variable with local scope. – rcgldr Jun 08 '22 at 05:48