8

If I have:

#define likely(x)       __builtin_expect((x),1)
#define unlikely(x)     __builtin_expect((x),0)

if (A)
    return true;
else if (B)
    return false;
...
else if (Z)
    return true;
else
    //this will never really happen!!!!
    raiseError();
    return false;

Can I put likely() around the last condition check like else if (likely(Z)) to signify that the final statement (else) is very unlikely WITHOUT the compiler affecting the branch prediction of the previous checks?

Basically, does GCC try to optimize the entire if-else if block if there is a single conditional statement with a branch predictor hint?

jww
  • 97,681
  • 90
  • 411
  • 885
Lars
  • 233
  • 2
  • 9
  • The only way to find out for certain is to build (with optimizations enabled of course) and check the generated code. Compare the code generated with and without `likely` being used. – Some programmer dude Aug 19 '16 at 00:14
  • Why not just put `unlikely` around all the other conditions? – Kerrek SB Aug 19 '16 at 00:26
  • @KerrekSB That's what I want to prevent. All of the conditions are equally likely, except for the condition that none of them are true. – Lars Aug 19 '16 at 00:29
  • 2
    @JoachimPileborg Indeed, but I am hoping someone already knows, because it's not a trivial check for someone who doesn't look at assembler on a regular basis. – Lars Aug 19 '16 at 00:30
  • Is there a reason you didn't try this yourself? https://godbolt.org/g/MYRQeO – kfsone Sep 28 '16 at 02:47

2 Answers2

10

You shall make this explicit:

if (A)
  return true;
else if (B)
  return true;
...  
else if (Y)
  return true;
else {
  if (likely(Z))
    return true;

  raiseError();
  return false;
}

Now compiler clearly understands your intention and will not reassign other branch probabilities. Also readability of code increased.

P.S. I suggest you to rewrite also likely and unlikely in the way Linux kernel do to protect from silent integral casts:

#define likely(x)      __builtin_expect(!!(x), 1)
#define unlikely(x)    __builtin_expect(!!(x), 0)
Konstantin Vladimirov
  • 6,791
  • 1
  • 27
  • 36
3

In general, GCC assumes that conditionals in if statements will be true - there are exceptions, but they are contextual.

extern int s(int);

int f(int i) {
  if (i == 0)
    return 1;
  return s(i);
}

produces

f(int):
        testl   %edi, %edi
        jne     .L4
        movl    $1, %eax
        ret
.L4:
        jmp     s(int)

while

extern int t(int*);
int g(int* ip) {
  if (!ip)
    return 0;
  return t(ip);
}

produces:

g(int*):
        testq   %rdi, %rdi
        je      .L6
        jmp     t(int*)
.L6:
        xorl    %eax, %eax
        ret

(see godbolt)

Note how in f the branch is jne (assume the condition is true), while in g the condition is assumed to be false.

Now compare with the following:

extern int s(int);
extern int t(int*);

int x(int i, int* ip) {
  if (!ip)
    return 1;
  if (!i)
    return 2;
  if (s(i))
    return 3;
  if (t(ip))
    return 4;
  return s(t(ip));
}

which produces

x(int, int*):
        testq   %rsi, %rsi
        je      .L3         # first branch: assumed unlikely
        movl    $2, %eax
        testl   %edi, %edi
        jne     .L12        # second branch: assumed likely
        ret
.L12:
        pushq   %rbx
        movq    %rsi, %rbx
        call    s(int)
        movl    %eax, %edx
        movl    $3, %eax
        testl   %edx, %edx
        je      .L13       # third branch: assumed likely
.L2:
        popq    %rbx
        ret
.L3:
        movl    $1, %eax
        ret
.L13:
        movq    %rbx, %rdi
        call    t(int*)
        movl    %eax, %edx
        movl    $4, %eax
        testl   %edx, %edx
        jne     .L2       # fourth branch: assumed unlikely!
        movq    %rbx, %rdi
        call    t(int*)
        popq    %rbx
        movl    %eax, %edi
        jmp     s(int)

Here we see one of the context factors: GCC spotted that it could re-use L2 here, so it decided to deem the final conditional unlikely so that it could emit less code.

Lets look at the assembly of the example you gave:

#define likely(x)       __builtin_expect((x),1)
#define unlikely(x)     __builtin_expect((x),0)

extern void raiseError();

int f(int A, int B, int Z)
{
  if (A)
    return 1;
  else if (B)
    return 2;
  else if (Z)
    return 3;

  raiseError();
  return -1;
}

The assembly looks like this:

f(int, int, int):
        movl    $1, %eax
        testl   %edi, %edi
        jne     .L9
        movl    $2, %eax
        testl   %esi, %esi
        je      .L11
.L9:
        ret
.L11:
        testl   %edx, %edx
        je      .L12       # branch if !Z
        movl    $3, %eax
        ret
.L12:
        subq    $8, %rsp
        call    raiseError()
        movl    $-1, %eax
        addq    $8, %rsp
        ret

Note that the generated code branches when !Z is true, it's already behaving as though Z is likely. What happens if we tell it Z is likely?

#define likely(x)       __builtin_expect((x),1)
#define unlikely(x)     __builtin_expect((x),0)

extern void raiseError();

int f(int A, int B, int Z)
{
  if (A)
    return 1;
  else if (B)
    return 2;
  else if (likely(Z))
    return 3;

  raiseError();
  return -1;
}

now we get

f(int, int, int):
        movl    $1, %eax
        testl   %edi, %edi
        jne     .L9
        movl    $2, %eax
        testl   %esi, %esi
        je      .L11
.L9:
        ret
.L11:
        movl    $3, %eax    # assume Z
        testl   %edx, %edx
        jne     .L9         # but branch if Z
        subq    $8, %rsp
        call    raiseError()
        movl    $-1, %eax
        addq    $8, %rsp
        ret

The point here is that you should be cautious when using these macros and examine the code before and after carefully to make sure you get the results you were expecting, and benchmark (e.g with perf) to make sure that the processor is making predictions that align with the code you are generating.

kfsone
  • 23,617
  • 2
  • 42
  • 74