Question
I came across a leetcode question gas-station
However I found my code is slightly faster if I use if/else
rather than if/continue
.
Edit: after twiddling with the test case. I found the problem is continue
It seems that clang is taking continue
seriously..? My guess is hat clang somehow tries to put the part before continue and loop checking together.
In short: I want to know:
- How do I "persuade" clang to understand my implementation about continue like if/else? Because I don't want to change my style of coding.
Edited sidenote: branch-prediction tag is removed.
Relative question
Should I use return/continue statement instead of if-else? which seems that they should have nearly no difference.
Investigation
Edit: the demo shows that the problem is continue
I used a 10k data set to test the code and found that the problem is continue
itself.
I put continue
in the if
's end of body and then the runtime is increased by 30% ~ 50% which is still surprising to me.
Live Demo
Difference in number: 11070 to 15060(w/ continue
)
Edited on 2022/5/30: Sorry I found that my previous naive profiling mistakenly got optimized out of part of calculation... which causes the difference in order of 100... Bad demo
Profiling code
#include <iostream>
#include <vector>
#include <chrono>
#include <cstdlib>
using namespace std; // Yeah I know it's bad.
// input , output
class Solution {
public:
int canCompleteCircuit(vector<int>& gas, vector<int>& cost) {
if(gas.empty()){
return -1;
}
int len = gas.size();
// start at the last station, move start backward if we cannot finished the loop
int start = len - 1;
int tank = gas[start] - cost[start];
int cur = 0; // from first station
while(start != cur){ // other condition?
if(tank < 0){
// if not, move start backward, check the value
// continue...start > 0;
start--;
tank += gas[start] - cost[start];
// continue;
}
else{
tank += gas[cur] - cost[cur];
cur++;
}
}
//cout << tank ;
if(tank < 0){
return -1;
}
return start;
}
};
volatile int tryNoToBeOptimizedOut = 0;
int main() {
Solution s;
vector<int> gas{...}; // 10k data here
vector<int> cost{...};
int count = 100;
vector<int> results;
for(int i = 0; i <count; i++){
// size_t rIndex = rand()%gas.size();
// gas[rIndex] += i;
gas[i] += i;
auto start = std::chrono::system_clock::now();
int ret = s.canCompleteCircuit(gas,cost);
auto end = std::chrono::system_clock::now();
auto elapsed = end - start;
std::cout << elapsed.count() << '\n';
tryNoToBeOptimizedOut = ret;
results.push_back(ret);
}
tryNoToBeOptimizedOut = results[rand()%results.size()];
return 0;
}
old tests before edit
In the first place I checked the implementation in gcc and found that both versions are nearly identical in assembly.
Live Demo in gcc
I know that Leetcode uses clang 11 with O2.
So I checked the difference in clang 11.
Live Demo in clang 11
Live Demo in clang 14 doesn't get better
The results are a bit different between them.
It seems that cur++;
is combined into while
predicates in if/continue
version.
I'm not sure why....Is it related to optimization policies?
My code:
int canCompleteCircuit(vector<int>& gas, vector<int>& cost) {
if(gas.empty()){
return -1;
}
int len = gas.size();
// start at the last station, move start backward if we cannot finished the loop
int start = len - 1;
int tank = gas[start] - cost[start];
int cur = 0; // from first station
while(start != cur){ // other condition?
if(tank < 0){
// if not, move start backward, check the value
// continue...start > 0;
start--;
tank += gas[start] - cost[start];
continue;
}
// else{
tank += gas[cur] - cost[cur];
cur++;
// continue; <-- If I put another continue there then the runtime is similar to if/else version. But is there a better way than adding conitnue manually?
// }
}
if(tank < 0){
return -1;
}
return start;
}
Assembly in if
/else
version
.LBB0_5: # in Loop: Header=BB0_3 Depth=1
movsxd rcx, ecx # `else` part
mov edx, dword ptr [r14 + 4*rcx]
sub edx, dword ptr [rbx + 4*rcx]
add ecx, 1
.LBB0_6: # in Loop: Header=BB0_3 Depth=1
add eax, edx
mov edx, eax
shr edx, 31
cmp esi, ecx # `while(start != cur){`
je .LBB0_7
.LBB0_3: # =>This Inner Loop Header: Depth=1
test dl, 1 # `if(tank < 0){`
je .LBB0_5 # go to else
movsxd rdi, esi
add esi, -1
mov edx, dword ptr [r14 + 4*rdi - 4]
sub edx, dword ptr [rbx + 4*rdi - 4]
jmp .LBB0_6
Assembly in if
/continue
version
.LBB0_3: # =>This Loop Header: Depth=1
mov r9, rdx
mov edx, r10d
sub edx, r9d
add rdx, 4
movsxd rdi, r9d
lea rsi, [r12 + 4*rdi]
lea rbx, [r14 + 4*rdi]
xor edi, edi
.LBB0_4: # Parent Loop BB0_3 Depth=1
test cl, 1 # `if` part
jne .LBB0_5
add eax, dword ptr [rbx + 4*rdi] # after `if` statement
sub eax, dword ptr [rsi + 4*rdi]
mov ecx, eax
shr ecx, 31
add rdi, 1
cmp edx, edi
jne .LBB0_4
jmp .LBB0_7
.LBB0_5: # in Loop: Header=BB0_3 Depth=1
add eax, dword ptr [r14 + 4*r8 - 4] # Body of `if`
sub eax, dword ptr [r12 + 4*r8 - 4]
add r8, -1
mov esi, r10d
sub esi, r9d
add esi, 3
mov ecx, eax
shr ecx, 31
movsxd rdx, r9d
add rdx, rdi
add r10d, -1
cmp esi, edi
jne .LBB0_3