I am working on my math homework and have written a c++ version of the SJT algorithm.
When I tried to test the time cost of this code with input n=12
, I got different results.
You can see that there are three calls of the init
in generate
, they are not redundant.
If I keep all of them, the time cost is 1200+ms
. However, if I delete the second or the third init
, the time cost becomes 600+ms
. How could the call of init
even after the neighbour_exchange
affect the time cost? Is this a bug of gcc -O2 optimization?
Compiled with -O2, the assembly code is this.
My operating system is ubuntu 20.04, I've tried both gcc (x86_64-linux-gnu
) version 9.3.0
and 10.3.0
Here is the code.
#include <cstdio>
#include <cstring>
#include <ctime>
// #define SHOW
struct Permutation {
const static int MAXN = 100;
int N;
char s[2*MAXN]; // output string
int r[MAXN]; // radix number
inline void init() {
memset(s, 0, sizeof(s));
memset(r, 0, sizeof(r));
for (int i = 1; i <= N; i++) {
s[i] = 'a' + i - 1;
}
}
void generate(int n) {
N = n;
init();
init();
auto start = clock();
neighbour_exchange();
printf("neighbour_exchange: %lf\n", (double)(clock() - start)*1000/CLOCKS_PER_SEC);
init();
}
// SJT algorithm with Even's speed up
void neighbour_exchange() {
// idx_N is the index of the largest element
// t_N is the current direction of the largest element
int idx_N = N, t_N = -1;
while (true) {
#ifdef SHOW
puts(s+1);
#endif
int i = N;
r[i] += 1;
// if the N-th radix doesn't carry
// just use the old direciton and move N
// else we should find the largest non-carry element and move it
if (r[i] < N) {
char c = s[idx_N];
idx_N += t_N;
s[idx_N-t_N] = s[idx_N];
s[idx_N] = c;
continue;
}
// this is safe because if r[1] == 1, r[0] will be set to 1
// so in the next loop r[0] != 0, which will break the while loop
// and then the function will end
while (r[i] == i) {
r[i] = 0;
r[--i] += 1;
}
if (i == 0) break;
// some computation of the direction and movement of the elements
int j = 1;
char c = 'a'+i-1;
while (s[j] != c) j++;
int t = (r[i-1] ^ (~i & r[i-2])) & 1;
t = (t << 1) - 1;
c = s[j+t]; s[j+t] = s[j]; s[j] = c;
t_N = (r[N-1] ^ (~N & r[N-2])) & 1;
t_N = (t_N << 1) - 1;
}
}
} perm;
int main() {
int n;
scanf("%d", &n);
perm.generate(n);
return 0;
}