I enjoyed figuring out how to create the SWAR x LT (Less Than) y
function with 64bit unsigned int
and using only logical operators
and arithmetic +
and -
.
I looked at some information on the web (https://www.chessprogramming.org/SIMD_and_SWAR_Techniques) and from there I got the idea that the function can be done starting from the subtraction (x - y)
.
Looking at the meaning of the highest bit of: x
, y
and (x - y)
when unsigned int
are used, I created the following truth table where:
R (result) is 1 when the LT condition occurs.
D is the highest bit of the subtracion (x-y),
X is the highest bit of the X value to be tested,
Y is the highest bit of the Y value to be tested.
D X Y | R
0 0 0 | 0
0 0 1 | 1
0 1 0 | 0
0 1 1 | 0
1 0 0 | 1
1 0 1 | 1
1 1 0 | 0
1 1 1 | 1
Applying the Karnaugh's map (https://getcalc.com/karnaugh-map/3variable-kmap-solver.htm) to the table above we obtain the following formula:
(~ X & Y) | (D & ~ X) | (D & Y)
from which the macro SWARLTU(x, y)
arose (see file swar.h below).
Since I was not satisfied, I observed how the compiler generated the assembler code of the macro SWARLTU
and then following that code I wrote the macro SWARLTU2(x, y)
(see file swar.h below). This last macro should be logically optimized.
The limit of this code is that the value for the LT result is 0x80 and not 0xFF as requested in the question.
The program can be launched in three different ways:
Without parameters, in this case it will perform 10 tests on random numbers.
With only one parameter, the parameter will indicate the number of random tests to be performed.
With two parameters, two numbers in the form 0xnnnnn, in this case only the control of the entered values will be shown.
Here the code:
The file swar.h (this file contains also other SWAR macros E.G.: SHL, SHR)
#ifndef SWAR_H
#define SWAR_H
/*
https://www.chessprogramming.org/SIMD_and_SWAR_Techniques
SWAR add z = x + y
z = ((x &~H) + (y &~H)) ^ ((x ^ y) & H)
SWAR sub z = x - y
z = ((x | H) - (y &~H)) ^ ((x ^~y) & H)
SWAR average z = (x+y)/2 based on x + y = (x^y) + 2*(x&y)
z = (x & y) + (((x ^ y) & ~L) >> 1)
*/
// 0 1 2 3 4 5 6 7
#define SWARH 0x8080808080808080LL
#define SWARL 0x0101010101010101LL
#define SWARADD(x,y) \
((( (x) &~SWARH) + ( (y) &~SWARH)) ^ (( (x) ^ (y) ) & SWARH))
#define SWARSUB(x,y) \
((( (x) | SWARH) - ( (y) &~SWARH)) ^ (( (x) ^~(y) ) & SWARH))
#define SWARAVE(x,y) \
(( (x) & (y) ) + ((( (x) ^ (y)) & ~SWARL) >> 1))
#define SWARLTI(x,y) \
( SWARSUB(x,y) & SWARH )
#define SWARSHL(x) \
(((x)&(~SWARH))<<1)
#define SWARSHR(x) \
(((x)&(~SWARL))>>1)
/*** Computing unsigned less than
Truth table considering the HIGH bit setting of
Differece, X Value, Y Value
D X Y | R
0 0 0 | 0
0 0 1 | 1
0 1 0 | 0
0 1 1 | 0
1 0 0 | 1
1 0 1 | 1
1 1 0 | 0
1 1 1 | 1
***/
#define _SWARDH (SWARSUB(x,y) & SWARH)
#define _SWARXH ((x)&SWARH)
#define _SWARYH ((y)&SWARH)
#define SWARLTU(x,y) \
((~_SWARXH & _SWARYH) | (_SWARDH & ~_SWARXH) | (_SWARDH & _SWARYH))
// Elaborated from the generated ASM of the previous.
#define SWARLTU2(X,Y) \
((((~(X & SWARH)) & ((((~(X ^ Y)) & SWARH) ^ ((X | SWARH) - Y)) | Y)) | \
((((~(X ^ Y)) & SWARH) ^ ((X | SWARH) - Y)) & Y)) & SWARH)
#endif // SWAR_H
The file main.c
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <time.h>
#include "swar.h"
char * verifyltu(char * rs,uint64_t x, uint64_t y, uint64_t v);
void printvalues(uint64_t x,uint64_t y,uint64_t r,uint64_t r1);
uint64_t rand64();
int main(int argc, char *argv[])
{
int rnd=1;
size_t i,n=10;
uint64_t x=0,y=0,r,r1;
srand(time(NULL));
if (argc>1) {
if (argc==2) {
n=strtoul(argv[1],NULL,0);
} else {
x=strtoull(argv[1],NULL,0);
y=strtoull(argv[2],NULL,0);
rnd=0;
}
}
if (rnd) {
for(i=0;i<n;i++) {
x=rand64();
y=rand64();
r=SWARLTU(x,y);
r1=SWARLTU2(x,y);
printvalues(x,y,r,r1);
}
} else {
r=SWARLTU(x,y);
r1=SWARLTU2(x,y);
printvalues(x,y,r,r1);
}
return 0;
}
char * verifyltu(char * rs,uint64_t x, uint64_t y, uint64_t v)
{
size_t i;
uint8_t *xs, *ys, *vs;
xs=(uint8_t *)&x; ys=(uint8_t *)&y;
vs=(uint8_t *)&v;
for(i=0;i<sizeof(uint64_t);i++) {
if ( ( xs[i]<ys[i] && vs[i]&0x80) ||
( !(xs[i]<ys[i]) && !(vs[i]&0x80) ) )
{
rs[i*2]='*';rs[i*2+1]=' ';
} else {
rs[i*2]='-';rs[i*2+1]=' ';
}
}
rs[i*2]=0;
return rs;
}
void printvalues(uint64_t x,uint64_t y,uint64_t r,uint64_t r1)
{
char rs[17],rs1[17];
printf(
"X %016" PRIX64 " <\n"
"Y %016" PRIX64 "\n"
" ----------------\n"
"LTU %016" PRIX64 "\n"
"*=Ok %s\n"
"LTU2 %016" PRIX64 "\n"
"*=Ok %s\n\n",
x,y,
r,verifyltu(rs,x,y,r),
r1,verifyltu(rs1,x,y,r1)
);
}
uint64_t rand64()
{
uint64_t x;
x=rand(); x=(x<<32)+rand();
return x;
}