This is a partial answer as my approach cannot handle an arbitrary number of inputs bytes. It can handle any number of inputs for which an efficient sorting network is known. I am showing examples for arrays with a length of two through sixteen bytes in the code below.
Knuth, TAOCP, vol. 4A, section 7.1.1 explains in much detail the utility of the ternary majority operation ⟨xyz⟩ = (x ∧ y) ∨ (y ∧ z) ∨ (x ∧ z). This function, when applied bitwise to three inputs, will implement the result requested by asker. Knuth prefers to call the function the "median" instead of the "majority" because if one lets ∧ correspond to min
and ∨ correspond to max
, then ⟨xyz⟩ = y precisely when x ≤ y ≤ z.
The interesting observation here is that one way of constructing sorting networks is from min
and max
primitives. The median of three median3 (x,y,z) = min (max (min (y, z), x), max (y,z))
, while min3 (x,y,z) = min (x, min (y, z))
and max3 (x,y,z) = max (x, max (y, z))
.
Knuth points out that any monotone Boolean function can be expressed using only the median operation and the constants 0 and 1. Therefore, the median-of-five can be expressed this way, and according to Knuth (p. 64) the most efficient arrangement is ⟨vwxyz⟩ = ⟨v⟨xyz⟩⟨wx⟨wyz⟩⟩⟩.
As a test case for the derive-ability of the bitwise median computation from sorting networks, I used a nine-input network from the literature and converted it into the bit-wise median-of-nine computation, which delivers the result requested by asker. I also translated some search network graphs I had lying around from previous work into the corresponding sequences of mix / max operations, and translated additional network graphs from TAOCP vol. 3. For additional sorting networks I consulted Bert Dobbelaere's list as noted in code comments. The Wikipedia article on sorting networks suggests that (near) optimal sorting networks are known up to 20 inputs, thus covering the range of array lengths of interest to the asker.
As for efficiency, byte_mode_16()
in the code below compiled with Clang 16 compiles to about 170 x86 instructions with a lot of instruction-level parallelism, so I would guesstimate if could execute in about 50 cycles on a modern x86-64 CPU. On NVIDIA GPUs, which support arbitrary three-input logic operations with the LOP3
instruction, the same function compiles to about 80 instructions.
#include <cstdio>
#include <cstdlib>
#include <climits>
#include <cstdint>
#define REPS (1000000)
#define N (16)
#define USE_KNUTH (0)
uint8_t byte_mode_ref (const uint8_t *bytes, int n)
{
int count1 [CHAR_BIT] = {0, 0, 0, 0, 0, 0, 0, 0};
uint8_t res = 0;
for (int i = 0; i < n; i++) {
uint8_t a = bytes [i];
for (int j = 0; j < CHAR_BIT; j++) {
uint8_t bit = (a >> j) & 1;
count1 [j] += bit;
}
}
for (int j = 0; j < CHAR_BIT; j++) {
res |= (count1[j] > (n / 2)) << j;
}
return res;
}
#define MIN3(x,y,z) (x & y & z)
#define MEDIAN3(x,y,z) (((y & z) | x) & (y | z))
#define MAX3(x,y,z) (x | y | z)
#define MIN2(x,y) (x & y)
#define MAX2(x,y) (x | y)
#define SWAP(i,j) do { int s = MIN2(a##i,a##j); int t = MAX2(a##i,a##j); a##i = s; a##j = t; } while (0)
uint8_t byte_mode_2 (const uint8_t *bytes)
{
int x = bytes [0];
int y = bytes [1];
return MIN2 (x, y);
}
uint8_t byte_mode_3 (const uint8_t *bytes)
{
int x = bytes [0];
int y = bytes [1];
int z = bytes [2];
return MEDIAN3 (x, y, z);
}
uint8_t byte_mode_4 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
SWAP (0, 2); SWAP (1, 3);
SWAP (0, 1); SWAP (2, 3);
SWAP (1, 2);
return a1;
}
uint8_t byte_mode_5 (const uint8_t *bytes)
{
#if USE_KNUTH
int v = bytes [0];
int w = bytes [1];
int x = bytes [2];
int y = bytes [3];
int z = bytes [4];
/* Knuth, TAOCP, Vol. 4a, p. 64 */
return MEDIAN3 (v, MEDIAN3 (x, y, z), MEDIAN3 (w, x, (MEDIAN3 (w, y, z))));
#else // USE_KNUTH
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
SWAP (0, 3); SWAP (1, 4);
SWAP (0, 2); SWAP (1, 3);
SWAP (0, 1); SWAP (2, 4);
SWAP (1, 2); SWAP (3, 4);
SWAP (2, 3);
return a2;
#endif // USE_KNUTH
}
uint8_t byte_mode_6 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
SWAP (0, 5); SWAP (1, 3); SWAP (2,4);
SWAP (1, 2); SWAP (3, 4);
SWAP (0, 3); SWAP (2, 5);
SWAP (0, 1); SWAP (2, 3); SWAP (4, 5);
SWAP (1, 2); SWAP (3, 4);
return a2;
}
uint8_t byte_mode_7 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
SWAP (0, 6); SWAP (2, 3); SWAP (4, 5);
SWAP (0, 2); SWAP (1, 4); SWAP (3, 6);
SWAP (0, 1); SWAP (2, 5); SWAP (3, 4);
SWAP (1, 2); SWAP (4, 6);
SWAP (2, 3); SWAP (4, 5);
SWAP (1, 2); SWAP (3, 4); SWAP (5, 6);
return a3;
}
uint8_t byte_mode_8 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
SWAP (0, 2); SWAP (1, 3); SWAP (4, 6); SWAP (5, 7);
SWAP (0, 4); SWAP (1, 5); SWAP (2, 6); SWAP (3, 7);
SWAP (0, 1); SWAP (2, 3); SWAP (4, 5); SWAP (6, 7);
SWAP (2, 4); SWAP (3, 5);
SWAP (1, 4); SWAP (3, 6);
SWAP (1, 2); SWAP (3, 4); SWAP (5, 6);
return a3;
}
uint8_t byte_mode_9 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int b0, b1, b2, b3, b4, b5, b6, b7, b8;
int c2, c4, c6;
/*
Chaitali Chakrabarti and Li-Yu Wang, "Novel sorting network-based
architectures for rank order filters." IEEE Transactions on Very
Large Scale Integration Systems, Vol. 2, No. 4, 1994, pp. 502-507
*/
// SORT3 (0, 1, 2)
b0 = MIN3 (a0, a1, a2);
b1 = MEDIAN3 (a0, a1, a2);
b2 = MAX3 (a0, a1, a2);
// SORT3 (3, 4, 5)
b3 = MIN3 (a3, a4, a5);
b4 = MEDIAN3 (a3, a4, a5);
b5 = MAX3 (a3, a4, a5);
// SORT3 (6, 7, 8)
b6 = MIN3 (a6, a7, a8);
b7 = MEDIAN3 (a6, a7, a8);
b8 = MAX3 (a6, a7, a8);
// SORT3 (0, 3, 6)
// c0 = MIN3 (b0, b3, b6);
// c3 = MEDIAN3 (b0, b3, b6);
c6 = MAX3 (b0, b3, b6);
// SORT3 (1, 4, 7)
// c1 = MIN3 (b1, b4, b7);
c4 = MEDIAN3 (b1, b4, b7);
// c7 = MAX3 (b1, b4, b7);
// SORT3 (2, 5, 8)
c2 = MIN3 (b2, b5, b8);
// c5 = MEDIAN3 (b2, b5, b8);
// c8 = MAX3 (b2, b5, b8);
// final median computation
// SORT3 (2, 4, 6)
return MEDIAN3 (c2, c4, c6);
}
uint8_t byte_mode_10 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
#if USE_KNUTH
/* Knuth, TAOCP, vol. 3, p. 227 */
SWAP (4, 9); SWAP (3, 8); SWAP (2, 7); SWAP (1, 6); SWAP (0, 5);
SWAP (1, 4); SWAP (6, 9); SWAP (0, 3); SWAP (5, 8);
SWAP (0, 2); SWAP (3, 6); SWAP (7, 9);
SWAP (0, 1); SWAP (2, 4); SWAP (5, 7); SWAP (8, 9);
SWAP (4, 6); SWAP (1, 2); SWAP (7, 8); SWAP (3, 5);
SWAP (2, 5); SWAP (6, 8); SWAP (1, 3); SWAP (4, 7);
SWAP (2, 3); SWAP (6, 7);
SWAP (3, 4); SWAP (5, 6);
SWAP (4, 5);
#else // USE_KNUTH
/* https://bertdobbelaere.github.io/sorting_networks.html */
SWAP (0, 8); SWAP (1, 9); SWAP (2, 7); SWAP (3, 5); SWAP (4, 6);
SWAP (0, 2); SWAP (1, 4); SWAP (5, 8); SWAP (7, 9);
SWAP (0, 3); SWAP (2, 4); SWAP (5, 7); SWAP (6, 9);
SWAP (0, 1); SWAP (3, 6); SWAP (8, 9);
SWAP (1, 5); SWAP (2, 3); SWAP (4, 8); SWAP (6, 7);
SWAP (1, 2); SWAP (3, 5); SWAP (4, 6); SWAP (7, 8);
SWAP (2, 3); SWAP (4, 5); SWAP (6, 7);
SWAP (3, 4); SWAP (5, 6);
#endif // USE_KNUTH
return a4;
}
uint8_t byte_mode_11 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
/* https://bertdobbelaere.github.io/sorting_networks.html */
SWAP (0, 9); SWAP (1, 6); SWAP (2, 4); SWAP (3, 7); SWAP (5, 8);
SWAP (0, 1); SWAP (3, 5); SWAP (4, 10); SWAP (6, 9); SWAP (7, 8);
SWAP (1, 3); SWAP (2, 5); SWAP (4, 7); SWAP (8, 10);
SWAP (0, 4); SWAP (1, 2); SWAP (3, 7); SWAP (5, 9); SWAP (6, 8);
SWAP (0, 1); SWAP (2, 6); SWAP (4, 5); SWAP (7, 8); SWAP (9, 10);
SWAP (2, 4); SWAP (3, 6); SWAP (5, 7); SWAP (8, 9);
SWAP (1, 2); SWAP (3, 4); SWAP (5, 6); SWAP (7, 8);
SWAP (2, 3); SWAP (4, 5); SWAP (6, 7);
return a5;
}
uint8_t byte_mode_12 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
int a11 = bytes [11];
#if USE_KNUTH
/* Knuth, TAOCP, vol. 3, p. 227 */
SWAP (0, 1); SWAP (2, 3); SWAP (4, 5); SWAP (6, 7); SWAP (8, 9); SWAP (10, 11);
SWAP (1, 3); SWAP (5, 7); SWAP (9, 11); SWAP (0, 2); SWAP (4, 6); SWAP ( 8, 10);
SWAP (1, 2); SWAP (5, 6); SWAP (9, 10);
SWAP (1, 5); SWAP (6, 10);
SWAP (2, 6); SWAP (5, 9);
SWAP (1, 5); SWAP (6, 10);
SWAP (0, 4); SWAP (7, 11);
SWAP (3, 7); SWAP (4, 8);
SWAP (0, 4); SWAP (7, 11);
SWAP (1, 4); SWAP (7, 10); SWAP (3, 8);
SWAP (2, 3); SWAP (8, 9);
SWAP (2, 4); SWAP (7, 9); SWAP (3, 5); SWAP (6, 8);
SWAP (3, 4); SWAP (5, 6); SWAP (7, 8);
#else // USE_KNUTH
/* https://bertdobbelaere.github.io/sorting_networks.html */
SWAP (0, 8); SWAP (1, 7); SWAP (2, 6); SWAP (3, 11); SWAP (4, 10); SWAP (5, 9);
SWAP (0, 1); SWAP (2, 5); SWAP (3, 4); SWAP (6, 9); SWAP (7, 8); SWAP (10, 11);
SWAP (0, 2); SWAP (1, 6); SWAP (5, 10); SWAP (9, 11);
SWAP (0, 3); SWAP (1, 2); SWAP (4, 6); SWAP (5, 7); SWAP (8, 11); SWAP (9, 10);
SWAP (1, 4); SWAP (3, 5); SWAP (6, 8); SWAP (7, 10);
SWAP (1, 3); SWAP (2, 5); SWAP (6, 9); SWAP (8, 10);
SWAP (2, 3); SWAP (4, 5); SWAP (6, 7); SWAP (8, 9);
SWAP (4, 6); SWAP (5, 7);
SWAP (3, 4); SWAP (5, 6); SWAP (7, 8);
#endif // USE_KNUTH
return a5;
}
uint8_t byte_mode_13 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
int a11 = bytes [11];
int a12 = bytes [12];
/* Knuth, TAOCP, vol. 3, p. 227 */
SWAP (0, 5); SWAP ( 8, 12); SWAP (1, 7); SWAP ( 3, 9); SWAP ( 2, 4); SWAP (6, 11);
SWAP (0, 6); SWAP ( 7, 9); SWAP (1, 3); SWAP ( 5, 11); SWAP ( 2, 8); SWAP (4, 12);
SWAP (0, 2); SWAP ( 4, 5); SWAP (6, 8); SWAP ( 9, 10); SWAP (11, 12);
SWAP (7, 8); SWAP (10, 12); SWAP (5, 9); SWAP ( 3, 11);
SWAP (1, 5); SWAP ( 9, 11); SWAP (2, 3); SWAP ( 4, 7); SWAP ( 8, 10);
SWAP (0, 1); SWAP ( 5, 6); SWAP (8, 9); SWAP (10, 11);
SWAP (3, 6); SWAP ( 2, 5); SWAP (1, 4); SWAP ( 7, 8); SWAP ( 9, 10);
SWAP (3, 7); SWAP ( 1, 2); SWAP (4, 5); SWAP ( 6, 8);
SWAP (2, 4); SWAP ( 6, 7); SWAP (3, 5); SWAP ( 8, 9);
SWAP (3, 4); SWAP ( 5, 6);
return a6;
}
uint8_t byte_mode_14 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
int a11 = bytes [11];
int a12 = bytes [12];
int a13 = bytes [13];
/* https://bertdobbelaere.github.io/sorting_networks.html */
SWAP (0, 1); SWAP (2, 3); SWAP (4, 5); SWAP (6, 7); SWAP ( 8, 9); SWAP (10, 11); SWAP (12, 13);
SWAP (0, 2); SWAP (1, 3); SWAP (4, 8); SWAP (5, 9); SWAP (10, 12); SWAP (11, 13);
SWAP (0, 10); SWAP (1, 6); SWAP (2, 11); SWAP (3, 13); SWAP ( 5, 8); SWAP ( 7, 12);
SWAP (1, 4); SWAP (2, 8); SWAP (3, 6); SWAP (5, 11); SWAP ( 7, 10); SWAP ( 9, 12);
SWAP (0, 1); SWAP (3, 9); SWAP (4, 10); SWAP (5, 7); SWAP ( 6, 8); SWAP (12, 13);
SWAP (1, 5); SWAP (2, 4); SWAP (3, 7); SWAP (6, 10); SWAP ( 8, 12); SWAP ( 9, 11);
SWAP (1, 2); SWAP (3, 5); SWAP (4, 6); SWAP (7, 9); SWAP ( 8, 10); SWAP (11, 12);
SWAP (2, 3); SWAP (4, 5); SWAP (6, 7); SWAP (8, 9); SWAP (10, 11);
SWAP (3, 4); SWAP (5, 6); SWAP (7, 8); SWAP (9, 10);
return a6;
}
uint8_t byte_mode_15 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
int a11 = bytes [11];
int a12 = bytes [12];
int a13 = bytes [13];
int a14 = bytes [14];
/* https://bertdobbelaere.github.io/sorting_networks.html */
SWAP (0, 6); SWAP (1, 10); SWAP (2,14); SWAP (3, 9); SWAP ( 4, 12); SWAP ( 5, 13); SWAP ( 7, 11);
SWAP (0, 7); SWAP (2, 5); SWAP (3, 4); SWAP (6, 11); SWAP ( 8, 10); SWAP ( 9, 12); SWAP (13, 14);
SWAP (1, 13); SWAP (2, 3); SWAP (4, 6); SWAP (5, 9); SWAP ( 7, 8); SWAP (10, 14); SWAP (11, 12);
SWAP (0, 3); SWAP (1, 4); SWAP (5, 7); SWAP (6, 13); SWAP ( 8, 9); SWAP (10, 11); SWAP (12, 14);
SWAP (0, 2); SWAP (1, 5); SWAP (3, 8); SWAP (4, 6); SWAP ( 7, 10); SWAP ( 9, 11); SWAP (12, 13);
SWAP (0, 1); SWAP (2, 5); SWAP (3,10); SWAP (4, 8); SWAP ( 6, 7); SWAP ( 9, 12); SWAP (11, 13);
SWAP (1, 2); SWAP (3, 4); SWAP (5, 6); SWAP (7, 9); SWAP ( 8, 10); SWAP (11, 12);
SWAP (3, 5); SWAP (4, 6); SWAP (7, 8); SWAP (9, 10);
SWAP (2, 3); SWAP (4, 5); SWAP (6, 7); SWAP (8, 9); SWAP (10, 11);
return a7;
}
uint8_t byte_mode_16 (const uint8_t *bytes)
{
int a0 = bytes [0];
int a1 = bytes [1];
int a2 = bytes [2];
int a3 = bytes [3];
int a4 = bytes [4];
int a5 = bytes [5];
int a6 = bytes [6];
int a7 = bytes [7];
int a8 = bytes [8];
int a9 = bytes [9];
int a10 = bytes [10];
int a11 = bytes [11];
int a12 = bytes [12];
int a13 = bytes [13];
int a14 = bytes [14];
int a15 = bytes [15];
/* Knuth TAOCP, vol. 3, p. 229 */
SWAP (0, 1); SWAP ( 2, 3); SWAP ( 4, 5); SWAP ( 6, 7); SWAP ( 8, 9); SWAP (10, 11); SWAP (12, 13); SWAP (14, 15);
SWAP (1, 3); SWAP ( 5, 7); SWAP ( 9, 11); SWAP (13, 15); SWAP ( 0, 2); SWAP ( 4, 6); SWAP ( 8, 10); SWAP (12, 14);
SWAP (3, 7); SWAP (11, 15); SWAP ( 2, 6); SWAP (10, 14); SWAP ( 1, 5); SWAP ( 9, 13); SWAP ( 0, 4); SWAP ( 8, 12);
SWAP (7, 15); SWAP ( 6, 14); SWAP ( 5, 13); SWAP ( 4, 12); SWAP ( 3, 11); SWAP ( 2, 10); SWAP ( 1, 9); SWAP ( 0, 8);
SWAP (1, 2); SWAP ( 3, 12); SWAP (13, 14); SWAP ( 7, 11); SWAP ( 4, 8); SWAP ( 5, 10); SWAP ( 6, 9);
SWAP (1, 4); SWAP (11, 14); SWAP ( 7, 13); SWAP ( 2, 8); SWAP ( 6, 12); SWAP ( 3, 10); SWAP ( 5, 9);
SWAP (3, 5); SWAP ( 7, 9); SWAP (11, 13); SWAP ( 2, 4); SWAP ( 6, 8); SWAP (10, 12);
SWAP (5, 8); SWAP ( 9, 12); SWAP ( 3, 6); SWAP ( 7, 10);
SWAP (3, 4); SWAP ( 5, 6); SWAP ( 7, 8); SWAP ( 9, 10); SWAP (11, 12);
return a7;
}
// George Marsaglia's KISS PRNG, period 2**123. Newsgroup sci.math, 21 Jan 1999
// Bug fix: Greg Rose, "KISS: A Bit Too Simple" http://eprint.iacr.org/2011/007
static uint32_t kiss_z=362436069, kiss_w=521288629;
static uint32_t kiss_jsr=123456789, kiss_jcong=380116160;
#define znew (kiss_z=36969*(kiss_z&65535)+(kiss_z>>16))
#define wnew (kiss_w=18000*(kiss_w&65535)+(kiss_w>>16))
#define MWC ((znew<<16)+wnew )
#define SHR3 (kiss_jsr^=(kiss_jsr<<13),kiss_jsr^=(kiss_jsr>>17), \
kiss_jsr^=(kiss_jsr<<5))
#define CONG (kiss_jcong=69069*kiss_jcong+1234567)
#define KISS ((MWC^CONG)+SHR3)
int main (void)
{
uint8_t res, ref, arr [N];
printf ("Testing byte arrays of length %d\n", N);
for (int i = 0; i < REPS; i++) {
for (int j = 0; j < N; j++) {
arr [j] = KISS;
}
ref = byte_mode_ref (arr, N);
#if N==2
res = byte_mode_2 (arr);
#elif N==3
res = byte_mode_3 (arr);
#elif N==4
res = byte_mode_4 (arr);
#elif N==5
res = byte_mode_5 (arr);
#elif N==6
res = byte_mode_6 (arr);
#elif N==7
res = byte_mode_7 (arr);
#elif N==8
res = byte_mode_8 (arr);
#elif N==9
res = byte_mode_9 (arr);
#elif N==10
res = byte_mode_10 (arr);
#elif N==11
res = byte_mode_11 (arr);
#elif N==12
res = byte_mode_12 (arr);
#elif N==13
res = byte_mode_13 (arr);
#elif N==14
res = byte_mode_14 (arr);
#elif N==15
res = byte_mode_15 (arr);
#elif N==16
res = byte_mode_16 (arr);
#else
#error unsupported value of N
#endif
if (res != ref) {
printf ("Error: res=%02x ref=%02x bytes: ", res, ref);
for (int j = 0; j < N; j++) {
printf ("%02x ", arr[j]);
}
printf ("\nTest FAILED\n");
return EXIT_FAILURE;
}
}
printf ("Test PASSED\n");
return EXIT_SUCCESS;
}