You could check k
elements at a time by casting to the largest integer available and crafting the right mask, eg: if you have an array of char, or int8_t, you could check 8 elements at a time.
The mask is crafted to be all zeroes, except for the sign bits.
This way if one of those chars is negative the bitwise AND of the mask and the cast iterator will be > 0
int8_t _i8[8] = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN };
uint64_t mask = *(uint64_t *)_i8;
uint64_t *it = (uint64_t *)a;
size_t i = 0;
if (it[i] & mask)
return FALSE;
Of course you should take care of the remainders of n % k
, which can be done with a simple switch like this:
size_t i = 0;
switch (n % 8) {
case 0: if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
}
Here I've made a proof of concept that compares the naive approach, the unrolled approach using the Duff's device, and an unrolled and masked approach that uses both the Duff's device and the masking.
It would require a bit more testing, but seems promising nonetheless.
You can run it here https://onlinegdb.com/SyliIUrSZO
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <time.h>
#define CLOCKS_PER_MSEC (CLOCKS_PER_SEC/1000)
#define TRUE (0 == 0)
#define FALSE (!TRUE)
// 8 bit integers
int are_non_negatives_naive_i8(int8_t *a, size_t n)
{
size_t i;
for (i = 0; i < n; ++i)
if (a[i] < 0)
return FALSE;
return TRUE;
}
int are_non_negatives_unrolled_i8(int8_t *a, size_t n)
{
size_t M = (n + 7) / 8;
size_t i = 0;
switch (n % 8) {
case 0: do { if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
} while (--M > 0);
}
}
int are_non_negatives_unrolled_masked_i8(int8_t *a, size_t n)
{
int8_t _i8[8] = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN };
uint64_t mask = *(uint64_t *)_i8;
uint64_t *it = (uint64_t *)a;
size_t i = 0;
switch (n % 8) {
case 0: if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
}
size_t N = n / 8;
// for (i = 0; i < N; ++i)
// if (it[i] & mask)
// return FALSE;
size_t M = (N + 7) / 8;
switch (N % 8) {
case 0: do { if (it[i++] & mask) return FALSE;
case 7: if (it[i++] & mask) return FALSE;
case 6: if (it[i++] & mask) return FALSE;
case 5: if (it[i++] & mask) return FALSE;
case 4: if (it[i++] & mask) return FALSE;
case 3: if (it[i++] & mask) return FALSE;
case 2: if (it[i++] & mask) return FALSE;
case 1: if (it[i++] & mask) return FALSE;
} while (--M > 0);
}
return TRUE;
}
//----------------------------------------------------------------
// 16 bit integers
int are_non_negatives_naive_i16(int16_t *a, size_t n)
{
size_t i;
for (i = 0; i < n; ++i)
if (a[i] < 0)
return FALSE;
return TRUE;
}
int are_non_negatives_unrolled_i16(int16_t *a, size_t n)
{
size_t M = (n + 7) / 8;
size_t i = 0;
switch (n % 8) {
case 0: do { if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
} while (--M > 0);
}
}
int are_non_negatives_unrolled_masked_i16(int16_t *a, size_t n)
{
int16_t _i16[4] = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN };
uint64_t mask = *(uint64_t *)_i16;
uint64_t *it = (uint64_t *)a;
size_t i = 0;
switch (n % 8) {
case 0: if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
}
size_t N = n / 4;
// for (i = 0; i < N; ++i)
// if (it[i] & mask)
// return FALSE;
size_t M = (N + 7) / 8;
switch (N % 8) {
case 0: do { if (it[i++] & mask) return FALSE;
case 7: if (it[i++] & mask) return FALSE;
case 6: if (it[i++] & mask) return FALSE;
case 5: if (it[i++] & mask) return FALSE;
case 4: if (it[i++] & mask) return FALSE;
case 3: if (it[i++] & mask) return FALSE;
case 2: if (it[i++] & mask) return FALSE;
case 1: if (it[i++] & mask) return FALSE;
} while (--M > 0);
}
return TRUE;
}
//----------------------------------------------------------------
// 32 bit integers
int are_non_negatives_naive_i32(int32_t *a, size_t n)
{
size_t i;
for (i = 0; i < n; ++i)
if (a[i] < 0)
return FALSE;
return TRUE;
}
int are_non_negatives_unrolled_i32(int32_t *a, size_t n)
{
size_t M = (n + 7) / 8;
size_t i = 0;
switch (n % 8) {
case 0: do { if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
} while (--M > 0);
}
}
int are_non_negatives_unrolled_masked_i32(int32_t *a, size_t n)
{
int32_t _i32[2] = { INT32_MIN, INT32_MIN };
uint64_t mask = *(uint64_t *)_i32;
uint64_t *it = (uint64_t *)a;
size_t i = 0;
switch (n % 8) {
case 0: if (a[i++] < 0) return FALSE;
case 7: if (a[i++] < 0) return FALSE;
case 6: if (a[i++] < 0) return FALSE;
case 5: if (a[i++] < 0) return FALSE;
case 4: if (a[i++] < 0) return FALSE;
case 3: if (a[i++] < 0) return FALSE;
case 2: if (a[i++] < 0) return FALSE;
case 1: if (a[i++] < 0) return FALSE;
}
size_t N = n / 2;
// for (i = 0; i < N; ++i)
// if (it[i] & mask)
// return FALSE;
size_t M = (N + 7) / 8;
switch (N % 8) {
case 0: do { if (it[i++] & mask) return FALSE;
case 7: if (it[i++] & mask) return FALSE;
case 6: if (it[i++] & mask) return FALSE;
case 5: if (it[i++] & mask) return FALSE;
case 4: if (it[i++] & mask) return FALSE;
case 3: if (it[i++] & mask) return FALSE;
case 2: if (it[i++] & mask) return FALSE;
case 1: if (it[i++] & mask) return FALSE;
} while (--M > 0);
}
return TRUE;
}
//----------------------------------------------------------------
void fill_array(void *array, size_t type, size_t n, int contains_negatives)
{
size_t i;
switch (type) {
case 1: {
int8_t *a = array;
for (i = 0; i < n; ++i)
a[i] = rand() % INT8_MAX;
break;
}
case 2: {
int16_t *a = array;
for (i = 0; i < n; ++i)
a[i] = rand() % INT16_MAX;
break;
}
case 4: {
int32_t *a = array;
for (i = 0; i < n; ++i)
a[i] = rand() % INT32_MAX;
break;
}
case 8: {
int64_t *a = array;
for (i = 0; i < n; ++i)
a[i] = rand();
break;
}
}
if (contains_negatives) {
int how_many = rand() % n;
for (int i; i < how_many; ++i) {
int j = rand() % n;
switch (type) {
case 1: {
int8_t *a = array;
a[j] *= -1;
break;
}
case 2: {
int16_t *a = array;
a[j] *= -1;
break;
}
case 4: {
int32_t *a = array;
a[j] *= -1;
break;
}
case 8: {
int64_t *a = array;
a[j] *= -1;
break;
}
}
}
}
}
int main()
{
time_t t;
clock_t clock_start;
clock_t clock_end;
clock_t clock_naive = 0;
clock_t clock_unrolled = 0;
clock_t clock_unrolled_masked = 0;
/* Intializes random number generator */
srand((unsigned long) time(&t));
printf("8 bit integers\n");
printf("----------------------------------------------------------------\n");
int8_t *array_i8;
for (int i = 3; i < 7; ++i) {
size_t size = pow(10, i);
array_i8 = malloc(size * sizeof(int8_t));
printf("------------------------------------------------\n");
printf("%lu elements\n", size);
for (int has_neg = 0; has_neg < 2; ++has_neg) {
printf("--------------------------------\n");
printf("Has negatives: %s\n", has_neg ? "yes": "no");
fill_array(array_i8, sizeof(int8_t), size, has_neg);
int neg_naive;
int neg_unrolled;
int neg_unrolled_masked;
clock_start = clock();
neg_naive = are_non_negatives_naive_i8(array_i8, size);
clock_end = clock();
clock_naive = clock_end - clock_start;
clock_start = clock();
neg_unrolled = are_non_negatives_unrolled_i8(array_i8, size);
clock_end = clock();
clock_unrolled = clock_end - clock_start;
clock_start = clock();
neg_unrolled_masked = are_non_negatives_unrolled_masked_i8(array_i8, size);
clock_end = clock();
clock_unrolled_masked = clock_end - clock_start;
if (!has_neg) {
if (!neg_naive)
printf("Error: neg_naive\n");
if (!neg_unrolled)
printf("Error: neg_unrolled\n");
if (!neg_unrolled_masked)
printf("Error: neg_unrolled_masked\n");
}
printf(" clock_naive: %lu\n", clock_naive);
printf(" clock_unrolled: %lu\n", clock_unrolled);
printf("clock_unrolled_masked: %lu\n", clock_unrolled_masked);
printf("--------------------------------\n");
}
printf("------------------------------------------------\n");
free (array_i8);
array_i8 = NULL;
}
printf("----------------------------------------------------------------\n");
printf("16 bit integers\n");
printf("----------------------------------------------------------------\n");
int16_t *array_i16;
for (int i = 3; i < 7; ++i) {
size_t size = pow(10, i);
array_i16 = malloc(size * sizeof(int16_t));
printf("------------------------------------------------\n");
printf("%lu elements\n", size);
for (int has_neg = 0; has_neg < 2; ++has_neg) {
printf("--------------------------------\n");
printf("Has negatives: %s\n", has_neg ? "yes": "no");
fill_array(array_i16, sizeof(int16_t), size, has_neg);
int neg_naive;
int neg_unrolled;
int neg_unrolled_masked;
clock_start = clock();
neg_naive = are_non_negatives_naive_i16(array_i16, size);
clock_end = clock();
clock_naive = clock_end - clock_start;
clock_start = clock();
neg_unrolled = are_non_negatives_unrolled_i16(array_i16, size);
clock_end = clock();
clock_unrolled = clock_end - clock_start;
clock_start = clock();
neg_unrolled_masked = are_non_negatives_unrolled_masked_i16(array_i16, size);
clock_end = clock();
clock_unrolled_masked = clock_end - clock_start;
if (!has_neg) {
if (!neg_naive)
printf("Error: neg_naive\n");
if (!neg_unrolled)
printf("Error: neg_unrolled\n");
if (!neg_unrolled_masked)
printf("Error: neg_unrolled_masked\n");
}
printf(" clock_naive: %lu\n", clock_naive);
printf(" clock_unrolled: %lu\n", clock_unrolled);
printf("clock_unrolled_masked: %lu\n", clock_unrolled_masked);
printf("--------------------------------\n");
}
printf("------------------------------------------------\n");
free (array_i16);
array_i16 = NULL;
}
printf("----------------------------------------------------------------\n");
printf("32 bit integers\n");
printf("----------------------------------------------------------------\n");
int32_t *array_i32;
for (int i = 3; i < 7; ++i) {
size_t size = pow(10, i);
array_i32 = malloc(size * sizeof(int32_t));
printf("------------------------------------------------\n");
printf("%lu elements\n", size);
for (int has_neg = 0; has_neg < 2; ++has_neg) {
printf("--------------------------------\n");
printf("Has negatives: %s\n", has_neg ? "yes": "no");
fill_array(array_i32, sizeof(int32_t), size, has_neg);
int neg_naive;
int neg_unrolled;
int neg_unrolled_masked;
clock_start = clock();
neg_naive = are_non_negatives_naive_i32(array_i32, size);
clock_end = clock();
clock_naive = clock_end - clock_start;
clock_start = clock();
neg_unrolled = are_non_negatives_unrolled_i32(array_i32, size);
clock_end = clock();
clock_unrolled = clock_end - clock_start;
clock_start = clock();
neg_unrolled_masked = are_non_negatives_unrolled_masked_i32(array_i32, size);
clock_end = clock();
clock_unrolled_masked = clock_end - clock_start;
if (!has_neg) {
if (!neg_naive)
printf("Error: neg_naive\n");
if (!neg_unrolled)
printf("Error: neg_unrolled\n");
if (!neg_unrolled_masked)
printf("Error: neg_unrolled_masked\n");
}
printf(" clock_naive: %lu\n", clock_naive);
printf(" clock_unrolled: %lu\n", clock_unrolled);
printf("clock_unrolled_masked: %lu\n", clock_unrolled_masked);
printf("--------------------------------\n");
}
printf("------------------------------------------------\n");
free (array_i32);
array_i32 = NULL;
}
printf("----------------------------------------------------------------\n");
}
Any suggestions or fixes are more than welcome
Note that this solution is not standard and depends on the alignment of the data types.
Albeit it'll usually work, you should check it with something like this
#include <stdio.h>
#include <stdint.h>
#include <stdalign.h>
int main()
{
printf(
"alignof(int8_t ): %lu\n"
"alignof(int16_t): %lu\n"
"alignof(int32_t): %lu\n"
"alignof(int64_t): %lu\n",
alignof(int8_t),
alignof(int16_t),
alignof(int32_t),
alignof(int64_t)
);
return 0;
}