Another slow version, for uint32:
#include <stdexcept>   // std::invalid_argument
#include <cstddef>     // size_t

void str2uint_aux(unsigned& number, unsigned& overflowCtrl, const char*& ch)
{
    // cast to unsigned char so characters below '0' wrap into [0;255]
    // instead of becoming huge 32-bit values that slip past the check below
    unsigned digit = static_cast<unsigned char>(*ch - '0');
    ++ch;
    number = number * 10 + digit;
    unsigned overflow = (digit + (256 - 10)) >> 8;
    // if digit < 10 then overflow == 0, otherwise overflow >= 1
    overflowCtrl += overflow;
}
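To see the trick with concrete numbers (the assertions are my illustration, not part of the original):

// '7': digit == 7,  and 7 + 246 == 253,  253 >> 8 == 0   (accepted)
// ';': digit == 11, and 11 + 246 == 257, 257 >> 8 == 1   (counted as overflow)
static_assert((( 7u + 246u) >> 8) == 0, "digits pass");
static_assert(((11u + 246u) >> 8) == 1, "non-digits are flagged");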
unsigned str2uint(const char* s, size_t n)
{
    unsigned number = 0;
    unsigned overflowCtrl = 0;
    // for VC++10 the Duff's device is faster than a loop;
    // every case deliberately falls through to the next one
    switch (n)
    {
    default:
        throw std::invalid_argument(__FUNCTION__ " : `n' must be in [1;10]");
    case 10: str2uint_aux(number, overflowCtrl, s);
    case  9: str2uint_aux(number, overflowCtrl, s);
    case  8: str2uint_aux(number, overflowCtrl, s);
    case  7: str2uint_aux(number, overflowCtrl, s);
    case  6: str2uint_aux(number, overflowCtrl, s);
    case  5: str2uint_aux(number, overflowCtrl, s);
    case  4: str2uint_aux(number, overflowCtrl, s);
    case  3: str2uint_aux(number, overflowCtrl, s);
    case  2: str2uint_aux(number, overflowCtrl, s);
    case  1: str2uint_aux(number, overflowCtrl, s);
    }
    // here we can check that all chars were digits
    if (overflowCtrl != 0)
        throw std::invalid_argument(__FUNCTION__ " : `s' is not a number");
    return number;
}
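A minimal usage sketch (the test strings and the strlen call are mine):

#include <cstdio>
#include <cstring>

int main()
{
    const char* ok = "4294967295";               // 10 digits, fits in uint32
    printf("%u\n", str2uint(ok, strlen(ok)));    // prints 4294967295

    try {
        str2uint("12a4", 4);                     // 'a' is not a digit
    } catch (const std::invalid_argument& e) {
        printf("rejected: %s\n", e.what());
    }
}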
Why is it slow? Because it processes characters one by one. If we had a guarantee that we could read bytes up to s+16, we could vectorize both the *ch - '0' subtraction and the digit + 246 range check.
Like in this code:
// assumes a little-endian target (x86), so s[0] lands in the low byte
uint32_t digitsPack = *(const uint32_t*)s - 0x30303030;  // subtract '0' from every byte at once
overflowCtrl |= digitsPack | (digitsPack + 0x06060606);  // if one byte is not in range [0;10), its high nibble becomes non-zero
number = number * 10 + ( digitsPack        & 0xFF);      // s[0], the most significant digit
number = number * 10 + ((digitsPack >>  8) & 0xFF);      // s[1]
number = number * 10 + ((digitsPack >> 16) & 0xFF);      // s[2]
number = number * 10 + ((digitsPack >> 24) & 0xFF);      // s[3]
s += 4;
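And with a full 16 readable bytes the range check itself vectorizes directly. A sketch with SSE2 intrinsics (my illustration; the name all_digits16 is hypothetical, and the caller must guarantee 16 readable bytes at p):

#include <emmintrin.h>

// true iff all 16 bytes at p are ASCII digits '0'..'9'
bool all_digits16(const char* p)
{
    __m128i v   = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    __m128i lt0 = _mm_cmplt_epi8(v, _mm_set1_epi8('0'));  // byte < '0' ?
    __m128i gt9 = _mm_cmpgt_epi8(v, _mm_set1_epi8('9'));  // byte > '9' ?
    // the comparisons are signed, so bytes >= 0x80 also end up flagged in lt0
    return _mm_movemask_epi8(_mm_or_si128(lt0, gt9)) == 0;
}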
Small update for the range checking: the first snippet does a redundant shift (or mov) on every iteration, so it should be

unsigned digit = static_cast<unsigned char>(*s - '0');
overflowCtrl |= digit + (256 - 10);
...
if ((overflowCtrl >> 8) != 0) throw ...
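Put together, the updated helper and the final check look like this (a sketch; the unsigned char cast is my addition, it keeps characters below '0' from wrapping past bit 8):

void str2uint_aux(unsigned& number, unsigned& overflowCtrl, const char*& ch)
{
    unsigned digit = static_cast<unsigned char>(*ch - '0');
    ++ch;
    number = number * 10 + digit;
    overflowCtrl |= digit + (256 - 10);  // bit 8 records any non-digit, no shift needed here
}

// ... and in str2uint, one shift at the very end instead of one per character:
if ((overflowCtrl >> 8) != 0)
    throw std::invalid_argument(__FUNCTION__ " : `s' is not a number");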