I came up with this which works, but there is probably a more elegant way to do it. Using a look-up table seems a bit like cheating, but I don't know how to get rid of the last few multiples of 10 without branching and looping etc. Anyway, it might be of interest to someone.
A 64 bit version could be constructed similarly but instead I've opted to play safe and have 64bit function which calls the 32 bit function for which all input values have been checked.
uint32_t uint32mod10(uint32_t n) {
/*
n = sum_{i=0 to 7} (2^4i)n_i
n % 10 = (n_0 + 6(sum_{i=1 to 7} n_i)) % 10
n = sum_{i=0 to 9} (2^i)b_i
n % 10 = (b_0 + 2(b_1 + b_5 + b_9) + 4(b_2 + b_6) + 8(b_3 + b_7) + 6(b_4 + b_8))
*/
uint8_t lut[44] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3};
uint32_t m = ((n >> 4) & 0xf) + ((n >> 8) & 0xf) + ((n >> 12) & 0xf) + ((n >> 16) & 0xf)
+ ((n >> 20) & 0xf) + ((n >> 24) & 0xf) + ((n >> 28) & 0xf); // max 7*15 = 105
n = (n & 0xf) + (m << 2) + (m << 1); // max 15 + 630 = 645
m = ((n & 0x10) >> 3) + ((n & 0x100) >> 7);
n = (n & 0xf) + ((n & 0xe0) >> 4) + (m << 1) + m
+ ((n & 0x200) >> 8); // max 15 + 14 + 12 + 2 = 43
return lut[n];
}
uint32_t uint64mod10(uint64_t n) {
/*
n = n_0 + n_1*2^24 + n_2*2^48
n % 10 = (n_0 + 6(n_1 + n_2)) % 10
*/
uint64_t m = (n >> 48) + ((n >> 24) & 0xffffff);
return uint32mod10((uint32_t)((n & 0xffffff) + (m << 2) + (m << 1)));
}
EDIT:
Something probably easy to code on a microcontroller which could actually be extended for use for binary to decimal conversion for 32bit unsigned integers is as follows.
static inline uint32_t uint32divby5(uint32_t n) {
uint32_t a, m = (n >> 2) - (n >> 4);
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
m = (n - m) >> 2;
a = m + (m << 2);
if (a > n) return m - 1;
return m;
}
uint32_t uint32mod10b(uint32_t n) {
uint32_t m = uint32divby5(n >> 1);
return n - (m << 3) - (m << 1);
}
EDIT2:
Another alternative if 64bit arithmetic is available,
static inline uint32_t fastdiv16(uint16_t denom, uint32_t lp30, uint64_t m, uint32_t num) {
//result = num/denom
//uint64_t m = (((uint64_t)1 << 63) / denom);
//uint32_t lp30 = 30 + lzcnt(m);
//m = m >> (63 - lp30);
uint64_t q = (num * m) >> lp30;
uint64_t test = q*denom;
return (uint32_t)(q + 1 - (test + denom > num));
}
uint32_t uint32mod10c(uint32_t n) {
uint32_t m = fastdiv16(10, 34, 1717986918, n);
return n - (m << 3) - (m << 1);
}