I need size of initialized data stored in an integer variable.
Suppose.
u32 var = 0x0; should return 0
u32 var = 0x12; should return 1
u32 var = 0x1234; should return 2
u32 var = 0x123456; should return 3
u32 var = 0x12345678; should return 4
I need size of initialized data stored in an integer variable.
Suppose.
u32 var = 0x0; should return 0
u32 var = 0x12; should return 1
u32 var = 0x1234; should return 2
u32 var = 0x123456; should return 3
u32 var = 0x12345678; should return 4
A log2(x) will give you the exponent of a binary value. Some C implementations have this function already built-in. If not, there are some alternative here: How to write log base(2) in c/c++
The resulting exponent can be divided and rounded in order to give the values you need.
A first attempt (untested) is:
int byteCount(const int x)
{
if (x == 0) return 0; /* Avoid error */
return (int)trunc((log10(x)/log10(2))/8+1);
}
UPDATE: It seems my code is being taken literally. Here is an optimized version:
int byteCount(const u32 x)
{
if (x == 0) return 0; /* Avoid error */
return (int)trunc((log10(x)/0.301029995663981)/8+1);
}
Do you need to count number of non-zero bytes?
u8 countNonZeroBytes(u32 n) {
u8 result = n == 0 ? 0 : 1;
while (n >> 8 != 0) {
result++;
n = n >> 8;
}
return result;
}
This should give you the answer as per your requirement.
u8 CountNonZeroBytes(u32 n) {
u32 mask = 0xFF;
u8 i, result = 0;
for (i = 0; i < sizeof(n); i++) {
if (mask & n)
result++;
mask = mask << 8;
}
return result;
}
Here is a version of the "leading zeroes" approach to log2
that doesn't use floating point. The optimizer will do loop unrolling, so it's equivalent to the "four compare" version. It is 4x faster than the floating point version.
u32
bytecnt(u32 val)
{
int bitno;
u32 msk;
u32 bycnt;
bycnt = 0;
for (bitno = 24; bitno >= 0; bitno -= 8) {
msk = 0xFF << bitno;
if (val & msk) {
bycnt = bitno / 8;
bycnt += 1;
break;
}
}
return bycnt;
}
Here is a test program that compares the two algorithms [Note that I'm using Jaime's floating point version for comparison]:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
typedef unsigned int u32;
#define RATIO \
do { \
if (tvslow > tvfast) \
ratio = tvslow / tvfast; \
else \
ratio = tvfast / tvslow; \
printf("%.3fx\n",ratio); \
} while (0)
int opt_f;
// _tvgetf -- get timestamp
double
_tvgetf(void)
{
struct timespec ts;
double val;
#if 1
clock_gettime(CLOCK_REALTIME,&ts);
#else
clock_gettime(CLOCK_MONOTONIC_RAW,&ts);
#endif
val = ts.tv_nsec;
val /= 1e9;
val += ts.tv_sec;
return val;
}
u32
bytecnt(u32 val)
{
int bitno;
u32 msk;
u32 bycnt;
bycnt = 0;
for (bitno = 24; bitno >= 0; bitno -= 8) {
msk = 0xFF << bitno;
if (val & msk) {
bycnt = bitno / 8;
bycnt += 1;
break;
}
}
return bycnt;
}
u32
bytecnt2(u32 val)
{
u32 bycnt;
do {
if (val & (0xFF << 24)) {
bycnt = 4;
break;
}
if (val & (0xFF << 16)) {
bycnt = 3;
break;
}
if (val & (0xFF << 8)) {
bycnt = 2;
break;
}
if (val & (0xFF << 0)) {
bycnt = 1;
break;
}
bycnt = 0;
} while (0);
return bycnt;
}
int byteCount(const int x)
{
if (x == 0) return 0; /* Avoid error */
return (int)trunc((log10(x)/log10(2))/8+1);
}
u32 byteCount2(u32 x)
{
if (x == 0) return 0; /* Avoid error */
return (u32)trunc((log10(x)/log10(2))/8+1);
}
static double l2 = 0;
u32 byteCount3(u32 x)
{
if (x == 0) return 0; /* Avoid error */
return (u32)trunc((log10(x)/l2)/8+1);
}
u32 byteCount4(u32 x)
{
if (x == 0) return 0; /* Avoid error */
return (u32)trunc((log10(x)/0.301029995663981)/8+1);
}
void
test(u32 val)
{
u32 bicnt;
u32 lgcnt;
bicnt = bytecnt(val);
lgcnt = byteCount2(val);
if (bicnt != lgcnt) {
printf("%8.8X: bicnt=%8.8X lgcnt=%8.8X\n",
val,bicnt,lgcnt);
exit(1);
}
}
double
timeit(u32 (*proc)(u32),const char *who)
{
double tvbeg;
double tvdif;
double tvper;
int trycnt;
int trymax;
u32 val;
trymax = 1000000;
trymax *= 10;
tvbeg = _tvgetf();
for (trycnt = 1; trycnt < trymax; ++trycnt) {
for (val = 1; val != 0; val <<= 1)
proc(val);
}
tvdif = _tvgetf();
tvdif -= tvbeg;
tvper = tvdif;
tvper /= trymax;
tvper /= 32;
printf("%.9f %.9f -- %s\n",tvdif,tvper,who);
return tvdif;
}
int
main(int argc,char **argv)
{
char *cp;
u32 val;
double tvfast;
double tvslow;
double ratio;
--argc;
++argv;
l2 = log10(2);
for (; argc > 0; --argc, ++argv) {
cp = *argv;
if (*cp != '-')
break;
switch (cp[1]) {
case 'f':
opt_f = 1;
break;
}
}
// do quick validity test
printf("quick validity test ...\n");
test(0);
for (val = 1; val != 0; val <<= 1)
test(val);
// speed tests
printf("speed tests ...\n");
tvfast = timeit(bytecnt2,"bytecnt2");
tvslow = timeit(bytecnt,"bytecnt");
RATIO;
tvslow = timeit(byteCount2,"byteCount2");
RATIO;
tvslow = timeit(byteCount3,"byteCount3");
RATIO;
tvslow = timeit(byteCount4,"byteCount4");
RATIO;
// do full validity test
if (opt_f) {
for (val = 1; val != 0; ++val)
test(val);
}
return 0;
}
Here is the test output:
quick validity test ...
speed tests ...
1.180300474 0.000000004 -- bytecnt2
1.363260031 0.000000004 -- bytecnt
1.155x
6.759670734 0.000000021 -- byteCount2
5.727x
6.653460503 0.000000021 -- byteCount3
5.637x
6.636421680 0.000000021 -- byteCount4
5.623x
UPDATE:
my byteCount proposal is not optimized, for the sake of clarity. For example, you can convert log10(2) into a constant. I think that would have a noticeable increase of performance.
I've updated the test program to incorporate the changes.
But, the optimizer had already eliminated the log10(2)
in your original code (i.e. only one call to log10
), so hand coding it had little to no effect.
Several others did similar loop implementations for number of zero bytes [which I don't believe is what OP wanted, based on the "sizeof" phrase].
It turns out that the fastest version is also the simplest, most boring, and [IMO] most straightforward. This is something I added: bytecnt2
, which is the "four compares" suggested by Paul R.
Doing floating point would be fine with better [or comparable] performance. I'd give it a pass even at 2x [FYI, before getting the results, I assumed that they would be ballpark (e.g. within 10%)].
But, the F.P. implementation is also less straightforward for OP's intended result.
IMO, something that is 4x slower [and more complicated] is a red flag. Not just tweaking, but indicates the approach is incorrect. Taking an int and converting it into a float [and back again], using some relatively heavyweight functions, for something that simple bit shifting/masking will accomplish.
If you don't mind use gcc extensions, this is a very good solution:
by the way you should be more clear in your question. Your terminology is confusing. Both "size" and "initialized" are used outside their established meaning.
Extra extra safe/portable: (probably not needed):
size_t leading_zeroes(uint32_t v)
{
if (v == 0) // __builtin_clz is undefined for 0
return sizeof(uint32_t) * CHAR_BIT;
return __builtin_clz(v);
}
size_t trailing_bytes(uint32_t v)
{
return sizeof(uint32_t) - leading_zeroes(v) / CHAR_BIT;
}
Simpler version:
size_t leading_zeroes(uint32_t v)
{
if (v == 0) // __builtin_clz is undefined for 0
return 32;
return __builtin_clz(v);
}
size_t trailing_bytes(uint32_t v)
{
return 4 - leading_zeroes(v) / 8;
}