A switch/case
is like a linear search through a table to find a table index.
What we want is something that takes the incoming two byte code value (e.g. "code") [which can be arbitrary] and convert this to a linear table index/pointer.
It might be possible to come up with an elaborate multitable lookup that (e.g.) takes the leftmost nibble as an index into the "zero level" table. Then, the next leftmost digit is an index into a second level table. This is similar to how virtual memory page tables are set up.
However, this can be complex and, although fast, it might use more memory.
With a virtual function table approach, we put the code into the vtable entry, along with any function pointers we need.
If the table is sorted, we can use a binary search to find the correct table entry. Below, I used a binary search to translate an incoming code into a table index/pointer.
The speed of this function (e.g. vtblfind
below) is the crux of it. The binary search is fast and general. But, it may be possible to devise a specialized function that is faster
Here's some code that illustrates the binary search approach:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
typedef enum {
COMMAND_1 = 0x0110,
COMMAND_2 = 0x0120,
COMMAND_x = 0x01A3,
INFO_1 = 0x0210,
INFO_2 = 0x0220,
INFO_3 = 0x0230,
#if 0
SM_LAST,
#endif
} SM_t;
typedef struct smctl smctl_t;
typedef void (*smfunc_p)(smctl_t *ctl,void *data);
struct smctl {
SM_t sm_port; // I2C port value
smfunc_p sm_func1; // a function
smfunc_p sm_func2; // another function
smfunc_p sm_func3; // yet another function
};
void command_1_func(smctl_t *,void *);
void command_2_func(smctl_t *,void *);
void command_x_func(smctl_t *,void *);
void info_1_func(smctl_t *,void *);
void info_2_func(smctl_t *,void *);
void info_3_func(smctl_t *,void *);
smctl_t vtable[] = {
{ .sm_port = COMMAND_1, .sm_func1 = command_1_func },
{ .sm_port = COMMAND_2, .sm_func1 = command_2_func },
{ .sm_port = COMMAND_x, .sm_func1 = command_x_func },
{ .sm_port = INFO_1, .sm_func1 = info_1_func },
{ .sm_port = INFO_2, .sm_func1 = info_2_func },
{ .sm_port = INFO_3, .sm_func1 = info_3_func },
};
#define ARRAY_COUNT(_arr) \
(sizeof(_arr) / sizeof(_arr[0]))
smctl_t *
vtblfind(uint16_t port)
{
int pval = port;
int ilo = 0;
int ihi = ARRAY_COUNT(vtable) - 1;
int imid;
int dif;
smctl_t *vptr = NULL;
smctl_t *vret = NULL;
while (ilo <= ihi) {
imid = (ilo + ihi) / 2;
vptr = &vtable[imid];
dif = pval - vptr->sm_port;
if (dif == 0) {
vret = vptr;
break;
}
if (dif < 0)
ihi = imid - 1;
else
ilo = imid + 1;
}
return vret;
}
uint8_t
handler(uint8_t request[2])
{
uint16_t port = *(uint16_t *) request;
smctl_t *vptr;
uint8_t ret = 23;
do {
vptr = vtblfind(port);
if (vptr == NULL) {
printf("handler: unknown request code -- %2.2X\n",port);
break;
}
vptr->sm_func1(vptr,request);
vptr->sm_func2(vptr,request);
} while (0);
return ret;
}
UPDATE:
I think the switch depends on the compiler implementation but mostly, they are not a linear search. A constant time is more usual
A smart compiler might be able to combine some tests into ranges or some other peephole optimizations, such as "conditional move" (e.g. cmovne
).
But, it's still a "linear" search: If you have N cases, you need to do N compares, sets, jumps.
The compiler has to pick an order and the compiler can reorder them but usually doesn't because, to be true to the source order, it follows it. The reason is, that, if you have a case that is 90% most typical, you [the programmer] put that first, because you want that to be tested first.
If you recoded the switch
as an if/else
ladder, you'd be forcing the order. You'd want the same behavior for your switch
.
If the cases were contiguous (e.g. 1, 2, 3, 4, ..., N
) [or nearly so], the compiler could build a jump table and index into it. That would be constant [O(1)] time. For certain specialized switch
sequences, I have seen a compiler do this.
Unfortunately, that is what we don't have here--the whole point of OP's question.
Here's a switch
based function:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
typedef enum {
COMMAND_1 = 0x0110,
COMMAND_2 = 0x0120,
COMMAND_x = 0x01A3,
INFO_1 = 0x0210,
INFO_2 = 0x0220,
INFO_3 = 0x0230,
#if 0
SM_LAST,
#endif
} SM_t;
typedef struct smctl smctl_t;
typedef void (*smfunc_p)(smctl_t *ctl,void *data);
struct smctl {
SM_t sm_port; // I2C port value
smfunc_p sm_func1; // a function
smfunc_p sm_func2; // another function
smfunc_p sm_func3; // yet another function
};
void command_1_func(smctl_t *,void *);
void command_2_func(smctl_t *,void *);
void command_x_func(smctl_t *,void *);
void info_1_func(smctl_t *,void *);
void info_2_func(smctl_t *,void *);
void info_3_func(smctl_t *,void *);
smfunc_p
funcfind(uint16_t port)
{
smfunc_p func = NULL;
switch (port) {
case COMMAND_1:
func = command_1_func;
break;
case COMMAND_2:
func = command_2_func;
break;
case COMMAND_x:
func = command_x_func;
break;
case INFO_1:
func = info_1_func;
break;
case INFO_2:
func = info_2_func;
break;
case INFO_3:
func = info_3_func;
break;
default:
func = NULL;
break;
}
return func;
}
uint8_t
handler(uint8_t request[2])
{
uint16_t port = *(uint16_t *) request;
smfunc_p func;
uint8_t ret = 23;
do {
func = funcfind(port);
if (func == NULL) {
printf("handler: unknown request code -- %2.2X\n",port);
break;
}
func(NULL,request);
} while (0);
return ret;
}
Here's the [x86_64
] disassembly [with -O2
]. There are still six cmp
instructions, which is the same as the six case
statements:
0000000000000000 <funcfind>:
0: 66 81 ff a3 01 cmp $0x1a3,%di
5: 74 31 je 38 <L00>
7: 76 37 jbe 40 <L02>
9: b8 00 00 00 00 mov $0x0,%eax
e: 66 81 ff 20 02 cmp $0x220,%di
13: 74 28 je 3d <L01>
15: b8 00 00 00 00 mov $0x0,%eax
1a: 66 81 ff 30 02 cmp $0x230,%di
1f: 74 1c je 3d <L01>
21: 66 81 ff 10 02 cmp $0x210,%di
26: ba 00 00 00 00 mov $0x0,%edx
2b: b8 00 00 00 00 mov $0x0,%eax
30: 48 0f 45 c2 cmovne %rdx,%rax
34: c3 retq
35: 0f 1f 00 nopl (%rax)
38:L00 b8 00 00 00 00 mov $0x0,%eax
3d:L01 c3 retq
3e: 66 90 xchg %ax,%ax
40:L02 b8 00 00 00 00 mov $0x0,%eax
45: 66 81 ff 10 01 cmp $0x110,%di
4a: 74 f1 je 3d <L01>
4c: 66 81 ff 20 01 cmp $0x120,%di
51: ba 00 00 00 00 mov $0x0,%edx
56: b8 00 00 00 00 mov $0x0,%eax
5b: 48 0f 45 c2 cmovne %rdx,%rax
5f: c3 retq
UPDATE #2:
I think this is the best performance that can be achieved using low-level languages (probably faster than higher performance solutions in high-level languages).
Yes, this is about the fastest, given the two byte "key" you have. It doesn't "hash" too well [any decent hash algorithm would be slower--see below].
I was hoping to find something like a python
dictionary where the key would be the enum
and the value would be function pointer.
Ah, python
... [Personally, I prefer perl
, but a topic for another time, perhaps ;-)].
What you're talking about with a python
dictionary is what perl
calls a "hash" [aka "associative array"].
Indexing into it, deceptively looks like an O(1) operation [based on the syntax], but, it's much more under the hood [which is the level we have to talk about when using c
].
A two byte key doesn't benefit too much from a hash. So, let's consider an arbitrary length buffer (e.g. a string).
We compute a [32 bit] hash value. For fixed constant strings, the perl/python
compiler could compute the hash value at compile time.
Either way, we use the hash value (modulo the number of hash buckets) to index into a hash table to get the address of a sub-list. We have to traverse that list to find a match on the actual key.
The sub-list is typically a singly [or doubly] linked list. So, we have to traverse that.
Or, the sub-list could be a sorted array and we could use a binary search. Or, it could be a balanced tree (e.g. a red/black tree). But, these are more problematic in the general case.
Here's a rewrite of my original code that finds the descriptor based on a string/length. For brevity, this only has the "search" code [and not insert/update/delete].
Note that the strhash
function I'm using is a ripoff of perl's actual hash function. python
will use something similar. The execution time for a given search will be on par with either python
or perl
:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// strhash -- calculate a string hash
uint32_t
strhash(const void *bf,size_t bflen)
{
const uint8_t *bp;
const uint8_t *bfe;
uint32_t hash;
bp = bf;
bfe = bp + bflen;
hash = 0x37FB26C5;
for (; bp < bfe; ++bp) {
hash += *bp;
hash += hash << 10;
hash ^= hash >> 6;
}
hash += hash << 3;
hash ^= hash >> 11;
hash += hash << 15;
return hash;
}
typedef struct smctl smctl_t;
typedef void (*smfunc_p)(smctl_t *ctl,void *data);
struct smctl {
uint32_t sm_hash; // hash value
const uint8_t *sm_key; // key value
size_t sm_keylen; // length of sm_key
smctl_t *sm_next; // pointer to next element on sublist
smfunc_p sm_func1; // a function
smfunc_p sm_func2; // another function
smfunc_p sm_func3; // yet another function
};
// hash table element
typedef struct {
smctl_t *list_head; // head of list
} vlist_t;
// hash table
#define HASHMAX 256
vlist_t hashlist[HASHMAX];
smctl_t *
vtblfind(const void *buf,size_t buflen)
{
uint32_t hash;
smctl_t *ctl;
// get hash value for buffer
hash = strhash(buf,buflen);
// locate hash bucket
vlist_t *vlist = &hashlist[hash % HASHMAX];
// scan hash bucket for match
for (ctl = vlist->list_head; ctl != NULL; ctl = ctl->sm_next) {
// hash values must match
if (hash != ctl->sm_hash)
continue;
// key length must match
if (ctl->sm_keylen != buflen)
continue;
// do compare of key/buffer contents
if (memcmp(ctl->sm_key,buf,buflen) == 0)
break;
}
return ctl;
}
void
test(void)
{
smctl_t *ctl = vtblfind("the quick brown fox",19);
printf("test: ctl=%p\n",ctl);
}
UPDATE #3:
I was hoping that someone has come up with a simple, static, heuristic hashing function for these cases that doesn't need collision resolution etc.
Either the switch
or the binary search doesn't use collision resolution. That was just for the string example.
I guess for the number of commands I have (around 50)
The rule of thumb is linear search for up to ~20 items and binary search for larger.
With the exact list of port numbers, it's possible to get a "smart" locator. That is, maybe a 2 level lookup based on MSB and then LSB. Or, something compact but fast based on tricks that are specific to the actual port numbers.
Because you've got a speed/space tradeoff, you'll have to measure/benchmark to find the optimal mix.
a switch case would be the best way to go for code maintainability. I can put the most commonly used commands on top to save some speed
My general rule is: If you can't measure it, you can't tune it
As I [and others] have mentioned, a table would be just about as fast and with a smaller memory footprint than a switch
. The table might even be faster due to the smaller loading on the processor instruction cache.
The switch
doesn't scale too well. If you had a million types, the table would be obvious. If you had two, the switch
would be obvious. Whenever I've had to make a switch
vs. table decision, I've usually found the table approach to be the winner based on several factors. YMMV.
For the binary search, if you don't want to have to have the list sorted by hand, you could just use (e.g.) a qsort
call during initialization.
If you decide to order the table by highest-to-lowest frequency, a linear search would be the best.
Another method would be a "cache" of the most frequently found/used entries (aka "memoization"). If there is a cache hit, use it. If not, the "slow" path (i.e. binary search) can be used. The cache replacement can be based on either LRU or frequency.
Consider a multistep design process. You could start with the binary search implementation. It's probably the best overall space/speed tradeoff.
Get it working. Use that to collect frequency data and benchmark values. See if there is a pattern (e.g. portA is always followed by portB). The data collected could help guide you to a more optimal solution for your use case.
Here's some code that illustrates the cache approach:
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
typedef uint32_t u32;
typedef enum {
COMMAND_1 = 0x0110,
COMMAND_2 = 0x0120,
COMMAND_x = 0x01A3,
INFO_1 = 0x0210,
INFO_2 = 0x0220,
INFO_3 = 0x0230,
#if 0
SM_LAST,
#endif
} SM_t;
typedef struct smctl smctl_t;
typedef void (*smfunc_p)(smctl_t *ctl,void *data);
struct smctl {
SM_t sm_port; // I2C port value
u32 sm_freq; // frequency count
smfunc_p sm_func1; // a function
smfunc_p sm_func2; // another function
smfunc_p sm_func3; // yet another function
};
void command_1_func(smctl_t *,void *);
void command_2_func(smctl_t *,void *);
void command_x_func(smctl_t *,void *);
void info_1_func(smctl_t *,void *);
void info_2_func(smctl_t *,void *);
void info_3_func(smctl_t *,void *);
smctl_t vtable[] = {
{ .sm_port = COMMAND_1, .sm_func1 = command_1_func },
{ .sm_port = COMMAND_2, .sm_func1 = command_2_func },
{ .sm_port = COMMAND_x, .sm_func1 = command_x_func },
{ .sm_port = INFO_1, .sm_func1 = info_1_func },
{ .sm_port = INFO_2, .sm_func1 = info_2_func },
{ .sm_port = INFO_3, .sm_func1 = info_3_func },
};
typedef struct {
#if OPT_CASHPORT
SM_t cash_port; // I2C port value
#endif
smctl_t *cash_ctl; // pointer to control
#if OPT_LRU
u32 cash_lru; // temporal value
#endif
} smcash_t;
#define CASHMAX 4
smcash_t cashlist[CASHMAX];
#if OPT_LRU
u32 lrucur; // current LRU
#endif
#define ARRAY_COUNT(_arr) \
(sizeof(_arr) / sizeof(_arr[0]))
smctl_t *
vtblfind(uint16_t port)
{
int pval = port;
int ilo = 0;
int ihi = ARRAY_COUNT(vtable) - 1;
int imid;
int dif;
smctl_t *vptr = NULL;
smctl_t *vret = NULL;
while (ilo <= ihi) {
imid = (ilo + ihi) / 2;
vptr = &vtable[imid];
dif = pval - vptr->sm_port;
if (dif == 0) {
vret = vptr;
break;
}
if (dif < 0)
ihi = imid - 1;
else
ilo = imid + 1;
}
return vret;
}
static inline int
cashcmp(const smcash_t *cash,uint16_t port)
{
int match;
#if OPT_CASHPORT
match = (cash->cash_port == port);
#else
match = (cash->cash_ctl->sm_port == port);
#endif
return match;
}
uint8_t
handler(uint8_t request[2])
{
uint16_t port = *(uint16_t *) request;
smctl_t *vptr = NULL;
smcash_t *cashcur = cashlist;
smcash_t *cashold = cashlist;
smcash_t *cashmatch = NULL;
uint8_t ret = 23;
do {
#if (! OPT_LRU)
u32 oldfreq = cashold->cash_ctl->sm_freq;
#endif
for (; cashcur < &cashlist[CASHMAX]; ++cashcur) {
// get exact match
if (cashcmp(cashcur,port))
cashmatch = cashcur;
// find oldest cache entry
#if OPT_LRU
if (cashcur->cash_lru < lrucur)
cashold = cashcur;
#else
u32 curfreq = cashcur->cash_ctl->sm_freq;
if (curfreq < oldfreq) {
oldfreq = curfreq;
cashold = cashcur;
}
#endif
}
// slow path lookup
if (vptr == NULL)
vptr = vtblfind(port);
if (vptr == NULL) {
printf("handler: unknown request code -- %2.2X\n",port);
break;
}
// update the cache
do {
// no exact match found
if (cashmatch == NULL) {
cashmatch = cashold;
#if OPT_CASHPORT
cashmatch->cash_port = port;
#endif
cashmatch->cash_ctl = vptr;
break;
}
smcash_t tmp = *cashold;
*cashold = *cashmatch;
*cashmatch = tmp;
} while (0);
#if OPT_LRU
cashmatch->cash_lru = ++lrucur;
#endif
vptr->sm_freq += 1;
vptr->sm_func1(vptr,request);
vptr->sm_func2(vptr,request);
} while (0);
return ret;
}