GNU libc has this implementation of strcspn()
, which, quote,
Returns the length of the maximum initial segment of S which contains no characters from REJECT.
It's clever in its implementation in that it creates a 256-entry lookup table to make the operation of finding whether a character is in the reject
set a simple O(1) array lookup, as opposed to e.g. the OpenBSD implementation which uses nested loops.
However, I'm curious how the c0/c1/c2/c3 loop below can access memory past the apparent end of str
without a page fault or similar.
Does the C standard, for instance, guarantee that all strings, whether on the stack or heap or statically allocated, have their allocations aligned up to 4 bytes, so it's safe to access up to 3 past the last NUL?
I've added some comments in the code below; the original (as linked above) has none whatsoever.
/* Align a pointer by rounding down to closest size. */
#define PTR_ALIGN_DOWN(base, size) ...
size_t strcspn (const char *str, const char *reject)
{
if ((reject[0] == '\0') || (reject[1] == '\0'))
return __strchrnul (str, reject [0]) - str;
// Build a lookup table containing all characters from `reject`;
// due to the way the `do/while` loop below is constructed, `table`
// will end up having `table[0] = 1` always, which works as an
// exit condition.
unsigned char p[256] = {0};
unsigned char *s = (unsigned char*) reject;
unsigned char tmp;
do
p[tmp = *s++] = 1;
while (tmp);
// Check the first 4 bytes "by hand".
s = (unsigned char*) str;
if (p[s[0]]) return 0;
if (p[s[1]]) return 1;
if (p[s[2]]) return 2;
if (p[s[3]]) return 3;
// Align the pointer (for whichever reason?)
s = (unsigned char *) PTR_ALIGN_DOWN (s, 4);
unsigned int c0, c1, c2, c3;
do
{
s += 4; // Loop over 4 characters at a time (the first 4 bytes were checked before)
c0 = p[s[0]];
c1 = p[s[1]];
c2 = p[s[2]];
c3 = p[s[3]];
}
// break if any of c0, c1, c2, or c3 is nonzero,
// i.e. if we've either found one of the characters in `reject`, or a zero
while ((c0 | c1 | c2 | c3) == 0);
size_t count = s - (unsigned char *) str;
// figure out the actual offset based on which of c0..3 is set
return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3;
}