I sporadically chose to try writing my own strtok() function (named mystrtok()) and compare it to C's strtok() from <string.h> and came across this strange phenomenon:
When compiled on Linux with gcc, strtok() performs faster than my version as expected. However, when compiled on Windows with gcc or cl, strtok() is significantly slower than my version.
Confused by this, I wondered what might happen if I tried pasting in the source code for strtok() (renaming it to stdstrtok() to avoid name collision) and benchmarking it separately. It gave inconsistent but close results. This is where I got the code from
Does anyone have an idea why strtok() is running so slowly when compiled on Windows? Here's the code I am using:
#include <stdio.h>
#include <string.h>
#include <time.h>
#ifdef _MSC_VER
#define RESTRICT __restrict
void print_compiler() {
printf("This was compiled with CL\n");
}
#else
#define RESTRICT restrict
void print_compiler() {
printf("This was compiled with GCC\n");
}
#endif
typedef struct {
char string[100]; // string to be processed
char* delim; // delimeter for tokens
char size; // number of characters in string
} to_parse;
void show(func,name,target) // Show how the strtok processes and alters the target string
char* RESTRICT (*func)(char*,const char*); // Function pointer to strtok variants
const char* RESTRICT name; // Name of strtok variant for display
to_parse target; // Information about the string and its delimeter
{
printf("%s\tgives the output: ",name);
char* output = func(target.string,target.delim);
while (output != NULL) {
printf("{%s}",output);
output = func(NULL,target.delim);
};
printf("\n%s\tchanged the string to: ",name);
int x;
for (x=0;x<target.size;x++) {
if (target.string[x] == '\0') printf("%s","\\0");
else printf("%c",target.string[x]);
}
printf("\n");
}
void bench(func,name,target,iterations) // Benchmark a strtok variant by measuring execution seconds
char* RESTRICT (*func)(char*,const char*); // Function pointer to strtok variants
const char* RESTRICT name; // Name of strtok variant for display
const to_parse* target; // Information about the string and its delimeter
unsigned int iterations; // Number of times to execute strtok on the target
{
time_t start_time = time(NULL);
unsigned int x;
to_parse retarget;
for (x=0; x<iterations; x++) {
retarget = *target;
func(retarget.string,retarget.delim);
while(func(NULL,retarget.delim));
}
printf("%s\ttook %d seconds to iterate %d times\n",name,(int)(time(NULL)-start_time),iterations);
}
/*
This is my version of strtok, written for random practice. It operates
differently than strtok, returning null when nothing is between delimeters.
Also, it processes multichar delimeters correctly, unlike strtok. Why?
*/
char* mystrtok(line, delim)
register char* RESTRICT line;
register const char* RESTRICT delim;
{
static char* fline = NULL;
if (line != NULL) fline = line;
else if (fline == NULL) return NULL;
else line = fline;
register const char* fdelim = delim;
while (*fline != '\0') {
if (*fline == *fdelim) fdelim++;
else if (*fdelim == '\0') {
*(fline-(fdelim-delim)) = '\0';
return line;
}
else fdelim = delim;
fline++;
}
if (*fdelim == '\0') {
*(fline-(fdelim-delim)) = '\0';
return line;
}
fline = NULL;
return line;
}
/*
I copied this strtok function from
https://opensource.apple.com/source/Libc/Libc-167/string.subproj/strtok.c.auto.html
and renamed it to "stdstrtok" to avoid name collision with "strtok" from <string.h>
*/
char *
stdstrtok(s, delim)
register char *s;
register const char *delim;
{
register char *spanp;
register int c, sc;
char *tok;
static char *last;
if (s == NULL && (s = last) == NULL)
return (NULL);
/*
* Skip (span) leading delimiters (s += strspn(s, delim), sort of).
*/
cont:
c = *s++;
for (spanp = (char *)delim; (sc = *spanp++) != 0;) {
if (c == sc)
goto cont;
}
if (c == 0) { /* no non-delimiter characters */
last = NULL;
return (NULL);
}
tok = s - 1;
/*
* Scan token (scan for delimiters: s += strcspn(s, delim), sort of).
* Note that delim must have one NUL; we stop if we see that, too.
*/
for (;;) {
c = *s++;
spanp = (char *)delim;
do {
if ((sc = *spanp++) == c) {
if (c == 0)
s = NULL;
else
s[-1] = 0;
last = s;
return (tok);
}
} while (sc != 0);
}
/* NOTREACHED */
}
void benchem(const to_parse* target,unsigned int iterations) {
bench(mystrtok,"mystrtok",&*target,iterations);
bench(stdstrtok,"stdstrtok",&*target,iterations);
bench(strtok,"strtok ",&*target,iterations);
}
void showem(const to_parse* target) {
show(mystrtok,"mystrtok",*target);
show(stdstrtok,"stdstrtok",*target);
show(strtok,"strtok ",*target);
}
int main()
{
print_compiler();
const to_parse parse_me = { .string = "delimdelimDATAdelimdatadelimdelimDATAdelimdatadelimDATA",
.delim = "delim",
.size = 56
};
showem(&parse_me);
benchem(&parse_me,40000000);
// showem(&((to_parse){"duhduhduhtestduh","duh",17})); // this works too!
}
I'd also appreciate anyone compiling and running this on Windows and Linux (I used WSL) and confirm whether the issue is reproducible on their system.