I see two alternative approaches.
I compared all three variants (yours and my two) on the following file:
(bash)for ((i=0;i<100000;++i)); do echo "$i $i $i $i"; done > test.txt
test.txt was placed in tmpfs.
All timings in seconds.
Your variant:
CPU time 0.130000, abs time 0.135514
My variant 1:
CPU time 0.060000, abs time 0.062909
My variant 2:
CPU time 0.050000, abs time 0.052963
1)"C mode":
//FILE *in
// Reads `in` one line at a time with fgets and splits each line into four
// whitespace-separated words with sscanf.
// Limitation: a line longer than the buffer is delivered by fgets in several
// chunks, and each chunk is parsed as if it were a separate line.
char buf[1000];
// Every word buffer is as large as the line buffer, so a single word taken
// from `buf` can never overflow its destination.
char w1[sizeof(buf)];
char w2[sizeof(buf)];
char w3[sizeof(buf)];
char w4[sizeof(buf)];
// fgets stores at most size - 1 characters and always NUL-terminates, so the
// full buffer size is passed and no manual terminator is needed.
while (fgets(buf, sizeof(buf), in) != nullptr) {
    // sscanf returns the number of fields it converted; anything other than
    // four means the line is short or blank, so it is skipped instead of
    // being treated as a complete record.
    if (sscanf(buf, "%s %s %s %s", w1, w2, w3, w4) != 4) {
        continue;
    }
    //words.emplace_back(std::string(w1), std::string(w2), std::string(w3), std::string(w4));
}
2)"mapped file":
//MapFile in;
// Walks the mapped bytes [beg, end) in place, collecting words in groups of
// four. find_end_of_word (defined elsewhere) supplies the end of the word
// that starts at the cursor; separators between words are skipped here.
const char *beg = in.begin();
const char *end = beg + file_size;
std::string w[4];
const char *cursor = beg;
// Separators skipped between words: space, tab and newline.
const auto is_separator = [](char c) { return c == ' ' || c == '\t' || c == '\n'; };
bool reached_end = false;
while (!reached_end) {
    for (int slot = 0; slot < 4; ++slot) {
        const char *const word_end = find_end_of_word(cursor, end);
        w[slot].assign(cursor, word_end);
        if (word_end == end) {
            // The word ran up to the end of the data: nothing left to read.
            reached_end = true;
            break;
        }
        // Move past the word, then past any run of separators after it.
        for (cursor = word_end; cursor != end && is_separator(*cursor); ++cursor) {
        }
        reached_end = (cursor == end);
        if (reached_end) {
            break;
        }
    }
    //words.emplace_back(w[0], w[1], w[2], w[3]);
    // printf("%s %s %s %s\n", w[0].c_str(), w[1].c_str(), w[2].c_str(), w[3].c_str());
}