You do almost no processing so probably the bottleneck is the file system throughput. However you should measure first if it really is. If you don't want to use a profiler, you can just measure the running time of your application. The size of input file divided by the running time can be used to check if you've reached the file system throughput limit.
Then if you are far away from aforementioned limit you probably need to optimize the way you read the file. It may be better to read it in larger chunks using fread()
and then process the buffer stored in memory with sscanf()
.
You also can parse the buffer yourself which would be faster than *scanf()
.
[edit]
Especially for Drakosha:
$ time ./main1
Good entries: 10000000
real 0m3.732s
user 0m3.531s
sys 0m0.109s
$ time ./main2
Good entries: 10000000
real 0m0.605s
user 0m0.496s
sys 0m0.094s
So the optimized version makes ~127MB/s which may be my file system's bottleneck or maybe OS caches the file in RAM. The original version is ~20MB/s.
Tested with a 80MB file:
10000000
1234 a
1234 a
...
main1.c
#include <stdio.h>
int ok = 0;
void processEntry(int a, char c) {
if (a == 1234 && c == 'a') {
++ok;
}
}
int main(int argc, char **argv) {
FILE *f = fopen("data.txt", "r");
int total = 0;
int a;
char c;
int i = 0;
fscanf(f, "%d", &total);
for (i = 0; i < total; ++i) {
if (2 != fscanf(f, "%d %c", &a, &c)) {
fclose(f);
return 1;
}
processEntry(a, c);
}
fclose(f);
printf("Good entries: %d\n", ok);
return (ok == total) ? 0 : 1;
}
main2.c
#include <stdio.h>
#include <stdlib.h>
int ok = 0;
void processEntry(int a, char c) {
if (a == 1234 && c == 'a') {
++ok;
}
}
int main(int argc, char **argv) {
FILE *f = fopen("data.txt", "r");
int total = 0;
int a;
char c;
int i = 0;
char *numberPtr = NULL;
char buf[2048];
size_t toProcess = sizeof(buf);
int state = 0;
int fileLength, lengthLeft;
fseek(f, 0, SEEK_END);
fileLength = ftell(f);
fseek(f, 0, SEEK_SET);
fscanf(f, "%d", &total); // read the first line
lengthLeft = fileLength - ftell(f);
// read other lines using FSM
do {
if (lengthLeft < sizeof(buf)) {
fread(buf, lengthLeft, 1, f);
toProcess = lengthLeft;
} else {
fread(buf, sizeof(buf), 1, f);
toProcess = sizeof(buf);
}
lengthLeft -= toProcess;
for (i = 0; i < toProcess; ++i) {
switch (state) {
case 0:
if (isdigit(buf[i])) {
state = 1;
a = buf[i] - '0';
}
break;
case 1:
if (isdigit(buf[i])) {
a = a * 10 + buf[i] - '0';
} else {
state = 2;
}
break;
case 2:
if (isalpha(buf[i])) {
state = 0;
c = buf[i];
processEntry(a, c);
}
break;
}
}
} while (toProcess == sizeof(buf));
fclose(f);
printf("Good entries: %d\n", ok);
return (ok == total) ? 0 : 1;
}