This should be a fairly efficient solution, as it avoids the need to actually construct objects, perform explicit reads for each line, etc. It is not portable outside of POSIX systems, though (read: you'd have to rewrite it to use the equivalent WinAPI calls on Windows). Aside from the mmap
work (much of which can be factored out at least), it's basically a one-liner.
I don't really recommend this in general (your problem should be solved by just using std::vector
or the like so your data structure can grow dynamically to match the number of lines), but I'm putting it here if you're curious.
#include <algorithm>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
// Maps the whole of `filename` into memory read-only. On success the mapped
// length is stored in `map_size` and a pointer to the first byte is returned.
const char* map_whole_file(const char *filename, size_t& map_size);
// Example driver: maps the file and counts its lines by scanning the mapping
// for newline characters instead of reading it line by line.
int main(...) {
    const char *filename = ...; // From argv, constant, whatever
    size_t map_size = 0;
    const char *data = map_whole_file(filename, map_size);
    // Number of lines is count of newline characters, +1 if final line not
    // terminated with newline. An empty mapping has no last byte to inspect,
    // so it counts as zero lines instead of indexing one before the buffer.
    size_t numlines = 0;
    if (map_size != 0) {
        numlines = std::count(data, data + map_size, '\n') + (data[map_size - 1] != '\n');
        // munmap takes a non-const void*, and C++ will not drop the const
        // qualifier implicitly, so it has to be cast away explicitly.
        munmap(const_cast<char *>(data), map_size);
    }
}
// Opens `filename` read-only and maps its entire contents with PROT_READ and
// MAP_SHARED. On success the size reported by fstat is stored in `map_size`
// and the start of the mapping is returned; unmapping is left to the caller.
const char* map_whole_file(const char *filename, size_t& map_size) {
    const int descriptor = open(filename, O_RDONLY);
    if (descriptor == -1) {
        ...handle_error...;
    }

    // fstat supplies the file size, which doubles as the mapping length.
    struct stat info;
    if (fstat(descriptor, &info) == -1) {
        ...handle_error...;
    }

    // Keep the result as void* for the MAP_FAILED check; it is converted to
    // the character pointer type only when returned.
    void *region = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, descriptor, 0);
    if (region == MAP_FAILED) {
        ...handle_error...;
    }

    // The descriptor is no longer needed once the mapping exists.
    close(descriptor);

    // Optionally, posix_madvise(region, info.st_size, POSIX_MADV_WILLNEED);
    // or the like will hint the OS to aggressively page in the file, so
    // count is less likely to be delayed by disk I/O
    map_size = info.st_size;

    // C++ requires an explicit conversion from void* to a character pointer.
    return static_cast<const char *>(region);
}