Here's a way to reverse the lines of a file using mmap to access both the
input and the output file. It should be faster for larger files.
I've tested this and it appears to handle most edge cases.
To improve the speed even more, you could add madvise calls.
Side note: Sorry about the _GNU_SOURCE. It's needed to get the declaration of memrchr.
If your system doesn't have that function, it's easy enough to write one yourself.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
// sysfault -- print a printf-style message to stderr and exit with status 1
//
// The first argument is the format string; any further arguments are its
// values. Written with the standard C99 variadic form (__VA_ARGS__) rather
// than the GNU-only named-variadic extension. The do/while (0) wrapper makes
// the expansion a single statement, so it is safe in an unbraced if/else.
#define sysfault(...) \
    do { \
        fprintf(stderr,__VA_ARGS__); \
        exit(1); \
    } while (0)
// dbgprt -- printf-style debug trace to stderr
//
// Active only when DEBUG is defined at compile time. Otherwise it expands to
// a no-op expression and its arguments are not evaluated. Written with the
// standard C99 variadic form (__VA_ARGS__) instead of the GNU-only
// named-variadic extension.
#ifdef DEBUG
#define dbgprt(...) fprintf(stderr,__VA_ARGS__)
#else
#define dbgprt(...) ((void) 0)
#endif
// byte -- raw byte type used to address the contents of a mapped file
typedef unsigned char byte;
// filemap -- bookkeeping for one file that is opened and memory-mapped
typedef struct {
const char *map_file; // path given to open (the pointer is stored, not copied)
int map_fd; // descriptor returned by open; mapclose resets it to -1
size_t map_siz; // size in bytes as set by mapopen; passed to mmap and munmap
byte *map_base; // start of the mapping; stays NULL when the file is empty
byte *map_ptr; // current position in the mapping; starts at map_base + map_off
size_t map_off; // starting offset: 0 for a created file, else the last byte of the file
struct stat map_st; // fstat result; filled in only when opening an existing file
} filemap;
// mapopen -- open the file
//
// Opens `file` with the given open(2) flags and maps its whole contents
// MAP_SHARED, filling in *map.
//
//   map   -- descriptor to initialise (zeroed first)
//   file  -- path to open; the pointer is stored in map->map_file
//   mode  -- open flags; O_CREAT selects the "new output file" behaviour
//   mapfm -- with O_CREAT: the map whose size the new file should get;
//            ignored otherwise (may be NULL)
//
// With O_CREAT the file is resized to mapfm->map_siz, mapped writable, and
// the cursor starts at offset 0. Without O_CREAT the file is mapped readable
// and the cursor starts at the last byte of the file.
//
// A zero-length file is not mapped: map_base and map_ptr remain NULL.
// Any failure prints a message and terminates the process via sysfault.
void
mapopen(filemap *map,const char *file,int mode,filemap *mapfm)
{
    int prot;

    memset(map,0,sizeof(filemap));
    map->map_file = file;

    map->map_fd = open(map->map_file,mode,0644);
    if (map->map_fd < 0)
        sysfault("mapopen: unable to open '%s' -- %s\n",
            map->map_file,strerror(errno));

    // create new file
    if (mode & O_CREAT) {
        // the new file's size comes from the reference map
        if (mapfm == NULL)
            sysfault("mapopen: no reference map given for '%s'\n",
                map->map_file);

        map->map_siz = mapfm->map_siz;
        if (ftruncate(map->map_fd,map->map_siz) < 0)
            sysfault("mapopen: unable to ftruncate -- %s\n",strerror(errno));

        map->map_off = 0;
        prot = PROT_WRITE;
    }

    // process existing file
    else {
        if (fstat(map->map_fd,&map->map_st) < 0)
            sysfault("mapopen: unable to fstat '%s' -- %s\n",
                map->map_file,strerror(errno));

        map->map_siz = map->map_st.st_size;

        // an empty file has no last byte -- avoid wrapping the unsigned offset
        map->map_off = (map->map_siz > 0) ? map->map_siz - 1 : 0;

        // best effort: the data is accessed through the mapping set up below,
        // so a failure to reposition the descriptor is not fatal
        lseek(map->map_fd,map->map_off,SEEK_SET);

        prot = PROT_READ;
    }

    // mmap rejects a zero length, so leave an empty file unmapped
    if (map->map_siz == 0)
        return;

    map->map_base = mmap(NULL,map->map_siz,prot,MAP_SHARED,map->map_fd,0);
    if (map->map_base == MAP_FAILED)
        sysfault("mapopen: unable to map map_file='%s' map_siz=%zu -- %s\n",
            map->map_file,map->map_siz,strerror(errno));

    // get starting offset for file
    map->map_ptr = map->map_base + map->map_off;
}
// mapclose -- close the file
//
// Releases whatever mapopen acquired for this map: the mapping (if one was
// established) is unmapped using map_siz as its length, and the descriptor
// (if valid) is closed. Both fields are reset (map_base to NULL, map_fd to -1)
// so calling this again on the same map does nothing further.
void
mapclose(filemap *map)
{
    byte *region = map->map_base;
    int fd = map->map_fd;

    // mark the map as released before handing the resources back
    map->map_base = NULL;
    map->map_fd = -1;

    if (region != NULL)
        munmap(region,map->map_siz);

    if (fd >= 0)
        close(fd);
}
// mapreverse -- reverse copy lines in file
//
// Copies the newline-separated lines of srcfile into dstfile in reverse
// order. srcfile is mapped for reading; dstfile is opened with
// O_RDWR | O_CREAT and sized by mapopen to match the source. Lines are found
// by scanning backwards with memrchr and written front-to-back into the
// destination mapping.
//
// A single newline at the very end of the source is treated as the
// terminator of the last line, not as an extra empty line. If the source
// does not end in a newline, its last line is copied without one and so runs
// directly into the line that follows it in the output.
//
// srcfile and dstfile must name different files: both are mapped MAP_SHARED,
// so the copy would overwrite data that has not been read yet.
//
// Failures while opening or mapping terminate the process (see mapopen).
void
mapreverse(const char *srcfile,const char *dstfile)
{
    filemap src;
    filemap dst;
    byte *base;
    byte *prev;
    byte *cur;
    byte *out;
    size_t remain;
    ssize_t lhslen;
    ssize_t rawlen;
    ssize_t curlen;

    mapopen(&src,srcfile,O_RDONLY,NULL);
    mapopen(&dst,dstfile,O_RDWR | O_CREAT,&src);

    // empty file: nothing was mapped, so there is nothing to copy
    if (src.map_siz == 0) {
        mapclose(&src);
        mapclose(&dst);
        return;
    }

    base = src.map_base;

    // track the output position and the bytes still to be written in locals;
    // dst.map_siz must keep the mapped length so mapclose unmaps all of it
    out = dst.map_ptr;
    remain = dst.map_siz;

    // point past last char in file
    lhslen = src.map_siz;
    prev = base + lhslen;

    // well behaved file with newline as last char: keep that newline attached
    // to the last line by excluding it from the backwards search
    if (base[lhslen - 1] == '\n') {
        dbgprt("mapreverse: NICE\n");
        lhslen -= 1;
    }

    // copy over the bulk of the file
    while (lhslen > 0) {
        dbgprt("mapreverse: LOOP lhslen=%zd prev=%td\n",lhslen,prev - base);

        // locate next (i.e. "previous") line
        cur = memrchr(base,'\n',lhslen);

        // no newline left: what remains is the first line, copied below
        if (cur == NULL) {
            dbgprt("mapreverse: FINAL\n");
            break;
        }

        // get length of current line (including newline on left)
        rawlen = prev - cur;
        dbgprt("mapreverse: CURLEN cur=%td prev=%td rawlen=%zd\n",
            cur - base,prev - base,rawlen);

        // remove newline on left from copy buffer and length
        curlen = rawlen - 1;

        // copy current line
        dbgprt("mapreverse: COPY\n");
        memcpy(out,cur + 1,curlen);
        out += curlen;
        remain -= (size_t) curlen;

        // cut back on the length we scan
        lhslen = cur - base;

        // point one past the newline we just found
        prev = cur + 1;
    }

    // copy over final part (the first line of the source)
    if (remain > 0) {
        dbgprt("mapreverse: FINAL map_siz=%zu\n",remain);
        memcpy(out,base,remain);
    }

    mapclose(&src);
    mapclose(&dst);
}
// main -- main program
//
// Usage: prog [-options...] srcfile dstfile
//
// Leading arguments that begin with '-' are consumed as options; none are
// defined, so each one is simply skipped. Exactly two arguments must remain:
// the file to read and the file to write.
int
main(int argc,char **argv)
{
    // step over the program name
    argc -= 1;
    argv += 1;

    // consume leading option arguments
    while (argc > 0 && argv[0][0] == '-') {
        switch (argv[0][1]) {
        default:
            break;
        }

        argc -= 1;
        argv += 1;
    }

    setlinebuf(stderr);

    if (argc != 2) {
        sysfault("main: must have exactly two arguments\n");
    }

    mapreverse(argv[0],argv[1]);

    return 0;
}