To my limited knowledge (I could be wrong), with the standard libc, there are no efficient ways to read a line when you do not know the max line length. You may get memory overflow with scanf()
and gets()
because they do not check the length of your buffer. If you use fgets()
, you may waste time on frequent strlen()
and realloc()
. If you use fgetc()
, it will be slow as fgetc()
has a huge overhead.
For efficient line reading, we have to keep some intermediate information. It is not that easy. I am attaching an implementation. It is quite complicated, but it is very efficient and generic. If you do not care about the details, you may just focus on the main()
function about how to use the routines.
To try this program:
gcc -Wall prog.c; ./a.out < input.txt > output.txt
Program:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#define kstype_t FILE* // type of file handler
#define ksread_f(fp, buf, len) fread((buf), 1, (len), (fp)) // function to read a data chunk
typedef struct {
int l, m; // l: length of string; m: allocated size
char *s; // string
} kstring_t;
typedef struct {
kstype_t f; // file handler
int begin, end, is_eof, bufsize;
unsigned char *buf; // buffer
} kstream_t;
kstream_t *ks_open(kstype_t fp, int bufsize)
{
kstream_t *ks;
ks = (kstream_t*)calloc(1, sizeof(kstream_t));
ks->bufsize = bufsize;
ks->buf = (unsigned char*)malloc(bufsize);
ks->f = fp;
return ks;
}
void ks_close(kstream_t *ks)
{
free(ks->buf); free(ks);
}
int ks_readline(kstream_t *ks, int delimiter, kstring_t *str)
{
str->l = 0;
if (ks->begin >= ks->end && ks->is_eof) return -1;
for (;;) {
int i;
if (ks->begin >= ks->end) {
if (!ks->is_eof) {
ks->begin = 0;
ks->end = ksread_f(ks->f, ks->buf, ks->bufsize);
if (ks->end < ks->bufsize) ks->is_eof = 1;
if (ks->end == 0) break;
} else break;
}
for (i = ks->begin; i < ks->end; ++i)
if (ks->buf[i] == delimiter) break;
if (str->m - str->l < i - ks->begin + 1) {
str->m = str->l + (i - ks->begin) + 1;
kroundup32(str->m);
str->s = (char*)realloc(str->s, str->m);
}
memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);
str->l = str->l + (i - ks->begin);
ks->begin = i + 1;
if (i < ks->end) break;
}
if (str->s == 0) {
str->m = 1;
str->s = (char*)calloc(1, 1);
}
str->s[str->l] = '\0';
return str->l;
}
int main()
{
kstream_t *ks;
kstring_t str;
str.l = str.m = 0; str.s = 0; // initialize the string struct
ks = ks_open(stdin, 4096); // initialize the file handler
while (ks_readline(ks, '\n', &str) >= 0) // read each line
puts(str.s); // print it out
ks_close(ks); free(str.s); // free
return 0;
}