As @David C. Rankin suggested, using fgets
along with strtok
to read each line is a good approach to this problem.
If you want to use mergesort
in the future, then storing your data in an array of structs would be the easiest layout to use with this sorting algorithm. Furthermore, if you don't know how many lines will be in the file, then you will need to allocate this array dynamically at run time.
You can have a lower-level struct
storing each line in the file:
/* One data line of the file: an integer age followed by two name fields. */
typedef struct {
int age;
/* each name is a separately allocated, NUL-terminated string */
char *firstname;
char *lastname;
} record_t;
And a higher-level struct
storing all contents of the file:
/* Everything read from the file: the header line plus a growable array of records. */
typedef struct {
record_t *records; /* dynamically allocated array of record_t */
char *headers; /* copy of the header line */
size_t currsize; /* number of records the array currently has room for */
/* index of the next free slot, i.e. how many records are stored so far */
size_t lastidx;
} allrecords_t;
Things to note about fgets:
- Keeps the
\n
character read from the line at the end of the buffer, before the null-terminator \0
(provided the whole line fits). This trailing \n
can be removed easily though.
- On error, returns
NULL
. If EOF
is reached and no characters have been read, then this also returns NULL
.
- The buffer and its size must be supplied by the caller; fgets does not allocate or grow it (a fixed-size array is the simplest choice).
- Needs to be read from specified stream, either from
stdin
or from FILE *
.
Optional usage of fgets in a program:
When using fgets()
, you can call it once to consume the header information:
fgets(buffer, 256, pfile); /* error checking needed */
Then, you can call it again in a while()
loop, to consume the rest of the data in the file:
while (fgets(buffer, 256, pfile) != NULL) {
....
}
Implementation of all these ideas in a Program:
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Constants used */
/* initial number of record slots allocated by initialize_records() */
#define INITSIZE 20
/* size in bytes of the line buffer handed to fgets() */
#define BUFFSIZE 256
/* labels passed to check_ptr() to name the operation that failed */
#define MALLOC_MSG "Allocation"
#define REALLOC_MSG "Reallocation"
/* array of structs setup */
/* One data line of the file: an integer age followed by two name fields. */
typedef struct {
int age;
/* each name is a separately malloc()ed copy, freed in print_records() */
char *firstname;
char *lastname;
} record_t;
/* Everything read from the file: the header line plus a growable array of records. */
typedef struct {
/* heap array of records; doubled by read_data() when it fills up */
record_t *records;
/* heap copy of the first line of the file, without its newline */
char *headers;
/* number of records the array currently has room for */
size_t currsize;
/* index of the next free slot, i.e. how many records are stored so far */
size_t lastidx;
} allrecords_t;
/* function prototypes */
/* allocates an empty container with room for INITSIZE records */
allrecords_t *initialize_records(void);
/* reads the first line of the stream into Record->headers */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]);
/* reads the remaining lines into Record->records, growing the array as needed */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]);
/* prints the header and all records, then frees everything owned by Record */
void print_records(allrecords_t *Record);
/* exits the program if ptr is NULL; msg labels the operation that failed */
void check_ptr(void *ptr, const char *msg);
/* removes the trailing '\n' from buffer; exits if a non-empty buffer has none */
void remove_newline(char buffer[]);
/*
 * Program entry point: loads "fileex.txt", prints it back and releases every
 * resource it acquired.
 *
 * Returns 0 on success. Exits with EXIT_FAILURE if the file cannot be opened;
 * the helpers called from here also exit on read or allocation errors.
 */
int main(void) {
    FILE *fp;
    allrecords_t *Record;

    /* fixed-size line buffer shared by the fgets() calls in the helpers */
    char buffer[BUFFSIZE];

    fp = fopen("fileex.txt", "r");
    if (!fp) {
        fprintf(stderr, "Cannot read file.\n");
        exit(EXIT_FAILURE);
    }

    Record = initialize_records();

    /* Reads the first line */
    read_header(fp, Record, buffer);

    /* Reads next lines */
    read_data(fp, Record, buffer);

    /* every line has been consumed, so the stream is no longer needed */
    fclose(fp);

    /* prints and frees structure elements */
    print_records(Record);

    return 0;
}
/* function which reads the age/firstname/lastname data */
void read_data(FILE *filestream, allrecords_t *Record, char buffer[]) {
char *data; /* only need one char *pointer for strtok() */
const char *delim = ",";
while (fgets(buffer, BUFFSIZE, filestream) != NULL) {
remove_newline(buffer); /* optional to remove '\n' */
/* resize array when necessary */
if (Record->currsize == Record->lastidx) {
Record->currsize *= 2;
Record->records = realloc(Record->records, Record->currsize * sizeof(record_t));
check_ptr(Record->records, REALLOC_MSG);
}
/* adding info to array */
/* using strdup() will lead to less code here */
data = strtok(buffer, delim);
Record->records[Record->lastidx].age = atoi(data);
data = strtok(NULL, delim);
Record->records[Record->lastidx].firstname = malloc(strlen(data)+1);
check_ptr(Record->records[Record->lastidx].firstname, MALLOC_MSG);
strcpy(Record->records[Record->lastidx].firstname, data);
data = strtok(NULL, delim);
Record->records[Record->lastidx].lastname = malloc(strlen(data)+1);
check_ptr(Record->records[Record->lastidx].lastname, MALLOC_MSG);
strcpy(Record->records[Record->lastidx].lastname, data);
Record->lastidx++;
}
}
/*
 * Writes the header and every stored record to stdout, one
 * "age,firstname,lastname" line per record, releasing each string right after
 * it has been printed. Finally frees the records array and the container
 * itself, so Record must not be used once this function returns.
 */
void print_records(allrecords_t *Record) {
    record_t *rec;
    record_t *stop;

    printf("\nComplete Record:\n");
    printf("%s\n", Record->headers);

    free(Record->headers);
    Record->headers = NULL;

    /* walk the filled part of the array: [records, records + lastidx) */
    stop = Record->records + Record->lastidx;
    for (rec = Record->records; rec != stop; rec++) {
        printf("%d,%s,%s\n", rec->age, rec->firstname, rec->lastname);

        free(rec->firstname);
        rec->firstname = NULL;

        free(rec->lastname);
        rec->lastname = NULL;
    }

    free(Record->records);
    Record->records = NULL;

    free(Record);
}
/*
 * Consumes one line from filestream and stores a heap copy of it, minus the
 * trailing newline, in Record->headers.
 *
 * Exits with EXIT_FAILURE when no line can be read; allocation failure ends
 * the program through check_ptr().
 */
void read_header(FILE *filestream, allrecords_t *Record, char buffer[]) {
    char *copy;

    if (!fgets(buffer, BUFFSIZE, filestream)) {
        fprintf(stderr, "Error reading header.\n");
        exit(EXIT_FAILURE);
    }

    remove_newline(buffer);

    /* +1 leaves room for the terminating NUL */
    copy = malloc(strlen(buffer) + 1);
    check_ptr(copy, MALLOC_MSG);
    strcpy(copy, buffer);

    Record->headers = copy;
}
/*
 * Strips the trailing '\n' from buffer, in place.
 *
 * buffer - NUL-terminated line, typically just filled by fgets()
 *
 * An empty string is left untouched. A non-empty string that does not end in
 * '\n' is treated as a line that did not fit in the buffer: a diagnostic is
 * written to stderr and the program exits with EXIT_FAILURE.
 *
 * NOTE(review): a final line that reaches end-of-file without a newline also
 * takes the error path, because the buffer capacity is not known here.
 */
void remove_newline(char buffer[]) {
    size_t slen = strlen(buffer);

    if (slen == 0) {
        return;
    }

    if (buffer[slen - 1] != '\n') {
        /* diagnostics belong on stderr, like the other error messages */
        fprintf(stderr, "Buffer overflow detected.\n");
        exit(EXIT_FAILURE);
    }

    buffer[slen - 1] = '\0';
}
/* initializes higher level struct */
allrecords_t *initialize_records(void) {
allrecords_t *Record = malloc(sizeof(*Record));
check_ptr(Record, MALLOC_MSG);
Record->currsize = INITSIZE;
Record->headers = NULL;
Record->records = malloc(Record->currsize * sizeof(record_t));
check_ptr(Record->records, MALLOC_MSG);
Record->lastidx = 0;
return Record;
}
/*
 * Terminates the program when ptr is NULL, so callers do not have to repeat
 * the 'ptr == NULL' test after every allocation.
 *
 * ptr - pointer to validate
 * msg - short label naming the operation that failed; part of the diagnostic
 *
 * Returns normally when ptr is non-NULL. Otherwise writes the diagnostic to
 * stderr and exits with EXIT_FAILURE.
 */
void check_ptr(void *ptr, const char *msg) {
    if (ptr != NULL) {
        return;
    }

    fprintf(stderr, "Null pointer returned: %s\n", msg);
    exit(EXIT_FAILURE);
}
Note: I used malloc()
+ strcpy()
instead of strdup()
, because they are declared in the standard C headers <string.h>
and <stdlib.h>
, whereas strdup() is a POSIX function that was not part of ISO C before C23.
Program output:
Complete Record:
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
90,V,E
60,N,M