I am trying to use this code to add the lines of a file to a hash table. Currently it reads a file in the following format:
289016164,279211721,462102225
341714666,132189021,299107290
362328497,466836829,47952622
That is, each line holds three comma-separated integers. I would like it to also be able to read lines in the following format, where a free-text field follows the three numbers:
289016164,279211721,462102225, some random text
341714666,132189021,299107290, some more random text
362328497,466836829,47952622, even more random text
The struct that holds each line should then be:
// Proposed layout for one input line: the three numeric fields plus the trailing text.
typedef struct Row {
uint32_t a;
uint32_t b;
uint32_t t;
char text[40]; // free text that follows the third comma; NOTE(review): fixed 40-byte capacity — confirm it is large enough for the data
} Row;
The routine that reads in the file is called readAll (see below) and I am having problems modifying it to do this.
How can I change `readAll` to cope with this new format?
I have included most of the code that uses readAll to give some context.
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
// Number of slots in the hash table (0x8000000 = 2^27).
// Should be 37% occupied with 50m entries
#define TABLE_SIZE 0x8000000
// Bit mask that reduces a hash value to a slot index; relies on TABLE_SIZE being a power of two.
#define MASK (TABLE_SIZE - 1)
// Number of bytes requested from the file per fread() call in readChar().
#define BUFFER_SIZE 16384
// Returned by readChar() when no more input is available.
#define END_OF_FILE (-1)
// Value of Row.a that marks an unused table slot (compared against a uint32_t, i.e. 0xFFFFFFFF).
#define DEFAULT_VALUE (-1)
// One parsed input line; this is the element type of the hash table.
typedef struct Row {
uint32_t a; // first value on the line; insert() hashes on this field, and all-ones here marks an empty slot
uint32_t b; // second comma-separated value
uint32_t t; // third comma-separated value
} Row;
/*
 * Multiplicative hash of a 32-bit key.
 *
 * The product is computed in unsigned 32-bit arithmetic, where wrap-around
 * is well defined; multiplying two signed 32-bit values would overflow for
 * almost every key, which is undefined behaviour in C.
 *
 * Returns the low 32 bits of a * 428916315, reinterpreted as int32_t (the
 * result may therefore be negative; callers mask it down to a table index).
 */
int32_t hash(int32_t a) {
    uint32_t product = (uint32_t) a * UINT32_C(428916315);
    return (int32_t) product;
}
/*
 * Store `row` in the open-addressed hash table `table`.
 *
 * The starting slot is hash(row.a) reduced with MASK. On each attempt the
 * slot advances by a step that grows by one (0, 1, 2, ...), wrapping with
 * MASK, until a slot whose `a` equals DEFAULT_VALUE is found; the row is
 * copied there. If no free slot turns up within TABLE_SIZE + 1 attempts the
 * row is silently not stored.
 */
void insert(Row * table, Row row) {
    long slot = hash(row.a) & MASK; // Entries are hashed on a

    for (long step = 0; step <= TABLE_SIZE; step++) {
        slot = (slot + step) & MASK;
        if (table[slot].a == DEFAULT_VALUE) {
            // Free slot found: claim it and stop probing.
            table[slot] = row;
            return;
        }
    }
}
/*
 * Return the next byte of `input`, reading the file in BUFFER_SIZE chunks.
 *
 * Parameters:
 *   input  - open stream to read from.
 *   buffer - caller-owned scratch space of at least BUFFER_SIZE bytes.
 *   pos    - in/out: number of valid bytes currently held in `buffer`
 *            (set from the fread() result).
 *   limit  - in/out: index of the next unread byte in `buffer`.
 * Note the names read backwards from what one might expect: `*limit` is the
 * cursor and `*pos` is the fill level. Both must start at 0.
 *
 * Returns the byte as a value in 0..255, or END_OF_FILE once fread() yields
 * no more data. The byte is converted through unsigned char (as fgetc does)
 * so that bytes >= 0x80 are never negative and 0xFF cannot be mistaken for
 * END_OF_FILE on platforms where plain char is signed.
 */
int readChar(FILE * input, char * buffer, int * pos, int * limit) {
    if (*limit >= *pos) {
        // Buffer exhausted: rewind the cursor and refill from the file.
        *limit = 0;
        *pos = (int) fread(buffer, sizeof(char), BUFFER_SIZE, input);
        if (*pos <= 0) {
            return END_OF_FILE;
        }
    }
    return (unsigned char) buffer[(*limit)++];
}
/*
 * Parse every line of `fileName` and insert() one Row per line into `table`.
 *
 * Each line is expected to look like
 *     <a>,<b>,<t>
 * optionally followed by ",<free text>". The three numeric fields are
 * accumulated digit by digit; everything after the third comma up to the end
 * of the line is skipped, because Row as declared in this file has no member
 * to hold it. Non-digit characters inside a numeric field (spaces, a '\r'
 * from CRLF line endings, ...) are ignored instead of corrupting the value.
 *
 * Differences from a naive character loop that matter for correctness:
 *   - the working row starts zeroed, so the first line is parsed correctly;
 *   - a final line without a trailing '\n' is still inserted;
 *   - completely empty lines do not produce a (0,0,0) row;
 *   - fopen()/malloc() failures are reported on stderr and the function
 *     returns without touching `table`;
 *   - the read buffer is released and the file closed on every path.
 *
 * Parameters:
 *   fileName - path of the input file.
 *   table    - hash table to fill; must already be initialised to empty slots.
 */
void readAll(char * fileName, Row * table) {
    if (fileName == NULL || table == NULL) {
        fprintf(stderr, "readAll: missing file name or table\n");
        return;
    }

    FILE *input = fopen(fileName, "rb");
    if (input == NULL) {
        perror(fileName);
        return;
    }

    char *buffer = malloc(BUFFER_SIZE);
    if (buffer == NULL) {
        perror("readAll");
        fclose(input);
        return;
    }

    int limit = 0;
    int pos = 0;
    const Row emptyRow = {0};
    Row currentRow = emptyRow;
    // Field currently being accumulated; NULL once we are past the third
    // comma, i.e. inside the trailing text that is skipped.
    uint32_t *currentElement = &currentRow.a;
    int lineHasData = 0;

    for (;;) {
        int lastRead = readChar(input, buffer, &pos, &limit);

        if (lastRead == '\n' || lastRead == END_OF_FILE) {
            // End of a line (or of the file): flush the row if there is one.
            if (lineHasData) {
                insert(table, currentRow);
            }
            if (lastRead == END_OF_FILE) {
                break;
            }
            currentRow = emptyRow;
            currentElement = &currentRow.a;
            lineHasData = 0;
            continue;
        }

        if (lastRead == '\r') {
            // Part of a CRLF line ending; carries no data.
            continue;
        }

        lineHasData = 1;

        if (currentElement == NULL) {
            // Inside the trailing text field: skip until end of line.
            continue;
        }

        if (lastRead == ',') {
            if (currentElement == &currentRow.a) {
                currentElement = &currentRow.b;
            } else if (currentElement == &currentRow.b) {
                currentElement = &currentRow.t;
            } else {
                currentElement = NULL;
            }
        } else if (lastRead >= '0' && lastRead <= '9') {
            *currentElement = *currentElement * 10 + (uint32_t) (lastRead - '0');
        }
    }

    free(buffer);
    fclose(input);
}
/*
 * Entry point: allocate the hash table, mark every slot empty and load the
 * file named by the first command-line argument into it.
 *
 * Exits with EXIT_FAILURE when no file name is given or the table cannot be
 * allocated (the table is TABLE_SIZE rows, so this is a large allocation).
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <input-file>\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    Row *table = malloc(sizeof *table * TABLE_SIZE);
    if (table == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    // Filling with 0xFF bytes sets every field to all-ones, which is what
    // insert() compares against DEFAULT_VALUE to recognise a free slot.
    memset(table, 255, sizeof *table * TABLE_SIZE);
    readAll(argv[1], table);
    //[...]
}