1

I have been given an assignment which uses C to read a given file and input data into a binary tree. My current problem is splitting the line read from the file into two different variables.

The file that has been given contains two bits of data, an ID and some information. 2409, blah, blah, blah

Currently, the program is reading the file correctly, storing each line and then displaying it. I have tried to use tokens, memmove, and simply selecting the characters manually; however, this needs to be dynamic. The ID is not a fixed number of digits, so manually selecting it will not work. As mentioned, I have tried to use strtok with ", " as the delimiter; however, it just doesn't change anything.

This is currently what I am using to display the information; I intend to split the string within the while loop for each line:

int main() {

    struct node* root = NULL;

    FILE *file;
    char filename[15];
    char line[255];

    strcpy(filename, "file.txt");

    file = fopen(filename, "r");
    if (file == NULL) {
        printf("File could not be openned.\n");
        /* A non-zero status tells the caller that the program failed. */
        exit(EXIT_FAILURE);
    }

    /*
     * fgets() returns NULL at end of file or on a read error, so its result
     * is the loop condition. Testing the array `line` against NULL can never
     * end the loop, and passing that NULL on to strcpy() would crash.
     * Reading straight into `line` with its own size also removes the copy
     * from a 255-byte buffer into a smaller one.
     */
    while (fgets(line, sizeof line, file) != NULL)
    {
        printf("%s", line);
    }
    fclose(file);
    return 0;
}

Is there any way to simply select the first characters up to the first occurrence of "," and convert them into an integer, then select the rest of the data (removing the leading "ID, ") and store that in a char array?

Your help is greatly appreciated.

Polecalex
  • 323
  • 2
  • 12
  • 1
    Take a look at [strtok](https://www.geeksforgeeks.org/strtok-strtok_r-functions-c-examples/) and [THIS](https://stackoverflow.com/a/7021750/3436922) – LPs Dec 18 '19 at 15:32
  • 1
    If you want to convert everything up to the first `,` to an integer, take a look at `sscanf()` or `fscanf()`, By the way, your `filename` variable serves no purpose, just do `fopen("file.txt", "r");` directly. It is a good idea to drop arrays when possible. – HAL9000 Dec 18 '19 at 15:40
  • 2
    In your code, `while (line != NULL)`, `line` is an array and will decay to a pointer to its first element, so it will _never_ be `NULL`. – Ian Abbott Dec 18 '19 at 16:12

4 Answers4

1

Like @LPs suggested, and assuming each line is like "2019, blah, blah, blah", you can get the ID for each line by calling:

   int id = atoi(strtok(line, ","));
1

If one wants to parse files like,

2409, blah, blah, blah
   0x10,foo,    bar,    baz, qux
# This is more difficult.
    010   , a\
a,   b  b\#\\\,still b,c

one is probably better off just using a parser generator like lex and yacc or my favourite, re2c.

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>

/* Tokens.
 * TOKENS(X) is an X-macro: it applies X to every token name in one fixed
 * order. Expanding it with PARAM yields the bare identifiers for the enum,
 * and expanding it with STRINGISE yields the same names as string literals,
 * so `tokens[t]` is the spelling of `enum Token` value `t`. */
#define PARAM(A) A
#define STRINGISE(A) #A
#define TOKENS(X) X(ERROR), X(END), X(COMMA), X(NEWLINE), \
    X(ESCAPE), X(WSP), X(NUMBER), X(WORD)
enum Token { TOKENS(PARAM) };
static const char *const tokens[] = { TOKENS(STRINGISE) };

/* Lexer state.
 * line:   line counter; lex() increments it for every newline, line
 *         continuation and comment it consumes.
 * marker: bound to re2c's YYMARKER in lex().
 * from:   start of the token most recently scanned (set at `scan:`).
 * cursor: current read position, bound to re2c's YYCURSOR; the token just
 *         returned is the half-open range [from, cursor). */
struct Lexer { size_t line; char *marker, *from, *cursor; };

/* Scans the next token starting at lexer->cursor and returns its kind.
 * This function is re2c input: the `/*!re2c` blocks are replaced with
 * generated C by re2c and are not compiled as written.
 * YYFILL is disabled and "\x00" is matched as END, so the input is expected
 * to be a NUL-terminated string.
 * Backslash-newline and `#` comments (which include their terminating
 * newline) are skipped by jumping back to `scan:`, which resets
 * lexer->from; they still bump lexer->line. */
static enum Token lex(struct Lexer *lexer) {
    assert(lexer);
/*!re2c
    re2c:yyfill:enable   = 0;
    re2c:define:YYCTYPE  = char;
    re2c:define:YYCURSOR = lexer->cursor;
    re2c:define:YYMARKER = lexer->marker; // Rules overlap.

    newline = "\n" | ("\r" "\n"?);
    oct = "0" [0-7]*;
    dec = [1-9][0-9]*;
    hex = '0x' [0-9a-fA-F]+;
    num = oct | dec | hex;
    word = [^\x00\\\n\r \t\v\f,0-9]+;
    comment = "#" [^\x00\n\r]* newline;
*/
scan:
    /* Remember where this token begins so callers can slice [from, cursor). */
    lexer->from = lexer->cursor;
/*!re2c
    * { return ERROR; }
    "\x00" { return END; }
    [ \t\v\f]+ { return WSP; }
    newline { lexer->line++; return NEWLINE; }
    "\\\n" | comment { lexer->line++; goto scan; }
    "\\\\" | "\\," | "\\ " | "\\n" | "\\#" { return ESCAPE; }
    "," { return COMMA; }
    word { return WORD; }
    num { return NUMBER; }
*/
}

/* Growable byte buffer: the first `size` bytes of `data` are in use, out of
 * `capacity` bytes allocated. A zero-initialised struct is an empty buffer. */
struct Buffer {
    char *data;
    size_t size, capacity;
};

/* Ensures that at least `reserve` bytes are available after the `size` bytes
 * already in use, growing the allocation by doubling when necessary.
 * `size` itself is not changed; the caller adds however many bytes it
 * actually wrote.
 * Returns a pointer to the first free byte (`data + size`), or null with
 * `errno` set: ERANGE when the request is too large to represent, otherwise
 * whatever `realloc` reported. On failure the buffer is left untouched. */
static char *buffer_reserve(struct Buffer *const buf, const size_t reserve) {
    /* Largest capacity that doubling can reach without overflowing size_t. */
    const size_t max_capacity = ((size_t)-1 >> 1) + 1;
    size_t min, c;
    char *data;
    assert(buf);
    /* Check for overflow before forming size + reserve. */
    if(reserve > (size_t)-1 - buf->size) { errno = ERANGE; return 0; }
    min = buf->size + reserve;
    if(min > max_capacity) { errno = ERANGE; return 0; }
    c = buf->capacity;
    if(min > c) {
        if(!c) c = 1;
        /* Double until the request fits. The original condition was
         * inverted (`min <= c`), so the capacity never grew. */
        while(c < min) c <<= 1;
        /* Keep the old pointer until realloc succeeds so nothing leaks. */
        if(!(data = realloc(buf->data, c))) return 0;
        buf->data = data;
        buf->capacity = c;
    }
    return buf->data + buf->size;
}

/* One stored field: the half-open character range [start, end). Both
 * pointers are null when a field was stored without any text being seen. */
struct Word { char *start, *end; };

/* State for the record (line) currently being parsed.
 * id, id_set:   the numeric key and a flag saying whether it has been read.
 * first_comma:  1 until the first comma of the record has been seen; that
 *               comma separates the key from the data and stores no word.
 * num_words:    how many entries of `words` are filled.
 * start_words,
 * end_words:    extent of the field being accumulated, or null when no
 *               text has been seen since the last stored word. */
struct Parser {
    int id, id_set, first_comma;
    size_t num_words;
    struct Word words[64]; /* Lazy. */
    char *start_words, *end_words;
};
/* Element count of Parser::words, computed with sizeof on a null pointer
 * expression (sizeof does not evaluate its operand). */
static size_t parser_max_words = sizeof ((struct Parser *)0)->words
    / sizeof *((struct Parser *)0)->words;

/* Puts `parser` back into its start-of-record state: no id read yet, the
 * next comma is the first one, no stored words and no field in progress.
 * `id` and the contents of `words` are deliberately left as they are; they
 * are guarded by `id_set` and `num_words`. */
static void clear_parser(struct Parser *const parser) {
    assert(parser);
    parser->start_words = 0;
    parser->end_words = 0;
    parser->num_words = 0;
    parser->first_comma = 1;
    parser->id_set = 0;
}
/* Writes one finished record to stdout as `#<id>: <w1>, <w2>, ...` followed
 * by a newline. A word with null pointers is shown as `<null>` and a
 * zero-length word as `<empty>`. The record must have its id set. */
static void print_parser(const struct Parser *const parser) {
    size_t i;
    assert(parser && parser->id_set && parser->num_words <= parser_max_words);
    printf("#%d: ", parser->id);
    for(i = 0; i < parser->num_words; i++) {
        const struct Word *const w = &parser->words[i];
        /* Separator goes before every word except the first. */
        if(i != 0) printf(", ");
        if(!w->start) {
            printf("<null>");
        } else {
            assert(w->start <= w->end);
            if(w->start == w->end) {
                printf("<empty>");
            } else {
                /* The words are slices of the input, not C strings, so
                 * print them with an explicit length. */
                printf("<%.*s>", (int)(w->end - w->start), w->start);
            }
        }
    }
    fputc('\n', stdout);
}
/* Extends the field being accumulated in `parser` so that it covers the
 * token the lexer just produced, [lexer->from, lexer->cursor). The first
 * token of a field also fixes where the field starts. The end is placed at
 * most INT_MAX characters past the token's start. */
static void expand_word(struct Parser *const parser,
    const struct Lexer *const lexer) {
    assert(parser && lexer && lexer->from < lexer->cursor);
    if(!parser->start_words) {
        assert(!parser->end_words);
        parser->start_words = lexer->from;
    }
    /* Compare the token length with INT_MAX instead of forming
     * `from + INT_MAX` up front: that pointer would lie far outside the
     * buffer for any ordinary token, which is undefined behaviour. It is
     * only formed here when the token really is that long. */
    parser->end_words = (lexer->cursor - lexer->from <= INT_MAX) ?
        lexer->cursor : lexer->from + INT_MAX;
}
/* Appends the field accumulated so far (possibly none, giving null
 * pointers) to `parser->words` and resets the accumulator for the next one.
 * Returns 1 on success; returns 0 with errno set to EILSEQ when the fixed
 * word table is already full. */
static int store_word(struct Parser *const parser) {
    struct Word *slot;
    assert(parser);
    if(parser->num_words >= parser_max_words) {
        errno = EILSEQ;
        return 0;
    }
    slot = &parser->words[parser->num_words];
    parser->num_words++;
    slot->start = parser->start_words;
    slot->end = parser->end_words;
    parser->start_words = 0;
    parser->end_words = 0;
    return 1;
}

int main(int argc, char **argv) {
    const size_t granularity = 1024;
    struct Lexer lexer = { 1, 0, 0, 0 };
    struct Parser parser;
    size_t nread;
    struct Buffer buf = { 0, 0, 0 };
    char *b;
    FILE *fp = 0;
    int success = 0, end_of_buffer = 0;

    /* Open. */
    if(argc != 2) return fprintf(stderr, "Needs filename.\n"), EXIT_FAILURE;
    if(!(fp = fopen(argv[1], "r"))) goto catch;

    /* Read. */
    do {
        if(!(b = buffer_reserve(&buf, granularity))) goto catch;
        nread = fread(b, 1, granularity, fp);
        buf.size += nread;
    } while(nread == granularity);
    if(ferror(fp)) goto catch;
    fclose(fp), fp = 0;
    if(!(b = buffer_reserve(&buf, 1))) goto catch;
    *b = '\0'; /* Make sure it's a string. */

    /* Parse. */
    lexer.cursor = buf.data;
    clear_parser(&parser);
    do {
        enum Token tok;
        switch((tok = lex(&lexer))) {
        case ERROR: goto catch;
        case END: end_of_buffer = 1; break;
        case COMMA:
            if(!parser.id_set) { errno = EILSEQ; goto catch; }
            if(parser.first_comma) { parser.first_comma = 0; break; }
            if(!store_word(&parser)) goto catch;
            break;
        case NEWLINE:
            if(parser.id_set) {
                /* We require at least key, data. */
                if(!store_word(&parser)) goto catch;
                print_parser(&parser);
                clear_parser(&parser);
            } else if(parser.start_words) {
                errno = EILSEQ; goto catch;
            }
            break;
        case ESCAPE:
            if(!parser.id_set) { errno = EILSEQ; goto catch; }
            expand_word(&parser, &lexer);
            break;
        case WSP: break;
        case NUMBER:
            if(parser.id_set) {
                expand_word(&parser, &lexer);
            } else {
                char *end;
                long i = strtol(lexer.from, &end, 0);
                if(end != lexer.cursor || i < INT_MIN || i > INT_MAX)
                    { errno = EDOM; goto catch; }
                parser.id = (int)i;
                parser.id_set = 1;
            }
            break;
        case WORD:
            expand_word(&parser, &lexer);
            break;
        }
    } while(!end_of_buffer);
    success = EXIT_SUCCESS;
    goto finally;
catch:
    fprintf(stderr, "While on line %lu.\n", (unsigned long)lexer.line);
    perror("parsing");
    assert(!lexer.from || (lexer.from < lexer.cursor
        && lexer.from + INT_MAX >= lexer.cursor));
    if(lexer.from) fprintf(stderr, "While on %.*s.\n",
        (int)(lexer.cursor - lexer.from), lexer.from);
finally:
    free(buf.data);
    if(fp) fclose(fp);
    return success;
}

Prints,

#2409: <blah>, <blah>, <blah>
#16: <foo>, <bar>, <baz>, <qux>
#8: <a\
a>, <b  b\#\\\,still b>, <c>

but that's probably overkill.

Neil
  • 1,767
  • 2
  • 16
  • 22
  • I appreciate this work however, it is indeed overkill. This program is simply to take data from a file, then input it into a binary tree and display it. – Polecalex Dec 19 '19 at 02:20
  • 1
    If the lines are in a _constant_ format, one might use something like `sscanf(s, "%d, %s, %s, %s\n", ...)` and check the return; that's fairly simple as compared to this. – Neil Dec 19 '19 at 18:54
1

As @HAL9000 mentioned, I was able to complete this by using sscanf. Simply extracting the integer and string from the line using sscanf(line, "%d %[^\n]s", &ID, details);

I did try using strtok; however, I couldn't get my head around it, as it wasn't working. sscanf was the easiest to use, so this is what I am going to use, thanks.

Polecalex
  • 323
  • 2
  • 12
1

Using sscanf

e.g

int main(int argc, char *argv[]) {
    const char *str = "123, this, is, a test ;@#";
    char buff[128] = {0};
    int num = 0;

    /*
     * "%d,"          reads the integer and the comma that follows it.
     * "%127[^\r\n]"  reads the rest of the line; the width leaves room for
     *                the terminating '\0' in the 128-byte buffer, so a long
     *                line cannot overflow it.
     * A scanset is a complete conversion on its own, so no trailing 's' is
     * needed (it would only be matched as a literal character).
     * sscanf returns the number of conversions stored, 2 on a full match.
     */
    if (2 == sscanf(str, "%d,%127[^\r\n]", &num, buff))
        printf("== num: %d, string: '%s'\n", num, buff);
    else
        printf("== Wrong!\n");
    return 0;
}

result: == num: 123, string: ' this, is, a test ;@#'

S Dao
  • 555
  • 4
  • 7