3

I am correctly tokenizing single words from a line of strings; however, inserting them into a 2d array cuts off parts of the token. I also have a problem with NULL, and the code results in a segfault.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>   // strtok

#define MAX_FILE_LENGTH 30
#define MAX_COURSE_LENGTH 30
#define MAX_LINE_LENGTH 1000

void trim(char* str) {
  int l = strlen(str);
  if (str[l - 1] == '\n') {
    str[l - 1] = 0;
  }
}

int main() {
  char filename[MAX_FILE_LENGTH]; 
  char arr[MAX_COURSE_LENGTH][MAX_COURSE_LENGTH];
  const char delim[] = " ";
  char* token;
  int course = 0;
  char c;
  FILE* fp;
  int N = 0;    // number of lines in file

  printf("This program will read, from a file, a list of courses and their prerequisites and will print the list in which to take courses.\n");
  printf("Enter filename: ");
  scanf("%s%c", filename, &c);

  fp = fopen(filename, "r");

  if (fp == NULL) {
    printf("Could not open file %s. Exit\n", filename);
    printf("\nFailed to read from file. Program will terminate.\n");
    return -1;
  }

  while (!feof(fp) && !ferror(fp)) {
    int i = 0;
    if (fgets(arr[N], MAX_LINE_LENGTH, fp)) {
      trim(arr[N]);
      printf("Full line: |%s|\n", arr[N]);
      token = strtok(arr[N], delim);
      arr[N][i] = *token;
      printf("N = %d, i = %d, token = %s arr[%d][%d]: %s\n", N, i, token, N, i, &arr[N][i]);
      while (token != NULL) {
        i++;
        token = strtok(NULL, " \n");
        printf("token at arr[%d][%i]: %s value at arr[%d][%d]: %s\n", N, i, token, N, i, &arr[N][i]);
        arr[N][i] = *token;
        printf("N = %d, i = %d, token = %s arr[%d][%d]: %s\n", N, i, token, N, i, &arr[N][i]);
      }
      N++;
    }
  }
  fclose(fp);
  return 0;
}

The output I'm getting reads:

Full line: |c100 c200|
N = 0, i = 0, token = c100 arr[0][0]: c100
token at arr[0][1]: c200 value at arr[0][1]: 100
N = 0, i = 1, token = c200 arr[0][1]: c00
token at arr[0][2]: (null) value at arr[0][2]: 00
zsh: segmentation fault  ./a.out

My file is a list of courses and I am to build an adjacency matrix with the list of prerequisite courses.

c100 c200
c300 c200 c100
c200 c100

I tried to reset each index to NULL or '\0' before inserting the tokens, but the same result occurred. Inserting the first word in the [N][0]th index of the inner array works, but there is something I'm missing when inserting into other indexes of the inner array.

  • PSA: Try and use longer buffers for things like filenames, especially if they include paths. 30 characters is extremely skimpy. – tadman Apr 30 '23 at 19:40
  • 2
    OT: Your `trim` function have a few flaws... 1) You don't check for or handle `NULL` pointers; 2) If the string is empty you use index `-1`; 3) The term "trim" usually means removing *all* spaced, from both the start and end of a string. Please read [Removing trailing newline character from fgets() input](https://stackoverflow.com/questions/2693776/removing-trailing-newline-character-from-fgets-input) for a better way to remove the newline after a `fgets` call. – Some programmer dude Apr 30 '23 at 19:46
  • 1
    Also OT: Please read [Why is “while( !feof(file) )” always wrong?](https://stackoverflow.com/questions/5431941/why-is-while-feoffile-always-wrong) – Some programmer dude Apr 30 '23 at 19:47
  • 2
    segfault is due to `*token` when token is NULL. – Allan Wind Apr 30 '23 at 19:53
  • `arr[N][i] = *token;` makes no sense at all. Please be aware `arr[N][1]` is _not_ the start of the second word, but the second letter of the _first_ word. – Ruud Helderman Apr 30 '23 at 19:54
  • Your looping constructs are fundamentally flawed. `while(token != NULL) { token = strtok(NULL, " \n"); /* Now token can be NULL, so boom when you try */ arr[N][i] = *token;` Use the idiomatic looping constructs. They work. (ie, `while( (token = strtok(...)) != NULL )` – William Pursell Apr 30 '23 at 19:55
  • What is `arr` supposed to store? Adjacency matrix is binary (1 if a an edge from node i to j otherwise 0). `arr` is not initialized. You should explain what the input means, i.e. that c100 is required to take c200 and which was you want the directed edge. The circular dependency is interesting. – Allan Wind Apr 30 '23 at 20:01

3 Answers3

3

Much worse than all of my comments above, which could lead to undefined behavior, is the active and always UB you have by using %s to print a single character.

The assignment

arr[N][i] = *token;

assigns the single character in token[0] to the single character element arr[N][i].

You then use %s to print &arr[N][i], but the %s format expects a null-terminated string which you don't have.

To have a string you need an array of characters, and what you maybe should do is something like (but I'm just guessing):

char arr[MAX_COURSE_LENGTH][MAX_COURSE_LENGTH][MAX_COURSE_LENGTH];

and

strcpy(arr[N][i], token);

And as mentioned by Allan Wind, you're forgetting that strotk can (and will) return a NULL pointer.

Also note that all upper-case names are usually used for macros and symbolic constants. That makes it kind of confusing to see the name N being used as an index variable.

Some programmer dude
  • 400,186
  • 35
  • 402
  • 621
2

Consider the lines:

  while (token != NULL) {
    token = strtok(NULL, " \n");
    arr[N][i] = *token;

(I've omitted a few lines for clarity.) This is fundamentally flawed, since strtok can return NULL (indeed, you expect it to at some point), but as soon as token is NULL, the attempt to read *token is undefined behavior. The standard, idiomatic way to write this code is:

while( (token = strtok(NULL, " \n")) != NULL ){ ... }

The idiomatic code works. Use it. Similarly in the previous loop, instead of while( !feof(...) && ! ferror(...) { if( fgets (...) ) ...}, just write while( fgets (....)) {...} Doing the feof/ferror check on each iteration is not helpful, and the code looks unusual. The idiomatic code may look strange at first, but it is generally well understood and is the correct way to construct the loops.

William Pursell
  • 204,365
  • 48
  • 270
  • 300
2

The previous answers already covered the technical issues:

  1. trim() is usually refers to white space at the beginning & end of string, but in this case you just remove the trialing newline. As noted by @Someprogrammerdude it does not work as expected for the empty string.

  2. strtok() will return NULL when there is nothing else to process. When you subsequently dereference *token this is what cause your segfault.

  3. adj[N][i] refers to the i'th character of token in this case. This is why your input is not as expected

  4. You mentioned you wanted to build an adjacency matrix but adj isn't an adjacency matrix but a 2d array of chars that you store strings in.

I wrote quite a bit of code for you here to implement a adjacency matrix. It's not clear me to if you wanted the class to depend on it's requirements or the other way around (it's just matter of swapping course and prereq in process() if it's the other way around:

#include <stdio.h>
#include <stdlib.h>
#define POSIX_C_SOURCE 200809L
#include <string.h> 

#define DELIM " "
#define MAX_FILE_LENGTH 30
#define MAX_COURSE_LENGTH 30
#define MAX_LINE_LENGTH 1000

typedef struct adjacency_matrix {
    size_t nodes_len;
    char **nodes;
    char *edges;
} adjacency_matrix;

struct adjacency_matrix *adjacency_matrix_create(size_t nodes_len) {
    adjacency_matrix *am = malloc(sizeof *am);
    if(!am)
        return NULL;
    am->nodes_len = nodes_len;
    am->nodes = calloc(nodes_len, sizeof *am->nodes);
    am->edges = calloc(nodes_len * nodes_len, 1);
    return am;
}

int adjacency_matrix_node(adjacency_matrix *am, char *node) {
    int i = 0;
    for(; am->nodes[i]; i++)
        if(!strcmp(am->nodes[i], node))
            return i;
    if(i == am->nodes_len)
        return -1;
    am->nodes[i] = strdup(node);
    return i;
}

void adjacency_matrix_edge(adjacency_matrix *am, size_t a, size_t b) {
    am->edges[am->nodes_len * a + b] = 1;
}

void adjacency_matrix_print(const adjacency_matrix *am) {
    for(size_t i = 0; am->nodes[i]; i++) {
        printf("%s:", am->nodes[i]);
        for(size_t j = 0; am->nodes[j]; j++) {
            if(am->edges[i * am->nodes_len + j])
                printf(" %s", am->nodes[j]);
        }
        printf("\n");
    }
}

void adjacency_matrix_free(adjacency_matrix *am) {
    if(!am)
        return;
    free(am->nodes);
    free(am->edges);
    free(am);
}

void trim(char *str) {
    str[strcspn(str, "\n")] = '\0';
}

int process(adjacency_matrix *am, char *line) {
    trim(line);
    int course;
    for(int i = 0;; i++) {
        char *token = strtok(i ? NULL : line, DELIM);
        if(!token)
            break;
        if(!i) {
            course = adjacency_matrix_node(am, token);
            if(course == -1)
                return EXIT_FAILURE;
            continue;
        }
        int prereq = adjacency_matrix_node(am, token);
        if(prereq == -1)
            return EXIT_FAILURE;
        adjacency_matrix_edge(am, course, prereq);
    }
    return EXIT_SUCCESS;
}

int main() {
    printf("This program will read, from a file, a list of courses and their prerequisites and will print the list in which to take courses.\n");
    printf("Enter filename: ");
    char filename[MAX_FILE_LENGTH];
    scanf("%s", filename);

    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("Could not open file %s. Exit\n", filename);
        printf("\nFailed to read from file. Program will terminate.\n");
        return EXIT_FAILURE;
    }
    adjacency_matrix *am = adjacency_matrix_create(MAX_COURSE_LENGTH);
    for(;;) {
        char line[MAX_LINE_LENGTH];
        if (!fgets(line, MAX_LINE_LENGTH, fp))
            break;
        process(am, line);
    }
    fclose(fp);
    adjacency_matrix_print(am);
    adjacency_matrix_free(am);
}

and here is the corresponding output:

c100: c200
c200: c100
c300: c100 c200
Allan Wind
  • 23,068
  • 5
  • 28
  • 38