1

I was trying to use a linked list to hold every word from a text file that contains a paragraph of text. Each line has an unknown number of words on it, each separated by a space. I thought I could use getline() and strtok() to read through each word. However, the program only reads the first word on each line, so I thought I could add a loop that detects the end of each line of the file so that all the words would be read.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of a singly linked list of words.
struct node{
    char *word;         // the word text held by this node
    struct node *next;  // pointer to another node of the same list
};

// `link` is shorthand for `struct node`, the linked-list element type.
typedef struct node link;

// Intended to add `word` to the front of the linked list and return the
// new head.
//   word: the text to store; a copy is made with strdup()
//   head: the current first node of the list
link *addName(char[] word, link *head){
    link *temp1;

    temp1 = (link*)malloc(sizeof(link));
    // store a private copy of the word in the new node
    temp1->word = strdup(word);
    // NOTE(review): this assigns the character data `word` to the node
    // pointer `next`; the incoming `head` is never linked in -- confirm intent
    temp1->next = word;
    head = temp1;
    return head;
}

// Opens "paragraph.data", reads it line by line with getline(), and prints
// the first two space-separated tokens of each line.
int main(){
    FILE *fO; 
    fO = fopen("paragraph.data", "r");  // result is not checked for NULL
    int size = 0;   // passed to getline() as the buffer size
    int len = 0;    // not used anywhere in this function

    //initialize it for the getline() and strtok()
    char *line = 0;

    
    //use malloc
    line = (char*)malloc(sizeof(int));

    //loop through the file
    while(getline(&line, &size, fO) != -1){
        // first token of the current line
        char *word = strtok(line, " ");
        printf("the word: %s\n", word);

        //while(there is no "\n" detected?){}
        // second token only: strtok() is not called again for this line,
        // so any further words on it are never read
        word = strtok(NULL, " ");
        printf("the word: %s\n", word);
        //addName()
    }

}

the file is like this(shortened for eg.):

lorem ipsum
dolor
sit amet con sec
euter orci

Each line could have any number of words, which is what confuses me. Does anyone know how to make the while loop detect the end of each line? Right now it just prints out the first word of every line:

the name: lorem
the name: dolor
the name: sit
the name: euter 
brocoli
  • 23
  • 5
  • What do you think the 2nd argument to `strtok` is? And why is it a string rather than a `char`? – Cheatah Mar 03 '22 at 18:11
  • I thought the 2nd argument was supposed to be the delimiter. Is it not supposed to be a string? Sorry, I thought it should return a string from `strtok`. I'm not sure. – brocoli Mar 03 '22 at 18:17
  • The second argument to `strtok` is a string that has all possible delimiters in it, so that's okay. You need a second loop over the words in each line as your comment ("while there is no newline") suggests. But you don't have to look for newlines. `strtok` returns `NULL` if there are no more words in the line. – M Oehm Mar 03 '22 at 18:22
  • This code doesn't compile, and even if it is fixed so it does it doesn't generate the given output. Please post the *actual* code and the output given by the same. – dbush Mar 03 '22 at 18:22
  • `temp1->next = word;` --> `temp1->next = head;` – Craig Estey Mar 03 '22 at 18:30
  • "line = (char*)malloc(sizeof(int))", since the `line` probably has to hold more than that, is slowing down your code unnecessarily; `getline` allows `line` to be null anyway. – Neil Mar 03 '22 at 20:36

2 Answers

2

A number of issues ...

  1. char[] word is not valid C and won't compile
  2. temp1->next = word; won't compile -- word is a char * pointer and not a pointer to a node
  3. You want: temp1->next = head; to link the new node into the linked list
  4. You're not looping on strtok, so, of course, you'll only get one [or two] tokens.
  5. In main, size must be a size_t and not an int -- the getline call won't even compile.
  6. getline does not strip the newline
  7. line must be freed once the loop has finished (getline reuses the same buffer on every iteration)
  8. Your code does not call addName in main
  9. Don't cast the return of malloc (see: "Do I cast the result of malloc?")

In the code below, I use cpp conditionals to denote old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Here is the refactored code. I've changed link into node to be more descriptive. It is annotated with bugs and fixes:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// List element: one stored word plus the pointer to the element after it.
typedef struct node {
    char *word;
    struct node *next;
} node;

// Add a copy of `word` to the front of the linked list.
//   word: the text to store; it is duplicated with strdup()
//   head: the current first node of the list
// Returns the newly created node, which is the new head of the list.
// The `#if 0` branches keep the original (broken) lines for comparison.
// NOTE/BUG: char[] isn't valid C and won't compile
#if 0
node *
addName(char[] word, node *head)
#else
node *
addName(const char *word, node *head)
#endif
{
    node *temp1;

    // sizeof(*temp1) sizes the allocation from the pointer's own type
    temp1 = malloc(sizeof(*temp1));

    // store a private copy of the word in the new node
    temp1->word = strdup(word);
// NOTE/BUG: word [corrected] is a char* and can't be assigned to next
#if 0
    temp1->next = word;
#else
    // the old head becomes the second node
    temp1->next = head;
#endif
    head = temp1;

    return head;
}

// Reads "paragraph.data" line by line, splits every line into
// space-separated words, pushes each word onto the front of a linked list
// with addName(), and finally prints the list from head to tail.
// The `#if 0` branches keep the original (broken) lines for comparison.
int
main()
{
    FILE *fO;

    // NOTE(review): the fopen() result is used below without a NULL check
    fO = fopen("paragraph.data", "r");
// NOTE/BUG: size must be a size_t -- getline() takes a size_t * for its
// second argument
#if 0
    int size = 0;
#else
    size_t size = 0;
#endif
    // not used anywhere in this function
    int len = 0;

    // initialize it for the getline() and strtok()
#if 0
    char *line = 0;
#else
    char *line = NULL;
#endif

    // use malloc
// NOTE/BUG: getline expects a null pointer if size is 0
#if 0
    line = (char *) malloc(sizeof(int));
#endif

    // loop through the file
#if 0
    while (getline(&line, &size, fO) != -1) {
        char *word = strtok(line, " ");

        printf("the word: %s\n", word);

        // while(there is no "\n" detected?){}
        word = strtok(NULL, " ");
        printf("the word: %s\n", word);
        // addName()
    }
#else
    // start with an empty list
    node *head = NULL;

    while (getline(&line, &size, fO) != -1) {
        // strip newline (overwrite the first '\n', if any, with a terminator)
        line[strcspn(line,"\n")] = 0;

        // first call passes the line itself; later calls pass NULL to continue
        char *word = strtok(line," ");

        // strtok() returns NULL once the line has no more words
        while (word != NULL) {
            printf("DEBUG: %s\n", word);
            head = addName(word,head);
            word = strtok(NULL," ");
        }
    }

    // the getline() buffer is released once, after the loop has finished
    free(line);

    // print linked list (head first, i.e. the most recently added word first)
    for (node *cur = head;  cur != NULL;  cur = cur->next)
        printf("Final: %s\n",cur->word);
#endif

    return 0;
}

Here is the fully cleaned up code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of a singly linked list of words.
typedef struct node node;
struct node {
    char *word;     // heap-allocated copy of the word, owned by the node
    node *next;     // next node in the list, or NULL at the tail
};

// Push a copy of `word` onto the front of the linked list.
//
//   word: NUL-terminated string to store; the caller keeps ownership of it.
//   head: current first node of the list, or NULL for an empty list.
//
// Returns the new head on success. If `word` is NULL or an allocation fails,
// the list is left untouched and `head` itself is returned, so the usual
// `head = addName(word, head)` pattern never loses the existing list.
// The caller eventually frees each node's `word` and then the node.
node *
addName(const char *word, node *head)
{
    if (word == NULL) {
        return head;
    }

    node *fresh = malloc(sizeof(*fresh));
    if (fresh == NULL) {
        return head;
    }

    // Copy the text by hand (same result as strdup) so the helper also
    // builds with a strict ISO C11 compiler, where strdup is not declared.
    size_t bytes = strlen(word) + 1;
    fresh->word = malloc(bytes);
    if (fresh->word == NULL) {
        free(fresh);
        return head;
    }
    memcpy(fresh->word, word, bytes);

    // the old head becomes the second node
    fresh->next = head;

    return fresh;
}

// Read "paragraph.data" line by line, split every line into space-separated
// words, push each word onto the front of a linked list, then print the
// list (most recently added word first) and release everything.
// Returns 0 on success, EXIT_FAILURE if the file cannot be opened.
int
main(void)
{
    FILE *fO = fopen("paragraph.data", "r");

    // getline() must not be handed a NULL stream
    if (fO == NULL) {
        perror("paragraph.data");
        return EXIT_FAILURE;
    }

    // getline() allocates and grows the buffer itself when it starts out as
    // a NULL pointer with a size of 0
    char *line = NULL;
    size_t size = 0;

    node *head = NULL;

    // loop through the file
    while (getline(&line, &size, fO) != -1) {
        // getline() keeps the trailing newline; strip it
        line[strcspn(line,"\n")] = 0;

        // the first strtok() call takes the line, later calls take NULL and
        // return NULL once the line has no more words
        for (char *word = strtok(line," "); word != NULL; word = strtok(NULL," ")) {
            printf("DEBUG: %s\n", word);
            head = addName(word,head);
        }
    }

    // the buffer is reused on every iteration, so free it once, afterwards
    free(line);
    fclose(fO);

    // print linked list
    for (node *cur = head;  cur != NULL;  cur = cur->next) {
        printf("Final: %s\n",cur->word);
    }

    // release the list: each node owns its copy of the word
    while (head != NULL) {
        node *following = head->next;

        free(head->word);
        free(head);
        head = following;
    }

    return 0;
}

For your sample input, here is the program output:

DEBUG: lorem
DEBUG: ipsum
DEBUG: dolor
DEBUG: sit
DEBUG: amet
DEBUG: con
DEBUG: sec
DEBUG: euter
DEBUG: orci
Final: orci
Final: euter
Final: sec
Final: con
Final: amet
Final: sit
Final: dolor
Final: ipsum
Final: lorem
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
  • Optionally you could omit the `strcspn` and use `" \n"` for the delimiter. – Cheatah Mar 03 '22 at 22:56
  • 1
    @Cheatah Yes, I considered that [and _almost_ did that], and it's arguably better, but left it in order to more closely mimic OP's code. I was influenced by a now deleted answer here that talked about using `readline` [instead of `getline`] with the claim that `readline` removed the newline and wanted to emphasize that `getline` kept the newline. – Craig Estey Mar 03 '22 at 23:02
  • It is not a critique, the way you explain your choice I fully support it. – Cheatah Mar 03 '22 at 23:06
  • thanks @cheatah how could I modify the code to account for punctuation? if the end of each line had a period or comma, then how could I add it to the linked list as another word? – brocoli Mar 09 '22 at 17:26
  • @txdara I assume you have `foo,` or `foo.` and just want to store `foo` then change the second arg of `strtok` to `", ."` – Craig Estey Mar 10 '22 at 02:03
  • @craig estey thank you. so `strtok(NULL, ",")` can get the stripped word. could I also get the punctuation to add to the list separately. for eg(could I just put "foo" and "," in as different 'words') ? – brocoli Mar 12 '22 at 05:09
0

Your main issue is that line is already allocated while size is zero. If you intend to let getline allocate space for the line, then size must be zero AND line must be NULL. Also, you should free the allocated memory once the loop has finished.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One element of a singly linked list of words.
typedef struct node {
    char *word;         // heap-allocated copy of the word, owned by the node
    struct node *next;  // next node in the list, or NULL at the tail
} link;

// Add a copy of `word` to the front of the linked list.
//   word: NUL-terminated string to store; the caller keeps ownership of it
//   head: current first node of the list, or NULL for an empty list
// Returns the new head. If `word` is NULL or an allocation fails the list is
// left unchanged and `head` is returned, so `head = addWord(word, head)`
// never loses the existing list.
link *addWord(char *word, link *head){
    if (word == NULL) {
        return head;
    }

    link *temp1 = malloc(sizeof *temp1);
    if (temp1 == NULL) {
        return head;
    }

    //add string word to linked list: copy it by hand (same result as
    //strdup) so this also builds with a strict ISO C11 compiler
    size_t bytes = strlen(word) + 1;
    temp1->word = malloc(bytes);
    if (temp1->word == NULL) {
        free(temp1);
        return head;
    }
    memcpy(temp1->word, word, bytes);

    temp1->next = head;
    return temp1;
}

int main(){
    FILE *fO; 
    fO = fopen("paragraph.data", "r");
    int size = 0;
    int len = 0;
    char *line;

    //loop through the file
    while(getline(&line, &size, fO) != EOF){
        char *word = strtok(line, " \n");

// addWord(word,head); printf("the word: %s\n", word);

        while(word) {
            word = strtok(NULL, " \n");
            printf("the word: %s\n", word);

// addWord(word,head); } free(line); } }

SGeorgiades
  • 1,771
  • 1
  • 11
  • 11