0

here i need to use an array of linked lists to stock words gotten from a text . i did the whole implementation of the structure : i defined the array and the nodes of the listsenter image description here

to extract words from the text i used the function strtok and this is my code enter image description here

and my problem now is how to create lists which contain words according to their first letter . notice that each list is pointed by T[i] and i go from 0 to 25

macropod
  • 12,757
  • 2
  • 9
  • 21
yanis
  • 21
  • 1

1 Answers1

0

First of all, welcome to the community Yanis!

Second, please, use the code snippet functionality of StackOverflow to provide all the code, and specify, both in the text and in the tags, which programming language you are using.

Now, I infer from your question you need a C (?) implementation of a double linked list, which represents the tokens in a string, and store these lists in an array, which would be the the tokenized string.

Now, here is my implementation based on this understanding of the problem, please clarify if I am wrong.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_TOKENS 25

// Node pointer type
typedef struct node *p_node;

// Node structure of double linked list
struct node 
{
    char val;     // Character stored in node
    int line;     // Line of the character
    int pos;      // Position of the character in the line
    
    p_node next;  // next node (NULL if last node)
    p_node prev;  // previous node (NULL if first node)
};

// Utility function to convert a string into a Node
p_node strToNode(char* str, int* pos, int* line)
{
    // First node of the list
    p_node fnode = (p_node) malloc(sizeof(struct node));
    fnode->val = str[0];
    fnode->line = *line;
    fnode->pos = *pos;
    fnode->prev = NULL;

    // Stores the previous node in the list so that it can be set for the next one
    p_node pnode = fnode;

    ++(*pos);

    int i;
    for (i = 1; i < strlen(str); ++i)
    {
        // Ignore new lines, update the line and position pointers
        if (str[i] == '\n')
        {
            ++(*line);
            *pos = 0;
        }
        // Generate new node
        else
        {
            p_node nnode = (p_node) malloc(sizeof(struct node));
            nnode->val = str[i];
            nnode->line = *line;
            nnode->pos = *pos;
            nnode->prev = pnode;
            nnode->next = NULL;

            ++(*pos);
            // Update previous node and move forward
            pnode->next = nnode;
            pnode = nnode;
        }
    }

    return fnode;
}

int main(int argc, char** argv) 
{
    p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list

    char delimiters[] = " ;-'()1234567890:!%?.,+-";
    char *token;
    char str[] = " Hello wor\nld ";
    strlwr(str);

    int line = 0;
    int pos = 0;
    int n_tokens = 0;

    token = strtok(str, delimiters);

    while (token != NULL && n_tokens < MAX_TOKENS)
    {
        p_node node = strToNode(token, &pos, &line);
        nodes[n_tokens++] = node;
        token = strtok(NULL, delimiters);
    }

    // Testing the implementation
    int i;
    for (i = 0; i < n_tokens; ++i)
    {
        printf("# Token %d\n", (i+1));
        p_node node = nodes[i];

        while (node != NULL)
        {
            printf("\t[%d:%d] %c\n", node->line, node->pos, node->val);
            node = node->next;
        }

        printf("\n");
    }

    return 0;
}

Update #1

Since OP commented, it is more clear to me the requirements of the code. Instead of my first hunch where the array contains lists representing single words, the list represent words starting with the same letter.

As such, here is the updated version of the code. OP has clarified his input is in theory a file. I have adjusted the code to reflect that as well.

I have also added some memory management, but I have not checked for memory leaks.

For the file reading, I have used the getline function implemented by Anti Haapla, as the POSIX standard version does not exist on Windows systems.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>

// From https://stackoverflow.com/questions/735126/are-there-alternate-implementations-of-gnu-getline-interface/
// if typedef doesn't exist (msvc, blah)
typedef intptr_t ssize_t;

ssize_t getline(char **lineptr, size_t *n, FILE *stream) {
    size_t pos;
    int c;

    if (lineptr == NULL || stream == NULL || n == NULL) {
        errno = EINVAL;
        return -1;
    }

    c = getc(stream);
    if (c == EOF) {
        return -1;
    }

    if (*lineptr == NULL) {
        *lineptr = malloc(128);
        if (*lineptr == NULL) {
            return -1;
        }
        *n = 128;
    }

    pos = 0;
    while(c != EOF) {
        if (pos + 1 >= *n) {
            size_t new_size = *n + (*n >> 2);
            if (new_size < 128) {
                new_size = 128;
            }
            char *new_ptr = realloc(*lineptr, new_size);
            if (new_ptr == NULL) {
                return -1;
            }
            *n = new_size;
            *lineptr = new_ptr;
        }

        ((unsigned char *)(*lineptr))[pos ++] = c;
        if (c == '\n') {
            break;
        }
        c = getc(stream);
    }

    (*lineptr)[pos] = '\0';
    return pos;
}

#define MAX_TOKENS 26
#define FILE_NAME "test_file.txt"

// Node pointer type
typedef struct node *p_node;

// Node structure of double linked list
struct node 
{
    char* val;    // String stored in node
    int line;     // Line of the character
    int pos;      // Position of the character in the line
    
    p_node next;  // next node (NULL if last node)
    p_node prev;  // previous node (NULL if first node)
};

p_node insert(p_node list, char* str, int* line, int* pos)
{
    // First we need to reach the end of the list
    p_node lnode = NULL;
    
    if (list != NULL)
    {
        for (lnode = list; lnode->next != NULL; lnode = lnode->next);
    }

    // Creating the new node
    p_node nnode = (p_node) malloc(sizeof(struct node));
    // Creating a copy of the string to save it
    nnode->val = strdup(str);
    nnode->line = *line;
    nnode->pos = *pos;
    nnode->prev = lnode;
    nnode->next = NULL;

    if (lnode != NULL)
    {
        lnode->next = nnode;
    }

    return nnode;
}

int main(int argc, char** argv) 
{
    p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list

    // Initializing all nodes to NULL
    int i;
    for (i = 0; i < MAX_TOKENS; ++i)
    {
        nodes[i] = NULL;
    }

    char delimiters[] = " ;-'()1234567890:!%?.,+-\t\r\n";
    char *token;

    FILE* fp;
    size_t len = 0;
    size_t read;
    char* str = NULL;

    fp = fopen(FILE_NAME, "r");
    if (fp == NULL)
    {
        exit(EXIT_FAILURE);
    }

    int line = 0;
    int pos = 0;

    while ((read = getline(&str, &len, fp)) != -1)
    {
        strlwr(str);
        token = strtok(str, delimiters);

        while (token != NULL)
        {
            int index = token[0] - 'a';
            if (nodes[index] == NULL)
            {
                nodes[index] = insert(NULL, token, &line, &pos);
            }
            else
            {
                insert(nodes[index], token, &line, &pos);
            }
            pos += strlen(token);
            token = strtok(NULL, delimiters);
        }

        ++line;
    }

    fclose(fp);

    // Testing the implementation
    for (i = 0; i < MAX_TOKENS; ++i)
    {
        if (nodes[i] != NULL)
        {
            printf("# Tokens starting with %c\n", (i+'a'));
            p_node node = nodes[i];

            while (node != NULL)
            {
                printf("\t[%d:%d] %s\n", node->line, node->pos, node->val);
                node = node->next;
            }

            printf("\n");
        }
    }

    // Freeing the used memory
    for (i = 0; i < MAX_TOKENS; ++i)
    {
        p_node node = nodes[i];
        while (node != NULL)
        {
            p_node tmp = node;
            node = node->next;
            free(tmp->val);
            free(tmp);
        }
    }

    if (token)
    {
        free(token);
    }

    if (str)
    {
        free(str);
    }

    return 0;
}

The test file I used (it has a \t in it too, before the test):

 Hello world

This is A
     test

The output:

# Tokens starting with a
        [2:16] a

# Tokens starting with h
        [0:0] hello

# Tokens starting with i
        [2:14] is

# Tokens starting with t
        [2:10] this
        [3:17] test

# Tokens starting with w
        [0:5] world
Sandro Massa
  • 98
  • 1
  • 10
  • first of all thank you so much for your help, then there is just one thing about the program so the program that i want to do consist of extractig words from a text and stock them in double linked-lists acording to their first letter and each list of these lists is pointed by a case in the array (aka T[i] as example).and for the code i couldn't use the code tool because they said i m still new to use this feature. – yanis May 26 '21 at 22:28
  • this is an example :as example: the list which has T[0] as head has the words :am abcd azerty.... , the list T[1] has: bcd bank bring ..... , until T[25] : zte zerty zack .... – yanis May 26 '21 at 22:43
  • In practice, each T[i] has words that start with the i-th letter of the alphabet if I understand correctly, and each node is a word, right? Also, please update your question with more detailed information in order for who lands on this page can understand and maybe find it of help. – Sandro Massa May 27 '21 at 08:21