First of all, welcome to the community Yanis!
Second, please, use the code snippet functionality of StackOverflow to provide all the code, and specify, both in the text and in the tags, which programming language you are using.
Now, I infer from your question you need a C (?) implementation of a double linked list, which represents the tokens in a string, and store these lists in an array, which would be the the tokenized string.
Now, here is my implementation based on this understanding of the problem, please clarify if I am wrong.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TOKENS 25
// Node pointer type
typedef struct node *p_node;
// Node structure of double linked list
struct node
{
char val; // Character stored in node
int line; // Line of the character
int pos; // Position of the character in the line
p_node next; // next node (NULL if last node)
p_node prev; // previous node (NULL if first node)
};
// Utility function to convert a string into a Node
p_node strToNode(char* str, int* pos, int* line)
{
// First node of the list
p_node fnode = (p_node) malloc(sizeof(struct node));
fnode->val = str[0];
fnode->line = *line;
fnode->pos = *pos;
fnode->prev = NULL;
// Stores the previous node in the list so that it can be set for the next one
p_node pnode = fnode;
++(*pos);
int i;
for (i = 1; i < strlen(str); ++i)
{
// Ignore new lines, update the line and position pointers
if (str[i] == '\n')
{
++(*line);
*pos = 0;
}
// Generate new node
else
{
p_node nnode = (p_node) malloc(sizeof(struct node));
nnode->val = str[i];
nnode->line = *line;
nnode->pos = *pos;
nnode->prev = pnode;
nnode->next = NULL;
++(*pos);
// Update previous node and move forward
pnode->next = nnode;
pnode = nnode;
}
}
return fnode;
}
int main(int argc, char** argv)
{
p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list
char delimiters[] = " ;-'()1234567890:!%?.,+-";
char *token;
char str[] = " Hello wor\nld ";
strlwr(str);
int line = 0;
int pos = 0;
int n_tokens = 0;
token = strtok(str, delimiters);
while (token != NULL && n_tokens < MAX_TOKENS)
{
p_node node = strToNode(token, &pos, &line);
nodes[n_tokens++] = node;
token = strtok(NULL, delimiters);
}
// Testing the implementation
int i;
for (i = 0; i < n_tokens; ++i)
{
printf("# Token %d\n", (i+1));
p_node node = nodes[i];
while (node != NULL)
{
printf("\t[%d:%d] %c\n", node->line, node->pos, node->val);
node = node->next;
}
printf("\n");
}
return 0;
}
Update #1
Since OP commented, it is more clear to me the requirements of the code. Instead of my first hunch where the array contains lists representing single words, the list represent words starting with the same letter.
As such, here is the updated version of the code. OP has clarified his input is in theory a file. I have adjusted the code to reflect that as well.
I have also added some memory management, but I have not checked for memory leaks.
For the file reading, I have used the getline
function implemented by Anti Haapla, as the POSIX standard version does not exist on Windows systems.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
// From https://stackoverflow.com/questions/735126/are-there-alternate-implementations-of-gnu-getline-interface/
// if typedef doesn't exist (msvc, blah)
typedef intptr_t ssize_t;
ssize_t getline(char **lineptr, size_t *n, FILE *stream) {
size_t pos;
int c;
if (lineptr == NULL || stream == NULL || n == NULL) {
errno = EINVAL;
return -1;
}
c = getc(stream);
if (c == EOF) {
return -1;
}
if (*lineptr == NULL) {
*lineptr = malloc(128);
if (*lineptr == NULL) {
return -1;
}
*n = 128;
}
pos = 0;
while(c != EOF) {
if (pos + 1 >= *n) {
size_t new_size = *n + (*n >> 2);
if (new_size < 128) {
new_size = 128;
}
char *new_ptr = realloc(*lineptr, new_size);
if (new_ptr == NULL) {
return -1;
}
*n = new_size;
*lineptr = new_ptr;
}
((unsigned char *)(*lineptr))[pos ++] = c;
if (c == '\n') {
break;
}
c = getc(stream);
}
(*lineptr)[pos] = '\0';
return pos;
}
#define MAX_TOKENS 26
#define FILE_NAME "test_file.txt"
// Node pointer type
typedef struct node *p_node;
// Node structure of double linked list
struct node
{
char* val; // String stored in node
int line; // Line of the character
int pos; // Position of the character in the line
p_node next; // next node (NULL if last node)
p_node prev; // previous node (NULL if first node)
};
p_node insert(p_node list, char* str, int* line, int* pos)
{
// First we need to reach the end of the list
p_node lnode = NULL;
if (list != NULL)
{
for (lnode = list; lnode->next != NULL; lnode = lnode->next);
}
// Creating the new node
p_node nnode = (p_node) malloc(sizeof(struct node));
// Creating a copy of the string to save it
nnode->val = strdup(str);
nnode->line = *line;
nnode->pos = *pos;
nnode->prev = lnode;
nnode->next = NULL;
if (lnode != NULL)
{
lnode->next = nnode;
}
return nnode;
}
int main(int argc, char** argv)
{
p_node nodes[MAX_TOKENS]; // Array of pointers to the first node of a double linked list
// Initializing all nodes to NULL
int i;
for (i = 0; i < MAX_TOKENS; ++i)
{
nodes[i] = NULL;
}
char delimiters[] = " ;-'()1234567890:!%?.,+-\t\r\n";
char *token;
FILE* fp;
size_t len = 0;
size_t read;
char* str = NULL;
fp = fopen(FILE_NAME, "r");
if (fp == NULL)
{
exit(EXIT_FAILURE);
}
int line = 0;
int pos = 0;
while ((read = getline(&str, &len, fp)) != -1)
{
strlwr(str);
token = strtok(str, delimiters);
while (token != NULL)
{
int index = token[0] - 'a';
if (nodes[index] == NULL)
{
nodes[index] = insert(NULL, token, &line, &pos);
}
else
{
insert(nodes[index], token, &line, &pos);
}
pos += strlen(token);
token = strtok(NULL, delimiters);
}
++line;
}
fclose(fp);
// Testing the implementation
for (i = 0; i < MAX_TOKENS; ++i)
{
if (nodes[i] != NULL)
{
printf("# Tokens starting with %c\n", (i+'a'));
p_node node = nodes[i];
while (node != NULL)
{
printf("\t[%d:%d] %s\n", node->line, node->pos, node->val);
node = node->next;
}
printf("\n");
}
}
// Freeing the used memory
for (i = 0; i < MAX_TOKENS; ++i)
{
p_node node = nodes[i];
while (node != NULL)
{
p_node tmp = node;
node = node->next;
free(tmp->val);
free(tmp);
}
}
if (token)
{
free(token);
}
if (str)
{
free(str);
}
return 0;
}
The test file I used (it has a \t
in it too, before the test
):
Hello world
This is A
test
The output:
# Tokens starting with a
[2:16] a
# Tokens starting with h
[0:0] hello
# Tokens starting with i
[2:14] is
# Tokens starting with t
[2:10] this
[3:17] test
# Tokens starting with w
[0:5] world