1

I am trying to tokenize a file and save the tokens into an array within a function named tokenize. Then I would like to take the output of the tokenize function to be used within the main or another function, to do something.

The problem is: I'm not quite sure how to move the pointer forward within the main function after tokenizing the file lines. The goal is to keep the tokens of each line grouped together rather than separated, so that the meaning of the words is not lost.

The file.txt would look something like (spaces added between \t for readability):

948213843 \t 644321498 \t 16549816514 \t 13616131216 \t 1646312132 \t 13468486

My question: How would I be able to access the array information being returned from the tokenize function?

Thanks!

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define SIZE 1024

/*
 * Split `buffer` on tab characters, echo every token to stdout and return a
 * heap-allocated copy of the FIRST token.
 *
 * buffer  - NUL-terminated line to split; strtok_r() writes NUL bytes into it.
 * pattern - currently unused (pattern matching removed to focus only on
 *           tokenization).
 *
 * Returns a malloc()ed string the caller must free(), or NULL when `buffer`
 * is NULL, contains no tokens, or memory could not be allocated.
 *
 * Because the return type is a single `char *`, only one string can be handed
 * back to the caller; the copies of the remaining tokens are released here
 * instead of being leaked.  At most 15 tokens are processed per line.
 */
char *tokenize(char *buffer, char *pattern)
{
        enum { MAX_TOKENS = 15 };
        char *arr[MAX_TOKENS] = {0};
        char *saveptr = NULL;   // strtok_r() state, kept separate from `buffer`
        char *token;
        char *first;
        int count = 0;
        int i;

        (void)pattern;

        if (buffer == NULL)
                return NULL;

        // Stop at MAX_TOKENS so a long line can never write past the array.
        for (token = strtok_r(buffer, "\t", &saveptr);
             token != NULL && count < MAX_TOKENS;
             token = strtok_r(NULL, "\t", &saveptr))
        {
                // Size each copy from the token itself, plus the terminator.
                size_t len = strlen(token) + 1;

                arr[count] = malloc(len);
                if (arr[count] == NULL)
                        break;
                memcpy(arr[count], token, len);
                printf("%s\n", token);
                count++;
        }

        // test to verify array data: only slots that were actually filled
        for (i = 0; i < count; i++)
                fprintf(stdout, "test: %s\n", arr[i]);

        // The caller can only receive arr[0]; free everything else.
        first = arr[0];
        for (i = 1; i < count; i++)
                free(arr[i]);

        return first;
}

/*
 * Read "file_name.txt" line by line, pass each non-empty line to tokenize()
 * and print the string it returns.
 *
 * Returns 0 on success, or EXIT_FAILURE when the file cannot be opened.
 */
int main(int argc, char *argv[])
{
        FILE            *filename;
        static char     buffer[SIZE];

        (void)argc;
        (void)argv;

        // The file is only read, so do not request write access ("rb+").
        filename = fopen("file_name.txt", "rb");
        if (filename == NULL)
        {
                perror("file_name.txt");
                return EXIT_FAILURE;
        }

        while (fgets(buffer, SIZE, filename) != NULL)
        {
                // Cut the line at the first line-ending character.  Unlike
                // indexing with strlen(buffer) - 1, this is safe for an empty
                // string and also handles a final line with no newline.
                buffer[strcspn(buffer, "\r\n")] = '\0';
                if (buffer[0] == '\0')
                        continue;

                // the matching search pattern will grab the line of data to be tokenized
                char *token = tokenize(buffer, "948213843");

                // tokenize() returns ONE string, so there is exactly one
                // value to print here; it cannot be indexed as an array of
                // tokens with this return type.
                if (token != NULL)
                        fprintf(stdout, "sucks: %s\n", token);

                // do something with the tokens
                // doSomethingWithToken(token);

                // The returned string is heap-allocated; release it per line.
                free(token);
        }

        fclose(filename);
        return 0;
}
abelenky
  • 63,815
  • 23
  • 109
  • 159
  • Possible duplicate of [Returning an array using C](https://stackoverflow.com/questions/11656532/returning-an-array-using-c) – anatolyg Oct 26 '19 at 17:55
  • @anatolyg Maybe a *partial* duplicate - but there are other issues in the code that need fixing (as I've attempted to do). – Adrian Mole Oct 26 '19 at 17:56
  • @anatolyg I did review the link you've mentioned and wasn't getting the result that I was looking for, but Adrian solved my issue. :) – newb art online Oct 26 '19 at 18:23

1 Answer

1

There are a number of errors in your code. I have fixed those that I can find, in the code below, where I've used the triple-slash (///) to mark the changes I've made.

However, the major problem is that your arr[] is defined as an array of strings (char pointers), but you're trying to treat it (in main) as a single string.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define SIZE 1024

/*
 * Split a tab-separated line into at most 15 tokens.
 *
 * buffer  - NUL-terminated line to split; strtok_r() writes NUL bytes into it.
 * pattern - currently unused (pattern matching removed to focus only on
 *           tokenization).
 *
 * Returns a pointer to a static array of 15 strings.  Every slot is a
 * separately malloc()ed string: slots that received a token hold a copy of
 * it, the rest hold "".  The caller owns the 15 strings and must free() each
 * of them (indices 0..14) before calling tokenize() again; the array itself
 * is static and must not be freed.  Tokens beyond the 15th are ignored.
 * Returns NULL when `buffer` is NULL or memory could not be allocated.
 */
char** tokenize(char* buffer, char* pattern) /// You want to return an ARRAY of pointers (array of strings)!
{
    enum { MAX_TOKENS = 15 };
    static char* arr[MAX_TOKENS]; /// Without the static keyword, the array is lost when the function returns
    char* saveptr = NULL; // strtok_r() state, kept separate from `buffer`
    char* token;
    size_t cap;
    int i = 0;
    int j;

    (void)pattern;

    if (buffer == NULL)
        return NULL;

    // Measure the line BEFORE strtok_r() starts cutting it up: no token can
    // be longer than the whole line, so this capacity fits every slot.
    cap = strlen(buffer) + 1; /// Need to add 1 char for nul terminator

    for (j = 0; j < MAX_TOKENS; j++) {
        arr[j] = malloc(cap);
        if (arr[j] == NULL) {
            // Undo the partial allocation so nothing is leaked.
            while (j-- > 0) {
                free(arr[j]);
                arr[j] = NULL;
            }
            return NULL;
        }
        arr[j][0] = '\0'; /// Put in an empty string!
    }

    // pattern matching removed to focus only on tokenization
    token = strtok_r(buffer, "\t", &saveptr);
    while (token != NULL && i < MAX_TOKENS) // never write past the last slot
    {
        strcpy(arr[i], token);
        printf("%s\n", token);
        token = strtok_r(NULL, "\t", &saveptr);
        i++;
    }

    // test to verify array data --- good here
    for (i = 0; i < MAX_TOKENS; i++)
        fprintf(stdout, "test: %s\n", arr[i]);

    return arr; /// This now returns the array of string pointers!
}

/*
 * Read "file_name.txt" line by line, tokenize each non-empty line, print
 * every token slot and release the strings returned by tokenize().
 *
 * Returns 0 on success, or EXIT_FAILURE when the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    // Number of strings in the array returned by tokenize(); must match the
    // size of the array declared there.
    enum { TOKEN_SLOTS = 15 };
    FILE* filename;
    static char     buffer[SIZE];

    (void)argc;
    (void)argv;

    // The file is only read, so do not request write access ("rb+").
    filename = fopen("file_name.txt", "rb");
    if (filename == NULL)
    {
        perror("file_name.txt");
        return EXIT_FAILURE;
    }

    while (fgets(buffer, SIZE, filename) != NULL)
    {
        // Cut the line at the first line-ending character.  Unlike indexing
        // with strlen(buffer) - 1, this is safe for an empty string and also
        // handles a final line that has no trailing newline.
        buffer[strcspn(buffer, "\r\n")] = '\0';
        if (buffer[0] == '\0')
            continue;

        // the matching search pattern will grab the line of data to be tokenized
        char** token = tokenize(buffer, "948213843"); /// Change this to match new function definition!

        if (token == NULL)
            continue;

        // test print: start at index 0 so the first token is shown as well
        for (int i = 0; i < TOKEN_SLOTS; i++)
            fprintf(stdout, "sucks: %s\n", token[i]); /// Need to give index to each string!

        // do something with the tokens
        // doSomethingWithToken(token);

        /// Clean-up: Free the arrays...
        // Every slot, including index 0, was malloc()ed by tokenize().
        for (int i = 0; i < TOKEN_SLOTS; i++)
        {
            free(token[i]);
            token[i] = NULL;
        }
    }

    fclose(filename);
    return 0;///
}

Without a data file to use, I can't test this; also, my MSVC system doesn't have the strtok_r function, but I think you're calling that correctly.

Feel free to ask for further clarification and/or explanation.

Adrian Mole
  • 49,934
  • 160
  • 51
  • 83