1
// Buffer for one line of input.
// NOTE(review): the scanf() conversion below has no field width, so input
// longer than 254 characters would overflow this array.
char line[255];
char *token = NULL;
char *line2 = NULL;
char *temporaryToken = NULL;

// NOTE(review): without braces this if governs only the first strtok() call;
// the do-while below runs whether or not scanf() succeeded.
if( scanf(" %[^\n]", line) > 0)
    token = strtok( line, ";" ); //divide the line by ;
    do
    {
        line2 = token;
        // Starting a new strtok() scan discards the saved position of the ';'
        // scan.  strtok() also overwrites the delimiter after the first word
        // with '\0', so token (the same pointer as line2) now reads as that
        // first word only.
        temporaryToken = strtok(line2, " ");
        do
        {
            //divide the line2 by spaces into command and args, not the question here
            temporaryToken = strtok( NULL, " " );
        }while (temporaryToken != NULL );
        // Resumes from wherever the space scan stopped, not from the ';' scan.
        token = strtok( NULL, ";" );
    }while(token != NULL);

this is not my code verbatim, by the way, just an example of how it's set out

In my program, when I print the "token" variable before I split a second time, it'll print out everything until the ; character.

For example, say stdin took in "ls -la; mkdir lololol; ls -la", it would print "ls -la". But then, after the second split, printing "token" would only print "ls".

Why is this, and how could I go about fixing it?

assylias
  • 321,522
  • 82
  • 660
  • 783
Sam P
  • 453
  • 6
  • 19

2 Answers

4

strtok modifies the original string. If you want to mix calls like that, you either need to make a copy or use strtok_r.

Karl Bielefeldt
  • 47,314
  • 10
  • 60
  • 94
  • How would I go about doing this? I'm a complete newb to C. Would I create another string called tokenizedLine and make temporaryTokens based upon that? – Sam P Aug 11 '12 at 05:38
1

There are two problems with strtok().

  1. It modifies its input string.
  2. Only one set of strtok() calls can be active at a time.

I think your problem is the latter. You also have an indentation problem in the code:

// Indented to match how the compiler parses the code: only the first
// strtok() call is controlled by the if.
if (scanf(" %[^\n]", line) > 0)
    token = strtok( line, ";" );
// This loop is not part of the if, so it runs even when scanf() fails.
do
{
    line2 = token;
    temporaryToken = strtok(line2, " ");
    do
    {
        //divide the line2 by spaces into command and args, not the question here
        temporaryToken = strtok(NULL, " ");
    } while (temporaryToken != NULL);
    token = strtok( NULL, ";" );
} while(token != NULL);

You probably intended it to read:

// Braces place the whole tokenising loop under the if, so nothing runs when
// scanf() fails to read a line.
if (scanf(" %[^\n]", line) > 0)
{
    token = strtok(line, ";");
    do
    {
        line2 = token;
        // Still broken: strtok() remembers only one scan at a time, so this
        // call replaces the saved position of the ';' scan over line.
        temporaryToken = strtok(line2, " ");
        do
        {
            //divide the line2 by spaces into command and args, not the question here
            temporaryToken = strtok(NULL, " ");
        } while (temporaryToken != NULL);
        // Continues the most recent scan (the one over line2), not the ';' scan.
        token = strtok(NULL, ";");
    } while (token != NULL);
}

Assuming this is what you intended, you still have the problem that there is one strtok() running on line, and then a second one running on line2. The trouble is, the loop on line2 completely wrecks the interpretation of line. You can't use the nested loops with strtok().

If you must use something like strtok(), then look for either POSIX strtok_r() or Microsoft's strtok_s() (but note that the C11 standard Annex K version of strtok_s() is different — see Do you use the TR 24731 'safe' functions?).

// Nested tokenising with strtok_r(): each scan keeps its position in its own
// caller-supplied pointer, so the two scans no longer interfere.
if (scanf(" %[^\n]", line) > 0)
{
    // Saved position of the ';' scan.  strtok_r() writes it on the first call
    // and reads it on later calls; it needs no initialisation or cleanup.
    char *end1;
    token = strtok_r(line, ";", &end1);
    // NOTE(review): if line holds nothing but ';' the call above returns NULL
    // and this do-while still executes once with token == NULL.
    do
    {
        // Saved position of the space scan; a fresh one for every command.
        char *end2;
        line2 = token;
        temporaryToken = strtok_r(line2, " ", &end2);
        do
        {
            //divide the line2 by spaces into command and args, not the question here
            temporaryToken = strtok_r(NULL, " ", &end2);
        } while (temporaryToken != NULL);
        // Resume the ';' scan from its own saved position.
        token = strtok_r(NULL, ";", &end1);
    } while (token != NULL);
}

About the Comments

While you use strtok() or one of its relatives, the input string will be modified, and if you have multiple delimiters, you will not be able to tell which delimiter was present. You can work with a copy of the string, and do comparisons (usually based on offsets from the start of the string).

Within the limits of using strtok_r(), the solution above 'works'. Here's a test program to demonstrate:

#include <stdio.h>
#include <string.h>

/*
 * Demonstrates nested tokenising with strtok_r().
 *
 * Reads one line from standard input, splits it into commands at ';' and
 * splits each command into words at ' ', printing every token found.  The
 * outer and inner scans each keep their position in their own save pointer
 * (end1 / end2), which is what allows them to be nested.
 *
 * Returns 0 in all cases; nothing is printed when no line can be read.
 */
int main(void)
{
    char line[1024];

    /* The field width (sizeof line - 1) stops scanf() overrunning line[]. */
    if (scanf(" %1023[^\n]", line) == 1)
    {
        char *end1;     /* saved position of the ';' scan */
        char *token;
        printf("Input: <<%s>>\n", line);
        /*
         * Test before use: strtok_r() returns NULL straight away when the
         * line holds nothing but ';' characters.
         */
        token = strtok_r(line, ";", &end1);
        while (token != NULL)
        {
            char *end2; /* saved position of the ' ' scan for this command */
            char *temporaryToken;
            printf("Token1: <<%s>>\n", token);
            /* A command made only of spaces yields no words at all. */
            temporaryToken = strtok_r(token, " ", &end2);
            while (temporaryToken != NULL)
            {
                printf("Token2: <<%s>>\n", temporaryToken);
                temporaryToken = strtok_r(NULL, " ", &end2);
            }
            token = strtok_r(NULL, ";", &end1);
        }
    }

    return 0;
}

Example input and output:

$ ./strtok-demo
ls -la; mkdir lololol; ls -la
Input: <<ls -la; mkdir lololol; ls -la>>
Token1: <<ls -la>>
Token2: <<ls>>
Token2: <<-la>>
Token1: << mkdir lololol>>
Token2: <<mkdir>>
Token2: <<lololol>>
Token1: << ls -la>>
Token2: <<ls>>
Token2: <<-la>>
$

Alternative using strcspn() and strspn()

If you don't want to demolish the original string, you must use other functions than the strtok() family. The functions strcspn() and strspn() are suitable; they are part of Standard C (C89 and later versions), albeit much less well known than some of the other functions. But they're spot on for this task.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *substrdup(const char *src, size_t len);

/*
 * Demonstrates non-destructive tokenising with strcspn() and strspn().
 *
 * Reads one line from standard input, splits it into commands at ';' and
 * splits a heap copy of each command into words at ' ', printing each piece
 * with its length.  The input line itself is never modified, which the final
 * "Check" line shows.
 *
 * Returns 0 in all cases; nothing is printed when no line can be read.
 */
int main(void)
{
    char line[1024];

    /* The field width (sizeof line - 1) stops scanf() overrunning line[]. */
    if (scanf(" %1023[^\n]", line) == 1)
    {
        char *start1 = line;
        size_t len1;
        printf("Input: <<%s>>\n", line);
        /*
         * Skip leading separators; otherwise a line that starts with ';'
         * gives a zero-length first token and the loop never runs.
         */
        start1 += strspn(start1, ";");
        while ((len1 = strcspn(start1, ";")) != 0)
        {
            char *copy = substrdup(start1, len1);
            char *start2 = copy;
            size_t len2;
            /* len1 is below sizeof line, so the cast to int cannot truncate. */
            printf("Token1: %zu <<%.*s>>\n", len1, (int)len1, start1);
            printf("Copy: <<%s>>\n", copy);
            start2 += strspn(start2, " ");      // Skip leading white space
            while ((len2 = strcspn(start2, " ")) != 0)
            {
                printf("Token2: %zu <<%.*s>>\n", len2, (int)len2, start2);
                start2 += len2;
                start2 += strspn(start2, " ");  // Skip the spaces between words
            }
            free(copy);
            start1 += len1;
            start1 += strspn(start1, ";");      // Skip one or more separators
        }
        printf("Check: <<%s>>\n", line);
    }

    return 0;
}

#include <assert.h>

/*
 * Return a freshly allocated, NUL-terminated copy of the first len bytes of
 * src.  The caller owns the result and must free() it.
 *
 * Never returns NULL: if the allocation fails, a message is written to
 * stderr and the program aborts.  The check is an explicit test rather than
 * an assert() so that it is still performed when compiled with NDEBUG.
 */
static char *substrdup(const char *src, size_t len)
{
    char *copy = malloc(len + 1);
    if (copy == NULL)
    {
        fprintf(stderr, "substrdup: out of memory\n");
        abort();
    }
    memcpy(copy, src, len);         // A fresh allocation cannot overlap src
    copy[len] = '\0';
    return copy;
}

Example input and output:

$ strcspn-demo
ls -la; mkdir lololol; ls -la
Input: <<ls -la; mkdir lololol; ls -la>>
Token1: 6 <<ls -la>>
Copy: <<ls -la>>
Token2: 2 <<ls>>
Token2: 3 <<-la>>
Token1: 14 << mkdir lololol>>
Copy: << mkdir lololol>>
Token2: 5 <<mkdir>>
Token2: 7 <<lololol>>
Token1: 7 << ls -la>>
Copy: << ls -la>>
Token2: 2 <<ls>>
Token2: 3 <<-la>>
Check: <<ls -la; mkdir lololol; ls -la>>
$

This code goes back to the more comfortable while loop, rather than needing to use do-while loops, which is a benefit.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Do I then have to worry about managing &end1 and &end2? that was the only thing I didn't know about strtok_r – Sam P Aug 11 '12 at 07:36
  • ALSO: I added your strtok_r implementation but it didn't work - there was still the issue of token being altered. Any ideas? – Sam P Aug 11 '12 at 07:51
  • Note that once you get to the point of supporting quoted string arguments, any technique based on `strtok` ceases to be appropriate. However, that is for the future, not an immediate problem. (You can use a `strcspn`-based method as long as your character class strings contain all the relevant special characters (e.g. single quote, double quote, back quote, dollar, backslash) as well as the basic white space characters (the code should probably accept tab as separator), and you look at what marked the end to decide what to do next. The code above didn't have to worry about all that. – Jonathan Leffler Aug 19 '12 at 15:55