0

Here's the contents of a text file:

SQUARE 2
SQUARE
RECTANGLE 4 5

I'm trying to figure out why my strtok() loop won't take the end of the 2ND "SQUARE" and just make the length = 0. Don't fully understand the concept behind strtok either, I wouldn't mind a lecture about strtok. Here's the code:

#include <cstring>
#include <cstdlib>
#include <iostream>
using std::cout;
using std::endl;
using std::cin;
using std::ios;

#include<iomanip>
using std::setprecision;

#include <fstream>
using std::ifstream;

const int MAX_CHARS_PER_LINE = 512;
const int MAX_TOKENS_PER_LINE = 20;
const char* const DELIMITER = " ";

int main()
{
// create a file-reading object
ifstream fin;
fin.open("geo.txt"); // open a file
if (!fin.good()) 
    return 1; // exit if file not found

//PI
float pi = 3.14159265359; 

//DIMENSIONS
float length, width, height, radius;

//AREAS, PERIMETERS, VOLUMES
float areaSquare, periSquare;
float areaRectangle, periRectangle;
float areaCube, volCube;
float areaPrism, volPrism;
float areaCircle, periCircle;
float areaCylinder, volCylinder;

// read each line of the file
while (!fin.eof())
{
    // read an entire line into memory
    char buf[MAX_CHARS_PER_LINE];
    fin.getline(buf, MAX_CHARS_PER_LINE);
    // parse the line into blank-delimited tokens
    int n = 0; // a for-loop index

    // array to store memory addresses of the tokens in buf
    const char* token[MAX_TOKENS_PER_LINE] = {0}; // initialize to 0

    // parse the line
    token[0] = strtok(buf, DELIMITER); // first token
    if (token[0]) // zero if line is blank
    {
        for (n = 1; n < MAX_TOKENS_PER_LINE; n++)
        {
            token[n] = strtok(0, DELIMITER); // subsequent tokens
            if (!token[n] || token[n]==0) break;
        }
    }

    if(strcmp("SQUARE", token[0]) == 0) //1
    {
        length = atof(token[1])?atof(token[1]):0;
        areaSquare = length * length;
        periSquare = 4 * length;

        cout.setf(ios::fixed|ios::showpoint);
        cout << setprecision(2);
        cout << token[0] << ' ' << "length="<< token[1] << ' ';
        cout << "Area=" << areaSquare << ' ';
        cout << "Perimeter=" << periSquare << '\n';
        cout.unsetf(ios::fixed|ios::showpoint);
        cout << setprecision(6);
    }

    else if(strcmp("RECTANGLE", token[0]) == 0) //2
    {
        length = atof(token[1])?atof(token[1]):0;
        width = atof(token[2])?atof(token[2]):0;

        areaRectangle = length * width;
        periRectangle = 2 * length + 2 * width;

        cout.setf(ios::fixed|ios::showpoint);
        cout << setprecision(2);
        cout << token[0] << ' ' << "length="<< token[1] << ' ';
        cout << "width=" << token[2] << ' ' ;
        cout << "Area=" << areaRectangle << ' ';
        cout << "Perimeter=" << periRectangle << '\n';
        cout.unsetf(ios::fixed|ios::showpoint);
        cout << setprecision(6);
        }
    else
    {
        cout << "End of program. Press ENTER to exit.";
        cin.ignore(1000,10);
        break;
}
    }
}
Kerrek SB
  • 464,522
  • 92
  • 875
  • 1,084
刘哲诚
  • 113
  • 2
  • 9
  • 2
    You're using C++. Use std::string. – Paul Tomblin Sep 01 '12 at 00:26
  • `eof` is usually never correct, and you are not checking the return value of your input operations. – Kerrek SB Sep 01 '12 at 00:37
  • Just had a brief look at your code. I am not sure how will your strtok work unless you have a newline as your delimiter as well. Please try putting the delimiter as " \n" (space and newline). – Deepanjan Mazumdar Sep 01 '12 at 00:43
  • what do you mean "return value"? "input operations"? And my code works, kind-of, it can run as long as the input file has the required dimensions (i.e. does not have an abrupt end at a line or a space). How do I put a '\n' as a delimiter? like space and newline together? – 刘哲诚 Sep 01 '12 at 05:12
  • You can look at [Strange `strtok()` behaviour](http://stackoverflow.com/questions/11911884/strange-strtok-behaviour/11912769#11912769) to see a discussion of how `strtok()` works and non-destructive alternatives. While you're using `strtok()`, you are fundamentally working in C mode rather than C++ mode; you're using raw `char *` rather than C++ strings or the like. – Jonathan Leffler Sep 02 '12 at 02:42
  • Please check whichever answer answered your question – std''OrgnlDave Sep 15 '12 at 22:16

4 Answers4

2

Here is a version that works.

Main differences are,

  1. Have changed the array of char * to array of 20 char strings. This guarantees the array elements have memory allocated, in your case they are null pointers and stay this way when strtok returns NULL, you cannot then use a NULL pointer.
  2. The second call to strtok is "strtok(0, DELIMITER)" but should be "strtok(NULL, DELIMITER)".

I think they are the only diffs, but use the diff utility to check.

#include <cstring>
#include <cstdlib>
#include <iostream>
using std::cout;
using std::endl;
using std::cin;
using std::ios;

#include<iomanip>
using std::setprecision;

#include <fstream>
using std::ifstream;

const int MAX_CHARS_PER_LINE = 512;
const int MAX_TOKENS_PER_LINE = 20;
const char* const DELIMITER = " ";

int main()
{
// create a file-reading object
   char *tok;
   ifstream fin;
   fin.open("geo.txt"); // open a file
   if (!fin.good())
       return 1; // exit if file not found

       //PI
       float pi = 3.14159265359;

       //DIMENSIONS
       float length, width, height, radius;

       //AREAS, PERIMETERS, VOLUMES
       float areaSquare, periSquare;
       float areaRectangle, periRectangle;
       float areaCube, volCube;
       float areaPrism, volPrism;
       float areaCircle, periCircle;
       float areaCylinder, volCylinder;

       // read each line of the file
       while (!fin.eof())
       {
           // read an entire line into memory
           char buf[MAX_CHARS_PER_LINE];
           fin.getline(buf, MAX_CHARS_PER_LINE);
           // parse the line into blank-delimited tokens
           int n = 0; // a for-loop index

           // array to store memory addresses of the tokens in buf
//         const char* token[MAX_TOKENS_PER_LINE] = {0}; // initialize to 0
           char token[MAX_TOKENS_PER_LINE][20];
           for (n=0;n<MAX_TOKENS_PER_LINE;n++)
               {
               token[n][0] = NULL;
               }
           // parse the line
           tok = strtok(buf, DELIMITER); // first token
           if (tok == NULL)
               break;
           strcpy(token[0],tok);
           if (token[0]) // zero if line is blank
               {
               for (n = 1; n < MAX_TOKENS_PER_LINE; n++)
                   {
                   tok = strtok(NULL, DELIMITER); // subsequent tokens
                   if (tok == NULL)
                       break;
                   strcpy(token[n],tok);
//                 if (!token[n] || token[n]==0) break;
                   }
               }
           if(strcmp("SQUARE", token[0]) == 0) //1
                {
                length = atof(token[1])?atof(token[1]):0;
                areaSquare = length * length;
                periSquare = 4 * length;

                cout.setf(ios::fixed|ios::showpoint);
                cout << setprecision(2);
                cout << token[0] << ' ' << "length="<< token[1] << ' ';
                cout << "Area=" << areaSquare << ' ';
                cout << "Perimeter=" << periSquare << '\n';
                cout.unsetf(ios::fixed|ios::showpoint);
                cout << setprecision(6);
                }

            else if(strcmp("RECTANGLE", token[0]) == 0) //2
                {
                length = atof(token[1])?atof(token[1]):0;
                width = atof(token[2])?atof(token[2]):0;

                areaRectangle = length * width;
                periRectangle = 2 * length + 2 * width;

                cout.setf(ios::fixed|ios::showpoint);
                cout << setprecision(2);
                cout << token[0] << ' ' << "length="<< token[1] << ' ';
                cout << "width=" << token[2] << ' ' ;
                cout << "Area=" << areaRectangle << ' ';
                cout << "Perimeter=" << periRectangle << '\n';
                cout.unsetf(ios::fixed|ios::showpoint);
                cout << setprecision(6);
            }
        else
            {
            cout << "End of program. Press ENTER to exit.";
            cin.ignore(1000,10);
            break;
            }
        }
}
Alec Danyshchuk
  • 307
  • 2
  • 12
  • Not quite sure what's going on... Well, the problem IS fixed, and for that, thank you very much, I am very grateful, but what's going on here? Why create a char * array; guarantees array elements have memory allocated? Like by setting an array in the first place? I don't know, but, it seems like the program is only using that space once? token[n][0]; I tried setting it to one, but it doesn't seem to work.. Hmm... and what's a diff utility..? – 刘哲诚 Sep 01 '12 at 05:07
  • `strtok(0, DELIMITER)` and `strtok(NULL, DELIMITER)` are equivalent. In C++11 you could use `std::nullptr`. You should check that the tokens are short enough before copying them into the `token` array. – Jonathan Leffler Sep 02 '12 at 02:48
2

Your segmentation fault is caused by this:

length = atof(token[1])?atof(token[1]):0;

You made the mistake of assuming that token[1] was tokenized. If you look at your 2nd 'SQUARE' you'll find that for that line it will have set token[1] to NULL. You then pass NULL to atof() which understandably errors out.

You're also using strtok() improperly. There is no reason to strcpy() from its result, because strtok() itself is a destructive operation.

So here's a lecture about strtok.

Firstly, it's evil, but so handy that you use it anyway sometimes. Tokenizers can be a pain in the butt to write.

The idea behind strtok was to create an easy tokenizer. A tokenizer is a pain in the butt to write, and the interface for it is actually fairly decent if you don't mind making it really easy to blow your computer up with it. You can use a very small amount of code to parse command line arguments, for example.

However, strtok is destructive to the string you use it on. It will replace the token that it finds with a 0, automatically null-terminating the returned value. That means that you can directly use the returned string without needing to copy it. A string like this:

here are spaces0

Is changed into

here0are0spaces0

where 0 delimits end of string character (0). This is done in place, and you get pointers to here, are, and spaces.

strtok uses static variables - meaning it retains state information between calls. On the first call you pass it a pointer to the string you're trying to tokenize; from then on, you pass it a NULL pointer to signal that you want it to continue where it left off before. It returns the next token, returning NULL when it finds the end of the string.

An strtok loop is very easy to write. This code will tokenize a string for you properly. The following example code is ugly, but I blame being tired.

char *input_string;        // Just so we have it
const int MAX_TOKENS = 10; // Arbitrary number
char *tokens[MAX_TOKENS];  // This will do all the storage we need.
tokens[0] = strtok(input_string, " -=\""); // Setup call.
int number_of_tokens = 1;  // We've already filled tokens[0], so we have 1 token. We want to start filling from 1.

do {
    if (tokens[number_of_tokens] = strtok(NULL," -=\"")) number_of_tokens++;
    else break;
} while(number_of_tokens < MAX_TOKENS);

That first line in the loop is common practice for C programmers, but is ugly for readability. Here's what it does:

a) It sets tokens[number_of_tokens] to the return value of strtok. b) If that is NULL, it terminates the loop (second line). addendnum: there is an inline test. You can do if (a = 1) and it will return true and set a to 1. You can do if (a = 0) it will return false while setting a to 0. This line takes advantage of that fact, if strtok() returns NULL, well, that's false. c) If that is not NULL, tokens[number_of_tokens] now contains a pointer to the next token found in the string. d) Since a token was found, the number of tokens (number_of_tokens) is incremented. 5) It reuses the variable that keeps count of how many tokens there are as an index into the array of pointers that it keeps. 6) It loops infinitely until it either meets the condition of strtok returning NULL, or the while() condition (in this case, there are more than 10 tokens).

If it was given this string:

here are some=words0

It would be

*tokens[0]="here"
*tokens[1]="are"
*tokens[2]="some"
*tokens[3]="words"
*tokens[4] = NULL
number_of_tokens = 4

As you can see, there's no need to copy anything, because that string is replaced in memory as such:

here0are0some0words0

where 0 delimits end of string character (0).

I hope this answers your questions.

std''OrgnlDave
  • 3,912
  • 1
  • 25
  • 34
  • I'm not too sure what you meant by this: "addendnum: there is an inline test. You can do if (a = 1) and it will return true and set a to 1. You can do if (a = 0) it will return false while setting a to 0." first of all, my English is terrible, I don't know what addendum means. That aside, a = 1, a = 0? And pardon me if I'm wrong, but is it if (tokens[number_of_tokens] = strtok(NULL," -=\"")) [using '='] or if(tokens[number_of_tokens] == strtok(NULL," -=\"")) [using '=='] – 刘哲诚 Sep 17 '12 at 22:54
0

Ok. When your line

const char* token[MAX_TOKENS_PER_LINE] = {0};

creates an array of pointers, but none of them point to anything. The first element is set to 0 (which is a NULL address) and the rest are not initialised. When you run and process line 2 (which has 1 element) token[0] points to 'SQUARE' but token[1] is given the value 0x00 (NULL). This is an invalid memory location. You then process token[1] with the line

length = atof(token[1])?atof(token[1]):0;

and this causes a Segmentation fault because token[1] is a NULL pointer. In my version token[1] is a valid pointer to a NULL string, which sets length to 0. I suggest you compile with the -g flag (eg g++ -g test.cpp -o test). Then call 'gdb test' and use break, run, continue commands to step through the code. You can use the print command to display the contents of variables.

In the first run in gdb just enter 'run'. This will fail, then enter 'bt' which will tell you the failing line, let's call it linenumber.

In the second run enter 'break linenumber' and then 'run' and the execution will stop on the failing line but before it is executed. You can then look at the contents of the variables which will give you a big clue to why it is failing.

Alec Danyshchuk
  • 307
  • 2
  • 12
0

Here is some working C++ based closely on your code.

I've revised the I/O handling; fin.getline() reports whether it got a line or not, so it should be used to control the loop; fin.eof() is a red flag warning in my estimation (as is feof(fp) in C).

The core dump occurs because you don't check that you got a length token after the word SQUARE. The revised code checks that it got exactly the correct number of tokens, complaining if not. The code using strtok() has been unified into a single loop; it contains a diagnostic print statement that shows the token just found (valuable for checking what's going on).

I removed a pile of unused variables; each variable is defined and initialized in the calculation blocks.

There are endless possible reservations about using C strings and strtok() in C++ (the printing would be a lot more succinct if all the code were written in C using the C standard I/O functions like printf()). You can find a discussion of the alternatives to strtok() at Strange strtok() error. You can find another discussion on why strtok() is a disaster in a library function at Reading user input and checking the string.

Working code for the 3 lines of data in the question

#include <cstring>
#include <cstdlib>
#include <iostream>
using std::cout;
using std::endl;
using std::cin;
using std::ios;
using std::cerr;

#include<iomanip>
using std::setprecision;

#include <fstream>
using std::ifstream;

const int MAX_CHARS_PER_LINE = 512;
const int MAX_TOKENS_PER_LINE = 20;
const char* const DELIMITER = " ";

int main()
{
    // create a file-reading object
    const char *fname = "geo.txt";
    ifstream fin;
    fin.open(fname); // open a file
    if (!fin.good()) 
    {
        cerr << "Failed to open file " << fname << endl;;
        return 1; // exit if file not found
    }

    // read each line of the file
    char buf[MAX_CHARS_PER_LINE];
    while (fin.getline(buf, sizeof(buf)))
    {
        int n = 0;
        const char *token[MAX_TOKENS_PER_LINE] = {0};
        char *position = buf;

        while ((token[n] = strtok(position, DELIMITER)) != 0)
        {
            cout << "Token " << n << ": " << token[n] << endl;
            n++;
            position = 0;
        }

        if (strcmp("SQUARE", token[0]) == 0 && n == 2)
        {
            float length = atof(token[1])?atof(token[1]):0;
            float areaSquare = length * length;
            float periSquare = 4 * length;
            cout.setf(ios::fixed|ios::showpoint);
            cout << setprecision(2);
            cout << token[0] << ' ' << "length="<< token[1] << ' ';
            cout << "Area=" << areaSquare << ' ';
            cout << "Perimeter=" << periSquare << '\n';
            cout.unsetf(ios::fixed|ios::showpoint);
            cout << setprecision(6);
        }
        else if (strcmp("RECTANGLE", token[0]) == 0 && n == 3)
        {
            float length = atof(token[1])?atof(token[1]):0;
            float width = atof(token[2])?atof(token[2]):0;
            float areaRectangle = length * width;
            float periRectangle = 2 * length + 2 * width;
            cout.setf(ios::fixed|ios::showpoint);
            cout << setprecision(2);
            cout << token[0] << ' ' << "length="<< token[1] << ' ';
            cout << "width=" << token[2] << ' ' ;
            cout << "Area=" << areaRectangle << ' ';
            cout << "Perimeter=" << periRectangle << '\n';
            cout.unsetf(ios::fixed|ios::showpoint);
            cout << setprecision(6);
        }
        else
        {
            cout << "Unrecognized data: " << buf << endl;
        }
    }
}
Community
  • 1
  • 1
Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278