1

I have a comma-delimited string whose fields I want to store in a vector of strings. The string and the vector are:

// Input: comma-separated fields; a single-quoted field may itself contain a comma.
string s = "1, 10, 'abc', 'test, 1'";
// Destination for the parsed fields (declared empty here).
vector<string> v;

Ideally I want the strings 'abc' and 'test, 1' to be stored without the single quotes as below, but I can live with storing them with single quotes:

// Desired result of the split: quotes removed, and the comma inside 'test, 1' kept.
// Illustrative only - v is declared empty, so it has to be filled (e.g. with
// push_back) before it can be indexed like this.
v[0] = "1";
v[1] = "10";
v[2] = "abc";
v[3] = "test, 1";
Remy Lebeau
  • 555,201
  • 31
  • 458
  • 770
Nelson
  • 11
  • 1
  • Does this answer your question? [How do I iterate over the words of a string?](https://stackoverflow.com/questions/236129/how-do-i-iterate-over-the-words-of-a-string) – Quimby Mar 28 '21 at 19:26
  • 2
    Just so you're thinking all the way through this, also think about when you need both a quote and a double quote in your word (not just commas). Think of C++ grammar, how it uses a backslash double-quote as an escape sequence for inserting double quotes. Your file needs its own grammar, and your logic needs to comprehend that grammar (not just for commas). – franji1 Mar 28 '21 at 20:07

3 Answers

1

What you need to do here is write a parser that splits the string the way you want. Here is a parsing function that does that:

#include <cctype>
#include <string>
#include <vector>
using namespace std;

// Splits a comma-delimited string into tokens.
//
// Rules:
//   * A comma outside single quotes ends the current token.
//   * A single quote toggles "quoted" mode; the quote itself is never stored,
//     and a closing quote ends the current token.
//   * Inside quotes every character (commas and whitespace included) is kept.
//   * Outside quotes whitespace is discarded.
//   * Empty tokens are never stored in the result.
//   * Whatever is still pending when the input ends (a trailing unquoted
//     token, or the contents of an unterminated quote) is emitted as the
//     last token.
//
// master: the text to split.
// Returns the non-empty tokens in the order they appear.
vector<string> parse_string(string master) {
    vector<string> result; //the return vector
    string current;        //the token being built
    bool in_quotes = false; //true between an opening and a closing single quote

    //stores the pending token (if it is not empty) and starts a new one
    auto flush = [&]() {
        if (!current.empty()) result.push_back(current);
        current.clear();
    };

    for (char ch : master) {
        if (ch == '\'') {
            in_quotes = !in_quotes;
            if (!in_quotes) flush(); //a closing quote terminates the token
        }
        else if (in_quotes) {
            current.push_back(ch); //quoted text is kept verbatim
        }
        else if (ch == ',') {
            flush(); //an unquoted comma is a separator
        }
        else if (!isspace(static_cast<unsigned char>(ch))) {
            //the cast keeps isspace well-defined for negative char values
            current.push_back(ch);
        }
        //unquoted whitespace is dropped
    }

    flush(); //do not lose a token that is not followed by a comma or quote
    return result;
}

This parses your string as you want it to, and returns a vector. Then, you can use it in your program:

#include <iostream>
// Demo driver: parses the sample string and prints each token on its own line.
int main() {
    const string sample = "1, 10, 'abc', 'test, 1'";
    const vector<string> tokens = parse_string(sample);

    for (const string& token : tokens) {
        cout << token << endl;
    }
}

and it properly prints out:

1
10
abc
test, 1
  • 1
    There are no `break` for quote case, I suggest to use [`[[fallthrough]]`](https://en.cppreference.com/w/cpp/language/attributes/fallthrough) if it is intentional. – Jarod42 Mar 29 '21 at 08:34
  • Yes, it is intentional, I will add it in a bit –  Mar 29 '21 at 15:15
1
// Extracts the next comma-separated token from `s`.
//
// s:     the text being tokenized.
// start: in  - index at which to begin scanning;
//        out - index just past the comma that ended the token, s.size() when
//              the token was the last one, or string::npos when no token was
//              found.
// token: receives the token text (always cleared first). A token that begins
//        with a single quote runs to the matching quote, quotes excluded;
//        any other token stops at the first space, tab or comma.
//
// Returns true when a token was produced, false when only spaces/tabs (or
// nothing) remained.
bool nextToken(const string &s, string::size_type &start, string &token)
{
    const string::size_type none = string::npos;

    token.clear();

    // Skip leading blanks; if nothing else is left there is no token.
    start = s.find_first_not_of(" \t", start);
    if (start == none)
        return false;

    // A quoted token starts after the opening quote and ends at the next quote.
    const bool quoted = (s[start] == '\'');
    if (quoted)
        ++start;

    string::size_type stop = quoted ? s.find('\'', start)
                                    : s.find_first_of(" \t,", start);

    // No terminator: the token is the rest of the string.
    if (stop == none)
    {
        token = s.substr(start);
        start = s.size();
        return true;
    }

    token = s.substr(start, stop - start);

    // When the token ended on a quote or blank, move on to the separating comma.
    if (s[stop] != ',')
        stop = s.find(',', stop + 1);

    // Resume after that comma, or at the end of the string if there is none.
    start = (stop == none) ? s.size() : stop + 1;
    return true;
}
// Usage: call nextToken repeatedly, collecting every token it produces into v.
string s = "1, 10, 'abc', 'test, 1'", token;
vector<string> v;
 
// start carries the scan position from one call to the next.
string::size_type start = 0;
// The loop ends when nextToken returns false.
while (nextToken(s, start, token))
    v.push_back(token);

Demo

Remy Lebeau
  • 555,201
  • 31
  • 458
  • 770
0

A proper solution would require a parser implementation. If you need a quick hack, just write a cell-reading function (demo). C++14's std::quoted manipulator is of great help here. The only problem is that the manipulator requires a stream, which is easily solved with an istringstream - see the second function. Note that the format of your string is CELL COMMA CELL COMMA ... CELL.

// Reads one cell from `is` into `s`.
//
// A cell whose first non-whitespace character is a single quote is read with
// std::quoted (delimiter ', escape \). Any other cell is everything up to the
// next comma, or up to the end of the stream. The separating comma is left in
// the stream for the caller to read.
//
// Returns `is`. The stream is in a failed state, and `s` is left untouched,
// when there was no cell to read.
istream& get_cell(istream& is, string& s)
{
  char first{};
  if (!(is >> first)) // formatted extraction skips leading whitespace; fails when nothing is left
    return is;        // report the failure instead of inspecting a char that was never read
  is.unget(); // put the peeked character back so the reads below see the whole cell

  if (first == '\'')
    return is >> quoted(s, '\'', '\\'); // the first character of the cell is ' - read quoted

  getline(is, s, ','); // read unquoted; this consumes the comma if there is one
  if (!is.eof())
    is.unget(); // put the comma back - get() reads it as the cell separator
  // At end of stream there is no comma to restore; an unget() here would put
  // back the last character of the cell itself.
  return is;
}


// Splits `s`, formatted as CELL COMMA CELL COMMA ... CELL, into its cells by
// calling get_cell repeatedly.
//
// Returns the cells in order.
// Throws the string literal "ill formed" (catch as const char*) when readable
// input is left over after the last cell that could be parsed.
vector<string> get(const string& s)
{
  istringstream iss{ s };
  vector<string> r;
  string cell;

  while (get_cell(iss, cell))
  {
    r.push_back( cell );

    // Expect a cell separator. Test the extraction itself so that `comma` is
    // never inspected when nothing was read (e.g. after a final quoted cell).
    char comma{};
    if (!(iss >> comma) || comma != ',')
      break; // cell separator not found; we are at the end of stream/string - break the loop
  }

  if (char c; iss >> c) // we reached the end of what we understand - probe the end of stream
    throw "ill formed";

  return r;
}

And this is how you use it:

// Demo driver: splits the sample string with get() and prints the error
// message if get() rejects the input.
int main()
{
  const string input = "1, 10, 'abc', 'test, 1'";
  try
  {
    const auto cells = get(input);
  }
  catch (const char* message)
  {
    cout << message;
  }
}
zdf
  • 4,382
  • 3
  • 18
  • 29