163

If I have a std::string containing a comma-separated list of numbers, what's the simplest way to parse out the numbers and put them in an integer array?

I don't want to generalise this out into parsing anything else. Just a simple string of comma separated integer numbers such as "1,1,1,1,2,1,1,1,0".

Piku
  • 3,526
  • 6
  • 35
  • 38
  • 1
    for anyone looking for how to parse comma delimited strings https://stackoverflow.com/questions/11719538/how-to-use-stringstream-to-separate-comma-separated-strings – Sam B Sep 04 '21 at 15:49

18 Answers18

186

Input one number at a time, and check whether the following character is ,. If so, discard it.

#include <vector>
#include <string>
#include <sstream>
#include <iostream>

// Parses a comma-separated list of integers out of a string and prints the
// parsed values one per line.
int main()
{
    const std::string text = "1,2,3,4,5,6";
    std::stringstream stream(text);
    std::vector<int> numbers;

    int value;
    while (stream >> value) {
        numbers.push_back(value);
        // A comma directly after the number is a separator: drop it so the
        // next extraction starts at the following number.
        if (stream.peek() == ',') {
            stream.ignore();
        }
    }

    for (const int number : numbers) {
        std::cout << number << std::endl;
    }
}
L. F.
  • 19,445
  • 8
  • 48
  • 82
user229321
  • 304
  • 1
  • 3
  • 7
172

Something less verbose that uses only the standard library and accepts anything separated by commas:

stringstream ss( "1,1,1,1, or something else ,1,1,1,0" );
vector<string> result;

// Pull one comma-delimited field per iteration for as long as the stream
// reports no error/EOF state; every field read (even an empty one) is stored.
string field;
while( ss.good() )
{
    getline( ss, field, ',' );   // replaces the previous contents of field
    result.push_back( field );
}
Robert
  • 37,670
  • 37
  • 171
  • 213
Zoomulator
  • 20,774
  • 7
  • 28
  • 32
70

Yet another, rather different, approach: use a special locale that treats commas as white space:

#include <locale>
#include <vector>

// Character-classification facet that marks ',', '\n' and ' ' as whitespace
// (and nothing else as anything), so that formatted extraction from a stream
// imbued with it skips over those characters between values.
struct csv_reader: std::ctype<char> {
    csv_reader(): std::ctype<char>(get_table()) {}

    // Returns the classification table passed to std::ctype<char>. The table
    // has static storage, so it stays valid for as long as any facet uses it.
    static std::ctype_base::mask const* get_table() {
        typedef std::ctype_base::mask mask_type;
        static std::vector<mask_type> table(table_size, mask_type());

        const char separators[] = { ',', '\n', ' ' };
        for (const char sep : separators)
            table[sep] = std::ctype_base::space;
        return table.data();
    }
};

To use this, you imbue() a stream with a locale that includes this facet. Once you've done that, you can read numbers as if the commas weren't there at all. For example, we'll read comma-delimited numbers from standard input and write them out one per line on standard output:

#include <algorithm>
#include <iterator>
#include <iostream>

// Reads comma-delimited integers from standard input and echoes them to
// standard output, one per line.
int main() {
    // Build a locale whose ctype facet is csv_reader and install it on
    // std::cin so commas are skipped like whitespace.
    const std::locale comma_is_space(std::locale(), new csv_reader());
    std::cin.imbue(comma_is_space);

    typedef std::istream_iterator<int> int_reader;
    std::copy(int_reader(std::cin), int_reader(),
              std::ostream_iterator<int>(std::cout, "\n"));
    return 0;
}
Jerry Coffin
  • 476,176
  • 80
  • 629
  • 1,111
  • 1
    Most creative answer I've ever seen! – yoco May 16 '11 at 16:37
  • +1 using only std and it's clean and easy solution. No peeking and ignoring of chars! – kravemir Feb 23 '13 at 14:30
  • 1
    Here is working example if someone wanted to try it: http://ideone.com/RX5o10 – kravemir Feb 23 '13 at 14:40
  • 1
    note that the above example will explode if the input looks like "1, 2, 3, 4, 5..." you have to add the line rc[' '] = ctype_base::space;. Took me awhile to figure it out – aCuria Mar 27 '13 at 19:35
  • this is the real C++ solution. – Slava Sep 19 '14 at 09:22
  • Real power you can see here: `string tmp = "1,2,blab,blub"; int startTime2 = 0; int endTime2 = 0; string startDate2; string endDate2; stringstream ss( tmp ); ss.imbue( std::locale( std::locale(), new csv_reader() ) ); ss >> startTime2 >> endTime2 >> startDate2 >> endDate2;` – tmanthey Aug 18 '15 at 20:12
  • 2
    I'm afraid this solution doesn't support empty string though, they will just be skipped over. Take for instance this input: `1,2,3,,5,6,7`. – Fabio A. Sep 24 '15 at 08:20
45

The C++ String Toolkit Library (Strtk) has the following solution to your problem:

#include <string>
#include <deque>
#include <vector>
#include "strtk.hpp"
// Demonstrates strtk::parse on two delimited strings: comma-separated ints
// collected in a vector and pipe-separated doubles collected in a deque.
int main()
{
   // Comma-delimited integers.
   std::string csv_ints = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15";
   std::vector<int> ints;
   strtk::parse(csv_ints, ",", ints);

   // Pipe-delimited floating-point values.
   std::string piped_doubles = "123.456|789.012|345.678|901.234|567.890";
   std::deque<double> doubles;
   strtk::parse(piped_doubles, "|", doubles);

   return 0;
}

More examples can be found Here

  • how is this a solution?? sample.cpp(104): fatal error C1083: Cannot open include file: 'strtk.hpp': No such file or directory – Sam B Sep 04 '21 at 15:38
20

Alternative solution using generic algorithms and Boost.Tokenizer:

// Function object that converts a string to int, intended for use with
// algorithms such as transform().
struct ToInt
{
    // Returns atoi() applied to str: the leading decimal integer, or 0 when
    // str does not start with one. Declared const so that const ToInt objects
    // (and algorithms that hold the functor by const reference) can call it.
    int operator()(string const &str) const { return atoi(str.c_str()); }
};

string values = "1,2,3,4,5,9,8,7,6";

vector<int> ints;
tokenizer<> tok(values);

transform(tok.begin(), tok.end(), back_inserter(ints), ToInt());
TC.
  • 4,133
  • 3
  • 31
  • 33
10

Lots of pretty terrible answers here so I'll add mine (including test program):

#include <string>
#include <iostream>
#include <cstddef>

// Invokes f(str, from, to) once for every field of str, where fields are
// separated by delimiter and [from, to) is the field's half-open index range.
// Empty fields are reported too, so an input with k delimiters always yields
// k + 1 calls (an empty string yields one call with from == to == 0).
template<typename StringFunction>
void splitString(const std::string &str, char delimiter, StringFunction f) {
  std::size_t fieldStart = 0;
  std::size_t hit = str.find(delimiter, fieldStart);
  while (hit != std::string::npos) {
    f(str, fieldStart, hit);
    fieldStart = hit + 1;
    hit = str.find(delimiter, fieldStart);
  }
  // Whatever follows the last delimiter (possibly nothing) is the final field.
  f(str, fieldStart, str.size());
}


// Test driver: expects exactly one command-line argument, splits it on commas
// and prints every piece wrapped in backticks, one per line. Exits with status
// 1 when the argument count is wrong.
int main(int argc, char* argv[]) {
    const bool haveOneArgument = (argc == 2);
    if (!haveOneArgument)
        return 1;

    auto printPiece = [](const std::string &text, std::size_t begin, std::size_t end) {
        std::cout << "`" << text.substr(begin, end - begin) << "`\n";
    };
    splitString(argv[1], ',', printPiece);

    return 0;
}

Nice properties:

  • No dependencies (e.g. boost)
  • Not an insane one-liner
  • Easy to understand (I hope)
  • Handles spaces perfectly fine
  • Doesn't allocate splits if you don't want to, e.g. you can process them with a lambda as shown.
  • Doesn't add characters one at a time - should be fast.
  • If using C++17 you could change it to use std::string_view; then it won't do any allocations and should be extremely fast.

Some design choices you may wish to change:

  • Empty entries are not ignored.
  • An empty string will call f() once.

Example inputs and outputs:

""      ->   {""}
","     ->   {"", ""}
"1,"    ->   {"1", ""}
"1"     ->   {"1"}
" "     ->   {" "}
"1, 2," ->   {"1", " 2", ""}
" ,, "  ->   {" ", "", " "}
Timmmm
  • 88,195
  • 71
  • 364
  • 509
8

You could also use the following function.

// Splits str into tokens and appends them to tokens (existing elements are
// kept). A token is a maximal run of characters none of which appears in
// delimiters, so leading, trailing and consecutive delimiters never produce
// empty tokens.
void tokenize(const string& str, vector<string>& tokens, const string& delimiters = ",")
{
  // Start of the first token: the first character that is not a delimiter.
  string::size_type tokenBegin = str.find_first_not_of(delimiters);

  // End of that token: the next delimiter after it (npos if it runs to the end).
  string::size_type tokenEnd = str.find_first_of(delimiters, tokenBegin);

  while (!(tokenBegin == string::npos && tokenEnd == string::npos)) {
    // When tokenEnd is npos the length is oversized and substr clamps it to
    // the end of the string.
    const string::size_type length = tokenEnd - tokenBegin;
    tokens.push_back(str.substr(tokenBegin, length));

    // Step over the delimiter run to the start of the next token ...
    tokenBegin = str.find_first_not_of(delimiters, tokenEnd);

    // ... and locate the delimiter that terminates it.
    tokenEnd = str.find_first_of(delimiters, tokenBegin);
  }
}
apaderno
  • 28,547
  • 16
  • 75
  • 90
4
std::string input="1,1,1,1,2,1,1,1,0";
std::vector<long> output;
for(std::string::size_type p0=0,p1=input.find(',');
        p1!=std::string::npos || p0!=std::string::npos;
        (p0=(p1==std::string::npos)?p1:++p1),p1=input.find(',',p0) )
    output.push_back( strtol(input.c_str()+p0,NULL,0) );

It would be a good idea to check for conversion errors in strtol(), of course. Maybe the code may benefit from some other error checks as well.

Michael Krelin - hacker
  • 138,757
  • 24
  • 193
  • 173
  • 1
    What a mess! You don't have to put everything on one line. – Timmmm Feb 15 '18 at 12:04
  • @Timmmm and you don't have to tell what I don't have to do, you're not forced to use this code, are you? – Michael Krelin - hacker Feb 15 '18 at 15:32
  • Sorry I didn't mean to offend - just a suggestion that you can make your code a lot easier to understand if you break it out a little, and you're less likely to miss bugs too. I'm not forced to use it but that doesn't mean I can't express an opinion. That's the whole point of SO's voting system. – Timmmm Feb 15 '18 at 15:58
  • 1
    @Timmmm, of course you can, and I'm totally fine with your opinion differing from mine. I personally do find value in compressed code *exactly* because it's easier to read, though in terms of lines per time unit it's slower. I am also conscious of this being *my* view and other people see it differently. I honestly do think their code is a mess, but refrain from stating it :) – Michael Krelin - hacker Feb 15 '18 at 19:34
4

I'm surprised no one has proposed a solution using std::regex yet:

#include <string>
#include <algorithm>
#include <vector>
#include <regex>

// Appends to result every run of decimal digits found in str, converted with
// std::stoi, in order of appearance. Anything that is not a digit (commas,
// spaces, signs, letters) merely separates the numbers. Elements already in
// result are kept.
void parse_csint( const std::string& str, std::vector<int>& result ) {

    const std::regex digits("(\\d+)");
    const std::sregex_iterator no_more_matches;

    for ( std::sregex_iterator match( str.begin(), str.end(), digits );
          match != no_more_matches; ++match ) {
        // Capture group 1 is the digit run itself.
        result.push_back( std::stoi( (*match)[1] ) );
    }

}

This function inserts all integers at the back of the input vector. You can tweak the regular expression to include negative integers, or floating point numbers, etc.

Jonathan H
  • 7,591
  • 5
  • 47
  • 80
3
std::string exp = "token1 token2 token3";
char delimiter = ' ';
std::vector<std::string> str;
std::string acc = "";
for(const auto &x : exp)
{
    if(x == delimiter)
    {
        str.push_back(acc);
        acc = "";
    }
    else
        acc += x;
}
str.push_back(acc);
Richard
  • 56,349
  • 34
  • 180
  • 251
knapcio
  • 331
  • 3
  • 9
2
#include <sstream>
#include <vector>

// Comma-separated integers to parse.
const char *input = "1,1,1,1,2,1,1,1,0";

// Extracts integers from input into a vector. After each successfully read
// number exactly one character (expected to be the separator) is discarded;
// the loop stops at the first extraction that fails.
int main() {
    std::vector<int> output;
    std::stringstream ss(input);
    for (int value; ss >> value; ss.ignore(1)) {
        output.push_back(value);
    }
}

Bad input (for instance consecutive separators) will mess this up, but you did say simple.

Steve Jessop
  • 273,490
  • 39
  • 460
  • 699
1
bool GetList (const std::string& src, std::vector<int>& res)
  {
    using boost::lexical_cast;
    using boost::bad_lexical_cast;
    bool success = true;
    typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
    boost::char_separator<char> sepa(",");
    tokenizer tokens(src, sepa);
    for (tokenizer::iterator tok_iter = tokens.begin(); 
         tok_iter != tokens.end(); ++tok_iter) {
      try {
        res.push_back(lexical_cast<int>(*tok_iter));
      }
      catch (bad_lexical_cast &) {
        success = false;
      }
    }
    return success;
  }
KeithB
  • 16,577
  • 3
  • 41
  • 45
1

I cannot yet comment (getting started on the site) but added a more generic version of Jerry Coffin's fantastic ctype's derived class to his post.

Thanks Jerry for the super idea.

(Because it must be peer-reviewed, adding it here too temporarily)

// Character-classification facet that treats exactly the characters in a
// caller-supplied sequence as whitespace, so formatted extraction from a
// stream imbued with it skips those characters between values.
struct SeparatorReader: std::ctype<char>
{
    // seps: any range of char-like elements usable in a range-for (for example
    // a std::string); each element becomes a separator. The base class is
    // constructed with del == true, so it owns the table built by get_table()
    // and releases it with delete[].
    template<typename T>
    SeparatorReader(const T &seps): std::ctype<char>(get_table(seps), true) {}

    // Builds a zero-initialised classification table in which only the
    // characters of seps carry the space mask. The caller owns the returned
    // array (allocated with new[]).
    //
    // This is static because the constructor calls it from the base-class
    // initializer, before the base sub-object exists; calling a non-static
    // member function at that point is undefined behaviour.
    template<typename T>
    static std::ctype_base::mask const *get_table(const T &seps) {
        std::ctype_base::mask *rc = new std::ctype_base::mask[std::ctype<char>::table_size]();
        for(auto &&sep: seps)
            rc[static_cast<unsigned char>(sep)] = std::ctype_base::space;
        return rc;
    }
};
mementum
  • 3,153
  • 13
  • 20
1

This is the simplest way, which I used a lot. It works for any one-character delimiter.

#include<bits/stdc++.h>
using namespace std;

// Reads one whitespace-delimited word from standard input, parses it as
// integers separated by single characters (e.g. "1,2,3") and prints the
// integers one per line.
int main() {
   string str;
   cin >> str;

   vector<int> result;
   stringstream ss(str);

   // Only store a value when the extraction actually succeeded. The original
   // do/while pushed temp unconditionally, which added a spurious element for
   // empty or malformed input (e.g. "" or "1,,2").
   int temp;
   char ch;
   while (ss >> temp)
   {
       result.push_back(temp);
       // Consume the one-character delimiter; stop when there is none left.
       if (!(ss >> ch))
           break;
   }

   // size_t index avoids the signed/unsigned comparison with size().
   for (size_t i = 0; i < result.size(); i++)
       cout<<result[i]<<endl;

   return 0;
}
0

simple structure, easily adaptable, easy maintenance.

std::string stringIn = "my,csv,,is 10233478,separated,by commas";
// Starts with one empty string: the field currently being accumulated.
std::vector<std::string> commaSeparated(1);
// Index of the field currently being filled (equals the number of commas seen).
int commaCounter = 0;
for (std::string::size_type i=0; i<stringIn.size(); i++) {
    // Compare against the character ',' (the string literal "," is a pointer
    // and cannot be compared with a char).
    if (stringIn[i] == ',') {
        // A comma closes the current field and opens a new, empty one.
        commaSeparated.push_back("");
        commaCounter++;
    } else {
        commaSeparated.at(commaCounter) += stringIn[i];
    }
}

In the end you will have a vector of strings holding every element of the input, split at the commas. Empty fields are saved as separate (empty) items.

tony gil
  • 9,424
  • 6
  • 76
  • 100
0

Simple Copy/Paste function, based on the boost tokenizer.

// Tokenizes string with a default boost::tokenizer<> and stores atoi() of
// each token in array, in order.
//   string    - text containing the integers
//   array     - destination buffer; must have room for array_len ints
//   array_len - capacity of array; tokens beyond it are ignored
// Elements of array past the number of tokens found are left untouched.
void strToIntArray(std::string string, int* array, int array_len) {
  boost::tokenizer<> tok(string);
  int i = 0;
  // Stop as soon as the array is full: further tokens could not be stored.
  for(boost::tokenizer<>::iterator beg=tok.begin(); beg!=tok.end() && i < array_len; ++beg, ++i){
    array[i] = atoi(beg->c_str());
  }
}
-1
// Splits string at every occurrence of separator and appends atoi() of each
// field to result. A field that is empty or not numeric yields 0. Text after
// the last separator is converted as well, unless it is empty (so a trailing
// separator adds nothing, and an empty input leaves result unchanged).
void ExplodeString( const std::string& string, const char separator, std::list<int>& result ) {
    std::string::size_type fieldStart = 0;

    for( std::string::size_type cut = string.find( separator, fieldStart );
         cut != std::string::npos;
         cut = string.find( separator, fieldStart ) ) {
        const std::string field = string.substr( fieldStart, cut - fieldStart );
        result.push_back( atoi( field.c_str() ) );
        fieldStart = cut + 1;
    }

    // Remainder after the last separator: convert straight from the
    // NUL-terminated buffer, no copy required.
    if( fieldStart != string.size() ) {
        result.push_back( atoi( string.c_str() + fieldStart ) );
    }
}
Frunsi
  • 7,099
  • 5
  • 36
  • 42
-6
#include <algorithm>
#include <iostream>
#include <iterator>
#include <sstream>
#include <vector>

// Sample input containing empty fields and a non-numeric field.
const char *input = ",,29870,1,abc,2,1,1,1,0";

// Extracts every run of decimal digits from input as an int, skipping all
// other characters one at a time, and prints the numbers separated by spaces.
int main()
{
    std::stringstream ss(input);
    std::vector<int> output;
    while ( !ss.eof() )
    {
       // Look at the next character without consuming it (EOF at the end).
       const int c = ss.peek();
       if ( c < '0' || c > '9' )
       {
          // Not the start of a number: drop one character and retry.
          ss.ignore(1);
          continue;
       }

       int i;
       if (ss >> i)
       {
          output.push_back(i);
       }
       else
       {
          // The digits did not fit in an int: they have been consumed but
          // failbit is now set. Without clearing it every later peek()/ignore()
          // would fail without ever setting eofbit and this loop would never
          // terminate.
          ss.clear();
       }
    }

    std::copy(output.begin(), output.end(), std::ostream_iterator<int> (std::cout, " ") );
    return 0;
}