-2

How do I extract field = value pairs from documents with the new C++ 11 regex library?

Example of document (treat it as std::string):

Caption = "calc.exe"; CommandLine = "\"C:\Windows\system32\calc.exe\" "; CreationClassName = "Win32_Process"; CreationDate = "20130606162432.173628+240"; CSCreationClassName = "Win32_ComputerSystem" Handle = "13484"; HandleCount = 93;

As output I need to get map:

{"Caption", "calc.exe"}
{"CommandLine", "\"C:\\Windows\\system32\\calc.exe\" "}
{"CreationClassName", "Win32_Process"}
{"CreationDate", "20130606162432.173628+240"}
{"CSCreationClassName", "Win32_ComputerSystem"}
{"Handle", "13484"}
{"HandleCount", "93"}

The code I want might look something like this:

Todo

Christian Rau
  • 45,360
  • 10
  • 108
  • 185
Edward83
  • 6,664
  • 14
  • 74
  • 102
  • Couldn't you tokenize by semicolon, then tokenize by spaces? – VoronoiPotato Jun 06 '13 at 12:59
  • @VoronoiPotato could you provide some example code? – Edward83 Jun 06 '13 at 13:04
  • out of curiosity, which compiler are you using? AFAIKnew, `<regex>` was broken in gcc – blue Jun 06 '13 at 13:04
  • sorry, I use Visual Studio 2012 Express – Edward83 Jun 06 '13 at 13:07
  • http://stackoverflow.com/questions/236129/splitting-a-string-in-c/237280#237280 – VoronoiPotato Jun 06 '13 at 13:21
  • 1
    Can `"quoted strings"` contain `;`? (unknown from example) Can `"quoted strings have \" escaped quotes"`? (yes) Is the missing `;` before `Handle` in your example missing on purpose, or a tpyo? Are these `string`s produced by machine, or a human typing them in? And how rigid is the format? What needs to be done on errors? – Yakk - Adam Nevraumont Jun 06 '13 at 13:31
  • No, "quoted strings" do not contain ; Each field=value pair is separated by ; Example: Field1 = "some_string_value"; Field2 = some_uint_value; – Edward83 Jun 06 '13 at 13:35
  • do you have to use regex? Not only is it going to be slower, it is also likely to be more complicated than a simple string split. – yngccc Jun 06 '13 at 13:47
  • yes, you are right, string splitting is a good choice, but it would be interesting to do that with the new C++11 regex, and I am new to regex altogether. – Edward83 Jun 06 '13 at 14:02
  • I only read "Todo", but what have you _done_ already? – Sebastian Mach Jun 06 '13 at 16:05
  • @phresnel, I read many comments and nothing with answer; as I see nobody know how to do that! – Edward83 Jun 06 '13 at 17:23
  • @phresnel, do you have solution? – Edward83 Jun 06 '13 at 17:29
  • @Edward83: Typically the questioner should show some effort and demonstrate what he has tried, and then post a _specific_ question. I don't see that you have even _tried_ to find a proper regex, do you have a problem in understanding regexes? Does the asterisk operators struggle you? Don't you find the documentation of C++ regexes? As said, this site targets specific questions, and it is not a free give-me-teh-codez site. – Sebastian Mach Jun 06 '13 at 19:54
  • @Edward83: That said: Yes, I would know the solution. It would help you now. However, it would teach you nothing for the future and you will be asking the same question again (sidenote: I have no evil intention; but I hope you understand what I mean) – Sebastian Mach Jun 06 '13 at 19:54
  • @phresnel and I have no evil intention. I am glad to read your comments;) – Edward83 Jun 07 '13 at 07:25

3 Answers

1

Create a regular expression that matches a text field followed by an '=' sign followed by a text field followed by a ';'. Create a regex_iterator object that applies that regular expression to the target text. Iterate until done.

Pete Becker
  • 74,985
  • 8
  • 76
  • 165
1

I couldn't make gcc/libstdc++ regex work. The best I could do that seems to solve your problem was something like:

  #include <cctype>
  #include <iostream>
  #include <string>

  using namespace std;

  // Stream extractor for a delimited string, yielding its *contents*: the
  // surrounding delimiters are dropped and every escape character is removed,
  // with the character following it taken literally.
  //
  // Defaults: delimiter '"', escape character '\\'.
  //
  // Extraction does not skip leading whitespace; the very next character in
  // the stream must be the opening delimiter.
  class unquot {
    std::string where;   // text produced by the most recent extraction
    char what, quote;    // delimiter character, escape character
  public:
    unquot(char _what = '"', char _quote = '\\') : what(_what), quote(_quote) {}

    // Returns a copy of the text produced by the most recent extraction.
    std::string str() const { return where; }

    // Reads one delimited string from `i` into `w`.
    //
    // On success the stream is positioned just past the closing delimiter.
    // If the next character is not the opening delimiter, it is left in the
    // stream and failbit is set. If input ends before the closing delimiter
    // is seen, the stream is left in the failed state produced by get().
    friend std::istream& operator>>(std::istream& i, unquot& w) {
      w.where.clear();

      char c = i.get();
      if( !i )
        return i;

      if( c != w.what ) {
        // Order matters: putback() is a no-op on a stream that is not good,
        // so the character must be returned before failbit is raised.
        i.putback(c);
        i.setstate(std::ios::failbit);
        return i;
      }

      bool escaped = false;   // true when the previous character was the escape
      for( c = i.get(); i; c = i.get() ) {
        if( escaped ) {
          // Character after an escape is always taken literally.
          w.where.push_back(c);
          escaped = false;
        } else if( c == w.quote ) {
          escaped = true;
        } else if( c == w.what ) {
          break;              // closing delimiter: done, stream still good
        } else {
          w.where.push_back(c);
        }
      }

      return i;
    }
  };

  // Stream extractor for a delimited string, yielding the text *verbatim*:
  // the opening and closing delimiters and any escape characters are all kept
  // in the result.
  //
  // Defaults: delimiter '"', escape character '\\'.
  //
  // Extraction does not skip leading whitespace; the very next character in
  // the stream must be the opening delimiter.
  class until {
    std::string where;   // text produced by the most recent extraction
    char what, quote;    // delimiter character, escape character
  public:
    until(char _what = '"', char _quote = '\\') : what(_what), quote(_quote) {}

    // Returns a copy of the text produced by the most recent extraction.
    std::string str() const { return where; }

    // Reads one delimited string from `i` into `w`, delimiters included.
    //
    // On success the stream is positioned just past the closing delimiter.
    // If the next character is not the opening delimiter, it is left in the
    // stream and failbit is set. If input ends before the closing delimiter
    // is seen, the stream is left in the failed state produced by get().
    friend std::istream& operator>>(std::istream& i, until& w) {
      w.where.clear();

      char c = i.get();
      if( !i )
        return i;

      if( c != w.what ) {
        // Order matters: putback() is a no-op on a stream that is not good,
        // so the character must be returned before failbit is raised.
        i.putback(c);
        i.setstate(std::ios::failbit);
        return i;
      }

      w.where.push_back(c);   // keep the opening delimiter

      bool escaped = false;   // true when the previous character was the escape
      for( c = i.get(); i; c = i.get() ) {
        w.where.push_back(c); // every character is kept, escapes included
        if( escaped ) {
          escaped = false;
        } else if( c == w.quote ) {
          escaped = true;
        } else if( c == w.what ) {
          break;              // unescaped closing delimiter: done
        }
      }

      return i;
    }
  };

  // Stream extractor for a single identifier-like word: a run of
  // alphanumeric characters and underscores. Whitespace before and after the
  // word is consumed; the first other character after it is left in the
  // stream.
  class word {
    std::string where;   // word produced by the most recent extraction
  public:
    word() {}

    // Returns a copy of the word produced by the most recent extraction.
    std::string str() const { return where; }

    // Reads one word from `i` into `w`.
    //
    // If the first non-whitespace character is not a word character, it is
    // left in the stream and failbit is set. If input ends after at least one
    // word character was read, the extraction counts as successful (eofbit
    // stays set, failbit is cleared), like extraction into std::string.
    friend std::istream& operator>>(std::istream& i, word& w) {
      enum class phase { leading, inside, trailing };
      phase at = phase::leading;
      w.where.clear();

      for( char c = i.get(); i; c = i.get() ) {
        // <cctype> classifiers require a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        const unsigned char uc = static_cast<unsigned char>(c);
        const bool wordchar = std::isalnum(uc) || (c == '_');
        const bool spacechar = std::isspace(uc) != 0;

        if( at == phase::leading ) {
          if( wordchar ) {
            at = phase::inside;
            w.where.push_back(c);
          } else if( !spacechar ) {
            // Order matters: putback() is a no-op on a stream that is not
            // good, so return the character before raising failbit.
            i.putback(c);
            i.setstate(std::ios::failbit);
            break;
          }
        } else if( at == phase::inside ) {
          if( wordchar ) {
            w.where.push_back(c);
          } else if( spacechar ) {
            at = phase::trailing;
          } else {
            i.putback(c);
            break;
          }
        } else if( !spacechar ) {
          // First non-space character after the word belongs to the caller.
          i.putback(c);
          break;
        }
      }

      // Running into end of input after a word was collected is not an
      // error: drop the failbit that the final get() raised, keep eofbit.
      if( i.eof() && !i.bad() && !w.where.empty() )
        i.clear(i.rdstate() & ~std::ios::failbit);

      return i;
    }
  };

  // Stream extractor that consumes a required separator character together
  // with any whitespace around it. Repeated occurrences of the separator are
  // consumed as well. Nothing is stored; only the stream position and state
  // change.
  class skip {
    char which;   // the separator character that must be present
  public:
    skip(char _which) : which(_which) {}

    // Consumes whitespace and separator characters from `i`, stopping at the
    // first character that is neither (that character is left in the stream).
    //
    // failbit is set if no separator was seen before stopping. If input ends
    // after a separator was seen, the extraction counts as successful (eofbit
    // stays set, failbit is cleared), so a record terminated by the separator
    // at the very end of input is not reported as a failure.
    friend std::istream& operator>>(std::istream& i, skip& s) {
      bool found = false;
      for( char c = i.get(); i; c = i.get() ) {
        if( c == s.which ) {
          found = true;
          continue;
        }
        // <cctype> classifiers require a value representable as unsigned
        // char; passing a negative plain char is undefined behaviour.
        if( std::isspace(static_cast<unsigned char>(c)) )
          continue;

        // putback() must come before setstate(): it is a no-op on a stream
        // that is not good.
        i.putback(c);
        if( !found )
          i.setstate(std::ios::failbit);
        break;
      }

      // End of input after the separator is fine: drop the failbit that the
      // final get() raised, keep eofbit.
      if( found && i.eof() && !i.bad() )
        i.clear(i.rdstate() & ~std::ios::failbit);

      return i;
    }
  };



  int main ()
  {
    word w;
    skip eq { '=' };
    unquot q;
    skip semi { ';' };
    while( cin >> w >> eq >> q >> semi ) {
      cout << w.str() << " {" << q.str() << "}" << endl;
    }

    return 0;
  }

You can use "until q;" instead of "unquot q;" if you want to keep the quotes...

Massa
  • 8,647
  • 2
  • 25
  • 26
  • you're welcome. I did some more testing, with g++ and clang++, and libstdc++. It seems that the regex function defs are missing for the version of libstdc++ that I have here. I suppose it will work with libc++ on MacOSX, but I did not have the time to test it. Good luck! – Massa Jun 17 '13 at 14:45
1

I made this work, but it really only worked on clang++ 3.2 with libc++ (ie, on a Mac):

#include <iostream>
#include <string>
#include <regex>

using namespace std;

// Reads standard input up to the first NUL character (or end of input),
// echoes it, then prints every `field = value` match found in it. A value is
// whatever follows the '=' and optional spaces, up to the next ';'.
int main(int, char**) {
  std::string document;
  std::getline(std::cin, document, '\0');
  std::cout << document;

  // Group 1: field name (word characters). Group 2: value (no ';').
  const std::regex pair_pattern(R"---((\w+) *= *([^;]*))---");

  const std::sregex_iterator finish;   // default-constructed = end of sequence
  std::sregex_iterator match(document.begin(), document.end(), pair_pattern);
  while( match != finish ) {
    const std::smatch& hit = *match;
    std::cout << "match: (" << hit[1].str() << ")(" << hit[2].str() << ")" << std::endl;
    ++match;
  }

  return 0;
}
Massa
  • 8,647
  • 2
  • 25
  • 26