I need to detect in C++ all possible C/C++ string constants:
std::string s = "dummy text"; // comment
std::string s = "dummier text about \"nothing\""; // don't worry
std::string multiLineString = "dummy multiline \
"another line";
std::string s1="aaa", s2="bbb";
std::string multiString="aaa" "bbb";
std::string division="a/b=c";
but also:
char c = '"';
char c = '\t';
char c = '\'';
char c = '\\';
from code above I want to extract:
"dummy text"
"dummier text about \"nothing\""
"dummy multiline \
"aaa"
"a/b=c"
'"'
'\t'
'\''
'\\'
NOTE: I process text line by line, so I need just first string from each line, e.g. only: "dummy multiline \
so first I've tries, then Alan's solution was very useful:
Finding quoted strings with escaped quotes in C# using a regular expression
Finally I managed to create program like that:
#include <iostream>
#include <string>
#include <boost/regex.hpp>
boost::regex regex2quotes;
void initRegex()
{
std::string notDQuota = "((?!\\\\).)*?";
std::string dQuota = "[\\\"]";
std::string notSQuota = "((?!\\\\).){1,2}?";
std::string sQuota = "[']";
std::string dQuotaExpression = '(' + dQuota + notDQuota + dQuota + ')';
std::string sQuotaExpression = '(' + sQuota + notSQuota + sQuota + ')';
std::string finalExpression = dQuotaExpression + '|' + sQuotaExpression;
std::cout << "Regex>>>>" << finalExpression << "<<<<<\n\n";
regex2quotes = finalExpression;
}
void checkIfFound(std::string text)
{
std::cout << "text>>>>>" << text << "<<<\n";
boost::smatch result;
bool found = boost::regex_search(text, result, regex2quotes);
if(found)
std::cout << "Found====" << result[0] << "====\n";
else
std::cout << "!!!Text not found in: " << text << std::endl;
}
int main(int argc, char *argv[])
{
initRegex();
checkIfFound("std::string s = \"dummy text\"; // comment");
checkIfFound("std::string s = \"dummier text about \\\"nothing\\\"\"; // don't worry");
checkIfFound("std::string multiLineString = \"dummy \\\n\
\"another line\";");
checkIfFound("std::string s1=\"aaa\", s2=\"bbb\";");
checkIfFound("std::string multiString=\"aaa\" \"bbb\";");
checkIfFound("std::string division=\"a/b=c\";");
checkIfFound("\"text\";");
checkIfFound("char c = '\"';");
checkIfFound("char c = '\n';");
checkIfFound("char c = '\\\'';");
checkIfFound("char c = '\\\\';");
return 0;
}
Unfortunately it doesn't extract all test cases I need, output:
Regex>>>>([\"]((?!\\).)*?[\"])|([']((?!\\).){1,2}?['])<<<<<
text>>>>>std::string s = "dummy text"; // comment<<<
Found===="dummy text"====
text>>>>>std::string s = "dummier text about \"nothing\""; // don't worry<<<
Found====""====
text>>>>>std::string multiLineString = "dummy \
"another line";<<<
Found===="another line"====
text>>>>>std::string s1="aaa", s2="bbb";<<<
Found===="aaa"====
text>>>>>std::string multiString="aaa" "bbb";<<<
Found===="aaa"====
text>>>>>std::string division="a/b=c";<<<
Found===="a/b=c"====
text>>>>>"text";<<<
Found===="text"====
text>>>>>char c = '"';<<<
Found===='"'====
text>>>>>char c = ' ';<<<
Found====' '====
text>>>>>char c = '\'';<<<
!!!Text not found in: char c = '\'';
text>>>>>char c = '\\';<<<
!!!Text not found in: char c = '\\';
Could you please give me some advices? Is it possible to detect its with regex?