Here is a C++ version for Windows, ported from @Ben Anderson's C# solution.
Note that the code isn't very robust yet. Also, all leading and trailing newlines are trimmed.
// The trimming method comes from https://stackoverflow.com/a/1798170/1613961
/// @brief Removes leading and trailing characters from a wide string.
///
/// @param str      The string to trim; it is not modified.
/// @param newline  The SET of characters to strip from both ends (each
///                 character is matched individually, not as a sequence).
///                 Defaults to carriage return and line feed.
/// @return A copy of @p str without the leading/trailing characters found in
///         @p newline, or an empty string when @p str consists only of such
///         characters (or is empty). Characters in the middle are kept.
std::wstring trim(const std::wstring& str, const std::wstring& newline = L"\r\n")
{
    // `newline` is taken by const reference: a non-const reference cannot
    // bind to the temporary created from the default string literal.
    const auto strBegin = str.find_first_not_of(newline);
    if (strBegin == std::wstring::npos)
        return std::wstring{}; // no content

    // A character outside the set exists, so this search cannot fail and
    // strEnd >= strBegin holds.
    const auto strEnd = str.find_last_not_of(newline);
    return str.substr(strBegin, strEnd - strBegin + 1);
}
wstring HtmlToText(wstring htmlTxt) {
std::wregex stripFormatting(L"<[^>]*(>|$)"); //match any character between '<' and '>', even when end tag is missing
wstring s1 = std::regex_replace(htmlTxt, stripFormatting, L"");
wstring s2 = trim(s1);
wstring s3 = std::regex_replace(s2, std::wregex(L"\\ "), L" ");
return s3;
}