1

I'm trying to parse this string and retrieve the "sid" and the "Type". I have the following code. It is crashing at the get_child line and I'm not entirely sure why...

// Returns a reference to a single function-local, default-constructed
// property tree. The same object is returned on every call, so it can be
// handed out by const reference (e.g. as the fallback argument of
// get_child(path, default)).
const boost::property_tree::ptree& empty_ptree()
{
    static boost::property_tree::ptree sharedEmptyTree;
    return sharedEmptyTree;
}

int _tmain(int argc, _TCHAR* argv[])
{
struct SXMLElements
{
    std::string strSessionId;
    unsigned int uiTypeOfNotification;
};

std::string strXMLText = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" 
"<NotificationSet vers=\"1.0\" svcid=\"session\" notid=\"42\">\r\n" "<Notification><![CDATA[<SessionNotification vers=\"1.0\" notid=\"42\">\r\n"
"<Session sid=\"sdfkljdsfkjjsf\">\r\n" "<Property name=\"CharSet\" value=\"UTF-8\"></Property>\r\n" 
"</Session>\r\n" "<Type>5</Type>\r\n" 
"<Time>324242</Time>\r\n" 
"</SessionNotification>]]></Notification>\r\n" 
"</NotificationSet>";

//// Parse the HTTP header Status line.
std::stringstream ss( strXMLText );

boost::property_tree::ptree xmlResponse;
//if (strXMLText.size() > 0)
//{
std::istringstream isResponse (strXMLText);
boost::property_tree::read_xml(isResponse, xmlResponse);
SXMLElements sXmlElem;
//const boost::property_tree::ptree & formats = xmlResponse.get_child("NotificationSet.Notification.Session", empty_ptree());
BOOST_FOREACH( boost::property_tree::ptree::value_type const& v, xmlResponse.get_child("NotificationSet.Notification.SessionNotification.Session") )
{
    sXmlElem.strSessionId = xmlResponse.get<std::string>("<xmlattr>.sid", "");
    sXmlElem.uiTypeOfNotification = xmlResponse.get<unsigned int>("Type", 0);
    //  }
}
//}

return 0;
}

Can anyone spot what I might be doing wrong?

Null
  • 1,950
  • 9
  • 30
  • 33
Suzan Aydın
  • 355
  • 6
  • 17

1 Answer

0

The Session attribute cd=""\"o=rrs,o=ces,maxtime=""\"64""\" decodes to

cd="o=rrs,o=ces,maxtime="64" 

which leaves invalid XML.

Regardless, all the ""\" could be replaced by just \"

Besides that, a simple test revealed that Boost Property Tree doesn't like <![CDATA[]]> sections, and rightly so; see "What does <![CDATA[]]> in XML mean?":

In an XML document or external parsed entity, a CDATA section is a section of element content that is marked for the parser to interpret as only character data, not markup. (Source: Wikipedia)

SUMMARY

  • Boost Property Tree is not an XML parsing library (xml parsing using boost)
  • The contents of a CDATA section are character data, not markup (so even using an XML library you shouldn't expect the XML inside it to be parsed directly)

Live On Coliru

#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <iostream>

// Parses a notification document (with the CDATA wrapper removed, so that
// <SessionNotification> is real markup) and prints the session id and the
// notification type.
//
// Both lookups go through get_child_optional(), so a missing
// <SessionNotification> or <Session> element simply results in no output
// rather than an exception. read_xml() itself can still throw on malformed
// input.
int main()
{
    std::string const strXMLText = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 
    <NotificationSet vers="1.0" svcid="session" notid="42">
        <Notification><SessionNotification vers="1.0" notid="42">
                <Session sid="sdfkljdsfkjjsf" stype="user" cid="uid=1,u=People,dc=r,dc=c" cd="o=rrs,o=ces,maxtime=64" maxidle="24">
                    <Property name="CharSet" value="UTF-8"></Property>
                    <Property name="ed" value="xxx"></Property>
                    <Property name="Sdle" value="sdl:asdadsad"></Property>
                </Session>
                <Type>5</Type>
                <Time>324242</Time>
        </SessionNotification></Notification>
    </NotificationSet>
    )";

    boost::property_tree::ptree xmlResponse;

    std::istringstream isResponse(strXMLText);
    boost::property_tree::read_xml(isResponse, xmlResponse);

    if (auto SessionNotification = xmlResponse.get_child_optional("NotificationSet.Notification.SessionNotification"))
    {
        struct SessionElement {
            std::string id;
            unsigned int uiTypeOfNotification;
        };

        if (auto Session = SessionNotification->get_child_optional("Session")) {
            // "sid" is an attribute of <Session>, hence the <xmlattr> step;
            // <Type> is a sibling of <Session>, so it is read from the parent.
            SessionElement elem {
                Session->get("<xmlattr>.sid", ""),
                SessionNotification->get("Type", 0u)
            };

            std::cout << "id: " << elem.id << ", type: " << elem.uiTypeOfNotification << "\n";
        }
    }
}

Prints

id: sdfkljdsfkjjsf, type: 5
Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • std::string strXMLText = "\r\n" "\r\n" "<![CDATA[\r\n" "\r\n" "\r\n" "\r\n" "5\r\n" "\r\n" "]]>\r\n" ""; – Suzan Aydın May 08 '15 at 22:18
  • That's what I have now Sehe, but i'm getting an exception still being thrown in the "xmlResponse.get_child("NotificationSet.Notification.SessionNotification.Session")" part..... – Suzan Aydın May 08 '15 at 22:19
  • Because of the CDATA, does that mean i can't use Boost Property Tree? What else could i use? – Suzan Aydın May 08 '15 at 22:28
  • Please, @suzan, update your question. Comments don't work for this - at all. I've updated my answer – sehe May 08 '15 at 22:40
  • Thanks @sehe, i've updated my code. The xml i initially had with the CDATA was from a demo HTTP POST request with content set to xml. Are you saying that when i receive the POST request with the xml content, that the CDATA won't be in there? – Suzan Aydın May 08 '15 at 22:49
  • It probably shouldn't. But I can't tell. Because what you receive depends _only_ on what the server sends. And the server may send you a Shakespeare drama if it so chooses... – sehe May 08 '15 at 22:50
  • Hah... great. So if it DID send it with the CDATA part, then what is the best thing to do...? – Suzan Aydın May 08 '15 at 22:52
  • You use the library that already exists to handle it. Preferably not in C++. If you must, you grovel and growl and extract the CDATA using [a proper XML library](http://stackoverflow.com/questions/9387610/what-xml-parser-should-i-use-in-c/9387612#9387612) and parse its XML content /after that/. You could consider doing a quick-and-dirty CDATA removal preprocessing step. But it's hard to get right and kills XML compliance anyways – sehe May 08 '15 at 22:54
  • When you say use the library that already exists to handle it, what library are we talking about exactly? – Suzan Aydın May 08 '15 at 22:56
  • Please. I linked to that answer twice now. Read it. Thank you. – sehe May 08 '15 at 22:56
  • Ok so TinyXML is an option. Good... Maybe I can try and rewrite this with TinyXML – Suzan Aydın May 08 '15 at 22:58