0

I have been struggling with this for quite some time now.

I want to convert the HTML to XML. The structure is shown below.

I am using "HtmlAgilityPack" to convert the html to valid xml structure. So, after this, my HTML looks like this:

<div class="menuItem1" video="" preview="">
    Menu 1
    <div class="subMenu1">
        <div class="menuItem2" video="" preview="">
            Menu 2
            <div class="subMenu2">
                <div class="menuItem3" video="" preview="">
                    Menu 3
                    <div class="subMenu3">
                        <div class="" video="" preview="">Menu 4</div>
                    </div>
                    <div class="treeExpand"></div>
                </div>
                <div class="menuItem3" video="" preview="">Menu 3</div>
                <div class="menuItem3" video="" preview="">Menu 3</div>
            </div>
            <div class="treeExpand"></div>
        </div>
    </div>
    <div class="treeExpand"></div>
</div>
<div class="menuItem1" video="" preview="">
    Menu 1
    <div class="subMenu1">
        <div class="menuItem2" video="" preview="">
            Menu 2
            <div class="subMenu2">
                <div class="menuItem3" video="" preview="">
                    Menu 3
                    <div class="subMenu3">
                        <div class="" video="" preview="">Menu 4</div>
                    </div>
                    <div class="treeExpand"></div>
                </div>
                <div class="menuItem3" video="" preview="">Menu 3</div>
                <div class="menuItem3" video="" preview="">Menu 3</div>
            </div>
            <div class="treeExpand"></div>
        </div>
    </div>
    <div class="treeExpand"></div>
</div>

Which is exactly what I want. Now I can get this into an XElement, using this C# code:

// XDocument.Parse accepts only a single root element, and the markup above has more than
// one top-level <div>. Wrapping the fragment in the <Navigation> root before parsing keeps
// every top-level menu item instead of failing on (or dropping) the siblings.
XDocument docw = XDocument.Parse("<Navigation>" + THE_HTML_STRING_AS_SHOWN_ABOVE + "</Navigation>");
XElement root = docw.Root;

I created a method, which I can pass the root into:

// The method only returns the generated markup, so the result has to be captured.
string navigationXml = GenerateXmlFromHtml(root);

The code for this method:

// Walks the direct children of elem and returns <MenuItem> markup, built by string
// concatenation, for the children whose class contains "menuItem".
//
// NOTE(review): this version edits the tree it is walking (rename, AddBeforeSelf, Remove)
// while elem.Elements() is still being enumerated, and it discards the string returned by
// the recursive calls in the subMenu / treeExpand / no-class branches, so nothing found
// below those elements reaches the returned string.
private string GenerateXmlFromHtml(XElement elem)
{
    StringBuilder sbNavigationXml = new StringBuilder();
    try
    {
        //HTML will always have a video and preview, according to the generation of the html structure.

        string text = string.Empty;
        string videopath = string.Empty;
        string previewpath = string.Empty;
        XText textNode;

        foreach (XElement element in elem.Elements())
        {
            element.Name = "MenuItem"; //Change element name.

            // A missing class attribute makes .Value throw; the catch turns that into an empty class.
            string htmlClass;
            try { htmlClass = element.Attribute("class").Value; }
            catch { htmlClass = ""; }

            if (!string.IsNullOrEmpty(htmlClass))
            {
                if (htmlClass.Contains("subMenu"))
                {
                    // Hoist the wrapper's children in front of it, then detach the wrapper.
                    // This changes elem's child list during the enumeration above.
                    element.AddBeforeSelf(element.Elements());
                    element.Remove();
                    // Return value is not appended to sbNavigationXml.
                    GenerateXmlFromHtml(element);
                }
                else if (htmlClass.Contains("menuItem"))
                {
                    // First text node directly under the element; FirstOrDefault yields null
                    // when there is none, in which case the next line throws.
                    textNode = element.Nodes().OfType<XText>().FirstOrDefault();
                    text = textNode.Value;
                    videopath = element.Attribute("video").Value;
                    previewpath = element.Attribute("preview").Value;

                    if (element.HasElements)
                    {
                        // Open tag, nested markup from the recursive call, close tag.
                        sbNavigationXml.AppendLine("<MenuItem Text=\"" + text + "\" VideoPath=\"" + videopath + "\" PreviewPath=\"" + previewpath + "\">");
                        sbNavigationXml.AppendLine(GenerateXmlFromHtml(element));
                        sbNavigationXml.AppendLine("</MenuItem>");
                    }
                    else
                    {
                        // Leaf item: self-closing tag.
                        sbNavigationXml.AppendLine("<MenuItem Text=\"" + text + "\" VideoPath=\"" + videopath + "\" PreviewPath=\"" + previewpath + "\" />");
                    }
                }
                else if (htmlClass.Contains("treeExpand"))
                {
                    // Same hoist-and-detach as the subMenu branch; return value is discarded.
                    element.AddBeforeSelf(element.Elements());
                    element.Remove();
                    GenerateXmlFromHtml(element);
                }
            }
            else
            {
                // No class: same hoist-and-detach as above; return value is discarded.
                element.AddBeforeSelf(element.Elements());
                element.Remove();
                GenerateXmlFromHtml(element);
            }
        }
    }
    catch (Exception)
    {
        // Rethrows unchanged.
        throw;
    }
    return sbNavigationXml.ToString();
}

At the end, I want this to produce this XML output:

<Navigation>
  <MenuItem Text="Menu 1" VideoPath="" PreviewPath="">
    <MenuItem Text="Menu 2">
      <MenuItem Text="Menu 3">
        <MenuItem Text="Menu 4" VideoPath="" PreviewPath="" />
      </MenuItem>
      <MenuItem Text="Menu 3" />
      <MenuItem Text="Menu 3" />
    </MenuItem>
  </MenuItem>
  <MenuItem Text="Menu 1" VideoPath="" PreviewPath="">
    <MenuItem Text="Menu 2">
      <MenuItem Text="Menu 3">
        <MenuItem Text="Menu 4" VideoPath="" PreviewPath="" />
      </MenuItem>
      <MenuItem Text="Menu 3" />
      <MenuItem Text="Menu 3" />
    </MenuItem>
  </MenuItem>
</Navigation>

In other words, the sub menus should fall away, as well as the tree-expand divs, and I then want to produce the XML. At the moment, though, I'm still failing miserably. Please ask if something is not clear. Any help appreciated!!!

===================================================================================================

EDIT: The fixed recursive method, for anyone who wants to see it:

// Builds the <MenuItem> markup for the menu entries found below elem. The input tree is
// only read, never modified.
//
// elem    - element whose direct children are examined; the method recurses into them.
// returns - one line of markup per menu item, with nested items placed between their
//           parent's opening and closing tags. Empty when no menu item is found. The
//           enclosing root element (e.g. <Navigation>) is left to the caller.
//
// Children are classified by their "class" attribute:
//   - contains "subMenu", or no/empty class : wrapper; emits nothing itself, children are visited
//   - contains "menuItem"                   : emitted as <MenuItem Text VideoPath PreviewPath>
//   - anything else (e.g. "treeExpand")     : ignored
private string GenerateXmlFromHtml(XElement elem)
{
    StringBuilder sbNavigationXml = new StringBuilder();

    foreach (XElement element in elem.Elements())
    {
        // Casting an XAttribute to string yields null when the attribute is missing,
        // so no exception handling is needed for elements without a class.
        string htmlClass = (string)element.Attribute("class") ?? string.Empty;

        if (string.IsNullOrEmpty(htmlClass) || htmlClass.Contains("subMenu"))
        {
            // Wrapper: contributes no tag of its own, only whatever its children produce.
            if (element.HasElements)
            {
                sbNavigationXml.AppendLine(GenerateXmlFromHtml(element));
            }
        }
        else if (htmlClass.Contains("menuItem"))
        {
            // The caption is the first text node directly under the element. It is trimmed
            // because the surrounding indentation/newlines are part of the node's value.
            XText textNode = element.Nodes().OfType<XText>().FirstOrDefault();
            string text = textNode == null ? string.Empty : textNode.Value.Trim();
            string videopath = (string)element.Attribute("video") ?? string.Empty;
            string previewpath = (string)element.Attribute("preview") ?? string.Empty;

            // XAttribute.ToString() renders name="value" with the value XML-escaped, so
            // captions or paths containing &, < or " still give well-formed markup.
            string attributes =
                new XAttribute("Text", text).ToString() + " " +
                new XAttribute("VideoPath", videopath).ToString() + " " +
                new XAttribute("PreviewPath", previewpath).ToString();

            if (element.HasElements)
            {
                sbNavigationXml.AppendLine("<MenuItem " + attributes + ">");
                sbNavigationXml.AppendLine(GenerateXmlFromHtml(element));
                sbNavigationXml.AppendLine("</MenuItem>");
            }
            else
            {
                sbNavigationXml.AppendLine("<MenuItem " + attributes + " />");
            }
        }
        // Any other class (e.g. "treeExpand") is presentation-only and produces no output.
    }

    return sbNavigationXml.ToString();
}
Fred
  • 2,402
  • 4
  • 31
  • 58
  • Side note: Usually people get it wrong other way around - parse HTML with Regex, but still construct XML with proper APIs. Is there a reason why you need to use string concatenation to build XML? – Alexei Levenkov Nov 04 '14 at 15:12
  • @AlexeiLevenkov - No, I am able to do anything I want... this is just the path I took, but anything else to produce that XML output will be fine, even if I have to do something totally different. – Fred Nov 04 '14 at 15:14
  • Check out [How can I build XML in C#](http://stackoverflow.com/questions/284324/how-can-i-build-xml-in-c) for guidance. – Alexei Levenkov Nov 04 '14 at 15:15
  • @AlexeiLevenkov - That is all good, but i think I have a problem with my recursive method then. because I don't see all the nodes. seems like only up to the second menu item level. – Fred Nov 04 '14 at 15:27
  • Suggestion: You are changing HTML tree while walking it. It makes code almost impossible to comprehend. Try to avoid doing so and it may just fix itself. – Alexei Levenkov Nov 04 '14 at 15:33
  • Hmmmm... let me try something with this in mind... – Fred Nov 04 '14 at 15:41
  • @AlexeiLevenkov - You are right, that resolved my problem. It really fixed itself after I just added an extra AppendLine if there is no htmlClass. Thanx. If you want you can post it as an answer and I will accept then. Else, THANX for helping me resolve this problem! – Fred Nov 04 '14 at 17:08
  • rodrigogq's post has a good code sample - I think it answers the question well enough (showing proper recursion/XML creation). I think it is a much better approach than the one you have, and I recommend accepting it as an answer (irrespective of how you fixed your current issue). Additionally, you can write up a self-answer showing the fix if you want to, with a remark like "use XML creation API....". – Alexei Levenkov Nov 04 '14 at 17:29

1 Answer

1

Try separating the input and output on different documents.

Then navigate the input and start outputting this to your XmlDocument output (another variable) in the format you want.

Something like...

// Converts the cleaned-up menu HTML (nested <div> elements) into a separate XML document
// of nested <MenuItem> elements under a <Navigation> root. The input document is only read.
class Converter
{
    // Builds and returns a new document whose root is <Navigation>, containing one
    // <MenuItem> per input element whose class contains "menuItem".
    public XmlDocument Convert(XmlDocument inputDocument)
    {
        XmlDocument result = new XmlDocument();

        // A freshly created XmlDocument has no DocumentElement, so the root has to be
        // created explicitly before anything can be appended to it.
        XmlElement navigation = result.CreateElement("Navigation");
        result.AppendChild(navigation);

        if (inputDocument.DocumentElement != null)
        {
            ConvertNode(inputDocument.DocumentElement, navigation, result);
        }

        return result;
    }

    // Converts inputNode and everything below it, appending the output to outputNode.
    //
    // inputNode  - node of the input document to examine; non-element nodes are ignored.
    // outputNode - node of outputDoc that receives the generated <MenuItem> elements.
    // outputDoc  - document used to create the new elements (must own outputNode).
    public void ConvertNode(XmlNode inputNode, XmlNode outputNode, XmlDocument outputDoc)
    {
        // Text, comment and whitespace nodes carry no attributes and produce no output here.
        XmlElement inputElement = inputNode as XmlElement;
        if (inputElement == null)
        {
            return;
        }

        // check element class (GetAttribute returns "" when the attribute is absent)
        string htmlClass = inputElement.GetAttribute("class");

        // Wrappers such as subMenu divs create no node of their own, so by default their
        // children are attached to the same output parent.
        XmlNode childTarget = outputNode;

        if (htmlClass.Contains("menuItem"))
        {
            XmlElement menuItem = outputDoc.CreateElement("MenuItem");
            menuItem.SetAttribute("Text", GetOwnText(inputElement));
            menuItem.SetAttribute("VideoPath", inputElement.GetAttribute("video"));
            menuItem.SetAttribute("PreviewPath", inputElement.GetAttribute("preview"));
            outputNode.AppendChild(menuItem);
            childTarget = menuItem;
        }

        // Always descend: menu items can sit below wrappers that are not menu items themselves.
        foreach (XmlNode node in inputElement.ChildNodes)
        {
            ConvertNode(node, childTarget, outputDoc);
        }
    }

    // Returns the trimmed value of the first text node directly under element, or "" if none.
    private static string GetOwnText(XmlElement element)
    {
        foreach (XmlNode child in element.ChildNodes)
        {
            if (child.NodeType == XmlNodeType.Text)
            {
                return child.Value.Trim();
            }
        }

        return string.Empty;
    }
}
rodrigogq
  • 1,943
  • 1
  • 16
  • 25