I am in the midst of working on a school project where I have to parse an XML file whose complexity can vary. All I know are the interesting elements and their attributes that I am after. However, these values may not always exist, so null checking is a must. During the course of my research, it seems most folks say that when dealing with a complex XML file, it's best to deserialize the file into predefined class(es). I will provide two examples of this XML file for your reference. I will also tell you the interesting elements and attributes. What I am looking for is an example of how you would parse this file in order to extract the attribute values, edit them, and write them back to the same file. I will also include the code I have so far...
Example XML file (1) :
Example XML file (2) :
The interesting elements are any elements which have attributes like :
- w:rsidR
- w:rsidRDefault
- w:rsidP
- w:rsidRPr
- w:rsidTr
I currently have the following method which will parse the files and read in the attribute values and allow me to edit them in memory. However, I can't figure out how best to write this same data structure back to the attributes. Hence my research and subsequent question about XML serialization. I truly appreciate your help and input as always... Thank you!!!
My Code Snippet :
/// <summary>
/// Reads the hard-coded XML document, collects the rsid attribute values found on
/// <c>w:p</c>, <c>w:r</c>, <c>w:tr</c> and <c>w:sectPr</c> start elements, overlays
/// successive characters of <paramref name="strMsg"/> onto each value, and prints
/// every resulting value followed by the total number of rsid attributes found.
/// </summary>
/// <param name="strMsg">
/// Text whose characters are consumed in order, across all values, to overwrite each
/// rsid value from its third character onward. Once the text is exhausted the remaining
/// values are printed unchanged. It is dereferenced as soon as an rsid value longer than
/// two characters is encountered, so it must not be null in that case.
/// </param>
/// <remarks>
/// The edited values are only written to the console; nothing is written back to the file.
/// </remarks>
public static void shaqfu(string strMsg)
{
    string strFile = @"C:\SourceFolder\SampleXML\document-test.xml";

    // Attribute names in the order their values are processed and printed for each element.
    string[] rsidAttributeNames =
    {
        "w:rsidR",
        "w:rsidRPr",
        "w:rsidRDefault",
        "w:rsidP",
        "w:rsidTr"
    };

    int rsidCount = 0;   // total number of rsid attributes found in the document
    int msgPos = 0;      // next character of strMsg to consume; shared across all values

    using (XmlReader reader = XmlReader.Create(strFile))
    {
        while (reader.Read())
        {
            if (!reader.IsStartElement())
            {
                continue;
            }

            switch (reader.Name)
            {
                // All four element kinds carry the same set of optional rsid attributes.
                case "w:p":
                case "w:r":
                case "w:tr":
                case "w:sectPr":
                    foreach (string attributeName in rsidAttributeNames)
                    {
                        // GetAttribute returns null when the attribute is absent.
                        string rsidValue = reader.GetAttribute(attributeName);
                        if (rsidValue == null)
                        {
                            continue;
                        }

                        rsidCount++;

                        // Keep the first two characters and overwrite the rest
                        // for as long as strMsg still has characters left.
                        char[] rsidChars = rsidValue.ToCharArray();
                        for (int x = 2; x < rsidChars.Length && msgPos < strMsg.Length; x++)
                        {
                            rsidChars[x] = strMsg[msgPos++];
                        }

                        Console.WriteLine(rsidChars);
                    }
                    break;
            }
        }
    }

    Console.WriteLine("Number of rsids found : {0}", rsidCount);
}
Example XML File (1) - Actual Text
<?xml version="1.0" encoding="UTF-16" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
<w:p w14:paraId="2CBBB1B4" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRDefault="00D9548A" w:rsidP="00ED7A0B"></w:p>
<w:p w14:paraId="2CBBB1B5" w14:textId="77777777" w:rsidR="00D9548A" w:rsidRPr="00ED77B9" w:rsidRDefault="00C706DD" w:rsidP="00D9548A"></w:p>
<w:pPr>
<w:rPr>
<w:rFonts w:ascii="Arial" w:hAnsi="Arial" w:cs="Arial"></w:rFonts>
<w:b></w:b>
<w:sz w:val="40"></w:sz>
<w:szCs w:val="40"></w:szCs>
</w:rPr>
</w:pPr>
<w:r w:rsidRPr="00EC456F"></w:r>
<w:tr w:rsidR="0029258E" w14:paraId="2CBBB242" w14:textId="77777777" w:rsidTr="0029258E"></w:tr>
</w:body>
</w:document>
Example XML file (2) - Actual Text :
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<w:document xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape" mc:Ignorable="w14 w15 wp14">
<w:body>
<w:p w:rsidR="00661DE2" w:rsidRDefault="00B31FC7">
<w:r>
<w:t>This is a single editing session. 9:49AM</w:t>
</w:r>
<w:r w:rsidR="00251096">
<w:t xml:space="preserve"> – adding more content to the first line 10:46AM</w:t>
</w:r>
<w:r w:rsidR="00A06ADC">
<w:t xml:space="preserve"> – adding some more content to the original sentence. 10:49AM</w:t>
</w:r>
<w:bookmarkStart w:id="0" w:name="_GoBack"></w:bookmarkStart>
<w:bookmarkEnd w:id="0"></w:bookmarkEnd>
</w:p>
<w:p w:rsidR="00481AA7" w:rsidRDefault="00481AA7">
<w:r>
<w:t>This is a second editing session. 9:56AM</w:t>
</w:r>
</w:p>
<w:p w:rsidR="005C6856" w:rsidRDefault="005C6856">
<w:r>
<w:t>This is a third editing session. 9:58AM</w:t>
</w:r>
</w:p>
<w:sectPr w:rsidR="005C6856">
<w:pgSz w:w="12240" w:h="15840"></w:pgSz>
<w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440" w:header="720" w:footer="720" w:gutter="0"></w:pgMar>
<w:cols w:space="720"></w:cols>
<w:docGrid w:linePitch="360"></w:docGrid>
</w:sectPr>
</w:body>