I have an XML file with 4.6 million lines, where each line is a self-contained XML string. I wrote a simple app to shred the XML into a pipe-delimited file. The longer the app runs, the slower it gets, until it writes only a fraction of the lines per minute that it managed at the start.
Can anyone review my code and give suggestions to speed it up?
// Converts a source file that holds one XML document per line into a pipe-delimited
// text file named Results<ticks>.txt, written to the source file's directory.
// Each input line must contain exactly one QUERY/ITEM element; otherwise Single() throws.
string line;

// Path.Combine inserts the separator only when needed, so an empty directory part yields a
// relative "Results....txt" instead of a root-anchored "\Results....txt".
string OutputFilepath = System.IO.Path.GetDirectoryName(txtSourceFile.Text) ?? string.Empty;
string NewFileName = System.IO.Path.Combine(OutputFilepath, string.Format("Results{0}.txt", DateTime.Now.Ticks));

// Two-letter -> three-letter ISO region code cache: a RegionInfo is built once per distinct
// code instead of once per input line.
var regionCodes = new System.Collections.Generic.Dictionary<string, string>(StringComparer.Ordinal);

using (System.IO.StreamWriter OutputFile = new System.IO.StreamWriter(NewFileName, true))
using (System.IO.StreamReader file = new System.IO.StreamReader(txtSourceFile.Text))
{
    while ((line = file.ReadLine()) != null)
    {
        // Remove carriage returns and line feeds (escape sequences keep the literals valid C#).
        // NOTE(review): ReadLine() already strips line terminators, so these calls are expected
        // to be no-ops. If the intent was to drop encoded CR/LF character references from the
        // XML text, confirm and replace those strings instead.
        line = line.Replace("\r", "");
        line = line.Replace("\n", "");

        XElement record = XElement.Parse(line);
        XElement item = record.Element("QUERY").Elements("ITEM").Single();

        string iso2 = (string)item.Attribute("countrycodeISO2");
        string iso3;
        if (iso2 == null || !regionCodes.TryGetValue(iso2, out iso3))
        {
            // A missing or unrecognised code still throws from the RegionInfo constructor,
            // before anything is stored in the cache.
            iso3 = new RegionInfo(iso2).ThreeLetterISORegionName;
            regionCodes[iso2] = iso3;
        }

        // The 19 output columns, in order. Constant columns are plain literals: the previous
        // version passed `Text = "..."` assignment expressions here, which wrote to the
        // enclosing `Text` member fourteen times per input line.
        // NOTE(review): if `Text` is the form caption, confirm nothing relied on it being
        // left as "A" after the run.
        string[] fields =
        {
            "",
            "",
            "",
            "",
            (string)item.Attribute("unparsedname"),
            "",
            (string)item.Attribute("addr1"),
            "",
            (string)item.Attribute("city"),
            (string)item.Attribute("state"),
            (string)item.Attribute("postalcode"),
            iso3,
            "",
            "01/01/" + (string)item.Attribute("dobyear"),
            "",
            "",
            "",
            "",
            "A"
        };

        // string.Join renders null entries (missing attributes) as empty columns, matching
        // what the "{0}|{1}|..." format string produced.
        string PipeDelimited = string.Join("|", fields);
        OutputFile.WriteLine(PipeDelimited);
    }
    // No explicit Close(): the using statements dispose both the reader and the writer.
}