1

I have a code here that generates XML for Engineer list using linq. My question is that is there any ways to improve and speed up this method

    public static string CreateXMLforEngineersByLinq(List<Engineers> lst)
    {
        string x = "<Engineers>\n";
        x += string.Concat(lst.Select(s =>
                string.Format("<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n", 
                s.LicenseID, s.LastName, s.FirstName, s.MiddleName)));
        return x + "</Engineers>";
    }

RESULTS:

Hi, The code below is the added for me to show the actual speed of the methods i produced more answers and revisions are welcome thanks again for those who people who help here :)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using test.Classes;

namespace test.LINQ
{
public static class BatchOperations
{
    delegate string Operations(List<Engineers> i);        
    public static List<int> BatchAddition(List<int> lstNumbers)
    {
        lstNumbers = (from nl in lstNumbers
                      select nl + 2).ToList();

        return lstNumbers;
    }

    public static string CreateXMLforEngineersTheSimpleWay(IEnumerable<Engineers> lst)
    {
        StringBuilder x = new StringBuilder();
        x.AppendLine("<Engineers>");
        foreach (var s in lst)
        {
            x.AppendFormat("<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n",
                           s.LicenseID,
                           s.LastName,
                           s.FirstName,
                           s.MiddleName);
        }
        x.AppendLine("</Engineers1>");
        return x.ToString();
    }

    public static string CreateXMLforEngineersByLinq(List<Engineers> lst)
    {
        string x = "<Engineers>\n";
        x += string.Concat(lst.Select(s =>
                string.Format("<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n", 
                s.LicenseID, s.LastName, s.FirstName, s.MiddleName)));
        return x + "</Engineers2>";
    }

    public static string CreateXMLforEngineersByLoop(List<Engineers> lst)
    {
        string XmlForEngineers = "<Engineers>";
        foreach (Engineers item in lst)
        {
            XmlForEngineers += string.Format("<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n"
                , item.LicenseID, item.LastName, item.FirstName, item.MiddleName);
        }
        XmlForEngineers += "</Engineers3>";
        return XmlForEngineers;
    }

    public static void ShowEngineersByLicense()
    {
        List<Engineers> lstEngr = new List<Engineers>();

        Engineers tom = new Engineers();
        tom.FirstName = "Tom";
        tom.MiddleName = "Brook";
        tom.LastName = "Crook";
        tom.LicenseID = "1343-343434";

        Engineers ken = new Engineers();
        ken.FirstName = "ken";
        ken.MiddleName = "Brook";
        ken.LastName = "Crook";
        ken.LicenseID = "1343-343434";

        Engineers ben = new Engineers();
        ben.FirstName = "ben";
        ben.MiddleName = "Brook";
        ben.LastName = "Crook";
        ben.LicenseID = "1343-343434";

        for (int y = 0; y <= 1000; y++)
        {
            lstEngr.Add(tom);
            lstEngr.Add(ken);
            lstEngr.Add(ben);
        }

        List<Operations> j = new List<Operations>();
        j.Add(a => CreateXMLforEngineersTheSimpleWay(lstEngr));
        j.Add(i => CreateXMLforEngineersByLinq(lstEngr));
        j.Add(i => CreateXMLforEngineersByLoop(lstEngr));

        DateTime start, end;
        TimeSpan diff1 = new TimeSpan();

        foreach (Operations currentMethod in j)
        {
            start = DateTime.Now;
            Console.Write(currentMethod(lstEngr));
            end = DateTime.Now;
            diff1 = end - start;
            Console.WriteLine(diff1);
            Console.Write("\n\n");
        }
    }
}

}

PHeiberg
  • 29,411
  • 6
  • 59
  • 81
Allan Chua
  • 9,305
  • 9
  • 41
  • 61
  • 2
    You should re-work your benchmarking. 1: You need more than 3 people in the list to make a noticeable difference (try 10,000). 2: They need to have different strings assigned to them (which is most likely the case in the real world). C# has immutable strings, so having the same 4 strings will be incredibly quick. Use Guid.NewGuid().ToString(). Thirdly, you need to run the tests multiple times (say, 100), and grab the average. I find it very hard to believe that string += string is outperforming StringBuilder – Rob Oct 26 '11 at 04:45
  • 1
    Running with 5000 people, the results I get are: LINQ: 0.019ms, For-Loop: 12.059 _seconds_, Simple way: 0.010ms – Rob Oct 26 '11 at 04:47
  • I i appreciate that comment @Rob – Allan Chua Oct 26 '11 at 04:54
  • I had bench marked it with 300k records again but the linq came faster this time – Allan Chua Oct 26 '11 at 05:36
  • @Allan: Is this the bottleneck in your application? If not, then why bother optimizing it? Just write the simplest most maintainable code you possibly can, and only optimize it if you have to. If you're serializing, you're usually writing out to disk or the network, and that is almost assuredly going to dwarf any in-memory optimizations you can make. That being said, if this takes 12 seconds, there's probably something strange going on, and it would be worth a look in that case. – Merlyn Morgan-Graham Oct 26 '11 at 16:43
  • Also, some code style feedback: `Engineer` should be singular. `LicenseID` should be `LicenseId` (it is not an acronym, it is an abbreviation). `XML` should be `Xml` (only two-letter acronyms should be all-caps). Instead of repeating yourself, when calling `new Engineer` etc, you can use `var ken = new Engineer`. You can use initializer syntax to avoid having to retype the identifier, ala `ken.FirstName = ""; ken.LastName = "";` becomes `new Engineer { FirstName = "", LastName = "", ... };`. Same for lists, and typing out `Add` - `new List { new Engineer { } };`. – Merlyn Morgan-Graham Oct 26 '11 at 16:55

4 Answers4

3

Use the XmlSerializer, and build an extension method for convenience.

public static class XmlExtensions
{
    public static string ToXml<T>(this T instance)
    {
        var xmlSerializer = new XmlSerializer(typeof(T));
        var stringWriter = new StringWriter();
        xmlSerializer.Serialize(stringWriter, instance);
        return stringWriter.ToString();
    }
}

public static string CreateXMLforEngineersByLinqMyWay(List<Engineers> lst)
{
    return string.Format("<Engineers>{0}</Engineers>"
        , string.Join("",
            lst.Select(s => s.ToXml()) // Might have to put `.ToArray()` here
            )
        );
}

Or you could simply do this:

return lst.ToXml();

If this is part of a bigger object tree serialization, then just forego this whole method, and do ToXml on the top-level object.

You might need to edit the ToXml method to get it to drop XML namespaces, etc.

Merlyn Morgan-Graham
  • 58,163
  • 16
  • 128
  • 183
  • This came to be a little slower than what i expect :( – Allan Chua Oct 26 '11 at 05:43
  • @Allan: It is slow the first time you access serialization because a serialization assembly will be compiled at runtime. It will be much faster on all subsequent access. In order to avoid this you can pre-compile a serialization assembly. This is a project option in Visual Studio, or you can add `sgen.exe` to your build. See: http://devolutions.net/articles/dot-net/Net-Serialization-FAQ.aspx#S116 – Merlyn Morgan-Graham Oct 26 '11 at 16:42
  • @Allan: After forcing the serialization assembly to be generated, I get these timings for 100k pre-created `Engineer` instances: ToXml: 0.42-0.45s, CreateXmlforEngineersByLinqMyWay: 2.5-2.8s (lower value for removing the namespaces, ala [this answer](http://stackoverflow.com/questions/2950658/remove-namespace-from-generated-xml-in-net)), CreateXmlforEngineersTheSimpleWay: 0.25-0.3s, CreateXmlforEngineersByLinq: 0.55-0.65s. Seems like `ToXml` might win out over MyWay (I sort of expected this, since the loop has to build up the string twice - one in the formatter, once in the `string.Join`). – Merlyn Morgan-Graham Oct 26 '11 at 17:17
2

The most obvious speed up: Use a StringBuilder instead of a string. See this article for a comprehensive discussion on the performance of StringBuilder compared with simple concatenation.

Also, this is one case where you get a simpler and faster solution by dispensing with linq altogether;

 public static string CreateXMLforEngineersTheSimpleWay(IEnumerable<Engineers> lst)
    {
        StringBuilder x = new StringBuilder();
        x.AppendLine("<Engineers>");
        foreach(var s in lst)
        {
              x.AppendFormat("<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n", 
                             s.LicenseID, 
                             s.LastName, 
                             s.FirstName, 
                             s.MiddleName);
        }
        x.AppendLine("</Engineers>");
        return x.ToString();
    }

If you have your heart set on Linq, you can use the String.Join method. However, I suspect this would not perform as well as the first solution, as this solution below has to create a temporary array.

public static string CreateXMLforEngineersByLinq(List<Engineers> lst)
{
    var x = new StringBuilder();
    x.AppendLine("<Engineers>)";
    x.Append(
                string.Join(
                             "\n",
                             lst.Select(s =>
                                string.Format(
                                           "<Engineer>\n<LicenseID>{0}</LicenseID>\n<LastName>{1}</LastName>\n<FirstName>{2}</FirstName>\n<MiddleName>{3}</MiddleName>\n</Engineer>\n", 
                                            s.LicenseID, 
                                            s.LastName, 
                                            s.FirstName, 
                                            s.MiddleName
                                            )
                              ).ToArray()
                        );
      x.AppendLine("</Engineers>");
      return x.ToString();

}
Andrew Shepherd
  • 44,254
  • 30
  • 139
  • 205
1

I think it would be worthed to test if serialization of the whole list using XmlSeriaizer isn't faster.

Like this:

using (var fileStream = new FileStream("engineers.xml", FileMode.OpenOrCreate))
{
     var xmlSerializer = new XmlSerializer(typeof(List<Engineer>))
     xmlSerializer.Serialize(fileStream, list);
}
Fischermaen
  • 12,238
  • 2
  • 39
  • 56
1

All of the other examples are using string concat to generate your XML. The downside of this is that it will fail if any of your values are not properly escaping unsupported characters in the XML (like < and &). It is better to work more natively with the XML. Consider the following:

public static string CreateXMLforEngineersByLinq(List<Engineers> lst) 
{ 
    string x = New XElement("Engineers", 
                   lst.Select(new XElement, "Engineer",
                      new XElement("LicenseID", s.LicenseID),
                      new XElement("LastName", s.LastName),
                      new XElement("FirstName", s.FirstName),
                      new XElement("MiddleName", s.MiddleName)
                   )
               );
    return x.ToString();
} 

IF you don't want to take the memory overhead of creating the entire XML in memory before pushing it out to the string, you might want to consider using the XStreamingElement ( http://msdn.microsoft.com/en-us/library/system.xml.linq.xstreamingelement.aspx ). See http://www.microsoft.com/uk/msdn/nuggets/nugget/295/LINQ-to-XML-Streaming-Large-Data-Files-Out-of-Memory.aspx for a quick video of how to use it.

Jim Wooley
  • 10,169
  • 1
  • 25
  • 43