2

Is there a more elegant/faster way to write the code below? Currently taking about 45seconds.

query.sql is 200,000 lines long and has SQL in it exactly like this on each line:

SELECT N'+dave' AS [AccountName], N'20005' AS [EmployeeID], N'-6' AS [PlatformID] UNION ALL

I found that by chunking into blocks of 1000 that things were much quicker than waiting until the end and using WriteAllText (which took about 20 minutes to run)

static void Main(string[] args)
{
    var s = new Stopwatch();
    s.Start();

    string textToWrite = "";
    string[] lines = File.ReadAllLines(@"e:\temp\query.sql");

    int i = 0;
    foreach (var line in lines)
    {
        var bits = line.Split('\'');

        var value1 = bits[1];
        var value2 = bits[3];
        var value3 = bits[5];

        var message = "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
                    "'" + value1 + "', "
                    + value2 + ", "
                    + value3 + ")";

        textToWrite += message + Environment.NewLine;

        if (i % 1000 == 0)
        {
            Console.WriteLine(i + " " + DateTime.Now.ToLongTimeString());
            File.AppendAllText(@"e:\temp\query2.sql", textToWrite);
            textToWrite = "";
        }
        i++;
    }

    //File.WriteAllText(@"e:\temp\query2.sql", textToWrite);
    File.AppendAllText(@"e:\temp\query2.sql", textToWrite);

    s.Stop();
    TimeSpan ts = s.Elapsed;
    Console.WriteLine("Timespan: {0}m", ts.TotalMinutes);
    Console.WriteLine("Total records: " + i);

    Console.ReadLine();
}

Edit: StringBuilder Solution (1000ms):

static void Main2(string[] args)
{
    var s = new Stopwatch();
    s.Start();

    var textToWrite = new StringBuilder();
    string[] lines = File.ReadAllLines(@"e:\temp\query.sql");

    int i = 0;
    foreach (var line in lines)
    {
        var bits = line.Split('\'');

        var value1 = bits[1];
        var value2 = bits[3];
        var value3 = bits[5];

        var message = "INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
                    "'" + value1 + "', "
                    + value2 + ", "
                    + value3 + ")"
                    + Environment.NewLine;

        textToWrite.Append(message);

        // Buffering
        if (i % 1000 == 0)
        {
            Console.WriteLine(i + " " + DateTime.Now.ToLongTimeString());
            File.AppendAllText(@"e:\temp\query2.sql", textToWrite.ToString());
            textToWrite = new StringBuilder();
        }
        i++;
    }

    File.AppendAllText(@"e:\temp\query2.sql", textToWrite.ToString());

    s.Stop();
    TimeSpan ts = s.Elapsed;
    Console.WriteLine("Timespan: {0}ms", ts.TotalMilliseconds);
    Console.WriteLine("Total records: " + i);

    Console.ReadLine();
}

Edit: StreamWriter solution (450ms)

static void Main(string[] args)
    {
        var s = new Stopwatch();
        s.Start();

        string[] lines = File.ReadAllLines(@"e:\temp\query.sql");
        int i = 0;
        using (StreamWriter writer = File.AppendText(@"e:\temp\query2.sql"))
        {
            foreach (var line in lines)
            {
                var bits = line.Split('\'');

                var value1 = bits[1];
                var value2 = bits[3];
                var value3 = bits[5];

                writer.WriteLine("INSERT [PreStaging].[Import_AccountEmployeeMapping] ([AccountName], [EmployeeID], [PlatformID]) VALUES (N'{0}', {1}, {2})",
                    value1, value2, value3);

                i++;
            }
        }

        s.Stop();
        TimeSpan ts = s.Elapsed;
        Console.WriteLine("Timespan: {0}ms", ts.TotalMilliseconds);
        Console.WriteLine("Total records: " + i);

        Console.ReadLine();
    }
Dave Mateer
  • 6,588
  • 15
  • 76
  • 125
  • 2
    Reading between the lines it looks like your original problem is to get the results of a query in SSMS into some form to be able to load that data into a table in some other place or time. There are loads of simpler options for this for which c# is not really the best tool given that you have to compile an assembly for it. Perhaps ask a question focussed on that as I don't want to take this (resolved) question off on a tangent... – rohancragg Aug 02 '13 at 10:21

5 Answers5

4

As others have pointed out, use a StringBuilder. So in your case, declare:

StringBuilder textToWrite = new StringBuilder();

Then:

textToWrite.AppendLine(message);
if (i % 1000 == 0)
{
    Console.WriteLine(i + " " + DateTime.Now.ToLongTimeString());
    File.AppendAllText(@"e:\temp\query2.sql", textToWrite.ToString());
    textToWrite = new StringBuilder();
}

Although you'd probably be better off dispensing with the buffering altogether:

using (StreamWriter writer = File.AppendText(filename))
{
    // initialization stuff here

    foreach (var line in lines)
    {
        var bits = line.Split('\'');

        var value1 = bits[1];
        var value2 = bits[3];
        var value3 = bits[5];

        var message = "INSERT [PreStaging].[Import_AccountEmployeeMapping]                     ([AccountName], [EmployeeID], [PlatformID]) VALUES (N" +
                "'" + value1 + "', "
                + value2 + ", "
                + value3 + ")";

         writer.WriteLine(message); // write the line
    }
}
Jim Mischel
  • 131,090
  • 20
  • 188
  • 351
  • 2
    ... and let's hope a `bits` value isn't Little Bobby Tables. – Austin Salonen Aug 01 '13 at 16:46
  • Many thanks Jim, and everyone else who answered. I've put up the code solutions above and times of execution. Quite frankly, the speed improvements have been very surprising. – Dave Mateer Aug 02 '13 at 09:01
  • 1
    Agreed regarding StringBuilder, whenever you concat a string with the + , stop and think of StringBuilder or string.Format() instead. – rohancragg Aug 02 '13 at 10:17
2

A good start is to use the StringBuilder built in class in .net. This is going to avoid a bunch of string allocations and copying.

See the MSDN documenation about how it works: http://msdn.microsoft.com/en-us/library/system.text.stringbuilder.aspx

Also see this Stackoverflow post for more information: Most efficient way to concatenate strings?

Example:

StringBuilder a = new StringBuilder();
a.Append("some text");
a.Append("more text");
string result = a.ToString();
Community
  • 1
  • 1
Ralph Caraveo
  • 10,025
  • 7
  • 40
  • 52
  • 1
    Instead of building a string, writing to a file using StreamWriter on a FileStream instance would be more ideal. However, you ultimate point is correct in that it will be much faster than concatenating the strings over and over. – Darren Kopp Aug 01 '13 at 16:39
  • 2
    @DarrenKopp I agree, that he can go directly to using a StreamWriter, but I think the more important concept here to realize is his performance penalty for the raw string concatenation. He will greatly benefit understanding this class first and how to avoid this in the future when manipulating strings. Just my opinion. :) – Ralph Caraveo Aug 01 '13 at 16:46
1

What version of sql server? The best way to do this is not to use one giant sql script, but to use either a table valued parameter or use sql servers bulk copy support.

Darren Kopp
  • 76,581
  • 9
  • 79
  • 93
1

The best way would most likely be to open both files simultaneously, read and write each line as you go, then close the files.

However, the biggest problem you are most likely running into is string concatenation. Strings in .NET are immutable, so each concatenation results in a new copy being allocated, which takes both time and memory (although the GC will eventually give you back the latter).

If you replace textToWrite with a StringBuilder, and only do a single ToString() at the end, you will see a lot better performance.

Or, honestly, you could probably do a single regex replace on the whole thing and be done with it, although I believe you have to read the whole file into memory first as you are already doing.

lc.
  • 113,939
  • 20
  • 158
  • 187
0

MemoryMappedFiles are efficient so they may be worth looking into.

string[] lines = File.ReadAllLines(@"e:\temp\query.sql");
using (var mmf = MemoryMappedFile.CreateFromFile(@"e:\temp\query2.sql", FileMode.Create, "txt", new FileInfo(@"e:\temp\query.sql")Length))
{       
    StringBuilder sb = new StringBuilder();
    using (MemoryMappedViewStream mmvs = mmf.CreateViewStream())
    {
       StreamWriter writer = new StreamWriter(mmvs);
       for (int i = 0; i < lines.Length; i++)
       {
          var bits = lines[i].Split('\'');

          var value1 = bits[1];
          var value2 = bits[3];
          var value3 = bits[5];

          sb.AppendFormat("INSERT [PreStaging].[Import_AccountEmployeeMapping]
                          ([AccountName], [EmployeeID], [PlatformID])
                         VALUES (N'{0}', {1}, {2})", value1, value2, value3);


          writer.WriteLine(message.ToString()); 
      }
   }
}

You may find building the entire text first and then writing the entire thing to the MemoryMappedFiled performs better as there are fewer calls to ToString instead.

keyboardP
  • 68,824
  • 13
  • 156
  • 205