
I need to export a large DataTable (more than 50 lakh / 5 million DataRows) to a .csv file.

I am using the code below, but it's taking a long time.

public void CreateCSVFile(DataTable dtDataTablesList, string strFilePath)
{
    // Create the CSV file to which grid data will be exported.
    StreamWriter sw = new StreamWriter(strFilePath, false);
    //First we will write the headers.
    int iColCount = dtDataTablesList.Columns.Count;
    for (int i = 0; i < iColCount; i++)
    {
        sw.Write(dtDataTablesList.Columns[i]);
        if (i < iColCount - 1)
        {
            sw.Write("", "");
        }
    }
    sw.Write(sw.NewLine);

    // Now write all the rows.
    foreach (DataRow dr in dtDataTablesList.Rows)
    {
        for (int i = 0; i < iColCount; i++)
        {
            if (!Convert.IsDBNull(dr[i]))
            {
                sw.Write(dr[i].ToString());
            }
            if (i < iColCount - 1)
            {
                sw.Write("", "");
            }
        }
        sw.Write(sw.NewLine);
    }
    sw.Close();
}

Please let me know of a faster way to do this.

kishore

5 Answers


Instead of calling StreamWriter.Write(..) all the time, you may consider using a StringBuilder. Append all strings to the builder and write to disk only once!

string filePath = @"e:\temp\test.csv";
string delimiter = ",";

#region init DataTable
DataTable dt = new DataTable();
dt.Columns.Add(new DataColumn("a", typeof(string)));
dt.Columns.Add(new DataColumn("b", typeof(string)));
dt.Columns.Add(new DataColumn("c", typeof(string)));
dt.Columns.Add(new DataColumn("d", typeof(string)));
dt.Columns.Add(new DataColumn("e", typeof(string)));
dt.Columns.Add(new DataColumn("f", typeof(string)));
dt.Columns.Add(new DataColumn("g", typeof(string)));
dt.Columns.Add(new DataColumn("h", typeof(string)));
dt.Columns.Add(new DataColumn("i", typeof(string)));
dt.Columns.Add(new DataColumn("j", typeof(string)));
dt.Columns.Add(new DataColumn("k", typeof(string)));
dt.Columns.Add(new DataColumn("l", typeof(string)));
dt.Columns.Add(new DataColumn("m", typeof(string)));
dt.Columns.Add(new DataColumn("n", typeof(string)));
dt.Columns.Add(new DataColumn("o", typeof(string)));
dt.Columns.Add(new DataColumn("p", typeof(string)));

for (int i = 0; i < 100000; i++)
{
    DataRow dr = dt.NewRow();
    for (int j = 0; j < dt.Columns.Count; j++)
    {
        dr[j] = "test" + i + " " + j;
    }
    dt.Rows.Add(dr);
}
#endregion

Stopwatch sw = new Stopwatch();
sw.Start();
StringBuilder sb = new StringBuilder();
foreach (DataRow dr in dt.Rows)
{
    sb.AppendLine(string.Join(delimiter, dr.ItemArray));
}
File.WriteAllText(filePath, sb.ToString());
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Console.ReadLine();

EDIT

100000 rows took me 271 ms and created a file of around 18 MB

As @aiodintsov pointed out in the comments, there may be problems with using a StringBuilder once the data runs to hundreds of MB. So I created an example according to his comment. It worked fine for me and exported 1,000,000 rows within 2685 ms.

Stopwatch sw = new Stopwatch();
sw.Start();
// change buffer size and Encoding to your needs
using (StreamWriter swr =
         new StreamWriter(File.Open(filePath, FileMode.CreateNew), Encoding.Default, 1000000))
{
    foreach (DataRow dr in dt.Rows)
    {
        swr.WriteLine(string.Join(delimiter, dr.ItemArray));
    }
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Pilgerstorfer Franz
  • StringBuilder will be holding the whole thing in memory and then will create another memory block of the same size. It may be good for 18 MB and even 50 MB, but when reaching 200 MB or more it will become a headache. StreamWriter is free of such problems; it only needs a large enough buffer so that write operations occur less often. – aiodintsov Sep 07 '12 at 08:18

I've modified your code a little bit to use a StringBuilder as a buffer. That requires more RAM but should be more efficient. Play around with the initial Capacity and MaxCapacity to avoid OutOfMemoryExceptions and for maximum efficiency:

public void CreateFastCSVFile(DataTable table, string strFilePath)
{
    const int capacity = 5000000;
    const int maxCapacity = 20000000;

    //First we will write the headers.
    StringBuilder csvBuilder = new StringBuilder(capacity);
    csvBuilder.AppendLine(string.Join(",", table.Columns.Cast<DataColumn>().Select(c => c.ColumnName)));

    // Create the CSV file and write all from StringBuilder
    using (var sw = new StreamWriter(strFilePath, false))
    {
        foreach (DataRow dr in table.Rows)
        {
            if (csvBuilder.Capacity >= maxCapacity)
            {
                sw.Write(csvBuilder.ToString());
                csvBuilder = new StringBuilder(capacity);
            }
            csvBuilder.AppendLine(string.Join(",", dr.ItemArray));
        }
        sw.Write(csvBuilder.ToString());
    }
}

Here's a simple measurement with sample data (10,000,000 rows / 100 lakh DataRows).

Sample data:

var TblData = new DataTable();
TblData.Columns.Add("FeeID", typeof(int));
TblData.Columns.Add("Amount", typeof(int));
TblData.Columns.Add("FeeItem", typeof(string));
TblData.Columns.Add("Type", typeof(char));
for (int i = 0; i < 1000000; i++)
{
    TblData.Rows.Add(9, 8500, "Admission Free", 'T');
    TblData.Rows.Add(9, 950, "Annual Fee", 'T');
    TblData.Rows.Add(9, 150, "Application Free", 'T');
    TblData.Rows.Add(9, 850, "Boy's Uniform", DBNull.Value);
    TblData.Rows.Add(9, 50, DBNull.Value, 'R');
    TblData.Rows.Add(10, 7500, "Admission Free", 'T');
    TblData.Rows.Add(11, 900, "Annual Fee", 'T');
    TblData.Rows.Add(11, 150, "Application Free", 'T');
    TblData.Rows.Add(11, 850, DBNull.Value, 'T');
    TblData.Rows.Add(11, 50, "Computer Free", 'R');
}
int rowCount = TblData.Rows.Count; // 10000000

Measurement (less than 30 sec for a 207 MB file seems to be OK):

var watch = new System.Diagnostics.Stopwatch();
watch.Start();
CreateFastCSVFile(TblData, @"C:\Temp\TestCSV.csv");
watch.Stop();
Console.Write("Elapsed: {0}", watch.Elapsed); // 00:00:26 for 207 MB CSV-file
Tim Schmelter

Consider using an OleDbConnection with a connection string something like:

"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=c:\txtFilesFolder;Extended Properties="text;HDR=Yes;FMT=Delimited";

more sample connection strings
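
For illustration only, here is a rough sketch of how that export might be wired up with the Jet text driver. The file name export.csv, the column list, and the exact CREATE TABLE/INSERT syntax accepted by the text ISAM are my assumptions, not something given in this answer:

using System.Data.OleDb;

// Sketch only: the Jet text ISAM treats each file in the Data Source folder as a table,
// so creating a "table" should produce c:\txtFilesFolder\export.csv (hypothetical file name).
string connStr = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=c:\txtFilesFolder;" +
                 "Extended Properties=\"text;HDR=Yes;FMT=Delimited\"";

using (var conn = new OleDbConnection(connStr))
using (var cmd = conn.CreateCommand())
{
    conn.Open();

    cmd.CommandText = "CREATE TABLE [export.csv] (FeeID INT, Amount INT, FeeItem TEXT(255))";
    cmd.ExecuteNonQuery();

    // The driver takes care of quoting and escaping the values it writes.
    cmd.CommandText = "INSERT INTO [export.csv] (FeeID, Amount, FeeItem) VALUES (?, ?, ?)";
    cmd.Parameters.AddWithValue("@FeeID", 9);
    cmd.Parameters.AddWithValue("@Amount", 8500);
    cmd.Parameters.AddWithValue("@FeeItem", "Admission Fee");
    cmd.ExecuteNonQuery();
}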

There are some rules for CSV files and delimiters, and you should treat characters such as quotes, tabs, commas and newlines with extra care. See RFC 4180 for more details on such rules.
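
If you write the file yourself, a minimal escaping helper along those lines could look like this (the EscapeCsvField name is just for illustration, not an existing API):

// Minimal RFC 4180-style sketch: quote a field that contains the delimiter,
// a quote or a line break, and double any embedded quotes.
static string EscapeCsvField(string value)
{
    if (value == null)
        return string.Empty;

    bool needsQuoting = value.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0;
    if (!needsQuoting)
        return value;

    return "\"" + value.Replace("\"", "\"\"") + "\"";
}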

UPD: try increasing the file stream buffer:

using(var stream = new FileStream(path,FileMode.Create,FileAccess.Write,FileShare.None,4*1024*1024))
{
  // your code referencing stream in StreamWriter()
}

You can also specify a larger buffer size in the StreamWriter constructor. Not much else can be done to improve performance - StreamWriter is already quick enough, and ToString() for standard types is quite good. I doubt you output user-defined types there, but if you do, verify that their ToString() methods are efficient enough. Everything else is beyond your control here.
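
For example, building on the snippet above (path and dt stand for the file path and DataTable from the question; the 4 MB buffer size is just an example value):

using (var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None, 4 * 1024 * 1024))
using (var writer = new StreamWriter(stream, Encoding.UTF8, 4 * 1024 * 1024)) // explicit encoding and large buffer
{
    foreach (DataRow dr in dt.Rows)
    {
        writer.WriteLine(string.Join(",", dr.ItemArray));
    }
}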

aiodintsov
  • I believe the issue is with the slowness of writing to the CSV file, not connecting to the data source. – Darren Sep 07 '12 at 07:32
  • @DarrenDavies the issue is that this current implementation is quite error-prone – aiodintsov Sep 07 '12 at 07:41
  • If the issue is error-proneness, then why have you guided the OP to a connection string? – Darren Sep 07 '12 at 07:48
  • @DarrenDavies because OleDriver is capable of handling data values with characters such as delimiters, quotes and CRLF, so it does not have to be addressed in code. Besides, they may well be aware of optimal performance tuning. – aiodintsov Sep 07 '12 at 07:54

This is my final solution.

With this code we can export 50 lakh (5 million) records to a CSV file in less than 2 minutes. Instead of a DataTable, I used a DataReader here.

private void button1_Click(object sender, EventArgs e)
{
    Stopwatch swra = new Stopwatch();
    swra.Start();

    string NewconnectionString = "myConnectionString";
    string sqlselectQuery = "select * from Mytable";

    SqlCommand sqlcmd = new SqlCommand();
    SqlConnection spContentConn = new SqlConnection(NewconnectionString);
    sqlcmd.Connection = spContentConn;
    sqlcmd.CommandTimeout = 0;
    sqlcmd.CommandType = CommandType.Text;
    sqlcmd.CommandText = sqlselectQuery;
    spContentConn.Open();

    using (spContentConn)
    using (SqlDataReader sdr = sqlcmd.ExecuteReader())
    using (StreamWriter CsvfileWriter = new StreamWriter(@"D:\testfile.csv"))
    {
        // Write the table headers first.
        DataTable Tablecolumns = new DataTable();
        for (int i = 0; i < sdr.FieldCount; i++)
        {
            Tablecolumns.Columns.Add(sdr.GetName(i));
        }
        CsvfileWriter.WriteLine(string.Join(",", Tablecolumns.Columns.Cast<DataColumn>().Select(csvfile => csvfile.ColumnName)));

        // Then write the rows (adjust the column indexes to your own table).
        while (sdr.Read())
        {
            CsvfileWriter.WriteLine(sdr[0].ToString() + "," + sdr[1].ToString() + "," + sdr[2].ToString() + "," + sdr[3].ToString() + "," + sdr[4].ToString() + "," + sdr[5].ToString() + "," + sdr[6].ToString() + "," + sdr[7].ToString() + "," + sdr[8].ToString() + "," + sdr[9].ToString() + "," + sdr[10].ToString() + "," + sdr[11].ToString());
        }
    }

    swra.Stop();
    Console.WriteLine(swra.ElapsedMilliseconds);
}

Thanks to all.

kishore

One possible way to speed this up is to use a StringBuilder: append the data for about 1K records to the StringBuilder and then write it out with sw.Write().

So your logic should be to buffer 1,000 records in the StringBuilder first and then call sw.Write(), as sketched below.

This should increase performance for sure.

If you increase the batch from 1,000 records to 10K and test, the performance should be even better, in my opinion.
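
A rough sketch of that batching idea (dt and sw stand for the DataTable and StreamWriter from the question; the batch size is the 1,000 records suggested above):

const int batchSize = 1000;
StringBuilder sb = new StringBuilder();
int buffered = 0;

foreach (DataRow dr in dt.Rows)
{
    sb.AppendLine(string.Join(",", dr.ItemArray));
    if (++buffered == batchSize)
    {
        sw.Write(sb.ToString()); // flush this batch to the StreamWriter
        sb.Clear();
        buffered = 0;
    }
}
sw.Write(sb.ToString()); // flush any remaining rows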

Hope this helps.

Jigar Pandya