19

I have two tables which need to be inserted when my application run.
Let's say that I have tables as followed

  • tbl_FirstTable and tbl_SecondTable

My problem is data volume.
I need to insert over 10,000 rows to tbl_FirstTable and over 500,000 rows to tbl_SecondTable.

So fristly, I use entity framework as follow.

public bool Save_tbl_FirstTable_Vs_tbl_SecondTable(List<tbl_FirstTable> List_tbl_FirstTable, List<tbl_SecondTable> List_tbl_SecondTable)
{
    bool IsSuccessSave = false;
    try
    {
        using (DummyDBClass_ObjectContext _DummyDBClass_ObjectContext = new DummyDBClass_ObjectContext())
        {           
            foreach (tbl_FirstTable _tbl_FirstTable in List_tbl_FirstTable)
            {
                _DummyDBClass_ObjectContext.tbl_FirstTable.InsertOnSubmit(_tbl_FirstTable);
            }

            foreach (tbl_SecondTable _tbl_SecondTable in List_tbl_SecondTable)
            {
                _DummyDBClass_ObjectContext.tbl_SecondTable.InsertOnSubmit(_tbl_SecondTable);
            }

            _DummyDBClass_ObjectContext.SubmitChanges();
            IsSuccessSave = true;
        }
    }
    catch (Exception ex)
    {
        Log4NetWrapper.WriteError(string.Format("{0} : {1} : Exception={2}",
                                    this.GetType().FullName,
                                    (new StackTrace(new StackFrame(0))).GetFrame(0).GetMethod().Name.ToString(),
                                    ex.Message.ToString()));

        if (ex.InnerException != null)
        {
            Log4NetWrapper.WriteError(string.Format("{0} : {1} : InnerException Exception={2}",
                                    this.GetType().FullName,
                                    (new StackTrace(new StackFrame(0))).GetFrame(0).GetMethod().Name.ToString(),
                                    ex.InnerException.Message.ToString()));
        }
    }

    return IsSuccessSave;
}

That is the place I face error Time out exception.
I think that exception will be solved If I use below code.

DummyDBClass_ObjectContext.CommandTimeout = 1800; // 30 minutes

So I used it. It solved but I face another error OutOfMemory Exception.
So I searched the solutions, fortunately, I found below articles.

  1. Problem with Bulk insert using Entity Framework
  2. Using Transactions with SqlBulkCopy
  3. Performing a Bulk Copy Operation in a Transaction

According to that articles, I change my code from Entity Framework to Classic ADO.net code.

public bool Save_tbl_FirstTable_Vs_tbl_SecondTable(DataTable DT_tbl_FirstTable, DataTable DT_tbl_SecondTable)
{
    bool IsSuccessSave = false;
    SqlTransaction transaction = null;
    try
    {
        using (DummyDBClass_ObjectContext _DummyDBClass_ObjectContext = new DummyDBClass_ObjectContext())
        {
            var connectionString = ((EntityConnection)_DummyDBClass_ObjectContext.Connection).StoreConnection.ConnectionString;
            using (SqlConnection connection = new SqlConnection(connectionString))
            {
                connection.Open();
                using (transaction = connection.BeginTransaction())
                {
                    using (SqlBulkCopy bulkCopy_tbl_FirstTable = new SqlBulkCopy(connection, SqlBulkCopyOptions.KeepIdentity, transaction))                            
                    {
                        bulkCopy_tbl_FirstTable.BatchSize = 5000;
                        bulkCopy_tbl_FirstTable.DestinationTableName = "dbo.tbl_FirstTable";
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("ID", "ID");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("UploadFileID", "UploadFileID");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("Active", "Active");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("CreatedUserID", "CreatedUserID");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("CreatedDate", "CreatedDate");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("UpdatedUserID", "UpdatedUserID");
                        bulkCopy_tbl_FirstTable.ColumnMappings.Add("UpdatedDate", "UpdatedDate");
                        bulkCopy_tbl_FirstTable.WriteToServer(DT_tbl_FirstTable);
                    }

                    using (SqlBulkCopy bulkCopy_tbl_SecondTable = new SqlBulkCopy(connection, SqlBulkCopyOptions.KeepIdentity, transaction))                            
                    {

                        bulkCopy_tbl_SecondTable.BatchSize = 5000;
                        bulkCopy_tbl_SecondTable.DestinationTableName = "dbo.tbl_SecondTable";
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("ID", "ID");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("UploadFileDetailID", "UploadFileDetailID");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("CompaignFieldMasterID", "CompaignFieldMasterID");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("Value", "Value");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("Active", "Active");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("CreatedUserID", "CreatedUserID");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("CreatedDate", "CreatedDate");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("UpdatedUserID", "UpdatedUserID");
                        bulkCopy_tbl_SecondTable.ColumnMappings.Add("UpdatedDate", "UpdatedDate");
                        bulkCopy_tbl_SecondTable.WriteToServer(DT_tbl_SecondTable);
                    }


                    transaction.Commit();
                    IsSuccessSave = true;
                }
                connection.Close();
            }
        }
    }
    catch (Exception ex)
    {
        if (transaction != null)
            transaction.Rollback();

        Log4NetWrapper.WriteError(string.Format("{0} : {1} : Exception={2}",
                                    this.GetType().FullName,
                                    (new StackTrace(new StackFrame(0))).GetFrame(0).GetMethod().Name.ToString(),
                                    ex.Message.ToString()));

        if (ex.InnerException != null)
        {
            Log4NetWrapper.WriteError(string.Format("{0} : {1} : InnerException Exception={2}",
                                    this.GetType().FullName,
                                    (new StackTrace(new StackFrame(0))).GetFrame(0).GetMethod().Name.ToString(),
                                    ex.InnerException.Message.ToString()));
        }
    }

    return IsSuccessSave;
}

Finally, It perform insert process in less than 15 seconds for over 500,000 rows.

There is two reasons why I post this scenario.

  1. I would like to share what I found out.
  2. As I am not perfect, I still need to get more suggestion from you.

So, every better solution will be appreciated.

Frank Myat Thu
  • 4,448
  • 9
  • 67
  • 113

3 Answers3

2

1) Use EF6.x, which has much better performance than EF5.x

Here are more suggestions (from Bulk insert with EF)

2) Keep the active Context Graph small by using a new context for each Unit of Work

3) Turn off AutoDetechChangesEnabled - context.Configuration.AutoDetectChangesEnabled = false;

4) Batching, in your loop, Call SaveChanges periodically

Community
  • 1
  • 1
Carl Prothman
  • 1,461
  • 13
  • 23
1

I use payed Entity Framework extension from ZZZ Projects which is developer friendly because of fluent API (extenssion methods, functional approach). This is not an andvertisment, I use it in business projects for several years and it is great. If You want to use something for free and You have Oracle database the Oracle Managed Data Access Oracle.ManagedDataAccess.Core has implementation of bulk operations.

Dawid Wekwejt
  • 533
  • 1
  • 4
  • 19
1

Bulk Operations are not really what ORMs are meant for. For bulk insert operations, I send xml to the stored procedure, and I shred it and bulk insert/update or merge from there. So even when I use an ORM, I create a Domain Library that is not EF (or NHibernate) dependent.so I have a "safety valve" to bypass the ORM in certain situations.

You should look at using the System.Data.SqlClient.SqlBulkCopy for this. Here's the documentation- http://msdn.microsoft.com/en-us/library/system.data.sqlclient.sqlbulkcopy.aspx, and of course there are plenty of tutorials online.

In case we want EF to bulk insert records, would suggest below points to improve performance

  • Call SaveChanges() after for example 100 records and dispose the context and create a new one.
  • Disable change detection

Example:

using (TransactionScope scope = new TransactionScope())
{
    MyDbContext context = null;
    try
    {
        context = new MyDbContext();
        context.Configuration.AutoDetectChangesEnabled = false;

        int count = 0;            
        foreach (var entityToInsert in someCollectionOfEntitiesToInsert)
        {
            ++count;
            context = AddToContext(context, entityToInsert, count, 100, true);
        }

        context.SaveChanges();
    }
    finally
    {
        if (context != null)
            context.Dispose();
    }

    scope.Complete();
}

private MyDbContext AddToContext(MyDbContext context,
    Entity entity, int count, int commitCount, bool recreateContext)
{
    context.Set<Entity>().Add(entity);

    if (count % commitCount == 0)
    {
        context.SaveChanges();
        if (recreateContext)
        {
            context.Dispose();
            context = new MyDbContext();
            context.Configuration.AutoDetectChangesEnabled = false;
        }
    }

    return context;
}

For the performance it is important to call SaveChanges() after "many" records ("many" around 100 or 1000). It also improves the performance to dispose the context after SaveChanges and create a new one.

This clears the context from all entites, SaveChanges doesn't do that, the entities are still attached to the context in state Unchanged. It is the growing size of attached entities in the context what slows down the insertion step by step. So, it is helpful to clear it after some time.

AutoDetectChangesEnabled = false; on the DbContext.

It also has a big additional performance effect: Why is inserting entities in EF 4.1 so slow compared to ObjectContext?.

below combination increase speed well enough in EF.

  • context.Configuration.AutoDetectChangesEnabled = false;
  • context.Configuration.ValidateOnSaveEnabled = false;