2

Yes, there is already the question Bulk Insert Optimization in .NET / EF Core. But this question is different.


I ran into a scenario where I have to create a DB and populate it with processed/calculated/generated data. The amount of records is between 100 millions and 200 millions of records.

Since seeding the data takes ages I can not do it on startup (of an web api). So I created a separated console application migrating the database and seeding it.

It still takes ages (I even haven't finished a single run of seeding yet).

Anything else what I can optimize in the main method (except removing console logs which are only there for debugging at the moment).


using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
using Microsoft.EntityFrameworkCore.Migrations;
using System;
using System.Diagnostics;

namespace EFCoreBulkInsert
{

    class EntityA
    {

        public int Id { get; set; }
        public int ColA { get; set; }
        public string ColB { get; set; }

        public EntityB EntityB { get; set; }
        public EntityC EntityC { get; set; }

    }

    class EntityB
    {

        public int Id { get; set; }
        public string Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class EntityC
    {

        public int Id { get; set; }
        public int Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class MyDbContext : DbContext
    {

        public DbSet EntityA { get; set; }
        public DbSet EntityB { get; set; }
        public DbSet EntityC { get; set; }

        public MyDbContext(DbContextOptions options) : base(options) { }

        protected override void OnModelCreating(ModelBuilder builder)
        {
            builder.Entity(BuildEntityA);
            builder.Entity(BuildEntityB);
            builder.Entity(BuildEntityC);
        }

        protected virtual void BuildEntityA(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityA")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                .UseIdentityColumn(1, 1)
                .Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore);

            entity.Property(e => e.ColA)
                .HasColumnName("ColA")
                .IsRequired();

            entity.HasIndex(e => e.ColA)
                .HasName("IX_EntityA_ColA");

            entity.Property(e => e.ColB)
                .HasColumnName("ColB")
                .IsRequired();

            entity.HasIndex(e => e.ColB)
                .HasName("IX_EntityA_ColB");

            entity.HasOne(e => e.EntityB)
                .WithOne(e => e.EntityA);

            entity.HasOne(e => e.EntityC)
                .WithOne(e => e.EntityA);

        }

        protected virtual void BuildEntityB(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityB")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityB_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityB)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

        protected virtual void BuildEntityC(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityC")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityC_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityC)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

    }

    [DbContext(typeof(MyDbContext))]
    partial class MyDbContextModelSnapshot : ModelSnapshot
    {
        protected override void BuildModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    public partial class Initial : Migration
    {
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "EntityA",
                columns: table => new
                {
                    Id = table.Column(nullable: false)
                        .Annotation("SqlServer:Identity", "1, 1"),
                    ColA = table.Column(nullable: false),
                    ColB = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityA", x => x.Id);
                });

            migrationBuilder.CreateTable(
                name: "EntityB",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityB", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityB-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateTable(
                name: "EntityC",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityC", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityC-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColA",
                table: "EntityA",
                column: "ColA");

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColB",
                table: "EntityA",
                column: "ColB");

            migrationBuilder.CreateIndex(
                name: "IX_EntityB_Col",
                table: "EntityB",
                column: "Col");

            migrationBuilder.CreateIndex(
                name: "IX_EntityC_Col",
                table: "EntityC",
                column: "Col");
        }

        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropTable(
                name: "EntityB");

            migrationBuilder.DropTable(
                name: "EntityC");

            migrationBuilder.DropTable(
                name: "EntityA");
        }
    }

    [DbContext(typeof(MyDbContext))]
    [Migration("20200103182223_Initial")]
    partial class Initial
    {
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    class Program
    {

        static int EntityCount = 0;
        const int EntityLimit = 200_000_000;

        /// 
        /// Just a fake factory method mimicking the real procedural generation of the db records
        /// 
        /// The next enity to store or null if no more entities can be calculated.
        static EntityA CalculateNextEntity()
        {
            if (EntityCount >= EntityLimit) return null;
            EntityCount++;

            return new EntityA
            {
                // id is generated by the db
                ColA = EntityCount,
                ColB = EntityCount.ToString(),

                EntityB = new EntityB
                {
                    // id is generated by the db
                    Col = EntityCount.ToString()
                },

                EntityC = new EntityC
                {
                    // id is generated by the db
                    Col = EntityCount
                }
            };
        }

        static void Main(string[] args)
        {

            string connectionString = "Server=(localdb)\\mssqllocaldb;Database=TestDB;Trusted_Connection=True;MultipleActiveResultSets=true";

            DbContextOptionsBuilder builder = new DbContextOptionsBuilder();
            builder.UseSqlServer(connectionString);

            using (MyDbContext dbContext = new MyDbContext(builder.Options))
            {

                Console.Write("migrating ... ");

                dbContext.Database.Migrate();

                Console.WriteLine(" done");


                dbContext.ChangeTracker.AutoDetectChangesEnabled = false;

                EntityA entity;

                int count = 0;
                int batchSize = 1000;

                Stopwatch watch = new Stopwatch();
                watch.Start();

                while (true)
                {
                    entity = CalculateNextEntity();
                    if (entity == null) break;

                    count++;

                    dbContext.Add(entity);

                    if (count == batchSize)
                    {
                        count = 0;
                        dbContext.SaveChanges();
                    }

                    Console.WriteLine($"{entity.Id} {entity.ColA} {entity.ColB} {entity.EntityB.Col} {entity.EntityC.Col}");

                }

                dbContext.SaveChanges();
                Console.WriteLine("---");

                int entityCount = dbContext.EntityA.CountAsync().Result;
                Console.WriteLine($"inserted {entityCount} within {watch.ElapsedMilliseconds} milliseconds");

            }


        }
    }
}

Note this is the full working code. Copy and paste into a console app Program.cs and it compiles and runs. Required nugets:

  • Microsoft.EntityFrameworkCore 3.1.0
  • Microsoft.EntityFrameworkCore.Relational 3.1.0
  • Microsoft.EntityFrameworkCore.SqlServer 3.1.0
  • Microsoft.EntityFrameworkCore.Tools 3.1.0
monty
  • 7,888
  • 16
  • 63
  • 100
  • If the generated output always the same i would suggest using backup restore. [Info](https://stackoverflow.com/questions/6267273/how-to-restore-to-a-different-database-in-sql-server) – Eldar Jan 03 '20 at 19:15
  • 1
    I would change CalculateNextEntity to StartingEntities that returns `IEnumerable`. Then install EFCore.BulkExtensions and `dbContext.BulkInsert(StartingEntities());` Should be ~30 times faster. – Robert McKee Jan 03 '20 at 19:51
  • 1
    If you still want to use batchSizes, then look at using https://www.nuget.org/packages/MoreLinq and `foreach(var batch in StartingEntities().Batch(1000)) { dbContext.BulkInsert(batch); }` – Robert McKee Jan 03 '20 at 19:56
  • And remove the console.writeline in your loop. That will greatly affect the performance. Writing to the console is very slow. If you want progress information, then put it in your if statement so it only happens once per batch. I know you said this, but just be aware that it is likely the biggest performance hit of everything in your code, so it will greatly skew any performance measurements you are trying to make. – Robert McKee Jan 03 '20 at 20:37

1 Answers1

3

In my view it is better to avoid EF to insert such amount of rows. It is better to give a try to use bulk insert using DataTable:

using (SqlConnection connection = new SqlConnection(connString))
{
    SqlBulkCopy bulkCopy = new SqlBulkCopy(
        connection, 
        SqlBulkCopyOptions.TableLock | 
        SqlBulkCopyOptions.FireTriggers | 
        SqlBulkCopyOptions.UseInternalTransaction,
        null
        );    

    bulkCopy.DestinationTableName = tableName;
    connection.Open();
    bulkCopy.WriteToServer(dataTable);
    connection.Close();
}

Then just reuse the above code for all entities.

It has some advantages:

  • no need to write unnecessary code, e.g. methods like BuildEntityB
  • Bulk insert is faster then EF. It is very fast.
  • the above code is highly reusable. You can insert any entity without creating auxiliary code which requires Entity Framework
StepUp
  • 36,391
  • 15
  • 88
  • 148
  • I agree that would be one way. But the real model is intended to run against different providers at the end (MSSQL, Sqlite, Postgre, MySql, InMemory) – monty Jan 03 '20 at 19:18
  • 1
    It is the decision of each provider whether or not to provide a bulk copy feature. You wouldn't need bulk copy for an InMemory implementation, MySql has `MySqlBulkLoader`, and PG has `PgSqlLoader ` – Adam Vincent Jan 03 '20 at 19:35