EF Core: inserting large amounts of data

c# entity-framework-core

Question

Yes, there is already the question Bulk Insert Optimization in .NET / EF Core. But this question is different.


I ran into a scenario where I have to create a DB and populate it with processed/calculated/generated data. The amount of records is between 100 millions and 200 millions of records.

Since seeding the data takes ages I can not do it on startup (of an web api). So I created a separated console application migrating the database and seeding it.

It still takes ages (I even haven't finished a single run of seeding yet).

Anything else what I can optimize in the main method (except removing console logs which are only there for debugging at the moment).


using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
using Microsoft.EntityFrameworkCore.Migrations;
using System;
using System.Diagnostics;

namespace EFCoreBulkInsert
{

    class EntityA
    {

        public int Id { get; set; }
        public int ColA { get; set; }
        public string ColB { get; set; }

        public EntityB EntityB { get; set; }
        public EntityC EntityC { get; set; }

    }

    class EntityB
    {

        public int Id { get; set; }
        public string Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class EntityC
    {

        public int Id { get; set; }
        public int Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class MyDbContext : DbContext
    {

        public DbSet EntityA { get; set; }
        public DbSet EntityB { get; set; }
        public DbSet EntityC { get; set; }

        public MyDbContext(DbContextOptions options) : base(options) { }

        protected override void OnModelCreating(ModelBuilder builder)
        {
            builder.Entity(BuildEntityA);
            builder.Entity(BuildEntityB);
            builder.Entity(BuildEntityC);
        }

        protected virtual void BuildEntityA(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityA")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                .UseIdentityColumn(1, 1)
                .Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore);

            entity.Property(e => e.ColA)
                .HasColumnName("ColA")
                .IsRequired();

            entity.HasIndex(e => e.ColA)
                .HasName("IX_EntityA_ColA");

            entity.Property(e => e.ColB)
                .HasColumnName("ColB")
                .IsRequired();

            entity.HasIndex(e => e.ColB)
                .HasName("IX_EntityA_ColB");

            entity.HasOne(e => e.EntityB)
                .WithOne(e => e.EntityA);

            entity.HasOne(e => e.EntityC)
                .WithOne(e => e.EntityA);

        }

        protected virtual void BuildEntityB(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityB")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityB_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityB)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

        protected virtual void BuildEntityC(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityC")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityC_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityC)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

    }

    [DbContext(typeof(MyDbContext))]
    partial class MyDbContextModelSnapshot : ModelSnapshot
    {
        protected override void BuildModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    public partial class Initial : Migration
    {
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "EntityA",
                columns: table => new
                {
                    Id = table.Column(nullable: false)
                        .Annotation("SqlServer:Identity", "1, 1"),
                    ColA = table.Column(nullable: false),
                    ColB = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityA", x => x.Id);
                });

            migrationBuilder.CreateTable(
                name: "EntityB",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityB", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityB-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateTable(
                name: "EntityC",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityC", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityC-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColA",
                table: "EntityA",
                column: "ColA");

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColB",
                table: "EntityA",
                column: "ColB");

            migrationBuilder.CreateIndex(
                name: "IX_EntityB_Col",
                table: "EntityB",
                column: "Col");

            migrationBuilder.CreateIndex(
                name: "IX_EntityC_Col",
                table: "EntityC",
                column: "Col");
        }

        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropTable(
                name: "EntityB");

            migrationBuilder.DropTable(
                name: "EntityC");

            migrationBuilder.DropTable(
                name: "EntityA");
        }
    }

    [DbContext(typeof(MyDbContext))]
    [Migration("20200103182223_Initial")]
    partial class Initial
    {
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    class Program
    {

        static int EntityCount = 0;
        const int EntityLimit = 200_000_000;

        /// 
        /// Just a fake factory method mimicking the real procedural generation of the db records
        /// 
        /// The next enity to store or null if no more entities can be calculated.
        static EntityA CalculateNextEntity()
        {
            if (EntityCount >= EntityLimit) return null;
            EntityCount++;

            return new EntityA
            {
                // id is generated by the db
                ColA = EntityCount,
                ColB = EntityCount.ToString(),

                EntityB = new EntityB
                {
                    // id is generated by the db
                    Col = EntityCount.ToString()
                },

                EntityC = new EntityC
                {
                    // id is generated by the db
                    Col = EntityCount
                }
            };
        }

        static void Main(string[] args)
        {

            string connectionString = "Server=(localdb)\\mssqllocaldb;Database=TestDB;Trusted_Connection=True;MultipleActiveResultSets=true";

            DbContextOptionsBuilder builder = new DbContextOptionsBuilder();
            builder.UseSqlServer(connectionString);

            using (MyDbContext dbContext = new MyDbContext(builder.Options))
            {

                Console.Write("migrating ... ");

                dbContext.Database.Migrate();

                Console.WriteLine(" done");


                dbContext.ChangeTracker.AutoDetectChangesEnabled = false;

                EntityA entity;

                int count = 0;
                int batchSize = 1000;

                Stopwatch watch = new Stopwatch();
                watch.Start();

                while (true)
                {
                    entity = CalculateNextEntity();
                    if (entity == null) break;

                    count++;

                    dbContext.Add(entity);

                    if (count == batchSize)
                    {
                        count = 0;
                        dbContext.SaveChanges();
                    }

                    Console.WriteLine($"{entity.Id} {entity.ColA} {entity.ColB} {entity.EntityB.Col} {entity.EntityC.Col}");

                }

                dbContext.SaveChanges();
                Console.WriteLine("---");

                int entityCount = dbContext.EntityA.CountAsync().Result;
                Console.WriteLine($"inserted {entityCount} within {watch.ElapsedMilliseconds} milliseconds");

            }


        }
    }
}

Note this is the full working code. Copy and paste into a console app Program.cs and it compiles and runs. Required nugets:

  • Microsoft.EntityFrameworkCore 3.1.0
  • Microsoft.EntityFrameworkCore.Relational 3.1.0
  • Microsoft.EntityFrameworkCore.SqlServer 3.1.0
  • Microsoft.EntityFrameworkCore.Tools 3.1.0
1
1
1/3/2020 7:01:00 PM

Popular Answer

In my view it is better to avoid EF to insert such amount of rows. It is better to give a try to use bulk insert using DataTable:

using (SqlConnection connection = new SqlConnection(connString))
{
    SqlBulkCopy bulkCopy = new SqlBulkCopy(
        connection, 
        SqlBulkCopyOptions.TableLock | 
        SqlBulkCopyOptions.FireTriggers | 
        SqlBulkCopyOptions.UseInternalTransaction,
        null
        );    

    bulkCopy.DestinationTableName = tableName;
    connection.Open();
    bulkCopy.WriteToServer(dataTable);
    connection.Close();
}

Then just reuse the above code for all entities.

It has some advantages:

  • no need to write unnecessary code, e.g. methods like BuildEntityB
  • Bulk insert is faster then EF. It is very fast.
  • the above code is highly reusable. You can insert any entity without creating auxiliary code which requires Entity Framework
2
1/3/2020 7:11:12 PM


Related Questions





Related

Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow