
I have a TVP+SP insert strategy implemented, as I need to insert large amounts of rows (probably concurrently) while being able to get some info back, such as the generated Ids. Initially I'm using an EF code-first approach to generate the DB structure. My entities:

FacilityGroup

public class FacilityGroup
{
    public int Id { get; set; }

    [Required]
    public string Name { get; set; }

    public string InternalNotes { get; set; }

    public virtual List<FacilityInstance> Facilities { get; set; } = new List<FacilityInstance>();
}

FacilityInstance

public class FacilityInstance
{
    public int Id { get; set; }

    [Required]
    [Index("IX_FacilityName")]
    [StringLength(450)]
    public string Name { get; set; }

    [Required]
    public string FacilityCode { get; set; }

    //[Required]
    public virtual FacilityGroup FacilityGroup { get; set; }

    [ForeignKey(nameof(FacilityGroup))]
    [Index("IX_FacilityGroupId")]
    public int FacilityGroupId { get; set; }

    public virtual List<DataBatch> RelatedBatches { get; set; } = new List<DataBatch>();

    public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}

BatchRecord

public class BatchRecord
{
    public long Id { get; set; }

    //todo index?
    public string ItemName { get; set; }

    [Index("IX_Supplier")]
    [StringLength(450)]
    public string Supplier { get; set; }

    public decimal Quantity { get; set; }

    public string ItemUnit { get; set; }

    public string EntityUnit { get; set; }

    public decimal ItemSize { get; set; }

    public decimal PackageSize { get; set; }

    [Index("IX_FamilyCode")]
    [Required]
    [StringLength(4)]
    public string FamilyCode { get; set; }

    [Required]
    public string Family { get; set; }

    [Index("IX_CategoryCode")]
    [Required]
    [StringLength(16)]
    public string CategoryCode { get; set; }

    [Required]
    public string Category { get; set; }

    [Index("IX_SubCategoryCode")]
    [Required]
    [StringLength(16)]
    public string SubCategoryCode { get; set; }

    [Required]
    public string SubCategory { get; set; }

    public string ItemGroupCode { get; set; }

    public string ItemGroup { get; set; }

    public decimal PurchaseValue { get; set; }

    public decimal UnitPurchaseValue { get; set; }

    public decimal PackagePurchaseValue { get; set; }

    [Required]
    public virtual DataBatch DataBatch { get; set; }

    [ForeignKey(nameof(DataBatch))]
    public int DataBatchId { get; set; }

    [Required]
    public virtual FacilityInstance FacilityInstance { get; set; }

    [ForeignKey(nameof(FacilityInstance))]
    [Index("IX_FacilityInstance")]
    public int FacilityInstanceId { get; set; }

    [Required]
    public virtual Currency Currency { get; set; }

    [ForeignKey(nameof(Currency))]
    public int CurrencyId { get; set; }
}

DataBatch

public class DataBatch
{
    public int Id { get; set; }

    [Required]
    public string Name { get; set; }

    public DateTime DateCreated { get; set; }

    public BatchStatus BatchStatus { get; set; }

    public virtual List<FacilityInstance> RelatedFacilities { get; set; } = new List<FacilityInstance>();

    public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}

And then my SQL Server-related code. The TVP structure:

CREATE TYPE dbo.RecordImportStructure 
AS TABLE (
ItemName VARCHAR(MAX),
Supplier VARCHAR(MAX),
Quantity DECIMAL(18, 2),
ItemUnit VARCHAR(MAX),
EntityUnit VARCHAR(MAX),
ItemSize DECIMAL(18, 2),
PackageSize DECIMAL(18, 2),
FamilyCode VARCHAR(4),
Family VARCHAR(MAX),
CategoryCode VARCHAR(MAX),
Category VARCHAR(MAX),
SubCategoryCode VARCHAR(MAX),
SubCategory VARCHAR(MAX),
ItemGroupCode VARCHAR(MAX),
ItemGroup VARCHAR(MAX),
PurchaseValue DECIMAL(18, 2),
UnitPurchaseValue DECIMAL(18, 2),
PackagePurchaseValue DECIMAL(18, 2),
FacilityCode VARCHAR(MAX),
CurrencyCode VARCHAR(MAX)
);

Insert stored procedure:

CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;

DECLARE     @ErrorCode  int  
DECLARE     @Step  varchar(200)

--Clear old stuff?
--TRUNCATE TABLE dbo.BatchRecords; 

INSERT INTO dbo.BatchRecords (
    ItemName,
    Supplier,
    Quantity,
    ItemUnit,
    EntityUnit,
    ItemSize,
    PackageSize,
    FamilyCode,
    Family,
    CategoryCode,
    Category,
    SubCategoryCode,
    SubCategory,
    ItemGroupCode,
    ItemGroup,
    PurchaseValue,
    UnitPurchaseValue,
    PackagePurchaseValue,
    DataBatchId,
    FacilityInstanceId,
    CurrencyId
)
    OUTPUT INSERTED.Id
    SELECT
    ItemName,
    Supplier,
    Quantity,
    ItemUnit,
    EntityUnit,
    ItemSize,
    PackageSize,
    FamilyCode,
    Family,
    CategoryCode,
    Category,
    SubCategoryCode,
    SubCategory,
    ItemGroupCode,
    ItemGroup,
    PurchaseValue,
    UnitPurchaseValue,
    PackagePurchaseValue,
    @BatchId,
    --FacilityInstanceId,
    --CurrencyId
    (SELECT TOP 1 f.Id FROM dbo.FacilityInstances f WHERE f.FacilityCode = i.FacilityCode),
    (SELECT TOP 1 c.Id FROM dbo.Currencies c WHERE c.CurrencyCode = i.CurrencyCode)
    FROM    @ImportTable i;

And finally my quick, test-only solution for executing this on the .NET side.

public class BatchRecordDataHandler : IBulkDataHandler<BatchRecordImportItem>
{
    public async Task<int> ImportAsync(SqlConnection conn, SqlTransaction transaction, IEnumerable<BatchRecordImportItem> src)
    {
        using (var cmd = new SqlCommand())
        {
            cmd.CommandText = "ImportBatchRecords";
            cmd.Connection = conn;
            cmd.Transaction = transaction;
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.CommandTimeout = 600;

            var batchIdParam = new SqlParameter
            {
                ParameterName = "@BatchId",
                SqlDbType = SqlDbType.Int,
                Value = 1
            };

            var tableParam = new SqlParameter
            {
                ParameterName = "@ImportTable",
                TypeName = "dbo.RecordImportStructure",
                SqlDbType = SqlDbType.Structured,
                Value = DataToSqlRecords(src)
            };

            cmd.Parameters.Add(batchIdParam);
            cmd.Parameters.Add(tableParam);

            using (var res = await cmd.ExecuteReaderAsync())
            {
                var resultTable = new DataTable();
                resultTable.Load(res);

                var cnt = resultTable.AsEnumerable().Count();

                return cnt;
            }
        }
    }

    private IEnumerable<SqlDataRecord> DataToSqlRecords(IEnumerable<BatchRecordImportItem> src)
    {
        var tvpSchema = new[] {
            new SqlMetaData("ItemName", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Supplier", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Quantity", SqlDbType.Decimal),
            new SqlMetaData("ItemUnit", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("EntityUnit", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemSize", SqlDbType.Decimal),
            new SqlMetaData("PackageSize", SqlDbType.Decimal),
            new SqlMetaData("FamilyCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Family", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("CategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("Category", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("SubCategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("SubCategory", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemGroupCode", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("ItemGroup", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("PurchaseValue", SqlDbType.Decimal),
            new SqlMetaData("UnitPurchaseValue", SqlDbType.Decimal),
            new SqlMetaData("PackagePurchaseValue", SqlDbType.Decimal),
            new SqlMetaData("FacilityInstanceId", SqlDbType.VarChar, SqlMetaData.Max),
            new SqlMetaData("CurrencyId", SqlDbType.VarChar, SqlMetaData.Max),
        };

        // A single SqlDataRecord instance is deliberately reused for every row;
        // ADO.NET consumes each yielded record before the next iteration.
        var dataRecord = new SqlDataRecord(tvpSchema);

        foreach (var importItem in src)
        {
            dataRecord.SetValues(importItem.ItemName,
                importItem.Supplier,
                importItem.Quantity,
                importItem.ItemUnit,
                importItem.EntityUnit,
                importItem.ItemSize,
                importItem.PackageSize,
                importItem.FamilyCode,
                importItem.Family,
                importItem.CategoryCode,
                importItem.Category,
                importItem.SubCategoryCode,
                importItem.SubCategory,
                importItem.ItemGroupCode,
                importItem.ItemGroup,
                importItem.PurchaseValue,
                importItem.UnitPurchaseValue,
                importItem.PackagePurchaseValue,
                importItem.FacilityCode,
                importItem.CurrencyCode);

            yield return dataRecord;
        }
    }
}

Import entity structure:

public class BatchRecordImportItem
{
    public string ItemName { get; set; }

    public string Supplier { get; set; }

    public decimal Quantity { get; set; }

    public string ItemUnit { get; set; }

    public string EntityUnit { get; set; }

    public decimal ItemSize { get; set; }

    public decimal PackageSize { get; set; }

    public string FamilyCode { get; set; }

    public string Family { get; set; }

    public string CategoryCode { get; set; }

    public string Category { get; set; }

    public string SubCategoryCode { get; set; }

    public string SubCategory { get; set; }

    public string ItemGroupCode { get; set; }

    public string ItemGroup { get; set; }

    public decimal PurchaseValue { get; set; }

    public decimal UnitPurchaseValue { get; set; }

    public decimal PackagePurchaseValue { get; set; }

    public int DataBatchId { get; set; }

    public string FacilityCode { get; set; }

    public string CurrencyCode { get; set; }
}

Please don't mind the useless reader at the end; it doesn't really do much. Without the reader, inserting 2.5kk (2.5 million) rows took around 26 minutes, while SqlBulkCopy took around 6 minutes. Is there something I'm doing fundamentally wrong? I'm using IsolationLevel.Snapshot, if that matters. Using SQL Server 2014; I'm free to change the DB structure and indices.

UPD 1


I've done a couple of adjustments/improvement attempts described by @Xedni, specifically:

  1. Limited all string fields that didn't have a max length to some fixed length
  2. Changed all TVP members from VARCHAR(MAX) to VARCHAR(*SomeValue*)
  3. Added a unique index to FacilityInstance->FacilityCode
  4. Added a unique index to Currency->CurrencyCode (a DDL sketch for both indexes follows this list)
  5. Tried adding WITH RECOMPILE to my SP
  6. Tried using DataTable instead of IEnumerable<SqlDataRecord>
  7. Tried batching data into smaller buckets, 50k and 100k per SP execution instead of 2.5kk
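
For reference, the unique indexes from points 3 and 4 look like this (a minimal DDL sketch; the index names are illustrative, and it assumes the code columns now have indexable, non-MAX lengths as per point 1):

CREATE UNIQUE INDEX UX_FacilityInstances_FacilityCode
ON dbo.FacilityInstances (FacilityCode);

CREATE UNIQUE INDEX UX_Currencies_CurrencyCode
ON dbo.Currencies (CurrencyCode);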

My structure is now like this:

CREATE TYPE dbo.RecordImportStructure 
AS TABLE (
ItemName VARCHAR(4096),
Supplier VARCHAR(450),
Quantity DECIMAL(18, 2),
ItemUnit VARCHAR(2048),
EntityUnit VARCHAR(2048),
ItemSize DECIMAL(18, 2),
PackageSize DECIMAL(18, 2),
FamilyCode VARCHAR(16),
Family VARCHAR(512),
CategoryCode VARCHAR(16),
Category VARCHAR(512),
SubCategoryCode VARCHAR(16),
SubCategory VARCHAR(512),
ItemGroupCode VARCHAR(16),
ItemGroup VARCHAR(512),
PurchaseValue DECIMAL(18, 2),
UnitPurchaseValue DECIMAL(18, 2),
PackagePurchaseValue DECIMAL(18, 2),
FacilityCode VARCHAR(450),
CurrencyCode VARCHAR(4)
);

So far no noticeable performance gains, unfortunately; still 26-28 min as before.


UPD 2
Checked the execution plan; are the indices my bane? EXE_PLAN


UPD 3
Added OPTION (RECOMPILE) at the end of my SP (placement shown below) and gained a minor boost; now sitting at ~25 min for 2.5kk rows.
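
A minimal sketch of where the hint sits, assuming the same INSERT ... SELECT as in the procedure above:

INSERT INTO dbo.BatchRecords ( /* same column list as above */ )
OUTPUT INSERTED.Id
SELECT /* same select list as above */
FROM @ImportTable
OPTION (RECOMPILE);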

HardLuck
  • Show execution plan – Backs Mar 22 '18 at 23:58
  • Do you have a trace running (SQL Trace or Extended Events) that captures rpc events? `varchar(MAX)` columns are very costly in that case. See [SQL Server TVP Performance Gotcha](http://www.dbdelta.com/sql-server-tvp-performance-gotchas/). If you must use `MAX` types, avoid capturing the event in a trace. – Dan Guzman Mar 23 '18 at 09:27
  • What is the purpose of such a big OUTPUT clause? Can you comment it out and check, or just comment out those two "SELECT TOP 1" subqueries? Whether an index is bane or boon depends on whether there are unnecessary or unused indexes, among many other things. – KumarHarsh Mar 23 '18 at 09:59
  • I think DataTable is best suited for this occasion. Also, if you have a single proc to execute in C# code, don't use a transaction; use a transaction in SQL only if you have multiple inserts or updates. – KumarHarsh Mar 23 '18 at 10:04
  • Any way you can add an auto-number (Identity) field and make that the primary key? It would speed up your inserts significantly. That being said, I design my own procedures and try not to use Entity Framework for this kind of reason. – Jason Geiger Mar 23 '18 at 14:23
  • @JasonGeiger Id is an auto-increment numeric identity already – HardLuck Mar 23 '18 at 15:07
  • @DanGuzman I got rid of all varchar(max) already – HardLuck Mar 23 '18 at 15:08
  • Is it proprietary or can you put a stripped down version on DropBox or something so I can take a look? Don't ever give out anything you need to keep secret but in case it isn't, I or somebody else might be able to point out the issue quickly. – Jason Geiger Mar 23 '18 at 18:43
  • It seems you have a clustered index around. These are meant for good read performance, but can be a pain for massive insertions. I would remove them during the insert (re-enabling may take a while) – Simon Mourier Mar 23 '18 at 21:05
  • @HardLuck You could try to enable **[traceflag 2453](https://support.microsoft.com/en-us/help/2952444/fix-poor-performance-when-you-use-table-variables-in-sql-server-2012-o)** Please let me know if it helps – Lukasz Szozda Mar 24 '18 at 11:04
  • @lad2025 this actually improved the performance by ~20%, great advice! – HardLuck Mar 24 '18 at 18:45
  • You need to show us the stored procedure in question. – RBarryYoung Mar 26 '18 at 15:15

5 Answers


You could set trace flag 2453 (an example of enabling it follows the quoted KB article):

FIX: Poor performance when you use table variables in SQL Server 2012 or SQL Server 2014

When you use a table variable in a batch or procedure, the query is compiled and optimized for the initial empty state of the table variable. If this table variable is populated with many rows at runtime, the pre-compiled query plan may no longer be optimal. For example, the query may be joining a table variable with a nested loop since it is usually more efficient for a small number of rows. This query plan can be inefficient if the table variable has millions of rows; a hash join may be a better choice under such a condition. To get a new query plan, it needs to be recompiled. Unlike other user or temporary tables, however, a row count change in a table variable does not trigger a query recompile. Typically, you can work around this with OPTION (RECOMPILE), which has its own overhead cost.

The trace flag 2453 allows the benefit of query recompile without OPTION (RECOMPILE). This trace flag differs from OPTION (RECOMPILE) in two main aspects:

  1. It uses the same row count threshold as other tables. The query does not need to be compiled for every execution, unlike OPTION (RECOMPILE); it triggers a recompile only when the row count change exceeds the predefined threshold.
  2. OPTION (RECOMPILE) forces the query to peek parameters and optimize the query for them. This trace flag does not force parameter peeking.

You can turn on trace flag 2453 to allow a table variable to trigger a recompile when a sufficient number of rows have changed. This may allow the query optimizer to choose a more efficient plan.
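
A minimal sketch of enabling it (DBCC TRACEON requires sysadmin rights, and the setting lasts until the instance restarts):

-- Enable the trace flag globally for the instance
DBCC TRACEON (2453, -1);

-- Verify it is active
DBCC TRACESTATUS (2453);

To make it permanent across restarts, add -T2453 to the SQL Server startup parameters instead.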

Lukasz Szozda

I would guess your proc could use some love. Without seeing an execution plan it's hard to say for sure, but here are some thoughts.

A table variable (which a table-valued parameter essentially is) is always assumed by SQL Server to contain exactly 1 row (even if it doesn't). This is irrelevant in many cases, but you have two correlated subqueries in your insert list, which is where I'd focus my attention. It's more than likely hammering that poor table variable with a bunch of nested loop joins because of the cardinality estimate. I would consider putting the rows from your TVP into a temp table, updating the temp table with the IDs from FacilityInstances and Currencies, then doing your final insert from that. A sketch of that approach follows.
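
A minimal sketch of that staging approach inside the same procedure (the /* ... */ placeholders stand for the same column lists as in the original procedure):

-- Materialize the TVP so the optimizer gets real row counts
SELECT it.*,
       CAST(NULL AS int) AS FacilityInstanceId,
       CAST(NULL AS int) AS CurrencyId
INTO   #Import
FROM   @ImportTable it;

-- Resolve the lookup Ids with set-based updates instead of per-row subqueries
UPDATE i SET FacilityInstanceId = f.Id
FROM #Import i
JOIN dbo.FacilityInstances f ON f.FacilityCode = i.FacilityCode;

UPDATE i SET CurrencyId = c.Id
FROM #Import i
JOIN dbo.Currencies c ON c.CurrencyCode = i.CurrencyCode;

-- Final insert from the temp table, returning the new Ids as before
INSERT INTO dbo.BatchRecords (ItemName, /* ... */, DataBatchId, FacilityInstanceId, CurrencyId)
OUTPUT INSERTED.Id
SELECT ItemName, /* ... */, @BatchId, FacilityInstanceId, CurrencyId
FROM #Import;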

Xedni
  • Tried this; the performance was slightly better but still nowhere near `SqlBulkCopy` – HardLuck Mar 21 '18 at 11:01
  • Right, which unfortunately is the point you originally made: SqlBulkCopy is super fast and super efficient, but handled in a completely different way. You'll never be able to approach that level of speed with anything that goes through regular old SQL. Where is the timing becoming an issue? Is it hanging a web client? Or is it just an "I'd like to have this faster" kind of thing? – Xedni Mar 21 '18 at 15:50
  • Also, do you have indexes supporting those Currencies and FacilityInstances queries? If not, that might be a quick easy win too. – Xedni Mar 21 '18 at 15:52
  • There is going to be a service that will be uploading records. The speed is kind of an issue, as there is going to be other processing done and not only uploading, so speed is definitely an issue. I could live with 2.5kk in 6 minutes, but bulk copy doesn't provide any control. As for the indices: facility code and currency code are not indexed indeed! I'll give them a try – HardLuck Mar 21 '18 at 17:28
  • Also, unless you *really* need them to be that way, I'd change some of those `varchar(max)` columns to something smaller (even if it's varchar(8000), but if you can, size them closer to what you expect the data to look like). `Varchar(max)` can cause some unexpected slowness depending on what your data looks like. It may or may not be treated as BLOB data and stored off row. You also can't rebuild any indexes which contain varchar(max) columns "online" – Xedni Mar 21 '18 at 19:16

Try with the following stored procedure:

CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
    SET NOCOUNT ON;

    DECLARE     @ErrorCode  int  
    DECLARE     @Step  varchar(200)


    CREATE TABLE #FacilityInstances
    (
        Id int NOT NULL,
        FacilityCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY=ON)
    );

    CREATE TABLE #Currencies
    (
        Id int NOT NULL,
        CurrencyCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY = ON)
    )

    INSERT INTO #FacilityInstances(Id, FacilityCode)
    SELECT Id, FacilityCode FROM dbo.FacilityInstances
    WHERE FacilityCode IS NOT NULL AND Id IS NOT NULL;

    INSERT INTO #Currencies(Id, CurrencyCode)
    SELECT Id, CurrencyCode FROM dbo.Currencies
    WHERE CurrencyCode IS NOT NULL AND Id IS NOT NULL


    INSERT INTO dbo.BatchRecords (
        ItemName,
        Supplier,
        Quantity,
        ItemUnit,
        EntityUnit,
        ItemSize,
        PackageSize,
        FamilyCode,
        Family,
        CategoryCode,
        Category,
        SubCategoryCode,
        SubCategory,
        ItemGroupCode,
        ItemGroup,
        PurchaseValue,
        UnitPurchaseValue,
        PackagePurchaseValue,
        DataBatchId,
        FacilityInstanceId,
        CurrencyId
    )
    OUTPUT INSERTED.Id
    SELECT
        ItemName,
        Supplier,
        Quantity,
        ItemUnit,
        EntityUnit,
        ItemSize,
        PackageSize,
        FamilyCode,
        Family,
        CategoryCode,
        Category,
        SubCategoryCode,
        SubCategory,
        ItemGroupCode,
        ItemGroup,
        PurchaseValue,
        UnitPurchaseValue,
        PackagePurchaseValue,
        @BatchId,
        F.Id,
        C.Id
    FROM   
        #FacilityInstances F RIGHT OUTER HASH JOIN 
        (
            #Currencies C 
            RIGHT OUTER HASH JOIN @ImportTable IT 
                ON C.CurrencyCode = IT.CurrencyCode
        )
        ON F.FacilityCode = IT.FacilityCode

This forces the execution plan to use hash match joins instead of nested loops. I think the culprit of the bad performance is the first nested loop, which performs an index scan for each row in @ImportTable.

I don't know if CurrencyCode is unique in the Currencies table, so I create the temporary table #Currencies with unique currency codes.

I don't know if FacilityCode is unique in the FacilityInstances table, so I create the temporary table #FacilityInstances with unique facility codes.

If they are unique, you don't need the temporary tables; you can use the permanent tables directly.

Assuming CurrencyCode and FacilityCode are unique, the following stored procedure would be better because it doesn't create unnecessary temporary tables:

CREATE PROCEDURE dbo.ImportBatchRecords (
    @BatchId INT,
    @ImportTable dbo.RecordImportStructure READONLY
)
AS
    SET NOCOUNT ON;

    DECLARE     @ErrorCode  int  
    DECLARE     @Step  varchar(200)



    INSERT INTO dbo.BatchRecords (
        ItemName,
        Supplier,
        Quantity,
        ItemUnit,
        EntityUnit,
        ItemSize,
        PackageSize,
        FamilyCode,
        Family,
        CategoryCode,
        Category,
        SubCategoryCode,
        SubCategory,
        ItemGroupCode,
        ItemGroup,
        PurchaseValue,
        UnitPurchaseValue,
        PackagePurchaseValue,
        DataBatchId,
        FacilityInstanceId,
        CurrencyId
    )
    OUTPUT INSERTED.Id
    SELECT
        ItemName,
        Supplier,
        Quantity,
        ItemUnit,
        EntityUnit,
        ItemSize,
        PackageSize,
        FamilyCode,
        Family,
        CategoryCode,
        Category,
        SubCategoryCode,
        SubCategory,
        ItemGroupCode,
        ItemGroup,
        PurchaseValue,
        UnitPurchaseValue,
        PackagePurchaseValue,
        @BatchId,
        F.Id,
        C.Id
    FROM   
        dbo.FacilityInstances F RIGHT OUTER HASH JOIN 
        (
            dbo.Currencies C 
            RIGHT OUTER HASH JOIN @ImportTable IT 
                ON C.CurrencyCode = IT.CurrencyCode
        )
        ON F.FacilityCode = IT.FacilityCode
Jesús López
  • Thank you for your input. `CurrencyCode` and `FacilityCode` are both unique; can this solution work concurrently with multiple clients calling this same SP? – HardLuck Mar 27 '18 at 11:16
  • Yes, I don't see any reason why it would not work concurrently. I updated my answer to include a stored procedure assuming the codes' uniqueness – Jesús López Mar 27 '18 at 12:32
  • @HardLuck. If the FacilityInstances table is under 20,000 rows and ImportTable is under 10k rows, I bet this stored procedure runs in under 15 seconds. – Jesús López Mar 27 '18 at 12:42
  • @HardLuck. Of course, I'm assuming the BatchRecords table has no triggers ruining performance. – Jesús López Mar 27 '18 at 12:52
  • @HardLuck. I noticed some weird things. If `CurrencyCode` and `FacilityCode` are unique, why do you specify TOP 1? And why don't you use JOIN instead of (SELECT TOP 1 ...)? Are there unique constraints or unique indexes to enforce uniqueness? That would be the way to go. – Jesús López Mar 27 '18 at 16:13
  • @HardLuck. Another thing that I find weird is declaring `FacilityCode` and `CurrencyCode` as `varchar(max)` in `RecordImportStructure`; if they are codes, they should be declared with a fixed length. How are they declared in the permanent tables? – Jesús López Mar 27 '18 at 16:18
  • The initial post is not quite up to date; I've changed the codes to have fixed lengths – HardLuck Mar 28 '18 at 11:27
  • @HardLuck. I'm just curious about how much performance improvement you got with this solution. Could you tell me, please? – Jesús López Mar 28 '18 at 13:29
  • ~33% with other minor optimizations – HardLuck Apr 01 '18 at 00:44

Well... why not just use SqlBulkCopy? There are plenty of solutions out there that help you convert a collection of entities into an IDataReader object that can be handed directly to SqlBulkCopy.

This is a good start...

https://github.com/matthewschrager/Repository/blob/master/Repository.EntityFramework/EntityDataReader.cs

Then it becomes as simple as...

using (var bulkCopy = new SqlBulkCopy(connection))
{
    bulkCopy.DestinationTableName = "dbo.BatchRecords"; // the destination table must be specified
    IDataReader dataReader = storeEntities.AsDataReader();
    bulkCopy.WriteToServer(dataReader);
}

I've used this code. The one caveat is that you need to be quite careful about the definition of your entity: the order of the properties in the entity determines the order of the columns exposed by the IDataReader, and this needs to correlate with the order of the columns in the table that you are bulk copying to. You can verify the table side with the query below.
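
A quick way to check the destination table's column order on the SQL side (assuming the dbo.BatchRecords target from the question):

-- Lists the columns in ordinal order, which must line up
-- with the entity's property order
SELECT name, column_id
FROM sys.columns
WHERE object_id = OBJECT_ID(N'dbo.BatchRecords')
ORDER BY column_id;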

Alternatively, there's other code here:

https://www.codeproject.com/Tips/1114089/Entity-Framework-Performance-Tuning-Using-SqlBulkC

Mick
  • Because I need the output results back – HardLuck Mar 23 '18 at 00:04
  • my SP produces output; for the sake of example it's the value of the identity. I need to have the ability to read the output back – HardLuck Mar 23 '18 at 00:35
  • @HardLuck perhaps you could use the method described here... https://stackoverflow.com/questions/2945414/possible-to-get-primaykey-ids-back-after-a-sql-bulkcopy – Mick Mar 23 '18 at 01:03
  • @HardLuck so you would do it in two operations... a SqlBulkCopy into a staging table to get the data into SQL Server as fast as possible, then another operation to insert the records from the staging table to the target table, which returns your output (a sketch of this follows below) – Mick Mar 23 '18 at 01:04
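
A minimal sketch of that two-step approach (the staging table name is illustrative; the elided columns match dbo.RecordImportStructure, and @BatchId is supplied as before):

-- 1) SqlBulkCopy target: a plain heap with no indexes or constraints
CREATE TABLE dbo.BatchRecordsStaging (
    ItemName VARCHAR(4096),
    /* ... remaining TVP columns ... */
    FacilityCode VARCHAR(450),
    CurrencyCode VARCHAR(4)
);

-- 2) One set-based insert into the target, returning the generated Ids
INSERT INTO dbo.BatchRecords (ItemName, /* ... */, DataBatchId, FacilityInstanceId, CurrencyId)
OUTPUT INSERTED.Id
SELECT s.ItemName, /* ... */, @BatchId, f.Id, c.Id
FROM dbo.BatchRecordsStaging s
JOIN dbo.FacilityInstances f ON f.FacilityCode = s.FacilityCode
JOIN dbo.Currencies c ON c.CurrencyCode = s.CurrencyCode;

TRUNCATE TABLE dbo.BatchRecordsStaging;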

I know there is an accepted answer, but I can't resist. I believe you can improve the performance by 20-50% over the accepted answer.

The key is to SqlBulkCopy directly into the final table, dbo.BatchRecords.

To make this happen, you need FacilityInstanceId and CurrencyId before calling SqlBulkCopy. To get them, load SELECT Id, FacilityCode FROM dbo.FacilityInstances and SELECT Id, CurrencyCode FROM dbo.Currencies into collections, then build dictionaries:

var facilityIdByFacilityCode = facilitiesCollection.ToDictionary(x => x.FacilityCode, x => x.Id);
var currencyIdByCurrencyCode = currenciesCollection.ToDictionary(x => x.CurrencyCode, x => x.Id);

Once you have the dictionaries, getting the Ids from the codes is a constant-time cost. This is equivalent and very similar to a HASH MATCH JOIN in SQL Server, but on the client side.

The other barrier you need to tear down is getting the Id column of the newly inserted rows in the dbo.BatchRecords table. Actually, you can get the Ids before inserting them.

Make the Id column "sequence driven":

CREATE SEQUENCE BatchRecords_Id_Seq START WITH 1;
CREATE TABLE BatchRecords
(
   Id int NOT NULL CONSTRAINT DF_BatchRecords_Id DEFAULT (NEXT VALUE FOR BatchRecords_Id_Seq), 

   -- ... other columns ...

   CONSTRAINT PK_BatchRecords PRIMARY KEY (Id)

)

Once you have the BatchRecords collection, you know how many records are in it, so you can reserve a contiguous range of sequence values. Execute the following T-SQL:

DECLARE @BatchCollectionCount int = 2500 -- Replace with the actual value
DECLARE @range_first_value sql_variant
DECLARE @range_last_value sql_variant

EXEC sp_sequence_get_range
     @sequence_name =  N'BatchRecords_Id_Seq', 
     @range_size =  @BatchCollectionCount,
     @range_first_value = @range_first_value OUTPUT, 
     @range_last_value = @range_last_value OUTPUT

SELECT 
    CAST(@range_first_value AS INT) AS range_first_value, 
    CAST(@range_last_value AS int) as range_last_value

This returns range_first_value and range_last_value. You can now assign BatchRecord.Id to each record:

int id = range_first_value; // the first value returned by sp_sequence_get_range
foreach (var record in batchRecords)
{
   record.Id = id++;
} 

Next, you can SqlBulkCopy the batch record collection directly into the final table dbo.BatchRecords.

To get a DataReader from an IEnumerable<T> to feed SqlBulkCopy.WriteToServer, you can use code like this, which is part of EntityLite, a micro ORM I developed.

You can make it even faster if you cache facilityIdByFacilityCode and currencyIdByCurrencyCode. To be sure these dictionaries are up to date, you can use SqlDependency or techniques like this one.

Jesús López