1

I have an web application where I parse a csv file that can have over 200k records in it. I parse each line for information, verify that the key does not exist in the database and then add it to the context. When the count reaches 10,000 records it calls SaveChanges routine. The problem is that there can be duplicates in the context and it errors out. This is running on a Azure VM communicating to an Azure SQL server.

Two questions, how do I handle the duplicate issue and is there any way I can improve the speed as it takes several hours to run?

            using (LoanFileEntities db = new LoanFileEntities())
            {
                db.Configuration.AutoDetectChangesEnabled = false; // 1. this is a huge time saver
                db.Configuration.ValidateOnSaveEnabled = false;  // 2. this can also save time

                while (parser.Read())
                {
                    counter++;

                    int loan_code = 0;
                    string loan_code_string = parser["LoanId"];

                    string dateToParse = parser["PullDate"].Trim();
                    DateTime date_pulled;
                    try
                    {
                        date_pulled = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
                    }
                    catch (Exception)
                    {
                        throw new Exception("No Pull Date for line " + counter);
                    }

                    string originationdate = parser["OriginationDate"].Trim();
                    DateTime date_originated;
                    try
                    {
                        date_originated = DateTime.Parse(originationdate, CultureInfo.InvariantCulture);
                    }
                    catch (Exception)
                    {
                        throw new Exception("No Origination Date for line " + counter);
                    }

                    dateToParse = parser["DueDate"].Trim();
                    DateTime date_due;
                    try
                    {
                        date_due = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
                    }
                    catch (Exception)
                    {
                        throw new Exception("No Due Date for line " + counter);
                    }

                    string region = parser["Region"].Trim();

                    string source = parser["Channel"].Trim();

                    string password = parser["FilePass"].Trim();

                    decimal principalAmt = Convert.ToDecimal(parser["Principal"].Trim());

                    decimal totalDue = Convert.ToDecimal(parser["TotalDue"].Trim());

                    string vitaLoanId = parser["VitaLoanId"];

                    var toAdd =
                        db.dfc_LoanRecords.Any(
                            x => x.loan_code_string == loan_code_string);
                    if (!toAdd)
                    {
                        dfc_LoanRecords loan = new dfc_LoanRecords();
                        loan.loan_code = loan_code;
                        loan.loan_code_string = loan_code_string;
                        loan.loan_principal_amt = principalAmt;
                        loan.loan_due_date = date_due;
                        loan.date_pulled = date_pulled;
                        loan.date_originated = date_originated;
                        loan.region = region;
                        loan.source = source;
                        loan.password = password;
                        loan.loan_amt_due = totalDue;
                        loan.vitaLoanId = vitaLoanId;

                        loan.load_file = fileName;
                        loan.load_date = DateTime.Now;

                        switch (loan.region)
                        {
                            case "UK":
                                if (location.Equals("UK"))
                                {
                                    //db.dfc_LoanRecords.Add(loan);
                                    if (loan.source == "Online")
                                    {
                                        counter_new_uk_online++;
                                    }
                                    else
                                    {
                                        counter_new_uk_retail++;
                                    }
                                }
                                break;
                            case "US":
                                if (location.Equals("US"))
                                {
                                    db.dfc_LoanRecords.Add(loan);
                                    if (loan.source == "Online")
                                    {
                                        counter_new_us_online++;
                                    }
                                    else
                                    {
                                        counter_new_us_retail++;
                                    }
                                }
                                break;
                            case "Canada":
                                if (location.Equals("US"))
                                {
                                    db.dfc_LoanRecords.Add(loan);
                                    if (loan.source == "Online")
                                    {
                                            counter_new_cn_online++;
                                    }
                                    else
                                    {
                                        counter_new_cn_retail++;
                                    }
                                }
                                break;
                        }
                        // delay save to speed up load.   3. also saves transactional time
                        if (counter % 10000 == 0)
                        {
                            db.SaveChanges();
                        }
                    }

                }  // end of parser read
                db.SaveChanges();
            }
        }
    }
Craig
  • 1,205
  • 2
  • 21
  • 54

1 Answers1

1

I would suggest removing duplicates in the code before sending it over to .SaveChanges().

Instead of going into detail about duplicate removal, I've put together this list of links to existing questions and answers on StackOverflow that may help:

Hope that helps!

Community
  • 1
  • 1
Shahed C - MSFT
  • 2,831
  • 23
  • 26