I have an web application where I parse a csv file that can have over 200k records in it. I parse each line for information, verify that the key does not exist in the database and then add it to the context. When the count reaches 10,000 records it calls SaveChanges routine. The problem is that there can be duplicates in the context and it errors out. This is running on a Azure VM communicating to an Azure SQL server.
Two questions, how do I handle the duplicate issue and is there any way I can improve the speed as it takes several hours to run?
using (LoanFileEntities db = new LoanFileEntities())
{
db.Configuration.AutoDetectChangesEnabled = false; // 1. this is a huge time saver
db.Configuration.ValidateOnSaveEnabled = false; // 2. this can also save time
while (parser.Read())
{
counter++;
int loan_code = 0;
string loan_code_string = parser["LoanId"];
string dateToParse = parser["PullDate"].Trim();
DateTime date_pulled;
try
{
date_pulled = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Pull Date for line " + counter);
}
string originationdate = parser["OriginationDate"].Trim();
DateTime date_originated;
try
{
date_originated = DateTime.Parse(originationdate, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Origination Date for line " + counter);
}
dateToParse = parser["DueDate"].Trim();
DateTime date_due;
try
{
date_due = DateTime.Parse(dateToParse, CultureInfo.InvariantCulture);
}
catch (Exception)
{
throw new Exception("No Due Date for line " + counter);
}
string region = parser["Region"].Trim();
string source = parser["Channel"].Trim();
string password = parser["FilePass"].Trim();
decimal principalAmt = Convert.ToDecimal(parser["Principal"].Trim());
decimal totalDue = Convert.ToDecimal(parser["TotalDue"].Trim());
string vitaLoanId = parser["VitaLoanId"];
var toAdd =
db.dfc_LoanRecords.Any(
x => x.loan_code_string == loan_code_string);
if (!toAdd)
{
dfc_LoanRecords loan = new dfc_LoanRecords();
loan.loan_code = loan_code;
loan.loan_code_string = loan_code_string;
loan.loan_principal_amt = principalAmt;
loan.loan_due_date = date_due;
loan.date_pulled = date_pulled;
loan.date_originated = date_originated;
loan.region = region;
loan.source = source;
loan.password = password;
loan.loan_amt_due = totalDue;
loan.vitaLoanId = vitaLoanId;
loan.load_file = fileName;
loan.load_date = DateTime.Now;
switch (loan.region)
{
case "UK":
if (location.Equals("UK"))
{
//db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_uk_online++;
}
else
{
counter_new_uk_retail++;
}
}
break;
case "US":
if (location.Equals("US"))
{
db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_us_online++;
}
else
{
counter_new_us_retail++;
}
}
break;
case "Canada":
if (location.Equals("US"))
{
db.dfc_LoanRecords.Add(loan);
if (loan.source == "Online")
{
counter_new_cn_online++;
}
else
{
counter_new_cn_retail++;
}
}
break;
}
// delay save to speed up load. 3. also saves transactional time
if (counter % 10000 == 0)
{
db.SaveChanges();
}
}
} // end of parser read
db.SaveChanges();
}
}
}