I have to process about 1 million entities to build facts, and that should produce roughly the same number of resulting facts (about 1 million).
The first issue I had was that bulk inserting was slow with Entity Framework, so I used the pattern from Fastest Way of Inserting in Entity Framework (Slauma's answer; the AddToContext helper is included at the bottom of this post). Now I can insert entities quite fast, about 100K per minute.
Another issue I ran into was not having enough memory to process everything at once, so I "paged" the processing to avoid the out-of-memory exception I would get if I built a single list out of my 1 million resulting facts.
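In essence the paging is just Skip/Take over the ordered query; this is a simplified excerpt of the loop shown in full below:
// simplified excerpt of the paging loop shown in full further down
while (skip < total)
{
    // load one page of Recon items with their related data
    List<Recon> toProcess = recon.Skip(skip).Take(batchSize)
        .Include(r => r.Transacs.Select(t => t.ItemCodeDetails))
        .ToList();
    // ... build the facts for this page and bulk insert them ...
    skip += batchSize;
}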
The issue I have is that memory keeps growing even with the paging, and I don't understand why. No memory is released after a batch. I find this strange because on each iteration of the loop I fetch the recons, build the facts and store them in the database; once the iteration is done, those objects should be released from memory. But that doesn't seem to happen, since no memory is freed after each iteration.
Can you please tell me if you see something wrong before I dig deeper? More specifically, why is no memory released after an iteration of the while loop?
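To be clear about what I mean by "no memory is released": a hypothetical helper like the one below (not part of my actual program) could be called at the end of each batch to check whether the managed heap returns to a baseline between iterations.
// hypothetical helper, not part of my actual program:
// forces a full collection and reports how much managed memory is still reachable
static class MemoryProbe
{
    public static void Report(int batch)
    {
        long managedBytes = GC.GetTotalMemory(forceFullCollection: true);
        Console.WriteLine("after batch {0}: {1:N0} managed bytes still in use",
            batch, managedBytes);
    }
}
Here is the full processing code: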
static void Main(string[] args)
{
ReceiptsItemCodeAnalysisContext db = new ReceiptsItemCodeAnalysisContext();
var recon = db.Recons
.Where(r => r.Transacs.Where(t => t.ItemCodeDetails.Count > 0).Count() > 0)
.OrderBy( r => r.ReconNum);
// used for "paging" the processing
var processed = 0;
var total = recon.Count();
var batchSize = 1000; //100000;
var batch = 1;
var skip = 0;
var doBatch = true;
while (doBatch)
{ // list to store facts processed during the batch
List<ReconFact> facts = new List<ReconFact>();
// get the Recon items to process in this batch put them in a list
List<Recon> toProcess = recon.Skip(skip).Take(batchSize)
.Include(r => r.Transacs.Select(t => t.ItemCodeDetails))
.ToList();
// to process real fast
Parallel.ForEach(toProcess, r =>
{ // processing a recon and adding the facts to the list
var thisReconFacts = ReconFactGenerator.Generate(r);
thisReconFacts.ForEach(f => facts.Add(f));
Console.WriteLine(processed += 1);
});
// saving the facts using pattern provided by Slauma
using (TransactionScope scope = new TransactionScope(TransactionScopeOption.Required, new System.TimeSpan(0, 15, 0)))
{
ReceiptsItemCodeAnalysisContext context = null;
try
{
context = new ReceiptsItemCodeAnalysisContext();
context.Configuration.AutoDetectChangesEnabled = false;
int count = 0;
foreach (var fact in facts.Where(f => f != null))
{
count++;
Console.WriteLine(count);
context = ContextHelper.AddToContext(context, fact, count, 250, true); //context.AddToContext(context, fact, count, 250, true);
}
context.SaveChanges();
}
finally
{
if (context != null)
context.Dispose();
}
scope.Complete();
}
Console.WriteLine("batch {0} finished continuing", batch);
// continuing the batch
batch++;
skip = batchSize * (batch - 1);
doBatch = skip < total;
// AFTER THIS facts AND toProcess SHOULD BE RESET
// BUT IT LOOKS LIKE THEY ARE NOT OR AT LEAST SOMETHING
// IS GROWING IN MEMORY
}
Console.WriteLine("Processing is done {} recons processed", processed);
}
Here is the method provided by Slauma to optimize bulk inserts with Entity Framework:
class ContextHelper
{
public static ReceiptsItemCodeAnalysisContext AddToContext(ReceiptsItemCodeAnalysisContext context,
ReconFact entity, int count, int commitCount, bool recreateContext)
{
context.Set<ReconFact>().Add(entity);
if (count % commitCount == 0)
{
context.SaveChanges();
if (recreateContext)
{
context.Dispose();
context = new ReceiptsItemCodeAnalysisContext();
context.Configuration.AutoDetectChangesEnabled = false;
}
}
return context;
}
}