I came up with the following code, which calls a database paging function repeatedly with a page size of 5 and, for each item in a page, executes a function in parallel with a max concurrency of 4. It looks like it's working so far, but I'm unsure whether I need to use locking around the `parallelInvocationTasks.Remove(completedTask);`
line and the `Task.WhenAll(parallelInvocationTasks.ToArray());` call.
So, do I need to use locking here, and do you see any other improvements?
Here's the code
Program.cs
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApp1
{
class Program
{
    // Demo entry point: wires a simulated database pager into ForEachParallel
    // (max concurrency 4, page size 5) and logs each item's start/end.
    private static async Task Main(string[] args)
    {
        Console.WriteLine("Starting");

        Func<int, int, CancellationToken, Task<IList<string>>> pageSource = GetNextPageFromDatabase;

        await pageSource.ForEachParallel(4, 5, new CancellationToken(), async item =>
        {
            Console.WriteLine($"{item} started");
            //simulate processing
            await Task.Delay(1000);
            Console.WriteLine($"{item} ended");
        });

        Console.WriteLine("Done");
    }

    // Fake paging source: returns pages of 'pageSize' items, and an empty page
    // once the offset passes three full pages (i.e. simulates exactly 4 pages,
    // the last one empty).
    private static async Task<IList<string>> GetNextPageFromDatabase(
        int offset,
        int pageSize,
        CancellationToken cancellationToken)
    {
        //simulate i/o and database paging
        await Task.Delay(2000, cancellationToken);

        var pageData = new List<string>();

        //simulate just 4 pages
        if (offset >= pageSize * 3)
        {
            return pageData;
        }

        var itemNumber = 1;
        while (itemNumber <= pageSize)
        {
            pageData.Add($"Item {itemNumber + offset}");
            itemNumber++;
        }

        return pageData;
    }
}
}
PagingExtensions.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApp1
{
public static class PagingExtensions
{
    /// <summary>
    /// Repeatedly invokes <paramref name="getNextPageFunction"/> with an increasing offset and,
    /// for each item in each page, runs <paramref name="forEachFunction"/> on the thread pool
    /// with at most <paramref name="concurrency"/> invocations in flight at once.
    /// Enumeration stops when a page comes back smaller than <paramref name="pageSize"/>.
    /// </summary>
    /// <param name="getNextPageFunction">Pager: (offset, pageSize, token) -> page of items. A null delegate yields 0.</param>
    /// <param name="concurrency">Maximum number of concurrent <paramref name="forEachFunction"/> invocations.</param>
    /// <param name="pageSize">Number of items requested per page.</param>
    /// <param name="cancellationToken">Cancels page fetches, throttling waits, and pending task starts.</param>
    /// <param name="forEachFunction">Async callback invoked once per enumerated item. A null delegate yields 0.</param>
    /// <returns>The total number of items enumerated.</returns>
    public static async Task<int> ForEachParallel<TItem>(
        this Func<int, int, CancellationToken, Task<IList<TItem>>> getNextPageFunction,
        int concurrency,
        int pageSize,
        CancellationToken cancellationToken,
        Func<TItem, Task> forEachFunction)
    {
        var enumeratedCount = 0;
        if (getNextPageFunction == null || forEachFunction == null)
        {
            return enumeratedCount;
        }

        var offset = 0;
        using (var semaphore = new SemaphoreSlim(concurrency))
        {
            // Guards 'parallelInvocationTasks'. The pruning continuations below run on
            // thread-pool threads and race with Add/ToArray on the enumerating thread;
            // List<T> is not thread-safe, so locking here is REQUIRED (answering the
            // original question: yes, both Remove and the final snapshot need it).
            var tasksGate = new object();
            var parallelInvocationTasks = new List<Task>();
            IList<TItem> items;
            do
            {
                items = await getNextPageFunction(offset, pageSize, cancellationToken) ?? new List<TItem>();
                foreach (TItem item in items)
                {
                    // Throttle: wait until one of the 'concurrency' slots frees up.
                    await semaphore.WaitAsync(cancellationToken);

                    // Task.Run replaces Task.Factory.StartNew(async...).Unwrap():
                    // it schedules on the thread pool and unwraps async delegates itself.
                    Task forEachFunctionTask = Task.Run(async () =>
                    {
                        try
                        {
                            await forEachFunction(item);
                        }
                        finally
                        {
                            // Safe to touch the semaphore here: Task.WhenAll below
                            // completes before the using block disposes it.
                            semaphore.Release();
                        }
                    }, cancellationToken);

                    lock (tasksGate)
                    {
                        parallelInvocationTasks.Add(forEachFunctionTask);
                    }

                    // Prune successfully completed tasks as they finish so the list
                    // stays bounded (roughly 'concurrency' live entries) instead of
                    // accumulating every completed task; faulted tasks are kept so
                    // the final WhenAll surfaces their exceptions. The discard
                    // replaces the '#pragma warning disable 4014' suppression.
                    _ = forEachFunctionTask.ContinueWith(completedTask =>
                    {
                        if (completedTask.Exception == null)
                        {
                            lock (tasksGate)
                            {
                                parallelInvocationTasks.Remove(completedTask);
                            }
                        }
                    }, cancellationToken);

                    enumeratedCount += 1;
                }

                offset += pageSize;
            }
            while (items.Count >= pageSize);

            // Snapshot under the lock: continuations may still be removing entries.
            Task[] remainingTasks;
            lock (tasksGate)
            {
                remainingTasks = parallelInvocationTasks.ToArray();
            }

            // Drain in-flight work; rethrows if any invocation faulted.
            await Task.WhenAll(remainingTasks);
        }

        return enumeratedCount;
    }
}
}