I have a large number of images I'm try to download (and then resize and save). I'm trying to do this in the most efficient way possible. I have opted for to use a BlockingCollection and with a producer which downloads the images and a consumer that will resize and save the images once they've been downloaded.
The main issue I'm having is with the producer which downloads the images. I'm using a SemaphoreSlim to queue up the download tasks and from the output I can tell it's working more quickly and nice and asynchronously. I am adding to a list of tasks and then using Task.WaitAll
to wait for all the downloads to finish before finalising the whole process. the problem is that WaitAll
doesn't seem to actually wait for all the tasks to finish before continuing as you can see from this screenshot:
Here's my dummy code which will reproduce the issue:
class Program
{
static IDictionary<string, string> imageUrls;
static BlockingCollection<Image> queue;
static void Main(string[] args)
{
imageUrls = new Dictionary<string, string>();
for (int i = 0; i < 20; i++)
{
imageUrls.Add($"{i}-1", "https://emergingpayments.org/wp-content/uploads/2017/11/landscape-2.jpeg");
imageUrls.Add($"{i}-2", "https://static.photocdn.pt/images/articles/2018/03/09/articles/2017_8/landscape_photography.jpg");
imageUrls.Add($"{i}-3", "https://wallup.net/wp-content/uploads/2015/12/258088-sunset-landscape-horizon.jpg");
}
queue = new BlockingCollection<Image>();
Task.WaitAll(
Task.Run(() => processImages()),
Task.Run(() => downloadImages())
);
}
static async Task downloadImages()
{
using (var client = new HttpClient(new HttpClientHandler { MaxConnectionsPerServer = 100 }))
{
var stopwatch = new Stopwatch();
stopwatch.Start();
using (var semaphore = new SemaphoreSlim(20))
{
var downloadTasks = new List<Task>();
foreach (var imageUrl in imageUrls)
{
await semaphore.WaitAsync();
downloadTasks.Add(Task.Factory.StartNew(async () =>
{
try
{
var imageData = await client.GetByteArrayAsync(imageUrl.Value);
queue.Add(new Image { Id = imageUrl.Key, ImageData = imageData });
Console.WriteLine($"Downloaded image {imageUrl.Key}");
}
catch (Exception ex)
{
Console.WriteLine($"Download failed for image {imageUrl.Key} - {ex.Message}");
}
finally
{
semaphore.Release();
}
}));
}
Task.WaitAll(downloadTasks.ToArray());
Console.WriteLine($"Downloading took {stopwatch.Elapsed.TotalSeconds} seconds");
}
}
}
static void processImages()
{
while (!queue.IsCompleted)
{
Image image = null;
try
{
image = queue.Take();
}
catch (InvalidOperationException) { }
if (image != null)
{
Thread.Sleep(100);
Console.WriteLine($"Processed image {image.Id}");
Console.WriteLine($"Processed image {image.Id}");
Console.WriteLine($"Processed image {image.Id}");
}
}
}
class Image
{
public string Id { get; set; }
public byte[] ImageData { get; set; }
}
}
My solution is similar to this: https://stackoverflow.com/a/36571420/5392786
I think it may be a problem with where I'm releasing the semaphore or something but I can't seem to figure out what the problem is.