I am trying to process numbers as fast as possible in a C# app. I use Thread.Sleep()
to simulate the processing, and random numbers as input. I compare 3 different techniques.
This is the test code that I used:
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace Test
{
    /// <summary>
    /// Console benchmark that times how long it takes to "process" an array of
    /// random numbers in parallel. The work for each element is simulated with
    /// <see cref="Thread.Sleep(int)"/>. Three techniques are provided
    /// (<see cref="ProcessData1"/>, <see cref="ProcessData2"/>,
    /// <see cref="ProcessData3"/>); <see cref="Main"/> runs one of them repeatedly.
    /// </summary>
    internal class Program
    {
        /// <summary>Number of random input values generated for the benchmark.</summary>
        private const int DataSize = 500000;

        /// <summary>How many times the selected technique is run in a row.</summary>
        private const int Iterations = 5;

        /// <summary>Custom <see cref="TimeSpan"/> format used for the wall-clock time of a run.</summary>
        private const string ElapsedFormat = @"mm\:ss\:fffffff";

        /// <summary>
        /// Generates the random input, runs the benchmark <see cref="Iterations"/> times
        /// and then waits for Enter before exiting.
        /// </summary>
        private static void Main()
        {
            var data = new int[DataSize];
            var random = new Random();
            for (int i = 0; i < data.Length; i++)
            {
                data[i] = random.Next();
            }

            var partialTimes = new Dictionary<int, double>();
            for (int i = 1; i <= Iterations; i++)
            {
                Console.Write($"ProcessData3 {i}\t");
                StartProcessing(data, partialTimes, ProcessData3);

                // Forced collection between runs; presumably so that garbage produced
                // by one run is not collected in the middle of the next measurement.
                GC.Collect();
            }

            Console.WriteLine();
            Console.WriteLine("Press Enter to Exit");
            Console.ReadLine();
        }

        /// <summary>
        /// Runs <paramref name="processData"/> once, measures its wall-clock time and prints
        /// that time together with the sum and the maximum of the recorded per-element times.
        /// </summary>
        /// <param name="data">Input values handed to <paramref name="processData"/>.</param>
        /// <param name="partialTimes">
        /// Receives the per-element times (milliseconds). It is always cleared before this
        /// method returns, even if <paramref name="processData"/> throws.
        /// </param>
        /// <param name="processData">Technique to benchmark; nothing is invoked when it is null.</param>
        private static void StartProcessing(int[] data, Dictionary<int, double> partialTimes, Action<int[], Dictionary<int, double>> processData)
        {
            var stopwatch = Stopwatch.StartNew();
            try
            {
                processData?.Invoke(data, partialTimes);
                stopwatch.Stop();

                double total = partialTimes.Values.Sum();

                // Enumerable.Max throws InvalidOperationException on an empty sequence, which
                // happens when data is empty or no delegate was supplied; report 0 instead.
                double max = partialTimes.Count > 0 ? partialTimes.Values.Max() : 0;

                Console.WriteLine($"{stopwatch.Elapsed.ToString(ElapsedFormat)} total = {total} max = {max}");
            }
            finally
            {
                partialTimes.Clear();
            }
        }

        /// <summary>
        /// Simulates processing one element with a 1 ms sleep and records how long the
        /// sleep actually took.
        /// </summary>
        /// <param name="index">
        /// Position of the element in the input array. Used as the dictionary key so that
        /// equal input values do not overwrite each other's measurement.
        /// </param>
        /// <param name="partialTimes">Shared result dictionary; guarded by a lock on itself.</param>
        private static void MeasureSimulatedWork(int index, Dictionary<int, double> partialTimes)
        {
            var partialStopwatch = Stopwatch.StartNew();
            Thread.Sleep(1);
            partialStopwatch.Stop();

            // Dictionary<TKey, TValue> is not safe for concurrent writes.
            lock (partialTimes)
            {
                partialTimes[index] = partialStopwatch.Elapsed.TotalMilliseconds;
            }
        }

        /// <summary>
        /// Technique 1: <c>Parallel.ForEach</c> directly over the array, one delegate call per element.
        /// </summary>
        /// <param name="data">Input values; one simulated work item is run per element.</param>
        /// <param name="partialTimes">Receives one entry per element, keyed by element index.</param>
        private static void ProcessData1(int[] data, Dictionary<int, double> partialTimes)
        {
            // The indexed overload supplies the element position as a long; an array index
            // always fits in an int.
            Parallel.ForEach(data, (number, loopState, index) =>
            {
                MeasureSimulatedWork((int)index, partialTimes);
            });
        }

        /// <summary>
        /// Technique 3: <c>Parallel.ForEach</c> over index ranges produced by a range partitioner,
        /// with a plain <c>for</c> loop inside each range.
        /// </summary>
        /// <param name="data">Input values; one simulated work item is run per element.</param>
        /// <param name="partialTimes">Receives one entry per element, keyed by element index.</param>
        private static void ProcessData3(int[] data, Dictionary<int, double> partialTimes)
        {
            // Partitioner.Create(from, to) rejects an empty range (to <= from), so there
            // is nothing to partition for an empty array.
            if (data.Length == 0)
            {
                return;
            }

            // Partition the entire source array.
            var rangePartitioner = Partitioner.Create(0, data.Length);

            // Loop over the partitions in parallel.
            Parallel.ForEach(rangePartitioner, (range, loopState) =>
            {
                // Loop over each range element without a delegate invocation.
                for (int i = range.Item1; i < range.Item2; i++)
                {
                    MeasureSimulatedWork(i, partialTimes);
                }
            });
        }

        /// <summary>
        /// Technique 2: one <see cref="Task"/> per element, then wait for all of them.
        /// </summary>
        /// <param name="data">Input values; one task is started per element.</param>
        /// <param name="partialTimes">Receives one entry per element, keyed by element index.</param>
        private static void ProcessData2(int[] data, Dictionary<int, double> partialTimes)
        {
            var tasks = new Task[data.Length];
            for (int i = 0; i < tasks.Length; i++)
            {
                // Copy the loop variable: the lambda would otherwise capture the single
                // shared 'i', which keeps changing while the tasks run.
                int index = i;
                tasks[i] = Task.Run(() => MeasureSimulatedWork(index, partialTimes));
            }

            Task.WaitAll(tasks);
        }
    }
}
I restart the program for each technique. With Thread.Sleep(1), I get these results:
ProcessData1 1 00:56:1796688 total = 801335,282599955 max = 16,8783
ProcessData1 2 00:23:5390014 total = 816167,642100022 max = 14,5913
ProcessData1 3 00:14:7090566 total = 827589,675899998 max = 13,2617
ProcessData1 4 00:10:8929177 total = 829296,528300007 max = 15,0175
ProcessData1 5 00:10:6333310 total = 839282,123200008 max = 29,2738
ProcessData2 1 00:37:8084153 total = 824507,174200022 max = 112,071
ProcessData2 2 00:16:3762096 total = 849272,47810001 max = 77,1514
ProcessData2 3 00:12:9177717 total = 854012,353100029 max = 67,5684
ProcessData2 4 00:10:4798701 total = 857396,642899983 max = 92,9408
ProcessData2 5 00:09:2206146 total = 870966,655499989 max = 51,8945
ProcessData3 1 01:13:6814541 total = 803581,718699918 max = 25,6815
ProcessData3 2 01:07:9809277 total = 814069,532899922 max = 26,0671
ProcessData3 3 01:07:9857984 total = 814148,329399928 max = 21,3116
ProcessData3 4 01:07:4812183 total = 808042,695499966 max = 16,8601
ProcessData3 5 01:07:2954614 total = 805895,325499903 max = 23,8517
where `total` is the sum of the times (in milliseconds) spent inside the body of each
Parallel.ForEach() iteration (or task), and `max` is the longest time that any single
iteration took.
Why is the first loop so slow? How is it possible that the later attempts are processed so much more quickly? How can I achieve faster parallel processing on the first attempt?
EDIT:
I also tried it with Thread.Sleep(10).
The results are:
ProcessData1 1 02:50:2845698 total = 5109831,95429994 max = 12,0612
ProcessData1 2 00:56:3361645 total = 5125884,05919954 max = 12,7666
ProcessData1 3 00:53:4911541 total = 5131105,15209993 max = 12,7486
ProcessData1 4 00:49:5665628 total = 5144654,75829992 max = 13,2678
ProcessData1 5 00:46:0218194 total = 5152955,19509996 max = 13,702
ProcessData2 1 01:21:7207557 total = 5121889,31579983 max = 73,8152
ProcessData2 2 00:39:6660074 total = 5175557,68889969 max = 59,369
ProcessData2 3 00:31:9036416 total = 5193819,89889973 max = 56,2895
ProcessData2 4 00:27:4616803 total = 5207168,56969977 max = 65,5495
ProcessData2 5 00:24:4270755 total = 5222567,9044998 max = 65,368
ProcessData3 1 02:44:9985645 total = 5110117,19019997 max = 11,7172
ProcessData3 2 02:25:6533128 total = 5237779,27010012 max = 26,3171
ProcessData3 3 02:22:2771259 total = 5116123,45259975 max = 12,0581
ProcessData3 4 02:22:1678911 total = 5112574,93779995 max = 11,5334
ProcessData3 5 02:21:9418178 total = 5104980,07120004 max = 11,5583
So the first loop still takes many more seconds than the others.