
I am currently investigating some severe performance drops in an application.

The drops are of a strange kind: several iterations in a row run quite fast, but then there is one iteration that takes much longer to complete. The application renders graphics, so the stutter is very noticeable.

Please take a look at the following code.

while (true)
{
    var rng = new Random(1);
    var concurrentBag = new ConcurrentBag<ICollection<(int X, int Y)>>();
    Parallel.For(0, 20000, i =>
    {
        var entry = new List<(int X, int Y)>(); // essentially, this is what's going on:
        var r = rng.Next(0, 3);                 // around 20k handlers return coordinates of pixels to redraw
        for (var j = 0; j < r; j++)             // some entries end up null, some hold 1 point, more often 2
        {                                       // all entries are added to the concurrent bag
            entry.Add((j, j * j));
        }                         

        if (entry.Count == 0)     
            entry = null;         

        concurrentBag.Add(entry);  
    });

    var sw = Stopwatch.StartNew();
    var results = concurrentBag.ToList().AsParallel().Where(x => x != null).SelectMany(x => x).Distinct().ToList();  // this is where severe performance drops occur from time to time
    var time = sw.ElapsedMilliseconds;

    Console.WriteLine($"CB count: {concurrentBag.Count:00000}, result count: {results.Count:00}, time: {time:000}");
    //Thread.Sleep(1000);
}

This code produces the following results:

CB count: 20000, result count: 02, time: 032        <- this is fine, initialization and stuff       
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 014        <- this is not fine
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 015        <- every couple of frames it happens again
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 019
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 014
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 008
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 004
CB count: 20000, result count: 02, time: 011
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 003
CB count: 20000, result count: 02, time: 004

I believe you get the idea. In the real application, every "good" iteration takes around 10-15 ms, and a slow iteration occurs every 6-8 iterations and takes up to 150 ms or so.

I honestly thought something was very wrong with my business logic, but you can run the example above and get much the same results. My guess now is that there is something wrong with the way I used Parallel.For, AsParallel() or ConcurrentBag, but I have no idea what exactly.

Dmitry Volkov
  • Checked the Garbage Collector? – TomTom Oct 01 '18 at 12:33
  • @TomTom No, I did not ;D – Dmitry Volkov Oct 01 '18 at 12:41
  • What exactly are you trying to achieve in your real application? I have a feeling you wanted a parallel producer/consumer pattern here but ended up first collecting lots of data and then handling it all afterwards. That gives you the GC pressure... Should the second loop not run in parallel to the first and process items as they get added? That would keep your memory footprint lower (a sketch of this pattern follows these comments). – dnickless Oct 02 '18 at 06:17
  • @DmitryVolkov a ConcurrentBag is *not* a general-purpose concurrent collection. It's meant for thread-local storage, which means you pay a penalty when accessing any bucket created by a different thread. – Panagiotis Kanavos Oct 02 '18 at 09:07
  • As a side note, the `ConcurrentBag` is a [very specialized](https://stackoverflow.com/questions/15400133/when-to-use-blockingcollection-and-when-concurrentbag-instead-of-listt/64823123#64823123) collection. A `ConcurrentQueue` would have served you better in this case, because it preserves the order of the enqueued items (and it would be slightly faster as well). – Theodor Zoulias Feb 21 '21 at 09:44
  • Also be aware that the `Random` class [is not thread safe](https://stackoverflow.com/questions/5819638/is-random-class-thread-safe). Its internal state is mutated every time you get a random value, and if the invocations are not synchronized its state becomes corrupted. In which case the `Random` instance stops being a good quality RNG, and starts emitting consecutive zeroes or whatever. – Theodor Zoulias Feb 21 '21 at 09:50
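
For illustration, the producer/consumer pattern suggested in the comments above could look roughly like this. This is only a sketch, not code from the thread: it keeps the per-handler List allocations but merges entries as they arrive instead of collecting everything first, which is what keeps the memory footprint lower. The usual System.Collections.Concurrent and System.Threading.Tasks usings are assumed.

var rng = new Random(1);
var pending = new BlockingCollection<List<(int X, int Y)>>();
var results = new HashSet<(int X, int Y)>();

// single consumer merges entries as they arrive, so results needs no lock
var consumer = Task.Run(() =>
{
    foreach (var entry in pending.GetConsumingEnumerable())
    {
        foreach (var p in entry)
        {
            results.Add(p);
        }
    }
});

Parallel.For(0, 20000, i =>
{
    var entry = new List<(int X, int Y)>();
    int r;
    lock (rng)                        // Random is not thread-safe
    {
        r = rng.Next(0, 3);
    }
    for (var j = 0; j < r; j++)
    {
        entry.Add((i, j * j));
    }
    if (entry.Count > 0)
    {
        pending.Add(entry);
    }
});

pending.CompleteAdding();             // signal the consumer that production is done
consumer.Wait();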

2 Answers


If you call GC.Collect() before the measured section, the problem mostly vanishes (a minimal sketch of this check follows the code below), so it seems you have a problem with garbage collection. Try to produce less garbage and fewer complicated throw-away structures. Here is one way to redesign your solution:

var results = new HashSet<(int X, int Y)>();
object resultLockObj = new object();
var rng = new Random(1);
var sw = new Stopwatch();

while (true)
{
    results.Clear();
    sw.Restart();

    Parallel.For(0, 20000, i =>
    {
        var entry = new List<(int X, int Y)>(); // essentially, this is what's going on:
        int r;
        lock (rng)                              // Random is not thread-safe, so synchronize access to it
        {
            r = rng.Next(0, 3);
        }
        for (var j = 0; j < r; j++)             // some entries end up null, some hold 1 point, more often 2
        {                                       // non-null entries are merged into the shared result set
            entry.Add((i, j * j));
        }

        if (entry.Count > 0)
        {
            lock (resultLockObj)
            {
                foreach (var x in entry)
                {
                    results.Add(x);
                }
            }
        }
    });

    var time = sw.ElapsedMilliseconds;

    Console.WriteLine($"Result count: {results.Count:00000}, time: {time:000}");
    //Thread.Sleep(1000);
}
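
To see the GC effect for yourself, a minimal check is to force a full collection right before the measured section of the original loop. The placement here is my suggestion; the calls themselves are standard .NET:

// force a full, blocking collection before timing; if the spikes vanish,
// the pauses were GC work caused by the allocations above
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();

var sw = Stopwatch.StartNew();
// ... the measured PLINQ query from the question goes here ...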

EDIT

I made slight changes. (j, j * j) is now (i, j * j), so there are no duplicates in the result and no performance gain from eliminating them. And instead of creating the HashSet every time, I just Clear it (something you cannot do with a ConcurrentBag) to further reduce garbage production. You are right that the tuple is a value type, but the problem is somewhere else. When you add Lists into another structure, you keep pointers to them, and reclaiming them gets more expensive. It is better to use simple, short-lived structures. And if you can recycle them, that is obviously the best option.
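
The recycling idea can go further: the Parallel.For overload with thread-local state lets each worker reuse one buffer instead of allocating a new List per handler. A minimal sketch, assuming the same results, resultLockObj and rng setup as the code above:

Parallel.For(0, 20000,
    () => new List<(int X, int Y)>(),      // one buffer per worker, allocated once
    (i, _, buffer) =>
    {
        buffer.Clear();                    // recycle the buffer instead of newing a List
        int r;
        lock (rng)                         // Random is not thread-safe
        {
            r = rng.Next(0, 3);
        }
        for (var j = 0; j < r; j++)
        {
            buffer.Add((i, j * j));
        }

        if (buffer.Count > 0)
        {
            lock (resultLockObj)
            {
                foreach (var p in buffer)
                {
                    results.Add(p);
                }
            }
        }
        return buffer;                     // hand the buffer back for the next iteration
    },
    _ => { });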

Antonín Lejsek
  • Hey Antonín! Thanks for the answer! It seems the drops in performance have been reduced! Let me check your approach in the real application before I accept your answer ;D – Dmitry Volkov Oct 01 '18 at 12:44
  • Antonín, there is a problem. In your solution you are eliminating the performance drops with the `HashSet`, relying on its performance with duplicate items. It is true that in the example above 99.99% of the items in the `ConcurrentBag` are duplicates, but that is just a poor example. In the real application only about 1% are duplicates, so this redesign does nothing special. – Dmitry Volkov Oct 01 '18 at 14:10
  • And by the way, Antonín, you're speaking about garbage collection, but isn't `(int X, int Y)` a `ValueTuple`? I thought those wouldn't be handled by the GC. – Dmitry Volkov Oct 01 '18 at 14:12

Your issue is caused by the GC suspending all threads in order to do its black magic (I profiled it and the results were very obvious). The reason is that you are allocating one hell of a lot of Lists and ConcurrentBags and ValueTuples (wrapped in the lists, so ultimately on the heap anyway) which you then throw away on the next loop iteration. The ConcurrentBag goes out of scope, and so do all the Lists contained in it, and the ValueTuples with them.

So you want to eliminate all those allocations, which you can do e.g. by allocating the required storage on the heap upfront, thus avoiding new instantiations.
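
(As an aside, and my addition rather than part of the original advice: if the allocations cannot be removed entirely, .NET Framework 4.5+ also lets you request a low-latency GC mode around the rendering loop. It shortens pauses but does not remove the allocation pressure itself.)

using System.Runtime;

// trade some GC throughput for shorter pauses while rendering;
// SustainedLowLatency avoids blocking full collections where possible
GCSettings.LatencyMode = GCLatencyMode.SustainedLowLatency;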

The following code should give you an idea of how this can be achieved. It's probably not 100% semantically equivalent, but I'm assuming you wouldn't be able to copy/paste it into your solution anyway, because it's based on a simplified example:

// type that simply serves as a data container (replacing the ValueTuple)
private class Data : IEquatable<Data>
{
    private const int maxNumberOfRandom = 3;

    public readonly int[] Values1 = new int[maxNumberOfRandom];
    public readonly int[] Values2 = new int[maxNumberOfRandom];
    public bool IsNull { get; set; }

    public bool Equals(Data other)
    {
        if (other == null)
        {
            return false;
        }
        return CompareArrays(Values1, other.Values1) && CompareArrays(Values2, other.Values2);
    }

    private static bool CompareArrays(int[] values, int[] otherValues)
    {
        for (var i = 0; i < maxNumberOfRandom; i++)
        {
            if (values[i] != otherValues[i])
            {
                return false;
            }
        }
        return true;
    }

    public override int GetHashCode()
    {
        // hash the array contents; hashing the array references (which is what
        // int[].GetHashCode() does) would give two equal instances different
        // hash codes and break Distinct()
        unchecked
        {
            var hashCode = 17;
            for (var i = 0; i < maxNumberOfRandom; i++)
            {
                hashCode = (hashCode * 397) ^ Values1[i];
                hashCode = (hashCode * 397) ^ Values2[i];
            }
            return hashCode;
        }
    }
}

static void Main(string[] args)
{
    const int count = 20000;

    var list = new List<Data>(count);
    // initialization loop to provision the required memory on the heap
    for (int i = 0; i < count; i++)
    {
        list.Add(new Data());
    }

    while (true)
    {
        var rng = new Random(1);

        Parallel.For(0, count, i =>
        {
            // Random isn't thread-safe: https://learn.microsoft.com/en-us/dotnet/api/system.random?view=netframework-4.7.2#the-systemrandom-class-and-thread-safety
            int r;
            lock (rng)
            {
                r = rng.Next(0, 3);
            }

            if (r == 0)
            {
                // we can do index-based access here so no need for locking
                list[i].IsNull = true;
            }
            else
            {
                // we can do index-based access here so no need for locking
                var data = list[i];
                data.IsNull = false;

                int j;
                for (j = 0; j < r; j++)
                {
                    data.Values1[j] = j;
                    data.Values2[j] = j * j;
                }
                Array.Clear(data.Values1, j, data.Values1.Length - j);
                Array.Clear(data.Values2, j, data.Values2.Length - j);
            }
        });

        var sw = Stopwatch.StartNew();
        // ToList() still takes a snapshot copy of the list each frame, mirroring the original query
        var results = list.ToList().AsParallel().Where(x => !x.IsNull).Distinct().ToList();
        var time = sw.ElapsedMilliseconds;

        Console.WriteLine($"CB count: {list.Count:00000}, result count: {results.Count:00}, time: {time:000}");
    }
}
dnickless