-2

I'm trying to understand C# LINQ implementation and how is it performance against FOR and FOREACH loops.

Every where I see posts of how much better (in terms of performance) is to use a for loop implementation over a LINQ one. Example1, Example2, Example3

How ever, I'm trying to come along with my own POC to see if I can optimize the GroupBy and the Where operations and I see the opposite. Can you tell me if my implementations can be optimized better?

        //Where Implementation (Main Call)
        var students = createStudentList();
        var stopwatch1 = new Stopwatch();
        stopwatch1.Start();
        var y = students.Where(s=> s.age == 32);
        foreach(var entry in y){}
        stopwatch1.Stop();

        Console.WriteLine("1) TICKS ELAPSED WHERE: " + stopwatch1.ElapsedTicks);
        Console.WriteLine("1) MILLISECONDS  WHERE: " + stopwatch1.ElapsedMilliseconds);

        var stopwatch2 = new Stopwatch();
        stopwatch2.Start();
        var y2 = WhereManual(students);
        foreach(var entry in y2){}
        stopwatch2.Stop();

        Console.WriteLine("2) TICKS ELAPSED FOR: " + stopwatch2.ElapsedTicks);
        Console.WriteLine("2) MILLISECONDS  FOR: " + stopwatch2.ElapsedMilliseconds);



    public List<Student> WhereManual(List<Student> students){
        var filteredList = new List<Student>();
        for(var i = 0; i < students.Count(); i++){
            var student = students[i];
            if(student.age == 32){
                filteredList.Add(student);
            }
        }
        return filteredList;
    }

Output:

1) TICKS ELAPSED WHERE: 389478
1) MILLISECONDS WHERE: 38
2) TICKS ELAPSED FOR: 654023
2) MILLISECONDS FOR: 65

And for the GroupBy I have

//GroupBy Implementation (Main Call)
        var students = createStudentList();
        var stopwatch1 = new Stopwatch();
        stopwatch1.Start();
        var y = students.GroupBy(s => s.age);
        foreach(var entry in y){}
        stopwatch1.Stop();

        Console.WriteLine("1) TICKS ELAPSED GROUPBY: " + stopwatch1.ElapsedTicks);
        Console.WriteLine("1) MILLISECONDS GROUPBY: " + stopwatch1.ElapsedMilliseconds);

        var stopwatch2 = new Stopwatch();
        stopwatch2.Start();
        var y2 = dictOperation(students);
        foreach(var entry in y2){}
        stopwatch2.Stop();

        Console.WriteLine("2) TICKS ELAPSED FOR: " + stopwatch2.ElapsedTicks);
        Console.WriteLine("2) MILLISECONDS  FOR: " + stopwatch2.ElapsedMilliseconds);


    public List<Student> GetStudent(Dictionary<int, List<Student>> dict, int age){
        List<Student> dictStudent;
        return dict.TryGetValue(age, out dictStudent) ? dictStudent : null;
    }

    public Dictionary<int, List<Student>> dictOperation(List<Student> students){
        var dict = new Dictionary<int, List<Student>>();
        for(var i = 0; i < students.Count(); i++){
            var student = students[i];
            var studentAge = student.age;
            var dictStudent = GetStudent(dict, studentAge);
            if(dictStudent == null)
            {
                dict.Add(studentAge, new List<Student>(){student});
            } 
            else
            {
                dictStudent.Add(student);
            }
        }
        return dict;
    }

And this is the output:

1) TICKS ELAPSED GROUPBY: 865702
1) MILLISECONDS GROUPBY: 86
2) TICKS ELAPSED FOR: 1364863
2) MILLISECONDS FOR: 1.36
Pablo Glez
  • 306
  • 1
  • 7
  • 20
  • 3
    Please make sure to properly measure results (https://stackoverflow.com/questions/457605/how-to-measure-code-performance-in-net) - run enough iterations to get stable output, skip JIT by running code once, make sure code is not optimized out (I don't think `foreach(var entry in y2){}` can be optimized out but it does not hurt to check). Then [edit] post with numbers. – Alexei Levenkov Jan 09 '20 at 01:28
  • Replace `Count()` with `Count`. The first is linq function, the second is direct `List` method. – Antonín Lejsek Jan 09 '20 at 01:37
  • 2
    @Antonin Though `Count()` ought not to be a significant performance hit, since it's optimised for things like arrays and lists that offer their own counts. – ProgrammingLlama Jan 09 '20 at 01:44
  • 1
    (to be more specific about `Count` : https://learn.microsoft.com/en-us/dotnet/api/system.linq.enumerable.count?view=netframework-4.8 - "If the type of source implements ICollection, that implementation is used to obtain the count of elements. Otherwise, this method determines the count.") – Alexei Levenkov Jan 09 '20 at 01:50
  • @John_ReinstateMonica while I generally agree, it makes this very tiny loop about 10x slower in my measurement. – Antonín Lejsek Jan 09 '20 at 01:51
  • 1
    @John_ReinstateMonica In my testing, removing the parenthesis made a 40000 tick difference when iterating over 10000000 items, and was enough to make the manual test faster than linq in the code above. – Rufus L Jan 09 '20 at 03:00
  • For the `GroupBy` comparison, you're using a different type than Linq, which [internally](https://github.com/dotnet/corefx/blob/master/src/System.Linq/src/System/Linq/Grouping.cs) uses a `Lookup` to get the enumerator. Possibly the underlying type is the bottle neck in the manual test. – Rufus L Jan 09 '20 at 03:03
  • @Rufus Of course it made some difference, since it has to do the pattern matching, etc. to get an ICollection. I just meant that, compared to the other tasks, it probably doesn't make a huge difference (certainly not enough to be the reason why OP's manual for is slower). – ProgrammingLlama Jan 09 '20 at 03:05
  • @John_ReinstateMonica But in my testing it ***is*** enough to make a difference – Rufus L Jan 09 '20 at 03:06
  • Also, removing the unnecessary assignment `var student = students[i];` makes a very slight improvement. – Rufus L Jan 09 '20 at 03:09

1 Answers1

0

Not much of an answer, but since I played with it a little I may as well share.

I did not spend much time looking at the GroupBy comparison because the types used are different enough that they may be the bottleneck, and I'm not familiar enough with IGrouping to create a new test right now.

I found that if you use the List.Count property instead of the List.Count() extension method, it saved enough time (iterating over 1000000 items) to make the manual code faster than Linq. Additionally, a few more milliseconds were saved by removing the assignment var student = students[i];:

public class Student { public string Name { get; set; } public int Age { get; set; } }

public class Program
{
    public static List<Student> Students = new List<Student>();

    public static void CreateStudents()
    {
        for (var i = 0; i < 1000000; i++)
        {
            Students.Add(new Student {Name = $"Student{i}", Age = i});
        }
    }

    public static List<Student> WhereManualOriginal(List<Student> students)
    {
        var filteredList = new List<Student>();

        for (var i = 0; i < students.Count(); i++)
        {
            var student = students[i];

            if (student.Age == 32)
            {
                filteredList.Add(student);
            }
        }

        return filteredList;
    }

    public static List<Student> WhereManualNew(List<Student> students)
    {
        var filteredList = new List<Student>();

        for (var i = 0; i < students.Count; i++)
        {
            if (students[i].Age == 32)
            {
                filteredList.Add(students[i]);
            }
        }

        return filteredList;
    }

    public static long LinqWhere()
    {
        var sw = Stopwatch.StartNew();
        var items = Students.Where(s => s.Age == 32);
        foreach (var item in items) { }
        sw.Stop();
        return sw.ElapsedTicks;
    }

    public static long ManualWhere()
    {
        var sw = Stopwatch.StartNew();
        var items = WhereManualOriginal(Students);
        foreach (var item in items) { }
        sw.Stop();
        return sw.ElapsedTicks;
    }

    public static long NewManualWhere()
    {
        var sw = Stopwatch.StartNew();
        var items = WhereManualNew(Students);
        foreach (var item in items) { }
        sw.Stop();
        return sw.ElapsedTicks;
    }


    public static void Main()
    {
        // Warmup stuff
        CreateStudents();
        WhereManualOriginal(Students);
        WhereManualNew(Students);
        Students.Where(s => s.Age == 32).ToList();
        var linqResults = new List<long>();
        var manualResults = new List<long>();
        var newManualResults = new List<long>();

        for (int i = 0; i < 100; i++)
        {
            newManualResults.Add(NewManualWhere());
            manualResults.Add(ManualWhere());
            linqResults.Add(LinqWhere());
        }

        Console.WriteLine("Linq where ......... " + linqResults.Average());
        Console.WriteLine("Manual where ....... " + manualResults.Average());
        Console.WriteLine("New Manual where ... " + newManualResults.Average());

        GetKeyFromUser("\nDone! Press any key to exit...");
    }
}

Output

enter image description here

Rufus L
  • 36,127
  • 5
  • 30
  • 43