32

I have a class like this:

/// <summary>
/// Pairs a string value with a payload object of type <typeparamref name="T"/>.
/// </summary>
class MyClass<T> {
    /// <summary>String value; the question asks for list items made distinct by this property.</summary>
    public string value1 { get; set; }
    /// <summary>Payload object of type <typeparamref name="T"/> carried alongside <see cref="value1"/>.</summary>
    public T objT { get; set; }
}

and a list of this class. I would like to use .net 3.5 lambda or linq to get a list of MyClass by distinct value1. I guess this is possible and much simpler than the way in .net 2.0 to cache a list like this:

List<MyClass<T>> list; 
...
List<MyClass<T>> listDistinct = new List<MyClass<T>>();
foreach (MyClass<T> instance in list)
{
    // Pseudo-code: check whether listDistinct already contains an item whose
    // value1 equals instance.value1; only if it does not, call
    // listDistinct.Add(instance).
}

What is the lambda or LINQ way to do it?

phoog
  • 42,068
  • 6
  • 79
  • 117
David.Chu.ca
  • 37,408
  • 63
  • 148
  • 190

8 Answers8

69

Both Marc's and dahlbyk's answers seem to work very well. I have a much simpler solution though. Instead of using Distinct, you can use GroupBy. It goes like this:

// Group the items by value1, then collapse every group to a single item.
var listDistinct = list
    .GroupBy(
        item => item.value1,                     // 1st function: key selector
        (value1, members) => members.First())    // 2nd function: picks one item per key
    .ToArray();

Notice that I've passed two functions to the GroupBy(). The first is a key selector. The second gets only one item from each group. From your question, I assumed First() was the right one. You can write a different one, if you want to. You can try Last() to see what I mean.

I ran a test with the following input:

// Sample input: nine anonymous-type items; value1 repeats ("ABC" x2, "123" x2,
// "BAR" x3) while objT numbers the items 0..8 so the surviving item of each
// group can be identified in the output.
var list = new [] {
    new { value1 = "ABC", objT = 0 },
    new { value1 = "ABC", objT = 1 },
    new { value1 = "123", objT = 2 },
    new { value1 = "123", objT = 3 },
    new { value1 = "FOO", objT = 4 },
    new { value1 = "BAR", objT = 5 },
    new { value1 = "BAR", objT = 6 },
    new { value1 = "BAR", objT = 7 },
    new { value1 = "UGH", objT = 8 },
};

The result was:

//{ value1 = ABC, objT = 0 }
//{ value1 = 123, objT = 2 }
//{ value1 = FOO, objT = 4 }
//{ value1 = BAR, objT = 5 }
//{ value1 = UGH, objT = 8 }

I haven't tested it for performance. I believe that this solution is probably a little bit slower than one that uses Distinct. Despite this disadvantage, there are two great advantages: simplicity and flexibility. Usually, it's better to favor simplicity over optimization, but it really depends on the problem you're trying to solve.

jpbochi
  • 4,366
  • 3
  • 34
  • 43
  • Very interesting. Actually, the Comparer method has one limitation: it only returns the distinct one by the first found. If I need the flexibility get the distinct by the second, ..., or the last, not sure if group.xxx() would be able to do it? – David.Chu.ca Jul 26 '09 at 15:32
  • Yes, it would. Simply replace `First()` with `Last()` and see. Of course, you can make any other complex selection if you need it. – jpbochi Jul 26 '09 at 16:18
  • @David: You should consider to accept this answer. It is a flexible and elegant solution to your problem. – CARLOS LOTH May 25 '10 at 19:07
  • This saved me quite a headache. Thanks! – Chrsjkigs99 Jul 01 '13 at 15:51
  • what about distinct with more than one property? – RollRoll May 10 '17 at 14:11
  • @ThePoet more than one property as part of the key? You can merge them into a composite key; e.g., if both properties are strings, you could concatenate them with a separator char like ":". – jpbochi May 17 '17 at 15:19
  • 1
    @RollRoll then you do composite key based on anonymous class: data.GroupBy(x => new { x.prop1, x.prop2 }). – avs099 Jun 14 '18 at 19:20
  • @jpbochi no, you should never do concatenation. You will face so many issues you don't even think about. See my previous answer - use composite key. – avs099 Jun 14 '18 at 19:21
  • @avs099 "never" is quite a strong word there. Depending on some safe assumptions you can make about the values, concatenation is totally fine and can be more efficient. This kind of decision depends on your case. As I said, you _could_ do that. For the more general case, yes, your solution works. – jpbochi Jun 21 '18 at 22:02
9

Hmm... I'd probably write a custom IEqualityComparer<T> so that I can use:

// `comparer` is the custom IEqualityComparer<T> mentioned above; it decides
// which items Distinct treats as duplicates.
var listDistinct = list.Distinct(comparer).ToList();

and write the comparer via LINQ....

Possibly a bit overkill, but reusable, at least:

Usage first:

/// <summary>
/// Usage example: prints the Bar value of one item per distinct Foo value.
/// </summary>
static class Program {
    static void Main() {
        // Two of the three items share Foo == 1, so only two lines are printed.
        var data = new[] {
            new { Foo = 1,Bar = "a"}, new { Foo = 2,Bar = "b"}, new {Foo = 1, Bar = "c"}
        };
        foreach (var item in data.DistinctBy(x => x.Foo))
        {
            Console.WriteLine(item.Bar);
        }
    }
}

With utility methods:

/// <summary>
/// Extension methods that de-duplicate a sequence by a projected value.
/// </summary>
public static class ProjectionComparer
{
    /// <summary>
    /// Returns the elements of <paramref name="source"/> that have distinct values
    /// under <paramref name="selector"/>, keeping the first element seen for each
    /// value and preserving the order in which those elements occur.
    /// </summary>
    /// <typeparam name="TSource">Element type of the sequence.</typeparam>
    /// <typeparam name="TValue">Type of the projected value that is compared.</typeparam>
    /// <param name="source">Sequence to de-duplicate; enumerated immediately.</param>
    /// <param name="selector">Projects each element to the value used for comparison.</param>
    /// <returns>A fully materialized collection of the distinct elements.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="selector"/> is null.
    /// </exception>
    public static IEnumerable<TSource> DistinctBy<TSource,TValue>(
        this IEnumerable<TSource> source,
        Func<TSource, TValue> selector)
    {
        var comparer = ProjectionComparer<TSource>.CompareBy<TValue>(
            selector, EqualityComparer<TValue>.Default);
        if (source == null) throw new ArgumentNullException("source");

        // The set only answers "seen before?"; the list carries the result so the
        // output order is first-occurrence order by construction rather than
        // relying on HashSet enumeration order, which is not documented.
        var seen = new HashSet<TSource>(comparer);
        var distinct = new List<TSource>();
        foreach (var item in source)
        {
            if (seen.Add(item)) distinct.Add(item);
        }
        return distinct;
    }
}
/// <summary>
/// Factory for equality comparers that compare <typeparamref name="TSource"/>
/// instances by a value projected from each instance.
/// </summary>
public static class ProjectionComparer<TSource>
{
    /// <summary>
    /// Creates a comparer that compares projected values with the default
    /// equality comparer for <typeparamref name="TValue"/>.
    /// </summary>
    public static IEqualityComparer<TSource> CompareBy<TValue>(
        Func<TSource, TValue> selector)
    {
        return new ComparerImpl<TValue>(selector, EqualityComparer<TValue>.Default);
    }

    /// <summary>
    /// Creates a comparer that compares projected values with the supplied
    /// <paramref name="comparer"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="selector"/> or <paramref name="comparer"/> is null.
    /// </exception>
    public static IEqualityComparer<TSource> CompareBy<TValue>(
        Func<TSource, TValue> selector,
        IEqualityComparer<TValue> comparer)
    {
        return new ComparerImpl<TValue>(selector, comparer);
    }

    /// <summary>Comparer that delegates to a key comparer after projecting both operands.</summary>
    sealed class ComparerImpl<TValue> : IEqualityComparer<TSource>
    {
        private readonly Func<TSource, TValue> project;
        private readonly IEqualityComparer<TValue> keyComparer;

        public ComparerImpl(
            Func<TSource, TValue> selector,
            IEqualityComparer<TValue> comparer)
        {
            if (selector == null)
            {
                throw new ArgumentNullException("selector");
            }
            if (comparer == null)
            {
                throw new ArgumentNullException("comparer");
            }
            project = selector;
            keyComparer = comparer;
        }

        // Null operands are never projected: two nulls are equal, and a null
        // never equals a non-null.
        bool IEqualityComparer<TSource>.Equals(TSource x, TSource y)
        {
            bool xIsNull = x == null;
            bool yIsNull = y == null;
            if (xIsNull || yIsNull)
            {
                return xIsNull && yIsNull;
            }
            return keyComparer.Equals(project(x), project(y));
        }

        // A null operand hashes to 0; anything else hashes as its projected value.
        int IEqualityComparer<TSource>.GetHashCode(TSource obj)
        {
            if (obj == null)
            {
                return 0;
            }
            return keyComparer.GetHashCode(project(obj));
        }
    }
}
Marc Gravell
  • 1,026,079
  • 266
  • 2,566
  • 2,900
  • one question about codes: what is ProjectionComparer? A .Net class or LINQ or IEnumerable related class so that you can have customized extension? – David.Chu.ca Jul 26 '09 at 03:25
  • OK. I think that "ProjectionComparer" is any class name you defined, but within the class you have customized extension method DistinctBy() to IEnumerable, and ProjectionComparer is another helper class, right? Can ProjectionComparer be a different name, instead of the same name? – David.Chu.ca Jul 26 '09 at 03:30
  • If I want to get a list of value1 of MyClass, I can use this comparer like this: List listValue1s = list.Distinct(comparer).ToList().Select(y => y.value1); Is that right? – David.Chu.ca Jul 26 '09 at 06:17
  • The name of ProjectionComparer doesn't matter - you could call it EnumerableExtensions. ProjectionComparer is so named because it provides a Comparer through projection, the common term for getting a new value based on an existing one (value1 from a MyClass, for example). And for your last question: Don't call ToList() unless you need to. If you're not going to use the distinct list of MyClass objects, then you're better off getting your value1's like this: IEnumerable value1s = list.Select(y => y.value1).Distinct(); – dahlbyk Jul 26 '09 at 06:47
  • Marc, do you have any comments on jpbochi's alternative method? It seems there is no need to write a Comparer extension class, and it is much more flexible. For the LINQ-to-Objects case, it seems to be good enough. – David.Chu.ca Jul 26 '09 at 15:40
  • They are the same, in essence, except mine can be used with **any** object, rather than just one specific case. – Marc Gravell Jul 26 '09 at 19:08
3

You can use this extension method:

    // Usage: keeps one element per distinct value1, using the DistinctBy
    // extension method defined below.
    IEnumerable<MyClass> distinctList = sourceList.DistinctBy(x => x.value1);

    /// <summary>
    /// Lazily yields the elements of <paramref name="source"/> whose key, as produced
    /// by <paramref name="keySelector"/>, has not been seen earlier in the sequence.
    /// </summary>
    /// <param name="source">Sequence to filter.</param>
    /// <param name="keySelector">Produces the key that decides whether two elements are duplicates.</param>
    /// <returns>
    /// A deferred sequence that can be enumerated any number of times; each
    /// enumeration yields the same result for the same source contents.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is null.
    /// </exception>
    public static IEnumerable<TSource> DistinctBy<TSource, TKey>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector)
    {
        // Validate here, not in the iterator, so bad arguments fail at the call site.
        if (source == null) throw new ArgumentNullException("source");
        if (keySelector == null) throw new ArgumentNullException("keySelector");
        return DistinctByIterator(source, keySelector);
    }

    // Iterator body: the key set is created when enumeration starts, so every
    // enumeration gets a fresh set. Sharing one set across enumerations made the
    // second pass see every key as already known and yield nothing.
    private static IEnumerable<TSource> DistinctByIterator<TSource, TKey>(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector)
    {
        var knownKeys = new HashSet<TKey>();
        foreach (var element in source)
        {
            if (knownKeys.Add(keySelector(element)))
            {
                yield return element;
            }
        }
    }
Jon Rea
  • 9,337
  • 4
  • 32
  • 35
  • This seems to work, but when the method is called and not immediately evaluated, say with `ToList()`, the enumerable returned looks correct on the first evaluation but after a second evaluation, the enumerable is empty. Say, if I have a list of 4 objects with 1 duplicate, call this method to get an enumerable that has 3 objects. Calling `.Count()` returns `3` then calling it again returns `0`. Any ideas what's going on here? – mafafu Jul 24 '14 at 12:10
2

Check out Enumerable.Distinct(), which can accept an IEqualityComparer:

/// <summary>
/// Equality comparer that treats two <c>MyClass&lt;T&gt;</c> instances as equal
/// when their <c>value1</c> strings are equal.
/// </summary>
class MyClassComparer<T> : IEqualityComparer<MyClass<T>>
{
    /// <summary>
    /// True when both arguments are the same reference (including both null), or
    /// when both are non-null and have equal <c>value1</c>.
    /// </summary>
    public bool Equals(MyClass<T> x, MyClass<T> y)
    {
        if (Object.ReferenceEquals(x, y))
        {
            return true;
        }

        // Exactly one side being null means "not equal"; value1 is only read
        // when both sides are present.
        bool bothPresent = !Object.ReferenceEquals(x, null) && !Object.ReferenceEquals(y, null);
        return bothPresent && x.value1 == y.value1;
    }

    /// <summary>
    /// Hash code consistent with <see cref="Equals(MyClass{T}, MyClass{T})"/>:
    /// 0 for a null item, otherwise the hash of <c>value1</c> (a null
    /// <c>value1</c> hashes as the empty string).
    /// </summary>
    public int GetHashCode(MyClass<T> x)
    {
        if (Object.ReferenceEquals(x, null))
        {
            return 0;
        }

        string key = x.value1 ?? "";
        return key.GetHashCode();
    }
}

Your code snippet could look like this:

List<MyClass<T>> list; 
...
// Distinct takes a comparer instance, so the constructor is invoked with ().
List<MyClass<T>> listDistinct = list.Distinct(new MyClassComparer<T>()).ToList();
dahlbyk
  • 75,175
  • 8
  • 100
  • 122
  • I think each approach has merit. The group-by approach requires the least code and can be more flexible, but has a (slight) performance penalty and at a glance the purpose of the code is not as immediately obvious. Marc's general solution reads quite fluently, but some might say that single expression does too much: it both specifies how items are compared and does the actual select-distinct. My approach is more specific, but provides a clear separation between the equivalence logic and the operation(s) that leverage it. – dahlbyk Jul 26 '09 at 16:39
  • Thanks for your thorough comments. I agree with you on readability and separation. However, in terms of the flexibility to get the distinct instance of T by the second or last occurrence, the Comparer only returns the first, and it might be complex to build the same flexibility in, right? See my comments on jpbochi's answer. – David.Chu.ca Jul 26 '09 at 18:07
  • 1
    Indeed the Distinct-with-Comparer approach would only return the first in the "set". However, I think the semantics of "Distinct" are that the objects should be considered equivalent if they match by your criteria. Once you start picking the First or Last, you've really moved out of a "Distinct" calculation into some sort of aggregation (First, Last, Min, whatever) on a grouping. – dahlbyk Jul 26 '09 at 20:52
2

This will be more simple...

// The result selector receives the group's key and its items (c); take the first item of each group.
var distinctList = list.GroupBy(l => l.value1, (key, c) => c.FirstOrDefault());
Arasu RRK
  • 1,078
  • 16
  • 28
  • Instead of **l.FirstOrDefault()** it should be **c.FirstOrDefault()**. A similar comment was added to [this](https://stackoverflow.com/a/32165202/8240983) answer. – turbolocust Feb 29 '20 at 16:14
1

In LINQ, a more advanced approach is to use grouping:

// Group by value1 and keep the first item of each group (grp holds the group's items).
list.GroupBy(li => li.value1, (key, grp) => grp.FirstOrDefault());
1

As of .NET 6 a new DistinctBy operator has been introduced. So now we can write:

// Uses the DistinctBy operator introduced in .NET 6; no custom comparer or grouping is needed.
var listDistinct = list.DistinctBy(x => x.value1).ToList();

Here's the implementation in the source code if anyone's interested.

TKharaishvili
  • 1,997
  • 1
  • 19
  • 30
0

I took Marc's answer, fixed it to work with TSource being a value type (test for default(TSource) instead of null), cleaned up some redundant type specifications, and wrote some tests for it. Here is what I am using today. Thank you Marc for the great idea and implementation.

/// <summary>
/// Extension methods that de-duplicate a sequence by a projected value.
/// </summary>
public static class LINQExtensions
{
    /// <summary>
    /// Returns the elements of <paramref name="source"/> that have distinct values
    /// under <paramref name="selector"/>, keeping the first element seen for each
    /// value and preserving the order in which those elements occur.
    /// </summary>
    /// <typeparam name="TSource">Element type of the sequence.</typeparam>
    /// <typeparam name="TValue">Type of the projected value that is compared.</typeparam>
    /// <param name="source">Sequence to de-duplicate; enumerated immediately.</param>
    /// <param name="selector">Projects each element to the value used for comparison.</param>
    /// <returns>A fully materialized collection of the distinct elements.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="selector"/> is null.
    /// </exception>
    public static IEnumerable<TSource> DistinctBy<TSource, TValue>(
        this IEnumerable<TSource> source,
        Func<TSource, TValue> selector)
    {
        var comparer = ProjectionComparer<TSource>.CompareBy(
            selector, EqualityComparer<TValue>.Default);
        if (source == null) throw new ArgumentNullException("source");

        // The set only answers "seen before?"; the list carries the result so the
        // output order is first-occurrence order by construction rather than
        // relying on HashSet enumeration order, which is not documented.
        var seen = new HashSet<TSource>(comparer);
        var distinct = new List<TSource>();
        foreach (var item in source)
        {
            if (seen.Add(item)) distinct.Add(item);
        }
        return distinct;
    }
}
/// <summary>
/// Factory for equality comparers that compare <typeparamref name="TSource"/>
/// instances by a value projected from each instance.
/// </summary>
public static class ProjectionComparer<TSource>
{
    /// <summary>
    /// Creates a comparer that compares projected values with the default
    /// equality comparer for <typeparamref name="TValue"/>.
    /// </summary>
    public static IEqualityComparer<TSource> CompareBy<TValue>(
        Func<TSource, TValue> selector)
    {
        return CompareBy(selector, EqualityComparer<TValue>.Default);
    }

    /// <summary>
    /// Creates a comparer that compares projected values with the supplied
    /// <paramref name="comparer"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="selector"/> or <paramref name="comparer"/> is null.
    /// </exception>
    public static IEqualityComparer<TSource> CompareBy<TValue>(
        Func<TSource, TValue> selector,
        IEqualityComparer<TValue> comparer)
    {
        return new ComparerImpl<TValue>(selector, comparer);
    }

    /// <summary>Comparer that delegates to a key comparer after projecting both operands.</summary>
    sealed class ComparerImpl<TValue> : IEqualityComparer<TSource>
    {
        private readonly Func<TSource, TValue> _selector;
        private readonly IEqualityComparer<TValue> _comparer;
        public ComparerImpl(
            Func<TSource, TValue> selector,
            IEqualityComparer<TValue> comparer)
        {
            if (selector == null) throw new ArgumentNullException("selector");
            if (comparer == null) throw new ArgumentNullException("comparer");
            _selector = selector;
            _comparer = comparer;
        }

        bool IEqualityComparer<TSource>.Equals(TSource x, TSource y)
        {
            // Only a genuine null is special-cased. For a value type these checks
            // are simply false, so a default value such as 0 is projected and
            // compared like any other value. Calling x.Equals(default(TSource))
            // instead would throw on a null reference and would treat 0 as "null".
            if (x == null && y == null) return true;
            if (x == null || y == null) return false;
            return _comparer.Equals(_selector(x), _selector(y));
        }

        int IEqualityComparer<TSource>.GetHashCode(TSource obj)
        {
            // A null hashes to 0; every non-null value, including a value type's
            // default, hashes as its projected value so equal keys share a hash.
            return obj == null ? 0 : _comparer.GetHashCode(_selector(obj));
        }
    }
}

And the test class:

/// <summary>
/// Tests for the DistinctBy extension method: de-duplicating by a date key, by an
/// integer key, and over a sequence of structs.
/// </summary>
[TestClass]
public class LINQExtensionsTest
{
    /// <summary>
    /// 200 items whose dates cycle through four consecutive days collapse to four
    /// items, each being the first item seen for its date.
    /// </summary>
    [TestMethod]
    public void DistinctByTestDate()
    {
        // Read the clock once so a run that straddles midnight cannot see two
        // different values of "today".
        var today = DateTime.Today;

        var list = Enumerable.Range(0, 200).Select(i => new
        {
            Index = i,
            Date = today.AddDays(i % 4)
        }).ToList();

        var distinctList = list.DistinctBy(l => l.Date).ToList();

        Assert.AreEqual(4, distinctList.Count);

        Assert.AreEqual(0, distinctList[0].Index);
        Assert.AreEqual(1, distinctList[1].Index);
        Assert.AreEqual(2, distinctList[2].Index);
        Assert.AreEqual(3, distinctList[3].Index);

        Assert.AreEqual(today, distinctList[0].Date);
        Assert.AreEqual(today.AddDays(1), distinctList[1].Date);
        Assert.AreEqual(today.AddDays(2), distinctList[2].Date);
        Assert.AreEqual(today.AddDays(3), distinctList[3].Date);

        // The source list must be left untouched.
        Assert.AreEqual(200, list.Count);
    }

    /// <summary>
    /// 200 items whose Index cycles through 0..3 collapse to four items, each
    /// being the first item seen for its Index.
    /// </summary>
    [TestMethod]
    public void DistinctByTestInt()
    {
        // Read the clock once so a run that straddles midnight cannot see two
        // different values of "today".
        var today = DateTime.Today;

        var list = Enumerable.Range(0, 200).Select(i => new
        {
            Index = i % 4,
            Date = today.AddDays(i)
        }).ToList();

        var distinctList = list.DistinctBy(l => l.Index).ToList();

        Assert.AreEqual(4, distinctList.Count);

        Assert.AreEqual(0, distinctList[0].Index);
        Assert.AreEqual(1, distinctList[1].Index);
        Assert.AreEqual(2, distinctList[2].Index);
        Assert.AreEqual(3, distinctList[3].Index);

        Assert.AreEqual(today, distinctList[0].Date);
        Assert.AreEqual(today.AddDays(1), distinctList[1].Date);
        Assert.AreEqual(today.AddDays(2), distinctList[2].Date);
        Assert.AreEqual(today.AddDays(3), distinctList[3].Date);

        // The source list must be left untouched.
        Assert.AreEqual(200, list.Count);
    }

    /// <summary>Value-type element used to exercise DistinctBy over structs.</summary>
    struct EqualityTester
    {
        public readonly int Index;
        public readonly DateTime Date;

        public EqualityTester(int index, DateTime date) : this()
        {
            Index = index;
            Date = date;
        }
    }

    /// <summary>
    /// Over 200 structs, four distinct dates give four items and 200 distinct
    /// indexes give 200 items.
    /// </summary>
    [TestMethod]
    public void TestStruct()
    {
        // Read the clock once so a run that straddles midnight cannot see two
        // different values of "today".
        var today = DateTime.Today;

        var list = Enumerable.Range(0, 200)
            .Select(i => new EqualityTester(i, today.AddDays(i % 4)))
            .ToList();

        var distinctDateList = list.DistinctBy(e => e.Date).ToList();
        var distinctIntList = list.DistinctBy(e => e.Index).ToList();

        Assert.AreEqual(4, distinctDateList.Count);
        Assert.AreEqual(200, distinctIntList.Count);
    }
}
DomenicDatti
  • 655
  • 7
  • 15