150
/// <summary>
/// Console demo that builds two books in memory and prints the authors that
/// <c>Distinct()</c> yields from the flattened author lists.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the sample data, runs the distinct query, writes one
    /// "FirstName LastName" line per resulting author and then waits for a
    /// character on standard input.
    /// </summary>
    /// <param name="args">Command-line arguments; not read by this method.</param>
    static void Main(string[] args)
    {
        // First book: two separate Author instances that carry identical names.
        List<Author> depthAuthors = new List<Author>();
        depthAuthors.Add(new Author { FirstName = "Jon", LastName = "Skeet" });
        depthAuthors.Add(new Author { FirstName = "Jon", LastName = "Skeet" });

        // Second book: three authors with different names.
        List<Author> linqAuthors = new List<Author>();
        linqAuthors.Add(new Author { FirstName = "Fabrice", LastName = "Marguerie" });
        linqAuthors.Add(new Author { FirstName = "Steve", LastName = "Eichert" });
        linqAuthors.Add(new Author { FirstName = "Jim", LastName = "Wooley" });

        List<Book> books = new List<Book>();
        books.Add(new Book { Name = "C# in Depth", Authors = depthAuthors });
        books.Add(new Book { Name = "LINQ in Action", Authors = linqAuthors });

        // Flatten every book's author list into one sequence, then de-duplicate it
        // with the default equality comparer for Author.
        var temp = books.SelectMany(book => book.Authors).Distinct();
        foreach (var writer in temp)
        {
            Console.WriteLine(string.Concat(writer.FirstName, " ", writer.LastName));
        }

        // Keep the console window open until a key is pressed.
        Console.Read();
    }

}
/// <summary>A book title together with the list of its authors.</summary>
public class Book
{
    /// <summary>Title of the book.</summary>
    public string Name { get; set; }
    /// <summary>Authors credited on the book.</summary>
    public List<Author> Authors { get; set; }
}
/// <summary>
/// An author described by a first and a last name.
/// </summary>
public class Author
{
    /// <summary>The author's first name.</summary>
    public string FirstName { get; set; }
    /// <summary>The author's last name.</summary>
    public string LastName { get; set; }
    /// <summary>
    /// Experimental override that reports every object as equal to this instance.
    /// </summary>
    /// <param name="obj">The object to compare with; the active body never reads it.</param>
    /// <returns>Always <c>true</c>.</returns>
    public override bool Equals(object obj)
    {
        // NOTE(review): GetHashCode is not overridden alongside this method.
        return true;
        // Earlier attempt, left disabled. NOTE(review): its second comparison checks
        // obj.FirstName against this.LastName, which looks like a typo for obj.LastName.
        //if (obj.GetType() != typeof(Author)) return false;
        //else return ((Author)obj).FirstName == this.FirstName && ((Author)obj).FirstName == this.LastName;
    }

}

This is based on an example in "LINQ in Action". Listing 4.16.

This prints Jon Skeet twice. Why? I have even tried overriding Equals method in Author class. Still Distinct does not seem to work. What am I missing?

Edit: I have added == and != operator overload too. Still no help.

 /// <summary>Equality operator that reports any two authors as equal.</summary>
 /// <returns>Always <c>true</c>; neither operand is inspected.</returns>
 public static bool operator ==(Author a, Author b)
    {
        return true;
    }
    /// <summary>Inequality operator paired with <c>==</c> above.</summary>
    /// <returns>Always <c>false</c>; neither operand is inspected.</returns>
    public static bool operator !=(Author a, Author b)
    {
        return false;
    }
Joe
  • 1,091
  • 1
  • 11
  • 23
Tanmoy
  • 44,392
  • 16
  • 45
  • 55

11 Answers11

201

LINQ Distinct is not that smart when it comes to custom objects.

All it does is look at your list and see that it has two different objects (it doesn't care that they have the same values for the member fields).

One workaround is to implement the IEquatable interface as shown here.

If you modify your Author class like so it should work.

/// <summary>
/// An author compared by value: two instances are equal when both
/// <see cref="FirstName"/> and <see cref="LastName"/> match.
/// </summary>
/// <remarks>
/// Both the strongly-typed <see cref="Equals(Author)"/> and the untyped
/// <see cref="Equals(object)"/> are provided, together with a matching
/// <see cref="GetHashCode"/>, so that generic code (LINQ <c>Distinct</c>,
/// <c>HashSet&lt;T&gt;</c>) and non-generic code agree on equality.
/// The hash is derived from mutable properties, so do not change the names
/// while an instance is stored in a hash-based collection.
/// </remarks>
public class Author : IEquatable<Author>
{
    /// <summary>The author's first name; may be <c>null</c>.</summary>
    public string FirstName { get; set; }

    /// <summary>The author's last name; may be <c>null</c>.</summary>
    public string LastName { get; set; }

    /// <summary>Compares this author with another one by first and last name.</summary>
    /// <param name="other">The author to compare with; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="other"/> is not <c>null</c> and both names match;
    /// otherwise <c>false</c>.
    /// </returns>
    public bool Equals(Author other)
    {
        // ReferenceEquals keeps the null test independent of any == overload on Author.
        if (ReferenceEquals(other, null))
        {
            return false;
        }

        return FirstName == other.FirstName && LastName == other.LastName;
    }

    /// <summary>Untyped equality; delegates to <see cref="Equals(Author)"/>.</summary>
    /// <param name="obj">The object to compare with; may be <c>null</c> or of another type.</param>
    /// <returns><c>true</c> only when <paramref name="obj"/> is an equal <see cref="Author"/>.</returns>
    public override bool Equals(object obj)
    {
        // "as" yields null for null or for a non-Author, which the typed overload rejects.
        return Equals(obj as Author);
    }

    /// <summary>Returns a hash code built from the same two properties that Equals compares.</summary>
    /// <returns>A hash code that is identical for any two equal authors.</returns>
    public override int GetHashCode()
    {
        int hashFirstName = FirstName == null ? 0 : FirstName.GetHashCode();
        int hashLastName = LastName == null ? 0 : LastName.GetHashCode();

        // Multiply before XOR so that swapping first and last name changes the hash;
        // unchecked lets the multiplication wrap instead of throwing in checked builds.
        unchecked
        {
            return (hashFirstName * 397) ^ hashLastName;
        }
    }
}

Try it as DotNetFiddle

Matt
  • 25,467
  • 18
  • 120
  • 187
skalb
  • 5,357
  • 3
  • 26
  • 23
  • 27
    IEquatable is fine but incomplete; you should *always* implement Object.Equals() and Object.GetHashCode() together; IEquatable.Equals does not override Object.Equals, so this will fail when making non-strongly typed comparisons, which occurs often in frameworks and always in non-generic collections. – AndyM Sep 02 '09 at 04:23
  • So is it better to use the override of Distinct that takes IEqualityComparer as Rex M has suggested? I mean what I should be doing if I dont want to fall into the trap. – Tanmoy Sep 02 '09 at 04:29
  • 4
    @Tanmoy it depends. If you want Author to normally behave like a normal object (i.e. only reference equality) but check the name values for the purpose of Distinct, use an IEqualityComparer. If you *always* want Author objects to be compared based on the name values, then override GetHashCode and Equals, or implement IEquatable. – Rex M Sep 02 '09 at 14:29
  • 5
    I implemented `IEquatable` (and overrode `Equals`/`GetHashCode`) but none of my breakpoints are firing in these methods on a Linq `Distinct`? – PeterX Dec 15 '14 at 04:49
  • 2
    @PeterX I noticed this too. I had breakpoints in the `GetHashCode` and `Equals`, they were hit during the foreach loop. This is because the `var temp = books.SelectMany(book => book.Authors).Distinct();` returns an `IEnumerable`, meaning that the request is not executed right away, it is only executed when the data is used. If you would like an example of this firing right away, then add `.ToList()` after the `.Distinct()` and you will see the breakpoints in the `Equals` and `GetHashCode` before the foreach. – JabberwockyDecompiler Jun 19 '15 at 15:15
  • Yes, that could be it - thanks. I actually recall passing in an overloaded IComparable (I believe) in one case to get it working. – PeterX Jun 22 '15 at 06:20
77

By default, the Distinct() method checks reference equality for reference types that do not override Equals and GetHashCode. This means it is looking for literally the same object duplicated, not different objects which contain the same values.

There is an overload which takes an IEqualityComparer, so you can specify different logic for determining whether a given object equals another.

If you want Author to normally behave like a normal object (i.e. only reference equality), but for the purposes of Distinct check equality by name values, use an IEqualityComparer. If you always want Author objects to be compared based on the name values, then override GetHashCode and Equals, or implement IEquatable.

The two members on the IEqualityComparer interface are Equals and GetHashCode. Your logic for determining whether two Author objects are equal appears to be if the First and Last name strings are the same.

/// <summary>
/// Equality comparer that treats two <see cref="Author"/> instances as equal when
/// their first and last names match. Pass it to <c>Distinct</c> (or any other
/// comparer-accepting API) when <see cref="Author"/> itself should keep its
/// default equality.
/// </summary>
public class AuthorEquals : IEqualityComparer<Author>
{
    /// <summary>Compares two authors by first and last name.</summary>
    /// <param name="left">First author; may be <c>null</c>.</param>
    /// <param name="right">Second author; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when both are <c>null</c>, or when neither is <c>null</c> and both
    /// names match; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(Author left, Author right)
    {
        // ReferenceEquals bypasses any == overload declared on Author.
        // This also covers the "both null" case.
        if (ReferenceEquals(left, right))
        {
            return true;
        }

        if (ReferenceEquals(left, null) || ReferenceEquals(right, null))
        {
            return false;
        }

        return left.FirstName == right.FirstName && left.LastName == right.LastName;
    }

    /// <summary>Returns a hash code built from the same two properties that Equals compares.</summary>
    /// <param name="author">The author to hash; may be <c>null</c>.</param>
    /// <returns>
    /// 0 for a <c>null</c> author (Equals treats two nulls as equal, so they must share
    /// a hash); otherwise a combination of the two name hashes.
    /// </returns>
    public int GetHashCode(Author author)
    {
        if (ReferenceEquals(author, null))
        {
            return 0;
        }

        int hashFirstName = author.FirstName == null ? 0 : author.FirstName.GetHashCode();
        int hashLastName = author.LastName == null ? 0 : author.LastName.GetHashCode();

        // Combining the two hashes avoids allocating a concatenated string per call;
        // unchecked lets the multiplication wrap instead of throwing in checked builds.
        unchecked
        {
            return (hashFirstName * 397) ^ hashLastName;
        }
    }
}
Rex M
  • 142,167
  • 33
  • 283
  • 313
  • 1
    Thank you! Your GetHashCode() implementation showed me what I was still missing. I was returning {passed-in object}.GetHashCode(), not {property being used for comparison}.GetHashCode(). That made the difference and explains why mine was still failing - two different references would have two different hash codes. – pelazem Feb 18 '15 at 20:17
61

Another solution without implementing IEquatable, Equals and GetHashCode is to use the LINQs GroupBy method and to select the first item from the IGrouping.

// Group on a composite key rather than on FirstName + LastName: concatenation
// would put different authors such as ("Jo", "nSkeet") and ("Jon", "Skeet")
// into the same group. Anonymous types compare property by property.
var temp = books.SelectMany(book => book.Authors)
                .GroupBy (y => new { y.FirstName, y.LastName })
                .Select (y => y.First ());

// One representative (the first one encountered) is printed per distinct name pair.
foreach (var author in temp){
  Console.WriteLine(author.FirstName + " " + author.LastName);
}
Jehof
  • 34,674
  • 10
  • 123
  • 155
  • 1
    it did helped me, just considering performance, does this perform at same speed?, as considering above methods? – Biswajeet Jun 09 '15 at 07:10
  • much nicer than complicating it with implementing methods, and if using EF will delegate the work to the sql server. – Zapnologica Aug 31 '15 at 11:25
  • while This method may work, there will be a performance issue due to the number of things being grouped – Bellash Mar 09 '16 at 09:37
  • @Bellash Make it work then make it fast. Sure does this grouping may lead to more work to be done. but sometimes it is cumbersome to implement more than you want. – Jehof Mar 09 '16 at 09:46
  • 2
    I prefer this solution but then by using a "new" object in the groupby: `.GroupBy(y => new { y.FirstName, y.LastName })` – Dave de Jong Feb 12 '19 at 18:46
  • For some reason got this "The method 'First' can only be used as a final query operation. Consider using the method 'FirstOrDefault' in this instance instead." so used FirstOrDefault() instead. – Arvind Singh Jun 25 '20 at 05:27
51

There is one more way to get distinct values from list of user defined data type:

// Groups the items by Id and keeps the first item of each group.
// The list returned by ToList() is not assigned here; store it in a variable to use it.
YourList.GroupBy(i => i.Id).Select(i => i.FirstOrDefault()).ToList();

Surely, it will give distinct set of data

Makis Mak
  • 5
  • 4
Ashu_90
  • 904
  • 7
  • 8
  • VB: `.GroupBy(Function(d) New With {Key d.field1, Key d.field2, Key d.field3}).Select(Function(d) d.FirstOrDefault()).OrderBy(Function(X) X.field1).ToList()` – Dani Sep 11 '20 at 09:51
26

Distinct() performs the default equality comparison on objects in the enumerable. If you have not overridden Equals() and GetHashCode(), then it uses the default implementation on object, which compares references.

The simple solution is to add a correct implementation of Equals() and GetHashCode() to all classes which participate in the object graph you are comparing (ie Book and Author).

The IEqualityComparer interface is a convenience that allows you to implement Equals() and GetHashCode() in a separate class when you don't have access to the internals of the classes you need to compare, or if you are using a different method of comparison.

AndyM
  • 1,057
  • 1
  • 7
  • 11
13

You've overridden Equals(), but make sure you also override GetHashCode().

Eric King
  • 11,594
  • 5
  • 43
  • 53
  • 1
    +1 for emphasizing GetHashCode(). Do not add the base HashCode implementation as in `^base.GetHashCode()` – Dani Jul 08 '13 at 05:30
9

The answers above are wrong! As stated on MSDN, Distinct uses the default equality comparer, EqualityComparer<T>.Default. The Default property checks whether type T implements the System.IEquatable<T> interface and, if so, returns an EqualityComparer<T> that uses that implementation. Otherwise, it returns an EqualityComparer<T> that uses the overrides of Object.Equals and Object.GetHashCode provided by T.

Which means that as long as you override Equals, you are fine.

The reason your code is not working is that you check FirstName == LastName.

see https://msdn.microsoft.com/library/bb348436(v=vs.100).aspx and https://msdn.microsoft.com/en-us/library/ms224763(v=vs.100).aspx

Alex
  • 167
  • 1
  • 6
4

Instead of

// Distinct() is called without a comparer argument here.
var temp = books.SelectMany(book => book.Authors).Distinct();

Do

// DistinctBy de-duplicates on the key returned by the selector (here f.Property).
// NOTE(review): Enumerable.DistinctBy ships with .NET 6+; confirm the target framework.
var temp = books.SelectMany(book => book.Authors).DistinctBy(f => f.Property);
Jeremy Caney
  • 7,102
  • 69
  • 48
  • 77
Sokika LLC
  • 41
  • 1
2

You can achieve this several ways:

1. You can implement the IEquatable interface as shown in the documentation for the Enumerable.Distinct method, or see @skalb's answer in this post.

2. If your object does not have a unique key, you can use the GroupBy method to obtain a distinct object list: group by all of the object's properties and then select the first object of each group.

For example, the following works for me:

// Group on every property that defines identity; anonymous types compare
// property by property, so items with identical values land in one group.
// The first item of each group is kept as its representative.
var distinctList = list.GroupBy(x => new {
                            x.Name,
                            x.Phone,
                            x.Email,
                            x.Country
                        })
                       .Select(x => x.First())
                       .ToList();

The MyClass class looks like this:

/// <summary>
/// Sample type with four string properties and no unique key property.
/// </summary>
public class MyClass{
       /// <summary>Name value.</summary>
       public string Name{get;set;}
       /// <summary>Phone value.</summary>
       public string Phone{get;set;}
       /// <summary>Email value.</summary>
       public string Email{get;set;}
       /// <summary>Country value.</summary>
       public string Country{get;set;}
}

3. If your object has a unique key, you can use just that key in the GroupBy.

For example my object's unique key is Id.

// Group on the unique key and keep the first item of each group.
var distinctList = list.GroupBy(x => x.Id)
                      .Select(x => x.First())
                      .ToList();
Ramil Aliyev 007
  • 4,437
  • 2
  • 31
  • 47
1

You can use extension method on list which checks uniqueness based on computed Hash. You can also change extension method to support IEnumerable.

Example:

/// <summary>Sample type with a name and an age, used to demonstrate Unique().</summary>
public class Employee{
/// <summary>The employee's name.</summary>
public string Name{get;set;}
/// <summary>The employee's age.</summary>
public int Age{get;set;}
}

// Two separate Employee instances that carry identical property values.
var employees = new List<Employee>
{
    new Employee { Name = "XYZ", Age = 30 },
    new Employee { Name = "XYZ", Age = 30 },
};

// Replace the list with one that holds a single representative per distinct value set.
employees = employees.Unique(); //Gives list which contains unique objects. 

Extension Method:

    /// <summary>
    /// Extension methods that de-duplicate a list by content: each item is serialized
    /// to JSON and the SHA-1 digest of that text is used as the item's identity.
    /// </summary>
    public static class LinqExtension
        {
            /// <summary>
            /// Returns a new list holding the first occurrence of every item whose
            /// serialized content has not been seen earlier in <paramref name="input"/>.
            /// </summary>
            /// <typeparam name="T">Element type; must be serializable by JsonConvert.</typeparam>
            /// <param name="input">The list to de-duplicate. It is not modified.</param>
            /// <returns>A new list with duplicates removed, in original order.</returns>
            public static List<T> Unique<T>(this List<T> input)
            {
                HashSet<string> uniqueHashes = new HashSet<string>();
                List<T> uniqueItems = new List<T>();

                foreach (T item in input)
                {
                    // HashSet.Add returns false when the digest is already present,
                    // so a single call both tests for and records the item.
                    if (uniqueHashes.Add(ComputeHash(item)))
                    {
                        uniqueItems.Add(item);
                    }
                }

                return uniqueItems;
            }

            /// <summary>Computes the hexadecimal SHA-1 digest of the entity's JSON form.</summary>
            /// <typeparam name="T">Type of the entity being hashed.</typeparam>
            /// <param name="entity">The object to serialize and hash.</param>
            /// <returns>The digest as an upper-case hexadecimal string without separators.</returns>
            private static string ComputeHash<T>(T entity)
            {
                string input = JsonConvert.SerializeObject(entity);

                // UTF-8 encodes every character losslessly. A code-page encoding can
                // map distinct characters to '?', making different objects hash alike.
                byte[] originalBytes = System.Text.Encoding.UTF8.GetBytes(input);

                // The hash algorithm is IDisposable; release it as soon as the digest is computed.
                using (System.Security.Cryptography.SHA1 sh = System.Security.Cryptography.SHA1.Create())
                {
                    byte[] encodedBytes = sh.ComputeHash(originalBytes);

                    return BitConverter.ToString(encodedBytes).Replace("-", "");
                }
            }
        }
1

The Equals method in the code below is incorrect.

Old

// Strongly-typed comparison: true when both the first and the last name match.
// NOTE(review): `other` is dereferenced without a null check, and this overload
// takes an Author, so it does not override Object.Equals(object).
public bool Equals(Author other)
{
    if (FirstName == other.FirstName && LastName == other.LastName)
        return true;

    return false;
}

NEW

/// <summary>
/// Untyped equality: the argument is equal only when it is an Author whose
/// first and last names both match this instance.
/// </summary>
/// <param name="obj">The object to compare with; may be null or of another type.</param>
/// <returns>true when <paramref name="obj"/> is an Author with matching names; otherwise false.</returns>
public override bool Equals(Object obj)
{
    Author candidate = obj as Author;

    // A null argument or an object of another type leaves candidate null and is never equal.
    return !(candidate is null)
        && FirstName == candidate.FirstName
        && LastName == candidate.LastName;
}
Josef
  • 2,869
  • 2
  • 22
  • 23