31

I want to get the difference between two sets of ints in c#. Given s1 and s2 I want to return those ints which are in s1 and not in s2. I can do something such as:

    List<int> s1 = new List<int>();
    List<int> s2 = new List<int>();

    foreach (int i in s1)
    {
        if (s1.Contains(i))
        {
            //
        }
        else
        {
            //
        }
    }

But I was wondering if anyone can point out anything cleaner. I would like to do something such as

List<int> omitted = s1.Difference(s2);

Not sure if there is an existing method or a LINQ construct that anyone might be able to point out? Thank you.

Brian
  • 117,631
  • 17
  • 236
  • 300
SiC
  • 457
  • 1
  • 9
  • 14

6 Answers6

36

I think you want HashSet.Except. That is, rather than use Lists, use HashSets, and then the operation is available. This is a better type if what you are representing is really a 'set' anyway. (If you already have a list, you can just create a 'new HashSet' out of it.)

Brian
  • 117,631
  • 17
  • 236
  • 300
  • awesome thankyou. Except() is the method I was looking for and I take your advice on HashSets. – SiC Apr 30 '09 at 09:54
  • Except is an extension function, no? – leppie Apr 30 '09 at 09:54
  • There is an extension method Enumerable.Except for all IEnumerables. But HashSet has a method Except specifically for HashSets. – Brian Apr 30 '09 at 09:58
  • Except is indeed an extension method that operates on any IEnumerable (see http://msdn.microsoft.com/en-us/library/system.linq.enumerable_members.aspx). That means it works fine on List. Of course, if order doesn't matter HashSet /is/ a more appropriate type. – Matthew Flaschen Apr 30 '09 at 10:08
  • 9
    Brian, I don't think so. The method you linked above is the extension method. HashSet has a ExceptWith method that destructively removes the elements in the parameter from the instance HashSet, but no Except. – Matthew Flaschen Apr 30 '09 at 10:13
33
IEnumerable<T> a, b;

var added = a.Except(b);
var removed = b.Except(a);
leppie
  • 115,091
  • 17
  • 196
  • 297
  • 2
    Note that this uses Enumerable.Except, so you need to use System.Linq. http://msdn.microsoft.com/en-us/library/bb300779.aspx – Brian Apr 30 '09 at 09:56
  • 1
    Problem? No, I just wanted to help someone out who might cut-and-paste this code and find that it doesn't compile. – Brian Apr 30 '09 at 10:00
  • 3
    @Brian, The same caveat also applies to your own answer. HashSet doesn't implement it's own Except method, so your answer requires System.Linq too. – LukeH Apr 30 '09 at 10:22
  • 2
    @Deviant: But yours run at O(n^2) and mine in O(n) :) – leppie May 01 '09 at 07:56
  • 2
    This answer shows that you have to call Except twice to get both added and removed items. This point is easy to miss. – Nils Lande Aug 17 '17 at 13:06
5

Another useful API, get the symmetric difference:

HashSet.SymmetricExceptWith()

Robin Qiu
  • 5,521
  • 1
  • 22
  • 19
2
List<int> s1 = new List<int>();
List<int> s2 = new List<int>();

return sl.FindAll( i => !s2.Contains(i) )
Chad Grant
  • 44,326
  • 9
  • 65
  • 80
1
from x in s1
where ! s2.contains(x)
select x
IMil
  • 1,391
  • 13
  • 17
0

Here are two extension methods that might come handy when you need to find unordered differences between two IEnumerable (it's more or less the same as the answer given by leppie wrapper into extension methods):

public class EnumerableDifferences<T>
{
    public IEnumerable<T> Added { get; }
    public IEnumerable<T> Removed { get; }

    public EnumerableDifferences(IEnumerable<T> added, IEnumerable<T> removed)
    {
        Added = added;
        Removed = removed;
    }
}

public static class EnumerableExtensions
{
    public static HashSet<TSource> ToHashSet<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource> comparer)
    {
        return new HashSet<TSource>(source, comparer);
    }

    public static IEnumerable<TSource> ExceptBy<TSource, TKey>(this IEnumerable<TSource> first, IEnumerable<TSource> second, Func<TSource, TKey> keySelector, IEqualityComparer<TKey> keyComparer = null)
    {
        return first
            .ExceptBy(keySelector, second.Select(keySelector), keyComparer);
    }

    public static IEnumerable<TSource> ExceptBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEnumerable<TKey> keys, IEqualityComparer<TKey> keyComparer = null)
    {
        var secondKeys = keys.ToHashSet(keyComparer);

        foreach (var firstItem in source)
        {
            var firstItemKey = keySelector(firstItem);

            if (!secondKeys.Contains(firstItemKey))
            {
                yield return firstItem;
            }
        }
    }

    public static EnumerableDifferences<TSource> DifferencesBy<TSource, TKey>(this IEnumerable<TSource> first, IEnumerable<TSource> second, Func<TSource, TKey> keySelector, IEqualityComparer<TKey> keyComparer = null)
    {
        keyComparer = keyComparer ?? EqualityComparer<TKey>.Default;

        var removed = first.ExceptBy(second, keySelector, keyComparer);
        var added = second.ExceptBy(first, keySelector, keyComparer);

        var result = new EnumerableDifferences<TSource>(added, removed);

        return result;
    }

    public static EnumerableDifferences<TSource> Differences<TSource>(this IEnumerable<TSource> first, IEnumerable<TSource> second, IEqualityComparer<TSource> comparer = null)
    {
        return first
            .DifferencesBy(second, x => x, comparer);
    }
}

public static class Program
{
    public static void Main(params string[] args)
    {
        var l1 = new[] { 'a', 'b', 'c' };
        var l2 = new[] { 'a', 'd', 'c' };

        var result = l1.Differences(l2);

        Console.ReadKey();
    }
}
Natalie Perret
  • 8,013
  • 12
  • 66
  • 129