25

I am trying to implement Left outer join extension method with return type IQueryable.

The function that I have written is as follows

public static IQueryable<TResult> LeftOuterJoin2<TOuter, TInner, TKey, TResult>(
        this IQueryable<TOuter> outer,
        IQueryable<TInner> inner,
        Func<TOuter, TKey> outerKeySelector,
        Func<TInner, TKey> innerKeySelector,
        Func<TOuter, TInner, TResult> resultSelector)
{
        return
          from outerItem in outer
          join innerItem in inner on outerKeySelector(outerItem) 
            equals innerKeySelector(innerItem) into joinedData
          from r in joinedData.DefaultIfEmpty()
          select resultSelector(outerItem, r);
}

It can't generate the query. The reason might be: I have used Func<> instead of Expression<>. I tried with Expression<> as well. It gives me an error on outerKeySelector(outerItem) line, which is outerKeySelector is a variable which is being used as a method

I found some discussions on SO (such as here) and CodeProjects, but those work for IEnumerable types not for IQueryable.

Community
  • 1
  • 1
N Rocking
  • 2,947
  • 2
  • 21
  • 24
  • What are the exact error messages you are getting? The way I'm thinking is `IQueryable` actually *is* an `IEnumerable`, and therefore the method that works for IEnumerable should work for this instance too, have you tried using what works for `IEnumerable` and then simply casting to `IQueryable` by calling `.AsQueryable()`? – aevitas Feb 07 '14 at 02:35
  • Difference is, IQueryable is transformed by query provider into correct SQL which is then executed against database, when IEnumerable is base for LINQ to Objects. IQueryable requires expression tree as parameters, IEnumerable is ok with delegates. – MarcinJuraszek Feb 07 '14 at 02:47

6 Answers6

38

Intro

This question is very interesting. The problem is Funcs are delegates and Expressions are trees, they are completely different structures. When you use your current extension implementation it uses loops and executes your selectors on each step for each element and it works well. But when we talk about entity framework and LINQ we need tree traversal for translation it to SQL query. So it's a "little" harder than Funcs (but I like Expressions anyway) and there are some problems described below.

When you want to do left outer join you can use something like this (taken from here: How to implement left join in JOIN Extension method)

var leftJoin = p.Person.Where(n => n.FirstName.Contains("a"))
                   .GroupJoin(p.PersonInfo, 
                              n => n.PersonId,
                              m => m.PersonId,
                              (n, ms) => new { n, ms = ms.DefaultIfEmpty() })
                   .SelectMany(z => z.ms.Select(m => new { n = z.n, m ));

It is good, but it is not extension method we need. I guess you need something like this:

using (var db = new Database1Entities("..."))
{
     var my = db.A.LeftOuterJoin2(db.B, a => a.Id, b => b.IdA, 
         (a, b) => new { a, b, hello = "Hello World!" });
     // other actions ...
}

There are many hard parts in creating such extensions:

  • Creating complex trees manually, compiler will not help us here
  • Reflection is needed for methods like Where, Select, etc
  • Anonymous types (!! we need codegen here?? I hope no)

Steps

Consider 2 simple tables: A (columns: Id, Text) and B (Columns Id, IdA, Text).

Outer join could be implemented in 3 steps:

// group join as usual + use DefaultIfEmpty
var q1 = Queryable.GroupJoin(db.A, db.B, a => a.Id, b => b.IdA, 
                              (a, b) => new { a, groupB = b.DefaultIfEmpty() });

// regroup data to associated list a -> b, it is usable already, but it's 
// impossible to use resultSelector on this stage, 
// beacuse of type difference (quite deep problem: some anonymous type != TOuter)
var q2 = Queryable.SelectMany(q1, x => x.groupB, (a, b) => new { a.a, b });

// second regroup to get the right types
var q3 = Queryable.SelectMany(db.A, 
                               a => q2.Where(x => x.a == a).Select(x => x.b), 
                               (a, b) => new {a, b});

Code

Ok, I'm not such a good teller, here is he code I have (Sorry I was unable to format it better, but it works!):

public static IQueryable<TResult> LeftOuterJoin2<TOuter, TInner, TKey, TResult>(
        this IQueryable<TOuter> outer,
        IQueryable<TInner> inner,
        Expression<Func<TOuter, TKey>> outerKeySelector,
        Expression<Func<TInner, TKey>> innerKeySelector,
        Expression<Func<TOuter, TInner, TResult>> resultSelector)
    {

        // generic methods
        var selectManies = typeof(Queryable).GetMethods()
            .Where(x => x.Name == "SelectMany" && x.GetParameters().Length == 3)
            .OrderBy(x=>x.ToString().Length)
            .ToList();
        var selectMany = selectManies.First();
        var select = typeof(Queryable).GetMethods().First(x => x.Name == "Select" && x.GetParameters().Length == 2);
        var where = typeof(Queryable).GetMethods().First(x => x.Name == "Where" && x.GetParameters().Length == 2);
        var groupJoin = typeof(Queryable).GetMethods().First(x => x.Name == "GroupJoin" && x.GetParameters().Length == 5);
        var defaultIfEmpty = typeof(Queryable).GetMethods().First(x => x.Name == "DefaultIfEmpty" && x.GetParameters().Length == 1);

        // need anonymous type here or let's use Tuple
        // prepares for:
        // var q2 = Queryable.GroupJoin(db.A, db.B, a => a.Id, b => b.IdA, (a, b) => new { a, groupB = b.DefaultIfEmpty() });
        var tuple = typeof(Tuple<,>).MakeGenericType(
            typeof(TOuter),
            typeof(IQueryable<>).MakeGenericType(
                typeof(TInner)
                )
            );
        var paramOuter = Expression.Parameter(typeof(TOuter));
        var paramInner = Expression.Parameter(typeof(IEnumerable<TInner>));
        var groupJoinExpression = Expression.Call(
            null,
            groupJoin.MakeGenericMethod(typeof (TOuter), typeof (TInner), typeof (TKey), tuple),
            new Expression[]
                {
                    Expression.Constant(outer),
                    Expression.Constant(inner),
                    outerKeySelector,
                    innerKeySelector,
                    Expression.Lambda(
                        Expression.New(
                            tuple.GetConstructor(tuple.GetGenericArguments()),
                            new Expression[]
                                {
                                    paramOuter,
                                    Expression.Call(
                                        null,
                                        defaultIfEmpty.MakeGenericMethod(typeof (TInner)),
                                        new Expression[]
                                            {
                                                Expression.Convert(paramInner, typeof (IQueryable<TInner>))
                                            }
                                )
                                },
                            tuple.GetProperties()
                            ),
                        new[] {paramOuter, paramInner}
                )
                }
            );

        // prepares for:
        // var q3 = Queryable.SelectMany(q2, x => x.groupB, (a, b) => new { a.a, b });
        var tuple2 = typeof (Tuple<,>).MakeGenericType(typeof (TOuter), typeof (TInner));
        var paramTuple2 = Expression.Parameter(tuple);
        var paramInner2 = Expression.Parameter(typeof(TInner));
        var paramGroup = Expression.Parameter(tuple);
        var selectMany1Result = Expression.Call(
            null,
            selectMany.MakeGenericMethod(tuple, typeof (TInner), tuple2),
            new Expression[]
                {
                    groupJoinExpression,
                    Expression.Lambda(
                        Expression.Convert(Expression.MakeMemberAccess(paramGroup, tuple.GetProperty("Item2")),
                                           typeof (IEnumerable<TInner>)),
                        paramGroup
                ),
                    Expression.Lambda(
                        Expression.New(
                            tuple2.GetConstructor(tuple2.GetGenericArguments()),
                            new Expression[]
                                {
                                    Expression.MakeMemberAccess(paramTuple2, paramTuple2.Type.GetProperty("Item1")),
                                    paramInner2
                                },
                            tuple2.GetProperties()
                            ),
                        new[]
                            {
                                paramTuple2,
                                paramInner2
                            }
                )
                }
            );

        // prepares for final step, combine all expressinos together and invoke:
        // var q4 = Queryable.SelectMany(db.A, a => q3.Where(x => x.a == a).Select(x => x.b), (a, b) => new { a, b });
        var paramTuple3 = Expression.Parameter(tuple2);
        var paramTuple4 = Expression.Parameter(tuple2);
        var paramOuter3 = Expression.Parameter(typeof (TOuter));
        var selectManyResult2 = selectMany
            .MakeGenericMethod(
                typeof(TOuter),
                typeof(TInner),
                typeof(TResult)
            )
            .Invoke(
                null,
                new object[]
                    {
                        outer,
                        Expression.Lambda(
                            Expression.Convert(
                                Expression.Call(
                                    null,
                                    select.MakeGenericMethod(tuple2, typeof(TInner)),
                                    new Expression[]
                                        {
                                            Expression.Call(
                                                null,
                                                where.MakeGenericMethod(tuple2),
                                                new Expression[]
                                                    {
                                                        selectMany1Result,
                                                        Expression.Lambda( 
                                                            Expression.Equal(
                                                                paramOuter3,
                                                                Expression.MakeMemberAccess(paramTuple4, paramTuple4.Type.GetProperty("Item1"))
                                                            ),
                                                            paramTuple4
                                                        )
                                                    }
                                            ),
                                            Expression.Lambda(
                                                Expression.MakeMemberAccess(paramTuple3, paramTuple3.Type.GetProperty("Item2")),
                                                paramTuple3
                                            )
                                        }
                                ), 
                                typeof(IEnumerable<TInner>)
                            ),
                            paramOuter3
                        ),
                        resultSelector
                    }
            );

        return (IQueryable<TResult>)selectManyResult2;
    }

Usage

And the usage again:

db.A.LeftOuterJoin2(db.B, a => a.Id, b => b.IdA, 
       (a, b) => new { a, b, hello = "Hello World!" });

Looking at this you can think what is the sql query for all this? It might be huge. Guess what? It's quite small:

SELECT 
1 AS [C1], 
[Extent1].[Id] AS [Id], 
[Extent1].[Text] AS [Text], 
[Join1].[Id1] AS [Id1], 
[Join1].[IdA] AS [IdA], 
[Join1].[Text2] AS [Text2], 
N'Hello World!' AS [C2]
FROM  [A] AS [Extent1]
INNER JOIN  (SELECT [Extent2].[Id] AS [Id2], [Extent2].[Text] AS [Text], [Extent3].[Id]    AS [Id1], [Extent3].[IdA] AS [IdA], [Extent3].[Text2] AS [Text2]
    FROM  [A] AS [Extent2]
    LEFT OUTER JOIN [B] AS [Extent3] ON [Extent2].[Id] = [Extent3].[IdA] ) AS [Join1] ON [Extent1].[Id] = [Join1].[Id2]

Hope it helps.

Community
  • 1
  • 1
Tony
  • 7,345
  • 3
  • 26
  • 34
  • This is also basically the whole reason for the LINQ "language" - once you get into joins, doing anything using just the extension methods is a huge pain. The LINQ keyword way produces a much easier to read code (even though it does the same thing behind the curtains). – Luaan Feb 10 '14 at 10:26
  • 4
    The most heroic answer I've seen in a while. – spender Apr 14 '16 at 19:14
12

The accepted answer is a great start to explain the complexities behind a left outer join.

I found three rather serious issues with it, especially when taking this extension method and using it in more complex queries (chaining multiple left outer joins with normal joins then summarizing/max/count/...) Before you copy the selected answer into your production environment, please do read on.

Consider the original example from the linked SO post, which represents just about any left outer join done in LINQ:

var leftJoin = p.Person.Where(n => n.FirstName.Contains("a"))
                   .GroupJoin(p.PersonInfo, 
                              n => n.PersonId,
                              m => m.PersonId,
                              (n, ms) => new { n, ms = ms })
                   .SelectMany(z => z.ms.DefaultIfEmpty(), (n, m) => new { n = n, m ));
  • The usage of a Tuple works, but when this is used as part of more complex queries, EF fails (cannot use constructors). To get around this, you either need to generate a new anonymous class dynamically (search stack overflow) or use a constructor-less type. I created this

    internal class KeyValuePairHolder<T1, T2>
    {
        public T1 Item1 { get; set; }
        public T2 Item2 { get; set; }
    }
    
  • The usage of the "Queryable.DefaultIfEmpty" method. In the original and in the GroupJoin methods, correct methods that are chosen by the compiler are the "Enumerable.DefaultIfEmpty" methods. This has no influence in a simple query, but notice how the accepted answer has a bunch of Converts (between IQueryable and IEnumerable). Those cast also cause issues in more complex queries. It's ok to use the "Enumerable.DefaultIfEmpty" method in an Expression, EF knows not to execute it but to translate it into a join instead.

  • Finally, this is the bigger issue: there are two selects done whereas the original only does one select. You can read the cause in the code comments (beacuse of type difference (quite deep problem: some anonymous type != TOuter)) and see it in the SQL (Select from A inner join (a left outer join b)) The issue here is that the Original SelectMany method takes an object created in the Join method of type: KeyValuePairHolder of TOuter and IEnumerable of Tinner as it's first parameter, but the resultSelector expression passed takes a simple TOUter as it's first parameter. You can use an ExpressionVisitor to rewrite the expression that is passed into the correct form.

    internal class ResultSelectorRewriter<TOuter, TInner, TResult> : ExpressionVisitor
    {
        private Expression<Func<TOuter, TInner, TResult>> resultSelector;
        public Expression<Func<KeyValuePairHolder<TOuter, IEnumerable<TInner>>, TInner, TResult>> CombinedExpression { get; private set; }
    
        private ParameterExpression OldTOuterParamExpression;
        private ParameterExpression OldTInnerParamExpression;
        private ParameterExpression NewTOuterParamExpression;
        private ParameterExpression NewTInnerParamExpression;
    
    
        public ResultSelectorRewriter(Expression<Func<TOuter, TInner, TResult>> resultSelector)
        {
            this.resultSelector = resultSelector;
            this.OldTOuterParamExpression = resultSelector.Parameters[0];
            this.OldTInnerParamExpression = resultSelector.Parameters[1];
    
            this.NewTOuterParamExpression = Expression.Parameter(typeof(KeyValuePairHolder<TOuter, IEnumerable<TInner>>));
            this.NewTInnerParamExpression = Expression.Parameter(typeof(TInner));
    
            var newBody = this.Visit(this.resultSelector.Body);
            var combinedExpression = Expression.Lambda(newBody, new ParameterExpression[] { this.NewTOuterParamExpression, this.NewTInnerParamExpression });
            this.CombinedExpression = (Expression<Func<KeyValuePairHolder<TOuter, IEnumerable<TInner>>, TInner, TResult>>)combinedExpression;
        }
    
    
        protected override Expression VisitParameter(ParameterExpression node)
        {
            if (node == this.OldTInnerParamExpression)
                return this.NewTInnerParamExpression;
            else if (node == this.OldTOuterParamExpression)
                return Expression.PropertyOrField(this.NewTOuterParamExpression, "Item1");
            else
                throw new InvalidOperationException("What is this sorcery?", new InvalidOperationException("Did not expect a parameter: " + node));
    
        } 
    }
    

Using the expression visitor and KeyValuePairHolder to avoid usage of Tuples, my updated version of the selected answer below fixes the three issues, is shorter, and produces shorter SQL:

 internal class QueryReflectionMethods
    {
        internal static System.Reflection.MethodInfo Enumerable_Select = typeof(Enumerable).GetMethods().First(x => x.Name == "Select" && x.GetParameters().Length == 2);
        internal static System.Reflection.MethodInfo Enumerable_DefaultIfEmpty = typeof(Enumerable).GetMethods().First(x => x.Name == "DefaultIfEmpty" && x.GetParameters().Length == 1);

        internal static System.Reflection.MethodInfo Queryable_SelectMany = typeof(Queryable).GetMethods().Where(x => x.Name == "SelectMany" && x.GetParameters().Length == 3).OrderBy(x => x.ToString().Length).First();
        internal static System.Reflection.MethodInfo Queryable_Where = typeof(Queryable).GetMethods().First(x => x.Name == "Where" && x.GetParameters().Length == 2);
        internal static System.Reflection.MethodInfo Queryable_GroupJoin = typeof(Queryable).GetMethods().First(x => x.Name == "GroupJoin" && x.GetParameters().Length == 5);
        internal static System.Reflection.MethodInfo Queryable_Join = typeof(Queryable).GetMethods(System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.Public).First(c => c.Name == "Join");
        internal static System.Reflection.MethodInfo Queryable_Select = typeof(Queryable).GetMethods().First(x => x.Name == "Select" && x.GetParameters().Length == 2);



        public static IQueryable<TResult> CreateLeftOuterJoin<TOuter, TInner, TKey, TResult>(
                   IQueryable<TOuter> outer,
                   IQueryable<TInner> inner,
                   Expression<Func<TOuter, TKey>> outerKeySelector,
                   Expression<Func<TInner, TKey>> innerKeySelector,
                   Expression<Func<TOuter, TInner, TResult>> resultSelector)
        { 

            var keyValuePairHolderWithGroup = typeof(KeyValuePairHolder<,>).MakeGenericType(
                typeof(TOuter),
                typeof(IEnumerable<>).MakeGenericType(
                    typeof(TInner)
                    )
                );
            var paramOuter = Expression.Parameter(typeof(TOuter));
            var paramInner = Expression.Parameter(typeof(IEnumerable<TInner>));
            var groupJoin =
                Queryable_GroupJoin.MakeGenericMethod(typeof(TOuter), typeof(TInner), typeof(TKey), keyValuePairHolderWithGroup)
                .Invoke(
                    "ThisArgumentIsIgnoredForStaticMethods",
                    new object[]{
                    outer,
                    inner,
                    outerKeySelector,
                    innerKeySelector,
                    Expression.Lambda(
                        Expression.MemberInit(
                            Expression.New(keyValuePairHolderWithGroup), 
                            Expression.Bind(
                                keyValuePairHolderWithGroup.GetMember("Item1").Single(),  
                                paramOuter
                                ), 
                            Expression.Bind(
                                keyValuePairHolderWithGroup.GetMember("Item2").Single(), 
                                paramInner
                                )
                            ),
                        paramOuter, 
                        paramInner
                        )
                    }
                );


            var paramGroup = Expression.Parameter(keyValuePairHolderWithGroup);
            Expression collectionSelector = Expression.Lambda(                    
                            Expression.Call(
                                    null,
                                    Enumerable_DefaultIfEmpty.MakeGenericMethod(typeof(TInner)),
                                    Expression.MakeMemberAccess(paramGroup, keyValuePairHolderWithGroup.GetProperty("Item2"))) 
                            ,
                            paramGroup
                        );

            Expression newResultSelector = new ResultSelectorRewriter<TOuter, TInner, TResult>(resultSelector).CombinedExpression;


            var selectMany1Result =
                Queryable_SelectMany.MakeGenericMethod(keyValuePairHolderWithGroup, typeof(TInner), typeof(TResult))
                .Invoke(
                    "ThisArgumentIsIgnoredForStaticMethods", new object[]{
                        groupJoin,
                        collectionSelector,
                        newResultSelector
                    }
                );
            return (IQueryable<TResult>)selectMany1Result;
        }
    }
Community
  • 1
  • 1
  • While your method seem to work with EF6, I have tried to use your suggested approach with EF Core 2.0 and failed to get results. I am not sure if this is a bug with EF Core 2.0. I've asked the question here: https://stackoverflow.com/questions/46537158/trying-to-implement-a-leftjoin-extension-method-to-work-with-ef-core-2-0 – Sudarsha Hewa Oct 03 '17 at 11:26
9

As stated in previous answers, when you want your IQueryable to be translated into SQL you need to use Expression instead of Func, so you have to go the Expression Tree route.

However, here's a way you can achieve the same result without having to build the Expression tree yourself. The trick is, you need to reference LinqKit (available via NuGet) and call AsExpandable() on the query. This will take care of building the underlying expression tree (see how here).

The example below uses the GroupJoin with SelectMany and DefaultIfEmpty() approach:

Code

    public static IQueryable<TResult> LeftOuterJoin<TOuter, TInner, TKey, TResult>(
        this IQueryable<TOuter> outer,
        IQueryable<TInner> inner,
        Expression<Func<TOuter, TKey>> outerKeySelector,
        Expression<Func<TInner, TKey>> innerKeySelector,
        Expression<Func<TOuter, TInner, TResult>> resultSelector)
    {
        return outer
            .AsExpandable()// Tell LinqKit to convert everything into an expression tree.
            .GroupJoin(
                inner,
                outerKeySelector,
                innerKeySelector,
                (outerItem, innerItems) => new { outerItem, innerItems })
            .SelectMany(
                joinResult => joinResult.innerItems.DefaultIfEmpty(),
                (joinResult, innerItem) => 
                    resultSelector.Invoke(joinResult.outerItem, innerItem));
    }

Sample Data

Let's assume we have the following EF entities, and the users and addresses variables are the access to the underlying DbSet:

public class User
{
    public int Id { get; set; }
    public string FirstName { get; set; }
    public string LastName { get; set; }
}

public class UserAddress
{
    public int UserId { get; set; }
    public string LastName { get; set; }
    public string Street { get; set; }
}

IQueryable<User> users;
IQueryable<UserAddress> addresses;

Usage 1

Let's join by user id:

var result = users.LeftOuterJoin(
            addresses,
            user => user.Id,
            address => address.UserId,
            (user, address) => new { user.Id, address.Street });

This translates to (using LinqPad):

SELECT 
[Extent1].[Id] AS [Id],     
[Extent2].[Street] AS [Street]
FROM  [dbo].[Users] AS [Extent1]
LEFT OUTER JOIN [dbo].[UserAddresses] AS [Extent2] 
ON [Extent1].[Id] = [Extent2].[UserId]

Usage 2

Now let's join on multiple properties using an anonymous type as key:

var result = users.LeftOuterJoin(
            addresses,
            user => new { user.Id, user.LastName },
            address => new { Id = address.UserId, address.LastName },
            (user, address) => new { user.Id, address.Street });

Please note that the anonymous type properties must have the same names, otherwise you'll get a syntax error.

That's why we have Id = address.UserId instead of just address.UserId.

This will be translated to:

SELECT 
[Extent1].[Id] AS [Id],     
[Extent2].[Street] AS [Street]
FROM  [dbo].[Users] AS [Extent1]
LEFT OUTER JOIN [dbo].[UserAddresses] AS [Extent2] 
ON ([Extent1].[Id] = [Extent2].[UserId]) AND ([Extent1].[LastName] = [Extent2].[LastName])
RaduV
  • 91
  • 1
  • 4
4

This is the .LeftJoin extension method I created last year when I wanted to simplify the .GroupJoin. I've had good luck with it. I included the XML comments so you get full intellisense. There's also an overload with an IEqualityComparer. I hope you find it useful.

My full suite of Join Extensions is here: https://github.com/jolsa/Extensions/blob/master/ExtensionLib/JoinExtensions.cs

// JoinExtensions: Created 07/12/2014 - Johnny Olsa

using System.Linq;

namespace System.Collections.Generic
{
    /// <summary>
    /// Join Extensions that .NET should have provided?
    /// </summary>
    public static class JoinExtensions
    {
        /// <summary>
        /// Correlates the elements of two sequences based on matching keys. A specified
        /// System.Collections.Generic.IEqualityComparer&lt;T&gt; is used to compare keys.
        /// </summary>
        /// <typeparam name="TOuter">The type of the elements of the first sequence.</typeparam>
        /// <typeparam name="TInner">The type of the elements of the second sequence.</typeparam>
        /// <typeparam name="TKey">The type of the keys returned by the key selector functions.</typeparam>
        /// <typeparam name="TResult">The type of the result elements.</typeparam>
        /// <param name="outer">The first sequence to join.</param>
        /// <param name="inner">The sequence to join to the first sequence.</param>
        /// <param name="outerKeySelector">A function to extract the join key from each element of the first sequence.</param>
        /// <param name="innerKeySelector">A function to extract the join key from each element of the second sequence.</param>
        /// <param name="resultSelector">A function to create a result element from two combined elements.</param>
        /// <param name="comparer">A System.Collections.Generic.IEqualityComparer&lt;T&gt; to hash and compare keys.</param>
        /// <returns>
        /// An System.Collections.Generic.IEnumerable&lt;T&gt; that has elements of type TResult
        /// that are obtained by performing an left outer join on two sequences.
        /// </returns>
        /// <example>
        /// Example:
        /// <code>
        /// class TestClass
        /// {
        ///        static int Main()
        ///        {
        ///            var strings1 = new string[] { "1", "2", "3", "4", "a" };
        ///            var strings2 = new string[] { "1", "2", "3", "16", "A" };
        ///            
        ///            var lj = strings1.LeftJoin(
        ///                strings2,
        ///                a => a,
        ///                b => b,
        ///                (a, b) => (a ?? "null") + "-" + (b ?? "null"),
        ///                StringComparer.OrdinalIgnoreCase)
        ///                .ToList();
        ///        }
        ///    }
        ///    </code>
        /// </example>
        public static IEnumerable<TResult> LeftJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector, IEqualityComparer<TKey> comparer)
        {
            return outer.GroupJoin(
                inner,
                outerKeySelector,
                innerKeySelector,
                (o, ei) => ei
                    .Select(i => resultSelector(o, i))
                    .DefaultIfEmpty(resultSelector(o, default(TInner))), comparer)
                    .SelectMany(oi => oi);
        }

        /// <summary>
        /// Correlates the elements of two sequences based on matching keys. The default
        /// equality comparer is used to compare keys.
        /// </summary>
        /// <typeparam name="TOuter">The type of the elements of the first sequence.</typeparam>
        /// <typeparam name="TInner">The type of the elements of the second sequence.</typeparam>
        /// <typeparam name="TKey">The type of the keys returned by the key selector functions.</typeparam>
        /// <typeparam name="TResult">The type of the result elements.</typeparam>
        /// <param name="outer">The first sequence to join.</param>
        /// <param name="inner">The sequence to join to the first sequence.</param>
        /// <param name="outerKeySelector">A function to extract the join key from each element of the first sequence.</param>
        /// <param name="innerKeySelector">A function to extract the join key from each element of the second sequence.</param>
        /// <param name="resultSelector">A function to create a result element from two combined elements.</param>
        /// <returns>
        /// An System.Collections.Generic.IEnumerable&lt;T&gt; that has elements of type TResult
        /// that are obtained by performing an left outer join on two sequences.
        /// </returns>
        /// <example>
        /// Example:
        /// <code>
        /// class TestClass
        /// {
        ///        static int Main()
        ///        {
        ///            var strings1 = new string[] { "1", "2", "3", "4", "a" };
        ///            var strings2 = new string[] { "1", "2", "3", "16", "A" };
        ///            
        ///            var lj = strings1.LeftJoin(
        ///                strings2,
        ///                a => a,
        ///                b => b,
        ///                (a, b) => (a ?? "null") + "-" + (b ?? "null"))
        ///                .ToList();
        ///        }
        ///    }
        ///    </code>
        /// </example>
        public static IEnumerable<TResult> LeftJoin<TOuter, TInner, TKey, TResult>(this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner, Func<TOuter, TKey> outerKeySelector, Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
        {
            return outer.LeftJoin(inner, outerKeySelector, innerKeySelector, resultSelector, default(IEqualityComparer<TKey>));
        }

    }
}
JohnnyIV
  • 109
  • 5
  • I just realized that my extensions are for IEnumerable and not IQueryable. When I tested my .LeftJoin in LINQPad and viewed the SQL, it pulls both tables and does the .LeftJoin locally, so that's something to consider. For my purposes, I was always using local data, so it didn't matter. I just wanted to be clear that the LEFT JOIN does not occur on the server with these extensions. – JohnnyIV Dec 04 '15 at 17:32
  • ...and I realized I didn't thoroughly read the initial question which was specifically looking for IQueryable, so apologies. These extension methods are still useful as long as you don't need the left join to occur on the server. – JohnnyIV Dec 04 '15 at 20:16
  • I'm using your LeftJoin extension method but getting null reference exceptions when I link them together. – Justin Mar 02 '17 at 18:20
  • is it possible to modify the code to accept strings for parentkey, childkey and selector? – Asım Gündüz Dec 27 '17 at 11:54
  • @Justin, can you give me an example? LeftJoin has inherent null problems, I'll add a new comment with a couple ways to handle it. When I posted this, I didn't notice that the question was around translating to SQL. This code works on local items, so the objects will be pulled first and **then** joined instead of doing the outer join on the server. – JohnnyIV Dec 28 '17 at 20:50
  • @Licentia, can you give me an example of what you're trying to do? Maybe I can help. – JohnnyIV Dec 28 '17 at 20:55
  • @JohnnyIV There is a dynamic linq library where you can get from the nuget : System.linq.dynamic.core this library allows you to use the extension methods in this way => foo.Join(bar,"fooId","barId", "new(outer.fooId, outer.fooName, inner.barId, inner.bar..."); this produces an inner join.. I was wondering if it is possible to have a left outer join in such way – Asım Gündüz Dec 28 '17 at 21:02
  • @Licentia, I wasn't aware of this library. It looks like it resolves the string to a field or property at runtime. I can see some uses for such a thing, but so far I haven't needed this level of flexibility. It would be useful for .OrderBy. My first thought would be to decompile it to see how it works. It must use reflection to accomplish this. I may take a crack at finding a way to do this easily. – JohnnyIV Dec 28 '17 at 21:33
  • @Licentia, I added another example below that should give you what you want, or give you a base to create the solution you need. – JohnnyIV Dec 29 '17 at 01:17
0

An update to my previous answer. When I posted it, I didn't notice that the question was around translating to SQL. This code works on local items, so the objects will be pulled first and then joined instead of doing the outer join on the server. But to handle nulls using the Join extensions I posted earlier, here's an example:

public class Person
{
    public int Id { get; set; }
    public string Name { get; set; }
}
public class EmailAddress
{
    public int Id { get; set; }
    public Email Email { get; set; }
}
public class Email
{
    public string Name { get; set; }
    public string Address { get; set; }
}

public static void Main()
{
    var people = new []
    {
        new Person() { Id = 1, Name = "John" },
        new Person() { Id = 2, Name = "Paul" },
        new Person() { Id = 3, Name = "George" },
        new Person() { Id = 4, Name = "Ringo" }
    };
    var addresses = new[]
    {
        new EmailAddress() { Id = 2, Email = new Email() { Name = "Paul", Address = "Paul@beatles.com" } },
        new EmailAddress() { Id = 3, Email = new Email() { Name = "George", Address = "George@beatles.com" } },
        new EmailAddress() { Id = 4, Email = new Email() { Name = "Ringo", Address = "Ringo@beatles.com" } }
    };

    var joinedById = people.LeftJoin(addresses, p => p.Id, a => a.Id, (p, a) => new
    {
        p.Id,
        p.Name,
        a?.Email.Address
    }).ToList();

    Console.WriteLine("\r\nJoined by Id:\r\n");
    joinedById.ForEach(j => Console.WriteLine($"{j.Id}-{j.Name}: {j.Address ?? "<null>"}"));

    var joinedByName = people.LeftJoin(addresses, p => p.Name, a => a?.Email.Name, (p, a) => new
    {
        p.Id,
        p.Name,
        a?.Email.Address
    }, StringComparer.OrdinalIgnoreCase).ToList();

    Console.WriteLine("\r\nJoined by Name:\r\n");
    joinedByName.ForEach(j => Console.WriteLine($"{j.Id}-{j.Name}: {j.Address ?? "<null>"}"));

}
JohnnyIV
  • 109
  • 5
  • @RaduV has an excellent solution for handling server joins. I tried it and I like it. I will add that when applicable, I prefer `IEnumerable` joins when possible because you're not limited to syntax compatible with the database. But it is beneficial to perform inner/outer joins on the server for performance and to limit the amount of data to process. – JohnnyIV Dec 28 '17 at 22:00
0

@Licentia, this is what I came up with to resolve your issue. I created DynamicJoin and DynamicLeftJoin extension methods similar to what you showed me, but I handled the output differently since string parsing is vulnerable to many problems. This won't join on anonymous types, but you can tweak it to do so. It also doesn't have overloads for IComparable, but could easily be added. Property names must be cased the same as the type. This is used in conjunction with my extension methods above (i.e. it won't work without them). I hope it helps!

public class Person
{
    public int Id { get; set; }
    public string Name { get; set; }
}
public class EmailAddress
{
    public int PersonId { get; set; }
    public Email Email { get; set; }
}
public class Email
{
    public string Name { get; set; }
    public string Address { get; set; }
}

public static void Main()
{
    var people = new[]
    {
        new Person() { Id = 1, Name = "John" },
        new Person() { Id = 2, Name = "Paul" },
        new Person() { Id = 3, Name = "George" },
        new Person() { Id = 4, Name = "Ringo" }
    };
    var addresses = new[]
    {
        new EmailAddress() { PersonId = 2, Email = new Email() { Name = "Paul", Address = "Paul@beatles.com" } },
        new EmailAddress() { PersonId = 3, Email = new Email() { Name = "George", Address = "George@beatles.com" } },
        new EmailAddress() { PersonId = 4, Email = new Email() { Name = "Ringo" } }
    };

    Console.WriteLine("\r\nInner Join:\r\n");
    var innerJoin = people.DynamicJoin(addresses, "Id", "PersonId", "outer.Id", "outer.Name", "inner.Email").ToList();
    innerJoin.ForEach(j => Console.WriteLine($"{j.Id}-{j.Name}: {j?.Email?.Address ?? "<null>"}"));

    Console.WriteLine("\r\nOuter Join:\r\n");
    var leftJoin = people.DynamicLeftJoin(addresses, "Id", "PersonId", "outer.Id", "outer.Name", "inner.Email").ToList();
    leftJoin.ForEach(j => Console.WriteLine($"{j.Id}-{j.Name}: {j?.Email?.Address ?? "<null>"}"));

}

public static class DynamicJoinExtensions
{
    private const string OuterPrefix = "outer.";
    private const string InnerPrefix = "inner.";

    private class Processor<TOuter, TInner>
    {
        private readonly Type _typeOuter = typeof(TOuter);
        private readonly Type _typeInner = typeof(TInner);
        private readonly PropertyInfo _keyOuter;
        private readonly PropertyInfo _keyInner;
        private readonly List<string> _outputFields;
        private readonly Dictionary<string, PropertyInfo> _resultProperties;

        public Processor(string outerKey, string innerKey, IEnumerable<string> outputFields)
        {
            _outputFields = outputFields.ToList();

            //  Check for properties with the same name
            string badProps = string.Join(", ", _outputFields.Select(f => new { property = f, name = GetName(f) })
                .GroupBy(f => f.name, StringComparer.OrdinalIgnoreCase)
                .Where(g => g.Count() > 1)
                .SelectMany(g => g.OrderBy(f => f.name, StringComparer.OrdinalIgnoreCase).Select(f => f.property)));
            if (!string.IsNullOrEmpty(badProps))
                throw new ArgumentException($"One or more {nameof(outputFields)} are duplicated: {badProps}");

            _keyOuter = _typeOuter.GetProperty(outerKey);
            _keyInner = _typeInner.GetProperty(innerKey);

            //  Check for valid keys
            if (_keyOuter == null || _keyInner == null)
                throw new ArgumentException($"One or both of the specified keys is not a valid property");

            //  Check type compatibility
            if (_keyOuter.PropertyType != _keyInner.PropertyType)
                throw new ArgumentException($"Keys must be the same type. ({nameof(outerKey)} type: {_keyOuter.PropertyType.Name}, {nameof(innerKey)} type: {_keyInner.PropertyType.Name})");

            Func<string, Type, IEnumerable<KeyValuePair<string, PropertyInfo>>> getResultProperties = (prefix, type) =>
               _outputFields.Where(f => f.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                   .Select(f => new KeyValuePair<string, PropertyInfo>(f, type.GetProperty(f.Substring(prefix.Length))));

            //  Combine inner/outer outputFields with PropertyInfo into a dictionary
            _resultProperties = getResultProperties(OuterPrefix, _typeOuter).Concat(getResultProperties(InnerPrefix, _typeInner))
                .ToDictionary(k => k.Key, v => v.Value, StringComparer.OrdinalIgnoreCase);

            //  Check for properties that aren't found
            badProps = string.Join(", ", _resultProperties.Where(kv => kv.Value == null).Select(kv => kv.Key));
            if (!string.IsNullOrEmpty(badProps))
                throw new ArgumentException($"One or more {nameof(outputFields)} are not valid: {badProps}");

            //  Check for properties that aren't the right format
            badProps = string.Join(", ", _outputFields.Where(f => !_resultProperties.ContainsKey(f)));
            if (!string.IsNullOrEmpty(badProps))
                throw new ArgumentException($"One or more {nameof(outputFields)} are not valid: {badProps}");

        }
        //  Inner Join
        public IEnumerable<dynamic> Join(IEnumerable<TOuter> outer, IEnumerable<TInner> inner) =>
            outer.Join(inner, o => GetOuterKeyValue(o), i => GetInnerKeyValue(i), (o, i) => CreateItem(o, i));
        //  Left Outer Join
        public IEnumerable<dynamic> LeftJoin(IEnumerable<TOuter> outer, IEnumerable<TInner> inner) =>
            outer.LeftJoin(inner, o => GetOuterKeyValue(o), i => GetInnerKeyValue(i), (o, i) => CreateItem(o, i));

        private static string GetName(string fieldId) => fieldId.Substring(fieldId.IndexOf('.') + 1);
        private object GetOuterKeyValue(TOuter obj) => _keyOuter.GetValue(obj);
        private object GetInnerKeyValue(TInner obj) => _keyInner.GetValue(obj);
        private object GetResultProperyValue(string key, object obj) => _resultProperties[key].GetValue(obj);
        private dynamic CreateItem(TOuter o, TInner i)
        {
            var obj = new ExpandoObject();
            var dict = (IDictionary<string, object>)obj;
            _outputFields.ForEach(f =>
            {
                var source = f.StartsWith(OuterPrefix, StringComparison.OrdinalIgnoreCase) ? (object)o : i;
                dict.Add(GetName(f), source == null ? null : GetResultProperyValue(f, source));
            });
            return obj;
        }
    }

    public static IEnumerable<dynamic> DynamicJoin<TOuter, TInner>(this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner, string outerKey, string innerKey,
            params string[] outputFields) =>
        new Processor<TOuter, TInner>(outerKey, innerKey, outputFields).Join(outer, inner);
    public static IEnumerable<dynamic> DynamicLeftJoin<TOuter, TInner>(this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner, string outerKey, string innerKey,
            params string[] outputFields) =>
        new Processor<TOuter, TInner>(outerKey, innerKey, outputFields).LeftJoin(outer, inner);
}
JohnnyIV
  • 109
  • 5