28

Given a class structure like this:

public class GrandParent
{
    public Parent Parent { get; set;}
}
public class Parent
{
    public Child Child { get; set;}
}

public class Child
{
    public string Name { get; set;}
}

and the following method signature:

Expression<Func<TOuter, TInner>> Combine (Expression<Func<TOuter, TMiddle>>> first, Expression<Func<TMiddle, TInner>> second);

How can I implement said method so that I can call it like this:

Expression<Func<GrandParent, Parent>>> myFirst = gp => gp.Parent;
Expression<Func<Parent, string>> mySecond = p => p.Child.Name;

Expression<Func<GrandParent, string>> output = Combine(myFirst, mySecond);

such that output ends up as:

gp => gp.Parent.Child.Name

Is this possible?

The contents of each Func will only ever be a MemberAccess. I'd rather not end up with output being a nested function call.

Thanks

Delimitry
  • 2,987
  • 4
  • 30
  • 39
Andrew Bullock
  • 36,616
  • 34
  • 155
  • 231
  • 1
    (replying to comment on Eric's answer) If you aren't going to invoke, why not just teach your existing parsing code how to read `Invoke`? – Marc Gravell Nov 11 '09 at 21:40
  • 1
    youre right, i could do, it just feels hacky. Im going to spike both approaches and see which one feels best. An answer might have been that its really simple to combine the expressions, in which case that would have been preferable. – Andrew Bullock Nov 11 '09 at 22:31

8 Answers8

26

OK; pretty long snippet, but here's a starter for an expression-rewriter; it doesn't handle a few cases yet (I'll fix it later), but it works for the example given and a lot of others:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Linq.Expressions;
using System.Text.RegularExpressions;

public class GrandParent
{
    public Parent Parent { get; set; }
}
public class Parent
{
    public Child Child { get; set; }
    public string Method(string s) { return s + "abc"; }
}

public class Child
{
    public string Name { get; set; }
}
public static class ExpressionUtils
{
    public static Expression<Func<T1, T3>> Combine<T1, T2, T3>(
        this Expression<Func<T1, T2>> outer, Expression<Func<T2, T3>> inner, bool inline)
    {
        var invoke = Expression.Invoke(inner, outer.Body);
        Expression body = inline ? new ExpressionRewriter().AutoInline(invoke) : invoke;
        return Expression.Lambda<Func<T1, T3>>(body, outer.Parameters);
    }
}
public class ExpressionRewriter
{
    internal Expression AutoInline(InvocationExpression expression)
    {
        isLocked = true;
        if(expression == null) throw new ArgumentNullException("expression");
        LambdaExpression lambda = (LambdaExpression)expression.Expression;
        ExpressionRewriter childScope = new ExpressionRewriter(this);
        var lambdaParams = lambda.Parameters;
        var invokeArgs = expression.Arguments;
        if (lambdaParams.Count != invokeArgs.Count) throw new InvalidOperationException("Lambda/invoke mismatch");
        for(int i = 0 ; i < lambdaParams.Count; i++) {
            childScope.Subst(lambdaParams[i], invokeArgs[i]);
        }
        return childScope.Apply(lambda.Body);
    }
    public ExpressionRewriter()
    {
         subst = new Dictionary<Expression, Expression>();
    }
    private ExpressionRewriter(ExpressionRewriter parent)
    {
        if (parent == null) throw new ArgumentNullException("parent");
        subst = new Dictionary<Expression, Expression>(parent.subst);
        inline = parent.inline;
    }
    private bool isLocked, inline;
    private readonly Dictionary<Expression, Expression> subst;
    private void CheckLocked() {
        if(isLocked) throw new InvalidOperationException(
            "You cannot alter the rewriter after Apply has been called");

    }
    public ExpressionRewriter Subst(Expression from,
        Expression to)
    {
        CheckLocked();
        subst.Add(from, to);
        return this;
    }
    public ExpressionRewriter Inline() {
        CheckLocked();
        inline = true;
        return this;
    }
    public Expression Apply(Expression expression)
    {
        isLocked = true;
        return Walk(expression) ?? expression;
    }

    private static IEnumerable<Expression> CoalesceTerms(
        IEnumerable<Expression> sourceWithNulls, IEnumerable<Expression> replacements)
    {
        if(sourceWithNulls != null && replacements != null) {
            using(var left = sourceWithNulls.GetEnumerator())
            using (var right = replacements.GetEnumerator())
            {
                while (left.MoveNext() && right.MoveNext())
                {
                    yield return left.Current ?? right.Current;
                }
            }
        }
    }
    private Expression[] Walk(IEnumerable<Expression> expressions) {
        if(expressions == null) return null;
        return expressions.Select(expr => Walk(expr)).ToArray();
    }
    private static bool HasValue(Expression[] expressions)
    {
        return expressions != null && expressions.Any(expr => expr != null);
    }
    // returns null if no need to rewrite that branch, otherwise
    // returns a re-written branch
    private Expression Walk(Expression expression)
    {
        if (expression == null) return null;
        Expression tmp;
        if (subst.TryGetValue(expression, out tmp)) return tmp;
        switch(expression.NodeType) {
            case ExpressionType.Constant:
            case ExpressionType.Parameter:
                {
                    return expression; // never a need to rewrite if not already matched
                }
            case ExpressionType.MemberAccess:
                {
                    MemberExpression me = (MemberExpression)expression;
                    Expression target = Walk(me.Expression);
                    return target == null ? null : Expression.MakeMemberAccess(target, me.Member);
                }
            case ExpressionType.Add:
            case ExpressionType.Divide:
            case ExpressionType.Multiply:
            case ExpressionType.Subtract:
            case ExpressionType.AddChecked:
            case ExpressionType.MultiplyChecked:
            case ExpressionType.SubtractChecked:
            case ExpressionType.And:
            case ExpressionType.Or:
            case ExpressionType.ExclusiveOr:
            case ExpressionType.Equal:
            case ExpressionType.NotEqual:
            case ExpressionType.AndAlso:
            case ExpressionType.OrElse:
            case ExpressionType.Power:
            case ExpressionType.Modulo:
            case ExpressionType.GreaterThan:
            case ExpressionType.GreaterThanOrEqual:
            case ExpressionType.LessThan:
            case ExpressionType.LessThanOrEqual:
            case ExpressionType.LeftShift:
            case ExpressionType.RightShift:
            case ExpressionType.Coalesce:
            case ExpressionType.ArrayIndex:
                {
                    BinaryExpression binExp = (BinaryExpression)expression;
                    Expression left = Walk(binExp.Left), right = Walk(binExp.Right);
                    return (left == null && right == null) ? null : Expression.MakeBinary(
                        binExp.NodeType, left ?? binExp.Left, right ?? binExp.Right, binExp.IsLiftedToNull,
                        binExp.Method, binExp.Conversion);
                }
            case ExpressionType.Not:
            case ExpressionType.UnaryPlus:
            case ExpressionType.Negate:
            case ExpressionType.NegateChecked:
            case ExpressionType.Convert: 
            case ExpressionType.ConvertChecked:
            case ExpressionType.TypeAs:
            case ExpressionType.ArrayLength:
                {
                    UnaryExpression unExp = (UnaryExpression)expression;
                    Expression operand = Walk(unExp.Operand);
                    return operand == null ? null : Expression.MakeUnary(unExp.NodeType, operand,
                        unExp.Type, unExp.Method);
                }
            case ExpressionType.Conditional:
                {
                    ConditionalExpression ce = (ConditionalExpression)expression;
                    Expression test = Walk(ce.Test), ifTrue = Walk(ce.IfTrue), ifFalse = Walk(ce.IfFalse);
                    if (test == null && ifTrue == null && ifFalse == null) return null;
                    return Expression.Condition(test ?? ce.Test, ifTrue ?? ce.IfTrue, ifFalse ?? ce.IfFalse);
                }
            case ExpressionType.Call:
                {
                    MethodCallExpression mce = (MethodCallExpression)expression;
                    Expression instance = Walk(mce.Object);
                    Expression[] args = Walk(mce.Arguments);
                    if (instance == null && !HasValue(args)) return null;
                    return Expression.Call(instance, mce.Method, CoalesceTerms(args, mce.Arguments));
                }
            case ExpressionType.TypeIs:
                {
                    TypeBinaryExpression tbe = (TypeBinaryExpression)expression;
                    tmp = Walk(tbe.Expression);
                    return tmp == null ? null : Expression.TypeIs(tmp, tbe.TypeOperand);
                }
            case ExpressionType.New:
                {
                    NewExpression ne = (NewExpression)expression;
                    Expression[] args = Walk(ne.Arguments);
                    if (HasValue(args)) return null;
                    return ne.Members == null ? Expression.New(ne.Constructor, CoalesceTerms(args, ne.Arguments))
                        : Expression.New(ne.Constructor, CoalesceTerms(args, ne.Arguments), ne.Members);
                }
            case ExpressionType.ListInit:
                {
                    ListInitExpression lie = (ListInitExpression)expression;
                    NewExpression ctor = (NewExpression)Walk(lie.NewExpression);
                    var inits = lie.Initializers.Select(init => new
                    {
                        Original = init,
                        NewArgs = Walk(init.Arguments)
                    }).ToArray();
                    if (ctor == null && !inits.Any(init => HasValue(init.NewArgs))) return null;
                    ElementInit[] initArr = inits.Select(init => Expression.ElementInit(
                            init.Original.AddMethod, CoalesceTerms(init.NewArgs, init.Original.Arguments))).ToArray();
                    return Expression.ListInit(ctor ?? lie.NewExpression, initArr);

                }
            case ExpressionType.NewArrayBounds:
            case ExpressionType.NewArrayInit:
                /* not quite right... leave as not-implemented for now
                {
                    NewArrayExpression nae = (NewArrayExpression)expression;
                    Expression[] expr = Walk(nae.Expressions);
                    if (!HasValue(expr)) return null;
                    return expression.NodeType == ExpressionType.NewArrayBounds
                        ? Expression.NewArrayBounds(nae.Type, CoalesceTerms(expr, nae.Expressions))
                        : Expression.NewArrayInit(nae.Type, CoalesceTerms(expr, nae.Expressions));
                }*/
            case ExpressionType.Invoke:
            case ExpressionType.Lambda:
            case ExpressionType.MemberInit:
            case ExpressionType.Quote:
                throw new NotImplementedException("Not implemented: " + expression.NodeType);
            default:
                throw new NotSupportedException("Not supported: " + expression.NodeType);
        }

    }
}
static class Program
{
    static void Main()
    {
        Expression<Func<GrandParent, Parent>> myFirst = gp => gp.Parent;
        Expression<Func<Parent, string>> mySecond = p => p.Child.Name;

        Expression<Func<GrandParent, string>> outputWithInline = myFirst.Combine(mySecond, false);
        Expression<Func<GrandParent, string>> outputWithoutInline = myFirst.Combine(mySecond, true);

        Expression<Func<GrandParent, string>> call =
                ExpressionUtils.Combine<GrandParent, Parent, string>(
                gp => gp.Parent, p => p.Method(p.Child.Name), true);

        unchecked
        {
            Expression<Func<double, double>> mathUnchecked =
                ExpressionUtils.Combine<double, double, double>(x => (x * x) + x, x => x - (x / x), true);
        }
        checked
        {
            Expression<Func<double, double>> mathChecked =
                ExpressionUtils.Combine<double, double, double>(x => x - (x * x) , x => (x / x) + x, true);
        }
        Expression<Func<int,int>> bitwise =
            ExpressionUtils.Combine<int, int, int>(x => (x & 0x01) | 0x03, x => x ^ 0xFF, true);
        Expression<Func<int, bool>> logical =
            ExpressionUtils.Combine<int, bool, bool>(x => x == 123, x => x != false, true);
        Expression<Func<int[][], int>> arrayAccess =
            ExpressionUtils.Combine<int[][], int[], int>(x => x[0], x => x[0], true);
        Expression<Func<string, bool>> isTest =
            ExpressionUtils.Combine<string,object,bool>(s=>s, s=> s is Regex, true);

        Expression<Func<List<int>>> f = () => new List<int>(new int[] { 1, 1, 1 }.Length);
        Expression<Func<string, Regex>> asTest =
            ExpressionUtils.Combine<string, object, Regex>(s => s, s => s as Regex, true);
        var initTest = ExpressionUtils.Combine<int, int[], List<int>>(i => new[] {i,i,i}, 
                    arr => new List<int>(arr.Length), true);
        var anonAndListTest = ExpressionUtils.Combine<int, int, List<int>>(
                i => new { age = i }.age, i => new List<int> {i, i}, true);
        /*
        var arrBoundsInit = ExpressionUtils.Combine<int, int[], int[]>(
            i => new int[i], arr => new int[arr[0]] , true);
        var arrInit = ExpressionUtils.Combine<int, int, int[]>(
            i => i, i => new int[1] { i }, true);*/
    }
}
Marc Gravell
  • 1,026,079
  • 266
  • 2,566
  • 2,900
20

I am assuming that your goal is to obtain the expression tree that you would have obtained, had you actually compiled the "combined" lambda. It's much easier to construct a new expression tree that simply invokes the given expression trees appropriately, but I assume that's not what you want.

  • extract the body of first, cast it to MemberExpression. Call this firstBody.
  • extract the body of second, call this secondBody
  • extract the parameter of first. Call this firstParam.
  • extract the parameter of second. Call this secondParam.
  • Now, the hard part. Write a visitor pattern implementation which searches through secondBody looking for the single usage of secondParam. (This will be much easier if you know that it's only member access expressions, but you can solve the problem in general.) When you find it, construct a new expression of the same type as its parent, substituting in firstBody for the parameter. Continue to rebuild the transformed tree on the way back out; remember, all you have to rebuild is the "spine" of the tree that contains the parameter reference.
  • the result of the visitor pass will be a rewritten secondBody with no occurrences of secondParam, only occurences of expressions involving firstParam.
  • construct a new lambda expression with that body as its body, and firstParam as its param.
  • and you're done!

Matt Warren's blog might be a good thing for you to read. He designed and implemented all this stuff and has written a lot about ways to rewrite expression trees effectively. (I only did the compiler end of things.)

UPDATE:

As this related answer points out, in .NET 4 there is now a base class for expression rewriters that makes this sort of thing a lot easier.

Community
  • 1
  • 1
Eric Lippert
  • 647,829
  • 179
  • 1,238
  • 2,067
  • 2
    I've always been of the opinion that the ability to replace an expression in an existing expression (perhaps replacing all instances of a given `ParameterExpression` with some other known expression) is a missed trick. `Expression.Invoke` is an option, but is poorly supported by EF (LINQ-to-SQL works, though). – Marc Gravell Nov 11 '09 at 20:14
  • 1
    (obviously creating a new expression via some kind of visitor; npt changing the existing one) – Marc Gravell Nov 11 '09 at 20:15
  • 2
    +1, very interesting solution, it would be great to see this in action :-) – Darin Dimitrov Nov 11 '09 at 20:22
  • 1
    For info, I had an implementation of such a visitor a while ago that worked for most of the 3.5 expression-types. I should revisit it at some point (it only took an hour or so), updating it for 4.0. @Darin; let me know (see profile) if you want me to try finding it on my hdd. – Marc Gravell Nov 11 '09 at 20:30
  • 1
    This sounds like exactly what I need. I understand all that in principle, but where my knowledge breaks down is how exactly to do step 5, how to build the new lambda. Ill google for Matt Warren's blog. @Marc id be interested to see it :) – Andrew Bullock Nov 11 '09 at 20:54
  • your first point of "obtain the expression tree that you would have obtained" is correct. I want the expression as If id just gone `gp => gp.Parent.Child.Name` in the first place. I don't actually need to invoke this, I have a vistor elsewhere which will examine it which is what i meant by "i dont want nested calls". I cant have a wrapper which invokes both in a chain, because i won't be invoking ;) – Andrew Bullock Nov 11 '09 at 20:58
  • meh, im stuck. How do I build the Expressions once traversed up the tree to the secondParam? Do i need Expression.PropertyOrField? – Andrew Bullock Nov 12 '09 at 00:29
  • Sample rewriter added as an answer. @Eric - any idea why the `checked` context doesn't seem to be used (i.e. even in the `checked` context I don't get `AddChecked` etc)? (I'm not even going to mention that this is part of the non-existent Expression/compiler spec ;-p) – Marc Gravell Nov 12 '09 at 08:03
14

I'm not sure what you mean by it not being a nested function call, but this will do the trick - with an example:

using System;
using System.IO;
using System.Linq.Expressions;

class Test    
{    
    static Expression<Func<TOuter, TInner>> Combine<TOuter, TMiddle, TInner>
        (Expression<Func<TOuter, TMiddle>> first, 
         Expression<Func<TMiddle, TInner>> second)
    {
        var parameter = Expression.Parameter(typeof(TOuter), "x");
        var firstInvoke = Expression.Invoke(first, new[] { parameter });
        var secondInvoke = Expression.Invoke(second, new[] { firstInvoke} );

        return Expression.Lambda<Func<TOuter, TInner>>(secondInvoke, parameter);
    }

    static void Main()
    {
        Expression<Func<int, string>> first = x => (x + 1).ToString();
        Expression<Func<string, StringReader>> second = y => new StringReader(y);

        Expression<Func<int, StringReader>> output = Combine(first, second);
        Func<int, StringReader> compiled = output.Compile();
        var reader = compiled(10);
        Console.WriteLine(reader.ReadToEnd());
    }
}

I don't know how efficient the generated code will be compared with a single lambda expression, but I suspect it won't be too bad.

Jon Skeet
  • 1,421,763
  • 867
  • 9,128
  • 9,194
  • 1
    You can probably simplify this (remove an invoke an a parameter expression) by reusing (separately) the parameter and body from the outer expression. – Marc Gravell Nov 11 '09 at 20:16
  • 1
    Like so: `return Expression.Lambda>(Expression.Invoke(second, first.Body), first.Parameters);` – Marc Gravell Nov 11 '09 at 20:20
  • 1
    Note also that EF in 3.5SP1 hates this ;-p LINQ-to-SQL is OK with it, though. So it is provider-specific. – Marc Gravell Nov 11 '09 at 20:21
5

For a complete solution have a look at LINQKit:

Expression<Func<GrandParent, string>> output = gp => mySecond.Invoke(myFirst.Invoke(gp));
output = output.Expand().Expand();

output.ToString() prints out

gp => gp.Parent.Child.Name

whereas Jon Skeet's solution yields

x => Invoke(p => p.Child.Name,Invoke(gp => gp.Parent,x))

I guess that's what you're referring to as 'nested function calls'.

Sebastian Ullrich
  • 1,170
  • 6
  • 10
2

Try this:

public static Expression<Func<TOuter, TInner>> Combine<TOuter, TMiddle, TInner>(
    Expression<Func<TOuter, TMiddle>> first, 
    Expression<Func<TMiddle, TInner>> second)
{
    return x => second.Compile()(first.Compile()(x));
}

and the usage:

Expression<Func<GrandParent, Parent>> myFirst = gp => gp.Parent;
Expression<Func<Parent, string>> mySecond = p => p.Child.Name;
Expression<Func<GrandParent, string>> output = Combine(myFirst, mySecond);
var grandParent = new GrandParent 
{ 
    Parent = new Parent 
    { 
        Child = new Child 
        { 
            Name = "child name" 
        } 
    } 
};
var childName = output.Compile()(grandParent);
Console.WriteLine(childName); // prints "child name"
Darin Dimitrov
  • 1,023,142
  • 271
  • 3,287
  • 2,928
  • 1
    My guess is that the resulting expression tree wouldn't be suitable for use in (say) LINQ to SQL. Whether mine would or not, I don't know - but it keeps things as expression trees without compiling them into intermediate methods, which I suspect is a good start :) – Jon Skeet Nov 11 '09 at 20:06
  • 1
    @Jon, I agree with you, but compiling the expressions was the first thing that came to my mind :-) – Darin Dimitrov Nov 11 '09 at 20:11
1
    public static Expression<Func<T, TResult>> And<T, TResult>(this Expression<Func<T, TResult>> expr1, Expression<Func<T, TResult>> expr2)
    {
        var invokedExpr = Expression.Invoke(expr2, expr1.Parameters.Cast<Expression>());
        return Expression.Lambda<Func<T, TResult>>(Expression.AndAlso(expr1.Body, invokedExpr), expr1.Parameters);
    }

    public static Expression<Func<T, bool>> Or<T>(this Expression<Func<T, bool>> expr1, Expression<Func<T, bool>> expr2)
    {
        var invokedExpr = Expression.Invoke(expr2, expr1.Parameters.Cast<Expression>());
        return Expression.Lambda<Func<T, bool>>(Expression.OrElse(expr1.Body, invokedExpr), expr1.Parameters);
    }
suneelsarraf
  • 923
  • 9
  • 7
1

After a half-day's digging came up with the following solution (much simpler than the accepted answer):

For generic lambda composition:

    public static Expression<Func<X, Z>> Compose<X, Y, Z>(Expression<Func<Y, Z>> f, Expression<Func<X, Y>> g)
    {
        return Expression.Lambda<Func<X, Z>>(Expression.Invoke(f, Expression.Invoke(g, g.Parameters[0])), g.Parameters);
    }

This combines two expressions in one, i.e. applies the first expression to the result of the second.

So if we have f(y) and g(x), combine(f,g)(x) === f(g(x))

Transitive and associative, so the combinator can be chained

More specifically, for property access (needed for MVC/EF):

    public static Expression<Func<X, Z>> Property<X, Y, Z>(Expression<Func<X, Y>> fObj, Expression<Func<Y, Z>> fProp)
    {
        return Expression.Lambda<Func<X, Z>>(Expression.Property(fObj.Body, (fProp.Body as MemberExpression).Member as PropertyInfo), fObj.Parameters);
    }

Note: fProp must be a simple property access expression, such as x => x.Prop.

fObj can be any expression (but must be MVC-compatible)

ArunasR
  • 1,907
  • 1
  • 14
  • 15
0

With a toolkit called Layer Over LINQ, there's an extension method that does exactly this, combines two expressions to create a new one suitable for use in LINQ to Entities.

Expression<Func<GrandParent, Parent>>> myFirst = gp => gp.Parent;
Expression<Func<Parent, string>> mySecond = p => p.Child.Name;

Expression<Func<GrandParent, string>> output = myFirst.Chain(mySecond);
Jim
  • 9
  • 2
  • 2
    It is OK that you offer your toolkit as a solution, however the FAQ does state that you must disclose that you are the author. (In the answer, not just in your profile.) – Danny Varod May 22 '11 at 14:23