3

Why is a compiled lambda built over Expression.Call slightly slower than a delegate that should do the same thing? And how can this be avoided?

Explaining the BenchmarkDotNet results: we are comparing CallBuildedReal vs. CallLambda. The other two, CallBuilded and CallLambdaConst, are "subforms" of CallLambda and show the same numbers. But the difference with CallBuildedReal is significant.

//[Config(typeof(Config))]
[RankColumn, MinColumn, MaxColumn, StdDevColumn, MedianColumn]
[ClrJob, CoreJob]
[HtmlExporter, MarkdownExporter]
[MemoryDiagnoser /*, InliningDiagnoser*/]
/// <summary>
/// Benchmarks four delegates of the same shape,
/// <c>Func&lt;StringBuilder, int, int, bool&gt;</c>, that each append the sum of
/// two integers to a <see cref="StringBuilder"/>. The delegates differ only in
/// how they (and the addition they wrap) were produced.
/// </summary>
public class BenchmarkCallSimple
{
    // C# closure from BuildCallMethod wrapping the inline lambda (a, b) => a + b.
    static Func<StringBuilder, int, int, bool> callLambda;

    // C# closure from BuildCallMethod wrapping the AddMethod method group.
    static Func<StringBuilder, int, int, bool> callLambdaConst;

    // C# closure from BuildCallMethod wrapping an addition delegate compiled from an expression tree.
    static Func<StringBuilder, int, int, bool> callBuilded;

    // Delegate compiled entirely from an expression tree: a call to Append<int>
    // that receives the compiled addition delegate as a constant.
    static Func<StringBuilder, int, int, bool> callBuildedReal;

    /// <summary>
    /// Initializes the four delegates exercised by the benchmark methods.
    /// </summary>
    static BenchmarkCallSimple()
    {
        // Hand-written variants: the addition comes from a method group or an inline lambda.
        callLambdaConst = BuildCallMethod<int>(AddMethod);
        callLambda = BuildCallMethod<int>((a, b) => a + b);

        // (x, y) => x + y, compiled from an expression tree.
        var left = Expression.Parameter(typeof(int));
        var right = Expression.Parameter(typeof(int));
        var compiledAdd = Expression
            .Lambda<Func<int, int, int>>(Expression.Add(left, right), left, right)
            .Compile();
        callBuilded = BuildCallMethod(compiledAdd);

        // (sb, i1, i2) => Append<int>(sb, i1, i2, compiledAdd), compiled from an expression tree.
        var builderParam = Expression.Parameter(typeof(StringBuilder), "sb");
        var firstParam = Expression.Parameter(typeof(int), "i1");
        var secondParam = Expression.Parameter(typeof(int), "i2");
        var addConstant = Expression.Constant(compiledAdd, compiledAdd.GetType());

        var appendOfInt = typeof(BenchmarkCallSimple)
            .GetTypeInfo()
            .GetDeclaredMethod(nameof(BenchmarkCallSimple.Append))
            .MakeGenericMethod(typeof(int));

        var appendCall = Expression.Call(
            appendOfInt,
            new Expression[] { builderParam, firstParam, secondParam, addConstant });
        var appendLambda = Expression.Lambda(appendCall, builderParam, firstParam, secondParam);

        callBuildedReal = (Func<StringBuilder, int, int, bool>)appendLambda.Compile();
    }

    /// <summary>
    /// Applies <paramref name="operation"/> to the two operands and appends the result to
    /// <paramref name="sb"/>. This is the method the expression tree in the static constructor calls.
    /// </summary>
    /// <returns>Always <c>true</c>.</returns>
    private static bool Append<T>(StringBuilder sb, T i1, T i2, Func<T, T, T> operation)
    {
        sb.Append(operation(i1, i2));
        return true;
    }

    /// <summary>
    /// Wraps <paramref name="operation"/> in a closure that appends its result to the
    /// supplied <see cref="StringBuilder"/> and returns <c>true</c>.
    /// </summary>
    private static Func<StringBuilder, T, T, bool> BuildCallMethod<T>(Func<T, T, T> operation)
    {
        return (sb, i1, i2) =>
        {
            sb.Append(operation(i1, i2));
            return true;
        };
    }

    /// <summary>Returns the sum of <paramref name="a"/> and <paramref name="b"/>.</summary>
    private static int AddMethod(int a, int b) => a + b;

    /// <summary>Invokes the delegate compiled from the Append-call expression tree.</summary>
    [Benchmark]
    public string CallBuildedReal()
    {
        var builder = new StringBuilder();
        callBuildedReal(builder, 1, 2);
        return builder.ToString();
    }

    /// <summary>Invokes the C# closure that wraps the expression-compiled addition.</summary>
    [Benchmark]
    public string CallBuilded()
    {
        var builder = new StringBuilder();
        callBuilded(builder, 1, 2);
        return builder.ToString();
    }

    /// <summary>Invokes the C# closure that wraps the inline addition lambda.</summary>
    [Benchmark]
    public string CallLambda()
    {
        var builder = new StringBuilder();
        callLambda(builder, 1, 2);
        return builder.ToString();
    }

    /// <summary>Invokes the C# closure that wraps the AddMethod method group.</summary>
    [Benchmark]
    public string CallLambdaConst()
    {
        var builder = new StringBuilder();
        callLambdaConst(builder, 1, 2);
        return builder.ToString();
    }
}

Results:

BenchmarkDotNet=v0.10.5, OS=Windows 10.0.14393
Processor=Intel Core i5-2500K CPU 3.30GHz (Sandy Bridge), ProcessorCount=4
Frequency=3233539 Hz, Resolution=309.2587 ns, Timer=TSC
  [Host] : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1648.0
  Clr    : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1648.0
  Core   : .NET Core 4.6.25009.03, 64bit RyuJIT


          Method |  Job | Runtime |     Mean |    Error |   StdDev |      Min |      Max |   Median | Rank |  Gen 0 | Allocated |
---------------- |----- |-------- |---------:|---------:|---------:|---------:|---------:|---------:|-----:|-------:|----------:|
 CallBuildedReal |  Clr |     Clr | 137.8 ns | 2.903 ns | 4.255 ns | 133.6 ns | 149.6 ns | 135.6 ns |    7 | 0.0580 |     192 B |
     CallBuilded |  Clr |     Clr | 122.7 ns | 2.068 ns | 1.934 ns | 118.5 ns | 126.2 ns | 122.6 ns |    6 | 0.0576 |     192 B |
      CallLambda |  Clr |     Clr | 119.8 ns | 1.342 ns | 1.255 ns | 117.9 ns | 121.7 ns | 119.6 ns |    5 | 0.0576 |     192 B |
 CallLambdaConst |  Clr |     Clr | 121.7 ns | 1.347 ns | 1.194 ns | 120.1 ns | 124.5 ns | 121.6 ns |    6 | 0.0571 |     192 B |
 CallBuildedReal | Core |    Core | 114.8 ns | 2.263 ns | 2.117 ns | 112.7 ns | 118.8 ns | 113.7 ns |    3 | 0.0594 |     191 B |
     CallBuilded | Core |    Core | 109.0 ns | 1.701 ns | 1.591 ns | 106.5 ns | 112.2 ns | 108.8 ns |    2 | 0.0599 |     191 B |
      CallLambda | Core |    Core | 107.0 ns | 1.181 ns | 1.105 ns | 105.7 ns | 109.4 ns | 106.8 ns |    1 | 0.0593 |     191 B |
 CallLambdaConst | Core |    Core | 117.3 ns | 2.706 ns | 3.704 ns | 113.4 ns | 127.8 ns | 116.0 ns |    4 | 0.0592 |     191 B |

Benchmark code:

Note 1: there is a similar SO thread, "Performance of expression trees", where the built expression shows the best result in the benchmark.

Note 2: I should be close to the answer once I get the IL code of the compiled expression, so I'm trying to learn how to get it (LINQPad? ilasm integrated into VS? a dynamic assembly?). If you know a simple plugin that can do this from VS, it would help me a lot.

Note 3: this doesn't work

    // Attempt to dump the IL of appendLambda: define a save-only dynamic assembly,
    // a module and a type to host the compiled method.
    var assemblyBuilder = System.AppDomain.CurrentDomain.DefineDynamicAssembly(new AssemblyName("testLambda"),System.Reflection.Emit.AssemblyBuilderAccess.Save);
    var modelBuilder = assemblyBuilder.DefineDynamicModule("testLambda_module", "testLambda.dll");
    var typeBuilder = modelBuilder.DefineType("testLambda_type");
    // NOTE(review): four parameter types are listed here, while appendLambda is built
    // with three parameters (sb, i1, i2) — confirm the trailing typeof(bool) is intended.
    var method = typeBuilder.DefineMethod("testLambda_method", MethodAttributes.Public | MethodAttributes.Static, typeof(bool), 
        new[] { typeof(StringBuilder), typeof(int), typeof(int), typeof(bool) });
    // Presumably the call that throws: the exception quoted below says CompileToMethod
    // cannot compile the delegate constant embedded in the expression.
    appendLambda.CompileToMethod(method);
    typeBuilder.CreateType();
    assemblyBuilder.Save("testLambda.dll");

This fails with System.TypeInitializationException: "InvalidOperationException: CompileToMethod cannot compile constant 'System.Func`3[System.Int32,System.Int32,System.Int32]' because it is a non-trivial value, such as a live object. Instead, create an expression tree that can construct this value." That means `appendLambda` contains a parameter of type Func, which is not a primitive type, and CompileToMethod is limited to primitives.

Roman Pokrovskij
  • 9,449
  • 21
  • 87
  • 142
  • Your code does not compile in current form (has some references to BenchmarkJsonSimple and additionExpr). It's important to have compilable code in question about micro optimization. – Evk May 29 '17 at 09:54
  • Thank you. I have improved it. "Last minute changes". Now code is fully compatable, but I still not so sure that this makes answer better. :) It is now "to massive". – Roman Pokrovskij May 29 '17 at 10:04
  • To view generated IL - create module at runtime then use `Expression.CompileToMethod` to compile it there, then save module to disk and explore IL with usual tools. As for the code - it's always better if you can copy paste some code and run it right away. – Evk May 29 '17 at 10:38
  • I can't do it straightforwardly because of System.TypeInitializationException... `appendLambda` contains a parameter that is a Func, which is not a primitive type, and there is a limitation for CompileToMethod (as I understand it, all types should be primitives) – Roman Pokrovskij May 29 '17 at 14:32
  • 1
    Compiled expression trees are slower than "native" code... See for example https://stackoverflow.com/questions/29397282/compiled-expression-tree-slow-due-to-jit-methodaccesscheck ... There is an extra security check done every time you access the generated method. There were even some comments directly in the question https://stackoverflow.com/questions/24802222/performance-of-expression-trees that you answered. – xanatos May 30 '17 at 07:26

1 Answers1

4

The compiled expression may be slower for the following reason:

TL;DR;

The question is, why is the compiled delegate way slower than a manually-written delegate? Expression.Compile creates a DynamicMethod and associates it with an anonymous assembly to run it in a sand-boxed environment. This makes it safe for a dynamic method to be emitted and executed by partially trusted code but adds some run-time overhead.

There are tools like FastExpressionCompiler which help to mitigate the problem (disclaimer: I am an author)

Update: View IL of compiled delegate

  1. It is possible to get the compiled delegate IL as byte array:

    // Local captured by the lambda below.
    var hello = "Hello";
    Expression<Func<string>> getGreetingExpr = () => hello + " me";
    
    // Compile the expression tree into a delegate.
    var getGreeting = getGreetingExpr.Compile();
    
    // Reflect over the delegate's method to reach its body.
    // NOTE(review): whether GetMethodBody() yields a body for a delegate produced by
    // Compile() may depend on the runtime — confirm before relying on it.
    var methodBody = getGreeting.Method.GetMethodBody();
    
    // Raw IL as a byte array; it still has to be decoded into instructions (step 2).
    var ilBytes = methodBody.GetILAsByteArray();
    
  2. You need a way to parse/read the array and convert it into IL instructions and parameters.

Unfortunately, I did not find any tooling or a robust NuGet package that would let me do so :-(

Here is the related SO question.

The closest tool may be this.

dadhi
  • 4,807
  • 19
  • 25
  • Thank you, your project is based on breathtaking idea worth a github star from me. But I still have the same headache: what is the best way to get il code of compiled expression tree? I would prefer to get it directly in VS debugger. What do you use for this? – Roman Pokrovskij May 29 '17 at 10:16
  • BTW: It is interesting that in https://stackoverflow.com/questions/24802222/performance-of-expression-trees/44233174#44233174 compiled build lambda works even faster than delegate. Why? – Roman Pokrovskij May 29 '17 at 10:22
  • You may compile to dynamic assembly and examine the IL with something like dnSpy – dadhi May 29 '17 at 11:01
  • May be I can but how can I pass through System.TypeInitializationException: "InvalidOperationException: CompileToMethod cannot compile constant 'System.Func`3[System.Int32,System.Int32,System.Int32]' because it is a non-trivial value, such as a live object. Instead, create an expression tree that can construct this value." – Roman Pokrovskij May 29 '17 at 14:25
  • Heh, that's because compiling to method does not support closure over constants. So, you may either make expression static by providing consts through parameters. Or decompile delegate Body byte array. – dadhi May 29 '17 at 17:44
  • Thank you for your help. Can you explain what is "making expression static" ? I use "MethodAttributes.Static" as DefineMethod parameter (code was added to my question) and this doesn't work. Which tool can decompile Body byte array? And how to get byte array[] ? There is method to serialize method body of deleage? I Can't find it :( – Roman Pokrovskij May 30 '17 at 10:37
  • 1
    Updated my answer with sample of getting the IL bytes. By static delegate I meant making it equivalent of static method, which does not use any run-time state, aka pure/referential transparent function. – dadhi May 30 '17 at 11:23
  • 1
    Added link to https://stackoverflow.com/questions/2436082/msil-inspection – dadhi May 30 '17 at 11:34