2

Background and Problem

I have a site that exposes a custom report generation functionality where a user is able to dynamically select the fields they would like to include on a report. The fields map to the properties in documents that are stored in CosmosDb. A single document averages about 4kb. A typical report might consist of between 100 to 10k+ documents depending on a date range criteria and the amount of data the tenant has.

The document contains nested relationships similar to this:

public class Root
{
    public string BusinessId { get; set; }

    public bool SomeBoolean { get; set; }

    public DateTime MyDateTime { get; set; }

    public List<MyNested> MyNestedItems { get; set; }
}

public class MyNested
{
    public DateTime SomeDate { get; set; }

    public int SomeInteger { get; set; }

    public string SomeString { get; set; }
}

If the user selects only MyObject fields the end result is a single report row per document. If the user selects MyNestedObject fields the end result is a report row per MyNestedObject. In this case data points for the MyObject fields would be duplicated per row.

My current implementation is returning the entire document from CosmosDb and then shaping the results in code to match only the fields the user selected. This is the over-fetching problem that I am attempting to solve.

Possible Solution

I've attempted to build a dynamic projection based on an input like this:

public class Search
{
    public string BusinessId { get; set; }

    public RootFieldsToInclude RootFieldsToInclude { get; set; }

    public MyNestedFieldsToInclude MyNestedFieldsToInclude { get; set; }
}

public class RootFieldsToInclude
{
    public bool BusinessId { get; set; }

    public bool SomeBoolean { get; set; }

    public bool MyDateTime { get; set; }
}
public class MyNestedFieldsToInclude
{
    public bool SomeDate { get; set; }

    public bool SomeInteger { get; set; }

    public bool SomeString { get; set; }
}

The boolean fields marked as true on the search request would drive the properties to include on the request to CosmosDb.

public class MyRepo
{
    private readonly DocumentClient _client;

    public MyRepo()
    {
        _client = new DocumentClient(new Uri("https://xxxxxxxx.documents.azure.com:443/"), "xxxxxxxx");
    }

    const string DatabaseName = "TransactionDb";
    const string CollectionName = "Roots";

    public async Task<IEnumerable<dynamic>> GetDataAsync()
    {
        var search = new Search
        {
            BusinessId = "BBBBB",
            RootFieldsToInclude = new RootFieldsToInclude
            {
                BusinessId = true,
                MyDateTime = false,
                SomeBoolean = true,
            },
            MyNestedFieldsToInclude = new MyNestedFieldsToInclude
            {
                SomeDate = false,
                SomeInteger = false,
                SomeString = true
            }
        };

        var query = _client.CreateDocumentQuery<Root>(UriFactory.CreateDocumentCollectionUri(DatabaseName, CollectionName))
                           .Where(x => x.BusinessId == search.BusinessId)
                          // Example of what query would look like given example search
                          // .Select(x => new {
                          //     x.BusinessId,
                          //     MyNestedItems = x.MyNestedItems.Select(y => new
                          //    {
                          //      y.SomeString
                          //    },
                          //    X.SomeBoolean
                          // });
                           .Select(DynamicSelectGenerator<Root>(search));

        return await CosmosHelper.QueryAsync(query);
    }

    // Approach sourced from: https://stackoverflow.com/questions/606104/how-to-create-linq-expression-tree-to-select-an-anonymous-type
    private Expression<Func<T, dynamic>> DynamicSelectGenerator<T>(Search search)
    {
        var rootFields = GetRootFieldsToInclude(search.RootFieldsToInclude);

        // input parameter "o"
        var xParameter = Expression.Parameter(typeof(T), "o");

        // create initializers
        var rootFieldBindings = rootFields.Select(o => o.Trim())
            .Select(o =>
            {
                // property "Field1"
                var mi = typeof(Root).GetProperty(o);

                // original value "o.Field1"
                var xOriginal = Expression.Property(xParameter, mi);

                if (o == "MyNestedItems")
                {
                    // When included this fails with 'System.ArgumentException: Incorrect number of arguments supplied for call to method 'System.Collections.Generic.IEnumerable`1[System.Object] Select[MyNested,Object]'
                    //var nestedExpression = GetNestMemberInitExpression(search.MyNestedFieldsToInclude);
                    //var selectMethod = (Expression<Func<Root, IEnumerable<MyNested>>>)(_ => _.MyNestedItems.Select(c => default(MyNested)));
                    //return Expression.Bind(mi, Expression.Call(((MethodCallExpression)selectMethod.Body).Method, nestedExpression));
                }

                // set value "Field1 = o.Field1"
                return Expression.Bind(mi, xOriginal);
            }
        ).ToList();

        // new statement "new Root()"
        var newRoot = Expression.New(typeof(Root));

        // initialization "new Root { Field1 = o.Field1, Field2 = o.Field2 }"
        var newRootExpression = Expression.MemberInit(newRoot, rootFieldBindings);

        // expression "o => new Data { Field1 = o.Field1, Field2 = o.Field2 }"
        return Expression.Lambda<Func<T, dynamic>>(newRootExpression, xParameter);
    }

    private IEnumerable<string> GetRootFieldsToInclude(RootFieldsToInclude rootFieldsToInclude)
    {
        var results = typeof(Root).GetProperties().Select(propertyInfo => propertyInfo.Name).ToList();
        if (rootFieldsToInclude.BusinessId == false)
        {
            results.Remove("BusinessId");
        }

        if (rootFieldsToInclude.MyDateTime == false)
        {
            results.Remove("MyDateTime");
        }

        if (rootFieldsToInclude.SomeBoolean == false)
        {
            results.Remove("SomeBoolean");
        }

        // results.Remove("MyNestedItems");

        return results;
    }

    private MemberInitExpression GetNestMemberInitExpression(MyNestedFieldsToInclude myNestedFieldsToInclude)
    {
        var myNestedFields = GetNestedFieldsToInclude(myNestedFieldsToInclude);

        // input parameter "o"
        var xParameter2 = Expression.Parameter(typeof(MyNested), "n");

        // new statement "new Data()"
        var newNestedItems = Expression.New(typeof(MyNested));

        // create initializers
        var myNestedFieldBindings = myNestedFields.Select(o => o.Trim())
            .Select(o =>
            {
                // property "Field1"
                var mi = typeof(MyNested).GetProperty(o);

                // original value "o.Field1"
                var xOriginal = Expression.Property(xParameter2, mi);

                // set value "Field1 = o.Field1"
                return Expression.Bind(mi, xOriginal);
            }
        ).ToList();

        // initialization "new Data { Field1 = o.Field1, Field2 = o.Field2 }"
        return Expression.MemberInit(newNestedItems, myNestedFieldBindings);
    }

    private IEnumerable<string> GetNestedFieldsToInclude(MyNestedFieldsToInclude myNestedFieldsToInclude)
    {
        var results = typeof(MyNested).GetProperties().Select(propertyInfo => propertyInfo.Name).ToList();

        if (myNestedFieldsToInclude.SomeDate == false)
        {
            results.Remove("SomeDate");
        }

        if (myNestedFieldsToInclude.SomeInteger == false)
        {
            results.Remove("SomeInteger");
        }

        if (myNestedFieldsToInclude.SomeString == false)
        {
            results.Remove("SomeString");
        }

        return results;
    }
}

public class CosmosHelper
{
    public static async Task<IEnumerable<T>> QueryAsync<T>(IQueryable<T> query)
    {
        var docQuery = query.AsDocumentQuery();

        var results = new List<T>();
        while (docQuery.HasMoreResults)
        {
            results.AddRange(await docQuery.ExecuteNextAsync<T>());
        }

        return results;
    }
}

[UPDATE 9/17] I am able to build the expression to the root level of fields and it is executing against CosmosDB as expected. I've updated the above code to reflect this current state.

Challenge is now getting the MyNestedItems portion to be created correctly.

Questions:

  1. Am I on the right path or should I be considering a different approach?
  2. How do I build a dynamic expression off of my SearchRequest object?

Thanks in advance!

Stephen McDowell
  • 839
  • 9
  • 21

0 Answers0