1

I have an Azure function written in C# which receives JSON by HTTP POST request, in which I am sure about nodes: ssid, dim, type, list_of_business_keys. Each item in the list, has one or more columns, which name and type I do not know, before I receive the JSON. Let's say that for the first example they are column_1 and column_2, both of Int64 type:

{ "ssid" : 1, 
  "dim" : 2,
  "type" : 3,
  "list_of_business_keys":
  [
      {"business_key" : {"column_1" : 100, "column_2" : 1000}},
      {"business_key" : {"column_1" : 200, "column_2" : 1000}},
      {"business_key" : {"column_1" : 300, "column_2" : 1000}},
      {"business_key" : {"column_1" : 400, "column_2" : 1000}},
      {"business_key" : {"column_1" : 500, "column_2" : 1000}}
  ]
}

What I would like to achieve is to convert this JSON to a DataTable, that I would later use as a table type parameter to invoke a Stored Procedure from Azure SQL Database. So I would like this DataTable to look like this:

enter image description here

I have written following code to achieve that:

#r "Microsoft.WindowsAzure.Storage"
#r "Newtonsoft.Json"
#r "System.Net"
#r "System.Data"

using System;
using System.Net;
using System.Data;
using System.Data.SqlClient;
using Microsoft.WindowsAzure.Storage.Table;
using Newtonsoft.Json;

public static async Task<HttpResponseMessage> Run(HttpRequestMessage req, TraceWriter log)
{
    string resultAsString = await req.Content.ReadAsStringAsync();

    KeyList keyList = JsonConvert.DeserializeObject<KeyList>(resultAsString);

    List<ListOfBusinessKey> list = keyList.list_of_business_keys;

    DataTable tbl = new DataTable();

    tbl.Columns.Add(new DataColumn("ssid", typeof(Int64)));
    tbl.Columns.Add(new DataColumn("dim", typeof(Int64)));
    tbl.Columns.Add(new DataColumn("type", typeof(Int64)));
    tbl.Columns.Add(new DataColumn("column_1", typeof(Int64)));
    tbl.Columns.Add(new DataColumn("column_2", typeof(Int64)));



    foreach (var key in list) {
        tbl.Rows.Add(keyList.ssid, keyList.dim, keyList.type, key.business_key.column_1, key.business_key.column_2);  
    }

    foreach (var row in tbl.Rows){
        log.Info(row.ToString());
    }

    foreach (DataRow dataRow in tbl.Rows)
    {
        foreach (var item in dataRow.ItemArray)
        {
            log.Info(item.ToString());
        }
    }

    return req.CreateResponse(keyList);

}

public class BusinessKey
{
    public int column_1 { get; set; }
    public int column_2 { get; set; }
}

public class ListOfBusinessKey
{
    public BusinessKey business_key { get; set; }
}

public class KeyList
{
    public int ssid { get; set; }
    public int dim { get; set; }
    public int type { get; set; }
    public List<ListOfBusinessKey> list_of_business_keys { get; set; }
}

And this seems to work.

My problem is that to deserialize received JSON I have to create classes based on JSON's structure. The same is with creating the data table. I know the col names and their types so I create the DataTable implicitly. But what happens when I do not know the structure before?

So let's now I assume that I receive following JSON:

{ "ssid" : 1, 
  "dim" : 2,
  "type" : 3,
  "list_of_business_keys":
  [
      {"business_key" : {"xxx" : "abc", "yyy" : 1000, "zzz" : 123}},
      {"business_key" : {"xxx" : "cde", "yyy" : 1000, "zzz" : 456}},
      {"business_key" : {"xxx" : "efg", "yyy" : 1000, "zzz" : 789}},
      {"business_key" : {"xxx" : "hij", "yyy" : 1000, "zzz" : 12 }},
      {"business_key" : {"xxx" : "klm", "yyy" : 1000, "zzz" : 345}}
  ]
}

and I would like to receive following DT:

enter image description here

Is this possible to change the code, to "dynamically" convert JSON to desired format of DataTable? If yes, what should I do to achieve that?

Thanks in advance!

Jangcy
  • 553
  • 1
  • 6
  • 17
  • How much of the JSON schema is fixed vs variable? Are the properties `"ssid"`, `"dim"` and `"type"` fixed? Are the properties `"list_of_business_keys"` and `"business_key"` fixed? – dbc Apr 18 '18 at 18:19
  • `"ssid", "dim", "type", "list_of_business_keys"` and `"business_key"` are fixed. Changeable are only nodes inside `business key` (`"xxx", "yyy", "zzz"` in this example). Of course inside one JSON all of these business key columns will have the same name (no situation, that we have `xxx` in one row and `aaa` instead in second row) – Jangcy Apr 18 '18 at 19:12
  • I'm also wondering, if using JSONs like: `{"ssid" : 1, "dim" : 2, "type" : 3, {"business_key" : {"xxx" : "abc", "yyy" : 1000, "zzz" : 123}}},{"ssid" : 1, "dim" : 2, "type" : 3, {"business_key" : {"xxx" : "abc", "yyy" : 1000, "zzz" : 123}}}` would make it simplier to do – Jangcy Apr 18 '18 at 19:46

3 Answers3

1

You could define business_key as a Dictionary<string, object> and then dynamically populate the DataTable based on the property names and data types actually encountered in the JSON file.

Define the following types and extension methods:

public class ListOfBusinessKey
{
    public Dictionary<string, object> business_key { get; set; }
}

public class KeyList
{
    public int ssid { get; set; }
    public int dim { get; set; }
    public int type { get; set; }
    public List<ListOfBusinessKey> list_of_business_keys { get; set; }

    public DataTable ToDataTable()
    {
        var tbl = new DataTable();
        tbl.Columns.Add(new DataColumn("ssid", typeof(Int64)));
        tbl.Columns.Add(new DataColumn("dim", typeof(Int64)));
        tbl.Columns.Add(new DataColumn("type", typeof(Int64)));

        var columnQuery = EnumerableExtensions.Merge(
            list_of_business_keys
            .SelectMany(k => k.business_key)
            .Select(p => new KeyValuePair<string, Type>(p.Key, p.Value == null ? typeof(object) : p.Value.GetType())),
            p => p.Key, (p1, p2) => new KeyValuePair<string, Type>(p1.Key, MergeTypes(p1.Value, p2.Value)));
        foreach (var c in columnQuery)
            tbl.Columns.Add(c.Key, c.Value);

        foreach (var d in list_of_business_keys.Select(k => k.business_key))
        {
            var row = tbl.NewRow();
            row["ssid"] = ssid;
            row["dim"] = dim;
            row["type"] = type;
            foreach (var p in d.Where(p => p.Value != null))
            {
                row[p.Key] = Convert.ChangeType(p.Value, tbl.Columns[p.Key].DataType, CultureInfo.InvariantCulture);
            }
            tbl.Rows.Add(row);
        }
        return tbl;
    }

    static Type MergeTypes(Type type1, Type type2)
    {
        // Enhance as needed
        if (type1 == type2)
            return type1;
        if (type2 == typeof(object))
            return type1;
        if (type1 == typeof(object))
            return type2;
        if (type1.IsAssignableFrom(type2))
            return type1;
        if (type2.IsAssignableFrom(type1))
            return type2;
        if (typeof(IConvertible).IsAssignableFrom(type1) && typeof(IConvertible).IsAssignableFrom(type2))
        {
            if (type1 == typeof(string))
                return type1;
            if (type2 == typeof(string))
                return type2;
            if ((type1 == typeof(long) || type1 == typeof(int)) && (type2 == typeof(decimal) || type2 == typeof(double)))
                return type2;
            if ((type2 == typeof(long) || type2 == typeof(int)) && (type1 == typeof(decimal) || type1 == typeof(double)))
                return type1;
        }
        throw new ArgumentException(string.Format("Cannot merge types {0} and {1}", type1, type2));
    }
}

public static class EnumerableExtensions
{
    public static IEnumerable<TSource> Merge<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TSource, TSource> mergeSelector)
    {
        if (source == null || keySelector == null || mergeSelector == null)
            throw new ArgumentNullException();
        return MergeIterator(source, keySelector, mergeSelector);
    }

    static IEnumerable<TSource> MergeIterator<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, Func<TSource, TSource, TSource> mergeSelector)
    {
        var dictionary = new Dictionary<TKey, TSource>();
        foreach (TSource element in source)
        {
            var key = keySelector(element);
            TSource oldElement;
            if (!dictionary.TryGetValue(key, out oldElement))
            {
                dictionary[key] = element;
            }
            else
            {
                dictionary[key] = mergeSelector(element, oldElement);
            }
        }
        return dictionary.Values;
    }
}

Then deserialize and convert to a DataTable as follows:

var keyList = JsonConvert.DeserializeObject<KeyList>(json);
var tbl = keyList.ToDataTable();

Note there is some awkwardness required to deal with situations where one of the "business_key" properties is first encountered with a null value and later encountered with a non-null value, or first encountered with an integral value and later encountered with a decimal or double value. In such cases the more general type needs to be preferred over the initially encountered type.

Sample working .Net fiddle.

Update

The purpose of EnumerableExtensions.Merge() and MergeTypes() is to infer a correct Type to use for for DataColumn.DataType by looking through all the cells that will get added to the column and choosing a "most appropriate" type by merging the observed types of each cell. Compared to simply looking at the cells in the first row, this handles situations like:

  1. The first row contains a null cell. In such cases subsequent rows mush be checked.
  2. The first row contains a cell with an integer value like 123 but a subsequent row has a cell with a floating-point value like 12.123. In this case the type inferred needs to switch from long to double or decimal.
  3. The first row contains a cell with a numeric value like 1000 but a subsequent row has a cell with a string value like "Unknown" or "$122.22" (or whatever). In this case the type inferred needs to switch to string.
  4. Two cells that will be added to the same column have completely incompatible types such as long and DateTime. In this case an exception is thrown.

Newtonsoft's own DataTableConverter infers DataColumn.DataType only from the first row, which periodically causes problems like the ones from DateTime column type becomes String type after deserializing DataTable and deserialize a datatable with a missing first column. The code in this answer takes advantage of the fact that the entire JSON was pre-loaded into a JToken hierarchy to avoid those problems.

The third sample JSON string in the example fiddle includes samples of cases #1-#3.

If you are sure none of the above can happen you could simplify MergeTypes() to simply throw an exception when the types are not identical.

dbc
  • 104,963
  • 20
  • 228
  • 340
  • Hi @dbc, that's a perfect piece of code that really does, what I tried to achieve. It works very well in VS, but there is a problem when I try to use the code inside `.csx` script (and this is what I use when writing Azure Function in C#). For every Extended function I get a similar error message: `[Error] run.csx(71,33): error CS1109: Extension methods must be defined in a top level static class; KeyListExtensions is a nested class`. Is there a way to somehow work it out? Of course all of these classes are not nested. – Jangcy Apr 19 '18 at 09:14
  • `Environment version: 4.0.30319.42000` `Json.NET version: Newtonsoft.Json, Version=9.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed` If it's needed – Jangcy Apr 19 '18 at 09:17
  • @Jangcy - seems like this is a limitation of csx scripts, see [How to define an extension method in a scriptcs csx script](https://stackoverflow.com/q/16948562). Solution is to make the extension methods into object methods or plain old static methods. Answer updated. – dbc Apr 19 '18 at 16:27
  • I've managed to deal with that limitation by adding the extension methods in another .csx file and loading it into the main one. After some modifications your method works like a charm! Thank you very much! – Jangcy Apr 19 '18 at 16:35
  • Hi @dbc. I know that it has been a while now, but could you please explain what do your methods: merge, mergeIterator, mergeSelector, mergeTypes do? I just want to be sure that I completely understand what happens in this piece of code. Thanks in advance. – Jangcy Apr 26 '18 at 12:33
0

It smells like a solution for NoSql database. e.g. Azure Table Storage You can have different column names for different object types.

Azure table storage store multiple types

wolszakp
  • 1,109
  • 11
  • 25
0

First create function to convert jsonstring to datatable:

   public DataTable JsonStringToDataTable(string jsonString)
   {
      DataTable dt = new DataTable();
      string[] jsonStringArray = Regex.Split(jsonString.Replace("[", "").Replace("]", ""), "},{");
      List<string> ColumnsName = new List<string>();
      foreach (string jSA in jsonStringArray)
      {
         string[] jsonStringData = Regex.Split(jSA.Replace("{", "").Replace("}", ""), ",");
         foreach (string ColumnsNameData in jsonStringData)
         {
            try
            {
               int idx = ColumnsNameData.IndexOf(":");
               string ColumnsNameString = ColumnsNameData.Substring(0, idx - 1).Replace("\"", "");
               if (!ColumnsName.Contains(ColumnsNameString))
               {
                  ColumnsName.Add(ColumnsNameString);
               }
            }
            catch (Exception ex)
            {
               throw new Exception(string.Format("Error Parsing Column Name : {0}", ColumnsNameData));
            }
         }
         break;
      }
      foreach (string AddColumnName in ColumnsName)
      {
         dt.Columns.Add(AddColumnName);
      }
      foreach (string jSA in jsonStringArray)
      {
         string[] RowData = Regex.Split(jSA.Replace("{", "").Replace("}", ""), ",");
         DataRow nr = dt.NewRow();
         foreach (string rowData in RowData)
         {
            try
            {
               int idx = rowData.IndexOf(":");
               string RowColumns = rowData.Substring(0, idx - 1).Replace("\"", "");
               string RowDataString = rowData.Substring(idx + 1).Replace("\"", "");
               nr[RowColumns] = RowDataString;
            }
            catch (Exception ex)
            {
               continue;
            }
         }
         dt.Rows.Add(nr);
      }
      return dt;
   }

Then call this function :

 string FileName = "JSONString.txt";
 var stream = File.OpenText(Server.MapPath(FileName));
 string JsonString = stream.ReadToEnd();
 DataTable dt = JsonStringToDataTable(JsonString); 
Chintan
  • 133
  • 8