
While deserializing some JSON data to a DataSet, the resulting DataSet loses its column schema: when I deserialize the JSON, it populates the DataSet with Int64 values rather than Int32. I would like it to choose Int32.

I know that Json.NET reads integer values as Int64 by default, because there is no way to know whether the value should be Int32 or Int64.

JsonSerializerSettings settings = new JsonSerializerSettings()
{
    Converters = { new PrimitiveJsonConverter() },
};
DataSet myDataSet = JsonConvert.DeserializeObject<DataSet>(jsonString, settings);

So I created a custom JsonConverter to override the default behavior:

using System;
using Newtonsoft.Json;

namespace CashlessAdmin.API.Handler
{
    public sealed class PrimitiveJsonConverter : JsonConverter
    {
        readonly JsonSerializer defaultSerializer = new JsonSerializer();

        public override bool CanConvert(Type objectType)
        {
            return objectType.IsIntegerTypes();
        }

        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            switch (reader.TokenType)
            {
                case JsonToken.Integer:
                    // Down-convert to Int32 only when the value fits in the Int32 range.
                    long integerValue = Convert.ToInt64(reader.Value);
                    if (integerValue >= Int32.MinValue && integerValue <= Int32.MaxValue)
                    {
                        return (int)integerValue;
                    }
                    return reader.Value;
                case JsonToken.Float: // Accepts numbers like 4.00
                case JsonToken.Null:
                    return defaultSerializer.Deserialize(reader, objectType);
                default:
                    throw new JsonSerializationException(string.Format("Token \"{0}\" of type {1} was not a JSON integer", reader.Value, reader.TokenType));
            }
        }

        public override bool CanWrite { get { return false; } }

        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
        {
            throw new NotImplementedException();
        }
    }

    public static class JsonExtensions
    {
        public static bool IsIntegerTypes(this Type type)
        {
            type = Nullable.GetUnderlyingType(type) ?? type;
            if (type == typeof(long)
                || type == typeof(ulong)
                || type == typeof(int)
                || type == typeof(uint)
                || type == typeof(short)
                || type == typeof(ushort)
                || type == typeof(byte)
                || type == typeof(sbyte)
                || type == typeof(System.Numerics.BigInteger))
                return true;
            return false;
        }
    }
}

But the result is the same as before: the DataSet columns still come out as Int64.

Jinto John
  • Possibly related: [deserialize a datatable with a missing first column](https://stackoverflow.com/a/32731816/3744182). – dbc Feb 07 '19 at 19:55
  • Alternatively, subclass `JsonTextReader` and return `int` instead of `long` as shown in [Overriding Default Primitive Type Handling in Json.Net](https://stackoverflow.com/q/9914333/3744182). Though the answer there is obsolete... – dbc Feb 07 '19 at 21:27
  • OK, I added a [new answer](https://stackoverflow.com/a/54584806/3744182) to [Overriding Default Primitive Type Handling in Json.Net](https://stackoverflow.com/q/9914333/3744182) that works on more recent versions of Json.NET. – dbc Feb 08 '19 at 01:18

2 Answers


The reason your code does not work is that, when initially inferring column types, DataTableConverter does not attempt to deserialize the first value encountered for a column. Instead, it simply reads it using JsonReader.Read() and then sets the column type based on the observed token type, in DataTableConverter.GetColumnDataType(). Your method PrimitiveJsonConverter.ReadJson() just isn't getting called at this point. And, since JsonReader.Read() is designed to return a long rather than an int for integer values, the data table column types end up as long.
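
A minimal repro of that behavior, reusing the settings object and usings from your question (the "Table1"/"Id" names here are just hypothetical sample data):

var ds = JsonConvert.DeserializeObject<DataSet>(
    @"{ ""Table1"": [ { ""Id"": 1 } ] }", settings);
// Prints System.Int64: PrimitiveJsonConverter is registered but never consulted
// while the column type is being inferred.
Console.WriteLine(ds.Tables["Table1"].Columns["Id"].DataType);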

You have a few options to override Newtonsoft's default behavior and get Int32 column types:

  1. You could use a typed DataSet. In this situation the column types will be predefined; a sketch is shown after this list.

  2. You could read using `PreferInt32JsonTextReader` from [this answer](https://stackoverflow.com/a/54584806/3744182) to [Overriding Default Primitive Type Handling in Json.Net](https://stackoverflow.com/q/9914333/3744182) (Json.NET 10.0.1 or later); a sketch is shown after this list.

  3. You could convert columns to Int32 after deserialization. First, introduce the following extension method:

    using System;
    using System.Data;
    using System.Globalization;
    using System.Linq;

    public static class DataTableExtensions
    {
        public static DataTable RemapInt64ColumnsToInt32(this DataTable table)
        {
            if (table == null)
                throw new ArgumentNullException();
            for (int iCol = 0; iCol < table.Columns.Count; iCol++)
            {
                var col = table.Columns[iCol];
                if (col.DataType == typeof(Int64)
                    && table.AsEnumerable().Where(r => !r.IsNull(col)).Select(r => (Int64)r[col]).All(i => i >= int.MinValue && i <= int.MaxValue))
                {
                    ReplaceColumn(table, col, typeof(Int32), (o, t) => o == null ? null : Convert.ChangeType(o, t, NumberFormatInfo.InvariantInfo));
                }
            }
            return table;
        }
    
        private static DataColumn ReplaceColumn(DataTable table, DataColumn column, Type newColumnType, Func<object, Type, object> map)
        {
            var newValues = table.AsEnumerable()
                .Select(r => r.IsNull(column) ? (object)DBNull.Value : map(r[column], newColumnType))
                .ToList();
    
            var ordinal = column.Ordinal;
            var name = column.ColumnName;
            var @namespace = column.Namespace;
    
            var newColumn = new DataColumn(name, newColumnType);
            newColumn.Namespace = @namespace;
            table.Columns.Remove(column);
            table.Columns.Add(newColumn);
            newColumn.SetOrdinal(ordinal);
    
            for (int i = 0; i < table.Rows.Count; i++)
                if (!(newValues[i] is DBNull))
                    table.Rows[i][newColumn] = newValues[i];
    
            return newColumn;
        }    
    }
    

    And then do:

    var myDataSet = JsonConvert.DeserializeObject<DataSet>(json);
    foreach (DataTable table in myDataSet.Tables)
        table.RemapInt64ColumnsToInt32();
    

    Related: How To Change DataType of a DataColumn in a DataTable?

  4. You could fork your own version of DataTableConverter and modify the logic of DataTableConverter.GetColumnDataType() to return typeof(Int32) for JsonToken.Integer tokens.

    For an example of what would be involved, see [this answer](https://stackoverflow.com/a/32731816/3744182) to deserialize a datatable with a missing first column.

    Since your root object is a DataSet, you would also need to fork your own version of DataSetConverter and make it use your customized DataTableConverter, as shown in this answer to DateTime column type becomes String type after deserializing DataTable property on Custom Class.
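
For option #1, here is a minimal sketch of the idea: a hand-rolled DataSet subclass standing in for a designer-generated typed DataSet (the names MyDataSet, Table1 and Id are hypothetical). Because the table and its Int32 column already exist before deserialization, Json.NET fills them instead of inferring Int64 columns.

using System.Data;
using Newtonsoft.Json;

// Hypothetical typed DataSet: the schema is declared up front.
public class MyDataSet : DataSet
{
    public MyDataSet()
    {
        var table = new DataTable("Table1");
        table.Columns.Add("Id", typeof(int));   // Int32, not Int64
        Tables.Add(table);
    }
}

// Usage: the predefined columns are reused, so no column-type inference happens.
var ds = JsonConvert.DeserializeObject<MyDataSet>(jsonString);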

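For option #2, here is a minimal sketch of the reader-subclass approach from the linked answer, assuming Json.NET 10.0.1 or later (where the three-argument JsonReader.SetToken overload is available); treat it as an outline rather than a drop-in implementation.

using System;
using System.IO;
using Newtonsoft.Json;

public class PreferInt32JsonTextReader : JsonTextReader
{
    public PreferInt32JsonTextReader(TextReader reader) : base(reader) { }

    public override bool Read()
    {
        var result = base.Read();
        // Re-tokenize integer values that fit in Int32 so downstream consumers
        // (including DataTableConverter's column-type inference) see an int, not a long.
        if (TokenType == JsonToken.Integer
            && Value is long value
            && value >= int.MinValue && value <= int.MaxValue)
        {
            SetToken(JsonToken.Integer, checked((int)value), false);
        }
        return result;
    }
}

It would then replace the call to JsonConvert.DeserializeObject:

using (var stringReader = new StringReader(jsonString))
using (var jsonReader = new PreferInt32JsonTextReader(stringReader))
{
    var myDataSet = JsonSerializer.CreateDefault().Deserialize<DataSet>(jsonReader);
}
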
The OP asks: *what about its performance...?*

You have to test it and see; see https://ericlippert.com/2012/12/17/performance-rant/.

That being said, in general, with huge data sets, you want to avoid loading the entire data set into memory in some intermediate representation (e.g. a JToken hierarchy or single large string) before final deserialization. Options #1, #2 and #4 avoid doing so. #3 does load a portion of the data into an intermediate representation; some but not all DataTable columns end up being loaded and then replaced. Thus performance may be OK, but maybe not -- you need to check.

dbc
    @JintoJohn - *what about its performance* - see https://ericlippert.com/2012/12/17/performance-rant/. You have to test it and see. – dbc Feb 08 '19 at 08:41
    @JintoJohn - that being said, in general, with huge data sets, you want to avoid loading the entire data set into memory in some intermediate representation (e.g. a `JToken` hierarchy) before final deserialization. Options #1, #2 and #4 avoid doing so. I'm not sure about #3, only some `DataTable` columns end up being loaded and then replaced, so performance may be OK, but maybe not -- you need to check. – dbc Feb 08 '19 at 08:42

I suggest you use this DataTableConverter (a fork of the main DataTableConverter from Newtonsoft.Json). This converter has two benefits: 1. Each column's DataType is stored in the serialized JSON and does not change after deserialization. 2. The serialized JSON is smaller and performance is better, because the column name is not stored for every row.

using System;
using System.Collections.Generic;
using System.Data;
using Newtonsoft.Json;

public class DataTableConverter : JsonConverter
{
    /// <summary>
    /// Writes the JSON representation of the object.
    /// </summary>
    /// <param name="writer">The <see cref="JsonWriter"/> to write to.</param>
    /// <param name="value">The value.</param>
    /// <param name="serializer">The calling serializer.</param>
    public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
    {
        DataTable table = (DataTable)value;

        writer.WriteStartObject();

        writer.WritePropertyName("Columns");
        serializer.Serialize(writer, GetColumnDataTypes(table));

        writer.WritePropertyName("Rows");
        writer.WriteStartArray();

        foreach (DataRow row in table.Rows)
        {
            serializer.Serialize(writer, row.ItemArray);
        }

        writer.WriteEndArray();
        writer.WriteEndObject();
    }

    /// <summary>
    /// Reads the JSON representation of the object.
    /// </summary>
    /// <param name="reader">The <see cref="JsonReader"/> to read from.</param>
    /// <param name="objectType">Type of the object.</param>
    /// <param name="existingValue">The existing value of object being read.</param>
    /// <param name="serializer">The calling serializer.</param>
    /// <returns>The object value.</returns>
    public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
    {
        if (reader.TokenType == JsonToken.Null)
        {
            return null;
        }

        DataTable dataTable = existingValue as DataTable;

        if (dataTable == null)
        {
            // handle typed datasets
            dataTable = (objectType == typeof(DataTable))
                    ? new DataTable()
                    : (DataTable)Activator.CreateInstance(objectType);
        }

        // DataTable is inside a DataSet
        // populate the name from the property name
        if (reader.TokenType == JsonToken.PropertyName)
        {
            dataTable.TableName = (string)reader.Value;

            reader.Read();

            if (reader.TokenType == JsonToken.Null)
            {
                return dataTable;
            }
        }

        if (reader.TokenType == JsonToken.StartObject)
        {
            reader.Read();
            if (reader.TokenType == JsonToken.PropertyName && (string)reader.Value == "Columns")
            {
                reader.Read();

                Dictionary<string, string> columnTypes = serializer.Deserialize<Dictionary<string, string>>(reader);

                foreach (KeyValuePair<string, string> column in columnTypes)
                {
                    dataTable.Columns.Add(column.Key, Type.GetType(column.Value));
                }
            }
            // The reader is now at the end of the "Columns" object; advance past
            // the "Rows" property name to the start of the rows array.
            reader.Read();
            reader.Read();
        }

        if (reader.TokenType != JsonToken.StartArray)
        {
            throw new JsonSerializationException($"Unexpected JSON token when reading DataTable. Expected StartArray, got {reader.TokenType}.");
        }

        reader.Read();

        while (reader.TokenType != JsonToken.EndArray)
        {
            DataRow dr = dataTable.NewRow();
            dr.ItemArray = serializer.Deserialize<System.Object[]>(reader);
            dataTable.Rows.Add(dr);

            reader.Read();
        }

        reader.Read();

        return dataTable;
    }

    private static Dictionary<string, string> GetColumnDataTypes(DataTable dt)
    {
        Dictionary<string, string> columnTypes = new Dictionary<string, string>();
        foreach (DataColumn column in dt.Columns)
            columnTypes.Add(column.ColumnName, column.DataType.FullName);

        return columnTypes;
    }

    /// <summary>
    /// Determines whether this instance can convert the specified value type.
    /// </summary>
    /// <param name="valueType">Type of the value.</param>
    /// <returns>
    ///     <c>true</c> if this instance can convert the specified value type; otherwise, <c>false</c>.
    /// </returns>
    public override bool CanConvert(Type valueType)
    {
        return typeof(DataTable).IsAssignableFrom(valueType);
    }
}
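
A hedged usage sketch (the myTable variable and its Id/Name columns are hypothetical): register the converter and round-trip a standalone DataTable. As noted in option #4 of the other answer, if your root object is a DataSet you would also need a custom DataSetConverter that delegates to this table converter.

var settings = new JsonSerializerSettings { Converters = { new DataTableConverter() } };

// The serialized form keeps column types in a "Columns" map and row values in
// per-row arrays, roughly:
// { "Columns": { "Id": "System.Int32", "Name": "System.String" },
//   "Rows": [ [ 1, "Alice" ], [ 2, "Bob" ] ] }
string json = JsonConvert.SerializeObject(myTable, settings);

DataTable roundTripped = JsonConvert.DeserializeObject<DataTable>(json, settings);
Console.WriteLine(roundTripped.Columns["Id"].DataType); // System.Int32
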
Sadegh