Using JSON.NET I am reading JSON objects in an array from a large file. As the JSON object is read, it is conditionally converted to the destination class, and returned as an item in an IEnumerable.
I use an IEnumerable to allow me to "pull" objects from the file and process them as they are read, avoiding having to read all objects into memory.
I use a similar technique when reading rows from a CSV file, where I use CsvHelper ShouldSkipRecord() to conditionally process the row in the CSV file.
I have not found a way to filter the JSON objects as they are read from the array, so I end up using LINQ Where to filter the objects before they are converted and added to the IEnumerable. The problem is that the Where clause reads all the objects into memory, defeating the purpose of using IEnumerable.
I know I can manually read each object and then process it, but I am looking for a more elegant way: some form of callback that lets me pull records while the callback filters out the records I do not want.
E.g. how I filter rows in a CSV file:
internal static bool ShouldSkipRecord(string[] fields)
{
    // Decides whether a parsed CSV row must be ignored. Sample rows:
    //   2019-01-24 20:46:57 UTC,63165,4.43,6.23,6.80,189,-18,81.00,16.00,6.23
    //   2019-01-24 20:47:40 UTC,63166,4.93,5.73,5.73,0,-20,,,5.73
    // A usable row has at least ten columns, and its tenth column (air quality)
    // is filled in; temperature and humidity may be left blank.
    bool hasAllColumns = fields.Length >= 10;

    // Short-circuiting guarantees fields[9] is only touched when it exists.
    return !hasAllColumns || string.IsNullOrEmpty(fields[9]);
}
E.g. how I filter JSON objects:
/// <summary>
/// Converts one deserialized feed entry into a <c>PurpleAirData</c> sample.
/// </summary>
/// <param name="jsonData">Feed entry; <c>Field8</c> (air quality) must contain a number.</param>
/// <returns>
/// A sample with the timestamp and air quality set. Temperature and humidity are
/// set only when <c>Field6</c> / <c>Field7</c> contain a parseable number.
/// </returns>
/// <exception cref="System.ArgumentNullException"><c>Field8</c> is null.</exception>
/// <exception cref="System.FormatException"><c>Field8</c> is not a valid number.</exception>
internal static PurpleAirData Convert(Feed jsonData)
{
    // The feed values are machine-generated text such as "1.80". Parse them with the
    // invariant culture so the result does not depend on the current thread culture
    // (a culture using ',' as decimal separator would otherwise mis-parse or reject them).
    // The styles match the defaults used by the culture-implicit Parse/TryParse overloads.
    const System.Globalization.NumberStyles styles =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;

    PurpleAirData data = new PurpleAirData()
    {
        TimeStamp = jsonData.CreatedAt.DateTime,
        // Air quality is required: let Parse throw when it is missing or malformed.
        AirQuality = double.Parse(jsonData.Field8, styles, culture)
    };

    // Temperature and humidity are optional: keep the defaults when they do not parse.
    if (double.TryParse(jsonData.Field6, styles, culture, out double val))
    {
        data.Temperature = val;
    }

    if (double.TryParse(jsonData.Field7, styles, culture, out val))
    {
        data.Humidity = val;
    }

    return data;
}
/// <summary>
/// Streams the entries of the "feeds" array, skipping entries without an air quality
/// value (<c>Field8</c>) and converting the rest with <c>Convert</c>.
/// </summary>
/// <remarks>
/// This is an iterator: nothing is read until the result is enumerated, and only one
/// feed entry is deserialized at a time. The reader must therefore stay open until
/// enumeration has finished, and the result can be enumerated only once because the
/// reader is forward-only.
/// </remarks>
/// <param name="jsonReader">Reader positioned at the start of the document.</param>
/// <returns>A lazily evaluated sequence of converted samples.</returns>
/// <exception cref="JsonSerializationException">
/// The reader is not positioned on the start of an array where the feeds are expected.
/// </exception>
internal static IEnumerable<PurpleAirData> Load(JsonTextReader jsonReader)
{
    // Deserialize objects in parts
    jsonReader.SupportMultipleContent = true;
    JsonSerializer serializer = new JsonSerializer();

    // Read Channel
    // TODO : Add format checking
    jsonReader.Read(); // root object start
    jsonReader.Read(); // "channel" property name
    jsonReader.Read(); // channel object start
    // The channel is not used; deserializing it advances the reader past the object.
    serializer.Deserialize<Channel>(jsonReader);

    // Move to the Feeds array
    jsonReader.Read(); // "feeds" property name
    jsonReader.Read(); // feeds array start
    if (jsonReader.TokenType != JsonToken.StartArray)
    {
        throw new JsonSerializationException(
            "Expected the start of the feeds array but found token " + jsonReader.TokenType + ".");
    }

    // Pull one feed at a time so that rejected entries are never accumulated in memory.
    while (jsonReader.Read() && jsonReader.TokenType != JsonToken.EndArray)
    {
        Feed feed = serializer.Deserialize<Feed>(jsonReader);
        if (string.IsNullOrEmpty(feed.Field8))
        {
            continue;
        }

        yield return Convert(feed);
    }
}
Example JSON:
{
"channel":{
"id":622370,
"name":"AirMonitor_e81a",
"latitude":"0.0",
"longitude":"0.0",
"field1":"PM1.0 (ATM)",
"field2":"PM2.5 (ATM)",
"field3":"PM10.0 (ATM)",
"field4":"Uptime",
"field5":"RSSI",
"field6":"Temperature",
"field7":"Humidity",
"field8":"PM2.5 (CF=1)",
"created_at":"2018-11-09T00:35:34Z",
"updated_at":"2018-11-09T00:35:35Z",
"last_entry_id":65435
},
"feeds":[
{
"created_at":"2019-01-10T23:56:09Z",
"entry_id":56401,
"field1":"1.00",
"field2":"1.80",
"field3":"1.80",
"field4":"369",
"field5":"-30",
"field6":"66.00",
"field7":"59.00",
"field8":"1.80"
},
{
"created_at":"2019-01-10T23:57:29Z",
"entry_id":56402,
"field1":"1.08",
"field2":"2.44",
"field3":"3.33",
"field4":"371",
"field5":"-32",
"field6":"66.00",
"field7":"59.00",
"field8":"2.44"
},
{
"created_at":"2019-01-26T00:14:04Z",
"entry_id":64400,
"field1":"0.27",
"field2":"0.95",
"field3":"1.25",
"field4":"213",
"field5":"-27",
"field6":"72.00",
"field7":"40.00",
"field8":"0.95"
}
]
}
Example JSON:
[
{
"monthlyrainin": 0.01,
"humidityin": 42,
"eventrainin": 0,
"humidity": 29,
"maxdailygust": 20.13,
"dateutc": 1549476900000,
"battout": "1",
"lastRain": "2019-02-05T19:21:00.000Z",
"dailyrainin": 0,
"tempf": 52.2,
"winddir": 286,
"totalrainin": 0.01,
"dewPoint": 20.92,
"baromabsin": 29.95,
"hourlyrainin": 0,
"feelsLike": 52.2,
"yearlyrainin": 0.01,
"uv": 1,
"weeklyrainin": 0.01,
"solarradiation": 157.72,
"windspeedmph": 0,
"tempinf": 73.8,
"windgustmph": 0,
"battin": "1",
"baromrelin": 30.12,
"date": "2019-02-06T18:15:00.000Z"
},
{
"dewPoint": 20.92,
"tempf": 52.2,
"maxdailygust": 20.13,
"humidityin": 42,
"windspeedmph": 4.03,
"eventrainin": 0,
"tempinf": 73.6,
"feelsLike": 52.2,
"dateutc": 1549476600000,
"windgustmph": 4.92,
"hourlyrainin": 0,
"monthlyrainin": 0.01,
"battin": "1",
"humidity": 29,
"totalrainin": 0.01,
"baromrelin": 30.12,
"winddir": 314,
"lastRain": "2019-02-05T19:21:00.000Z",
"yearlyrainin": 0.01,
"baromabsin": 29.94,
"dailyrainin": 0,
"battout": "1",
"uv": 1,
"solarradiation": 151.86,
"weeklyrainin": 0.01,
"date": "2019-02-06T18:10:00.000Z"
}]
Is there a way in JSON.NET to filter objects as they are read?