If you have already loaded your JSON array into memory as, say, a List<JToken>, you can write it to newline delimited JSON by using the answer from "Serialize as NDJSON using Json.NET".
However, since BigQuery newline delimited JSON files do tend to be... big, I'm going to suggest instead an entirely streaming solution:
/// <summary>
/// Streaming helpers that convert a JSON document into newline delimited JSON (NDJSON).
/// A root array is written one element per line; any other root value is written as a single line.
/// </summary>
public static class JsonExtensions
{
    // Buffer size handed to the StreamReader and StreamWriter constructors.
    const int BufferSize = 1024;

    /// <summary>
    /// Reads JSON from <paramref name="readStream"/> and writes it to <paramref name="writeStream"/>
    /// as newline delimited JSON. Neither stream is closed; the caller keeps ownership of both.
    /// </summary>
    /// <param name="readStream">Stream containing the source JSON.</param>
    /// <param name="writeStream">Stream that receives the newline delimited output.</param>
    /// <exception cref="System.ArgumentNullException">Either stream is null.</exception>
    public static void ToNewlineDelimitedJson(Stream readStream, Stream writeStream)
    {
        if (readStream == null)
        {
            throw new System.ArgumentNullException(nameof(readStream));
        }
        if (writeStream == null)
        {
            throw new System.ArgumentNullException(nameof(writeStream));
        }

        // UTF-8 without an emitted byte order mark, throwing on invalid bytes.
        var encoding = new UTF8Encoding(false, true);
        // Let caller dispose the underlying streams (leaveOpen: true on both wrappers).
        // Disposing the writer wrapper at the end of the using block flushes buffered output.
        using (var textReader = new StreamReader(readStream, encoding, true, BufferSize, true))
        using (var textWriter = new StreamWriter(writeStream, encoding, BufferSize, true))
        {
            ToNewlineDelimitedJson(textReader, textWriter);
        }
    }

    /// <summary>
    /// Reads JSON from <paramref name="textReader"/> and writes it to <paramref name="textWriter"/>
    /// as newline delimited JSON. The text reader is left open.
    /// </summary>
    /// <param name="textReader">Source of the JSON text.</param>
    /// <param name="textWriter">Destination for the newline delimited output.</param>
    /// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
    public static void ToNewlineDelimitedJson(TextReader textReader, TextWriter textWriter)
    {
        if (textReader == null)
        {
            throw new System.ArgumentNullException(nameof(textReader));
        }
        if (textWriter == null)
        {
            throw new System.ArgumentNullException(nameof(textWriter));
        }

        // CloseInput = false: disposing the JSON reader must not close the caller's TextReader.
        // DateParseHandling.None: date-looking strings are passed through as plain strings.
        using (var jsonReader = new JsonTextReader(textReader) { CloseInput = false, DateParseHandling = DateParseHandling.None })
        {
            ToNewlineDelimitedJson(jsonReader, textWriter);
        }
    }

    // Position of the reader relative to the (optional) root array being converted.
    enum State { BeforeArray, InArray, AfterArray };

    /// <summary>
    /// Copies JSON from <paramref name="jsonReader"/> to <paramref name="textWriter"/> as newline
    /// delimited JSON, one value at a time, without loading the whole document into memory.
    /// </summary>
    /// <remarks>
    /// If the first significant token is the start of an array, each array element is written on
    /// its own line. Otherwise the single value found is written as one line. Comment, None,
    /// Undefined and PropertyName tokens are skipped. The loop condition calls
    /// <c>jsonReader.Read()</c> once more after the converted value has ended, so the reader is
    /// advanced past it before this method returns.
    /// </remarks>
    /// <param name="jsonReader">Reader positioned at or before the JSON value to convert.</param>
    /// <param name="textWriter">Destination for the newline delimited output.</param>
    /// <exception cref="System.ArgumentNullException">Either argument is null.</exception>
    public static void ToNewlineDelimitedJson(JsonReader jsonReader, TextWriter textWriter)
    {
        if (jsonReader == null)
        {
            throw new System.ArgumentNullException(nameof(jsonReader));
        }
        if (textWriter == null)
        {
            throw new System.ArgumentNullException(nameof(textWriter));
        }

        var state = State.BeforeArray;
        do
        {
            var tokenType = jsonReader.TokenType;
            if (tokenType == JsonToken.Comment || tokenType == JsonToken.None || tokenType == JsonToken.Undefined || tokenType == JsonToken.PropertyName)
            {
                // Nothing to emit for these tokens; move on to the next one.
            }
            else if (state == State.BeforeArray && tokenType == JsonToken.StartArray)
            {
                state = State.InArray;
            }
            else if (state == State.InArray && tokenType == JsonToken.EndArray)
            {
                state = State.AfterArray;
            }
            else
            {
                // Formatting.None is the default; it is set here for clarity.
                // CloseOutput = false keeps the shared textWriter open when this per-item writer is disposed.
                using (var jsonWriter = new JsonTextWriter(textWriter) { Formatting = Formatting.None, CloseOutput = false })
                {
                    // Writes the current token together with all of its children.
                    jsonWriter.WriteToken(jsonReader);
                }
                // http://specs.okfnlabs.org/ndjson/
                // Each JSON text MUST conform to the [RFC7159] standard and MUST be written to the stream followed by the newline character \n (0x0A).
                // The newline character MAY be preceded by a carriage return \r (0x0D). The JSON texts MUST NOT contain newlines or carriage returns.
                textWriter.Write("\n");
                // Root value wasn't an array after all, so end writing with one item.
                if (state == State.BeforeArray)
                {
                    state = State.AfterArray;
                }
            }
        }
        while (jsonReader.Read() && state != State.AfterArray);
    }
}
Then use it as follows:
// Convert the JSON file at fromFileName into a newline delimited JSON file at toFileName.
using (var input = File.OpenRead(fromFileName))
{
    // FileMode.Create makes a new output file, or overwrites one that already exists.
    using (var output = File.Open(toFileName, FileMode.Create))
    {
        JsonExtensions.ToNewlineDelimitedJson(input, output);
    }
}
This takes advantage of the method JsonWriter.WriteToken(JsonReader) to write and format directly from a JsonReader to a JsonWriter without ever loading the entire JSON token hierarchy into memory.
Working sample .Net fiddle.