Your basic requirement is to transform JSON that contains a property "fileContent": "...base64..."
to "fileRoute": "/route/to/file"
while also writing the value of fileContent
out into a separate binary file without materializing the value of fileContent
as a complete string.
It's unclear whether this can be done with the .NET Core 3.1 implementation of System.Text.Json. Even if it could, it wouldn't be easy. Simply generating a Utf8JsonReader from a Stream
requires work, see Parsing a JSON file with .NET core 3.0/System.text.Json. Having done so, there is a property Utf8JsonReader.ValueSequence
that returns the raw value of the last processed token as a ReadOnlySequence<byte>
slice of the input payload. However, the property doesn't seem easy to use, as it works only when the token is contained within multiple segments, doesn't guarantee the value is well-formed, and doesn't unescape JSON escape sequences.
And Newtonsoft won't work at all here because JsonTextReader
always fully materializes each primitive string value.
As an alternative, you might consider the readers and writers returned by JsonReaderWriterFactory. These readers and writers are used by DataContractJsonSerializer and translate JSON to XML on-the-fly as it is being read and written. Since the base classes for these readers and writers are XmlReader and XmlWriter, they support reading string values in chunks via XmlReader.ReadValueChunk(Char[], Int32, Int32). Even better, they support reading Base64 binary values in chunks via XmlReader.ReadContentAsBase64(Byte[], Int32, Int32).
Given these readers and writers, we can use a streaming transformation algorithm to transform the fileContent
node(s) to fileRoute
nodes, while simultaneously extracting the Base64 binary into separate binary files.
First, introduce the following XML streaming transformation methods, based loosely on Combining the XmlReader and XmlWriter classes for simple streaming transformations by Mark Fussell and this answer to Automating replacing tables from external files:
public static class XmlWriterExtensions
{
    // Adapted from this answer https://stackoverflow.com/a/28903486
    // to https://stackoverflow.com/questions/28891440/automating-replacing-tables-from-external-files/
    // By https://stackoverflow.com/users/3744182/dbc
    /// <summary>
    /// Make a DEEP copy of the current xmlreader node to xmlwriter, allowing the caller to transform selected elements.
    /// Elements for which <paramref name="shouldTransform"/> returns true are not copied; instead
    /// <paramref name="transform"/> is invoked with a subtree reader scoped to that element.
    /// </summary>
    /// <param name="writer">Writer that receives the copied and transformed nodes.</param>
    /// <param name="reader">Reader positioned on the node to copy, or not yet read at all.</param>
    /// <param name="shouldTransform">Called for every element node; return true to route the element to <paramref name="transform"/>.</param>
    /// <param name="transform">Writes the replacement for a selected element, given a subtree reader over it and the output writer.</param>
    /// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
    public static void WriteTransformedNode(this XmlWriter writer, XmlReader reader, Predicate<XmlReader> shouldTransform, Action<XmlReader, XmlWriter> transform)
    {
        // Report which argument was null, in the same way WriteShallowNode does.
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        if (shouldTransform == null)
            throw new ArgumentNullException("shouldTransform");
        if (transform == null)
            throw new ArgumentNullException("transform");

        // -1 when nothing has been read yet, so that every node of the document counts as "deeper".
        int d = reader.NodeType == XmlNodeType.None ? -1 : reader.Depth;
        do
        {
            if (reader.NodeType == XmlNodeType.Element && shouldTransform(reader))
            {
                using (var subReader = reader.ReadSubtree())
                {
                    transform(subReader, writer);
                }
                // ReadSubtree() places us at the end of the current element, so we need to move to the next node.
                reader.Read();
            }
            else
            {
                // WriteShallowNode copies one node and advances the reader itself.
                writer.WriteShallowNode(reader);
            }
        }
        // Keep going while still inside the starting node, or on its own end tag.
        while (!reader.EOF && (d < reader.Depth || (d == reader.Depth && reader.NodeType == XmlNodeType.EndElement)));
    }

    /// <summary>
    /// Make a SHALLOW copy of the current xmlreader node to xmlwriter, and advance the XML reader past the current node.
    /// </summary>
    /// <param name="writer">Writer that receives the copied node.</param>
    /// <param name="reader">Reader positioned on the node to copy.</param>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> or <paramref name="writer"/> is null.</exception>
    /// <exception cref="XmlException">The reader is on a node type this method does not copy.</exception>
    public static void WriteShallowNode(this XmlWriter writer, XmlReader reader)
    {
        // Adapted from https://learn.microsoft.com/en-us/archive/blogs/mfussell/combining-the-xmlreader-and-xmlwriter-classes-for-simple-streaming-transformations
        // By Mark Fussell https://learn.microsoft.com/en-us/archive/blogs/mfussell/
        // and rewritten to avoid using reader.Value, which fully materializes the text value of a node.
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (writer == null)
            throw new ArgumentNullException("writer");
        switch (reader.NodeType)
        {
            case XmlNodeType.None:
                // This is returned by the System.Xml.XmlReader if a Read method has not been called.
                reader.Read();
                break;
            case XmlNodeType.Element:
                writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                writer.WriteAttributes(reader, true);
                // An empty element has no separate EndElement node, so close it here.
                if (reader.IsEmptyElement)
                {
                    writer.WriteEndElement();
                }
                reader.Read();
                break;
            case XmlNodeType.Text:
            case XmlNodeType.Whitespace:
            case XmlNodeType.SignificantWhitespace:
            case XmlNodeType.CDATA:
            case XmlNodeType.XmlDeclaration:
            case XmlNodeType.ProcessingInstruction:
            case XmlNodeType.EntityReference:
            case XmlNodeType.DocumentType:
            case XmlNodeType.Comment:
                //Avoid using reader.Value as this will fully materialize the string value of the node.  Use WriteNode instead,
                // it copies text values in chunks.  See: https://referencesource.microsoft.com/#system.xml/System/Xml/Core/XmlWriter.cs,368
                // WriteNode also advances the reader past the node, so no explicit Read() is needed here.
                writer.WriteNode(reader, true);
                break;
            case XmlNodeType.EndElement:
                writer.WriteFullEndElement();
                reader.Read();
                break;
            default:
                throw new XmlException(string.Format("Unknown NodeType {0}", reader.NodeType));
        }
    }
}
public static partial class XmlReaderExtensions
{
    // Taken from this answer https://stackoverflow.com/a/54136179/3744182
    // To https://stackoverflow.com/questions/54126687/xmlreader-how-to-read-very-long-string-in-element-without-system-outofmemoryex
    // By https://stackoverflow.com/users/3744182/dbc
    /// <summary>
    /// Decodes the Base64 content of the reader's current element and writes the resulting bytes to
    /// the file at <paramref name="path"/>, reading through a fixed 8192-byte buffer so that the
    /// content is never held in memory as a whole.
    /// </summary>
    /// <param name="reader">Reader positioned on the element whose content is Base64 text.</param>
    /// <param name="path">Path of the file to create (an existing file is overwritten).</param>
    /// <returns>Always true.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> or <paramref name="path"/> is null.</exception>
    public static bool CopyBase64ElementContentsToFile(this XmlReader reader, string path)
    {
        // Validate before touching the file system, so a bad call does not leave an empty file behind.
        if (reader == null)
            throw new ArgumentNullException("reader");
        if (path == null)
            throw new ArgumentNullException("path");

        using (var stream = File.Create(path))
        {
            byte[] buffer = new byte[8192];
            int readBytes = 0;
            // ReadElementContentAsBase64 returns 0 once the element content is exhausted.
            while ((readBytes = reader.ReadElementContentAsBase64(buffer, 0, buffer.Length)) > 0)
            {
                stream.Write(buffer, 0, readBytes);
            }
        }
        return true;
    }
}
Next, using JsonReaderWriterFactory, introduce the following method(s) to stream from one JSON file to another, rewriting fileContent nodes as required:
public static class JsonPatchExtensions
{
    /// <summary>
    /// Streams the JSON file <paramref name="oldJsonFileName"/> into <paramref name="newJsonFileName"/>,
    /// replacing every "fileContent" node with a "fileRoute" string node. The Base64 payload of each
    /// replaced node is written to a separate file whose name comes from <paramref name="generator"/>.
    /// </summary>
    /// <param name="oldJsonFileName">Path of the JSON file to read.</param>
    /// <param name="newJsonFileName">Path of the JSON file to create or overwrite.</param>
    /// <param name="generator">Supplies the name of each extracted binary file.</param>
    /// <returns>The names of the binary files written, in the order they were encountered.</returns>
    public static string[] PatchFileContentToFileRoute(string oldJsonFileName, string newJsonFileName, FilenameGenerator generator)
    {
        var extractedFiles = new List<string>();

        // Selects the nodes to rewrite: elements named "fileContent" in the empty namespace.
        Predicate<XmlReader> isFileContent =
            node => node.LocalName == "fileContent" && node.NamespaceURI == "";

        // Dumps the Base64 payload to its own file, then emits the replacement "fileRoute" node.
        Action<XmlReader, XmlWriter> replaceWithRoute = (source, target) =>
        {
            source.MoveToContent();
            var binaryFileName = generator.GenerateNewName();
            source.CopyBase64ElementContentsToFile(binaryFileName);

            target.WriteStartElement("fileRoute", "");
            target.WriteAttributeString("type", "string");
            target.WriteString(binaryFileName);
            target.WriteEndElement();

            extractedFiles.Add(binaryFileName);
        };

        using (var input = File.OpenRead(oldJsonFileName))
        using (var output = File.Open(newJsonFileName, FileMode.Create))
        using (var jsonReader = JsonReaderWriterFactory.CreateJsonReader(input, XmlDictionaryReaderQuotas.Max))
        using (var jsonWriter = JsonReaderWriterFactory.CreateJsonWriter(output))
        {
            jsonWriter.WriteTransformedNode(jsonReader, isFileContent, replaceWithRoute);
        }

        return extractedFiles.ToArray();
    }
}
/// <summary>
/// Supplies the file name to use for each extracted binary file.
/// </summary>
public abstract class FilenameGenerator
{
    /// <summary>
    /// Returns the name for the next binary file.
    /// </summary>
    public abstract string GenerateNewName();
}
// Replace the following with whatever algorithm you need to generate unique binary file names.
/// <summary>
/// Generates names of the form prefix + counter + extension, where the counter starts at 1 and
/// increases by one on every call.
/// </summary>
public class IncrementalFilenameGenerator : FilenameGenerator
{
    readonly string prefix;
    readonly string extension;
    int count = 0;

    /// <param name="prefix">Text placed before the counter; may include a directory and may contain periods.</param>
    /// <param name="extension">Extension to append, with or without a leading period; null for no extension.</param>
    public IncrementalFilenameGenerator(string prefix, string extension)
    {
        this.prefix = prefix;
        this.extension = extension;
    }

    /// <summary>
    /// Returns the next name in the sequence.
    /// </summary>
    public override string GenerateNewName()
    {
        // Do not use Path.ChangeExtension here: it replaces everything after the last '.' in the
        // file name, so a prefix such as "report.v2_" would yield "report.bin" on every call and
        // the generated files would overwrite one another.
        var newName = prefix + (++count).ToString();
        if (extension != null)
        {
            // Same separator rule as Path.ChangeExtension: add the period unless the caller supplied it.
            if (extension.Length == 0 || extension[0] != '.')
                newName += ".";
            newName += extension;
        }
        return newName;
    }
}
Then call it as follows:
// Replace the following with your actual binary file name generation algorithm
var fileNameGenerator = new IncrementalFilenameGenerator("Question59839437_fileContent_", ".bin");
// Rewrites the JSON and returns the names of the binary files that were extracted.
var binaryFileNames = JsonPatchExtensions.PatchFileContentToFileRoute(oldJsonFileName, newJsonFileName, fileNameGenerator);
Sources:
Demo fiddle here.