0

I am trying to deserialize Json data from a web feed that is ~1.7 GB in size. I started with the following code:

public override void CreateNewOutputRows()
{

    //Set Webservice URL
    string wUrl = "webserviceURLgoeshere";

    try
    {

        RootObject outPutResponse = GetWebServiceResult(wUrl);

        foreach (Impression imp in outPutResponse.impressions)
        {

            ImpressionsSheetOutputBuffer.AddRow();
            ImpressionsSheetOutputBuffer.token = imp.token;
            ImpressionsSheetOutputBuffer.userid = imp.userid;
            ImpressionsSheetOutputBuffer.itemid = imp.itemid;
            ImpressionsSheetOutputBuffer.view = imp.view;
            ImpressionsSheetOutputBuffer.imageguid = imp.imageguid;
            ImpressionsSheetOutputBuffer.bytes = imp.bytes;
            ImpressionsSheetOutputBuffer.format = imp.format;

            ImpressionIDBuffer.AddRow();
            ImpressionIDBuffer.oid = imp.imId.oid;

            ImpressionParamsBuffer.AddRow();
            ImpressionParamsBuffer.origformat = imp.imParams.origFormat;
            ImpressionParamsBuffer.size = imp.imParams.size;

            ImpressionTimeBuffer.AddRow();
            ImpressionTimeBuffer.numLong = Int32.Parse(imp.imTime.numLong);
        }
    }

    catch (Exception e)
    {
        FailComponent(e.ToString());
    }
}

private RootObject GetWebServiceResult(string wUrl)
{

    HttpWebRequest httpWReq = (HttpWebRequest)WebRequest.Create(wUrl);
    HttpWebResponse httpWResp = (HttpWebResponse)httpWReq.GetResponse();
    RootObject jsonResponse = null;

    try
    {

        if (httpWResp.StatusCode == HttpStatusCode.OK)
        {

            Stream responseStream = httpWResp.GetResponseStream();
            string jsonString = null;

            using (StreamReader reader = new StreamReader(responseStream))
            {
                jsonString = reader.ReadToEnd();
                reader.Close();
            }

            JavaScriptSerializer sr = new JavaScriptSerializer();
            jsonResponse = sr.Deserialize<RootObject>(jsonString);

        }

        else
        {
            FailComponent(httpWResp.StatusCode.ToString());

        }
    }

    catch (Exception e)
    {
        FailComponent(e.ToString());
    }
    return jsonResponse;
}

private void FailComponent(string errorMsg)
{
    bool fail = false;
    IDTSComponentMetaData100 compMetadata = this.ComponentMetaData;
    compMetadata.FireError(1, "Error Getting Data From Webservice!", errorMsg, "", 0, out fail);

}

}

public class Id {

    public string oid { get; set; }
}

public class Params {

    public string origFormat { get; set; }
    public string size { get; set; }
}

public class Time {

    public string numLong { get; set; }
}

public class Impression {

    public Id imId { get; set; }
    public string token { get; set; }
    public string userid { get; set; }
    public string itemid { get; set; }
    public string view { get; set; }
    public string imageguid { get; set; }
    public int bytes { get; set; }
    public string format { get; set; }
    public Params imParams { get; set; }
    public Time imTime { get; set; }
}

public class RootObject {
    public List<Impression> impressions { get; set; }
}

However, the StreamReader ReadToEnd method is where the exception gets thrown, as the size of the data is too large.

I tried changing that code to the following:

Stream responseStream = httpWResp.GetResponseStream();

StreamReader reader = new StreamReader(responseStream);

using (var myjson = new JsonTextReader(reader))
{
    JsonSerializer myserialization = new JsonSerializer();
    return (List<RootObject>)myserialization.Deserialize(myjson, typeof(List<RootObject>));
}

This gives me an error that I cannot implicitly convert type List<RootObject> to RootObject. Does anyone see what I might be doing wrong that I can't make this conversion? I used this question to get around the OutOfMemory exception, but now it returns no deserialized items. Any advice would be much appreciated.

EDIT: Json data looks like the following:

{
"_id": {
    "$oid": "000000000000000000000000"
    },
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
    "originalFormat": "tif",
    "size": "50x50"
    },
"time": {
    "$numberLong": "1458748200000"
    }
}
{
"_id": {
    "$oid": "100000000000000000000000"
     },
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
    "originalFormat": "tif",
    "size": "50x50"
    },
"time": {
    "$numberLong": "1458748200000"
    }
}
Community
  • 1
  • 1
Anthony
  • 83
  • 1
  • 1
  • 7
  • 64bit app should work fine. Please make sure you are not running as x86. – Alexei Levenkov Jul 22 '16 at 20:10
  • 1
    `~1.7 GB` it appears you're running out of memory. Solution? Don't do that. How? Better design. Like what? Tons of things. Give me one. Process the file incrementally, don't try to load it all into memory. How. Depends. That's why people hire developers :/ –  Jul 22 '16 at 20:11
  • Anthony, if your new problem is "cannot implicitly convert type `List` to `RootObject`" then that should be your question, otherwise people like @Will will try to answer the "out of memory" question instead of your actual question, "why isn't this deserializing?". – Quantic Jul 22 '16 at 20:14
  • 1
    Don't try to read the whole thing at once. The whole point of a StreamReader is that you can stream the data. You don't have to get the whole thing at once. Do ReadLine or something. Consider using async/await if you need to keep your application responsive while it gets the massive file. – David Cram Jul 22 '16 at 20:18
  • Can you share what your JSON file looks like? – dbc Jul 23 '16 at 14:19
  • dbc Yes thanks, I just added a sample of what the Json data looks like. There are a couple million of these. – Anthony Jul 24 '16 at 00:06
  • In order to help you parse the JSON in a memory-efficient manner, we need to see the actual JSON including the root containers(s), not just the individual objects. Are those objects in an array, e.g. `[{"single object"}, {"single object"}, ..., {"single object"}]` ? Or are they contained in some root object? You can reduce the number of objects down to one or two, but otherwise please show enough of the file for us to be able to determine its schema. – dbc Jul 24 '16 at 09:45
  • Basically what we need is a [mcve] showing a complete example of the kind of file you are trying to parse -- only with one or two objects rather than hundreds of thousands. – dbc Jul 24 '16 at 09:58
  • dbc I added another object to the edit. They don't appear to be in an array. All I see is one object listed after another, e.g. {"single object"}{"single object"},etc. I don't see array brackets anywhere. – Anthony Jul 24 '16 at 21:19
  • 1
    Possible duplicate of [Parsing large json file in .NET](http://stackoverflow.com/q/32227436/10263) – Brian Rogers Jul 24 '16 at 21:47
  • 1
    Anthony, your example JSON is not a valid JSON because it cannot have multiple root objects. See [here](http://stackoverflow.com/questions/11639886/how-to-read-a-json-file-containing-multiple-root-elements). – Quantic Jul 25 '16 at 15:48
  • Thanks for pointing this out. – Anthony Jul 25 '16 at 17:02

1 Answers1

0

You should create some rule to how you separate each object, and serialize them seperately.

Basically you could append stream.ReadLine() 18 times (assuming all objects are written exactly like you posted)

If they aren't you should use stream.ReadLine() to count your open and closing curly brackets until you reach the end of each object and serialize them separately that way.

I am guessing there are better ways, but these are quite simple and should solve your problem...

gilmishal
  • 1,884
  • 1
  • 22
  • 37