3

I have a JSON file with a single field that takes a huge amount of space when loaded into memory. The other fields are reasonable, but I'm trying to take care not to load that particular field unless I absolutely have to.

{
    "Field1": "value1",
    "Field2": "value2",
    "Field3": "a very very long string that potentially takes a few GB of memory"
}

When reading that file into memory, I'd want to ignore Field3 (because loading it could crash my app). Here's some code that I would assume does that because it uses io streams rather than passing a []byte type to the Unmarshal command.

package main

import (
    "encoding/json"
    "os"
)

// main decodes myJSONFile.json into a struct that declares only the small
// fields. Fields absent from the struct (Field3) are not stored in the result.
// The process exits with status 2 if the file cannot be opened or decoded.
func main() {
    type MyStruct struct {
        Field1 string
        Field2 string
    }
    fi, err := os.Open("myJSONFile.json")
    if err != nil {
        os.Exit(2)
    }
    // create an instance and populate
    var mystruct MyStruct
    err = json.NewDecoder(fi).Decode(&mystruct)
    // Close explicitly rather than with defer: os.Exit below would skip a
    // deferred call, and the handle is not needed once decoding has finished.
    // The file was opened read-only, so a Close error carries no information.
    fi.Close()
    if err != nil {
        os.Exit(2)
    }
    // do some other stuff
}

The issue is that the built-in json.Decoder type reads the entire file into memory on Decode before throwing away key-values that don't match the struct's fields (as has been pointed out on StackOverflow before: link).

Are there any ways of decoding JSON in Go without keeping the entire JSON object in memory?

Community
  • 1
  • 1
eric chiang
  • 2,575
  • 2
  • 20
  • 23

1 Answers1

2

You could write a custom io.Reader that you feed to json.Decoder and that will pre-read your json file and skip that specific field.

The other option is to write your own decoder, which is more complicated and messy.

//edit it seemed like a fun exercise, so here goes:

// Scanner states for IgnoreField. The zero value, scanCopy, is the start
// state, so an IgnoreField built as a struct literal works without setup.
const (
    scanCopy   = iota // between tokens; bytes are copied through
    scanKey           // inside an object key that is withheld while it is compared with Field
    scanString        // inside a string that is copied through
    skipSeek          // matching key dropped; discarding up to the first byte of its value
    skipScalar        // discarding a number or literal value
    skipString        // discarding a string (the value itself, or one nested inside it)
    skipNested        // discarding an object or array value
)

// Markers kept on IgnoreField.stack, one per container currently open.
const (
    inArray     byte = '['
    inObject    byte = '{' // object with no member written to the output yet
    inObjectOut byte = '+' // object with at least one member already written
)

// IgnoreField is an io.Reader that filters a stream of JSON text, removing
// every object member whose key is Field (at any nesting depth) together with
// its value and one adjacent comma, so that well-formed input stays
// well-formed. Everything else is passed through byte for byte.
//
// The value being removed is never buffered: memory use is bounded by the size
// of the caller's read buffer plus len(Field), no matter how large the value
// is. Values of any JSON type are handled, as are escaped quotes inside
// strings and tokens that straddle two reads of the underlying Reader.
//
// Keys are compared as raw bytes, so a key written with escape sequences
// (for example "Field\u0033") is not recognized as Field.
type IgnoreField struct {
    io.Reader
    Field string
    buf   bytes.Buffer // filtered output not yet handed to the caller

    state   int    // one of the scan*/skip* constants
    esc     bool   // previous byte of the current string was an unescaped backslash
    wantKey bool   // the next string token is an object key
    comma   bool   // an object-level ',' has been read but not yet written
    depth   int    // containers open inside the value being discarded
    stack   []byte // open containers of the copied-through document
    held    []byte // withheld bytes of the key being compared, opening quote included
    err     error  // sticky error from the underlying Reader
}

// NewIgnoreField returns an IgnoreField that reads JSON from r and omits every
// member named field.
func NewIgnoreField(r io.Reader, field string) *IgnoreField {
    return &IgnoreField{
        Reader: r,
        Field:  field,
    }
}

// Read implements io.Reader. It fills p with filtered JSON and reports the
// underlying Reader's error (io.EOF included) only after all filtered data
// that preceded it has been delivered.
func (iF *IgnoreField) Read(p []byte) (n int, err error) {
    if len(p) == 0 {
        return 0, nil
    }
    for iF.buf.Len() == 0 {
        if iF.err != nil {
            return 0, iF.err
        }
        // p doubles as scratch space for the raw input: filter copies whatever
        // survives into iF.buf before p is overwritten with the result.
        m, rerr := iF.Reader.Read(p)
        iF.filter(p[:m]) // bytes returned alongside an error still count
        if rerr != nil {
            iF.err = rerr
            iF.flush()
        } else if m == 0 {
            // Pass an empty read on to the caller instead of spinning here.
            return 0, nil
        }
    }
    return iF.buf.Read(p)
}

// filter runs the scanner over in, appending the bytes to keep to iF.buf.
func (iF *IgnoreField) filter(in []byte) {
    for i := 0; i < len(in); i++ {
        b := in[i]
        switch iF.state {
        case scanCopy:
            iF.copyByte(b)
        case scanKey:
            iF.keyByte(b)
        case scanString:
            iF.buf.WriteByte(b)
            if iF.endOfString(b) {
                iF.state = scanCopy
            }
        case skipSeek:
            switch {
            case isJSONSpace(b) || b == ':':
                // separator between the dropped key and its value
            case b == '"':
                iF.state = skipString
            case b == '{' || b == '[':
                iF.depth = 1
                iF.state = skipNested
            case b == ',' || b == '}' || b == ']':
                // Malformed input: the key has no value. Resume copying and
                // let this byte be handled as ordinary structure.
                iF.state = scanCopy
                i--
            default:
                iF.state = skipScalar
            }
        case skipScalar:
            if isJSONSpace(b) || b == ',' || b == '}' || b == ']' {
                // The delimiter is not part of the value; reprocess it.
                iF.state = scanCopy
                i--
            }
        case skipString:
            if iF.endOfString(b) {
                if iF.depth == 0 {
                    iF.state = scanCopy
                } else {
                    iF.state = skipNested
                }
            }
        case skipNested:
            switch b {
            case '"':
                iF.state = skipString
            case '{', '[':
                iF.depth++
            case '}', ']':
                iF.depth--
                if iF.depth == 0 {
                    iF.state = scanCopy
                }
            }
        }
    }
}

// copyByte handles one byte outside any string while copying through.
func (iF *IgnoreField) copyByte(b byte) {
    if isJSONSpace(b) {
        iF.buf.WriteByte(b)
        return
    }
    key := iF.wantKey
    iF.wantKey = false
    if key && b == '"' {
        // Withhold the key until it is known whether it names Field.
        iF.held = append(iF.held[:0], b)
        iF.state = scanKey
        return
    }
    if iF.comma {
        // The withheld comma was not followed by a key: pass it on unchanged.
        iF.buf.WriteByte(',')
        iF.comma = false
    }
    top := len(iF.stack) - 1
    switch b {
    case '"':
        iF.state = scanString
    case '{':
        iF.stack = append(iF.stack, inObject)
        iF.wantKey = true
    case '[':
        iF.stack = append(iF.stack, inArray)
    case '}', ']':
        if top >= 0 {
            iF.stack = iF.stack[:top]
        }
    case ',':
        if top >= 0 && iF.stack[top] != inArray {
            // Whether this comma survives depends on the member that follows.
            iF.comma = true
            iF.wantKey = true
            return
        }
    }
    iF.buf.WriteByte(b)
}

// keyByte handles one byte of an object key that is being compared with Field.
// At most len(Field)+2 bytes are ever withheld.
func (iF *IgnoreField) keyByte(b byte) {
    iF.held = append(iF.held, b)
    closing := iF.endOfString(b)
    i := len(iF.held) - 1     // index of b within `"` + Field + `"`
    last := len(iF.Field) + 1 // index of the closing quote in that text
    switch {
    case closing && i == last:
        // Exact match: drop the key and the comma in front of it, then
        // discard the value that follows.
        iF.held = iF.held[:0]
        iF.comma = false
        iF.state = skipSeek
    case closing:
        iF.keepKey()
        iF.state = scanCopy
    case i < last && b == iF.Field[i-1]:
        // Still a prefix of Field; keep withholding.
    default:
        iF.keepKey()
        iF.state = scanString
    }
}

// keepKey writes out a withheld key that turned out not to be Field. A
// withheld comma is written first unless every earlier member of the object
// was dropped, in which case it would directly follow the opening brace.
func (iF *IgnoreField) keepKey() {
    if top := len(iF.stack) - 1; top >= 0 {
        if iF.comma && iF.stack[top] == inObjectOut {
            iF.buf.WriteByte(',')
        }
        iF.stack[top] = inObjectOut
    }
    iF.comma = false
    iF.buf.Write(iF.held)
    iF.held = iF.held[:0]
}

// flush releases anything still withheld once the input has ended, so that
// truncated input reaches the caller unchanged.
func (iF *IgnoreField) flush() {
    if iF.comma {
        iF.buf.WriteByte(',')
        iF.comma = false
    }
    iF.buf.Write(iF.held)
    iF.held = iF.held[:0]
}

// endOfString tracks backslash escapes and reports whether b is the unescaped
// quote that terminates the current string.
func (iF *IgnoreField) endOfString(b byte) bool {
    switch {
    case iF.esc:
        iF.esc = false
    case b == '\\':
        iF.esc = true
    case b == '"':
        return true
    }
    return false
}

// isJSONSpace reports whether b is insignificant whitespace in JSON.
func isJSONSpace(b byte) bool {
    return b == ' ' || b == '\t' || b == '\n' || b == '\r'
}

// main decodes myJSONFile.json through an IgnoreField filter so that Field3 is
// removed from the stream before the JSON decoder sees it, then prints the
// decoded struct. It exits with status 2 if the file cannot be opened.
func main() {
    type MyStruct struct {
        Field1 string
        Field2 string
    }
    fi, err := os.Open("myJSONFile.json")
    if err != nil {
        os.Exit(2)
    }
    defer fi.Close()
    // create an instance and populate
    var mystruct MyStruct
    // err is already declared above; redeclaring it here with := does not compile.
    err = json.NewDecoder(NewIgnoreField(fi, "Field3")).Decode(&mystruct)
    if err != nil {
        fmt.Println(err)
    }
    fmt.Println(mystruct)
}

playground

OneOfOne
  • 95,033
  • 20
  • 184
  • 185
  • Wow, didn't expect that. Thanks a ton! – eric chiang Jul 28 '14 at 20:14
  • You're welcome. Just a few things: it's not fully tested, of course, and `i = strings.Index(s, `"`) //end quote` will explode if `"` is anywhere inside your `Field3` value, even if it's escaped like `\"`. – OneOfOne Jul 28 '14 at 20:18
  • Yeah, I noticed that. Definitely did plan to test it a bit first. Best not to copy/paste code from stackoverflow :) – eric chiang Jul 28 '14 at 20:23