4

I keep getting Java heap space error when storing JSON object to file. The input file is 180 MB.

I am creating a JSON Object which consist of JSON array of JSON Objects. I always come across this error when working on files which are not that large.
What would be the best method to store such objects to file ?

public class ProcessData {

    public static void createJson(String path) throws Exception 
    {
        BufferedReader br = new BufferedReader(new FileReader(path));
        FileWriter fw = new FileWriter("restaurants.json");

        try 
        {
            JSONObject storeObj = new JSONObject();
            JSONArray restaurantArray = new JSONArray();

            String line;
            StringBuilder sb = new StringBuilder();

            while ((line = br.readLine()) != null) 
            {
                JSONObject obj = new JSONObject();
                String[] vals = line.split("\\t");
                obj.put("id", vals[0]);
                String[] tempvals = vals[1].split("\\|");
                String[] details = tempvals[0].split(",");

                obj.put("name", details[0]);
                sb.setLength(0);
                sb.append(details[2]);
                sb.append(", ");
                sb.append(details[3]);
                sb.append(", ");
                sb.append(details[1]);
                sb.append(", ");
                sb.append(details[4]);

                String address = sb.toString();
                address.replace("\\s+", " ");
                obj.put("address", address);
                JSONArray arr = new JSONArray();

                for (int i = 0; i < tempvals.length; ++i) 
                {
                    JSONObject objtemp = new JSONObject();
                    details = tempvals[i].split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");

                    objtemp.put("inspectionDate", details[7]);
                    objtemp.put("code", details[9]);
                    objtemp.put("description", details[10]);
                    objtemp.put("score", details[12]);
                    objtemp.put("grade", details[13]);
                    objtemp.put("gradeDate", details[14]);
                    arr.add(objtemp);
                }
                obj.put("violationDetails", arr);
                restaurantArray.add(obj);
          }
          storeObj.put("restaurants", restaurantArray);
          fw.write(storeObj.toString());
        } 
        catch (Exception e) 
        {
            e.printStackTrace();
        } 
        finally 
        {
            fw.flush();
            fw.close();
            br.close();
        }
    }

    public static void main(String args[]) throws Exception {
        try{
        createJson("output/part-r-00000");
        }
        catch(Exception e){
            e.printStackTrace();
        }
    }
}
Himanshu
  • 4,327
  • 16
  • 31
  • 39
Viraj
  • 777
  • 1
  • 13
  • 32
  • Best to use a library that offers a streaming API. See http://stackoverflow.com/questions/444380/is-there-a-streaming-api-for-json – WW. Apr 15 '15 at 02:58
  • @WW. Yes. I am using simple JSON already. – Viraj Apr 15 '15 at 04:17
  • 4
    Why are you loading the whole JSON object into memory instead of writing each individual "JSONObject obj" into the file. You can start by writing first "[", then a "," between each obj write. Let me know if you want an example of how to do so. Also, if you needed to load the whole thing in memory, are you using Xmx option to increase your memory heap – faljbour Apr 15 '15 at 05:01
  • @faljbour Yes. your method worked. Yes, I am also using the Xmx option to increase my VM memory but still doesn't suffice. – Viraj Apr 15 '15 at 15:29
  • @Viraj can you please provide sample data of your file ? – Yagnesh Agola Apr 16 '15 at 05:02
  • @JavaDev It is an output from a mapreduce job which consist of the following fields: `key(Long) \t listOfRestaurantInspectionResults(String)`. If you want I can share a sample but each key has a lengthy list of chars. – Viraj Apr 16 '15 at 05:11
  • @Viraj just post a sample which contain less result if possible – Yagnesh Agola Apr 16 '15 at 05:21
  • @Viraj please find my answer below may this will help you. – Yagnesh Agola Apr 16 '15 at 06:15

1 Answers1

1

I have tried with removing crete JSONObject part from your function and create your JSON string manually and write it to file directly without buffering into your object.

Below is the updated code of your function :

public static void createJson(String path) throws Exception 
{
    BufferedReader br = new BufferedReader(new FileReader(path));
    FileWriter fw = new FileWriter("restaurants.json");
    try 
    {
        String line;
        StringBuilder sb = new StringBuilder();

        fw.write("{\"restaurants\":");
        line = br.readLine();
        while (line != null) 
        {
            fw.write("[{");
            String[] vals = line.split("\\t");

            fw.write("{");
            fw.write("\"id\":\""+vals[0]+"\",");

            String[] tempvals = vals[1].split("\\|");
            String[] details = tempvals[0].split(",");

            fw.write("\"name\":\""+details[0]+"\",");

            sb.setLength(0);
            sb.append(details[2]);
            sb.append(", ");
            sb.append(details[3]);
            sb.append(", ");
            sb.append(details[1]);
            sb.append(", ");
            sb.append(details[4]);

            String address = sb.toString();
            address.replace("\\s+", " ");

            fw.write("\"address\":\""+address+"\",");

            fw.write("\"violationDetails\":[");
            StringBuilder sbStr = new StringBuilder();
            for (int i = 0; i < tempvals.length; ++i) 
            {
                details = tempvals[i].split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");

                sbStr.append("{");
                sbStr.append("\"inspectionDate\":\""+details[7]+"\",");
                sbStr.append("\"code\":\""+details[9]+"\",");
                sbStr.append("\"description\":\""+details[10]+"\",");
                sbStr.append("\"score\":\""+details[12]+"\",");
                sbStr.append("\"grade\":\""+details[13]+"\",");
                sbStr.append("\"gradeDate\":\""+details[14]+"\",");
                sbStr.append("},");
            }
            if(tempvals.length>0)
                sbStr.substring(0, sbStr.length()-1);

            fw.write(sbStr.toString()+"]");

            line = br.readLine();
            if(line != null)
                fw.write("}],");
            else
                fw.write("}]");
        }
        fw.write("}");
    } 
    catch (Exception e) 
    {
        e.printStackTrace();
    } 
    finally 
    {
        fw.flush();
        fw.close();
        br.close();
    }
}

I haven't checked with real input file data so please check with your correct data.

I hope this will solved your problem.

May your JSON output in file look like this:

{ "restaurants": [ { "id": "id", "name": "name", "address": "Address", "violationDetails": [ { "inspectionDate": "details7", "code": "details9", "description": "details10", "score": "details12", "grade": "details13", "gradeDate": "details14" } ] } ] }

Yagnesh Agola
  • 4,556
  • 6
  • 37
  • 50