-2

I am writing an application that uses a JTable to display lines of a log file. I have the data parsed, but when I try to add the rows to my AbstractTableModel I receive either a "gc overhead limit exceeded" or "java.lang.OutOfMemoryError: Java heap space" error. Is there a way to configure the garbage collector, or to change my AbstractTableModel, that would allow me to load the needed rows?

package gui;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import javax.swing.table.AbstractTableModel;
import saxxmlparse.logEvent;

/**
 *
 * @author David.Crosser
 */
/**
 * Table model backed by a growable list of {@link logEvent} rows.
 * Rows are appended incrementally (typically from a SwingWorker) via
 * {@link #addRow(logEvent)} / {@link #addRows(List)}, which fire the
 * appropriate insertion events so the JTable repaints only what changed.
 */
public class MyTableModel extends AbstractTableModel {

    // Column headers, in display order. Column 10 ("Error") is currently
    // hard-coded to "N" in getValueAt (logEvent's error flag is not surfaced).
    private String[] columnNames = new String[]{"Type", "Time", "TID", "LID", "User", "Message", "Query", "Protocol", "Port", "IP", "Error"};
    // Backing storage; one logEvent per table row.
    private List<logEvent> data;

    public MyTableModel() {
        data = new ArrayList<>(25);
    }

    /**
     * Every column is rendered as a String. Column 1 ("Time") was once
     * intended to be a Date column; the redundant if/else (both branches
     * returned String.class) has been collapsed. Reintroduce a Date branch
     * for column 1 once the parser supplies real Date values.
     */
    @Override
    public Class<?> getColumnClass(int columnIndex) {
        return String.class;
    }

    @Override
    public String getColumnName(int col) {
        return columnNames[col];
    }

    @Override
    public int getColumnCount() {
        return columnNames.length;
    }

    @Override
    public int getRowCount() {
        return data.size();
    }

    /**
     * Maps a (row, col) pair to the corresponding logEvent accessor.
     * Returns null for any column index outside 0..10.
     */
    @Override
    public Object getValueAt(int row, int col) {
        logEvent value = data.get(row);
        Object retObj = null;
        switch (col) {
            case 0:
                retObj = value.getType();
                break;
            case 1:
                retObj = value.getTime();
                break;
            case 2:
                retObj = value.getTid();
                break;
            case 3:
                retObj = value.getLid();
                break;
            case 4:
                retObj = value.getUser();
                break;
            case 5:
                retObj = value.getMsg();
                break;
            case 6:
                retObj = value.getQuery();
                break;
            case 7:
                retObj = value.getProtocol();
                break;
            case 8:
                retObj = value.getPort();
                break;
            case 9:
                retObj = value.getIp();
                break;
            case 10:
                // Placeholder: error flag not yet read from logEvent.
                retObj = "N";
                break;
        }
        return retObj;
    }

    /** Appends a single row and notifies listeners of the one-row insertion. */
    public void addRow(logEvent value) {
        int rowCount = getRowCount();
        data.add(value);
        fireTableRowsInserted(rowCount, rowCount);
    }

    /** Varargs convenience overload; delegates to {@link #addRows(List)}. */
    public void addRows(logEvent... value) {
        addRows(Arrays.asList(value));
    }

    /**
     * Appends a batch of rows and fires a single insertion event covering
     * the new range (cheaper than one event per row).
     */
    public void addRows(List<logEvent> rows) {
        // BUGFIX: an empty batch used to fire an invalid (first > last)
        // range, e.g. fireTableRowsInserted(n, n - 1).
        if (rows.isEmpty()) {
            return;
        }
        int firstRow = getRowCount();
        data.addAll(rows);
        fireTableRowsInserted(firstRow, getRowCount() - 1);
    }
}

    package gui;

import java.sql.ResultSet;
import java.util.List;
import javax.swing.SwingWorker;
import saxxmlparse.logEvent;

/**
 *
 * @author David.Crosser
 */
/**
 * Streams query results from the database into a {@link MyTableModel}
 * off the EDT. Rows are built on the worker thread and handed to the
 * EDT in chunks via publish()/process(), so the UI stays responsive
 * while large result sets load.
 */
public class TableSwingWorker extends SwingWorker<MyTableModel, logEvent> {

    private final MyTableModel tableModel;
    String query;
    dataBase.Database db;
    // Running count of rows delivered to the model (EDT-only access in process()).
    int totalRows = 0;

    public TableSwingWorker(dataBase.Database db, MyTableModel tableModel, String query) {
        this.tableModel = tableModel;
        this.query = query;
        this.db = db;
    }

    @Override
    protected MyTableModel doInBackground() throws Exception {

        // This is a deliberate pause to allow the UI time to render
        Thread.sleep(2000);

        // BUGFIX: the ResultSet was never closed; try-with-resources
        // releases it even if iteration throws.
        // NOTE(review): assumes the caller does not reuse this ResultSet
        // after queryTable() returns — confirm against Database's contract.
        try (ResultSet rs = db.queryTable(query)) {

            System.out.println("Start populating");

            while (rs.next()) {

                logEvent data = new logEvent();

                // BUGFIX: loop bound was i <= getColumnCount(), which ran one
                // extra iteration (i == 11) past the last column index.
                for (int i = 0; i < tableModel.getColumnCount(); i++) {
                    // JDBC columns are 1-based, hence i + 1.
                    String cell = (String) rs.getObject(i + 1);
                    switch (i) {
                        case 0:
                            data.setType(cell);
                            break;
                        case 1:
                            data.setTime(cell);
                            break;
                        case 2:
                            data.setTid(cell);
                            break;
                        case 3:
                            data.setLid(cell);
                            break;
                        case 4:
                            data.setUser(cell);
                            break;
                        case 5:
                            data.setMsg(cell);
                            break;
                        case 6:
                            data.setQuery(cell);
                            break;
                        case 7:
                            data.setProtocol(cell);
                            break;
                        case 8:
                            data.setPort(cell);
                            break;
                        case 9:
                            data.setIp(cell);
                            break;
                        case 10:
                            data.setError(cell);
                            break;
                    }
                }
                // Hand the finished row to process() on the EDT.
                publish(data);

                // Be polite to other threads while looping over a large result set.
                Thread.yield();
            }
        }
        return tableModel;
    }

    /**
     * Runs on the EDT: appends the accumulated chunk of published rows to
     * the model in one batch (a single table-changed event per chunk).
     */
    @Override
    protected void process(List<logEvent> chunks) {
        totalRows += chunks.size();
        System.out.println("Adding " + chunks.size() + " rows --- Total rows:" + totalRows);
        tableModel.addRows(chunks);
    }
}
  • 1
    Load a few to view at a time. Or configure the JVM to have access to more memory. See http://stackoverflow.com/questions/6452765/how-to-increase-heap-size-of-jvm – bradimus Nov 10 '16 at 21:25
  • Why was my previous comment deleted? Comments like "+1 thanks" are off-topic, as are meta discussions about the merits of commenting (that should be a Q in meta), but that was not what I did. I gave a useful notice that this is a common issue. The comment was therefore about the type of problem at hand and its place in computer science; someone who misunderstands this could benefit from such a comment. 2 people might have benefited from that. I have read SO's pages on what is appropriate. If you disagree with the content enough to delete, you should enlighten us with the reasoning. – Loduwijk Nov 10 '16 at 23:47
  • 1
    See also this related [example](http://stackoverflow.com/a/25526869/230513). – trashgod Nov 11 '16 at 08:59

1 Answer

2

My answer will be applicable to the general type of problem where you need to work on a very large data set, not just your specific "2 million rows in a table" problem.

When you have the problem where the data you need to operate on is larger than some container (in your case, more memory than your system physically has, but this can apply to any data larger than its container - physical, virtual, logical, or otherwise), you need to create a mechanism for streaming only the data you need at any given time, and possibly slightly more if you want a buffer.

For example, if you want to be able to show 10 rows in your table, and the data set is way too large, then you need to create a table model which knows about the 10 rows that are currently being displayed, and have it swap that data out with what it needs when the view changes. So create a table model which holds the 10 records, not 2 million. Or for the optional buffering I mentioned, make the model hold 30 records; the 10 records in the view, and the 10 before and after it so that you can immediately change the data as the user scrolls the table so that small scrollbar increments are highly responsive - then the problem of streaming data on the fly is only apparent when the user scrolls very far very fast (ie: click the scrollbar "thumb" and drag it immediately from top to bottom).

This would be the same way compression algorithms compress/decompress 100GB of data; that's not all in memory at once. Or how software which is backed by tape drives work (they have no choice since it's not random-access). Or, the example almost everyone is familiar with: this is how online video streaming works. Think of YouTube and the loading bar at the bottom of the video with its grey buffer zone; if you "fast-forward" to a time in that buffer zone it often switches immediately, but if you change to a time past that the video might stop for a second while it loads the next frame (and then it buffers more). This is how an enormous table works too, except that you are "streaming" to the data model from either memory or from disk, the stream source and destination being in the same process. Otherwise, same idea.

Loduwijk
  • 1,950
  • 1
  • 16
  • 28