
The script below fetches each row from a table (q_iTable) holding 2 billion records in database db1, reads two column values (iIndex and pIndex), and converts them into a JSON data structure. This JSON is then inserted into iTable in db2. The script also compares column values between the two tables and performs either an update or an insert on iTable in db2 accordingly.
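For each source row this produces a one-element JSON array wrapping the two values, e.g. `[{"iIndex": "...", "pIndex": "..."}]`.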

This transfer of data from q_iTable to iTable is currently running at about 700 records/minute, which would take years to move the entire 2 billion records. How can I speed this process up so the full 2 billion records can be transferred in a reasonable time? Any help/guidance would be highly appreciated!
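(At 700 records/minute, 2 billion records work out to about 2,000,000,000 ÷ 700 ≈ 2.86 million minutes, i.e. roughly 5.4 years of continuous running.)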

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Transfer {
    public static void main(String[] args) throws SQLException {
        Logger log = LoggerFactory.getLogger(Transfer.class);
        Connection con = null;
        PreparedStatement stat = null;
        ResultSet results = null;
        Connection con2 = null;
        Statement stmt2 = null;
        try {
            con = DriverManager.getConnection("jdbc:mysql://ip_address:port/", "username", "password");
            con.setAutoCommit(false);
            // Stream the source table row-by-row instead of materializing it client-side.
            stat = con.prepareStatement(
                    "SELECT * FROM db1.q_iTable",
                    ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY);
            stat.setFetchSize(Integer.MIN_VALUE);
            results = stat.executeQuery();

            con2 = DriverManager.getConnection("jdbc:mysql://ip_address:port/", "username", "password");
            stmt2 = con2.createStatement();
            while (results.next()) {
                String iSerial = results.getString("iSerial");
                String iIndex = results.getString("iIndex");
                String pIndex = results.getString("pIndex");

                // Wrap the two column values in a one-element JSON array.
                JSONObject jsonSubObject = new JSONObject();
                jsonSubObject.put("iIndex", iIndex);
                jsonSubObject.put("pIndex", pIndex);
                JSONArray jsonArrayRET = new JSONArray();
                jsonArrayRET.add(jsonSubObject);
                String properties = jsonArrayRET.toString();

                // Fetch the most recently inserted target row for comparison.
                ResultSet rs = stmt2.executeQuery("SELECT * FROM db2.iTable ORDER BY id DESC LIMIT 1");
                if (rs.next()) {
                    String previSerial = rs.getString("iSerial");
                    String previIndex = rs.getString("iIndex");

                    if (previSerial.equals(iSerial) && previIndex.equals(iIndex)) {
                        stmt2.executeUpdate("UPDATE db2.iTable SET iIndex=\"" + iIndex + "\" ORDER BY id DESC LIMIT 1");
                    } else {
                        stmt2.executeUpdate("INSERT INTO db2.iTable VALUES (NULL, '" + iSerial + "', '" + properties + "')");
                    }
                } else {
                    stmt2.executeUpdate("INSERT INTO db2.iTable VALUES (NULL, '" + iSerial + "', '" + properties + "')");
                }
                rs.close();
            }
        } catch (SQLException e) {
            throw new IllegalStateException("cannot connect", e);
        } finally {
            if (results != null) results.close();
            if (stat != null) stat.close();
            if (con != null) con.close();
            if (stmt2 != null) stmt2.close();
            if (con2 != null) con2.close();
        }
    }
}

EDIT, after incorporating addBatch() and executeBatch():

I am now using addBatch() and executeBatch(), but I am not observing much of a difference in the data transfer rate. Any suggestions?

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Transfer {
    public static void main(String[] args) throws SQLException {
        Logger log = LoggerFactory.getLogger(Transfer.class);
        Connection con = null;
        PreparedStatement stat = null;
        ResultSet results = null;
        Connection con2 = null;
        Statement stmt2 = null;
        try {
            con = DriverManager.getConnection("jdbc:mysql://ip_address:port/", "username", "password");
            con.setAutoCommit(false);
            stat = con.prepareStatement(
                    "SELECT * FROM db1.q_iTable",
                    ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY);
            stat.setFetchSize(Integer.MIN_VALUE);
            results = stat.executeQuery();

            con2 = DriverManager.getConnection("jdbc:mysql://ip_address:port/", "username", "password");
            con2.setAutoCommit(false); // required for the explicit commit() after each batch
            stmt2 = con2.createStatement();
            int count = 0; // must live outside the loop so the batch is flushed only every 1000 rows
            while (results.next()) {
                String iSerial = results.getString("iSerial");
                String iIndex = results.getString("iIndex");
                String pIndex = results.getString("pIndex");

                JSONObject jsonSubObject = new JSONObject();
                jsonSubObject.put("iIndex", iIndex);
                jsonSubObject.put("pIndex", pIndex);
                JSONArray jsonArrayRET = new JSONArray();
                jsonArrayRET.add(jsonSubObject);
                String properties = jsonArrayRET.toString();

                // Note: this SELECT still runs once per source record, and it cannot
                // see rows that are queued in the batch but not yet executed.
                ResultSet rs = stmt2.executeQuery("SELECT * FROM db2.iTable ORDER BY id DESC LIMIT 1");
                if (rs.next()) {
                    String previSerial = rs.getString("iSerial");
                    String previIndex = rs.getString("iIndex");

                    if (previSerial.equals(iSerial) && previIndex.equals(iIndex)) {
                        stmt2.addBatch("UPDATE db2.iTable SET iIndex=\"" + iIndex + "\" ORDER BY id DESC LIMIT 1");
                    } else {
                        stmt2.addBatch("INSERT INTO db2.iTable VALUES (NULL, '" + iSerial + "', '" + properties + "')");
                    }
                } else {
                    stmt2.addBatch("INSERT INTO db2.iTable VALUES (NULL, '" + iSerial + "', '" + properties + "')");
                }
                rs.close();

                count++;
                if (count % 1000 == 0) {
                    stmt2.executeBatch();
                    con2.commit();
                }
            }
            // Flush the final, partial batch.
            stmt2.executeBatch();
            con2.commit();
        } catch (SQLException e) {
            throw new IllegalStateException("cannot connect", e);
        } finally {
            if (results != null) results.close();
            if (stat != null) stat.close();
            if (con != null) con.close();
            if (stmt2 != null) stmt2.close();
            if (con2 != null) con2.close();
        }
    }
}
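For comparison, here is a minimal sketch of what the comments below suggest: a parameterized PreparedStatement batch on a connection opened with rewriteBatchedStatements=true, with the previously written row cached in local variables so db2 is not queried once per record. The URL placeholders and the three-column layout of db2.iTable are assumptions carried over from the code above; it also relies on db2.iTable being empty when the process starts (confirmed in the comments), and the no-op UPDATE branch is simplified to a skip. This is a sketch, not a tested implementation.

// Sketch only (assumes MySQL Connector/J; URL placeholders as above).
// rewriteBatchedStatements=true lets the driver rewrite the batch into
// multi-row INSERT statements, which is where the large speedup comes from.
Connection con2 = DriverManager.getConnection(
        "jdbc:mysql://ip_address:port/?rewriteBatchedStatements=true",
        "username", "password");
con2.setAutoCommit(false);
PreparedStatement insert = con2.prepareStatement(
        "INSERT INTO db2.iTable VALUES (NULL, ?, ?)");

String previSerial = null; // cache the last written row instead of re-querying db2
String previIndex = null;
int count = 0;
while (results.next()) {
    String iSerial = results.getString("iSerial");
    String iIndex = results.getString("iIndex");
    String pIndex = results.getString("pIndex");

    // Same JSON shape as above; bind parameters handle quoting and escaping.
    JSONObject obj = new JSONObject();
    obj.put("iIndex", iIndex);
    obj.put("pIndex", pIndex);
    JSONArray arr = new JSONArray();
    arr.add(obj);

    // A duplicate of the previous row triggers a no-op UPDATE in the original
    // code, so the sketch simply skips it.
    if (!(iSerial.equals(previSerial) && iIndex.equals(previIndex))) {
        insert.setString(1, iSerial);
        insert.setString(2, arr.toString());
        insert.addBatch();
        if (++count % 1000 == 0) {
            insert.executeBatch();
            con2.commit();
        }
    }
    previSerial = iSerial;
    previIndex = iIndex;
}
insert.executeBatch(); // flush the final partial batch
con2.commit();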
  • See https://stackoverflow.com/a/4355097/4933735. Also see the comments on that answer. – grnc Apr 18 '18 at 18:23
  • ... and also that question's [second answer](https://stackoverflow.com/a/23938071/2144390) which talks about `rewriteBatchedStatements=true` – Gord Thompson Apr 18 '18 at 18:33
  • @A.Görünücü, I tried using batch but haven't observed a big difference yet. I have added the modified code in the EDIT section above. Thanks – Mooni Apr 18 '18 at 19:46
  • Your code suggests that db2.iTable may be empty when the process starts. Will that always be the case? – Gord Thompson Apr 18 '18 at 21:17
  • 2
    You are reading `q_iTable` row-by-row which will always be the slowest way to process the data. If possible, I'd suggest you leave out JDBC/Java and process the data with [native bulk operations](https://stackoverflow.com/questions/3242504/how-can-i-transfer-data-between-2-mysql-databases), handling JSON processing with [built-in functions](https://dev.mysql.com/doc/refman/5.7/en/json-creation-functions.html) while loading the data into the target DB. This will likely be orders of magnitude faster than what you'll achieve through JDBC. – Mick Mnemonic Apr 18 '18 at 22:35
  • @GordThompson, yes that would always be the case – Mooni Apr 19 '18 at 01:23
  • @MickMnemonic I looked into it; however, reproducing the for loop and if conditions there would be challenging – Mooni Apr 19 '18 at 13:10
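To make Mick Mnemonic's suggestion above concrete: if db1 and db2 are schemas on the same MySQL server (5.7 or later for the JSON functions), the whole transfer can in principle be pushed into one set-based statement and never round-trip through Java at all. A sketch under those assumptions, where the first column of db2.iTable is assumed to be AUTO_INCREMENT and the duplicate handling from the original loop is omitted:

// Hypothetical one-shot, set-based transfer; both schemas must be reachable
// over the same connection, and db2.iTable's first column must auto-increment.
stmt2.executeUpdate(
        "INSERT INTO db2.iTable " +
        "SELECT NULL, iSerial, JSON_ARRAY(JSON_OBJECT('iIndex', iIndex, 'pIndex', pIndex)) " +
        "FROM db1.q_iTable");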
