I am trying to select around 1 million records from redshift table and then need to insert back those to redshift table (after doing some manipulations)
But, that is taking very long. I waited around 1 hour for the program to terminate but no luck. Console also does not seem to print the print statements
but seems stuck after printing few statements.
Tried the same for 100 records, it works fine and takes around 2 minutes.
This is part of my code:
conn.setAutoCommit(false);
stmt = conn.createStatement();
stmt.setFetchSize(100);
ResultSet rsSelect = stmt.executeQuery("select * from table");
System.out.println("select done !");
String queryInsert = "insert into table"
+"(event_id,domain_userid,collector_tstamp,se_category,se_action,se_label,se_property)"
+"values(?,?,?,?,?,?,?)";
PreparedStatement preparedStatement = conn.prepareStatement(queryInsert);
final int batchSize = 10000;
int count = 0;
System.out.println("about to go into loop !");
while(rsSelect.next()){
String event_id = rsSelect.getString("event_id");
String domain_userid = rsSelect.getString("domain_userid");
Timestamp collector_tstamp = rsSelect.getTimestamp("collector_tstamp");
String se_category = rsSelect.getString("se_category");
String se_action = rsSelect.getString("se_action");
String se_label = rsSelect.getString("se_label");
String se_property = rsSelect.getString("se_property");
//some manipulations
preparedStatement.setString(1, event_id);
preparedStatement.setString(2, domain_userid);
preparedStatement.setTimestamp(3, collector_tstamp);
preparedStatement.setString(4, se_category);
preparedStatement.setString(5, se_action);
preparedStatement.setString(6, se_label);
preparedStatement.setString(7, se_property);
preparedStatement.addBatch();
if(++count % batchSize == 0){
preparedStatement.executeBatch();
System.out.println("batch execution!");
}
}
System.out.println("out of loop");
preparedStatement.executeBatch();
preparedStatement.close();
conn.commit();
conn.close();