1

I have an excel that filled about 50k-60k rows.

I have to make that excel content uploaded into MySQL, usually I use the apache poi to read and upload it into MySQL, but this file cannot be read using apache poi cause the file was to LARGE.

Can anybody guide me how to do that? Here is my sample code to upload the content into MySQL using apache poi (it works for some little xlsx files that contains 1000-2000 rows)

public static void uploadCrossSellCorpCard(FileItem file, String dbtable) {
    System.out.println("UploadUtil Running" + file.getFileName().toString());
    try {
        for(int i = 0; i<=sheetx.getLastRowNum(); i++){
            row = sheetx.getRow(i);
            
            try{
                int oc = (int) row.getCell(0).getNumericCellValue();
                if((String.valueOf(oc).matches("[A-Za-z0-9]{3}"))){
                    String rm_name = row.getCell(1).getStringCellValue();
                    String company = row.getCell(2).getStringCellValue();
                    String product = row.getCell(3).getStringCellValue();
                    String detail = row.getCell(4).getStringCellValue();
                    String type = row.getCell(5).getStringCellValue();
                    
                    String sql = "INSERT INTO " + dbtable + " VALUES('"
                            + oc + "','" + rm_name + "','" + company + "','"
                            + product + "','" + detail + "','" + type + "')";           
                    save(sql);
                    System.out.println("Import rows " + i);
                }
            } catch (IllegalStateException e) {
                e.printStackTrace();
            } catch (NullPointerException e) {
                System.out.println(e);
            }
        }
        System.out.println("Success import xlsx to mysql table");
    } catch (NullPointerException e){
        System.out.println(e);
        System.out.println("Select the file first before uploading");
    }
}

Note: I use hibernate method for handle upload schema.. "save(sql)" is calling my hibernate method

Nimantha
  • 6,405
  • 6
  • 28
  • 69
Diastowo
  • 42
  • 1
  • 10
  • i dont know how to solve the excel issue (other than export it to CSV and import that), but for the insert you should use a PreparedStatement with a BatchUpdate - you preparedStatement.addBatch() each row then preparedStatement.clearBatch(); every few thousand rows (this pushes the data from memory to the DB). It is infinitely faster than individual inserts. – slipperyseal May 11 '16 at 04:14
  • i dont have any problem about inserting.. its already works with hibernate, yes also with PreparedStatement.. now my problem is my excel that want to upload is to large and cannot be read if using apache poi, i've already increase my java heap size but still have the same problem.. – Diastowo May 11 '16 at 04:18
  • check this out. there is a streaming API... http://stackoverflow.com/questions/11891851/how-to-load-a-large-xlsx-file-with-apache-poi – slipperyseal May 11 '16 at 04:26
  • this question has **nothing** to with *uploading* in general, its actually some kind of data extraction & database operation - thing ... hey if you're new to programming you should'nt make terms up ... thats weird – specializt May 11 '16 at 10:47

1 Answers1

0

You can try using Apache POI SAX - read the section --> XSSF and SAX (Event API) on https://poi.apache.org/spreadsheet/how-to.html

You can read entire excel with 60k rows or even 100k rows just like reading an xml file. only thing you need to take care is empty cell since xml tag for empty cell will just skip the cell it but you may like to update null value in db table for the cell representing empty value.

Solution --> you can read each row and fire insert statement in a loop. and keep watch on empty cell by monitoring cell address if gap occurs then check respective column name and accordingly update your insert statement with null value.

I hope this helps you. below sample code read excel and store it in ArrayList of ArrayList for tabular representation. I am printing message in console - "new row begins" before start reading and printing row. and cell number of each value before printing cell value itself.

I have not taken care of cell gaps for empty cell but that you can code it based on finding cell gap since in my case I don't have empty cell. look for cell address in the console that helps you in spotting any gap and handling it as you wish.

Run this code and works fine for me. don't forget to add xmlbeans-2.3.0.jar other then jars required by import statements.

import java.io.InputStream;
import java.util.ArrayList;

import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelToStringArray implements Cloneable {

    public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();
    public static ArrayList<StringBuilder> retainedString;
    public static Integer lineCounter = 0;

    public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
            throws Exception {
        ExcelToStringArray myParser = new ExcelToStringArray();
        myParser.processOneSheet(PathtoFilename, rId);
        return stringArrayToReturn;
    }

    public void processOneSheet(String PathtoFilename, String rId) throws Exception {
        OPCPackage pkg = OPCPackage.open(PathtoFilename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        InputStream sheet = r.getSheet(rId);
        InputSource sheetSource = new InputSource(sheet);
        parser.parse(sheetSource);
        sheet.close();
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    private class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

            if (name.equals("row")) {
                retainedString = new ArrayList<StringBuilder>();

                if (retainedString.isEmpty()) {
                    stringArrayToReturn.add(retainedString);
                    retainedString.clear();
                }

                System.out.println("New row begins");

                retainedString.add(new StringBuilder(lineCounter.toString()));
                lineCounter++;
            }
            // c => cell
            if (name.equals("c")) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");

                // System.out.print(attributes.getValue("r") + " - ");
                // Figure out if the value is an index in the SST
                String cellType = attributes.getValue("t");
                if (cellType != null && cellType.equals("s")) {
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }
            // Clear contents cache
            lastContents = "";
        }

        public void endElement(String uri, String localName, String name) throws SAXException {
            // Process the last contents as required.
            // Do now, as characters() may be called more than once
            if (nextIsString) {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }

            // v => contents of a cell
            // Output after we've seen the string contents
            if (name.equals("v")) {
                System.out.println(lastContents);
                // value of cell what it string or number
                retainedString.add(new StringBuilder(lastContents));
            }
        }

        public void characters(char[] ch, int start, int length) throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }

    public static void main(String[] args) throws Exception {
        StopWatch watch = new StopWatch();
        watch.start();
        ExcelToStringArray generate = new ExcelToStringArray();
        // rID1 is first sheet in my workbook for rId2 for second sheet and so
        // on.
        generate.GetSheetInStringArray("D:\\Users\\NIA\\Desktop\\0000_MasterTestSuite.xlsx", "rId10");

        watch.stop();

        System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
        System.out.println("done");
        System.out.println(generate.stringArrayToReturn);

    }

}