Here's my code
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.Vector;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
public class ReadExcelFile {
public static void main(final String[] args) {
final String fileName = "C:\\temp\\carreras.xls";
final Vector<Vector<Cell>> dataHolder = ReadCSV(fileName);
printCellDataToConsole(dataHolder);
}
public static Vector<Vector<Cell>> ReadCSV(final String fileName) {
final Vector<Vector<Cell>> cellVectorHolder = new Vector<Vector<Cell>>();
try {
final FileInputStream myInput = new FileInputStream(fileName);
final POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput);
final HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem);
final HSSFSheet mySheet = myWorkBook.getSheetAt(0);
final Iterator<Row> rowIter = mySheet.rowIterator();
while (rowIter.hasNext()) {
final HSSFRow myRow = (HSSFRow) rowIter.next();
final Iterator<Cell> cellIter = myRow.cellIterator();
final Vector<Cell> cellStoreVector = new Vector<Cell>();
while (cellIter.hasNext()) {
final HSSFCell myCell = (HSSFCell) cellIter.next();
cellStoreVector.addElement(myCell);
}
cellVectorHolder.addElement(cellStoreVector);
}
} catch (final Exception e) {
e.printStackTrace();
}
return cellVectorHolder;
}
private static void printCellDataToConsole(
final Vector<Vector<Cell>> dataHolder) {
for (int i = 0; i < dataHolder.size(); i++) {
final Vector<Cell> cellStoreVector = dataHolder.elementAt(i);
for (int j = 0; j < cellStoreVector.size(); j++) {
final HSSFCell myCell = (HSSFCell) cellStoreVector.elementAt(j);
final String stringCellValue = myCell.toString();
System.out.print(stringCellValue + "|");
}
System.out.println();
}
}
}
It works as expected except for one little detail: If it finds an empty cell, it gets skipped. The expected output is:
1|2||4
3||7|2
however the output is
1|2|4
3|7|2
How can I iterate through a entire Excel file and get the expected behavior? Correcting the xls files is not an option since we need to parse about 400 excel files.