0

NOTE: I have searched everywhere and this is not a duplicate question.

I'm trying to create/insert a fixed number of "new" rows into a spreadsheet, using Apache POI (poi-4.1.0) and Java, based on the content of a cell in the same row - See the diagram below (the "|" represents a column break):

1 Foo | 1001, 1002, 1003 |Yes
2 Bar | 1010 |Yes
3 Slf | 2500, 1500, 5200 |Yes

Essentially, what would happen is that I would then insert two new rows between rows 1 & 2 and again after line 3, duplicating all data from the source row, except that instead of having three values (or however many there may be) in column two, there would only be one - See the diagram below (the "|" represents a column break):

1 Foo | 1001 |Yes
2 Foo1 | 1002 |Yes
3 Foo2 | 1003 |Yes
4 Bar | 1001 |Yes
5 Slf | 2500 |Yes
6 Slf1 | 1500 |Yes
7 Slf2 | 5200 |Yes

This process would be repeated, only on the cells that have multiple values, until all rows in the file had been read and processed. I should note that the new lines would be appended in the same file.

Here is what I have for code (I used the code on this page as a template and attempted to update it to match the current release of POI I am using):

public class XLSXFileAdd {
private static final String prpfile = "src/main/resources/fileinfo/directories.properties";

public static void main(String[] args) throws Exception{      
    File infile = new File(XLSXFileAdd.getCIFile()); //Gets the fully-qualified path to the input file.
    XSSFWorkbook workbook = new XSSFWorkbook(new FileInputStream(infile));
    XSSFSheet sheet = workbook.getSheet("Sheet1"); //Opens the XLSX document at the specified worksheet.
    String msNum; //The string that will hold the contents of the cell with multiple values.
    XLSXFileAdd xfa = new XLSXFileAdd();
    for(int i = 1; i <= sheet.getLastRowNum(); i++){
        Row row = sheet.getRow(i);            
        msNum = String.valueOf(row.getCell(2));
        if(i == 2 && msNum.length() > 4){ //If the current column in the row is equal to the cell that could contain multiple values and the number of values in the cell are greater than 1 (length is greater than 4 for multiple values)
            xfa.copyRows(workbook,sheet,i, i);
        }else{
            //Read and parse the file normally (the method used here works without issue so I have not copied it to reduce clutter and confusion).
        }               
    }
}

private static String getCIFile(){
    File propfile = new File(prpfile);
    Properties properties = new Properties();
    try{
        FileInputStream fis = new FileInputStream(propfile);
        properties.load(fis);
    }catch(IOException ex){
        Logger.getLogger(XLSXFileAdd.class.getName()).log(Level.SEVERE, null, ex);
}
    String filename = (String)properties.get("xlsx.input.custdata");
    return filename;
}

private void copyRows(XSSFWorkbook workbook,XSSFSheet worksheet,int sourceRowNum, int destinationRowNum){
    //Get source & destination row
    Row newRow = worksheet.getRow(destinationRowNum);
    Row sourceRow = worksheet.getRow(sourceRowNum);

    //Check if the row will be overwritten; if the row is populated, push down all rows by one and create a new row
    if(newRow != null){
        worksheet.shiftRows(destinationRowNum,worksheet.getLastRowNum(), 1);
    }else{
        newRow = worksheet.createRow(destinationRowNum);
    }

    //Loop through source columns to add to new row.
    for(int i = 0; i < sourceRow.getLastCellNum(); i++){
        Cell oldCell = sourceRow.getCell(i);
        Cell newCell = newRow.createCell(i);

        //If the old is not populated (null), jump to next cell
        if(oldCell == null){
            newCell = null;
            continue;
        }

        //Set newly created cells to the style of the source cell
        newCell.setCellStyle(oldCell.getCellStyle());

        //Set the cell data type
        newCell.setCellType(oldCell.getCellType());

        //Set the value of the cell
        switch(oldCell.getCellType()){
            case _NONE:
                newCell.setCellValue(oldCell.getStringCellValue());
                break;
            case BLANK:
                break;
            case BOOLEAN:
                newCell.setCellValue(oldCell.getBooleanCellValue());
                break;
            case ERROR:
                newCell.setCellErrorValue(oldCell.getErrorCellValue());
                break;
            case FORMULA:
                newCell.setCellFormula(oldCell.getCellFormula());
                break;
            case NUMERIC:
                newCell.setCellValue(oldCell.getNumericCellValue());
                break;
            case STRING:
                newCell.setCellValue(oldCell.getRichStringCellValue());
                break;       
        }           
    }

    //Check for merged regions and copy said regions to new row.
    for(int i = 0; i <worksheet.getNumMergedRegions(); i++){
        CellRangeAddress cellRangeAddress = worksheet.getMergedRegion(i);
        if(cellRangeAddress.getFirstRow() == sourceRow.getRowNum()){
            CellRangeAddress newCellRangeAddress = new CellRangeAddress(newRow.getRowNum(),
                    (newRow.getRowNum()+(cellRangeAddress.getLastRow() - cellRangeAddress.getFirstRow()
                            )),cellRangeAddress.getFirstColumn(),cellRangeAddress.getLastColumn());
            worksheet.addMergedRegion(newCellRangeAddress);
        }
    }        
}

When I run the code, it runs normally and says it completed successfully, however, when I attempt to open the modified file it complains about corrupted data and removes all but two lines from the file (if I allow MS Excel to attempt to repair it). I have also tried redirecting the output to a different file, but the results are the same - it corrupts the data and only shows two lines with a blank row between them.

So my question(s) is/are this: 1) Is there a better way to do what I am wanting to do? 2) If there is not [a better way], what am I doing wrong that is causing it to corrupt all the data I am trying to write.

SirTech
  • 11
  • 4
  • 1
    `Sheet.shiftRows` has bugs in `apache poi` version greater than `3.17`. See https://stackoverflow.com/questions/55980407/apache-poi-shiftrows-corrupts-file-and-deletes-content/55980744#55980744 and https://bz.apache.org/bugzilla/buglist.cgi?quicksearch=shiftrows. It always had have bugs and always will have because of it's complexity. Try to avoid the need for moving cell contents. – Axel Richter Oct 01 '19 at 03:51
  • Can you add a new sheet, populate that as you go, then delete the original? No need to use shift rows, and likely a much simpler set of logic – Gagravarr Oct 01 '19 at 12:04
  • @Gagravarr, I could do that; however, I'd prefer not to. Please correct me if I am wrong, but I assume would it possible to write the contents of the row containing the cell with multiple values to a `String[][]` array, delete the original row and then append the new rows to the bottom of the file (below the last row) formatted as stated in my OP? Or would that make the logic more complex? I'm trying to go for simplicity as this entire XLSX parsing is going to be automated. – SirTech Oct 02 '19 at 12:55

1 Answers1

0

So I managed to solve my problem. Instead of trying to append below the source row, I decided to append the row to the end of the file instead, which made the logic much easier. Here is the code that I created to solve the issue:

public void addAddtlRows(Sheet sheet,Workbook workbook,DataFormatter formatter, ImportDataFormatter fmt, File file){

     //Loads and parses the regular expression into memory and creates a new StringBuilder() instance.
    final Pattern p = Pattern.compile(regex);
    StringBuilder sb = new StringBuilder();

    //Create the array which holds all the entries from a cell that contains multiple entries
    String[] sysNumber;

    //The number of the last row in the sheet.
    int lastRow = sheet.getLastRowNum();

    //Instantiates an integer that will be assigned the length of the array later
    int arrayLength;

    //Loops through the each row in the sheet 
    for(int r = 1; r < lastRow; r++){
        Row row = sheet.getRow(r);
        String cellData = formatter.formatCellValue(row.getCell(2));
        String active = formatter.formatCellValue(row.getCell(4));


        if((cellData.length() > 4) && (active.equals("Yes"))){

             /** Checks whether or not we are on the cell containing the
             * numbers and whether or not they are currently active.
             * If we are, get values for all cells in the row
             */
            String an = formatter.formatCellValue(row.getCell(0));
            String cn = formatter.formatCellValue(row.getCell(1));
            String ca = formatter.formatCellValue(row.getCell(3));
            String es = formatter.formatCellValue(row.getCell(4));
            String i10 = formatter.formatCellValue(row.getCell(5));
            String i9 = formatter.formatCellValue(row.getCell(6));
            String ia = formatter.formatCellValue(row.getCell(7));
            String rp = formatter.formatCellValue(row.getCell(8));

            /**
             * Checks the contents of the cell for more than one entry
             * If the cell contains more than one number, process
             * the data accordingly
             */

            fmt.setSysNum(cellData);
            String[] sys = String.valueOf(fmt.getSysNum()).split(",");

            /**
             * Assign the length value of the 'sysNumber' array to
             * the integer 'arrayLength'
             */
            arrayLength = sys.length;

            /**
             * Loop through each entry in the string array, creating
             * a new row on each iteration and pasting the data from
             * the old cells to the new ones
             */
            for(int n = 0; n < arrayLength; n++){
                Row nRow = sheet.createRow(sheet.getPhysicalNumberOfRows());
                nRow.createCell(0).setCellValue(an);
                nRow.createCell(1).setCellValue(cn);
                nRow.createCell(2).setCellValue(sys[n]);
                nRow.createCell(3).setCellValue(ca);
                nRow.createCell(4).setCellValue(es);
                nRow.createCell(5).setCellValue(i10);
                nRow.createCell(6).setCellValue(i9);
                nRow.createCell(7).setCellValue(ia);
                nRow.createCell(8).setCellValue(rp);

            } 
        }              
    }

    //Writes the newly added contents of the worksheet to the workbook.
    try {
    workbook.write(new FileOutputStream(file));
    } catch (FileNotFoundException ex) {
    Logger.getLogger(MapMultipleSNToDBFields.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
    Logger.getLogger(MapMultipleSNToDBFields.class.getName()).log(Level.SEVERE, null, ex);
    }
}
SirTech
  • 11
  • 4