The Excel Streaming Reader is named Reader because it is for reading only. There is a StreamingWorkbook which implements Workbook but most of it's methods are not implemented yet. So it is a draft until now.
If it comes to huge Excel
files then we must consider a totally different approach than creating HSSFWorkbook
or XSSFWorkbook
from those huge files.
A Workbook
can only be created as a whole. But of course we could take single parts of the Excel
files like the sheets part (containing rows and cells) or the styles part (containing the styles of the cells, the fonts, fills, borders etc.) or the shared strings part (containing text contents of the cells) and parse those. This leads to less resource consuming since the single parts are less huge than the whole file. But it also needs more knowledge about the internal structure of the file. And the easiest task is reading and parsing the single parts but if it comes to changings and so to the need writing those into the part streams, then it gets quickly complicated because then we need also to know about and to take into account the part relationships.
Fortunately the XSSF
(*.xlsx
) parts are XML
and as such to parse (read) and also to write using StAX.
The following code uses this approach. It gets the styles table part and the sheet1 part form the ZIP package of an *.xlsx
file and then sets red background color in each cell of every 5th row from Sheet1 of this file. Also if the file maybe huge, the code should nevertheless running with as less possible resource amount.
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.XMLEvent;
import javax.xml.namespace.QName;
import java.io.File;
import java.io.OutputStream;
import java.util.regex.Pattern;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
class StaxReadAndWriteTestRowBGColor {
public static void main(String[] args) {
try {
File file = new File("file.xlsx");
OPCPackage opcpackage = OPCPackage.open(file);
//get the styles table
PackagePart stylestabletablepart = opcpackage.getPartsByName(Pattern.compile("/xl/styles.xml")).get(0);
StylesTable stylestable = new StylesTable(stylestabletablepart);
//get the sheet1 package part
PackagePart sheetpart = opcpackage.getPartsByName(Pattern.compile("/xl/worksheets/sheet1.xml")).get(0);
//create reader and writer for the sheet1 package part
XMLEventReader reader = XMLInputFactory.newInstance().createXMLEventReader(sheetpart.getInputStream());
XMLEventWriter writer = XMLOutputFactory.newInstance().createXMLEventWriter(sheetpart.getOutputStream());
//create a factory for producing XML elements and attributes
XMLEventFactory eventFactory = XMLEventFactory.newInstance();
int rowsCount = 0;
Map<Integer, Integer> styleIdxMap = new HashMap<>(); //a Map for mapping old styleIdx to new styleIdx
while(reader.hasNext()){ //loop over all XML in sheet1.xml
XMLEvent event = (XMLEvent)reader.next();
if(event.isStartElement()){
StartElement startElement = (StartElement)event;
QName startElementName = startElement.getName();
if(startElementName.getLocalPart().equalsIgnoreCase("row")) { //start element of row
rowsCount++;
} else if (startElementName.getLocalPart().equalsIgnoreCase("c")) { //start element of cell
if (rowsCount % 5 == 0) { // every 5th row
Attribute attribute;
Iterator attributeIterator = startElement.getAttributes(); //get cell's attributes
List<Attribute> attributeList = new ArrayList<>();
int styleIdx = 0;
while (attributeIterator.hasNext()) {
attribute = (Attribute)attributeIterator.next();
if ("s".equals(attribute.getName().getLocalPart())) { //cell has style attribute already
styleIdx = Integer.valueOf(attribute.getValue()); //so get the styleIdx
//but don't put in the attributeList since we wants creating it new
} else {
attributeList.add(attribute);
}
}
XSSFCellStyle cellstyle;
cellstyle = stylestable.getStyleAt(styleIdx);
if (cellstyle.getFillForegroundColor() != IndexedColors.RED.getIndex()
&& cellstyle.getFillPatternEnum() != FillPatternType.SOLID_FOREGROUND) {
if (styleIdxMap.get(styleIdx) == null) { //old cell style is not mapped until now
cellstyle = (XSSFCellStyle)cellstyle.clone(); //so clone style
styleIdxMap.put(styleIdx, (int)cellstyle.getIndex()); //and put in the map
} else {
cellstyle = stylestable.getStyleAt(styleIdxMap.get(styleIdx)); //else get from already mapped style
}
cellstyle.setFillForegroundColor(IndexedColors.RED.getIndex());
cellstyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
}
styleIdx = (int)cellstyle.getIndex(); //get final styleIdx
attribute = eventFactory.createAttribute("s", Integer.toString(styleIdx)); //create new style attribute
attributeList.add(attribute); //add it to the attributeList now
StartElement newstartElement = eventFactory.createStartElement(startElementName,
attributeList.iterator(),
startElement.getNamespaces());
event = newstartElement; //create a new event for the writer using the new cell StartElement
}
}
}
writer.add(event); //by default write each read event
}
writer.flush();
//write out the styles table
OutputStream out = stylestabletablepart.getOutputStream();
stylestable.writeTo(out);
out.close();
opcpackage.close();
} catch (Exception ex) {
ex.printStackTrace();
}
}
}