You seem more comfortable using arrays so here is a different approach that
employs that data structure;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* A simple example that shows how to use the event model to read a single
sheet
* from an OOXML based Excel workbook and store the contents of each cell
into
* a multi-dimensional array of type Object. Note that it is important to
ensure
* that the array is sized appropriately as no checks are made in the code
to
* ensure that there is space in it for the contents of a cell.
*
* Currently, the class will place the contents of each cell into an element
of
* a multi-dimensional array in a manner that reflects that cells location
on
* the worksheet. A worksheet that looks like this;
*
* |-------|-------|-------|-------|-------|
* | One | | Two | | |
* |-------|-------|-------|-------|-------|
* | | Three | | Four | |
* |-------|-------|-------|-------|-------|
* | | | | | |
* |-------|-------|-------|-------|-------|
* | | | Six | | |
* |-------|-------|-------|-------|-------|
*
* will produce an array that looks like this;
*
* {{"One", null, "Two", null, null},
* {null, "Three", null, "Four", null},
* {null, null, null, null, null},
* {null, null, "Six", null, null}}
*
* it would be a trivial task to modify the code if this behaviour was not
* required.
*
* @author Mark Beardsley
* @version 1.00 8th April 2013
*/
public class ArrayExample {
private Object[][] dataArray = null;
/**
* Create an instance of the ArrayExample class using the following
* parameters;
*
* @param rows A primitive int whose value will indicate how many rows
to
* allow space for. Note that it is critical to allow space to
* accomodate all rows on the sheet.
* @param cols A primitive int whose value will indicate how many
columns
* to allow space for. Note that it is critical to allow enough
space
* to accomodate all cells on every row on the sheet.
*/
public ArrayExample(int rows, int cols) {
this.dataArray = new Object[rows][cols];
}
/**
* Call this method to process one sheet within a specific Excel
workbook.
*
* @param filename An instance of the String class encapsulating the
path to
* and name of an Excel workbook file. Note this file must be an
OOXML
* format file.
* @param sheetname An instance of the String class that encapsulates
the
* name of a single sheet within the workbook. Typically, sheets
will
* be named either 'rIdn' or 'rSheetn' where n is a whole number
* value indicating the sheets position in the workbook. Thus,
you
* would pass either "rId1" or "rSheet1" to process the first
sheet
* in the workbook.
* @throws Exception
*/
public void processOneSheet(String filename, String sheetname) throws
Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader( pkg );
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
InputStream sheet2 = r.getSheet(sheetname);
if(sheet2 != null) {
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
else {
throw new IllegalArgumentException("No sheet exists in the "
+ "workbook with the name specified: " + sheetname);
}
}
/**
* Get an XMLReader to handle parsing of the workbook's xml markup.
*
* @param sst An instance of the SharedStringsTable class that
encapsulates
* the xml markup that defines the workbook's shared strings
table.
* @return An instance of the XMLReader class that may be used to handle
* parsing of the .xlsx file's xml markup.
* @throws SAXException
*/
public XMLReader fetchSheetParser(SharedStringsTable sst) throws
SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader(
"org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst, this.dataArray);
parser.setContentHandler(handler);
return parser;
}
public Object[][] getDataArray() {
return(this.dataArray);
}
/**
* Handlers use a mechanism called a 'callback' to provide the necessary
* functionality to interpret the xml markup contained within the
various
* sheetn.xml (where n is an integer that identifies the sheet, e.g.
* sheet1.xml)files in the OOXML archive. In simple terms, the parser
* will call the startElement method when it encounters the start
element tag
* within the markup and the endElement method when it encounters the
end
* element tag of that. We must write code in these various methods that
* determines how the element's content - if there are any - will be
handled.
*/
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private Object[][] dataArray = null;
private CellReference cellRef = null;
/**
* Create an instance of the SheetHandler class using the following
* parameters
*
* @param sst An instance of the SharedStringsTable class that
* encapsulates the workbook's shared strings table.
* @param bookData A three dimensional array of type Object that
will
* be used to hold the data recovered from the worksheet.
*/
private SheetHandler(SharedStringsTable sst, Object[][] dataArray) {
this.sst = sst;
this.dataArray = dataArray;
}
/**
* Called whenever the parser encounters the start tag of an element
* within the xml markup.
*
* @param uri An instance of the String class that will encapsulate
the
* name of the Namespace. It will be empty if namespaces are
not
* used.
* @param localName An instance of the String class that will
* encapsulatethe local name for the element.
* @param name An instance of the String class that will encapsulate
the
* qualified name for the element.
* @param attributes An instance of the Attributes class that will
* encapsulate any of the element's attributes. Empty if
there
* are none.
* @throws SAXException Thrown if a problem is encountered parsing
the
* xml markup.
*/
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if (name.equals("c")) {
// Firstly, get the cells address and store into the cellRef
// variable.
cellRef = new CellReference(attributes.getValue("r"));
// Figure out if the value is an index in the SST set a flag
// to indicate that the Shared Strings Table should be
// interrogated for the cells contents.
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
/**
* Called when the parser encounters the end element tag within the
* xml markup.
*
* @param uri An instance of the String class that will encapsulate
the
* name of the Namespace. It will be empty if namespaces are
not
* used.
* @param localName An instance of the String class that will
* encapsulatethe local name for the element.
* @param name An instance of the String class that will encapsulate
the
* qualified name for the element.
* @throws SAXException Thrown if a problem is encountered parsing
the
* xml markup.
*/
public void endElement(String uri, String localName, String name)
throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new
XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
if (name.equals("v")) {
// Store the contents of the cell into the array. Note how
// the CellReference object is derefernced to provide the
// column and row indices for the cell.
dataArray[cellRef.getRow()][cellRef.getCol()] =
lastContents;
}
}
/**
* Called to process the elements character data.
*
* @param ch An array of type character that contains the character
data
* recovered from the element.
* @param start A primitive int whose value indicates the start
position
* of the character data within the ch array.
* @param length A primitive int whose value indicates how many
* characters were read.
* @throws SAXException Thrown if a problem is encountered whilst
* parsing the xml markup.
*/
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
}
public static void main(String[] args) {
ArrayExample ae = null;
try {
ae = new ArrayExample(600, 10);
ae.processOneSheet("C:/temp/Book2.xlsx", "rId1");
Object[][] sheetData = ae.getDataArray();
Object[] rowData = null;
Object cellData = null;
for(int i = 0; i < sheetData.length; i++) {
rowData = sheetData[i];
for(int j = 0; j < rowData.length; j++) {
System.out.print(i + ", " + j);
cellData = rowData[j];
if(cellData != null) {
System.out.println("\t" + cellData.toString());
}
else {
System.out.println("\tEmpty cell.");
}
}
}
}
catch(Exception ex) {
System.out.println("Caught an: " + ex.getClass().getName());
System.out.println("Message: " + ex.getMessage());
System.out.println("Stacktrace follows:.....");
ex.printStackTrace(System.out);
}
}
}
--
View this message in context:
http://apache-poi.1045710.n5.nabble.com/Apache-POI-works-but-it-uses-too-RAM-is-it-a-limit-of-your-library-tp5712425p5712509.html
Sent from the POI - User mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]