You seem more comfortable using arrays so here is a different approach that
employs that data structure;

import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;

import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * A simple example that shows how to use the event model to read a single
sheet
 * from an OOXML based Excel workbook and store the contents of each cell
into
 * a multi-dimensional array of type Object. Note that it is important to
ensure
 * that the array is sized appropriately as no checks are made in the code
to 
 * ensure that there is space in it for the contents of a cell.
 * 
 * Currently, the class will place the contents of each cell into an element
of
 * a multi-dimensional array in a manner that reflects that cells location
on
 * the worksheet. A worksheet that looks like this;
 * 
 * |-------|-------|-------|-------|-------|
 * |  One  |       |  Two  |       |       |
 * |-------|-------|-------|-------|-------|
 * |       | Three |       | Four  |       |
 * |-------|-------|-------|-------|-------|
 * |       |       |       |       |       |
 * |-------|-------|-------|-------|-------|
 * |       |       |  Six  |       |       |
 * |-------|-------|-------|-------|-------|
 * 
 * will produce an array that looks like this;
 * 
 * {{"One", null, "Two", null, null},
 *  {null, "Three", null, "Four", null},
 *  {null, null, null, null, null},
 *  {null, null, "Six", null, null}}
 * 
 * it would be a trivial task to modify the code if this behaviour was not
 * required.
 * 
 * @author Mark Beardsley
 * @version 1.00 8th April 2013
 */
public class ArrayExample {
    
    private Object[][] dataArray = null;
    
    /**
     * Create an instance of the ArrayExample class using the following
     * parameters;
     * 
     * @param rows A primitive int whose value will indicate how many rows
to
     *        allow space for. Note that it is critical to allow space to
     *        accomodate all rows on the sheet.
     * @param cols A primitive int whose value will indicate how many
columns
     *        to allow space for. Note that it is critical to allow enough
space
     *        to accomodate all cells on every row on the sheet.
     */
    public ArrayExample(int rows, int cols) {
        this.dataArray = new Object[rows][cols];
    }
    
    /**
     * Call this method to process one sheet within a specific Excel
workbook.
     * 
     * @param filename An instance of the String class encapsulating the
path to
     *        and name of an Excel workbook file. Note this file must be an
OOXML
     *        format file.
     * @param sheetname An instance of the String class that encapsulates
the
     *        name of a single sheet within the workbook. Typically, sheets
will
     *        be named either 'rIdn' or 'rSheetn' where n is a whole number
     *        value indicating the sheets position in the workbook. Thus,
you
     *        would pass either "rId1" or "rSheet1" to process the first
sheet
     *        in the workbook.
     * @throws Exception 
     */
    public void processOneSheet(String filename, String sheetname) throws
Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);
        InputStream sheet2 = r.getSheet(sheetname);
        if(sheet2 != null) {
            InputSource sheetSource = new InputSource(sheet2);
            parser.parse(sheetSource);
            sheet2.close();
        }
        else {
            throw new IllegalArgumentException("No sheet exists in the "
                    + "workbook with the name specified: " + sheetname);
        }
    }
    
    /**
     * Get an XMLReader to handle parsing of the workbook's xml markup.
     * 
     * @param sst An instance of the SharedStringsTable class that
encapsulates
     *        the xml markup that defines the workbook's shared strings
table.
     * @return An instance of the XMLReader class that may be used to handle
     *         parsing of the .xlsx file's xml markup.
     * @throws SAXException 
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws
SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader(
                "org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst, this.dataArray);
        parser.setContentHandler(handler);
        return parser;
    }
    
    public Object[][] getDataArray() {
        return(this.dataArray);
    }
    
    /**
     * Handlers use a mechanism called a 'callback' to provide the necessary
     * functionality to interpret the xml markup contained within the
various
     * sheetn.xml (where n is an integer that identifies the sheet, e.g. 
     * sheet1.xml)files in the OOXML archive. In simple terms, the parser
     * will call the startElement method when it encounters the start
element tag
     * within the markup and the endElement method when it encounters the
end
     * element tag of that. We must write code in these various methods that 
     * determines how the element's content - if there are any - will be
handled.
     */
    private static class SheetHandler extends DefaultHandler {

        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;
        private Object[][] dataArray = null;
        private CellReference cellRef = null;

        /**
         * Create an instance of the SheetHandler class using the following
         * parameters
         * 
         * @param sst An instance of the SharedStringsTable class that 
         *        encapsulates the workbook's shared strings table.
         * @param bookData A three dimensional array of type Object that
will
         *        be used to hold the data recovered from the worksheet.
         */
        private SheetHandler(SharedStringsTable sst, Object[][] dataArray) {
            this.sst = sst;
            this.dataArray = dataArray;
        }

        /**
         * Called whenever the parser encounters the start tag of an element
         * within the xml markup.
         * 
         * @param uri An instance of the String class that will encapsulate
the
         *        name of the Namespace. It will be empty if namespaces are
not
         *        used.
         * @param localName An instance of the String class that will 
         *        encapsulatethe local name for the element.
         * @param name An instance of the String class that will encapsulate
the
         *        qualified name for the element.
         * @param attributes An instance of the Attributes class that will 
         *        encapsulate any of the element's attributes. Empty if
there 
         *        are none.
         * @throws SAXException Thrown if a problem is encountered parsing
the
         *         xml markup.
         */
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {
            
            if (name.equals("c")) {
                // Firstly, get the cells address and store into the cellRef
                // variable.
                cellRef = new CellReference(attributes.getValue("r"));
                // Figure out if the value is an index in the SST set a flag
                // to indicate that the Shared Strings Table should be 
                // interrogated for the cells contents.
                String cellType = attributes.getValue("t");
                if (cellType != null && cellType.equals("s")) {
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }
            // Clear contents cache
            lastContents = "";
        }

        /**
         * Called when the parser encounters the end element tag within the
         * xml markup.
         * 
         * @param uri An instance of the String class that will encapsulate
the
         *        name of the Namespace. It will be empty if namespaces are
not
         *        used.
         * @param localName An instance of the String class that will 
         *        encapsulatethe local name for the element.
         * @param name An instance of the String class that will encapsulate
the
         *        qualified name for the element.
         * @throws SAXException Thrown if a problem is encountered parsing
the
         *         xml markup.
         */
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            // Process the last contents as required.
            // Do now, as characters() may be called more than once
            if (nextIsString) {
                int idx = Integer.parseInt(lastContents);
                lastContents = new
XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }

            // v => contents of a cell
            if (name.equals("v")) {
                // Store the contents of the cell into the array. Note how
                // the CellReference object is derefernced to provide the
                // column and row indices for the cell.
                dataArray[cellRef.getRow()][cellRef.getCol()] =
lastContents;
            }
        }

        /**
         * Called to process the elements character data.
         * 
         * @param ch An array of type character that contains the character
data
         *        recovered from the element.
         * @param start A primitive int whose value indicates the start
position
         *        of the character data within the ch array.
         * @param length A primitive int whose value indicates how many 
         *        characters were read.
         * @throws SAXException Thrown if a problem is encountered whilst 
         *         parsing the xml markup.
         */
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }
    
    public static void main(String[] args) {
        ArrayExample ae = null;
        try {
            ae = new ArrayExample(600, 10);
            ae.processOneSheet("C:/temp/Book2.xlsx", "rId1");
            Object[][] sheetData = ae.getDataArray();
            Object[] rowData = null;
            Object cellData = null;
            for(int i = 0; i < sheetData.length; i++) {
                rowData = sheetData[i];
                for(int j = 0; j < rowData.length; j++) {
                    System.out.print(i + ", " + j);
                    cellData = rowData[j];
                    if(cellData != null) {
                        System.out.println("\t" + cellData.toString());
                    }
                    else {
                        System.out.println("\tEmpty cell.");
                    }
                }
            }
        }
        catch(Exception ex) {
            System.out.println("Caught an: " + ex.getClass().getName());
            System.out.println("Message: " + ex.getMessage());
            System.out.println("Stacktrace follows:.....");
            ex.printStackTrace(System.out);
        }
    }
}



--
View this message in context: 
http://apache-poi.1045710.n5.nabble.com/Apache-POI-works-but-it-uses-too-RAM-is-it-a-limit-of-your-library-tp5712425p5712509.html
Sent from the POI - User mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to