What are your jvm parameters? For large workbooks you really need to set -Xmx2g or more. I've gone as high as 16g. If you can break up the data into multiple sheets that helps too.
On Tue, May 2, 2017, 01:35 Mubasshar Ahmad <sham...@msn.com> wrote: > Hello, > > > I spent a lot of time on searching the solution but not succeeded. Thats > why I am writing to you. > > > Actually I have to read a large XLSX file haing 200 columns and 30000 > rows. I have tried both SAX parsing ( > http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api) approach-1 > and new XSSFWorkbook() approach-2 as follows: > > > XSSFWorkbook workbook = new XSSFWorkbook (file); > XSSFSheet sheet = workbook.getSheetAt(0); > Iterator ite = sheet.rowIterator(); > while(ite.hasNext()){ > Row row = ite.next(); > Iterator<Cell> cite = row.cellIterator(); > while(cite.hasNext()) > > { > Cell c = cite.next(); > System.out.print(c.toString() +" "); > } > } > > > But the problem is that in approach 1, it takes too much time for parsing > the file, 10 minutes for 2000 rows. And in approach-2, it got stuck on the > first line of the code snippet and throws OutOfMemoryException. > > > Please suggest me something so that I may parse a very large XLSX file > more than 2 GB size. > > > Here is the handler code. > > > //////////////////////////////////////////////////////// START > //////////////////////////////////////////////// > > > public void startElement(String uri, String localName, String name, > Attributes attributes) throws SAXException { > // c => cell, row ==> row, r ==> cell referrence > if(name.equals(C)) > { > cellType = attributes.getValue(T); > } > if(name.equalsIgnoreCase(ROW2)) > { > //reset rowcount for loading header of new file import > if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER) && > rowCount > 0 && canContinue && counterReset){ > //rowcount has value because of an old import file > rowCount = 0; > counterReset=false; > } > > row = new ArrayList<TableCellValue<?>>(); > isRowStarted = true; > rowNumber++; > } > } > public void endElement(String uri, String localName, String name) > throws SAXException { > if(name.equalsIgnoreCase(ROW2)) > { > isRowStarted = false; > if(canContinue) > { > if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER)) > { > //processing header row > //its a header row > if(rowNumber==headerRow) > { > canContinue = false; > handler.acceptRow(row); > } > rowCount = rowCount + 1; > } > if(handler.getClass().getSimpleName().equals(EXCEL_TABLE_PASTER)) > { > // accept the data rows. These are the rows after the header row > if(rowNumber > headerRow) > { > rowData.add(row); > if((rowData.size() == blockSize) || (rowNumber == rowCount)) > { > List<List<TableCellValue<?>>> tempRowData = new > ArrayList<List<TableCellValue<?>>>(rowData); > handler.acceptRows(tempRowData); > rowData.clear(); > } > } > } > }else > { > if(handler.getClass().getSimpleName().equals(EXCEL_COLUMN_HELPER)){ > rowCount = rowCount + 1; > } > } > } > if(name.equals(C) && canContinue){ > if(contents == null || contents.length() < 0){ > //it is a blank cell > row.add( new TableCellValue<Object>(null)); > } > else > { > processRecord(); > } > contents = EMPTY; //clear contents after value edited cell closed > } > } > > /** > * Cache contents of a cell > * It is called for some cells more than once > * we append to get full value of a cell > */ > public void characters(char[] ch, int start, int length) > throws SAXException { > if(!(new String(ch, start, length).equalsIgnoreCase(NEW_LINE))) > { > contents += new String(ch, start, length); //save contents of specific cell > } > } > /** > * process each record > */ > private void processRecord(){ > try > { > if(canContinue) > { > if(rowNumber == headerRow) > { > if(cellType == null) > { > if(isRowStarted) > { > row.add( new TableCellValue<Object>(contents) ); > } > > } > else > { > //its headers of table > if(reader.getSharedStringsTable().getCount() > 0) > { > //its shared strings table format > if > (!reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).isSetT()) > { > List<CTRElt> list = > reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getRList(); > contents = EMPTY; > for (CTRElt c : list) > { > contents += c.getT(); > } > } > else > { > contents = > reader.getSharedStringsTable().getItems().get(Integer.parseInt(contents)).getT(); > } > } > if(cellType.equalsIgnoreCase(INLINE_STR) || cellType.equalsIgnoreCase(S)) > { > if(isRowStarted){ > row.add( new TableCellValue<Object>(contents) ); > } > } > } > } > else > { > if(cellType != null) > { > if(reader.getSharedStringsTable().getCount() > 0 && > contents.trim().length() > 0) > { > //rows are saved in sharedTable > contents = (new > XSSFRichTextString(reader.getSharedStringsTable().getEntryAt((Integer.parseInt(contents))))).toString(); > } > if(cellType.equalsIgnoreCase(S)) > { > row.add( new TableCellValue<Object>(contents) ); > > } > else if(cellType.equalsIgnoreCase(N)) > { > Number number = null; > try > { > number = NumberFormat.getInstance().parse(contents ); > } catch (ParseException e) > { > e.printStackTrace(); > } > row.add( new TableCellValue<Object>( > NumbersUtil.convertToAppropriateType(number) ) ); > } > else if(cellType.equals(B)) > { > row.add( new TableCellValue<Object>(new Boolean(contents) )); > } > else if(cellType.equals(STR)) > { > row.add( new TableCellValue<Object>(contents)); > } > else if(cellType.equals(INLINE_STR)) > { > row.add( new TableCellValue<Object>(contents)); > } > else > { > row.add( new TableCellValue<Object>(null)); > } > } > else > { > if(reader.getSharedStringsTable().getCount() > 0) > { > row.add( new TableCellValue<Object>(contents) ); > } > } > } > } > }catch(Exception ex){ > System.out.println(ex.getMessage()); > } > } > > > /////////////////////////////////////////// END > /////////////////////////////////////////////////// > > > > Best Regards, > > > Mubasshar Ahmad > >