Java读Excel文件

来源:互联网 发布:锁眼卫星数据 编辑:程序博客网 时间:2024/06/05 18:57

如果用XSSFWorkbook 来读取excel,遇到文件太大时候,会导致oom,  OutOfMemoryError: Java heap space

所以apache官网上有推荐 

If memory footprint is an issue, then for XSSF, you can get at the underlying XML data, and process it yourself.

http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api

通过xml的方式读取excel,一行一行的处理,也不需要将整个文件载入,导致内存不足情况

带注释的官网例子:

package com.datacenter.hbase.adapter; import java.io.InputStream;import java.util.Iterator;import java.util.concurrent.atomic.AtomicInteger;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator;import org.apache.poi.xssf.model.SharedStringsTable;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;import org.apache.poi.openxml4j.opc.OPCPackage;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;public class ExcelFileHandler {/** * 解析一个sheet  * @param filename * @throws Exception */public void processOneSheet(String filename,String sheetName) throws Exception {OPCPackage pkg = OPCPackage.open(filename);XSSFReader r = new XSSFReader( pkg );SharedStringsTable sst = r.getSharedStringsTable(); //打开文件获取文件句柄XMLReader parser = fetchSheetParser(sst);// To look up the Sheet Name / Sheet Order / rID,//  you need to process the core Workbook stream.// Normally it's of the form rId# or rSheet#InputStream sheet2 = r.getSheet( sheetName );InputSource sheetSource = new InputSource(sheet2);parser.parse(sheetSource);sheet2.close();}/** * 解析多个sheet * @param filename * @throws Exception */public void processAllSheets(String filename) throws Exception {OPCPackage pkg = OPCPackage.open(filename);XSSFReader r = new XSSFReader( pkg );SharedStringsTable sst = r.getSharedStringsTable();System.out.println( "sheet.size="+sst.getCount() );for( CTRst rst : sst.getItems() ){//System.out.println( rst.get);}XMLReader parser = fetchSheetParser(sst); XSSFReader.SheetIterator sheets = (SheetIterator) r.getSheetsData();while(sheets.hasNext()) {System.out.println("Processing new sheet:\n");InputStream sheet = sheets.next();System.out.println( "sheetname:"+sheets.getSheetName() );;InputSource sheetSource = new InputSource(sheet);parser.parse(sheetSource);sheet.close();System.out.println("");}}public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {XMLReader parser =XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");//自定义处理ContentHandler handler = new SheetHandler(sst);parser.setContentHandler(handler);return parser;}/**  * See org.xml.sax.helpers.DefaultHandler javadocs  * 将excel作为xml来处理 ,类似如下格式 * <row> *  <cell/> *  <cell/> * <cell/> * <cell/> * </row> *  */private static class SheetHandler extends DefaultHandler {private SharedStringsTable sst;private String lastContents;private boolean nextIsString;private AtomicInteger counter = new AtomicInteger( 0 ) ;private SheetHandler(SharedStringsTable sst) {this.sst = sst;}public void exist(){if( counter .get() == 10 ){System.exit( 0 );}}/** * 处理<cell> */public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException {// c => cellif(name.equals("c")) {String rowNum = attributes.getValue("r") ;// Print the cell referenceif( rowNum.startsWith("A") ){exist() ;counter.incrementAndGet() ;System.out.print( rowNum + " - "); //坐标}// Figure out if the value is an index in the SST// cellType == null ,if there is emptyString cellType = attributes.getValue("t"); if(cellType != null && cellType.equals("s")) {//下面是否是字符串nextIsString = true;} else {nextIsString = false;}}// Clear contents cachelastContents = "";}/** * 处理结尾的 </cell> */public void endElement(String uri, String localName, String name)throws SAXException {// Process the last contents as required.// Do now, as characters() may be called more than onceif(nextIsString) {int idx = Integer.parseInt(lastContents);//获取单元格数据lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();nextIsString = false;}// v => contents of a cell// Output after we've seen the string contentsif(name.equals("v")) {//处理每个单元格数据if(  lastContents == null  || "".equals( lastContents )  ){System.out.print("  #  null");}System.out.print("  #  "+lastContents);}if( "row".equals( name ) ){ //一行结束System.out.println(); //change a line}}public void characters(char[] ch, int start, int length)throws SAXException {lastContents += new String(ch, start, length);}}public static void main(String[] args) throws Exception {String file = "F:\\excel\\IPAddressInfo.xlsx" ;ExcelFileHandler example = new ExcelFileHandler();//example.processOneSheet( file , "rId1");example.processAllSheets( file );}}



网上的例子:  原文  http://www.cnblogs.com/lonelyxmas/archive/2013/01/29/2881958.html

上面的代码是没有处理空值的情况的,因为xml读取的时候会跳过该cell,如果要处理空值。

参考:http://www.bbsmax.com/A/MyJxWYr2zn/





0 0
原创粉丝点击