poi事件驱动模式导入读取大批量excel2003/2007(减少内存消耗)

来源:互联网 发布:中建七局网络协同平台 编辑:程序博客网 时间:2024/06/05 19:43

import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;


import org.apache.poi.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.util.IOUtils;
/**
 *取得文件流,调用读取方法
 * @author lhx
 *
 */
public class FileImpotUtil {
    public static void getRecords(String filePath, String titleLength) throws Exception {
        URL urlFile;
        try {
            urlFile = new URL(filePath);
        } catch (MalformedURLException e3) {
            //                        logger.error(e3.getMessage(), e3);
            throw new Exception("获取下载文件地址失败!");
        }

        InputStream inp = null;
        URLConnection conn = null;
        try {
            conn = urlFile.openConnection();
            conn.setConnectTimeout(3000);
            conn.setReadTimeout(3 * 60 * 1000);

            inp = conn.getInputStream();


            if (!inp.markSupported()) {
                inp = new PushbackInputStream(inp, 8);
            }


            // Ensure that there is at least some data there
            byte[] header8 = IOUtils.peekFirst8Bytes(inp);
            IRowReader reader = new RowReader();
            if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
                Excel2003Reader excel03 = new Excel2003Reader();
                excel03.setRowReader(reader);
                excel03.process(inp);
                // 处理excel2007文件
            } else if (POIXMLDocument.hasOOXMLHeader(inp)) {
                OPCPackage pag = OPCPackage.open(inp);
                Excel2007Reader excel07 = new Excel2007Reader(pag, Integer.parseInt(titleLength), reader);
                excel07.process();
                pag.close();
            }
        } catch (Exception e) {
            throw new Exception("获取下载文件地址失败!");
        } finally {
            if (inp != null) {
                inp.close();
            }
        }
    }

}

------------------------------------------------------------------------------------------------------------------------------------

package com.cmos.ngesopcontrol.util.excelUtil;


import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;


import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


/**
 * 抽象Excel2003读取器,通过实现HSSFListener监听器,采用事件驱动模式解析excel2003
 * 中的内容,遇到特定事件才会触发,大大减少了内存的使用。
 * 注意Excel2003Reader类中引用的Record类在hssf包下
 */
public class Excel2003Reader implements HSSFListener {
    private int minColumns = -1;
    private POIFSFileSystem fs;
    private int lastRowNumber;
    private int lastColumnNumber;


    /** Should we output the formula, or the value it has? */
    private boolean outputFormulaValues = true;


    /** For parsing Formulas */
    private SheetRecordCollectingListener workbookBuildingListener;
    // excel2003工作薄
    private HSSFWorkbook stubWorkbook;


    // Records we pick up as we process
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;


    // 表索引
    private int sheetIndex = -1;
    private BoundSheetRecord[] orderedBSRs;
    @SuppressWarnings("unchecked")
    private ArrayList boundSheetRecords = new ArrayList();


    // For handling formulas with string results
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;
    // 当前行
    private int curRow = 0;
    // 存储行记录的容器
    private List<String> rowlist = new ArrayList<String>();;
    @SuppressWarnings("unused")
    private String sheetName;


    private IRowReader rowReader;


    public void setRowReader(IRowReader rowReader) {
        this.rowReader = rowReader;
    }


    /**
     * 遍历excel下所有的sheet
     * @throws IOException
     */
    public void process(InputStream inp) throws IOException {
        fs = new POIFSFileSystem(inp);
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }
        factory.processWorkbookEvents(request, fs);
    }


    /**
     * HSSFListener 监听方法,处理 Record
     */
    @Override
    @SuppressWarnings("unchecked")
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;
        String value = null;
        switch (record.getSid()) {
        case BoundSheetRecord.sid:
            boundSheetRecords.add(record);
            break;
        case BOFRecord.sid:
            BOFRecord br = (BOFRecord)record;
            if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                // 如果有需要,则建立子工作薄
                if (workbookBuildingListener != null && stubWorkbook == null) {
                    stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                }


                sheetIndex++;
                if (orderedBSRs == null) {
                    orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                }
                sheetName = orderedBSRs[sheetIndex].getSheetname();
            }
            break;


        case SSTRecord.sid:
            sstRecord = (SSTRecord)record;
            break;


        case BlankRecord.sid:
            BlankRecord brec = (BlankRecord)record;
            thisRow = brec.getRow();
            thisColumn = brec.getColumn();
            thisStr = "";
            rowlist.add(thisColumn, thisStr);
            break;
        case BoolErrRecord.sid: // 单元格为布尔类型
            BoolErrRecord berec = (BoolErrRecord)record;
            thisRow = berec.getRow();
            thisColumn = berec.getColumn();
            thisStr = berec.getBooleanValue() + "";
            rowlist.add(thisColumn, thisStr);
            break;


        case FormulaRecord.sid: // 单元格为公式类型
            FormulaRecord frec = (FormulaRecord)record;
            thisRow = frec.getRow();
            thisColumn = frec.getColumn();
            if (outputFormulaValues) {
                if (Double.isNaN(frec.getValue())) {
                    // Formula result is a string
                    // This is stored in the next record
                    outputNextStringRecord = true;
                    nextRow = frec.getRow();
                    nextColumn = frec.getColumn();
                } else {
                    thisStr = formatListener.formatNumberDateCell(frec);
                }
            } else {
                thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
            }
            rowlist.add(thisColumn, thisStr);
            break;
        case StringRecord.sid:// 单元格中公式的字符串
            if (outputNextStringRecord) {
                // String for formula
                StringRecord srec = (StringRecord)record;
                thisStr = srec.getString();
                thisRow = nextRow;
                thisColumn = nextColumn;
                outputNextStringRecord = false;
            }
            break;
        case LabelRecord.sid:
            LabelRecord lrec = (LabelRecord)record;
            curRow = thisRow = lrec.getRow();
            thisColumn = lrec.getColumn();
            value = lrec.getValue().trim();
            value = value.equals("") ? " " : value;
            rowlist.add(thisColumn, value);
            break;
        case LabelSSTRecord.sid: // 单元格为字符串类型
            LabelSSTRecord lsrec = (LabelSSTRecord)record;
            curRow = thisRow = lsrec.getRow();
            thisColumn = lsrec.getColumn();
            if (sstRecord == null) {
                rowlist.add(thisColumn, " ");
            } else {
                value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                value = value.equals("") ? " " : value;
                rowlist.add(thisColumn, value);
            }
            break;
        case NumberRecord.sid: // 单元格为数字类型
            NumberRecord numrec = (NumberRecord)record;
            curRow = thisRow = numrec.getRow();
            thisColumn = numrec.getColumn();
            value = formatListener.formatNumberDateCell(numrec).trim();
            value = value.equals("") ? " " : value;
            // 向容器加入列值
            rowlist.add(thisColumn, value);
            break;
        default:
            break;
        }


        // 遇到新行的操作
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }


        // 空值的操作
        if (record instanceof MissingCellDummyRecord) {
            MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
            curRow = thisRow = mc.getRow();
            thisColumn = mc.getColumn();
            rowlist.add(thisColumn, " ");
        }


        // 更新行和列的值
        if (thisRow > -1) {
            lastRowNumber = thisRow;
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }


        // 行结束时的操作
        if (record instanceof LastCellOfRowDummyRecord) {
            if (minColumns > 0) {
                // 列值重新置空
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
            }
            lastColumnNumber = -1;
            // 每行结束时, 调用getRows() 方法
            rowReader.getRows(sheetIndex, curRow, rowlist);


            // 清空容器
            rowlist.clear();
        }
    }


}

------------------------------------------------------------------------------------------------------------------------------------------------

package com.cmos.ngesopcontrol.util.excelUtil;


import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;


import javax.xml.parsers.ParserConfigurationException;


import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


/** A rudimentary XLSX -> CSV processor modeled on the
 * POI sample program XLS2CSVmra from the package
 * org.apache.poi.hssf.eventusermodel.examples.
 * As with the HSSF version, this tries to spot missing
 * rows and cells, and output empty entries for them.
 * <p/>
 * Data sheets are read using a SAX parser to keep the
 * memory footprint relatively small, so this should be
 * able to read enormous workbooks. The styles table and
 * the shared-string table must be kept in memory. The
 * standard POI styles table class is used, but a custom
 * (read-only) class is used for the shared string table
 * because the standard POI SharedStringsTable grows very
 * quickly with the number of unique strings.
 * <p/>
 * For a more advanced implementation of SAX event parsing
 * of XLSX files, see {@link XSSFEventBasedExcelExtractor}
 * and {@link XSSFSheetXMLHandler}. Note that for many cases,
 * it may be possible to simply use those with a custom
 * {@link SheetContentsHandler} and no SAX code needed of
 * your own! */
/**
 *
 * @author lhx
 *
 */
public class Excel2007Reader {
    /**
     * Uses the XSSF Event SAX helpers to do most of the work
     * of parsing the Sheet XML, and outputs the contents
     * as a (basic) CSV.
     */
    private List<String> rows = new ArrayList<String>();


    private final OPCPackage xlsxPackage;


    /**
     * Number of columns to read starting with leftmost
     */
    private int minColumns;


    private IRowReader rowReader;


    /**
     * Destination for data
     */
    private class SheetToCSV implements SheetContentsHandler {
        private String[] record;
        // private int minColumns;
        private int thisColumn = 0;


        public SheetToCSV() {
            super();
            // this.minColumns = minColumns;
        }


        @Override
        public void startRow(int rowNum) {
            //            record = new String[minColumns];
            //            System.out.print(rowNum + ":");
        }


        @Override
        public void endRow(int rowNum) {
            //            thisColumn = 0;
            //            System.out.println();
            rowReader.getRows(0, rowNum, rows);
            rows.clear();
            // System.out.println("**********************************");


        }


        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            rows.add(formattedValue);
            //            record[thisColumn] = formattedValue;
            //            thisColumn++;
            //            System.out.print(formattedValue + "  ");


        }


        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, no headers or footers in CSV
        }


    }


    /**
     * Creates a new XLSX -> CSV converter
     *
     * @param pkg        The XLSX package to process
     * @param output     The PrintStream to output the CSV to
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public Excel2007Reader(OPCPackage pkg, int minColumns, IRowReader rowReader) {
        xlsxPackage = pkg;
        this.minColumns = minColumns;
        this.rowReader = rowReader;
    }


    /**
     * Parses and shows the content of one sheet
     * using the specified styles and shared-strings tables.
     *
     * @param styles
     * @param strings
     * @param sheetInputStream
     */
    public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,
        InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
        }
    }


    /**
     * Initiates the processing of the XLS workbook file to CSV.
     *
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    public void process()
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(xlsxPackage);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator)xssfReader.getSheetsData();
        // int index = 0;
        // while (iter.hasNext()) {
        InputStream stream = iter.next();
        // String sheetName = iter.getSheetName();
        // this.output.println();
        // this.output.println(sheetName + " [index=" + index + "]:");
        processSheet(styles, strings, new SheetToCSV(), stream);
        stream.close();
    }
}

-----------------------------------------------------------------------------------------------------------------------------

package com.cmos.ngesopcontrol.util.excelUtil;


import java.util.List;


public interface IRowReader {


    /**业务逻辑实现方法
     * @param sheetIndex
     * @param curRow
     * @param rowlist
     */
    public void getRows(int sheetIndex, int curRow, List<String> rowlist);
}

---------------------------------------------------------------------------------------------------------------------------------

package com.cmos.ngesopcontrol.util.excelUtil;


import java.util.List;


public class RowReader implements IRowReader {


    /* 业务逻辑实现方法
     * @see com.eprosun.util.excel.IRowReader#getRows(int, int, java.util.List) */
    @Override
    public void getRows(int sheetIndex, int curRow, List<String> rowlist) {
        // TODO Auto-generated method stub
        System.out.print(curRow + " ");
        for (int i = 0; i < rowlist.size(); i++) {
            System.out.print(rowlist.get(i) + " ");
        }
        System.out.println();
    }


}

-----------------------------------------------------------------------------------------------------------------------------------