poi事件驱动模式导入读取大批量excel2003/2007(减少内存消耗)
来源:互联网 发布:中建七局网络协同平台 编辑:程序博客网 时间:2024/06/05 19:43
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.util.IOUtils;
/**
*取得文件流,调用读取方法
* @author lhx
*
*/
public class FileImpotUtil {
public static void getRecords(String filePath, String titleLength) throws Exception {
URL urlFile;
try {
urlFile = new URL(filePath);
} catch (MalformedURLException e3) {
// logger.error(e3.getMessage(), e3);
throw new Exception("获取下载文件地址失败!");
}
InputStream inp = null;
URLConnection conn = null;
try {
conn = urlFile.openConnection();
conn.setConnectTimeout(3000);
conn.setReadTimeout(3 * 60 * 1000);
inp = conn.getInputStream();
inp = new PushbackInputStream(inp, 8);
}
// Ensure that there is at least some data there
byte[] header8 = IOUtils.peekFirst8Bytes(inp);
IRowReader reader = new RowReader();
if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
Excel2003Reader excel03 = new Excel2003Reader();
excel03.setRowReader(reader);
excel03.process(inp);
// 处理excel2007文件
} else if (POIXMLDocument.hasOOXMLHeader(inp)) {
OPCPackage pag = OPCPackage.open(inp);
Excel2007Reader excel07 = new Excel2007Reader(pag, Integer.parseInt(titleLength), reader);
excel07.process();
pag.close();
}
} catch (Exception e) {
throw new Exception("获取下载文件地址失败!");
} finally {
if (inp != null) {
inp.close();
}
}
}
}
------------------------------------------------------------------------------------------------------------------------------------
package com.cmos.ngesopcontrol.util.excelUtil;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* 抽象Excel2003读取器,通过实现HSSFListener监听器,采用事件驱动模式解析excel2003
* 中的内容,遇到特定事件才会触发,大大减少了内存的使用。
* 注意Excel2003Reader类中引用的Record类在hssf包下
*/
public class Excel2003Reader implements HSSFListener {
private int minColumns = -1;
private POIFSFileSystem fs;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
// excel2003工作薄
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
// 表索引
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
// 当前行
private int curRow = 0;
// 存储行记录的容器
private List<String> rowlist = new ArrayList<String>();;
@SuppressWarnings("unused")
private String sheetName;
private IRowReader rowReader;
public void setRowReader(IRowReader rowReader) {
this.rowReader = rowReader;
}
/**
* 遍历excel下所有的sheet
* @throws IOException
*/
public void process(InputStream inp) throws IOException {
fs = new POIFSFileSystem(inp);
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* HSSFListener 监听方法,处理 Record
*/
@Override
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord)record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// 如果有需要,则建立子工作薄
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord)record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord)record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
rowlist.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: // 单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord)record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
rowlist.add(thisColumn, thisStr);
break;
case FormulaRecord.sid: // 单元格为公式类型
FormulaRecord frec = (FormulaRecord)record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
rowlist.add(thisColumn, thisStr);
break;
case StringRecord.sid:// 单元格中公式的字符串
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord)record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord)record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? " " : value;
rowlist.add(thisColumn, value);
break;
case LabelSSTRecord.sid: // 单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord)record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
rowlist.add(thisColumn, " ");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? " " : value;
rowlist.add(thisColumn, value);
}
break;
case NumberRecord.sid: // 单元格为数字类型
NumberRecord numrec = (NumberRecord)record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
value = formatListener.formatNumberDateCell(numrec).trim();
value = value.equals("") ? " " : value;
// 向容器加入列值
rowlist.add(thisColumn, value);
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord)record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
rowlist.add(thisColumn, " ");
}
// 更新行和列的值
if (thisRow > -1) {
lastRowNumber = thisRow;
}
if (thisColumn > -1) {
lastColumnNumber = thisColumn;
}
// 行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColumns > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
// 每行结束时, 调用getRows() 方法
rowReader.getRows(sheetIndex, curRow, rowlist);
// 清空容器
rowlist.clear();
}
}
}
------------------------------------------------------------------------------------------------------------------------------------------------
package com.cmos.ngesopcontrol.util.excelUtil;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/** A rudimentary XLSX -> CSV processor modeled on the
* POI sample program XLS2CSVmra from the package
* org.apache.poi.hssf.eventusermodel.examples.
* As with the HSSF version, this tries to spot missing
* rows and cells, and output empty entries for them.
* <p/>
* Data sheets are read using a SAX parser to keep the
* memory footprint relatively small, so this should be
* able to read enormous workbooks. The styles table and
* the shared-string table must be kept in memory. The
* standard POI styles table class is used, but a custom
* (read-only) class is used for the shared string table
* because the standard POI SharedStringsTable grows very
* quickly with the number of unique strings.
* <p/>
* For a more advanced implementation of SAX event parsing
* of XLSX files, see {@link XSSFEventBasedExcelExtractor}
* and {@link XSSFSheetXMLHandler}. Note that for many cases,
* it may be possible to simply use those with a custom
* {@link SheetContentsHandler} and no SAX code needed of
* your own! */
/**
*
* @author lhx
*
*/
public class Excel2007Reader {
/**
* Uses the XSSF Event SAX helpers to do most of the work
* of parsing the Sheet XML, and outputs the contents
* as a (basic) CSV.
*/
private List<String> rows = new ArrayList<String>();
private final OPCPackage xlsxPackage;
/**
* Number of columns to read starting with leftmost
*/
private int minColumns;
private IRowReader rowReader;
/**
* Destination for data
*/
private class SheetToCSV implements SheetContentsHandler {
private String[] record;
// private int minColumns;
private int thisColumn = 0;
public SheetToCSV() {
super();
// this.minColumns = minColumns;
}
@Override
public void startRow(int rowNum) {
// record = new String[minColumns];
// System.out.print(rowNum + ":");
}
@Override
public void endRow(int rowNum) {
// thisColumn = 0;
// System.out.println();
rowReader.getRows(0, rowNum, rows);
rows.clear();
// System.out.println("**********************************");
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
rows.add(formattedValue);
// record[thisColumn] = formattedValue;
// thisColumn++;
// System.out.print(formattedValue + " ");
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}
/**
* Creates a new XLSX -> CSV converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public Excel2007Reader(OPCPackage pkg, int minColumns, IRowReader rowReader) {
xlsxPackage = pkg;
this.minColumns = minColumns;
this.rowReader = rowReader;
}
/**
* Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
public void process()
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
XSSFReader xssfReader = new XSSFReader(xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator)xssfReader.getSheetsData();
// int index = 0;
// while (iter.hasNext()) {
InputStream stream = iter.next();
// String sheetName = iter.getSheetName();
// this.output.println();
// this.output.println(sheetName + " [index=" + index + "]:");
processSheet(styles, strings, new SheetToCSV(), stream);
stream.close();
}
}
-----------------------------------------------------------------------------------------------------------------------------
package com.cmos.ngesopcontrol.util.excelUtil;
import java.util.List;
public interface IRowReader {
/**业务逻辑实现方法
* @param sheetIndex
* @param curRow
* @param rowlist
*/
public void getRows(int sheetIndex, int curRow, List<String> rowlist);
}
---------------------------------------------------------------------------------------------------------------------------------
package com.cmos.ngesopcontrol.util.excelUtil;
import java.util.List;
public class RowReader implements IRowReader {
/* 业务逻辑实现方法
* @see com.eprosun.util.excel.IRowReader#getRows(int, int, java.util.List) */
@Override
public void getRows(int sheetIndex, int curRow, List<String> rowlist) {
// TODO Auto-generated method stub
System.out.print(curRow + " ");
for (int i = 0; i < rowlist.size(); i++) {
System.out.print(rowlist.get(i) + " ");
}
System.out.println();
}
}
-----------------------------------------------------------------------------------------------------------------------------------
- poi事件驱动模式导入读取大批量excel2003/2007(减少内存消耗)
- poi读取excel2003 2007
- POI读取excel2003兼容2007
- 使用POI导入导出Excel2003、2007示例
- 使用POI导入导出Excel2003、2007示例
- poi读取excel2003和2007问题
- POI 读取Excel2003、2007的一个Demo
- 关于POI读取EXCEL2003----2007问题
- Excel2003、2007 -- 读写大批量数据
- POI事件模式读取Excel 2007(三)
- POI 导入EXCEL2003 和EXCEL2007
- poi读取和写入excel2003
- Java 操作 Excel (读取Excel2003 2007,Poi实现)
- 知识总结-Java 操作 Excel (读取Excel2003 2007,Poi实现)
- Java 操作 Excel (读取Excel2003 2007,Poi实现)
- Java 操作 Excel (读取Excel2003 2007,Poi实现)
- Java 操作 Excel (读取Excel2003 2007,Poi实现)
- POI excel2003 2007
- LeetCode日常刷题(3)
- C语言中比较字符串大小
- Centos7.3_64位系统下yum命令安装MySQL5.7
- 线程(7)--wait()方法、notify()方法、notifyAll()方法
- 4-apache kylin企业级开源大数据分析平台
- poi事件驱动模式导入读取大批量excel2003/2007(减少内存消耗)
- 为什么“15。。。”会导致微信ANR?
- 腾讯云_tomcat_SSL证书安装
- java源码之Arraylist
- 访问Google的两种方法
- Jmeter学习—001—JDBC测试计划—连接Oracle
- 验证用户名只包含字母,中文
- C++封装Mysql数据库
- 通用权限管理设计 之 数据权限