Java Web 解决POI导入大批量Excel2007数据报内存溢出问题
来源:互联网 发布:小学生沉迷网络的案例 编辑:程序博客网 时间:2024/06/03 21:38
使用Java做Excel的导入一般使用 Apache POI 这个类库,其支持Excel2003和2007版本的数据导入和导出。
使用POI能够导出大数据保证内存不溢出的一个重要原因是SXSSFWorkbook生成的EXCEL为2007版本,修改EXCEL2007文件后缀为ZIP打开可以看到,每一个Sheet都是一个xml文件,单元格格式和单元格坐标均用标签表示。直接使用SXSSFWorkbook来到导出EXCEL本身就是POI为了大数据量导出而量身定制的,所以导出可以直接使用
SXSSFWorkbook方式。但是在导入大批量的数据时,通常会出现出现内存溢出问题,使用SXSSFWorkbook进行数据缓存和调整JVM内存等根本无法解决此问题,因为在用FileInputStream读取文件时就已经报内存溢出了。因为最近业务要在系统中导入千万行级别的数据,我测试了一下系统原本的代码导入50000多行,也就是10M左右就不行了,为了解决这个问题可以将excel的xlsx格式转换为易读取的csv格式进行读取。
import java.io.File;import java.io.IOException;import java.io.InputStream;import java.io.PrintStream;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Date;import java.util.List;import javax.xml.parsers.ParserConfigurationException;import javax.xml.parsers.SAXParser;import javax.xml.parsers.SAXParserFactory;import org.apache.poi.hssf.usermodel.HSSFDateUtil;import org.apache.poi.openxml4j.exceptions.OpenXML4JException;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackageAccess;import org.apache.poi.ss.usermodel.BuiltinFormats;import org.apache.poi.ss.usermodel.DataFormatter;import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.model.StylesTable;import org.apache.poi.xssf.usermodel.XSSFCellStyle;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.Attributes; /** * CSV与XLSX格式的转换类 */public class XLSXCovertCSVReader { enum xssfDataType { BOOL,ERROR,FORMULA,INLINESTR,SSTINDEX,NUMBER } /** * 使用xssf_sax_API驱动解析Excel */ class MyXSSFSheetHandler extends DefaultHandler { /** * Table with styles */ private StylesTable stylesTable; /** * Table with unique strings */ private ReadOnlySharedStringsTable sharedStringsTable; /** * Destination for data */ private final PrintStream output; /** * 最小单元格列数 */ private final int minColumnCount; //Set when V start element is seen private boolean vIsOpen; //Set when cell start is seen; //used when cell close element is seen private xssfDataType nextDataType; //Used to format numeric cell values. private short formatIndex; private String formatString; private final DataFormatter formatter ; private int thisColumn = -1; //The last column printed to the output stream private int lastColumnNumber = -1; //Gathers characters as they are seen private StringBuffer value; private String[] record; private List<String[]> rows = new ArrayList<String[]>(); private boolean isCellNull = false; /** * * @param styles * @param strings * @param cols * @param target */ public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings, int cols, PrintStream target) { this.stylesTable = styles; this.sharedStringsTable = strings; this.minColumnCount = cols; this.output = target; this.value = new StringBuffer(); this.nextDataType = xssfDataType.NUMBER; this.formatter = new DataFormatter(); record = new String[this.minColumnCount]; rows.clear(); } /* * (non-Javadoc) * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if ("inlineStr".equals(name) || "v".equals(name)) { vIsOpen = true; //Clear contents cache value.setLength(0); } // c => cell else if ("c".equals(name)) { //Get the cell reference String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } thisColumn = nameToColumn(r.substring(0, firstDigit)); //Set up defaults. this.nextDataType = xssfDataType.NUMBER; this.formatIndex = -1; this.formatString = null; String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if("b".equals(cellType)) nextDataType = xssfDataType.BOOL; else if ("e".equals(cellType)) nextDataType = xssfDataType.ERROR; else if ("inlineStr".equals(cellType)) nextDataType = xssfDataType.INLINESTR; else if("s".equals(cellType)) nextDataType = xssfDataType.SSTINDEX; else if("str".equals(cellType)) nextDataType = xssfDataType.FORMULA; else if(cellStyleStr != null) { // It's a number, but almost certainly one // with a special style or format int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = stylesTable.getStyleAt(styleIndex); this.formatIndex = style.getDataFormat(); this.formatString = style.getDataFormatString(); if (this.formatString == null) this.formatString = BuiltinFormats .getBuiltinFormat(this.formatIndex); } } } public void endElement(String uri, String localName, String name) throws SAXException { String thisStr = null; // v => contents of a cell if ("v".equals(name)) { // Process the value contents as required. // Do now, as characters() may be called more than once switch (nextDataType) { case BOOL: char first = value.charAt(0); thisStr = first == '0' ? "FALSE" : "TRUE"; break; case ERROR: thisStr = "\"ERROR:" + value.toString() + '"'; break; case FORMULA: // A formula could result in a string value, // so always add double-quote characters. thisStr = '"' + value.toString() + '"'; break; case INLINESTR: // TODO: have seen an example of this, so it's untested. XSSFRichTextString rtsi = new XSSFRichTextString( value.toString()); thisStr = '"' + rtsi.toString() + '"'; break; case SSTINDEX: String sstIndex = value.toString(); try { int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString( sharedStringsTable.getEntryAt(idx)); thisStr = rtss.toString(); } catch (NumberFormatException ex) { output.println("Failed to parse SST index '" + sstIndex + "': " + ex.toString()); } break; case NUMBER: String n = value.toString(); if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) { Double d = Double.parseDouble(n); Date date=HSSFDateUtil.getJavaDate(d); thisStr=formateDateToString(date); } else if (this.formatString != null) thisStr = formatter.formatRawCellContents( Double.parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; break; default: thisStr = "(TODO: Unexpected type: " + nextDataType + ")"; break; } // Output after we've seen the string contents // Emit commas for any fields that were missing on this row if (lastColumnNumber == -1) { lastColumnNumber = 0; } if (thisStr == null || "".equals(isCellNull)) { isCellNull = true; } record[thisColumn] = thisStr; // Update column if (thisColumn > -1) lastColumnNumber = thisColumn; } else if ("row".equals(name)) { // Print out any missing commas if needed if (minColumns > 0) { // Columns are 0 based if (lastColumnNumber == -1) { lastColumnNumber = 0; } if (isCellNull == false && record[0] != null && record[1] != null) { rows.add(record.clone()); isCellNull = false; for (int i = 0; i < record.length; i++) { record[i] = null; } } } lastColumnNumber = -1; } } public List<String[]> getRows() { return rows; } public void setRows(List<String[]> rows) { this.rows = rows; } /** * Captures characters only if a suitable element is open. Originally * was just "v"; extended for inlineStr also. */ public void characters(char[] ch, int start, int length) throws SAXException { if (vIsOpen) value.append(ch, start, length); } private int nameToColumn(String name) { int column = -1; for(int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) *26 + c - 'A'; } return column; } private String formateDateToString(Date date) { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); return sdf.format(date); } } private OPCPackage xlsxPackage; private int minColumns; private PrintStream output; /** * Creates a new XLSX -> CSV converter * @param pkg * @param output * @param sheetName * @param minColumns */ public XLSXCovertCSVReader(OPCPackage pkg, PrintStream output, int minColumns) { this.xlsxPackage = pkg; this.output = output; this.minColumns = minColumns; } /** * * @param styles * @param strings * @param sheetInputStream * @return */ public List<String[]> processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws ParserConfigurationException, SAXException, IOException{ InputSource sheetSource = new InputSource(sheetInputStream); SAXParserFactory saxFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxFactory.newSAXParser(); XMLReader sheetParser = saxParser.getXMLReader(); MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings, this.minColumns, this.output); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); return handler.getRows(); } public List<String[]> process() throws IOException, SAXException, OpenXML4JException, ParserConfigurationException{ ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable( this.xlsxPackage); XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); List<String[]> list = null; StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader .getSheetsData(); int index = 0; while (iter.hasNext()) { InputStream stream = iter.next();// String sheetNameTemp = iter.getSheetName();// if(this.sheetName.equals(sheetNameTemp)) { //多个sheet表格的数据拼接成一个list if(null != list){ list.addAll(processSheet(styles, strings, stream)); stream.close(); ++index; } else { list = processSheet(styles, strings, stream); stream.close(); ++index; }// } } return list; } public static List<String[]> readerExcel(File file, int minColumns) throws IOException, SAXException, OpenXML4JException, ParserConfigurationException {// File file = new File(path); OPCPackage p = OPCPackage.open(file, PackageAccess.READ); XLSXCovertCSVReader xlsx2csv = new XLSXCovertCSVReader(p, System.out, minColumns); List<String[]> list = xlsx2csv.process(); p.close(); return list; }
调用转换类读取Excel当中的数据存储到List容器中
//得到表格中的数据 List<String[]> list = XLSXCovertCSVReader.readerExcel(file,11); //第二行开始为数据行 int count = 0; for(int j = 1; j<list.size(); j++){ excelData = setField1(excelData, list.get(j)); if (null != excelData) { ++count; //...赋值给数据领域的JavaBean... entityList.add(数据对象); //采用计数器,读一万行存储一次,这个地方另起线程存储比较好 if(0 == count % 10000){ saveRecords(entityList); entityList.clear(); } } }
菜鸟刚上路,大神请绕道!
0 0
- Java Web 解决POI导入大批量Excel2007数据报内存溢出问题
- POI : 解决大批量数据导出Excel产生内存溢出的方案
- POI实现大数据EXCLE导入导出,解决内存溢出问题
- JAVA解决大批量导出数据到excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- 解决大批量数据导出Excel产生内存溢出的方案
- poi解决内存消耗过大溢出问题
- java海量数据导出xls分页解决报内存溢出问题
- java+poi+excel2007批量修改/导入用户信息
- poi导入excel2007没问题导入2003报错IncompatibleClassChangeError
- 解决Eclipse报内存溢出问题
- mysql 导入数据时内存溢出问题
- 解决POI大数据导出Excel内存溢出、应用假死
- SublimeText 相关
- 用冒泡排序模拟实现qsort
- 回归故乡发展
- LeetCode | 506. Relative Ranks
- LeetCode 412. Fizz Buzz
- Java Web 解决POI导入大批量Excel2007数据报内存溢出问题
- 字符串替换
- Storm原理剖析
- 为树莓派添加 DS1302 实时时钟(硬件时钟)
- django进阶-3
- poj 2251 bfs基础
- Android—App—必备开发组件—调试工具篇—Stetho[配合OkHttp框架使用]
- grep
- nginx 对某个目录或整个网站进行登录认证的方法