poi读取excel2007大文件

来源:互联网 发布:陕西干部网络教育官网 编辑:程序博客网 时间:2024/05/18 05:53

jar包:poi3.7、xbean、xercesImpl

package com.saxfs;import java.io.InputStream;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.model.SharedStringsTable;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;public abstract class  XxlsAbstract extends DefaultHandler {/** * @param args */public static void main(String[] args) {// TODO Auto-generated method stub}private SharedStringsTable sst;      private String lastContents;      private boolean nextIsString;        private int sheetIndex = -1;      private List<String> rowlist = new ArrayList<String>();      private int curRow = 0;      private int curCol = 0;        //excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型  //  public abstract void optRows(int curRow, List<String> rowlist) throws SQLException ;            //excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型      public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws Exception;            //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3      public void processOneSheet(String filename,int sheetId) throws Exception {          OPCPackage pkg = OPCPackage.open(filename);          XSSFReader r = new XSSFReader(pkg);          SharedStringsTable sst = r.getSharedStringsTable();                    XMLReader parser = fetchSheetParser(sst);            // rId2 found by processing the Workbook          // 根据 rId# 或 rSheet# 查找sheet          InputStream sheet2 = r.getSheet("rId"+sheetId);          sheetIndex++;          InputSource sheetSource = new InputSource(sheet2);          parser.parse(sheetSource);          sheet2.close();      }        /**      * 遍历 excel 文件      */      public void process(String filename) throws Exception {          OPCPackage pkg = OPCPackage.open(filename);          XSSFReader r = new XSSFReader(pkg);          SharedStringsTable sst = r.getSharedStringsTable();            XMLReader parser = fetchSheetParser(sst);            Iterator<InputStream> sheets = r.getSheetsData();          while (sheets.hasNext()) {              curRow = 0;              sheetIndex++;              InputStream sheet = sheets.next();              InputSource sheetSource = new InputSource(sheet);              parser.parse(sheetSource);              sheet.close();          }      }        public XMLReader fetchSheetParser(SharedStringsTable sst)              throws SAXException {          XMLReader parser = XMLReaderFactory                  .createXMLReader("org.apache.xerces.parsers.SAXParser");          this.sst = sst;          parser.setContentHandler(this);          return parser;      }        public void startElement(String uri, String localName, String name,              Attributes attributes) throws SAXException {          // c => 单元格          if (name.equals("c")) {              // 如果下一个元素是 SST 的索引,则将nextIsString标记为true              String cellType = attributes.getValue("t");              if (cellType != null && cellType.equals("s")) {                  nextIsString = true;              } else {                  nextIsString = false;              }          }          // 置空          lastContents = "";      }        public void endElement(String uri, String localName, String name)              throws SAXException {          // 根据SST的索引值的到单元格的真正要存储的字符串          // 这时characters()方法可能会被调用多次          if (nextIsString) {              try {                  int idx = Integer.parseInt(lastContents);                  lastContents = new XSSFRichTextString(sst.getEntryAt(idx))                          .toString();              } catch (Exception e) {                }          }            //t 是数据标签          // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符          if (name.equals("v")) {              String value = lastContents.trim();              value = value.equals("")?" ":value;              rowlist.add(curCol, value);              curCol++;          }else if(name.equals("t")){            String value = lastContents.trim();              value = value.equals("")?" ":value;              rowlist.add(curCol, value);              curCol++;         }else {              //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法              if (name.equals("row")) {                  try {                      optRows(sheetIndex,curRow,rowlist);                  } catch (Exception e) {                      e.printStackTrace();                  }                  rowlist.clear();                  curRow++;                  curCol = 0;              }          }      }        public void characters(char[] ch, int start, int length)              throws SAXException {          //得到单元格内容的值          lastContents += new String(ch, start, length);      }  }


测试类如下:

package com.saxfs;import java.util.List;public class XxlsPrint extends XxlsAbstract {@Overridepublic void optRows(int sheetIndex, int curRow, List<String> rowlist)throws Exception {for (int i = 0; i < rowlist.size(); i++) {              System.out.print("'" + rowlist.get(i) + "',");          }          System.out.println();  }/** * @param args */public static void main(String[] args) {XxlsPrint howto = new XxlsPrint();          try {howto.processOneSheet("f:/dcs/cess.xlsx",3);//完整的代码//howto.process("f:/dcs/cess.xlsx");} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}  }}


 

原创粉丝点击