采用POI技术进行大数据导入
来源:互联网 发布:win10引导 ubuntu 编辑:程序博客网 时间:2024/06/01 15:54
class ExcelImportUtil extends DefaultHandler {
private ExcelImportParse importExcel;
private StylesTable stylesTable;
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private List<String> rowlist = new ArrayList<>();
private HashMap<Integer, String> rowMap = new HashMap<Integer, String>();
private int curRow = 0;
// private int curCol = 0;
// 定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等
private String preRef = null, ref = null;
// 定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格
private String maxRef = null;
private CellDataType nextDataType = CellDataType.SSTINDEX;
private final DataFormatter formatter = new DataFormatter();
private short formatIndex;
private String formatString;
private Logger logger = LoggerFactory.getLogger(getClass());
private int sheetIndex = -1;
private Long headerIs = null;
// 用一个enum表示单元格可能的数据类型
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
public ExcelImportUtil(ExcelImportParse importExcel) {
this.importExcel = importExcel;
}
public void process(InputStream is,Long headerIs) throws Exception {
if (null == importExcel) {
throw new ValidationException(new ValidationError("importExcel","需要rowStrategy"));
}
this.headerIs=headerIs;
OPCPackage pkg = OPCPackage.open(is);
XSSFReader r = new XSSFReader(pkg);
stylesTable = r.getStylesTable();
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
importExcel.doService();
}
public XMLReader fetchSheetParser(SharedStringsTable sst)
throws SAXException {
XMLReader parser = XMLReaderFactory
.createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
/**
* 解析一个element的开始时触发事件
* @param uri
* @param localName
* @param name
* @param attributes
* @throws SAXException
*/
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => cell
if (name.equals("c")) {
// 前一个单元格的位置
if (preRef == null) {
preRef = attributes.getValue("r");
} else {
preRef = ref;
}
// 当前单元格的位置
ref = attributes.getValue("r");
this.setNextDataType(attributes);
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
/**
* 根据element属性设置数据类型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER;
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
if ("b".equals(cellType)) {
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) {
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) {
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) {
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
formatIndex = style.getDataFormat();
formatString = style.getDataFormatString();
if ("m/d/yy" == formatString) {
nextDataType = CellDataType.DATE;
// full format is "yyyy-MM-dd hh:mm:ss.SSS";
formatString = "yyyy-MM-dd";
}
if (formatString == null) {
nextDataType = CellDataType.NULL;
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
}
}
private int getColNum(String ref) {
ref = ref.replaceAll("\\d+", "");
int num = 0;
int result = 0;
int length = ref.length();
for (int i = 0; i < length; i++) {
char ch = ref.charAt(length - i - 1);
num = (int) (ch - 'A' + 1);
num *= Math.pow(26, i);
result += num;
}
return result;
}
/**
* 解析一个element元素结束时触发事件
* @param uri
* @param localName
* @param name
* @throws SAXException
*/
public void endElement(String uri, String localName, String name)
throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
.toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
String value = this.getDataValue(lastContents.trim(), "");
rowMap.put(getColNum(ref), value);
} else {
// 如果标签名称为 row,这说明已到行尾,调用 optRows() 方法
if (name.equals("row")) {
// 默认第一行为表头,以该行单元格数目为最大数目
if (curRow == 0) {
maxRef = ref;
}
for (int i = 0; i <= getColNum(maxRef); i++) {
String val = rowMap.get(i + 1);
if (val == null) {
val = "";
}
rowlist.add(i, val);
}
//
try {
if (!rowlist.isEmpty()) {
importExcel.optRow(sheetIndex,curRow,rowlist,headerIs);
}
} catch (Exception e) {
logger.error(e.getMessage(), e);
throw new SAXException(e.getMessage());
}
// getRows(1, curRow, rowlist);
curRow++;
// 一行的末尾重置一些数据
rowlist.clear();
rowMap.clear();
preRef = null;
ref = null;
}
}
}
/**
* 根据数据类型获取数据
*
* @param value
* @param thisStr
* @return
*/
public String getDataValue(String value, String thisStr)
{
switch (nextDataType) {
// 这几个的顺序不能随便交换,交换了很可能会导致数据错误
case BOOL:
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR:
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA:
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX:
String sstIndex = value.toString();
thisStr = value.toString();
break;
case NUMBER:
if (formatString != null) {
thisStr = formatter.formatRawCellContents(
Double.parseDouble(value), formatIndex, formatString)
.trim();
} else {
thisStr = value;
}
thisStr = thisStr.replace("_", "").trim();
break;
case DATE:
try {
thisStr = formatter.formatRawCellContents(
Double.parseDouble(value), formatIndex, formatString);
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
thisStr = thisStr.replace(" ", "");
break;
default:
thisStr = "";
break;
}
return thisStr;
}
/**
* 获取element的文本数据
* @param ch
* @param start
* @param length
* @throws SAXException
*/
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
/**
* 计算两个单元格之间的单元格数目(同一行)
*
* @param ref
* @param preRef
* @return
*/
public int countNullCell(String ref, String preRef) {
// excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD
String xfd = ref.replaceAll("\\d+", "");
String xfd_1 = preRef.replaceAll("\\d+", "");
xfd = fillChar(xfd, 3, '@', true);
xfd_1 = fillChar(xfd_1, 3, '@', true);
char[] letter = xfd.toCharArray();
char[] letter_1 = xfd_1.toCharArray();
int res = (letter[0] - letter_1[0]) * 26 * 26
+ (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
return res - 1;
}
/**
* 字符串的填充
*
* @param str
* @param len
* @param let
* @param isPre
* @return
*/
String fillChar(String str, int len, char let, boolean isPre) {
int len_1 = str.length();
if (len_1 < len) {
if (isPre) {
for (int i = 0; i < (len - len_1); i++) {
str = let + str;
}
} else {
for (int i = 0; i < (len - len_1); i++) {
str = str + let;
}
}
}
return str;
}
public void cleanData() {
this.sheetIndex = -1;
this.rowlist = new ArrayList<>();
this.curRow = 0;
}
}
阅读全文
0 0
- 采用POI技术进行大数据导入
- POI 导入大数据
- poi导入100万大数据
- 利用POI技术把excel数据导入mysql
- 采用MapReduce与Hadoop进行大数据分析
- POI海量数据导入
- POI数据导入Excel
- POI数据导入数据库
- POI-数据导入导出
- poi导入数据
- POI-数据导入导出
- poi大数据量excel导入
- POI 大数据导出
- 使用poi导入excel数据
- 使用Oracle SQL Loader进行大数据导入
- 在阿里云Rds上采用MySQLdump工具对数据进行导出导入
- 在阿里云Rds上采用MySQLdump工具对数据进行导出导入
- Java 使用poi导入excel,结合xml文件进行数据验证的例子
- 生成简单流水号
- rowid的组成
- C/C++之回调函数
- leetcode(2):Add Two Numbers
- 解决MyBatis进行数据库操作时因参数过长导致打印日志过长的问题
- 采用POI技术进行大数据导入
- java后台post请求
- kafka集群搭建和使用Java写kafka生产者消费者
- 仿淘宝搜索框之---ie9以下版本placeholder效果
- Tortoise SVN安装
- 中间件 rpc是什么?php中流行的中间件rpc框架有哪些
- mysql 关于DATE函数的扩展
- MyBatis 注解 pagehelper 分页
- 二叉搜索树的操作集