JAVA解析PDF、WORD、EXCEL文档

来源：互联网 | 发布：淘宝上的旗舰店可信吗 | 编辑：程序博客网 | 时间：2024/06/05 12:05

java解析pdf、doc、docx、xls、xlsx格式文档

读取PDF文件jar引用

<dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>1.8.13</version></dependency>

读取WORD文件jar引用

<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>3.16-beta1</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>3.16-beta1</version></dependency>

读取EXCEL文件jar引用

<!-- EXCEL --><dependency><groupId>org.apache.xmlbeans</groupId><artifactId>xmlbeans</artifactId><version>2.6.0</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.16-beta1</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml-schemas</artifactId><version>3.16-beta1</version></dependency>

读取WORD文件方法

/**
 * Extracts the plain text of a Word document.
 *
 * <p>The text is read with POI's {@code WordExtractor}, which handles the binary
 * {@code .doc} format; {@code .docx} (OOXML) files need a different extractor and
 * are not handled by this method.
 *
 * @param filePath path of the Word file to read
 * @return the extracted text, or {@code null} if the file could not be opened or read
 */
public static String getTextFromWord(String filePath) {
    String result = null;
    // try-with-resources closes the extractor and then the stream, even when
    // getText() throws, so neither resource leaks.
    try (FileInputStream fis = new FileInputStream(filePath);
            WordExtractor wordExtractor = new WordExtractor(fis)) {
        result = wordExtractor.getText();
    } catch (IOException e) {
        // Covers FileNotFoundException as well. Failure is reported best-effort
        // and signalled to the caller by the null return value.
        e.printStackTrace();
    }
    return result;
}

读取PDF文件方法

/**
 * Reads a PDF file and returns its text content.
 *
 * @param filePath path of the PDF file to read
 * @return the text extracted from the PDF, or {@code null} if the file could not
 *         be opened, parsed or stripped
 */
public static String getTextFromPdf(String filePath) {
    String text = null;
    FileInputStream in = null;
    PDDocument pdf = null;
    try {
        in = new FileInputStream(filePath);
        PDFParser pdfParser = new PDFParser(in);
        pdfParser.parse();
        pdf = pdfParser.getPDDocument();
        text = new PDFTextStripper().getText(pdf);
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so a single handler covers
        // both the "cannot open" and the "cannot parse" cases.
        e.printStackTrace();
    } finally {
        // Release the input stream first, then the parsed document.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (pdf != null) {
            try {
                pdf.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return text;
}

读取EXCEL，xls格式

/** * @param filePath *            文件路径 * @return 读出的Excel的内容 */@SuppressWarnings({"resource", "deprecation"})public static String getTextFromExcel(String filePath) {StringBuffer buff = new StringBuffer();try {// 创建对Excel工作簿文件的引用HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));// 创建对工作表的引用。for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {if (null != wb.getSheetAt(numSheets)) {HSSFSheet aSheet = wb.getSheetAt(numSheets);// 获得一个sheetfor (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {if (null != aSheet.getRow(rowNumOfSheet)) {HSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一个行for (int cellNumOfRow = 0; cellNumOfRow <= aRow.getLastCellNum(); cellNumOfRow++) {if (null != aRow.getCell(cellNumOfRow)) {HSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值switch (aCell.getCellType()) {case HSSFCell.CELL_TYPE_FORMULA :break;case HSSFCell.CELL_TYPE_NUMERIC :buff.append(aCell.getNumericCellValue()).append('\t');break;case HSSFCell.CELL_TYPE_STRING :buff.append(aCell.getStringCellValue()).append('\t');break;}}}buff.append('\n');}}}}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}return buff.toString();}

读取EXCEL，xlsx格式

@SuppressWarnings("deprecation")public static String getTextFromExcel2007(String filePath) {StringBuffer buff = new StringBuffer();try {// 创建对Excel工作簿文件的引用@SuppressWarnings("resource")XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));// 创建对工作表的引用。for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {if (null != wb.getSheetAt(numSheets)) {XSSFSheet aSheet = wb.getSheetAt(numSheets);// 获得一个sheetfor (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {if (null != aSheet.getRow(rowNumOfSheet)) {XSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一个行for (int cellNumOfRow = 0; cellNumOfRow <= aRow.getLastCellNum(); cellNumOfRow++) {if (null != aRow.getCell(cellNumOfRow)) {XSSFCell aCell = aRow.getCell(cellNumOfRow);// 获得列值switch (aCell.getCellType()) {case HSSFCell.CELL_TYPE_FORMULA :break;case HSSFCell.CELL_TYPE_NUMERIC :buff.append(aCell.getNumericCellValue()).append('\t');break;case HSSFCell.CELL_TYPE_STRING :buff.append(aCell.getStringCellValue()).append('\t');break;}}}buff.append('\n');}}}}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}return buff.toString();}

1 0