Java实现读取PDF
来源:互联网 发布:af淘宝旗舰店真假 编辑:程序博客网 时间:2024/06/07 04:00
package com.lss.common.pdf;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.pdfbox.TextToPDF;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Helpers for creating PDF documents and extracting their text with Apache PDFBox.
 */
public class PDFUtils {

    private static final Logger logger = Logger.getLogger(PDFUtils.class);

    /**
     * Small demo: builds a PDF from the text "1", appends an empty page and
     * prints the resulting page count to standard output.
     *
     * @param args ignored
     * @throws IOException if the document cannot be created or closed
     */
    public static void main(String[] args) throws IOException {
        TextToPDF pdfCreator = new TextToPDF();
        StringReader reader = new StringReader("1");
        PDDocument pdfDoc = null;
        try {
            pdfDoc = pdfCreator.createPDFFromText(reader);
            pdfDoc.addPage(new PDPage());
            // In order for the PDF document to be openable by Adobe Reader, it
            // needs to have some pages in it. So we'll check that.
            PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();
            System.out.println(docCatalog.getAllPages().size());
        } finally {
            // Close only after the last use of the document; the original closed
            // it first and then kept adding pages to it.
            reader.close();
            if (pdfDoc != null) {
                pdfDoc.close();
            }
        }
    }

    /**
     * Reads a PDF file and returns its text content.
     *
     * @param path path of the PDF file to read
     * @return the extracted text, or {@code null} if the file could not be
     *         opened, parsed or read (the failure is logged)
     */
    public String readPDFFile(String path) {
        FileInputStream fis = null;
        PDDocument document = null;
        try {
            fis = new FileInputStream(path);
            PDFParser parser = new PDFParser(fis);
            parser.parse();
            document = parser.getPDDocument();
            return new PDFTextStripper().getText(document);
        } catch (Exception ex) {
            // Pass the throwable separately so log4j records the stack trace.
            logger.error("Failed to read PDF file: " + path, ex);
            return null;
        } finally {
            closeQuietly(document, fis);
        }
    }

    /**
     * Reads a PDF file and returns its text content.
     *
     * @param file the PDF file to read
     * @return the extracted text, or an empty string if an {@link IOException}
     *         occurred while loading or reading the document (the failure is logged)
     * @throws Exception if the text stripper cannot be created or a resource
     *         fails to close
     */
    public String readPDFFile(File file) throws Exception {
        PDFTextStripper stripper = new PDFTextStripper();
        FileInputStream fis = null;
        PDDocument pdfDocument = null;
        try {
            fis = new FileInputStream(file);
            pdfDocument = PDDocument.load(fis);
            return stripper.getText(pdfDocument);
        } catch (IOException e) {
            logger.error("Failed to read PDF file: " + file, e);
            return "";
        } finally {
            // Nested finally guarantees the stream is closed even when closing
            // the document throws. Closing the PDDocument also releases its
            // underlying COSDocument, so no separate close is needed.
            try {
                if (pdfDocument != null) {
                    pdfDocument.close();
                }
            } finally {
                if (fis != null) {
                    fis.close();
                }
            }
        }
    }

    /**
     * Closes the given document and stream, logging (not propagating) any
     * failure. Either argument may be {@code null}.
     */
    private static void closeQuietly(PDDocument document, InputStream stream) {
        if (document != null) {
            try {
                document.close();
            } catch (IOException e) {
                logger.warn("Failed to close PDF document", e);
            }
        }
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                logger.warn("Failed to close PDF input stream", e);
            }
        }
    }
}
- Java实现读取PDF
- java读取pdf文件
- java 读取pdf
- JAVA读取PDF文件
- JAVA读取PDF文件
- java读取pdf总结
- java读取pdf文件
- JAVA读取PDF信息
- Java读取pdf中文
- java读取pdf
- Java实现读取pdf文件内容(how to read pdf in java)
- java读取生成PDF,iText操作PDF
- java读取pdf文件内容
- java读取pdf文件内容
- java读取pdf文件内容
- java读取pdf文件内容
- java读取pdf文件内容
- java读取PDF的方法
- SQL语句
- Java实现大文件分割
- linux c/c++ 注意
- pythonPkg_xml.etree.ElementTree
- 使用Amoeba for mysql实现mysql读写分离
- Java实现读取PDF
- C++ 0x 之 Lambda:贤妻与娇娃,你娶谁当老婆?听 FP 如何点化 C++
- Apache与Tomcat集群配置
- 有关Linux Shell的学习笔记
- Java实现文件压缩 使用GZIP和Zip方式
- Eclipse debug的简单用法
- 定义Window进入和退出效果
- Java Base64编码及解码
- ListView的小技巧,小知识