用Java简单地读取PDF文件中的数据
来源:互联网 发布:剑三体服激活码 淘宝 编辑:程序博客网 时间:2024/05/29 03:23
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.OutputStreamWriter;
- import java.io.Writer;
- import java.net.MalformedURLException;
- import java.net.URL;
- import org.pdfbox.pdmodel.PDDocument;
- import org.pdfbox.util.PDFTextStripper;
-
- public class PdfReader {
- public void readFdf(String file) throws Exception {
-
- boolean sort = false;
-
- String pdfFile = file;
-
- String textFile = null;
-
- String encoding = "UTF-8";
-
- int startPage = 1;
-
- int endPage = Integer.MAX_VALUE;
-
- Writer output = null;
-
- PDDocument document = null;
- try {
- try {
-
- URL url = new URL(pdfFile);
-
- document = PDDocument.load(pdfFile);
-
- String fileName = url.getFile();
-
- if (fileName.length() > 4) {
- File outputFile = new File(fileName.substring(0, fileName.length() - 4) + ".txt");
- textFile = outputFile.getName();
- }
- } catch (MalformedURLException e) {
-
-
- document = PDDocument.load(pdfFile);
- if (pdfFile.length() > 4) {
- textFile = pdfFile.substring(0, pdfFile.length() - 4) + ".txt";
- }
- }
-
- output = new OutputStreamWriter(new FileOutputStream(textFile), encoding);
-
- PDFTextStripper stripper = null;
- stripper = new PDFTextStripper();
-
- stripper.setSortByPosition(sort);
-
- stripper.setStartPage(startPage);
-
- stripper.setEndPage(endPage);
-
- stripper.writeText(document, output);
- } finally {
- if (output != null) {
-
- output.close();
- }
- if (document != null) {
-
- document.close();
- }
- }
- }
-
-
-
-
- public static void main(String[] args) {
-
- PdfReader pdfReader = new PdfReader();
- try {
-
- pdfReader.readFdf("E://SpringGuide.pdf");
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }