Java实现读取PDF

来源:互联网 发布:af淘宝旗舰店真假 编辑:程序博客网 时间:2024/06/07 04:00

package com.lss.common.pdf;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.pdfbox.TextToPDF;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextbox;
import org.apache.pdfbox.util.PDFTextStripper;

public class PDFUtils {
 private final static Logger logger = Logger.getLogger(PDFUtils.class);

 public static void main(String[] args) throws IOException {

  TextToPDF pdfCreator = new TextToPDF();
  StringReader reader = new StringReader("1");
  PDDocument pdfDoc = pdfCreator.createPDFFromText(reader);
  reader.close();
  pdfDoc.close();
  PDPage page=new PDPage();
  pdfDoc.addPage(page);

  // In order for the PDF document to be openable by Adobe Reader, it
  // needs
  // to have some pages in it. So we'll check that.
  PDDocumentCatalog docCatalog = pdfDoc.getDocumentCatalog();
  List allPages = docCatalog.getAllPages();
  System.out.println(allPages.size());
 }

 /**
  * 读取PDF文件
  *
  * @param path
  * @return
  * @throws Exception
  */
 public String readPDFFile(String path) {
  try {
   StringBuffer content = new StringBuffer();
   FileInputStream fis = new FileInputStream(path);
   PDFParser p = new PDFParser(fis);
   p.parse();
   PDFTextStripper ts = new PDFTextStripper();
   content.append(ts.getText(p.getPDDocument()));
   fis.close();
   return content.toString();
  } catch (Exception ex) {
   logger.error(ex);
  }
  return null;
 }

 public String readPDFFile(File file) throws Exception {
  StringBuffer content = new StringBuffer();
  FileInputStream fis = null;
  PDDocument pdfDocument = null;
  StringWriter writer = new StringWriter();
  PDFTextStripper stripper = new PDFTextStripper();
  try {
   fis = new FileInputStream(file);
   pdfDocument = PDDocument.load(fis);
   stripper.writeText(pdfDocument, writer);
   content.append(writer.getBuffer().toString());
  } catch (IOException e) {
   logger.error(e);
  } finally {
   if (writer != null) {
    writer.close();
    writer = null;
   }
   if (fis != null) {
    fis.close();
    file = null;
   }
   if (pdfDocument != null) {
    COSDocument cos = pdfDocument.getDocument();
    cos.close();
    pdfDocument.close();
   }
  }
  return content.toString();
 }
}

原创粉丝点击