PDFBox 的一些常见用法

来源:互联网 发布:网络系统解决方案 编辑:程序博客网 时间:2024/06/07 16:26

1. 到 PDFBox 官网下载相关的 jar 包

https://pdfbox.apache.org/download


2.读取文件并进行相应操作

package kang;import java.awt.print.Book;import java.awt.print.PageFormat;import java.awt.print.Paper;import java.awt.print.PrinterException;import java.awt.print.PrinterJob;import java.io.BufferedWriter;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.io.Writer;import java.util.Iterator;import javax.print.attribute.HashPrintRequestAttributeSet;import javax.print.attribute.PrintRequestAttributeSet;import javax.print.attribute.standard.PageRanges;import org.apache.pdfbox.cos.COSName;import org.apache.pdfbox.io.RandomAccessBuffer;import org.apache.pdfbox.pdfparser.PDFParser;import org.apache.pdfbox.pdmodel.PDDocument;import org.apache.pdfbox.pdmodel.PDPage;import org.apache.pdfbox.pdmodel.PDPageContentStream;import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;import org.apache.pdfbox.pdmodel.PDResources;import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;import org.apache.pdfbox.printing.PDFPageable;import org.apache.pdfbox.printing.PDFPrintable;import org.apache.pdfbox.text.PDFTextStripper;public class PdfReader {public static void main(String[] args) throws IOException{// 待解析PDFFile pdfFile = new File("C:\\Users\\Administrator\\Desktop\\java线程池执行原理分析.pdf");      // 空白PDFFile pdfFile_out = new File("C:\\Users\\Administrator\\Desktop\\Doc1.pdf");File pdfFile_outDoc = new File("C:\\Users\\Administrator\\Desktop\\3.doc");String imagePath="C:\\Users\\Administrator\\Desktop\\1.jpg";test1(pdfFile,imagePath,pdfFile_out,pdfFile_outDoc);}/*输出到doc文件,没有图片 * */private static void PrintToDoc(PDDocument document,File pdfFile_outDoc) throws IOException {// 获取页码int pages = document.getNumberOfPages();// 读文本内容PDFTextStripper stripper=new PDFTextStripper();// 设置按顺序输出FileOutputStream fos=new FileOutputStream(pdfFile_outDoc);Writer writer=new 
OutputStreamWriter(fos,"UTF-8");stripper.setSortByPosition(true);stripper.setStartPage(1);stripper.setEndPage(pages);//String content = stripper.getText(document);//System.out.println(content); stripper.writeText(document,writer);writer.close();fos.close();document.close();}/** * PDFprint the document at its actual size. This is the recommended way to print. * 将pdf文件输出为xps格式文档 * XPS 是XML Paper Specification(XML文件规格书)的简称,是一种电子文件格式,它是微软公司开发的一种文档保存与查看的规范。 */private static void PDFprint(PDDocument document) throws IOException, PrinterException{PrinterJob job = PrinterJob.getPrinterJob();job.setPageable(new PDFPageable(document));job.print();}/** * Prints using custom PrintRequestAttribute values. * 带上要复制的页数,复制特定的页数 */private static void printWithAttributes(PDDocument document)throws IOException, PrinterException{PrinterJob job = PrinterJob.getPrinterJob();job.setPageable(new PDFPageable(document));PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();attr.add(new PageRanges(1, 1)); // pages 1 to 1job.print(attr);}/** * Prints with a print preview dialog. * 跳出提示框,问要从哪里开始复制,复制到哪里,还有一些其他参数 */private static void printWithDialog(PDDocument document) throws IOException, PrinterException{PrinterJob job = PrinterJob.getPrinterJob();job.setPageable(new PDFPageable(document));if (job.printDialog()){job.print();}}/** * Prints with a print preview dialog and custom PrintRequestAttribute values.     跳出提示框,参数设置可选项比较多 */private static void printWithDialogAndAttributes(PDDocument document)throws IOException, PrinterException{PrinterJob job = PrinterJob.getPrinterJob();job.setPageable(new PDFPageable(document));PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();attr.add(new PageRanges(1, 1)); // pages 1 to 1if (job.printDialog(attr)){job.print(attr);}}/** * Prints using a custom page size and custom margins. 
* 设置纸张页面的大小 */private static void printWithPaper(PDDocument document)throws IOException, PrinterException{PrinterJob job = PrinterJob.getPrinterJob();job.setPageable(new PDFPageable(document));// define custom paperPaper paper = new Paper();paper.setSize(306, 396); // 1/72 inchpaper.setImageableArea(0, 0, paper.getWidth(), paper.getHeight()); // no margins// custom page formatPageFormat pageFormat = new PageFormat();pageFormat.setPaper(paper);// override the page formatBook book = new Book();// append all pagesbook.append(new PDFPrintable(document), pageFormat, document.getNumberOfPages());job.setPageable(book);job.print();}/** * Add an image to an existing PDF document. *将图片插入pdf文件并另存到另一个pdf文件中 * @param inputFile The input PDF to add the image to. * @param imagePath The filename of the image to put in the PDF. * @param outputFile The file to write to the pdf to. * */public static void createPDFFromImage( File inputFile, String imagePath, File outputFile )throws IOException{try (PDDocument doc = PDDocument.load(inputFile)){//we will add the image to the first page.PDPage page = doc.getPage(0);// createFromFile is the easiest way with an image file// if you already have the image in a BufferedImage, // call LosslessFactory.createFromImage() insteadPDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, AppendMode.APPEND, true, true)){// contentStream.drawImage(ximage, 20, 20 )  // better method inspired by http://stackoverflow.com/a/22318681/535646// reduce this value if the image is too largefloat scale = 1f;contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() * scale);}doc.save(outputFile);}}private static void test1(File pdfFile ,String imagePath,File pdfFile_out, File pdfFile_outDoc) {PDDocument document = null;try{// 方式一:/**            InputStream input = null;            input = new FileInputStream( pdfFile );            //加载 pdf 文档    
        PDFParser parser = new PDFParser(new RandomAccessBuffer(input));            parser.parse();            document = parser.getPDDocument(); **/// 方式二:(方便很多)document=PDDocument.load(pdfFile);//document.save("C:\\Users\\Administrator\\Desktop\\aa.pdf");//直接复制pdfPrintToDoc(document, pdfFile_outDoc);//PDFprint(document);//printWithAttributes(document);//printWithDialog(document);//printWithDialogAndAttributes(document);//printWithPaper(document);//createPDFFromImage(pdfFile,imagePath,pdfFile_out);}catch(Exception e){System.out.println(e);}}}



各种文档就产生了:



原创粉丝点击