PDFBox的一些用法
来源:互联网 发布:网络系统解决方案 编辑:程序博客网 时间:2024/06/07 16:26
1. 到 PDFBox 官网下载相关的 jar 包(下面的示例代码使用的是 PDFBox 2.x 的 API,例如 PDDocument.load、AppendMode 以及 org.apache.pdfbox.printing 包)
https://pdfbox.apache.org/download
2.读取文件并进行相应操作
package kang;

import java.awt.print.Book;
import java.awt.print.PageFormat;
import java.awt.print.Paper;
import java.awt.print.PrinterException;
import java.awt.print.PrinterJob;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Iterator;
import javax.print.attribute.HashPrintRequestAttributeSet;
import javax.print.attribute.PrintRequestAttributeSet;
import javax.print.attribute.standard.PageRanges;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.printing.PDFPageable;
import org.apache.pdfbox.printing.PDFPrintable;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Small collection of PDFBox usage samples: extracting the text of a PDF into a file,
 * sending a PDF to a printer in several ways, and stamping an image onto the first page.
 *
 * <p>{@link #main(String[])} wires hard-coded desktop paths into {@code test1}, which
 * currently runs only the text-extraction sample; the other samples are listed there as
 * commented-out alternatives.
 */
public class PdfReader {

    /**
     * Entry point: builds the hard-coded input/output paths and runs {@code test1}.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; {@code test1} reports failures itself
     */
    public static void main(String[] args) throws IOException {
        // PDF to be parsed.
        File sourcePdf = new File("C:\\Users\\Administrator\\Desktop\\java线程池执行原理分析.pdf");
        // Target PDF, passed through to the (currently disabled) createPDFFromImage sample.
        File outputPdf = new File("C:\\Users\\Administrator\\Desktop\\Doc1.pdf");
        // Target file for the extracted text.
        File outputDoc = new File("C:\\Users\\Administrator\\Desktop\\3.doc");
        String imagePath = "C:\\Users\\Administrator\\Desktop\\1.jpg";

        test1(sourcePdf, imagePath, outputPdf, outputDoc);
    }

    /**
     * Writes the text of every page of {@code document} to {@code pdfFile_outDoc} as UTF-8.
     * Only text is written (via {@link PDFTextStripper}); images are not carried over.
     *
     * <p>The output streams are closed here even when extraction fails. The document itself
     * is NOT closed by this method; the caller that loaded it is responsible for closing it.
     *
     * @param document       the loaded PDF to read text from
     * @param pdfFile_outDoc the file the extracted text is written to (created/overwritten)
     * @throws IOException if the output file cannot be opened or text extraction fails
     */
    private static void PrintToDoc(PDDocument document, File pdfFile_outDoc) throws IOException {
        // Page count, used as the last page to extract.
        int pages = document.getNumberOfPages();

        PDFTextStripper stripper = new PDFTextStripper();
        // Emit text sorted by its position on the page.
        stripper.setSortByPosition(true);
        stripper.setStartPage(1);
        stripper.setEndPage(pages);

        // try-with-resources closes the writer and then the stream on every exit path,
        // including when writeText throws.
        try (FileOutputStream fos = new FileOutputStream(pdfFile_outDoc);
                Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
            // Alternative: String content = stripper.getText(document);
            stripper.writeText(document, writer);
        }
    }

    /**
     * Prints the document at its actual size. This is the recommended way to print.
     *
     * <p>The job goes to whatever print service {@link PrinterJob#getPrinterJob()} returns.
     * NOTE(review): the original author reports this produced an XPS (XML Paper
     * Specification, Microsoft's document format) file, presumably because their default
     * printer was an XPS writer; nothing in this method selects XPS explicitly.
     *
     * @param document the loaded PDF to print
     * @throws IOException      declared by the original sample
     * @throws PrinterException if the print job fails
     */
    private static void PDFprint(PDDocument document) throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));
        job.print();
    }

    /**
     * Prints using custom PrintRequestAttribute values; here a page range restricting the
     * job to page 1 only.
     *
     * @param document the loaded PDF to print
     * @throws IOException      declared by the original sample
     * @throws PrinterException if the print job fails
     */
    private static void printWithAttributes(PDDocument document)
            throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));

        PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();
        attr.add(new PageRanges(1, 1)); // pages 1 to 1

        job.print(attr);
    }

    /**
     * Prints after showing the print dialog, where the user can choose the page range and
     * other settings. Nothing is printed if the dialog is cancelled.
     *
     * @param document the loaded PDF to print
     * @throws IOException      declared by the original sample
     * @throws PrinterException if the print job fails
     */
    private static void printWithDialog(PDDocument document) throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));

        if (job.printDialog()) {
            job.print();
        }
    }

    /**
     * Prints after showing the print dialog pre-populated with custom PrintRequestAttribute
     * values (page range 1 to 1). The original author notes this dialog variant exposes
     * more options. Nothing is printed if the dialog is cancelled.
     *
     * @param document the loaded PDF to print
     * @throws IOException      declared by the original sample
     * @throws PrinterException if the print job fails
     */
    private static void printWithDialogAndAttributes(PDDocument document)
            throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        job.setPageable(new PDFPageable(document));

        PrintRequestAttributeSet attr = new HashPrintRequestAttributeSet();
        attr.add(new PageRanges(1, 1)); // pages 1 to 1

        if (job.printDialog(attr)) {
            job.print(attr);
        }
    }

    /**
     * Prints using a custom page size and custom margins (sets the paper dimensions).
     *
     * @param document the loaded PDF to print
     * @throws IOException      declared by the original sample
     * @throws PrinterException if the print job fails
     */
    private static void printWithPaper(PDDocument document) throws IOException, PrinterException {
        PrinterJob job = PrinterJob.getPrinterJob();
        // Kept from the original sample; replaced by the Book set further down.
        job.setPageable(new PDFPageable(document));

        // Define custom paper.
        Paper paper = new Paper();
        paper.setSize(306, 396); // 1/72 inch
        paper.setImageableArea(0, 0, paper.getWidth(), paper.getHeight()); // no margins

        // Custom page format.
        PageFormat pageFormat = new PageFormat();
        pageFormat.setPaper(paper);

        // Override the page format: append all pages to a Book using the custom format.
        Book book = new Book();
        book.append(new PDFPrintable(document), pageFormat, document.getNumberOfPages());
        job.setPageable(book);

        job.print();
    }

    /**
     * Adds an image to an existing PDF document and saves the result to another PDF file.
     *
     * @param inputFile  The input PDF to add the image to.
     * @param imagePath  The filename of the image to put in the PDF.
     * @param outputFile The file to write the resulting PDF to.
     * @throws IOException if the input cannot be loaded, the image cannot be read, or the
     *                     output cannot be written
     */
    public static void createPDFFromImage(File inputFile, String imagePath, File outputFile)
            throws IOException {
        try (PDDocument doc = PDDocument.load(inputFile)) {
            // We will add the image to the first page.
            PDPage page = doc.getPage(0);

            // createFromFile is the easiest way with an image file.
            // If you already have the image in a BufferedImage,
            // call LosslessFactory.createFromImage() instead.
            PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);

            try (PDPageContentStream contentStream =
                    new PDPageContentStream(doc, page, AppendMode.APPEND, true, true)) {
                // contentStream.drawImage(ximage, 20, 20)
                // Better method inspired by http://stackoverflow.com/a/22318681/535646
                // Reduce this value if the image is too large.
                float scale = 1f;
                contentStream.drawImage(
                        pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() * scale);
            }

            doc.save(outputFile);
        }
    }

    /**
     * Loads {@code pdfFile} and runs the selected sample on it (currently text extraction
     * into {@code pdfFile_outDoc}). The document is always closed before returning.
     * Failures are reported on standard error rather than propagated.
     *
     * @param pdfFile        the PDF to load
     * @param imagePath      image path for the createPDFFromImage sample (currently disabled)
     * @param pdfFile_out    output PDF for the createPDFFromImage sample (currently disabled)
     * @param pdfFile_outDoc output file for the extracted text
     */
    private static void test1(
            File pdfFile, String imagePath, File pdfFile_out, File pdfFile_outDoc) {
        // Approach 1 (more verbose), parsing from a stream:
        //   InputStream input = new FileInputStream(pdfFile);
        //   PDFParser parser = new PDFParser(new RandomAccessBuffer(input));
        //   parser.parse();
        //   PDDocument document = parser.getPDDocument();
        //
        // Approach 2 (much more convenient): PDDocument.load, used below.
        try (PDDocument document = PDDocument.load(pdfFile)) {
            // Straight copy of the PDF:
            //   document.save("C:\\Users\\Administrator\\Desktop\\aa.pdf");
            PrintToDoc(document, pdfFile_outDoc);

            // Other samples that can be enabled instead:
            //   PDFprint(document);
            //   printWithAttributes(document);
            //   printWithDialog(document);
            //   printWithDialogAndAttributes(document);
            //   printWithPaper(document);
            //   createPDFFromImage(pdfFile, imagePath, pdfFile_out);
        } catch (Exception e) {
            // Catch-all because the printing samples above also throw PrinterException;
            // this demo driver reports the failure and returns instead of crashing.
            System.err.println("Failed to process " + pdfFile + ": " + e);
        }
    }
}
各种文档就产生了:
阅读全文
0 0
- PDFbox的一些用法
- C# PDF转换成TXT --PDFBox的用法
- PDFBox
- pdfbox
- PDFBox与lucene的集成
- Visual C# 引用 PDFBox的dll
- PDFBox的PDF转图片功能
- IOS中PDFBox枚举的各个含义
- CheckListBox的一些用法!
- printf的一些用法
- Calendar的一些用法
- AS3 的一些用法
- JTable的一些用法
- sscanf的一些用法
- sscanf的一些用法
- Mapinfo的一些用法
- setsockopt的一些用法
- lookupedit的一些用法。
- 算法竞赛入门经典(第2版)习题3-9 子序列(All in All) Uva10340
- SpringMVC学习(三)——SpringMVC的配置文件
- Python 正则... 有些乱
- androd的自定义Adapter中的NullPointerException
- html定义列表嵌套
- PDFbox的一些用法
- Java中SAX方式解析XML
- Struts2核心技术 (二)
- UVa140宽带
- STL之set/multiset容器
- 学习【OpenCV入门教程之九】 非线性滤波专场:中值滤波、双边滤波---思维导图笔记
- ubuntu14.04 google chrome浏览器提示adobe flash player过期问题解决
- 以太网、互联网
- java基础第一章_进制