pdf 提取图片

来源:互联网 发布:现在做淘宝很难 编辑:程序博客网 时间:2024/06/03 19:24

从 PDF 提取图片:这里提取的并不是页面中内嵌的背景图、子图片,而是把 PDF 的每一页整体转换成一张图片。


一共有两种方法:

第一种提取速度较快,一秒可以处理几张,但是会损失画质(按缩放比例 1.0 即 72 DPI 渲染,且页面宽度超过 720 时会再缩小)。

第二种提取速度较慢,大约两秒一张,但是不会损失画质(按 296 DPI 渲染)。


------------------------------------------------------------------------

先说第一种


需要引入的 jar 包(Maven 依赖):



  <dependency> <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId> <version>2.0.8</version> </dependency>
  
  <!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox-tools -->
  <dependency> <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox-tools</artifactId> <version>2.0.8</version> </dependency>
  
  




/**
 * Renders every page of a PDF to a PNG file inside {@code imaDir}.
 *
 * <p>Files are named after the 0-based page index: {@code 0.png}, {@code 1.png}, ...
 * A page whose crop box is wider than 720 is scaled down by {@code 720 / width};
 * all other pages are rendered at scale 1.0.
 *
 * <p>An {@link IOException} raised while loading, rendering or writing is printed
 * to standard error and not rethrown, so a failure part-way through leaves the
 * pages written so far on disk.
 *
 * @param pdfFilePath
 *            e.g. C:\\Users\\TOSHIBA\\Desktop\\Helloworld.pdf
 * @param imaDir
 *            output directory, e.g. C:\\Users\\TOSHIBA\\Desktop\\ (a trailing
 *            separator is optional)
 * @throws RuntimeException
 *             if {@code pdfFilePath} does not end with "pdf"/"PDF" or the file
 *             does not exist
 * @throws IllegalArgumentException
 *             if {@code imaDir} is empty
 */
public static void PDF2ImagByPdfBox(String pdfFilePath, String imaDir) {
    if (!pdfFilePath.endsWith("pdf") && !pdfFilePath.endsWith("PDF")) {
        throw new RuntimeException("不是pdf文件");
    }
    File pdfFile = new File(pdfFilePath);
    if (!pdfFile.exists()) {
        throw new RuntimeException("pdf文件不存在");
    }
    // An empty directory used to fail with an opaque StringIndexOutOfBoundsException.
    if (imaDir.isEmpty()) {
        throw new IllegalArgumentException("输出目录不能为空");
    }
    // Let java.io.File join directory and file name instead of appending a
    // hard-coded '\\', so the method also works where '\\' is not a separator.
    File outputDir = new File(imaDir);

    // try-with-resources guarantees the document is closed even when rendering fails.
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        PDFRenderer pdfRenderer = new PDFRenderer(pdf);
        int pageCounter = 0;
        for (PDPage page : pdf.getPages()) {
            float width = page.getCropBox().getWidth();
            // Only ever shrink: pages wider than 720 are scaled down to that width.
            float scale = width > 720 ? 720 / width : 1.0f;
            BufferedImage bim = pdfRenderer.renderImage(pageCounter, scale, ImageType.RGB);
            File target = new File(outputDir, pageCounter + ".png");
            // 300 is handed to writeImage as its dpi argument.
            ImageIOUtil.writeImage(bim, target.getPath(), 300);
            pageCounter++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}


-------------------------------------------------------------------------------------------------------------------------------




第二种

<dependency>
 <groupId>org.apache.pdfbox</groupId>
 <artifactId>pdfbox-app</artifactId>
 <version>2.0.2</version>
 </dependency>



/**
 * Renders every page of a PDF at 296 DPI and writes each one as a PNG file
 * inside {@code imaDir}.
 *
 * <p>Files are named after the 0-based page index: {@code 0.png}, {@code 1.png}, ...
 * After each page a progress line is printed to standard output.
 *
 * <p>An {@link IOException} raised while loading, rendering, writing or closing
 * is printed to standard error and not rethrown.
 *
 * @param pdfFilePath
 *            e.g. C:\\Users\\TOSHIBA\\Desktop\\Helloworld.pdf
 * @param imaDir
 *            output directory, e.g. C:\\Users\\TOSHIBA\\Desktop\\ (a trailing
 *            separator is optional)
 * @throws RuntimeException
 *             if {@code pdfFilePath} does not end with "pdf"/"PDF"
 * @throws IllegalArgumentException
 *             if {@code imaDir} is empty
 */
public static void PDF2ImagByPdfBox(String pdfFilePath, String imaDir) {
    File file = new File(pdfFilePath);
    if (!pdfFilePath.endsWith("pdf") && !pdfFilePath.endsWith("PDF")) {
        throw new RuntimeException("不是pdf文件");
    }
    // An empty directory used to fail with an opaque StringIndexOutOfBoundsException.
    if (imaDir.isEmpty()) {
        throw new IllegalArgumentException("输出目录不能为空");
    }
    // Let java.io.File join directory and file name instead of appending a
    // hard-coded '\\', so the method also works where '\\' is not a separator.
    File outputDir = new File(imaDir);

    // try-with-resources closes the document on every exit path.
    try (PDDocument doc = PDDocument.load(file)) {
        PDFRenderer renderer = new PDFRenderer(doc);
        int pageCount = doc.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, 296);
            ImageIO.write(image, "PNG", new File(outputDir, i + ".png"));
            System.out.println("转换页数" + i+ "----- "+ "总共页数"+ pageCount);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}