word 转html,pdf转图片
来源:互联网 发布:汉口北淘宝商学院 编辑:程序博客网 时间:2024/06/06 06:45
maven配置:
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-examples</artifactId>
    <version>1.8.9</version>
</dependency>
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j-ImportXHTML</artifactId>
    <version>3.2.2</version>
    <exclusions>
        <exclusion>
            <artifactId>slf4j-log4j12</artifactId>
            <groupId>org.slf4j</groupId>
        </exclusion>
        <exclusion>
            <artifactId>log4j</artifactId>
            <groupId>log4j</groupId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.1</version>
</dependency>
java代码:
private List<AttachmentConvertResultBean> convertToPicture(String inputFileName) { List<AttachmentConvertResultBean> picList = new ArrayList<AttachmentConvertResultBean>(); if (StringUtils.isEmpty(inputFileName)) { LOGGER.error("输入的文件名称为空"); } else { File inputFile = new File(inputFileName); if (!inputFile.exists()) { LOGGER.error("要转换的文件不存在, " + inputFileName); } else { if (inputFileName.lastIndexOf(".") > 0 && inputFileName.lastIndexOf(".") < inputFileName.length() - 1) { String fileType = inputFileName.substring(inputFileName.lastIndexOf(".") + 1, inputFileName.length()); // doc文件转换 html if (!StringUtils.isEmpty(fileType) && ("DOC".equals(fileType.toUpperCase()))) { picList = FileConvertUtil.docCovertToHtml(inputFileName); } else if (!StringUtils.isEmpty(fileType) && ("DOCX".equals(fileType.toUpperCase()))) { // docx文件转换 html picList = FileConvertUtil.docxConvertToHtml(inputFileName); } else if (!StringUtils.isEmpty(fileType) && ("PDF".equals(fileType.toUpperCase()))) { // pdf文件转换图片 picList = FileConvertUtil.pdfConvertToJpg(inputFileName); } else { LOGGER.error("要转换的文件既不是 Word,也不是PDF,Excel或者其他类型的文件不支持转换 " + inputFileName); return null; } } } } return picList; }
/** * doc 文档 转换成 Html * * @param fileName doc文件路径 * @return * @see [相关类/方法](可选) * @since [产品/模块版本](可选) */ @SuppressWarnings({ "rawtypes", "finally" }) public static List<AttachmentConvertResultBean> docCovertToHtml(String fileName) { List<AttachmentConvertResultBean> resultList = new ArrayList<AttachmentConvertResultBean>(); try { HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); File file = new File(fileName); if (!file.exists()) { LOGGER.error("要转换的文件不存在 -》 " + fileName); return resultList; } final File parentFile = file.getParentFile(); wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) { return parentFile.getAbsolutePath() + suggestedName; } }); wordToHtmlConverter.processDocument(wordDocument); // save pictures List pics = wordDocument.getPicturesTable().getAllPictures(); String shutFileName = file.getName(); if (pics != null) { FileOutputStream fis = null; // 文件转换结果 File jpgparentFile; AttachmentConvertResultBean resultBean; for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get(i); try { if (!StringUtils.isEmpty(shutFileName) && shutFileName.lastIndexOf(".") > -1) { shutFileName = shutFileName.substring(0, shutFileName.lastIndexOf(".")); } jpgparentFile = new File(parentFile.getAbsolutePath() + File.separatorChar + shutFileName); if (!jpgparentFile.exists()) { jpgparentFile.mkdirs(); } fis = new FileOutputStream(parentFile.getAbsolutePath() + File.separatorChar + shutFileName + File.separatorChar + pic.suggestFullFileName()); pic.writeImageContent(fis); } catch (FileNotFoundException e) { e.printStackTrace(); LOGGER.error(e.getMessage()); } finally { if (null != fis) { fis.close(); } } } Document htmlDocument = 
wordToHtmlConverter.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "HTML"); serializer.transform(domSource, streamResult); out.close(); writeFile(new String(out.toByteArray()), parentFile.getAbsolutePath() + File.separatorChar + shutFileName + ".html"); resultBean = new AttachmentConvertResultBean(); resultBean.setTargetFileIndex(1); resultBean.setTargetFilePath(parentFile.getAbsolutePath() + File.separatorChar + shutFileName + ".html"); resultList.add(resultBean); resultBean.setTargetFileType("html"); } } catch (Exception e) { LOGGER.error(e.getMessage()); } finally { return resultList; } }
/** * docx 文档 转换成 html * * @param inputFileFullPath * @return * @see [相关类/方法](可选) * @since [产品/模块版本](可选) */ @SuppressWarnings("finally") public static List<AttachmentConvertResultBean> docxConvertToHtml(String inputFileFullPath) { List<AttachmentConvertResultBean> picList = new ArrayList<AttachmentConvertResultBean>(); if (StringUtils.isEmpty(inputFileFullPath)) { LOGGER.error("要转换的文件不存在,文件路径 -> " + inputFileFullPath); return picList; } else { // 输入文件 File inputFile = new File(inputFileFullPath); // 文件存在 ,而且是文件 if (inputFile.exists() && inputFile.isFile()) { XWPFDocument document = null; try { document = new XWPFDocument(new FileInputStream(inputFile)); XHTMLOptions options = XHTMLOptions.create();// .indent( 4 ); // 父目录 路径 String parentFilePath = inputFile.getParentFile().getAbsolutePath(); // docx 中的 图片存储路径 File imageFolder = new File(parentFilePath); options.setExtractor(new FileImageExtractor(imageFolder)); // URI resolver options.URIResolver(new FileURIResolver(imageFolder)); String inputFileName = inputFile.getName(); if (!StringUtils.isEmpty(inputFileName) && inputFileName.lastIndexOf(".") > 0) { inputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(".")); } // 拼接html 输出路径 OutputStream out = new FileOutputStream(new File(parentFilePath + File.separatorChar + inputFileName + ".html")); // docx转换html XHTMLConverter.getInstance().convert(document, out, options); AttachmentConvertResultBean resultBean = new AttachmentConvertResultBean(); resultBean.setTargetFileIndex(1); resultBean.setTargetFilePath(parentFilePath + File.separatorChar + inputFileName + ".html"); resultBean.setTargetFileType("html"); picList.add(resultBean); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { return picList; } } else { LOGGER.error("要转换的文件不存在,文件路径 -> " + inputFileFullPath); return picList; } } }
/** * PDF转换jpg 图片 * * @param inputFilePath * @return * @see [相关类/方法](可选) * @since [产品/模块版本](可选) */ @SuppressWarnings({ "rawtypes", "finally" }) public static List<AttachmentConvertResultBean> pdfConvertToJpg(String inputFilePath) { List<AttachmentConvertResultBean> picList = new ArrayList<AttachmentConvertResultBean>(); PDDocument doc; try { File pdfFile = new File(inputFilePath); String fileName; if (!pdfFile.exists()) { LOGGER.error("需要转换的文件不存在 -> " + inputFilePath); } fileName = pdfFile.getName(); if (!StringUtils.isEmpty(fileName) && fileName.lastIndexOf(".") > 0) { fileName = fileName.substring(0, fileName.lastIndexOf(".")); } File parentFile = pdfFile.getParentFile(); doc = PDDocument.load(inputFilePath); List pages = doc.getDocumentCatalog().getAllPages(); File outFile; // 文件转换结果 AttachmentConvertResultBean resultBean; for (int i = 0; i < pages.size(); i++) { PDPage page = (PDPage) pages.get(i); BufferedImage image = page.convertToImage(); Iterator iter = ImageIO.getImageWritersBySuffix("jpg"); ImageWriter writer = (ImageWriter) iter.next(); outFile = new File(parentFile.getAbsolutePath() + File.separatorChar + fileName + File.separatorChar + fileName + "_" + i + ".jpg"); resultBean = new AttachmentConvertResultBean(); resultBean.setTargetFileIndex(i + 1); resultBean.setTargetFilePath(parentFile.getAbsolutePath() + File.separatorChar + fileName + File.separatorChar + fileName + "_" + i + ".jpg"); resultBean.setTargetFileType("jpg"); picList.add(resultBean); if (!outFile.getParentFile().exists()) { outFile.getParentFile().mkdirs(); } FileOutputStream out = new FileOutputStream(outFile); ImageOutputStream outImage = ImageIO.createImageOutputStream(out); writer.setOutput(outImage); writer.write(new IIOImage(image, null, null)); } doc.close(); } catch (IOException e) { e.printStackTrace(); LOGGER.error("文件转换失败 -> " + inputFilePath); } finally { return picList; } }
// 输出html文件 private static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; org.jsoup.nodes.Document doc = Jsoup.parse(content); content = doc.html(); try { File file = new File(path); fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8")); bw.write(content); } catch (Exception e) { e.printStackTrace(); LOGGER.error(e.getMessage()); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (Exception e) { LOGGER.error(e.getMessage()); } } }
0 0
- word 转html,pdf转图片
- WORD转HTML,PDF转图片小记
- Word PDF转图片
- pdf图片转word编辑
- 图片PDF文件转Word
- C# web实现word 转Html、office转Html、pdf转图片 在线预览文件
- C# web实现word 转Html、office转Html、pdf转图片 在线预览文件
- excel,word,txt,img,pdf,转html
- HTML转图片或者HTML转PDF
- Word文档或PDF转图片
- 图片型 pdf 转 word - 学生帮
- 解决word转pdf后图片失真
- word转pdf时,图片错位,乱跑
- PDF转word(图片转word)简单方法
- 使用Jacob将Word、Excel转html,pdf转html
- excel 转pdf html 图片显示问题
- 通过jacob 把word转pdf ,excel转html,word转html
- java poi- 实现 word Excel pdf ppt 转 HTML
- 一些设计上的基本常识
- 排序
- Centos6.5 x86_64下 静默安装Oracle 11g R2
- 南邮 OJ 1028 Digital Roots
- 微服务实战
- word 转html,pdf转图片
- NGUI界面Drawcall优化
- 8. Document getElementById() 方法
- Sizeof与Strlen的区别与联系
- 程序员技术练级攻略
- ubuntu修改主机名
- 文件加解密(二)——使用密码
- 读取raw文件夹的数据
- [LeetCode] Shortest Palindrome I