word转HTML 基本版
来源:互联网 发布:base64decoder 源码 编辑:程序博客网 时间:2024/05/29 04:36
同时支持doc和docx,话不多说,直接上代码
项目依赖 pom.xml
<!-- commons-lang: supplies org.apache.commons.lang.StringUtils -->
<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.6</version>
</dependency>
<!-- Apache POI core -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<!-- POI OOXML: XWPF classes for .docx documents -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<!-- POI scratchpad: HWPF classes for binary .doc documents -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<!-- XDocReport converter: XWPF (.docx) to XHTML -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>
<!-- JTidy: not referenced by the sample class in this article; presumably optional, verify before removing -->
<dependency>
    <groupId>net.sf.jtidy</groupId>
    <artifactId>jtidy</artifactId>
    <version>r938</version>
</dependency>
正式代码如下:
package com.zbj.spring.boot.util;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

/**
 * Converts a Word document (.doc or .docx) into an HTML file, extracting
 * embedded pictures into a sibling image directory.
 *
 * <p>Output locations are fixed: the HTML goes to {@code D:/test/index.html}
 * and pictures go under {@code D:/test/image/}.
 *
 * @author weigang
 * @create 2017-10-13
 **/
public class WordToHtml {

    /** File the generated HTML is written to. */
    private static final String HTML_PATH = "D:/test/index.html";

    /** Directory extracted pictures are written to. */
    private static final String IMAGE_DIR = "D:/test/image/";

    /** Prefix used for picture links inside the generated HTML. */
    private static final String IMAGE_URI_PREFIX = "image";

    /**
     * Demo entry point: converts a sample document and prints any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // A .docx file is handled as well, e.g. "D:/test/员工劳动合同.docx".
        String sourceFileName = "D:/test/员工劳动合同.doc";
        try {
            convertWordToHtml(sourceFileName);
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts the Word file at {@code path} to HTML.
     *
     * <p>A path ending in "docx" (compared case-insensitively) is read with
     * XWPF; every other path is read with HWPF as a binary .doc file. A blank
     * path is ignored and nothing is written.
     *
     * @param path location of the source Word file; may be blank
     * @throws IOException if the source cannot be read or the output cannot be written
     * @throws ParserConfigurationException if no DOM builder can be created (.doc branch)
     * @throws TransformerException if the DOM cannot be serialized to HTML (.doc branch)
     */
    public static void convertWordToHtml(String path)
            throws IOException, ParserConfigurationException, TransformerException {
        if (StringUtils.isBlank(path)) {
            return;
        }
        // Case-insensitive so that e.g. "X.DOCX" is not sent to the binary .doc parser.
        if (path.toLowerCase(Locale.ROOT).endsWith("docx")) {
            convertDocx(path);
        } else {
            convertDoc(path);
        }
    }

    /** Converts a Word 2007+ (.docx) file to HTML via the XDocReport XHTML converter. */
    private static void convertDocx(String path) throws IOException {
        try (InputStream in = new FileInputStream(path)) {
            XWPFDocument document = new XWPFDocument(in);

            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(new File(IMAGE_DIR)));
            options.URIResolver(new BasicURIResolver(IMAGE_URI_PREFIX));

            // Explicit UTF-8 so the output does not depend on the platform default
            // charset and matches the encoding used for .doc conversion.
            try (OutputStreamWriter streamWriter = new OutputStreamWriter(
                    new FileOutputStream(HTML_PATH), StandardCharsets.UTF_8)) {
                XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
                xhtmlConverter.convert(document, streamWriter, options);
            }
        }
    }

    /** Converts a Word 97-2003 (.doc) file to HTML via POI's WordToHtmlConverter. */
    private static void convertDoc(String path)
            throws IOException, ParserConfigurationException, TransformerException {
        try (InputStream in = new FileInputStream(path)) {
            HWPFDocument wordDocument = new HWPFDocument(in);

            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
            // Store each picture on disk and hand back its link relative to the HTML file.
            wordToHtmlConverter.setPicturesManager(
                    (content, pictureType, name, width, height) -> savePicture(content, name));
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");

            try (OutputStream out = new FileOutputStream(HTML_PATH)) {
                serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
            }
        }
    }

    /**
     * Writes one extracted picture into the image directory.
     *
     * <p>Best-effort: a failed write is reported on stderr and the conversion
     * continues, so the HTML may then contain a link to a missing picture.
     *
     * @param content raw picture bytes
     * @param name file name to store the picture under
     * @return the picture's link relative to the generated HTML file
     */
    private static String savePicture(byte[] content, String name) {
        File imageDir = new File(IMAGE_DIR);
        if (!imageDir.exists()) {
            imageDir.mkdirs();
        }
        try (OutputStream out = new FileOutputStream(new File(imageDir, name))) {
            out.write(content);
        } catch (IOException e) {
            System.err.println("Failed to save picture '" + name + "' into " + imageDir);
            e.printStackTrace();
        }
        return IMAGE_URI_PREFIX + "/" + name;
    }
}
测试用的 Word 文件,从网上随便下载一份合同或其他文档即可。
阅读全文
0 0
- word转HTML 基本版
- word转HTML 升级版
- 【word | html】word(doc | docx) 转 html
- java word转HTML
- Java Word转Html
- html转word
- HTML 转 Word
- poi word转html
- Java Word转Html
- java word转html
- php word 转 html
- word 转html
- HTML2DOC html转word
- php word 转 html
- Word转HTML
- word转html
- word转html
- word转html
- TIPS_备忘
- PHP filter过滤器的基础使用
- 数据结构实验四之顺序栈
- 解决idea引入jsp标签库报500的问题
- Google离线API地图调用示例源码功能
- word转HTML 基本版
- 判断手机端和PC端的方法
- 前端知识库 code_xzh 2017-03-02 22:30:55 浏览20219 评论18 javascript html5 react 性能 HTTPS http js 浏览器 angular
- 如何写注册表文件
- hadoop之 安全模式及SafeModeException
- 【Android】Activity右滑返回的实现
- js 设置隐藏域获取innerText的值,select下拉框回显
- Poj 1328 Radar Installation
- 武汉校招,我都问了什么