word转HTML 基本版

来源:互联网 发布:base64decoder 源码 编辑:程序博客网 时间:2024/05/29 04:36

同时支持doc和docx,话不多说,直接上代码
项目依赖 pom.xml

<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>net.sf.jtidy</groupId>
    <artifactId>jtidy</artifactId>
    <version>r938</version>
</dependency>

正式代码如下:

package com.zbj.spring.boot.util;import lombok.Cleanup;import org.apache.commons.lang.StringUtils;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.xwpf.converter.core.BasicURIResolver;import org.apache.poi.xwpf.converter.core.FileImageExtractor;import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;import org.apache.poi.xwpf.usermodel.XWPFDocument;import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import java.io.File;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStreamWriter;/** * WordToHtml * * @author weigang * @create 2017-10-13 **/public class WordToHtml {    public static void main(String[] args) {//      String sourceFileName = "D:/test/员工劳动合同.docx";        String sourceFileName = "D:/test/员工劳动合同.doc";        try {            convertWordToHtml(sourceFileName);        } catch (IOException e) {            e.printStackTrace();        } catch (ParserConfigurationException e) {            e.printStackTrace();        } catch (TransformerException e) {            e.printStackTrace();        }    }    public static void convertWordToHtml(String path) throws IOException, ParserConfigurationException, TransformerException {        String htmlPath = "D:/test/index.html";        String imagePath = "D:/test/image/";        if (StringUtils.isBlank(path)) {            return;        }        if (path.endsWith("docx")) { // 2007 及以后            XWPFDocument document = new XWPFDocument(new FileInputStream(path));            XHTMLOptions options = 
XHTMLOptions.create();            options.setExtractor(new FileImageExtractor(new File(imagePath)));            options.URIResolver(new BasicURIResolver("image"));            @Cleanup OutputStreamWriter streamWriter = new OutputStreamWriter(new FileOutputStream(htmlPath));            XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();            xhtmlConverter.convert(document, streamWriter, options);        } else { // 2003 之前            HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(path));            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);            // 保存图片,并返回图片的相对路径            wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {                File imageFile = new File(imagePath);                if(!imageFile.exists()){                    imageFile.mkdirs();                }                try (FileOutputStream out = new FileOutputStream(imagePath + name)) {                    out.write(content);                } catch (Exception e) {                    e.printStackTrace();                }                return "image/" + name;            });            wordToHtmlConverter.processDocument(wordDocument);            Document htmlDocument = wordToHtmlConverter.getDocument();            DOMSource domSource = new DOMSource(htmlDocument);            StreamResult streamResult = new StreamResult(new File(htmlPath));            TransformerFactory tf = TransformerFactory.newInstance();            Transformer serializer = tf.newTransformer();            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");            serializer.setOutputProperty(OutputKeys.INDENT, "yes");            serializer.setOutputProperty(OutputKeys.METHOD, "html");            serializer.transform(domSource, streamResult);        }    }}

对于word文件,在网上随便下载个合同或者文件即可

原创粉丝点击