word转HTML 升级版

来源：互联网 | 发布：linux内核的优化 | 编辑：程序博客网 | 时间：2024/05/29 18:15

升级版采用网络图片替换

同时支持doc和docx，话不多说，直接上代码
pom.xml 依赖请参照上一篇博客，并在此基础上新增如下依赖：

<dependency>    <groupId>org.projectlombok</groupId>    <artifactId>lombok</artifactId></dependency>

正式代码

package com.zbj.poi.util;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.ObjectUtils;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.UUID;

/**
 * PoiUtils: converts Word documents (.doc and .docx) to an HTML string,
 * replacing embedded pictures with the URLs returned by {@link #uploadImage}.
 *
 * <p>The conversion writes an intermediate HTML file under {@link #TEMP_PATH},
 * reads it back as a string and then removes it.
 *
 * @author weigang
 * @create 2017-10-17
 **/
@Component
@Slf4j
public class PoiUtils {

    public static final String DOCX = "docx";
    public static final String DOC = "doc";
    /** Working directory for intermediate files; they are removed once an operation completes. */
    public static final String TEMP_PATH = "D:/test/";

    /** Prefix that precedes the picture number in URIs such as {@code word/media/image1.png}. */
    private static final String IMAGE_PREFIX = "image";

    /**
     * Converts a Word file to HTML and returns the markup as a single string
     * (lines are concatenated without separators).
     *
     * @param fileSource the .doc or .docx file to convert; must not be null
     * @return the generated HTML
     * @throws IOException if the source cannot be read or the intermediate file cannot be written
     * @throws ParserConfigurationException if no DOM builder is available (.doc branch)
     * @throws TransformerException if serializing the DOM to HTML fails (.doc branch)
     */
    public String wordToHtml(File fileSource) throws IOException, ParserConfigurationException, TransformerException {
        Preconditions.checkNotNull(fileSource, "原始文件不能为空");

        // A unique name per call: this bean is shared, so a fixed "index.html"
        // would let concurrent conversions overwrite each other's output.
        File htmlFile = new File(TEMP_PATH, UUID.randomUUID() + ".html");
        try {
            if (isDocx(fileSource.getName())) {
                // Word 2007 and later
                convertDocx(fileSource, htmlFile);
            } else {
                // Word 2003 and earlier
                convertDoc(fileSource, htmlFile);
            }
            return readHtml(htmlFile);
        } finally {
            // Delete now rather than at JVM exit; the reader is already closed here.
            deleteTempFile(htmlFile);
        }
    }

    /**
     * Copies an uploaded stream to a file under {@link #TEMP_PATH}. The stream is
     * only consumed (and closed) when {@code fileName} ends with "doc" or "docx",
     * ignoring case; otherwise the returned file is not written.
     *
     * @param is the stream holding the document content
     * @param fileName the target file name, relative to {@link #TEMP_PATH}
     * @return the target file, whether or not anything was written to it
     * @throws IOException if copying fails
     */
    public static File inputStreamToFile(InputStream is, String fileName) throws IOException {
        File file = new File(TEMP_PATH, fileName);
        if (StringUtils.endsWithIgnoreCase(fileName, DOCX) || StringUtils.endsWithIgnoreCase(fileName, DOC)) {
            try (BufferedInputStream bis = new BufferedInputStream(is);
                 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file))) {
                // Custom copy buffer size
                byte[] bytes = new byte[1024 * 5];
                int length;
                while (Constant.NEGATIVE_ONE != (length = bis.read(bytes))) {
                    // No per-chunk flush: closing the stream flushes once.
                    bos.write(bytes, Constant.ZERO, length);
                }
            }
        }
        return file;
    }

    /**
     * Registers a non-null file for deletion when the JVM exits. The file is
     * NOT removed immediately; callers may keep using it after this call.
     *
     * @param file the file to schedule for deletion; null is ignored
     */
    public static void deleteFileExist(File file) {
        if (file != null) {
            file.deleteOnExit();
        }
    }

    /**
     * Uploads one picture extracted from the document and returns its public URL.
     *
     * <p>NOTE(review): the upload code was elided from the published snippet
     * (only a "upload the picture and return its address" comment remained), so
     * this hook must be overridden with the real upload before documents that
     * contain pictures can be converted.
     *
     * @param imageName the picture name or path reported by the converter
     * @param imageData the raw picture bytes
     * @return the URL under which the uploaded picture can be downloaded
     * @throws UnsupportedOperationException always, until overridden
     */
    protected String uploadImage(String imageName, byte[] imageData) {
        throw new UnsupportedOperationException("image upload is not implemented: " + imageName);
    }

    /** Decides whether a file name is handled by the .docx (XWPF) branch. */
    private static boolean isDocx(String fileName) {
        // A name ending in "doc" is a legacy document even when "docx" appears
        // elsewhere in it; every other name keeps the original "contains" rule.
        if (StringUtils.endsWithIgnoreCase(fileName, DOC)) {
            return false;
        }
        return StringUtils.containsIgnoreCase(fileName, DOCX);
    }

    /** Converts a .docx file to XHTML written to {@code htmlFile}. */
    private void convertDocx(File fileSource, File htmlFile) throws IOException {
        // The writer uses the same charset readHtml() decodes with; the platform
        // default would corrupt non-ASCII text on non-UTF-8 systems.
        try (InputStream in = new FileInputStream(fileSource);
             OutputStreamWriter streamWriter =
                     new OutputStreamWriter(new FileOutputStream(htmlFile), Constant.UTF_8)) {
            XWPFDocument document = new XWPFDocument(in);
            XHTMLOptions options = XHTMLOptions.create();

            // URLs of uploaded pictures, in extraction order.
            // NOTE(review): assumes pictures are extracted in numeric order
            // (image1, image2, ...) before their URIs are resolved; confirm.
            List<String> pictureList = Lists.newArrayList();
            options.setExtractor((imagePath, imageData) -> {
                pictureList.add(uploadImage(imagePath, imageData));
            });
            options.URIResolver(uri -> resolveImageUri(uri, pictureList));

            XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance();
            xhtmlConverter.convert(document, streamWriter, options);
        }
    }

    /**
     * Maps a picture URI from the document to the URL of its uploaded copy,
     * falling back to the original URI when no usable URL is known.
     */
    private static String resolveImageUri(String uri, List<String> pictureList) {
        // Default URI looks like word/media/image1.png; numbering starts at 1.
        if (StringUtils.isBlank(uri)) {
            return uri;
        }
        int imageIndex = parseImageIndex(uri);
        log.info("uri: {}; imageIndex: {}", uri, imageIndex);
        if (imageIndex < 1 || imageIndex > pictureList.size()) {
            log.warn("处理 word 中图片异常: {}", uri);
            return uri;
        }
        String newUrl = pictureList.get(imageIndex - 1);
        return StringUtils.isNotBlank(newUrl) ? newUrl : uri;
    }

    /**
     * Extracts the picture number between "image" and the following '.'.
     *
     * @return the parsed number, or -1 when the URI does not have that shape
     */
    private static int parseImageIndex(String uri) {
        int prefixAt = uri.indexOf(IMAGE_PREFIX);
        if (prefixAt < 0) {
            return -1;
        }
        int digitsFrom = prefixAt + IMAGE_PREFIX.length();
        int dotAt = uri.indexOf('.', digitsFrom);
        if (dotAt < 0) {
            return -1;
        }
        try {
            return Integer.parseInt(uri.substring(digitsFrom, dotAt));
        } catch (NumberFormatException ignored) {
            // Not a numbered picture URI; the caller keeps the original URI.
            return -1;
        }
    }

    /** Converts a legacy .doc file to HTML written to {@code htmlFile}. */
    private void convertDoc(File fileSource, File htmlFile)
            throws IOException, ParserConfigurationException, TransformerException {
        try (InputStream in = new FileInputStream(fileSource)) {
            HWPFDocument wordDocument = new HWPFDocument(in);
            Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);

            // NOTE(review): the published snippet lost the head of this callback;
            // it is reconstructed as a PicturesManager, whose return value the
            // converter uses as the picture's URL.
            wordToHtmlConverter.setPicturesManager(
                    (content, pictureType, suggestedName, widthInches, heightInches) -> {
                        String downloadUrl = uploadImage(suggestedName, content);
                        log.info("user download image url: {}", downloadUrl);
                        return downloadUrl;
                    });
            wordToHtmlConverter.processDocument(wordDocument);

            Document htmlDocument = wordToHtmlConverter.getDocument();
            DOMSource domSource = new DOMSource(htmlDocument);
            StreamResult streamResult = new StreamResult(htmlFile);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(domSource, streamResult);
        }
    }

    /** Reads the intermediate HTML file into one string, dropping line breaks. */
    private static String readHtml(File htmlFile) throws IOException {
        StringBuilder builder = new StringBuilder();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(htmlFile), Constant.UTF_8))) {
            String line;
            // Compare against null only: an empty line is valid content and
            // must not end the loop.
            while ((line = br.readLine()) != null) {
                builder.append(line);
            }
        }
        return builder.toString();
    }

    /** Removes the intermediate file, deferring to JVM exit only if that fails. */
    private static void deleteTempFile(File tempFile) {
        if (tempFile.exists() && !tempFile.delete()) {
            log.warn("failed to delete temp file {}, deferring to JVM exit", tempFile);
            tempFile.deleteOnExit();
        }
    }
}

阅读全文

0 0