word转HTML 升级版
来源:互联网 发布:linux内核的优化 编辑:程序博客网 时间:2024/05/29 18:15
升级版会将 Word 文档中的图片上传,并在生成的 HTML 中替换为对应的网络图片地址
同时支持doc和docx,话不多说,直接上代码
pom.xml 依赖请参照上一篇博客,并在此基础上新增如下依赖:
<dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId></dependency>
正式代码
package com.zbj.poi.util;import com.google.common.base.Preconditions;import com.google.common.collect.Lists;import lombok.Cleanup;import lombok.extern.slf4j.Slf4j;import org.apache.commons.lang3.StringUtils;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;import org.apache.poi.xwpf.usermodel.XWPFDocument;import org.apache.poi.xwpf.usermodel.XWPFPictureData;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Component;import org.springframework.util.ObjectUtils;import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import java.io.BufferedInputStream;import java.io.BufferedOutputStream;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.util.List;import java.util.UUID;/** * PoiUtils * * @author weigang * @create 2017-10-17 **/@Component@Slf4jpublic class PoiUtils { public static final String DOCX = "docx"; public static final String DOC = "doc"; public static final String TEMP_PATH = "D:/test/"; // 操作完成删除文件 public String wordToHtml(File fileSource) throws IOException, ParserConfigurationException, TransformerException { Preconditions.checkNotNull(fileSource, "原始文件不能为空"); String htmlPath = TEMP_PATH + "index.html"; File file = new File(htmlPath); // 2007 及以后 if (fileSource.getName().endsWith(DOCX) || 
StringUtils.containsIgnoreCase(fileSource.getName(), DOCX)) { XWPFDocument document = new XWPFDocument(new FileInputStream(fileSource)); List<XWPFPictureData> allPictures = document.getAllPictures(); allPictures.forEach(picture -> picture.suggestFileExtension()); XHTMLOptions options = XHTMLOptions.create(); List<String> pictureList = Lists.newArrayList(); options.setExtractor((imagePath, imageData) -> { // 上传图片,并将图片地址返回 pictureList.add(downloadUrl); }); options.URIResolver(uri -> { // uri default: word/media/image1.png 从1开始 if(StringUtils.isBlank(uri)){ return uri; } String imageIndex = uri.substring(uri.indexOf("image") + Constant.FIVE, uri.indexOf(".")); log.info("uri: {}; imageIndex: {}", uri, imageIndex); try { String newUrl = pictureList.get(Integer.valueOf(imageIndex) - Constant.ONE); if (StringUtils.isNotBlank(newUrl)) { return newUrl; } } catch (Exception e){ log.warn("处理 word 中图片异常: {}", e); } return uri; }); @Cleanup OutputStreamWriter streamWriter = new OutputStreamWriter(new FileOutputStream(htmlPath)); XHTMLConverter xhtmlConverter = (XHTMLConverter) XHTMLConverter.getInstance(); xhtmlConverter.convert(document, streamWriter, options); } else { // 2003 之前 HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileSource)); Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document); // 上传图片,并将图片地址返回 pictureList.add(downloadUrl); log.info("user download image url: {}", downloadUrl); return downloadUrl; }); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(file); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); 
serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } // 文件转String @Cleanup BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(htmlPath), Constant.UTF_8)); String line; StringBuilder builder = new StringBuilder(); while (!ObjectUtils.isEmpty(line = br.readLine())) { builder.append(line); } // 删除文件 file.deleteOnExit(); return builder.toString(); } public static File inputStreamToFile(InputStream is, String fileName) throws IOException { File file = new File(TEMP_PATH, fileName); if (fileName.endsWith(DOCX) || fileName.endsWith(DOC)) { @Cleanup BufferedInputStream bis = new BufferedInputStream(is); @Cleanup BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file)); // 自定义缓存区大小 byte[] bytes = new byte[1024 * 5]; int length; while (Constant.NEGATIVE_ONE != (length = bis.read(bytes))) { bos.write(bytes, Constant.ZERO, length); bos.flush(); } } return file; } public static void deleteFileExist(File file) { if (!ObjectUtils.isEmpty(file)) { file.deleteOnExit(); } }}
阅读全文
0 0
- word转HTML 升级版
- word转HTML 基本版
- 升级版pdf转word转换器
- 【word | html】word(doc | docx) 转 html
- java word转HTML
- Java Word转Html
- html转word
- HTML 转 Word
- poi word转html
- Java Word转Html
- java word转html
- php word 转 html
- word 转html
- HTML2DOC html转word
- php word 转 html
- Word转HTML
- word转html
- word转html
- 最新版勤哲Excel服务器V2017.13.0.1无限用户支持手机APP,微信,任意安装,支持后续升级
- unity3d url 解码 编码
- win10 新建、重命名或删除文件夹不自动刷新的问题
- 51nod 1070 Bash游戏 V4(博弈——找规律)
- java中比较字符串的大小(compareTo方法的使用)
- word转HTML 升级版
- JSP完成数据分页显示
- linux中静态动态获取网络
- Linux学习日记-
- 线程中的问题
- opencv学习——cv2.xfeatures2d.SIFT_create().detectAndCompute()
- 2017.10.19开学第八周周中训练总结
- CSS 构造文本
- 查看linux系统是32位还是64位