word转html工具
来源:互联网 发布:游族网络林奇妻子韩薇 编辑:程序博客网 时间:2024/05/17 11:56
import java.io.BufferedWriter;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.OutputStreamWriter;import java.util.List;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import org.apache.commons.io.output.ByteArrayOutputStream;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.apache.poi.xwpf.converter.core.FileImageExtractor;import org.apache.poi.xwpf.converter.core.FileURIResolver;import org.apache.poi.xwpf.converter.core.IURIResolver;import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;import org.apache.poi.xwpf.usermodel.XWPFDocument;import org.apache.poi.xwpf.usermodel.XWPFPictureData;import org.w3c.dom.Document;public class WordToHtml { public static void main(String[] args) { try { wordToHtml("d:\\12.docx", "d:\\", "123.html"); wordToHtml("d:\\2.doc", "d:\\", "12.html"); } catch (TransformerException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParserConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static void wordToHtml(String wordPath,String htmlPath,String newFilename) throws TransformerException, IOException, ParserConfigurationException { convert2Html(wordPath, htmlPath, newFilename); } public static void writeFile(String content, String path) { FileOutputStream fos = null; BufferedWriter bw = null; try { File file = new File(path); if(!file.exists()){ } fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos)); bw.write(content); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (bw != null) bw.close(); if (fos != null) fos.close(); } catch (IOException ie) { } } } /** * 将word转换成html * 支持 .doc and .docx * @param fileName word文件名 * @param outPutFilePath html存储路径 * @param newFileName html名 * @throws TransformerException * @throws IOException * @throws ParserConfigurationException */ public static void convert2Html(String fileName, String outPutFilePath,String newFileName) throws TransformerException, IOException, ParserConfigurationException { String substring = fileName.substring(fileName.lastIndexOf(".")+1); ByteArrayOutputStream out = new ByteArrayOutputStream(); /** * word2007和word2003的构建方式不同, * 前者的构建方式是xml,后者的构建方式是dom树。 * 文件的后缀也不同,前者后缀为.docx,后者后缀为.doc * 相应的,apache.poi提供了不同的实现类。 */ if("docx".equals(substring)){// writeFile(new String("<html><head> <meta http-equiv=\"content-type\" content=\"text/html\" charset=\"utf-8\"/></head>对不起,.docx格式的word文档,暂时不能生成预览</html>".getBytes("utf-8")), outPutFilePath+newFileName); //step 1 : load DOCX into XWPFDocument InputStream inputStream = new FileInputStream(new File(fileName)); XWPFDocument document = new XWPFDocument(inputStream); //step 2 : prepare XHTML options final String imageUrl = ""; XHTMLOptions options = XHTMLOptions.create(); options.setExtractor(new FileImageExtractor(new File(outPutFilePath + imageUrl))); options.setIgnoreStylesIfUnused(false); options.setFragment(true); options.URIResolver(new IURIResolver() {// @Override 重写的方法,加上这个报错,你看看是啥问题 public String resolve(String uri) { return imageUrl + uri; } }); //step 3 : convert XWPFDocument to XHTML XHTMLConverter.getInstance().convert(document, out, options); }else{ HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder() .newDocument()); wordToHtmlConverter.setPicturesManager( new PicturesManager() { public String savePicture( byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches ) { return suggestedName; } } ); wordToHtmlConverter.processDocument(wordDocument); //save pictures List pics=wordDocument.getPicturesTable().getAllPictures(); if(pics!=null){ for(int i=0;i<pics.size();i++){ Picture pic = (Picture)pics.get(i); System.out.println(); try { pic.writeImageContent(new FileOutputStream(outPutFilePath + pic.suggestFullFileName())); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument = wordToHtmlConverter.getDocument(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); //这个应该是转换成xml的 Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } out.close(); writeFile(new String(out.toByteArray()), outPutFilePath+newFileName); }}
阅读全文
0 0
- word转html工具
- 自动化工具word文档批量转html
- C#实现的word转html命令行工具
- 【word | html】word(doc | docx) 转 html
- java word转HTML
- Java Word转Html
- html转word
- HTML 转 Word
- poi word转html
- Java Word转Html
- java word转html
- php word 转 html
- word 转html
- HTML2DOC html转word
- php word 转 html
- Word转HTML
- word转html
- word转html
- Timesten for windows 配置流程
- 单例模式的七种写法
- 看我如何利用简单的配置错误“渗透”BBC新闻网
- Delphi窗体任务栏隐藏和透明
- Spring Cloud微服务云架构的代码结构
- word转html工具
- centos7安装配置oracle(包括yum下载依赖包)(不含监听配置)
- gulp教程之gulp-less
- PHP WebShell变形技术总结
- Python3爬虫学习笔记(3.正则详解及实例)
- gulp简单使用
- Oracle序列的创建与重置
- C#加密解密
- Spring Cloud微服务架构代码结构详细讲解