POI实现DOC/DOCX转HTML
来源:互联网 发布:不在淘宝 提货码 编辑:程序博客网 时间:2024/05/17 01:17
1.使用HWPF处理DOC
/**
 * Converts a binary Word document (.doc) to an HTML string with Apache POI's
 * {@code HWPFDocument} and {@code WordToHtmlConverter}.
 *
 * <p>Pictures found in the document are written to {@code UeConstants.IMAGE_PATH} and
 * referenced from the generated HTML through a URL built from the supplied web context
 * and the {@code UeConstants} path segments. Every file written during one conversion
 * shares a timestamp prefix.
 */
public class DocToHtml {

    /** Encoding name passed to the XML transformer that serializes the HTML. */
    private static final String encoding = "UTF-8";

    /**
     * Converts the Word file at {@code wordPath} using an empty web context.
     *
     * @param wordPath path of the .doc file; may be {@code null} or empty
     * @return the generated HTML, or {@code ""} when the path is null, empty, or does not
     *         denote an existing regular file
     */
    public static String convert2Html(String wordPath)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(wordPath, "");
    }

    /**
     * Converts the Word file at {@code wordPath}; picture URLs are prefixed with
     * {@code context}.
     *
     * @param wordPath path of the .doc file; may be {@code null} or empty
     * @param context  web context used as the first segment of picture URLs
     * @return the generated HTML, or {@code ""} when the path is null, empty, or does not
     *         denote an existing regular file
     */
    public static String convert2Html(String wordPath, String context)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        if (wordPath == null || "".equals(wordPath)) {
            return "";
        }
        File file = new File(wordPath);
        if (!file.exists() || !file.isFile()) {
            return "";
        }
        // This method opens the stream, so it is also responsible for closing it.
        InputStream in = new FileInputStream(file);
        try {
            return convert2Html(in, context);
        } finally {
            in.close();
        }
    }

    /**
     * Converts the Word document read from {@code is} using an empty web context.
     * The caller keeps ownership of the stream and must close it.
     */
    public static String convert2Html(InputStream is)
            throws TransformerException, IOException, ParserConfigurationException {
        return convert2Html(is, "");
    }

    /**
     * Converts the Word document read from {@code is}, taking the web context from
     * {@code req.getContextPath()}. The caller keeps ownership of the stream.
     */
    public static String convert2Html(InputStream is, HttpServletRequest req)
            throws TransformerException, IOException, ParserConfigurationException {
        return convert2Html(is, req.getContextPath());
    }

    /**
     * Converts the Word document read from {@code is} to HTML and writes its pictures to
     * {@code UeConstants.IMAGE_PATH}.
     *
     * @param is      stream containing the .doc content; not closed by this method
     * @param context web context for picture URLs; {@code null} is treated as empty
     * @return the serialized HTML document
     * @throws IOException if the document cannot be read or a picture cannot be written
     */
    public static String convert2Html(InputStream is, final String context)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument = new HWPFDocument(is);
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // One timestamp per conversion keeps the file names of this run together.
        final String prefix = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());

        // The URL prefix does not depend on the individual picture, so build it once.
        String strippedContext =
                (context == null ? "" : context).replace("\\", "").replace("/", "");
        final String urlBase =
                (StringUtils.isNotBlank(strippedContext)
                        ? "/" + strippedContext + "/"
                        : strippedContext)
                + UeConstants.VIEW_IMAGE_PATH + "/"
                + UeConstants.UEDITOR_PATH + "/"
                + UeConstants.UEDITOR_IMAGE_PATH + "/"
                + prefix + "_";

        // Maps a picture key (see pictureKey) to the file name the converter suggested.
        final Map<Object, String> suffixMap = new HashMap<Object, String>();
        converter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                suffixMap.put(pictureKey(content), suggestedName);
                return urlBase + suggestedName;
            }
        });
        converter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                String suggestedName = suffixMap.get(pictureKey(pic.getContent()));
                if (suggestedName == null) {
                    // The converter never asked for this picture, so the HTML does not
                    // reference it; do not create a stray "<prefix>_null" file.
                    continue;
                }
                try {
                    FileOutputStream out = new FileOutputStream(
                            UeConstants.IMAGE_PATH + "/" + prefix + "_" + suggestedName);
                    try {
                        pic.writeImageContent(out);
                    } finally {
                        out.close();
                    }
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be stored must not abort the
                    // conversion of the text.
                    e.printStackTrace();
                }
            }
        }

        StringWriter writer = new StringWriter();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(writer));
        writer.close();
        return writer.toString();
    }

    /**
     * Derives the lookup key that links a picture seen by the converter callback to the
     * same picture in the document's picture table.
     *
     * <p>NOTE(review): the key is only the character count of the bytes decoded with the
     * platform charset, spaces removed, so two different pictures can share a key and
     * the later one then replaces the earlier name. Consider a content-based key.
     */
    private static Object pictureKey(byte[] content) {
        return new String(content).replace(" ", "").length();
    }
}
2.使用XWPFDocument处理DOCX
public class XHTMLConverterTestCase extends AbstractXWPFPOIConverterTest{ protected void doGenerate( String fileInName ) throws IOException { doGenerateSysOut( fileInName ); doGenerateHTMLFile( fileInName ); } protected void doGenerateSysOut( String fileInName ) throws IOException { long startTime = System.currentTimeMillis(); XWPFDocument document = new XWPFDocument( AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName ) ); XHTMLOptions options = XHTMLOptions.create().indent( 4 ); OutputStream out = System.out; XHTMLConverter.getInstance().convert( document, out, options ); System.err.println( "Elapsed time=" + ( System.currentTimeMillis() - startTime ) + "(ms)" ); } protected void doGenerateHTMLFile( String fileInName ) throws IOException { String root = "target"; String fileOutName = root + "/" + fileInName + ".html"; long startTime = System.currentTimeMillis(); XWPFDocument document = new XWPFDocument( AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName ) ); XHTMLOptions options = XHTMLOptions.create();// .indent( 4 ); // Extract image File imageFolder = new File( root + "/images/" + fileInName ); options.setExtractor( new FileImageExtractor( imageFolder ) ); // URI resolver options.URIResolver( new FileURIResolver( imageFolder ) ); OutputStream out = new FileOutputStream( new File( fileOutName ) ); XHTMLConverter.getInstance().convert( document, out, options ); System.out.println( "Generate " + fileOutName + " with " + ( System.currentTimeMillis() - startTime ) + " ms." ); }}
项目下载地址:http://download.csdn.net/detail/luka2008/7902285
2 5
- POI实现DOC/DOCX转HTML
- 【word | html】word(doc | docx) 转 html
- Doc、Docx转成HTML
- 使用POI将office(doc/docx/ppt/pptx/xls/xlsx)文件转html格式(附带源码)
- Apache POI实现doc转html图片显示问题
- POI读取.doc 和.docx的区别
- POI Word DOC格式转Html
- poi实现word文档的导入(针对.doc .docx rtf)
- Converting .docx into (.doc, .pdf, .html)
- word(doc,docx)转换为HTML
- openoffice3.* doc/docx转pdf
- POI处理office文档(doc、docx、xls、xlsx、ppt、pptx)
- poi操作doc、docx、xls、xlsx文档总结
- Apache poi操作doc,docx,xls,xlsx文件
- java使用poi读取doc和docx文件
- JAVA使用POI操作word文档实例,兼容doc,docx
- poi修改word文档doc/docx不支持图片
- poi将word docx转化为html
- Android基础学习笔记(一)
- 使用HTML5抓取 Audio & Video
- Spring-AOP注解
- ZOJ 3261 Connections in GalaxyWar(并查集:离线处理)
- 详解策略路由与路由策略
- POI实现DOC/DOCX转HTML
- Android逆向分析必备网址大全
- JSTL使用方法介绍
- 学习linux入门经典书籍
- SDUT 选夫婿1
- 初入Cocos2d
- sharepoint 2010 在aspx 写lambda 时错误
- ExtJS中的Cookie处理
- Linux(ubuntu) eclipse svn搭环境