WORD03、WORD07 和 PPT 转 HTML

来源:互联网 发布:英语听力考试软件 编辑:程序博客网 时间:2024/05/20 04:49

项目中需要把 OFFICE 系列文档转成 HTML 展示出来,所以写了个工具类,支持 WORD03、WORD07 和 PPT 的转换;由于基本不需要 EXCEL,所以没有实现。由于 POI 是将整个文件读入到内存中,所以无法转换过大的 OFFICE 文档。


需要引入的JAR包(MAVEN):

<dependency>            <groupId>org.apache.poi</groupId>            <artifactId>poi</artifactId>            <version>3.9</version>        </dependency>        <dependency>            <groupId>org.apache.poi</groupId>            <artifactId>poi-ooxml</artifactId>            <version>3.9</version>        </dependency>        <dependency>            <groupId>org.apache.poi</groupId>            <artifactId>poi-scratchpad</artifactId>            <version>3.9</version>        </dependency>          <dependency>            <groupId>fr.opensagres.xdocreport</groupId>            <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>            <version>1.0.4</version>        </dependency> 

xdocreport所需要的包如果找不到可以在下面地址下载:

下载地址


代码:

package com.resource.util;

import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.image.BufferedImage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Objects;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.RichTextRun;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.HtmlDocumentFacade;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.xhtml.DefaultContentHandlerFactory;
import org.apache.poi.xwpf.converter.xhtml.IContentHandlerFactory;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.ContentHandler;

/**
 * Converts Word binary (.doc), Word OOXML (.docx) and PowerPoint binary (.ppt)
 * files to HTML.
 *
 * <p>Embedded pictures (Word) and rendered slide images (PowerPoint) are written
 * to {@code imageSavePath}; the generated HTML refers to them through
 * {@code imageUrl + <image name>}.
 */
public class OfficeTool {

    /** Charset used whenever this class serializes, writes or reads back HTML. */
    private static final String ENCODING = "GB2312";

    /** Font names that {@link #isTrueType(String)} recognises. */
    private static final String[] TRUE_TYPE_FONTS = { "Tahoma", "Times New Roman", "Calibri", "Arial" };

    private String path; // source document path
    private String imageSavePath; // directory the images are written to
    private String imageUrl; // prefix used for the image "src" attribute
    private String output; // target HTML file; null when built with the 3-argument constructor
    private HtmlDocumentFacade htmlDocumentFacade;
    private Element window;
    private Element topbar;
    private Element info;
    private Element outline;
    private Element page;
    private Element ul;

    /**
     * Derives every location from the source file: images go to
     * {@code <parent>/<name>_img/} and the HTML to {@code <parent>/<name>.html}.
     *
     * @param file the office document to convert
     */
    public OfficeTool(File file) {
        this.path = file.getAbsolutePath();
        String filename = this.getFileName(path, false);
        // Take the parent of the absolute form: getParentFile() on a bare relative
        // name such as new File("a.doc") returns null.
        String parentDir = file.getAbsoluteFile().getParentFile().getAbsolutePath();
        this.imageSavePath = parentDir + "/" + filename + "_img/";
        this.imageUrl = this.imageSavePath.replaceAll("([D][:])*[/|\\\\]+", "/");
        // Debug output kept from the original implementation.
        System.out.println(imageUrl);
        this.output = parentDir + "/" + filename + ".html";
        new File(this.imageSavePath).mkdirs();
    }

    /**
     * Creates a converter without an output file; only the {@code getXxHtml()}
     * methods for Word documents are usable on such an instance.
     *
     * @param path          source document path
     * @param imageSavePath directory the images are written to (created if missing)
     * @param imageUrl      prefix for the image "src" attribute
     */
    public OfficeTool(String path, String imageSavePath, String imageUrl) {
        this.path = path;
        this.imageSavePath = imageSavePath;
        this.imageUrl = imageUrl;
        new File(imageSavePath).mkdirs();
    }

    /**
     * @param path          source document path
     * @param imageSavePath directory the images are written to (created if missing)
     * @param imageUrl      prefix for the image "src" attribute
     * @param output        path of the HTML file to write
     */
    public OfficeTool(String path, String imageSavePath, String imageUrl, String output) {
        this.path = path;
        this.imageSavePath = imageSavePath;
        this.imageUrl = imageUrl;
        this.output = output;
        new File(imageSavePath).mkdirs();
    }

    /**
     * Converts the source as a Word binary (.doc) document.
     *
     * @return the HTML, or an empty string when the conversion failed
     */
    public String get03Html() {
        String html = "";
        try {
            html = convert2Html(path);
        } catch (TransformerException | IOException | ParserConfigurationException e) {
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Converts the source as a Word OOXML (.docx) document.
     *
     * @return the HTML, or an empty string when the conversion failed
     */
    public String get07Html() {
        String html = "";
        try {
            html = this.doGenerateHTMLFile();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Converts the source as a PowerPoint (.ppt) file, writes the page to the
     * output file and reads it back.
     *
     * @return the HTML, or an empty string when the conversion failed
     */
    public String getPPTHtml() {
        String html = "";
        try {
            this.convert(this.path, this.output);
            html = MyFileUtils.fileToString(output, ENCODING);
        } catch (IOException | TransformerException e) {
            e.printStackTrace();
        }
        return html;
    }

    /**
     * Writes {@code content} to the output file.
     *
     * @return the output file path
     */
    public String save03AsHtml(String content) {
        this.writeFile(content, output);
        return output;
    }

    /**
     * Writes {@code content} to the output file.
     *
     * @return the output file path
     */
    public String save07AsHtml(String content) {
        this.writeFile(content, output);
        return output;
    }

    /** Writes {@code content} to {@code path} using {@link #ENCODING}; I/O errors are printed, not thrown. */
    private void writeFile(String content, String path) {
        // Same exception type as before (new File(null) also throws NPE), but with a usable message.
        Objects.requireNonNull(path, "output path is not set");
        try (FileOutputStream fos = new FileOutputStream(new File(path));
                BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING))) {
            bw.write(content);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    /** Creates a serializer configured for indented HTML output in {@link #ENCODING}. */
    private static Transformer newHtmlSerializer() throws TransformerException {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        return serializer;
    }

    /**
     * Converts a Word binary document to HTML and stores its pictures in
     * {@code imageSavePath}.
     *
     * @param fileName path of the .doc file
     * @return the serialized HTML
     */
    private String convert2Html(String fileName)
            throws TransformerException, IOException, ParserConfigurationException {
        HWPFDocument wordDocument;
        try (FileInputStream in = new FileInputStream(fileName)) {
            wordDocument = new HWPFDocument(new POIFSFileSystem(in));
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                    float widthInches, float heightInches) {
                return imageUrl + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        // Save the pictures next to the HTML so the generated src attributes resolve.
        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            for (Picture pic : pics) {
                try (FileOutputStream imageOut = new FileOutputStream(imageSavePath + pic.suggestFullFileName())) {
                    pic.writeImageContent(imageOut);
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be created does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        newHtmlSerializer().transform(new DOMSource(htmlDocument), new StreamResult(out));
        // Decode with the charset the serializer encoded with, not the platform default.
        return out.toString(ENCODING);
    }

    /**
     * Converts a Word OOXML document to XHTML; images are extracted to
     * {@code imageSavePath}.
     *
     * @return the generated XHTML
     * @throws IOException if the document cannot be read or converted
     */
    private String doGenerateHTMLFile() throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (FileInputStream in = new FileInputStream(new File(path))) {
            XWPFDocument document = new XWPFDocument(in);
            XHTMLOptions options = XHTMLOptions.create();
            // Extract images to disk.
            options.setExtractor(new FileImageExtractor(new File(imageSavePath)));
            // Map each extracted image name to the URL used in the HTML.
            options.URIResolver(new IURIResolver() {
                @Override
                public String resolve(String uri) {
                    return imageUrl + uri;
                }
            });
            XHTMLConverter.getInstance().convert(document, out, options);
        }
        // NOTE(review): decoded with the platform default charset, as before; the charset the
        // converter writes with is not visible from this file - confirm before changing.
        return out.toString();
    }

    /**
     * PPT to HTML. Does nothing when {@code filePath} is not a readable
     * {@code .ppt} file. A failure while rendering slides is printed and the
     * page built so far is still saved.
     *
     * @param filePath PPT file path
     * @param output   HTML file path
     * @throws IOException          if the HTML file cannot be written
     * @throws TransformerException if the page cannot be serialized
     */
    public void convert(String filePath, String output) throws IOException, TransformerException {
        this.init();
        File pptFile = new File(filePath);
        if (!isPPt(pptFile)) {
            return;
        }
        // Fail before rendering any slide instead of after all of them.
        Objects.requireNonNull(output, "output path is not set");
        try {
            process(pptFile, output);
        } catch (Exception e) {
            e.printStackTrace();
        }
        saveAsHtml(output, htmlDocumentFacade.getDocument());
    }

    /**
     * Renders every slide to a '.png' file and adds the matching outline entry
     * and slide block to the page.
     *
     * @param pptFile the presentation
     * @param output  html save path (not used by the rendering itself)
     * @throws Exception if the presentation cannot be read or an image cannot be written
     */
    private void process(File pptFile, String output) throws Exception {
        SlideShow ppt;
        try (FileInputStream is = new FileInputStream(pptFile)) {
            ppt = new SlideShow(is);
        }
        Dimension pgsize = ppt.getPageSize();
        org.apache.poi.hslf.model.Slide[] slides = ppt.getSlides();
        this.info.appendChild(this.htmlDocumentFacade.createText(this.getFileName(pptFile.getPath(), true)));

        String baseName = getFileName(pptFile.getPath(), false);
        String imgPath = this.imageSavePath;
        if (slides.length > 0) {
            new File(imgPath).mkdir();
        }
        for (int i = 0; i < slides.length; i++) {
            org.apache.poi.hslf.model.Slide slide = slides[i];
            addSlideTitle(slide);
            replaceFonts(slide);
            BufferedImage img = renderSlide(slide, pgsize);
            String imgName = baseName + "_" + (i + 1) + ".png";
            try (FileOutputStream out = new FileOutputStream(imgPath + imgName)) {
                javax.imageio.ImageIO.write(img, "png", out);
            }
            addSlide(imageUrl + imgName, slide.getSlideNumber());
        }
    }

    /** Re-applies the font name of every rich text run, substituting the fonts matched by isTrueType. */
    private void replaceFonts(org.apache.poi.hslf.model.Slide slide) {
        for (TextRun textRun : slide.getTextRuns()) {
            for (RichTextRun richText : textRun.getRichTextRuns()) {
                String fontName = richText.getFontName();
                // Workaround carried over from the original code ("POI bug???").
                if (isTrueType(fontName)) {
                    richText.setFontName("宋体");
                } else {
                    richText.setFontName(fontName);
                }
            }
        }
    }

    /** Draws one slide onto a white RGB image of the given page size. */
    private BufferedImage renderSlide(org.apache.poi.hslf.model.Slide slide, Dimension pgsize) {
        BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = img.createGraphics();
        try {
            graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION,
                    RenderingHints.VALUE_INTERPOLATION_BICUBIC);
            graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS,
                    RenderingHints.VALUE_FRACTIONALMETRICS_ON);
            graphics.setColor(Color.white);
            // fillRect paints with the colour set above; clearRect would use the
            // surface background colour and ignore it.
            graphics.fillRect(0, 0, pgsize.width, pgsize.height);
            slide.draw(graphics);
        } finally {
            graphics.dispose();
        }
        return img;
    }

    /**
     * Adds the outline entry for a slide: a list item linking to the slide's anchor.
     *
     * @param slide the slide to list
     */
    private void addSlideTitle(org.apache.poi.hslf.model.Slide slide) {
        String title = StringUtils.isBlank(slide.getTitle())
                ? slide.getSlideNumber() + ": Untitled"
                : slide.getSlideNumber() + ": " + slide.getTitle();
        Element list = htmlDocumentFacade.createListItem();
        Element a = htmlDocumentFacade.createHyperlink("#link" + slide.getSlideNumber());
        a.appendChild(htmlDocumentFacade.createText(title));
        list.appendChild(a);
        this.ul.appendChild(list);
    }

    /**
     * Adds a slide block (anchor plus image) to the page area.
     *
     * @param imagePath value of the image "src" attribute
     * @param index     slide number used for the anchor name
     */
    private void addSlide(String imagePath, int index) {
        Element slideBlock = htmlDocumentFacade.createBlock();
        slideBlock.setAttribute("class", "slide");
        // Anchor targeted by the outline links.
        Element link = htmlDocumentFacade.createBookmark("link" + index);
        Element img = htmlDocumentFacade.createImage(imagePath);
        slideBlock.appendChild(link);
        slideBlock.appendChild(img);
        this.page.appendChild(slideBlock);
    }

    /**
     * Builds the empty page skeleton: a window block containing the info,
     * outline (with its list) and page blocks, plus the common style sheet.
     *
     * @throws IllegalStateException if no DOM document can be created
     */
    private void init() {
        Document document;
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            document = builder.newDocument();
        } catch (ParserConfigurationException e) {
            // Previously swallowed, which only surfaced later as a NullPointerException.
            throw new IllegalStateException("Unable to create the DOM document for the PPT page", e);
        }
        htmlDocumentFacade = new HtmlDocumentFacade(document);

        // Global layout.
        window = htmlDocumentFacade.createBlock();
        window.setAttribute("id", "window");
        info = htmlDocumentFacade.createBlock();
        info.setAttribute("id", "info");
        outline = htmlDocumentFacade.createBlock();
        outline.setAttribute("id", "outline");
        page = htmlDocumentFacade.createBlock();
        page.setAttribute("id", "page");

        // Outline layout.
        ul = htmlDocumentFacade.createUnorderedList();
        outline.appendChild(ul);

        window.appendChild(info);
        window.appendChild(outline);
        window.appendChild(page);
        htmlDocumentFacade.getBody().appendChild(window);

        setCommonStyle(htmlDocumentFacade.getDocument());
    }

    /**
     * Serializes {@code document} as HTML into the file {@code output}, using
     * {@link #ENCODING} both for the declared encoding and for the bytes written.
     */
    private void saveAsHtml(String output, org.w3c.dom.Document document)
            throws IOException, TransformerException {
        Transformer serializer = newHtmlSerializer();
        serializer.setOutputProperty(OutputKeys.STANDALONE, "yes");
        serializer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD HTML 4.01 Transitional//EN");
        // An explicit charset is required: FileWriter would encode with the platform default.
        try (FileOutputStream fos = new FileOutputStream(output);
                BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fos, ENCODING))) {
            serializer.transform(new DOMSource(document), new StreamResult(out));
        }
    }

    /** Returns true when the file is readable and its extension is "ppt" (any letter case). */
    private static boolean isPPt(File file) {
        if (!file.canRead()) {
            return false;
        }
        String name = file.getName();
        int sep = name.lastIndexOf('.');
        return sep >= 0 && "ppt".equalsIgnoreCase(name.substring(sep + 1));
    }

    /**
     * Tells whether {@code fontName} is one of Tahoma, Times New Roman, Calibri
     * or Arial (exact, case-sensitive match).
     *
     * @param fontName font name to test, may be null
     * @return true when the name is in the list
     */
    public static boolean isTrueType(String fontName) {
        for (String type : TRUE_TYPE_FONTS) {
            if (type.equals(fontName)) {
                return true;
            }
        }
        return false;
    }

    /** Appends the page CSS to the first style element of the document; does nothing if there is none. */
    private void setCommonStyle(Document document) {
        Element styleSheet = (Element) document.getElementsByTagName("style").item(0);
        if (styleSheet == null) {
            return;
        }
        String sep = "\n";
        String[] rules = {
            "html{height: 100%;overflow-y:hidden;}",
            "body{height:100%;overflow-y: hidden;margin:0; background:#bdc2cd}",
            "#window{min-width: 800px;height:100%;}",
            "#info{font-size: 24px;font-weight: 800;border-bottom:2px solid gray;height:5%; background:#eee;padding-left:5px;}",
            "#outline{float:left;height:95%;width:20%;overflow-y: auto; overflow-x:hidden;background:#fff; "
                    + "margin-right:3%; -moz-box-shadow: 4px 4px 12px #2b2b2b;-webkit-box-shadow: 4px 4px 12px #2b2b2b;"
                    + "box-shadow: 4px 4px 12px #2b2b2b;}",
            "#outline ul li {list-style:none;line-height: 25px;}",
            "#outline ul li a{text-decoration:none;white-space:nowrap;text-overflow:ellipsis;}",
            "#page{float:left;overflow-y: auto;overflow-x:hidden;height:95%;width:77%;margin-right:-20%;background:#bdc2cd; text-align:center;}",
            "#page div{width:100%;height:100%;}"
        };
        StringBuilder sb = new StringBuilder();
        sb.append(sep);
        for (String rule : rules) {
            sb.append(rule);
            sb.append(sep);
        }
        styleSheet.appendChild(document.createTextNode(sb.toString()));
    }

    /**
     * Returns the last path segment, splitting on both '/' and '\'.
     *
     * @param path     file path; a null or blank path yields an empty string
     * @param issuffix true to keep the extension, false to drop everything from the last dot
     * @return the file name, never null
     */
    private String getFileName(String path, boolean issuffix) {
        if (StringUtils.isBlank(path)) {
            return "";
        }
        // Use whichever separator comes last so mixed-separator paths work.
        int index = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        String name = path.substring(index + 1);
        if (issuffix) {
            return name;
        }
        // Look for the dot inside the name only, so a dot in a directory name is ignored.
        int dot = name.lastIndexOf('.');
        return dot < 0 ? name : name.substring(0, dot);
    }

    /**
     * Returns everything before the last separator ('/' or '\').
     *
     * @param path file path; a null or blank path, or one without a separator, yields an empty string
     * @return the directory part, never null
     */
    private String getFilePath(String path) {
        if (StringUtils.isBlank(path)) {
            return "";
        }
        int index = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        return index < 0 ? "" : path.substring(0, index);
    }
}

0 0
原创粉丝点击