POI Word DOC格式转Html

来源:互联网 发布:pascal语言编程设计 编辑:程序博客网 时间:2024/05/22 03:44
package com.office;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Converts office documents to HTML using Apache POI.
 *
 * <p>Only the binary Word format ({@code .doc}) is implemented. The other
 * extensions this class recognises ({@code docx}, {@code ppt}, {@code pptx},
 * {@code xls}, {@code xlsx}) are placeholders and are reported as failures.
 */
public class OfficeConvert {

    /**
     * Returns the lower-cased extension of a file name, without the dot.
     *
     * @param f the file whose name is inspected; must not be {@code null}
     * @return the extension in lower case, or {@code null} when the name has
     *         no dot, starts with its only dot (e.g. {@code .hidden}) or ends
     *         with a dot
     */
    public static String GetFileExt(File f) {
        String name = f.getName();
        int dot = name.lastIndexOf('.');
        if (dot > 0 && dot < name.length() - 1) {
            // Locale.ROOT keeps the result independent of the default locale
            // (e.g. the Turkish dotless-i mapping).
            return name.substring(dot + 1).toLowerCase(Locale.ROOT);
        }
        return null;
    }

    /**
     * Converts the document at {@code input} to an HTML file at {@code output}.
     *
     * <p>Embedded pictures are written to a sibling directory named
     * {@code output + ".img"} and referenced from the HTML by a path relative
     * to the HTML file.
     *
     * @param input  path of the source document
     * @param output path of the HTML file to create
     * @return {@code true} when the HTML file was written completely;
     *         {@code false} when the input does not exist, its format is not
     *         supported, or any error occurred (the stack trace is printed)
     */
    public static boolean ConvertToHtml(final String input, final String output) {
        File file = new File(input);
        if (!file.exists()) {
            return false;
        }

        // Constant-first comparison: the extension is null for names without one.
        String ext = GetFileExt(file);
        try {
            if ("doc".equals(ext)) {
                convertDocToHtml(file, output);
                return true;
            }
            // docx, ppt, pptx, xls and xlsx are not implemented yet.
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Renders a binary Word document as HTML, saving embedded pictures next to it.
     *
     * @param source the {@code .doc} file to read
     * @param output path of the HTML file to create
     * @throws Exception if reading, converting or writing fails
     */
    private static void convertDocToHtml(File source, final String output) throws Exception {
        // The input stream stays open for the whole conversion and is closed
        // even if a later step, or closing the output, fails.
        try (FileInputStream in = new FileInputStream(source)) {
            HWPFDocument hwpfDocument = new HWPFDocument(in);
            Document document = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .newDocument();
            WordToHtmlConverter converter = new WordToHtmlConverter(document);

            final File imageDir = new File(output + ".img");
            if (!imageDir.isDirectory()) {
                imageDir.mkdirs();
            }

            // Save each embedded image and return the path used in the HTML.
            converter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType,
                        String suggestedName, float widthInches, float heightInches) {
                    File imageFile = new File(imageDir, suggestedName);
                    try (FileOutputStream fos = new FileOutputStream(imageFile)) {
                        fos.write(content);
                    } catch (Exception e) {
                        // Deliberately best-effort: one picture that cannot be
                        // written must not abort the whole conversion.
                        e.printStackTrace();
                    }
                    return imageDir.getName() + "/" + suggestedName;
                }
            });
            converter.processDocument(hwpfDocument);

            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            transformer.setOutputProperty(OutputKeys.METHOD, "html");

            // The output file is opened only after processing succeeded.
            // Closing the writer flushes its encoder buffer, so the HTML
            // cannot be left truncated on disk.
            try (FileOutputStream fos = new FileOutputStream(output);
                    OutputStreamWriter writer = new OutputStreamWriter(fos, "UTF-8")) {
                transformer.transform(new DOMSource(converter.getDocument()),
                        new StreamResult(writer));
            }
        }
    }
}