POI实现DOC/DOCX转HTML

来源:互联网 发布:不在淘宝 提货码 编辑:程序博客网 时间:2024/05/17 01:17

1.使用HWPF处理DOC

/**
 * Converts binary Word documents (.doc) to an HTML string with Apache POI's
 * {@code HWPFDocument} / {@code WordToHtmlConverter}.
 *
 * <p>Embedded pictures are written to {@code UeConstants.IMAGE_PATH} as
 * {@code <timestamp>_<suggestedName>}, and the generated HTML references them
 * through a URL built from the optional web context name and the
 * {@code UeConstants} path segments.
 */
public class DocToHtml {

    /** Encoding announced to the XSLT serializer that renders the HTML. */
    private static final String ENCODING = "UTF-8";

    /**
     * Converts the Word file at {@code wordPath} using an empty context prefix.
     *
     * @param wordPath path of the .doc file
     * @return the HTML, or an empty string when the path is null/empty or does
     *         not denote an existing regular file
     */
    public static String convert2Html(String wordPath)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(wordPath, "");
    }

    /**
     * Converts the Word file at {@code wordPath}.
     *
     * @param wordPath path of the .doc file
     * @param context  web context name used as the first segment of picture URLs
     * @return the HTML, or an empty string when the path is null/empty or does
     *         not denote an existing regular file
     */
    public static String convert2Html(String wordPath, String context)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        if (wordPath == null || "".equals(wordPath)) {
            return "";
        }
        File file = new File(wordPath);
        if (!file.exists() || !file.isFile()) {
            return "";
        }
        // This method opens the stream, so it is responsible for closing it.
        InputStream in = new FileInputStream(file);
        try {
            return convert2Html(in, context);
        } finally {
            in.close();
        }
    }

    /**
     * Converts a Word document read from {@code is} using an empty context prefix.
     * The stream is not closed by this method.
     */
    public static String convert2Html(InputStream is)
            throws TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(is, "");
    }

    /**
     * Converts a Word document read from {@code is}, taking the context prefix
     * from {@code req.getContextPath()}. The stream is not closed by this method.
     */
    public static String convert2Html(InputStream is, HttpServletRequest req)
            throws TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(is, req.getContextPath());
    }

    /**
     * Converts a Word document read from {@code is} to HTML and stores its
     * pictures on disk.
     *
     * @param is      stream positioned at the start of a .doc file; left open
     *                for the caller to close
     * @param context web context name; slashes and backslashes are stripped,
     *                and {@code null} is treated like an empty string
     * @return the serialized HTML document
     */
    public static String convert2Html(InputStream is, final String context)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument = new HWPFDocument(is);
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());

        // One timestamp per conversion keeps the picture names of one document together.
        final String prefix = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
        // The URL base does not depend on the picture, so compute it once.
        final String imageUrlBase = buildImageUrlBase(context);
        // Remember exactly the bytes handed to the callback under the name that
        // ends up in the HTML, so the files on disk always match the links.
        final Map<String, byte[]> savedPictures = new HashMap<String, byte[]>();

        converter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                savedPictures.put(suggestedName, content);
                return imageUrlBase + prefix + "_" + suggestedName;
            }
        });
        converter.processDocument(wordDocument);

        writePictures(savedPictures, prefix);

        StringWriter writer = new StringWriter();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(
                new DOMSource(converter.getDocument()),
                new StreamResult(writer));
        return writer.toString();
    }

    /** Builds the URL prefix (ending in "/") that precedes every picture file name. */
    private static String buildImageUrlBase(String context) {
        String stripped = context == null
                ? ""
                : context.replace("\\", "").replace("/", "");
        String prefixContext = StringUtils.isNotBlank(stripped)
                ? "/" + stripped + "/"
                : stripped;
        return prefixContext
                + UeConstants.VIEW_IMAGE_PATH + "/" + UeConstants.UEDITOR_PATH
                + "/" + UeConstants.UEDITOR_IMAGE_PATH + "/";
    }

    /** Writes each recorded picture to {@code UeConstants.IMAGE_PATH}, closing every file. */
    private static void writePictures(Map<String, byte[]> pictures, String prefix)
            throws IOException {
        for (Map.Entry<String, byte[]> entry : pictures.entrySet()) {
            String target = UeConstants.IMAGE_PATH + "/" + prefix + "_" + entry.getKey();
            try {
                FileOutputStream out = new FileOutputStream(target);
                try {
                    byte[] content = entry.getValue();
                    if (content != null) {
                        out.write(content);
                    }
                } finally {
                    out.close();
                }
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be stored must not abort
                // the HTML conversion; report it and continue with the rest.
                e.printStackTrace();
            }
        }
    }
}


2.使用XWPFDocument处理DOCX

public class XHTMLConverterTestCase    extends AbstractXWPFPOIConverterTest{    protected void doGenerate( String fileInName )        throws IOException    {        doGenerateSysOut( fileInName );        doGenerateHTMLFile( fileInName );    }    protected void doGenerateSysOut( String fileInName )        throws IOException    {        long startTime = System.currentTimeMillis();        XWPFDocument document = new XWPFDocument( AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName ) );        XHTMLOptions options = XHTMLOptions.create().indent( 4 );        OutputStream out = System.out;        XHTMLConverter.getInstance().convert( document, out, options );        System.err.println( "Elapsed time=" + ( System.currentTimeMillis() - startTime ) + "(ms)" );    }    protected void doGenerateHTMLFile( String fileInName )        throws IOException    {        String root = "target";        String fileOutName = root + "/" + fileInName + ".html";        long startTime = System.currentTimeMillis();        XWPFDocument document = new XWPFDocument( AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName ) );        XHTMLOptions options = XHTMLOptions.create();// .indent( 4 );        // Extract image        File imageFolder = new File( root + "/images/" + fileInName );        options.setExtractor( new FileImageExtractor( imageFolder ) );        // URI resolver        options.URIResolver( new FileURIResolver( imageFolder ) );        OutputStream out = new FileOutputStream( new File( fileOutName ) );        XHTMLConverter.getInstance().convert( document, out, options );        System.out.println( "Generate " + fileOutName + " with " + ( System.currentTimeMillis() - startTime ) + " ms." );    }}



项目下载地址:http://download.csdn.net/detail/luka2008/7902285

2 5
原创粉丝点击