java使用openoffice将office系列文档转换为PDF

来源:互联网 发布:贴吧防秒删软件 编辑:程序博客网 时间:2024/05/01 22:34

前导:

  1. 开发过程中经常需要用java将office系列文档转换为PDF,一般使用开源的Apache OpenOffice(并非微软产品)+ jodconverter 实现文档转换。
  2. openoffice既有windows版本也有linux版。不用担心生产环境是linux系统。
  3. 关于linux系统安装openoffice软件请参照:待更新...
  4. java使用SWFTools将PDF转成swf并使用flexpaper播放PDF

1、openoffice依赖jar,以maven为例:

<dependency>
    <groupId>com.artofsolving</groupId>
    <artifactId>jodconverter</artifactId>
    <version>2.2.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>jurt</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>ridl</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>juh</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>unoil</artifactId>
    <version>3.0.1</version>
</dependency>
<!-- jodconverter 2.2.1 must be paired with slf4j-jdk14 at exactly this version; otherwise the logging calls inside the jodconverter source fail at runtime. -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-jdk14</artifactId>
    <version>1.4.3</version>
</dependency>


2、直接上转换代码。转换前需要先启动openoffice服务,并让它监听8100端口,例如:soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard

public void convert(File sourceFile, File targetFile) {try {// 1: 打开连接OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);connection.connect();DocumentConverter converter = new OpenOfficeDocumentConverter(connection);// 2:获取FormatDocumentFormatRegistry factory = new BasicDocumentFormatRegistry();DocumentFormat inputDocumentFormat = factory.getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));DocumentFormat outputDocumentFormat = factory.getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));// 3:执行转换converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);} catch (ConnectException e) {log.info("文档转换PDF失败");}}


3、需注意:jodconverter 在转换2007及以后版本的xxx.docx文档时会报错。原因大家都明白:03版的后缀名是xxx.doc,07及以后版本的后缀名是xxx.docx。

查看jodconverter源码发现documentFormat不支持xxx.docx格式:BasicDocumentFormatRegistry中的public DocumentFormat getFormatByFileExtension(String extension)按后缀名做精确匹配,而默认注册的Word格式只有doc,所以docx匹配不到任何格式。

BasicDocumentFormatRegistry类源码

//// JODConverter - Java OpenDocument Converter// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU// Lesser General Public License for more details.// http://www.gnu.org/copyleft/lesser.html//package com.artofsolving.jodconverter;import java.util.ArrayList;import java.util.Iterator;import java.util.List;public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {private List/*<DocumentFormat>*/ documentFormats = new ArrayList();public void addDocumentFormat(DocumentFormat documentFormat) {documentFormats.add(documentFormat);}protected List/*<DocumentFormat>*/ getDocumentFormats() {return documentFormats;}/** * @param extension the file extension * @return the DocumentFormat for this extension, or null if the extension is not mapped */public DocumentFormat getFormatByFileExtension(String extension) {        if (extension == null) {            return null;        }        String lowerExtension = extension.toLowerCase();for (Iterator it = documentFormats.iterator(); it.hasNext();) {DocumentFormat format = (DocumentFormat) it.next();if (format.getFileExtension().equals(lowerExtension)) {return format;}}return null;}public DocumentFormat getFormatByMimeType(String mimeType) {for (Iterator it = documentFormats.iterator(); it.hasNext();) {DocumentFormat format = (DocumentFormat) it.next();if (format.getMimeType().equals(mimeType)) {return format;}}return null;}}

BasicDocumentFormatRegistry的默认实现类DefaultDocumentFormatRegistry  中支持的文件格式如下

//// JODConverter - Java OpenDocument Converter// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU// Lesser General Public License for more details.// http://www.gnu.org/copyleft/lesser.html//package com.artofsolving.jodconverter;public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {public DefaultDocumentFormatRegistry() {final DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");        pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");addDocumentFormat(pdf);final DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");        swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");addDocumentFormat(swf);final DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");addDocumentFormat(xhtml);// HTML is treated as Text when supplied as input, but as an output it is also// available for exporting Spreadsheet and Presentation formatsfinal DocumentFormat html = new 
DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");addDocumentFormat(html);final DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");odt.setExportFilter(DocumentFamily.TEXT, "writer8");addDocumentFormat(odt);final DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");addDocumentFormat(sxw);final DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");addDocumentFormat(doc);final DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");addDocumentFormat(rtf);final DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");addDocumentFormat(wpd);final DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");        // set FilterName to "Text" to prevent OOo from tryign to display the "ASCII Filter Options" dialog        // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed        txt.setImportOption("FilterName", "Text");txt.setExportFilter(DocumentFamily.TEXT, "Text");addDocumentFormat(txt);final DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");        addDocumentFormat(wikitext);final DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, 
"application/vnd.oasis.opendocument.spreadsheet", "ods");ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");addDocumentFormat(ods);final DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");addDocumentFormat(sxc);final DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");addDocumentFormat(xls);        final DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");        csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");        csv.setImportOption("FilterOptions", "44,34,0");  // Field Separator: ','; Text Delimiter: '"'          csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");        csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");        addDocumentFormat(csv);        final DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");        tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");        tsv.setImportOption("FilterOptions", "9,34,0");  // Field Separator: '\t'; Text Delimiter: '"'        tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");        tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");        addDocumentFormat(tsv);final DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");addDocumentFormat(odp);final DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", 
"sxi");sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");addDocumentFormat(sxi);final DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");addDocumentFormat(ppt);                final DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");        odg.setExportFilter(DocumentFamily.DRAWING, "draw8");        addDocumentFormat(odg);                final DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");        svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");        addDocumentFormat(svg);  }}

 解决方法:重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式

//// JODConverter - Java OpenDocument Converter// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU// Lesser General Public License for more details.// http://www.gnu.org/copyleft/lesser.html//package com.artofsolving.jodconverter;import java.util.ArrayList;import java.util.Iterator;import java.util.List;/** * 重写 BasicDocumentFormatRegistry 文档格式 * @author HuGuangJun */public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {private List/* <DocumentFormat> */ documentFormats = new ArrayList();public void addDocumentFormat(DocumentFormat documentFormat) {documentFormats.add(documentFormat);}protected List/* <DocumentFormat> */ getDocumentFormats() {return documentFormats;}/** * @param extension *            the file extension * @return the DocumentFormat for this extension, or null if the extension *         is not mapped */public DocumentFormat getFormatByFileExtension(String extension) {if (extension == null) {return null;}//将文件名后缀统一转化if (extension.indexOf("doc") >= 0) {extension = "doc";}if (extension.indexOf("ppt") >= 0) {extension = "ppt";}if (extension.indexOf("xls") >= 0) {extension = "xls";}String lowerExtension = extension.toLowerCase();for (Iterator it = documentFormats.iterator(); it.hasNext();) {DocumentFormat format = (DocumentFormat) it.next();if (format.getFileExtension().equals(lowerExtension)) {return format;}}return null;}public DocumentFormat getFormatByMimeType(String mimeType) {for (Iterator it = documentFormats.iterator(); it.hasNext();) {DocumentFormat format = 
(DocumentFormat) it.next();if (format.getMimeType().equals(mimeType)) {return format;}}return null;}}


1 0
原创粉丝点击