openoffice命令行方式将word、excel、ppt转成pdf

来源:互联网 发布:电脑相册制作软件 编辑:程序博客网 时间:2024/06/07 08:47

上一篇记录了安装libreoffice的方式将office文档转换成pdf,接下来将使用openoffice实现同样的功能。

首先到openoffice官网下载最新的rpm压缩包:

https://www.openoffice.org/download/index.html

目前我下载的最新版本是:Apache_OpenOffice_4.1.3_Linux_x86-64_install-rpm_zh-CN.tar.gz

解压并进入到相关目录安装rpm包:

[root@instance-32spzihn src]# tar -zxvf Apache_OpenOffice_4.1.3_Linux_x86-64_install-rpm_zh-CN.tar.gz
[root@instance-32spzihn src]# cd zh-CN/RPMS
[root@instance-32spzihn RPMS]# yum install *.rpm
[root@instance-32spzihn RPMS]# cd /opt/openoffice4/program

安装完成并进入到对应的program目录之后,启动openoffice:

[root@instance-32spzihn program]# ./soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard &

发现报错:

[root@instance-32spzihn program]# javaldx: Could not find a Java Runtime Environment!

不过openoffice服务已经启动,由于只需要用到转换接口命令,不影响功能的情况下,我就不深究了。


接下来就需要一份python的脚本程序,因为转换需要通过py程序去调用api处理,将下面程序保存到topdf.py文件:

#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# NOTE: only syntax that is valid on both Python 2.6+ and Python 3 is used
# below (call-style raise/print, "except ... as", "in" instead of has_key),
# so the script runs with whichever interpreter the office suite bundles.
#
DEFAULT_OPENOFFICE_PORT = 8100

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

#---------------------#
# Configuration Start #
#---------------------#

# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter

# most formats are auto-detected; only those requiring options are defined here
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8"
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        "FilterOptions": "44,34,0"
    }
}

# output extension -> document family -> store (export) properties
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: { "FilterName": "writer_pdf_Export" },
        FAMILY_WEB: { "FilterName": "writer_web_pdf_Export" },
        FAMILY_SPREADSHEET: { "FilterName": "calc_pdf_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_pdf_Export" },
        FAMILY_DRAWING: { "FilterName": "draw_pdf_Export" }
    },
    "html": {
        FAMILY_TEXT: { "FilterName": "HTML (StarWriter)" },
        FAMILY_SPREADSHEET: { "FilterName": "HTML (StarCalc)" },
        FAMILY_PRESENTATION: { "FilterName": "impress_html_Export" }
    },
    "odt": {
        FAMILY_TEXT: { "FilterName": "writer8" },
        FAMILY_WEB: { "FilterName": "writerweb8_writer" }
    },
    "doc": {
        FAMILY_TEXT: { "FilterName": "MS Word 97" }
    },
    "rtf": {
        FAMILY_TEXT: { "FilterName": "Rich Text Format" }
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8"
        }
    },
    "ods": {
        FAMILY_SPREADSHEET: { "FilterName": "calc8" }
    },
    "xls": {
        FAMILY_SPREADSHEET: { "FilterName": "MS Excel 97" }
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            "FilterOptions": "44,34,0"
        }
    },
    "odp": {
        FAMILY_PRESENTATION: { "FilterName": "impress8" }
    },
    "ppt": {
        FAMILY_PRESENTATION: { "FilterName": "MS PowerPoint 97" }
    },
    "swf": {
        FAMILY_DRAWING: { "FilterName": "draw_flash_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_flash_Export" }
    }
}

# document family -> page-style properties forced onto every page style
# of the loaded document before it is exported
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        #--- Scale options: uncomment 1 of the 3 ---
        # a) 'Reduce / enlarge printout': 'Scaling factor'
        "PageScale": 100,
        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
        #"ScaleToPages": 1,
        "PrintGrid": False
    }
}

#-------------------#
# Configuration End #
#-------------------#


class DocumentConversionException(Exception):
    """Raised when a document cannot be loaded, recognised or exported."""

    def __init__(self, message):
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message


class DocumentConverter:
    """Converts documents through an office instance reached over a UNO socket."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office instance listening on localhost:port.

        Raises DocumentConversionException if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load inputFile and store it as outputFile.

        The export filter is chosen from the extension of outputFile and the
        detected family of the loaded document.

        Raises DocumentConversionException if the input cannot be loaded, the
        output format is unknown, or the conversion is unsupported.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = { "Hidden": True }
        inputExt = self._getFileExt(inputFile)
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(loadProperties))
        if document is None:
            # the loader hands back nothing when it cannot open the file;
            # report that instead of failing later with an AttributeError
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # everything after a successful load is guarded so the document is
        # always closed, even if family detection or filter lookup fails
        try:
            try:
                document.refresh()
            except AttributeError:
                # best effort: not every loaded component offers refresh()
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style."""
        if family in PAGE_STYLE_OVERRIDE_PROPERTIES:
            properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
            pageStyles = document.getStyleFamilies().getByName('PageStyles')
            for styleName in pageStyles.getElementNames():
                pageStyle = pageStyles.getByName(styleName)
                for name, value in properties.items():
                    pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """Return the export properties for this document and output extension.

        Raises DocumentConversionException for an unknown extension or for a
        family/extension pair that has no entry in EXPORT_FILTER_MAP.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Map the services the document supports to one of the FAMILY_* names.

        Raises DocumentConversionException if none of the known services match.
        """
        # WebDocument is tested before GenericTextDocument, so a document
        # supporting both is reported as FAMILY_WEB
        if document.supportsService("com.sun.star.text.WebDocument"):
            return FAMILY_WEB
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # must be TextDocument or GlobalDocument
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException(
            "unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of path without the dot ('' if none)."""
        ext = splitext(path)[1]
        if ext is not None:
            return ext[1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of path."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Convert a name -> value mapping into a tuple of PropertyValue objects."""
        props = []
        for key in dict:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = dict[key]
            props.append(prop)
        return tuple(props)


if __name__ == "__main__":
    from sys import argv, exit

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)

将文件保存到program目录下面,然后就可以执行转换命令了:

[root@instance-32spzihn program]# ./python topdf.py zzz.docx zzz_1.pdf

同样的命令也可以转换其它格式的文件,如:

[root@instance-32spzihn program]# ./python topdf.py zzz.docx zzz_1.html

相对来说,libreoffice实现起来比openoffice要简单点,转换效果差不多;若转换中文文档时出现乱码,请参考libreoffice一文中的字体部分。






原创粉丝点击