Python:将 Word 文档(.doc)批量转换为 txt 和 html

来源:互联网 发布:免费解压软件 编辑:程序博客网 时间:2024/05/18 16:58

先下载并安装 pywin32(例如 `pip install pywin32`)。

脚本通过 pywin32 提供的 win32com 模块调用本机安装的 Word 来完成转换。

重点语法:

# Key calls, excerpted from the full script; `doc`, `parent` and `title`
# are defined there.

# Export the open document as HTML (8 = wdFormatHTML).
doc.SaveAs(os.path.join(parent, 'html', title + '.html'), 8)
# Create the text output folder, then export the document as MS-DOS text
# (4 = wdFormatDOSText).
os.mkdir(os.path.join(parent, 'txt'))
doc.SaveAs(os.path.join(parent, 'txt', title + '.txt'), 4)
# -*- coding:utf-8 -*-
"""Export every Word document below the current directory as text and HTML.

The working directory is walked recursively.  For each ``.doc``/``.docx``
file, a copy is saved into a ``txt`` and an ``html`` sub-folder located next
to the source file.  Exports that already exist on disk are left untouched.

Requires Windows with Microsoft Word installed and the pywin32 package.
"""
import os

# WdSaveFormat values passed to Document.SaveAs.
WD_FORMAT_DOS_TEXT = 4
WD_FORMAT_HTML = 8
# WdSaveOptions value passed to Document.Close: discard pending changes.
WD_DO_NOT_SAVE_CHANGES = 0

WORD_EXTENSIONS = (u'.doc', u'.docx')

# (sub-folder, file extension, save format) for every export that is produced.
EXPORTS = (
    (u'txt', u'.txt', WD_FORMAT_DOS_TEXT),
    (u'html', u'.html', WD_FORMAT_HTML),
)


def is_word_document(filename):
    """Return True if *filename* is a Word document that should be converted.

    Names containing ``~$`` are skipped: Word uses that marker for the
    temporary owner/lock files it creates beside an open document.
    """
    if u'~$' in filename:
        return False
    extension = os.path.splitext(filename)[1].lower()
    return extension in WORD_EXTENSIONS


def pending_exports(parent, title):
    """Return ``(target_path, save_format)`` pairs for exports still missing.

    The file system is consulted at call time, so folders and files created
    earlier in the same run are taken into account.
    """
    pending = []
    for subdir, extension, save_format in EXPORTS:
        target = os.path.join(parent, subdir, title + extension)
        if not os.path.exists(target):
            pending.append((target, save_format))
    return pending


def convert_tree(rootdir, word):
    """Convert all Word documents below *rootdir* using the COM object *word*.

    *word* is a ``Word.Application`` COM object.  Returns the list of file
    names (without directory) of every Word document that was found, whether
    or not a new export had to be written for it.
    """
    found = []
    for parent, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            if not is_word_document(filename):
                continue
            found.append(filename)

            # splitext copes with both ".doc" and ".docx"; a fixed [:-4]
            # slice would leave a trailing dot for ".docx" names.
            title = os.path.splitext(filename)[0]
            pending = pending_exports(parent, title)
            if not pending:
                continue  # nothing to do, so avoid opening the file in Word

            doc = word.Documents.Open(os.path.join(parent, filename))
            try:
                for target, save_format in pending:
                    out_dir = os.path.dirname(target)
                    # Checked on every export: several documents in the same
                    # directory share one output folder.
                    if not os.path.isdir(out_dir):
                        os.makedirs(out_dir)
                    doc.SaveAs(target, save_format)
            finally:
                # Release the document so open files do not pile up in Word.
                doc.Close(WD_DO_NOT_SAVE_CHANGES)
    return found


def main():
    """Start Word, convert the current directory tree, print the file list."""
    # Imported here so the helpers above stay importable without pywin32.
    from win32com import client as wc

    word = wc.Dispatch('Word.Application')
    try:
        word.Visible = 0
        f_list = convert_tree(os.getcwd(), word)
    finally:
        word.Quit()
    print(f_list)


if __name__ == '__main__':
    main()


0 0
原创粉丝点击