Python 处理excel并转为table

来源:互联网 发布:mac 安装多个xcode 编辑:程序博客网 时间:2024/06/12 20:38

使用xlrd来读取,xlrd的下载及安装可以参看:
Python "xlrd" package for extracting data from Excel files


---------------------------------------------------------------------------------
#coding=utf-8

import xlrd
import os, types, datetime

#excel存放目录
dir = u'D://temp//excel'
#生成html存放目录,需要事先建立
htmlroot = u'D://temp//html'

#excel从1899/12/31计算时间
__s_date = datetime.date(1899, 12, 31).toordinal() - 1

#读取时间格式的单元
def getdate(date):
    if isinstance(date, float):
        date = int(date)
    d = datetime.date.fromordinal(__s_date + date)
    return d.strftime("%Y-%m-%d")


#遍历文件夹
for root, dirs, files in os.walk(dir):
    for file in files:
        #生成excel文件路径
        path = os.path.join(root, file)
        print u'converting...   ' + path
      
        book = xlrd.open_workbook(path) #打开excel文件
      
        #遍历excel文件中的sheet
        for shn in range(book.nsheets):
            sh = book.sheet_by_index(shn)
            #判断该表是否为空
            if sh.nrows == 0:
                continue
          
            #搜索“路径”列
            lujing = 0
            for col in range(sh.ncols):
                if cmp(sh.cell_value(0, col),u'路径') == 0:
                    lujing = col
          
            #确认第一列是否为“编号”
            flag = False
            if cmp(sh.cell_value(0, 0),u'编号') == 0:
                flag = True
     
            #组合生成的文件名
            target = file[:-4]+'_'+sh.name+'.html'
            target = os.path.join(htmlroot, target)
          
            #用tab存储从excel中读取的数据
            tab = ['<table>']
            for row in range(sh.nrows):
                tab.append('<tr>')
                for col in range(lujing+1): #“路径”为最后一列,遍历应该是sh.ncols
                    tab.append('<td>')
                  
                    #增加“编号”
                    if col == 0:
                        if row == 0:
                            tab.append('编号')
                        else:
                            tab.append(row)

                                                 
                    #将“路径”列的内容转为链接
                    elif col == lujing:
                        if sh.cell_value(row,col):
                            #将“路径”改名为“附件”
                            if row == 0:
                                tab.append('附件')
                                break
                          
                            #分割多个路径
                            tmpstr = str(sh.cell_value(row,col))
                                #路径分割符号不统一,先替换掉
                            tmpstr = tmpstr.replace('/n', ';')
                            tmpstr = tmpstr.replace(';', ';') #中文分号
                            tmpstr = tmpstr.replace(' root', 'root')
                            strlist = tmpstr.split(';')
                            i = 0
                            for s in strlist:
                                link = os.path.join(htmlroot, s)
                                if os.path.exists(link):
                                    link = '<a href="' + s.replace('//', '/') + '">查看</a>'
                                else:
                                    link = s
                              
                                if i > 0:
                                    link = '<br/>' + link
                                tab.append(link)
                                i += 1
                    else:
                        #对时间类型进行转换,cell_type(i,j)返回该单元格的数据格式,3表示日期类型
#                        if type(sh.cell_value(row, col)) is types.FloatType:
                        if sh.cell_type(row, col) == 3:  
                            date = getdate(sh.cell_value(row, col))
                            tab.append(date)
                        else:
                            tab.append(sh.cell_value(row, col))
                    tab.append('</td>')
                  
                    #如果第一列不是“编号”,需要读取
                    if flag == False and col == 0:
                        tab.append('<td>')
                        tab.append(sh.cell_value(row, col))
                        tab.append('</td>')
              
                tab.append('</tr>')
            tab.append('</table>')
          
            #将数据写入文件
            html = open(target, 'w')
            for h in tab:
                html.write(str(h))
            html.close()
            print 'output:',target,'done'

---------------------------------------------------------------------------------------------
开始没考虑到代码会这么长,所以没有分成多个函数,看起来比较搓。。
修改“dir”和“htmlroot”可以设置excel文件的路径及生成的html代码的路径
修改 link = '<a href="' + s.replace('//', '/') + '">查看</a>' 可以修改链接
s.replace('//', '/') 避免服务器是linux系统而发生错误
xlrd的基本用法可以参考:用python读取Excel文件(转)
xlrd对excel中的时间单元格数据的读取可以参考:关于python处理作Excel文件的一些问题,包括时间处理 (转)

原创粉丝点击