Python 统计 Excel 字数

来源:互联网 发布:python 网络爬虫代码 编辑:程序博客网 时间:2024/05/16 01:27
# -*- coding: utf-8 -*-
"""Count the words in every Excel workbook of a folder.

Counting rules: a numeric cell counts as one word; in a text cell every
Chinese character (U+4E00..U+9FA5) counts as one word and every run of ASCII
letters/apostrophes counts as one word.

Run as a script, it asks for a folder, totals every workbook found directly
in it and writes a per-file report to ``统计结果.csv`` in the current
directory.

The code is written to run under both Python 2.7 and Python 3.
"""
import io
import os
import re
import sys

if sys.version_info[0] == 2:
    # Legacy Python 2 behaviour kept from the original script: make implicit
    # str <-> unicode conversions use UTF-8 instead of ASCII.
    reload(sys)  # noqa: F821 - builtin on Python 2 only
    sys.setdefaultencoding('utf8')

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

# Name of the report written to the current directory; it matches the file
# name announced to the user at the end of the run.
REPORT_NAME = u'统计结果.csv'

# File extensions treated as Excel workbooks.
# NOTE(review): xlrd 2.0+ reportedly reads only .xls; .xlsx files will then
# be reported as unreadable by open_excel() and counted as 0.
EXCEL_EXTENSIONS = ('.xls', '.xlsx')

HANZI_RE = re.compile(u'[\u4e00-\u9fa5]')
ENGLISH_WORD_RE = re.compile(r"[A-Za-z']+")


def open_excel(filename='file.xls'):
    """Open *filename* with xlrd and return the workbook.

    Returns None, after printing the error, when the file cannot be opened,
    so that a batch run can skip the file instead of aborting.
    """
    # Imported here rather than at module level so the counting helpers stay
    # importable (and testable) on machines without xlrd installed. Kept
    # outside the ``try`` so a missing xlrd is not mistaken for a bad file.
    import xlrd
    try:
        return xlrd.open_workbook(filename)
    except Exception as e:  # best-effort boundary: report and skip the file
        print(u"%s: %s" % (filename, e))
        return None


def count_cell_words(cell):
    """Return the number of words contributed by one cell value.

    A number counts as 1. Text counts one per Chinese character plus one per
    run of ASCII letters/apostrophes. Byte strings are decoded as UTF-8
    first. Any other value (for example None) counts as 0.
    """
    if isinstance(cell, (int, float)):
        return 1
    if isinstance(cell, bytes):
        cell = cell.decode('utf8')
    if not isinstance(cell, type(u'')):
        return 0
    # Spaces are deliberately left in place: removing them would glue
    # neighbouring English words together ("hello world" -> one word).
    return len(HANZI_RE.findall(cell)) + len(ENGLISH_WORD_RE.findall(cell))


def countWordOfExcel(filename='file.xls'):
    """Return the total word count over every cell of every sheet.

    Returns 0 when the workbook cannot be opened (open_excel() has already
    printed the reason).
    """
    data = open_excel(filename)
    if data is None:
        return 0
    words = 0
    for table in data.sheets():
        for i in range(table.nrows):
            words += sum(count_cell_words(cell) for cell in table.row_values(i))
    return words


def is_excel_file(name):
    """Return True when *name* ends in an Excel extension (case-insensitive)."""
    return os.path.splitext(name)[1].lower() in EXCEL_EXTENSIONS


def main():
    """Ask for a folder, count every workbook in it and write the report."""
    print(u"请输入excel文件所在的文件夹(全路径):")
    dirname = read_line().strip()
    if isinstance(dirname, bytes):
        # Python 2 returns bytes; decode so os.listdir() yields text names.
        dirname = dirname.decode(sys.stdin.encoding or sys.getfilesystemencoding())
    print(u"统计中...")

    rows = []
    total = 0
    for name in sorted(os.listdir(dirname)):
        if not is_excel_file(name):
            continue
        # Open relative to the chosen folder, not the current directory.
        count = countWordOfExcel(os.path.join(dirname, name))
        total += count
        print(name)
        rows.append(u"%s,%d\n" % (name, count))

    print(u"统计完成, 当前目录下所有 .xls(x) 中总共 %d 字符, 请于当前目录下 统计结果.csv 文件中查看详情" % total)
    with io.open(REPORT_NAME, 'w', encoding='utf-8') as out:
        out.write(u'文件名,包含字数\n')
        out.writelines(rows)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击