Python 读取文件乱码

来源：互联网　发布：淘宝等花开杂货铺　编辑：程序博客网　时间：2024/06/01 22:27

方法一：使用codecs

import codecs

# Stream the GBK-encoded file one decoded line at a time. The ``with`` block
# closes the handle even if decoding or printing fails part-way through.
with codecs.open('nlpir/Readme.txt', 'r', 'GBK') as f:
    for line in f:
        # Each line still carries its own terminator; strip it and let
        # print() supply the newline (valid on both Python 2 and Python 3).
        print(line.rstrip('\r\n'))

上面逐行读取的方法很慢，可以直接一次性读取整个文件：

# Read and decode the whole file in a single call. The ``with`` block makes
# sure the handle is closed, and the decoded text is kept in ``content``
# instead of being discarded.
with codecs.open('nlpir/Readme.txt', 'r', 'GBK') as f:
    content = f.read()
也可以使用 readlines() 一次读取所有行，返回由各行组成的列表。
方法二：
#读取文档 
def read_file(path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt',
              encoding='gbk'):
    """Print the entire contents of an encoded text file.

    Args:
        path1: File to read. Defaults to the sample document path.
        encoding: Codec used to decode the raw bytes (GBK by default).

    Raises:
        IOError / OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    # Open read-only in binary mode: the bytes are decoded explicitly below
    # and the file is never written, so 'r+' (which fails on read-only
    # files) is unnecessary. Reading bytes also keeps .decode() valid on
    # both Python 2 and Python 3.
    with open(path1, 'rb') as f:
        file_list = f.read().decode(encoding)
    print(file_list)


#逐行读取文档 
def read_file_line(path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt',
                   encoding='gbk'):
    """Print an encoded text file one decoded line at a time.

    Args:
        path1: File to read. Defaults to the sample document path.
        encoding: Codec used to decode each raw line (GBK by default).
            Lines are split on the newline byte before decoding, so the
            codec must be ASCII-compatible (GBK and UTF-8 are).

    Raises:
        IOError / OSError: If the file cannot be opened.
        UnicodeDecodeError: If a line is not valid in ``encoding``.
    """
    # Binary read-only mode; the file is never written, so 'r+' is not needed.
    with open(path1, 'rb') as f:
        for raw_line in f:
            # Drop the line terminator before printing; otherwise print()
            # adds a second newline and the output is double-spaced.
            print(raw_line.decode(encoding).rstrip('\r\n'))


读取带 BOM 头的 UTF-8 文件：
def readWeibo(path=u'H:/数据/weibo/weibo.txt', max_lines=12):
    """Print the first lines of a UTF-8 file, ignoring a leading BOM.

    Args:
        path: File to read. Defaults to the sample weibo dump.
        max_lines: Maximum number of lines to print.

    Raises:
        IOError / OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' removes a leading BOM while decoding. Comparing the decoded
    # text against the byte string codecs.BOM_UTF8 can never match, because
    # after decoding the BOM is the single character U+FEFF.
    with codecs.open(path, 'r', 'utf-8-sig') as f:
        # enumerate() supplies the line counter that enforces the limit.
        for i, line in enumerate(f):
            if i >= max_lines:
                break
            # The line keeps its terminator; strip it so print() does not
            # produce blank lines.
            print(line.rstrip('\r\n'))


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    readWeibo()

读取时同时显示行号：
def test_read():
    """Print the first 12 lines of the sample file, each prefixed by its index.

    Raises:
        IOError / OSError: If the file cannot be opened.
    """
    file_path = u"D:/dev_data/idf_data/weibo/weibo_text.txt"
    # The context manager closes the file even if printing raises.
    with open(file_path, "r") as f:
        for i, line in enumerate(f):
            # The line keeps its own terminator; strip it and let print()
            # add the newline. Formatting into one string keeps the
            # "index line" layout on both Python 2 and Python 3.
            print("%d %s" % (i, line.rstrip("\r\n")))
            if i > 10:
                break

0 0