python读取文件乱码

来源:互联网 发布:淘宝等花开杂货铺 编辑:程序博客网 时间:2024/06/01 22:27
方法一:使用codecs
import codecs
f = codecs.open('nlpir/Readme.txt','r','GBK')line = f.readline()while line:    print line,    line = f.readline()f.close()

上面逐行读取的方法比较慢,可以用 read() 一次性读取整个文件:
# Read and decode the whole GBK file with a single read() call.
# NOTE(review): the result is discarded and the file object is never closed
# explicitly here -- bind the return value and close the file in real code.
codecs.open('nlpir/Readme.txt','r','GBK').read()
也可以用 readlines() 一次性读取所有行,返回由各行组成的列表。

方法二:使用内置的 open 读取,再用 decode 解码

#读取文档

def read_file():    path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt'    f = open(path1,'r+')    file_list = f.read().decode("gbk")    print file_list    f.close()


#逐行读取文档

def read_file_line():    path1='K:\\SogouC.reduced\\Reduced\\C000008\\10.txt'    f = open(path1,'r+')    line = f.readline()    while line:        print line.decode('gbk')        line = f.readline()    f.close()


读取带 BOM 头的 UTF-8 文件:

def readWeibo():    f = codecs.open(u'H:/数据/weibo/weibo.txt','r','utf-8')    line = f.readline()    if line[:3] == codecs.BOM_UTF8:        line = line[3:]#去除bom头    i=0    while line:        print line,        line = f.readline()        if i>10:            break    f.close()    returnreadWeibo()

读取时显示行号:
def test_read():    file_path = u"D:/dev_data/idf_data/weibo/weibo_text.txt"    f = open(file_path, "r")    for i,line in enumerate(f):        print i,line,        if i>10:            break;    f.close()



0 0
原创粉丝点击