Python3 日记 - 文件（一）

来源：互联网发布：小狐狸加速器 for mac 编辑：程序博客网时间：2024/05/08 02:47

注：以下内容为学习笔记，多数是从书本、资料中得来，只为加深印象，及日后参考。然而本人表达能力较差，写得不好。因非翻译、非转载，只好选原创，但多数乃摘抄，实为惭愧。但若能帮助一二访客，幸甚！

2012-12-17 星期一

1.读取文件

"""
Created on Dec 17, 2012

Reading files.

@author: liury_lab
"""


def read_file_by_chunks(file_name, chunk_size=100):
    """Yield the content of a file as successive binary chunks.

    Args:
        file_name: Path of the file to read; it is opened in binary mode.
        chunk_size: Maximum number of bytes per chunk. The last chunk may be
            shorter.

    Yields:
        ``bytes`` objects, in file order, until the end of the file.
    """
    # ``with`` closes the file even when the caller stops iterating early
    # (the generator is then closed, which unwinds this block).
    with open(file_name, 'rb') as file_object:
        while True:
            chunk = file_object.read(chunk_size)
            if not chunk:
                break
            yield chunk


if __name__ == '__main__':
    # The most convenient way: read the whole file into one big string
    # (or bytes object for binary files).
    with open('d:/text.txt') as file_object:
        all_the_text = file_object.read()
    print(all_the_text)

    with open('d:/data.txt', 'rb') as file_object:
        all_the_data = file_object.read()
    print(all_the_data)

    # The same thing written long-hand: try/finally is what ``with`` does
    # behind the scenes.
    file_object = open('d:/text.txt')
    try:
        all_the_text = file_object.read()
        print(all_the_text)
    finally:
        file_object.close()

    # readlines() keeps the trailing '\n' of every line.
    with open('d:/text.txt') as file_object:
        all_the_text = file_object.readlines()
        print(all_the_text)

    # Three ways to get the lines without the trailing '\n':
    #   file_object.read().splitlines()
    #   file_object.read().split('\n')   (adds a final '' if the file ends
    #                                     with a newline)
    #   [line.rstrip('\n') for line in file_object]
    with open('d:/text.txt') as file_object:
        all_the_text = [line.rstrip('\n') for line in file_object]
        print(all_the_text)

    # Read line by line; each line already carries its own newline.
    with open('d:/text.txt') as file_object:
        for line in file_object:
            print(line, end='')

    # Read the file a piece at a time.
    for chunk in read_file_by_chunks('d:/data.txt', 4):
        print(chunk)

输出：

hello python
hello world
b'ABCDEFG\r\nHELLO\r\nhello'
hello python
hello world
['hello python\n', 'hello world']
['hello python', 'hello world']
hello python
hello worldb'ABCD'
b'EFG\r'
b'\nHEL'
b'LO\r\n'
b'hell'
b'o'

2012-12-18 星期二

2.写入文件

"""
Created on Dec 18, 2012

Writing files.

@author: liury_lab
"""

# The simplest way: write everything in one call. ``with`` guarantees the
# file is flushed and closed as soon as the block ends.
all_the_text = 'hello python'
with open('d:/text.txt', 'w') as file_object:
    file_object.write(all_the_text)

all_the_data = b'abcd1234'
with open('d:/data.txt', 'wb') as file_object:
    file_object.write(all_the_data)

# The same thing long-hand: close explicitly, even if write() fails.
file_object = open('d:/text.txt', 'w')
try:
    file_object.write(all_the_text)
finally:
    file_object.close()

# Writing piece by piece. No separator is added between the pieces, so the
# file ends up containing 'hellopythonhelloworld'.
list_of_text_strings = ['hello', 'python', 'hello', 'world']
with open('d:/text.txt', 'w') as file_object:
    for text in list_of_text_strings:
        file_object.write(text)

# writelines() writes every string of an iterable; it does not add newlines
# either.
list_of_text_strings = ['hello', 'python', 'hello', 'world']
with open('d:/text.txt', 'w') as file_object:
    file_object.writelines(list_of_text_strings)

2012-12-19 星期三

3.搜索和替换文件中的文本

# Replace one string with another in a file.
# The text is read from a given file or from standard input, every occurrence
# of the search string is replaced, and the result is written to a given file
# or to standard output.
import os
import sys


def main(argv=None):
    """Copy input to output, replacing ``search_text`` with ``replace_text``.

    Args:
        argv: Argument list laid out like ``sys.argv``:
            ``[prog, search_text, replace_text, [infile, [outfile]]]``.
            Defaults to ``sys.argv``. A missing ``infile`` means standard
            input; a missing ``outfile`` means standard output.

    Prints a usage message and returns when the argument count is wrong.
    """
    if argv is None:
        argv = sys.argv
    nargs = len(argv)
    if not 3 <= nargs <= 5:
        prog = os.path.basename(argv[0]) if argv else ''
        print('usage: %s search_text replace_text [infile [outfile]]' % prog)
        return

    search_text = argv[1]
    replace_text = argv[2]

    # NOTE: passing the same path as infile and outfile empties the file,
    # because opening it for writing truncates it before anything is read.
    input_file = open(argv[3]) if nargs > 3 else sys.stdin
    try:
        output_file = open(argv[4], 'w') if nargs > 4 else sys.stdout
        try:
            for line in input_file:
                output_file.write(line.replace(search_text, replace_text))
        finally:
            # Only close what was opened here; the standard streams belong
            # to the interpreter.
            if output_file is not sys.stdout:
                output_file.close()
    finally:
        if input_file is not sys.stdin:
            input_file.close()


if __name__ == '__main__':
    main()

在MyEclipse + pyDev 环境下设置运行参数：

运行结果：

进一步提高效率：

# Shorter variant: do the replacement in a single call instead of looping
# over the lines. This avoids the per-line overhead, but the whole input (and
# the replaced copy) is held in memory at once, so it suits files that fit
# comfortably in RAM.
def main(argv=None):
    """Replace ``search_text`` with ``replace_text`` in one pass over the input.

    Args:
        argv: Argument list laid out like ``sys.argv``:
            ``[prog, search_text, replace_text, [infile, [outfile]]]``.
            Defaults to ``sys.argv``. A missing ``infile`` means standard
            input; a missing ``outfile`` means standard output.

    Prints a usage message and returns when the argument count is wrong.
    """
    # Imported here so the snippet is self-contained.
    import os
    import sys

    if argv is None:
        argv = sys.argv
    nargs = len(argv)
    if not 3 <= nargs <= 5:
        prog = os.path.basename(argv[0]) if argv else ''
        print('usage: %s search_text replace_text [infile [outfile]]' % prog)
        return

    search_text = argv[1]
    replace_text = argv[2]

    # Read everything before the output is opened, so that using the same
    # path for infile and outfile does not truncate the data first.
    input_file = open(argv[3]) if nargs > 3 else sys.stdin
    try:
        content = input_file.read()
    finally:
        # Only close what was opened here, never the standard streams.
        if input_file is not sys.stdin:
            input_file.close()

    output_file = open(argv[4], 'w') if nargs > 4 else sys.stdout
    try:
        output_file.write(content.replace(search_text, replace_text))
    finally:
        if output_file is not sys.stdout:
            output_file.close()


if __name__ == '__main__':
    main()

2012-12-20 星期四

4.从文件中读取指定行

# The standard-library module linecache is a natural fit for reading one
# given line of a file.
import linecache


def getline(the_file_path, line_number):
    """Return line ``line_number`` of a text file, or '' if there is none.

    linecache reads and caches the whole file, which is wasteful when the
    file is large and only one line is wanted; this function instead stops
    reading as soon as the requested line has been reached.

    Args:
        the_file_path: Path of the text file.
        line_number: 1-based number of the line to return.

    Returns:
        The line including its trailing newline (if any), or '' when
        ``line_number`` is less than 1 or beyond the end of the file.
    """
    if line_number < 1:
        return ''
    # Default text mode already applies universal newlines; the old 'rU'
    # mode is rejected by Python 3.11 and later.
    with open(the_file_path) as the_file:
        # Count from 1 so the counter is directly comparable to line_number.
        for cur_line_number, line in enumerate(the_file, 1):
            if cur_line_number == line_number:
                return line
    # Only reached once every line has been inspected.
    return ''


if __name__ == '__main__':
    the_line = linecache.getline('d:/FreakOut.cpp', 222)
    print(the_line)

    the_line = getline('d:/FreakOut.cpp', 222)
    print(the_line)

输出：

int Draw_Rectangle(int x1, int y1, int x2, int y2, int color)

int Draw_Rectangle(int x1, int y1, int x2, int y2, int color)

2012-12-21 星期五

5.Python 3.x 读取UTF-8文件，及统计文件行数

"""
Created on Dec 21, 2012

Reading a UTF-8 file with Python 3 and counting its lines.

@author: liury_lab
"""


def count_lines_readlines(file_path):
    """Count lines by loading the whole file into a list of lines.

    The simplest approach; suitable for small files only, because every line
    is kept in memory at once.
    """
    with open(file_path, encoding='utf-8') as the_file:
        return len(the_file.readlines())


def count_lines_iter(file_path):
    """Count lines by iterating over the file, one line at a time.

    Suitable for larger files: only the current line is held in memory.
    """
    with open(file_path, encoding='utf-8') as the_file:
        return sum(1 for _ in the_file)


def count_lines_chunked(file_path, chunk_size=8192 * 1024):
    """Count lines by counting '\\n' bytes in large binary chunks.

    Works for files whose line terminator contains '\\n' (Unix '\\n' and
    Windows '\\r\\n'). The file is read as raw bytes: in UTF-8 the byte 0x0A
    never occurs inside a multi-byte character, so no decoding is needed.

    Args:
        file_path: Path of the file.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The number of lines. A final line without a trailing newline is
        counted; an empty file has 0 lines.
    """
    count = 0
    last_chunk = b''
    with open(file_path, 'rb') as the_file:
        while True:
            chunk = the_file.read(chunk_size)
            if not chunk:
                break
            count += chunk.count(b'\n')
            last_chunk = chunk
    # Add one only for an unterminated last line; adding it unconditionally
    # would over-count files that end with a newline.
    if last_chunk and not last_chunk.endswith(b'\n'):
        count += 1
    return count


if __name__ == '__main__':
    print(count_lines_readlines('d:/FreakOut.cpp'))
    print(count_lines_iter('d:/FreakOut.cpp'))
    print(count_lines_chunked('d:/FreakOut.cpp'))

2012-12-22 星期六

6.处理文件中的每个词

"""
Created on Dec 21, 2012

Processing every word of a file.

@author: liury_lab
"""
import re


def words_of_file(file_path, line_to_words=str.split):
    """Yield every word of a UTF-8 text file, in order.

    Args:
        file_path: Path of the file to read.
        line_to_words: Callable that takes one line and returns an iterable
            of its words. Defaults to ``str.split`` (whitespace-separated).

    Yields:
        The words, one at a time.
    """
    with open(file_path, encoding='UTF-8') as the_file:
        for line in the_file:
            for word in line_to_words(line):
                yield word


def words_by_re(file_path, repattern=r"[\w'-]+"):
    """Yield every match of ``repattern`` in a UTF-8 text file, in order.

    Args:
        file_path: Path of the file to read.
        repattern: Regular expression defining a word. The default accepts
            runs of word characters, hyphens and single quotes.

    Returns:
        A generator over the matched words.
    """
    re_word = re.compile(repattern)

    def line_to_words(line):
        # Must produce every match of the line, not just the first one.
        for mo in re_word.finditer(line):
            yield mo.group(0)

    return words_of_file(file_path, line_to_words)


if __name__ == '__main__':
    # Words are whitespace-separated.
    with open('d:/text.txt', encoding='UTF-8') as the_file:
        for line in the_file:
            for word in line.split():
                print(word, end="|")

    # If the definition of a word changes, use a regular expression: here a
    # word is a run of letters/digits, hyphens or single quotes.
    print()
    print('************************************************************************')
    re_word = re.compile(r"[\w'-]+")
    with open('d:/text.txt', encoding='UTF-8') as the_file:
        for line in the_file:
            for word in re_word.finditer(line):
                print(word.group(0), end="|")

    # The same two ideas wrapped up as iterators.
    print()
    print('************************************************************************')
    for word in words_of_file('d:/text.txt'):
        print(word, end='|')

    print()
    print('************************************************************************')
    for word in words_by_re('d:/text.txt'):
        print(word, end='|')

输出：

2012-12-23 星期日

7.随机输入/输出及更新随机存取文件

"""
Created on Dec 21, 2012

Random-access input/output.

@author: liury_lab
"""


def read_record(file_path, record_size, record_number):
    """Read one fixed-length record from a binary file without scanning it.

    Given a large binary file made of records that all have the same length,
    jump straight to the wanted record instead of reading record by record.

    Args:
        file_path: Path of the binary file.
        record_size: Length of every record, in bytes.
        record_number: Zero-based index of the record to read.

    Returns:
        The record as ``bytes``. The result is shorter than ``record_size``
        (possibly empty) when the record lies partly or wholly past the end
        of the file.
    """
    with open(file_path, 'rb') as the_file:
        the_file.seek(record_size * record_number)
        return the_file.read(record_size)


if __name__ == '__main__':
    buffer = read_record('d:/recon_0.yuv', 16, 6)
    # Iterating over bytes yields the integer value of each byte.
    for ch in buffer:
        print(ch, end=' ')

结果：

# Given a large binary file made of fixed-length records, read one record,
# change some of its fields and write it back in place.
# Method: read the record, unpack it, update the fields, pack them again,
# seek back to the record's position and write.
import struct


def update_record(file_path, format_string, record_number, update_fields):
    """Rewrite one fixed-length record of a binary file in place.

    Args:
        file_path: Path of the binary file; it must already exist.
        format_string: ``struct`` format describing one record.
        record_number: Zero-based index of the record to update.
        update_fields: Callable that receives the list of unpacked fields and
            returns the new field values (same count, in range for the
            format).

    Returns:
        A tuple ``(old_fields, new_fields)`` of lists.

    Raises:
        ValueError: If the file holds no complete record at that index.
        struct.error: If the new values do not fit ``format_string``.
    """
    record_size = struct.calcsize(format_string)
    offset = record_size * record_number
    with open(file_path, 'r+b') as the_file:
        the_file.seek(offset)
        buffer = the_file.read(record_size)
        if len(buffer) != record_size:
            raise ValueError('record %d is not completely inside %r'
                             % (record_number, file_path))
        old_fields = list(struct.unpack(format_string, buffer))
        # Hand over a copy so the callback cannot alter old_fields.
        new_fields = list(update_fields(list(old_fields)))
        # Pack before seeking/writing: if the values do not fit, the file is
        # left untouched.
        buffer = struct.pack(format_string, *new_fields)
        the_file.seek(offset)
        the_file.write(buffer)
    return old_fields, new_fields


def increment_signed_bytes(fields):
    """Return ``fields`` with 1 added to each value, wrapping 127 to -128.

    Plain ``value + 1`` would produce 128 for a field holding 127, which the
    signed-byte format 'b' cannot pack.
    """
    return [(value + 129) % 256 - 128 for value in fields]


if __name__ == '__main__':
    format_string = '16b'   # one record is 16 signed one-byte integers
    before, after = update_record('d:/recon_0.yuv', format_string, 400,
                                  increment_signed_bytes)
    print(before)
    print(after)

结果：