Python——文件处理

来源：互联网　发布：淘宝引擎优化　编辑：程序博客网　时间：2024/05/16 10:16
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Small file-handling recipes.

Covers reading files (whole, by chunk, by line number), counting lines,
search-and-replace between files, zip/tar helpers, directory walking,
extension swapping, and ``sys.path`` manipulation.

The module runs unchanged on Python 2.7 and Python 3: every ``print`` call
takes exactly one argument, and the few names that differ between the two
major versions are resolved once in the compatibility section below.
"""

import fnmatch
import io
import linecache
import os
import subprocess
import sys
import tarfile
import tempfile
import zipfile

try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        # Python 3: zip archives are bytes, so the in-memory buffer has to be
        # a BytesIO. The historical name is kept for existing importers.
        from io import BytesIO as StringIO

# --- Python 2 / 3 compatibility ------------------------------------------
if sys.version_info[0] == 2:
    _STRING_TYPES = (basestring,)  # noqa: F821 - Python 2 only
    _BUILTIN_FILE = file  # noqa: F821 - Python 2 only
    _UNIVERSAL_READ_MODE = 'rU'
else:
    _STRING_TYPES = (str,)
    _BUILTIN_FILE = None
    # Text mode already uses universal newlines; 'U' was removed in 3.11.
    _UNIVERSAL_READ_MODE = 'r'

CHUNK_SIZE = 16 * 1024


def read_file(fileName):
    """Read *fileName* and print how many lines it contains.

    Alternatives that yield the lines without the trailing newline are
    ``f.read().splitlines()`` or ``[l.rstrip('\\n') for l in f]``.
    """
    with open(fileName, 'r') as f:
        line_list = f.readlines()
    print(len(line_list))


def read_file_by_chunks(fileName, chunksize=100):
    """Yield the binary content of *fileName* in pieces of *chunksize* bytes.

    The file is closed when the generator is exhausted, closed explicitly,
    or garbage-collected, so abandoning the iteration early does not leak
    the handle.
    """
    with open(fileName, 'rb') as file_object:
        while True:
            chunk = file_object.read(chunksize)
            if not chunk:
                break
            yield chunk


def search_replace_text_infile(stext='', rtext='', input_file=sys.stdin,
                               output_file=sys.stdout):
    """Copy *input_file* to *output_file*, replacing *stext* with *rtext*.

    Each of *input_file* / *output_file* may be a file name or an already
    open file-like object. Files opened here (i.e. given by name) are
    closed before returning; objects supplied by the caller are left open.
    """
    # Remember whether WE opened each stream: the decision must be based on
    # the argument, not on the resulting file object.
    opened_input = isinstance(input_file, _STRING_TYPES)
    ifile = open(input_file, 'r') if opened_input else input_file
    try:
        opened_output = isinstance(output_file, _STRING_TYPES)
        ofile = open(output_file, 'w') if opened_output else output_file
        try:
            for s in ifile:
                ofile.write(s.replace(stext, rtext))
        finally:
            if opened_output:
                ofile.close()
    finally:
        if opened_input:
            ifile.close()


def getline(filename, desired_line_number):
    """Return line *desired_line_number* (1-based) of *filename*.

    The trailing newline is stripped. An empty string is returned when the
    line number is below 1 or beyond the end of the file.
    """
    if desired_line_number < 1:
        return ''

    with open(filename, _UNIVERSAL_READ_MODE) as f:
        for current_line, line in enumerate(f, 1):
            if current_line == desired_line_number:
                return line.rstrip('\n')
    return ''


def linecount_w(filename):
    """Count lines by running the external ``wc -l`` utility.

    The command is executed without a shell and with ``--`` before the file
    name, so spaces, shell metacharacters, or a leading dash in *filename*
    cannot alter the command. Raises ``IndexError`` when ``wc`` produces no
    output (e.g. the file does not exist).
    """
    proc = subprocess.Popen(['wc', '-l', '--', filename],
                            stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    return int(output.split()[0])


def linecount_1(filename):
    """Count lines by loading the whole file into memory."""
    with open(filename, 'r') as f:
        return len(f.readlines())


def linecount_2(filename):
    """Count lines by iterating over the file one line at a time."""
    with open(filename, 'r') as f:
        return sum(1 for _ in f)


def linecount_3(filename):
    """Count newline characters by reading the file in 64 KiB blocks.

    Like ``wc -l``, a final line without a trailing newline is not counted.
    """
    linecount = 0
    with open(filename, 'r') as f:
        while True:
            chunk = f.read(65535)
            if not chunk:
                break
            linecount += chunk.count('\n')
    return linecount


def words_of_file_in_firstline(filename, line_to_words=str.split):
    """Yield the words of the first line of *filename*.

    *line_to_words* turns the line into an iterable of words; it defaults
    to whitespace splitting.
    """
    firstline = getline(filename, 1)
    for word in line_to_words(firstline):
        yield word


def read_from_zip_file(filename):
    """Print the name and uncompressed size of every member of a zip file."""
    zipf = zipfile.ZipFile(filename, 'r')
    try:
        for fname in zipf.namelist():
            data = zipf.read(fname)
            print('File %s in %s has %d bytes' % (fname, filename, len(data)))
    finally:
        zipf.close()


def just_for_fun():
    """Import a module straight out of a temporary zip file and call it.

    The temporary archive is removed from ``sys.path`` and deleted from
    disk even if the import or the call fails.
    """
    # create a temp file with zip suffix
    handle, tmpFileName = tempfile.mkstemp('.zip')
    os.close(handle)
    try:
        # open temp zip file and write one module into it
        tmpZfile = zipfile.ZipFile(tmpFileName, 'w')
        try:
            # parameter 1: the name of the member inside the archive
            # parameter 2: the member's content
            tmpZfile.writestr('hello.py', 'def f(): \
                                        return "hello world from " + __file__\n')
        finally:
            tmpZfile.close()

        # put the temp zip file first on the module search path
        sys.path.insert(0, tmpFileName)
        try:
            import hello
            print(hello.f())
        finally:
            # do not leave a soon-to-be-deleted archive on sys.path
            if tmpFileName in sys.path:
                sys.path.remove(tmpFileName)
    finally:
        os.unlink(tmpFileName)


class ZipString(zipfile.ZipFile):
    """Read-only zip archive whose content comes from an in-memory string."""

    def __init__(self, dataString):
        """Open the archive held in *dataString* (the raw zip bytes)."""
        zipfile.ZipFile.__init__(self, StringIO(dataString))


def make_tar(source_folder, dest_folder, compression='bz2'):
    """Archive *source_folder* into ``<name>.tar[.<compression>]``.

    The archive is written into *dest_folder* and its path is returned.
    *compression* is a ``tarfile`` compression suffix such as ``'bz2'`` or
    ``'gz'``; a false value produces an uncompressed tar.
    """
    if compression:
        file_ext = '.' + compression
        compressionPara = ':' + compression
    else:
        file_ext = ''
        compressionPara = ''

    # normpath drops a trailing separator, which would otherwise make
    # basename() return '' and yield an archive named just ".tar".
    arcname = os.path.basename(os.path.normpath(source_folder))
    dest_file = "%s.tar%s" % (arcname, file_ext)
    dest_path = os.path.join(dest_folder, dest_file)

    out = tarfile.open(dest_path, 'w' + compressionPara)
    try:
        out.add(source_folder, arcname)
    finally:
        out.close()
    return dest_path


def _is_real_file(fileObj):
    """Tell whether *fileObj* is backed by an operating-system file."""
    if _BUILTIN_FILE is not None:
        return isinstance(fileObj, _BUILTIN_FILE)
    if not isinstance(fileObj, io.IOBase):
        return False
    try:
        fileObj.fileno()
    except (OSError, ValueError):
        # in-memory streams such as BytesIO have no descriptor
        return False
    return True


def adapte_file(fileObj):
    """Return a real file object holding the content of *fileObj*.

    A real file is returned unchanged. Any other file-like object is
    drained into a temporary file (binary or text, matching what the source
    yields), the source is closed, and the temporary file is returned
    positioned at its start.
    """
    if _is_real_file(fileObj):
        return fileObj

    data = fileObj.read(CHUNK_SIZE)
    mode = 'w+b' if isinstance(data, bytes) else 'w+'
    tmpFileObj = tempfile.TemporaryFile(mode)
    try:
        while data:
            tmpFileObj.write(data)
            data = fileObj.read(CHUNK_SIZE)
    except BaseException:
        tmpFileObj.close()
        raise
    fileObj.close()
    tmpFileObj.seek(0)
    return tmpFileObj


def all_files(rootPath, patterns='*', single_level=False):
    """Yield paths of files under *rootPath* matching any of *patterns*.

    *patterns* is a ``;``-separated list of ``fnmatch`` patterns. With
    *single_level* true only *rootPath* itself is scanned, not its
    sub-directories. Within each directory files come out in sorted order.
    """
    patterns = patterns.split(';')
    for path, dirs, files in os.walk(rootPath):
        files.sort()
        for name in files:
            # any() stops at the first matching pattern, so a file is
            # yielded at most once
            if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
                yield os.path.join(path, name)
        if single_level:
            break


def swapextensions(rootPath, before, after):
    """Rename every ``*before`` file under *rootPath* to ``*after``.

    Both extensions may be given with or without the leading dot. Returns
    the number of files renamed.
    """
    if before[:1] != '.':
        before = '.' + before
    if after[:1] != '.':
        after = '.' + after
    extLen = -len(before)

    swapCount = 0
    for path, dirs, files in os.walk(rootPath):
        for name in files:
            if name.endswith(before):
                oldfile = os.path.join(path, name)
                newfile = oldfile[:extLen] + after
                os.rename(oldfile, newfile)
                swapCount += 1
    return swapCount


def search_file(filename, searchPath, pathsep=os.pathsep):
    """Return the absolute path of *filename* found along *searchPath*.

    *searchPath* is a *pathsep*-separated list of directories, searched in
    order. Returns ``None`` when no directory contains the file.
    """
    for path in searchPath.split(pathsep):
        candidate = os.path.join(path, filename)
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)
    return None


def addPythonSearchPath(newPath):
    """Append *newPath* to ``sys.path`` unless it is already there.

    Returns:
        1  - the path was appended
        0  - the path is already on ``sys.path``
        -1 - the path does not exist

    On Windows the comparison is case-insensitive (both sides are
    lower-cased, and the lower-cased path is what gets appended).
    """
    if not os.path.exists(newPath):
        return -1
    newPath = os.path.abspath(newPath)

    is_windows = sys.platform == 'win32'
    if is_windows:
        newPath = newPath.lower()
    for searchPath in sys.path:
        searchPath = os.path.abspath(searchPath)
        if is_windows:
            searchPath = searchPath.lower()
        # The duplicate check must run on every platform, after the
        # optional case folding.
        if newPath in (searchPath, searchPath + os.sep):
            return 0

    sys.path.append(newPath)
    return 1


def main():
    """Exercise the recipes against sample files in the working directory."""
    read_file('apache_log')

    totalbytes = 0
    for chunk in read_file_by_chunks('twittericon.png'):
        totalbytes += len(chunk)
    print('twittericon.png file size is %d bytes' % totalbytes)

    # disabled example: search_replace_text_infile('who', '***')

    desired_line_number = 1
    print('The %d line in "apache_log" is "%s"'
          % (desired_line_number, getline('apache_log', desired_line_number)))

    # linecache also offers clearcache() and checkcache()
    print('The %d line in "sample.txt" is "%s"'
          % (desired_line_number,
             linecache.getline('sample.txt', desired_line_number).rstrip('\n')))

    for f in linecount_w, linecount_1, linecount_2, linecount_3:
        print('%s %s' % (f.__name__, f('apache_log')))

    # print the first line with every word reversed
    wordlist = [word[::-1] for word in words_of_file_in_firstline('apache_log')]
    print(' '.join(wordlist))

    read_from_zip_file('Archive.zip')

    just_for_fun()

    # disabled example: print(make_tar('./OCP', '.'))

    for path in all_files('./OCP', '*.py'):
        print(path)

    path = './OCP'
    before = 'html'
    after = 'htm'
    print('the count of swap extensions from %s to %s at path "%s" is %d'
          % (before, after, path, swapextensions(path, before, after)))

    searchpath = '/bin' + os.pathsep + '/usr/bin'
    find_file = search_file('ls', searchpath)
    if find_file:
        print('File "ls" found at %s.' % find_file)
    else:
        print('File "ls" not found.')

    print('origin search path:')
    for x in sys.path:
        print(x)
    print("add new path to module search path of python, result:%d"
          % addPythonSearchPath('./OCP'))
    print('New search path:')
    for x in sys.path:
        print(x)


if __name__ == '__main__':
    main()