Python3批量转换文本文件编码

来源:互联网 发布:intelj java.util.map 编辑:程序博客网 时间:2024/04/30 13:57


Python3批量转换文本文件编码

#-*- coding: utf-8 -*-
import glob
import os
import os.path

# chardet is an optional third-party dependency. IsAuto records whether it
# could be imported, so callers can choose between automatic encoding
# detection and a manually specified source encoding.
try:
    from chardet.universaldetector import UniversalDetector
    IsAuto = True
except ImportError:
    IsAuto = False
 
def Convert_Auto(filename, out_enc="utf-8"):
    r'''Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library: UniversalDetector must be available at
    module level.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. A "Success" or "Error" line is printed for the file.

    I/O errors are reported with an "Error" line instead of being raised.
    If the text cannot be encoded as out_enc, that exception propagates and
    the file is left untouched.
    '''
    # Placeholder so the error message is always printable, even when the
    # failure happens before detection has produced a result.
    in_enc = "unknown"
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        # Detection looks at the first 1024 bytes only. The sample is fed
        # unmodified: prepending a filler byte would shift a leading
        # byte-order mark away from offset 0.
        detector = UniversalDetector()
        detector.reset()
        detector.feed(raw[:1024])
        detector.close()
        detected = detector.result['encoding']
        if detected is None:
            # No encoding could be determined; decoding with None would
            # raise TypeError, so report the failure and skip the file.
            print("Error: "+filename+" FAIL to converted from "+in_enc+" to "+out_enc+" !")
            return
        in_enc = detected

        # Encode in memory first so a failed conversion cannot truncate the
        # file, and write bytes so existing line endings are not translated.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
        print("Success: "+filename+" converted from "+in_enc+" to "+out_enc+" !")
    except IOError:
        print("Error: "+filename+" FAIL to converted from "+in_enc+" to "+out_enc+" !")
 
def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    r'''Re-encode a text file in place from a caller-specified encoding.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        in_enc:  current encoding. Default is 'gbk'
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. Progress and a "Success" or "Error" line are printed.

    Bytes that are not valid in in_enc are dropped ('ignore' error handler).
    I/O errors are reported with an "Error" line instead of being raised.
    '''
    try:
        print("convert " + filename)
        with open(filename, 'rb') as src:
            raw = src.read()

        # Encode in memory first so a failed conversion cannot truncate the
        # file, and write bytes so existing line endings are not translated.
        encoded = raw.decode(in_enc, 'ignore').encode(out_enc)
        with open(filename, 'wb') as dst:
            dst.write(encoded)
        print("Success: "+filename+" converted from "+in_enc+" to "+out_enc+" !")
    except IOError:
        print("Error: "+filename+" FAIL to converted from "+in_enc+" to "+out_enc+" !")
 
 
def explore(dir, IsLoopSubDIR=True, suffix='.txt', in_enc='gbk', out_enc='utf-8'):
    '''Convert the encoding of every matching file under a folder.

    Input:
        dir         : Folder to process (the name shadows the builtin but is
                      kept so existing keyword callers keep working)
        IsLoopSubDIR: True -- Include files in sub folders
                      False-- Only include files directly in dir
        suffix      : File name suffix to select. Default '.txt'
        in_enc      : Source encoding used when chardet is unavailable.
                      Default 'gbk'
        out_enc     : Target encoding. Default 'utf-8'
    Output:
        NONE
    '''
    if IsLoopSubDIR:
        flist = getSubFileList(dir, suffix)
    else:
        flist = getCurrFileList(dir, suffix)

    for fname in flist:
        # IsAuto is set at import time: True only when chardet was importable.
        if IsAuto:
            Convert_Auto(fname, out_enc)
        else:
            Convert_Manu(fname, in_enc, out_enc)
     
def getSubFileList(dir, suffix=''):
    '''Get all files with the specified suffix under a folder, recursively.

    Input:
        dir     :   Folder to walk (sub folders included)
        suffix  :   defaults to blank, which selects all files.
    Output:
        List of file paths, each built by joining the walked folder and the
        file name.
    '''
    flist = []
    # Walk the folder that was passed in, not the process working directory.
    for root, _dirs, files in os.walk(dir):
        flist.extend(
            os.path.join(root, name) for name in files if name.endswith(suffix)
        )
    return flist
 
def getCurrFileList(dir, suffix=''):
    '''Get all files with the specified suffix directly inside a folder.

    Input:
        dir     :   Folder to list (sub folders are not entered)
        suffix  :   defaults to blank, which selects all files.
    Output:
        List of file paths, each prefixed with dir. Names starting with a
        dot are not matched by the '*' pattern, and directories are left out.
    '''
    # Escape dir so glob metacharacters in the folder name are taken
    # literally; '*' + '' is just '*', so a blank suffix needs no special case.
    pattern = os.path.join(glob.escape(dir), '*' + suffix)
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]
         
         
def main():
    '''Convert matching files under the current working directory, including sub folders.'''
    explore(os.getcwd(), True)
     
# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Python3批量转换文本文件编码
0 0
原创粉丝点击