Python3批量转换文本文件编码
来源:互联网 发布:intelj java.util.map 编辑:程序博客网 时间:2024/04/30 13:57
Python3批量转换文本文件编码
#-*- coding: utf-8 -*-
# Standard-library modules used by the file-listing helpers.
import glob
import os
import os.path

# chardet is optional: IsAuto records whether encoding auto-detection is
# available, so callers can fall back to a manually specified encoding.
try:
    from chardet.universaldetector import UniversalDetector
except ImportError:
    IsAuto = False
else:
    IsAuto = True
def Convert_Auto(filename, out_enc="utf-8"):
    r'''Re-encode a text file in place, auto-detecting its current encoding.

    Needs the chardet library (UniversalDetector).

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. Progress and failures are reported with print().
        I/O errors are caught and reported; decode/encode lookup errors
        and UnicodeEncodeError propagate to the caller.
    '''
    # Bound before the try block so the error report below can always use it,
    # even when the very first open() fails.
    in_enc = None
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        detector = UniversalDetector()
        detector.reset()
        # Only the first 1024 bytes are sampled, fed unmodified so that a
        # byte-order mark (if any) stays at the very start of the sample.
        # NOTE(review): chardet is expected to check for a BOM only at the
        # start of its input -- confirm against the chardet documentation.
        detector.feed(raw[:1024])
        detector.close()
        in_enc = detector.result['encoding']

        if in_enc is None:
            # Detection gave no answer (e.g. empty file); leave the file alone.
            print("Error: " + filename + " FAIL to detect current encoding !")
            return

        new_content = raw.decode(in_enc, 'ignore')
        # Encode before reopening the file: if out_enc cannot represent the
        # text, the exception is raised while the original is still intact.
        encoded = new_content.encode(out_enc)
        # Binary mode avoids newline translation of the decoded text.
        with open(filename, 'wb') as dst:
            dst.write(encoded)

        print("Success: " + filename + " converted from " + in_enc
              + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from "
              + (in_enc or "unknown") + " to " + out_enc + " !")
def Convert_Manu(filename, in_enc='gbk', out_enc="utf-8"):
    r'''Re-encode a text file in place from a manually specified encoding.

    Input Parameter:
        filename: full path and file name, e.g. c:\dir1\file.txt
        in_enc: current encoding. Default is 'gbk'
        out_enc: new encoding. Default is 'utf-8'
    Output Parameter:
        None. Progress and failures are reported with print().
        I/O errors are caught and reported; an unknown codec name or a
        UnicodeEncodeError propagates to the caller.
    '''
    try:
        print("convert " + filename)
        with open(filename, 'rb') as src:
            raw = src.read()

        # Bytes that are invalid in in_enc are dropped rather than raising.
        new_content = raw.decode(in_enc, 'ignore')
        # Encode before reopening the file: if out_enc cannot represent the
        # text, the exception is raised while the original is still intact.
        encoded = new_content.encode(out_enc)
        # Binary mode avoids newline translation of the decoded text.
        with open(filename, 'wb') as dst:
            dst.write(encoded)

        print("Success: " + filename + " converted from " + in_enc
              + " to " + out_enc + " !")
    except IOError:
        print("Error: " + filename + " FAIL to converted from " + in_enc
              + " to " + out_enc + " !")
def explore(dir, IsLoopSubDIR=True):
    '''Convert the encoding of every '.txt' file found for a folder.

    Input:
        dir : folder handed to the file-listing helper
        IsLoopSubDIR: True -- use getSubFileList (sub folders included)
                      False-- use getCurrFileList (current level only)
    Output:
        NONE
    '''
    # Pick the listing helper once, then convert each file it reports.
    list_files = getSubFileList if IsLoopSubDIR else getCurrFileList
    for path in list_files(dir, '.txt'):
        if not IsAuto:
            # No auto-detection available: treat the input as gbk.
            Convert_Manu(path, 'gbk', 'utf-8')
        else:
            Convert_Auto(path, 'utf-8')
def getSubFileList(dir, suffix=''):
    '''Get all files with the specified suffix under a folder (sub folders included).

    Input:
        dir : folder to search; it is walked recursively
        suffix : default to blank, means select all files.
    Output:
        File list (absolute paths)
    '''
    flist = []
    # Walk the folder that was asked for rather than the process's current
    # directory; abspath keeps the returned paths absolute.
    for root, _subdirs, files in os.walk(os.path.abspath(dir)):
        flist.extend(
            os.path.join(root, name) for name in files if name.endswith(suffix)
        )
    return flist
def getCurrFileList(dir, suffix=''):
    '''Get all files with the specified suffix directly inside a folder.

    Input:
        dir : folder to search (sub folders are not visited)
        suffix : default to blank, means select all files. It is appended
                 to a '*' glob pattern.
    Output:
        File list (absolute paths)
    '''
    # Search the folder that was asked for rather than the process's current
    # directory; escape it so glob metacharacters in the path are literal.
    base = os.path.abspath(dir)
    pattern = os.path.join(glob.escape(base), '*' + suffix)
    # Keep regular files only: directories matching the pattern are not
    # convertible text files.
    return [path for path in glob.glob(pattern) if os.path.isfile(path)]
def main():
    '''Run explore() on the current working directory, sub folders included.'''
    start_dir = os.getcwd()
    explore(start_dir, True)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Python3批量转换文本文件编码
0 0
- Python3批量转换文本文件编码
- Python3批量转换文本文件编码
- Python3批量转换文本文件编码
- Python3批量转换文本文件编码
- 批量转换文本文件编码
- 批量转换纯文本文件编码
- python3 批量编码格式转换
- Python3 批量转换文件编码 Encoding
- Python3 批量转换文件编码 Encoding
- Python3 批量转换文件编码 Encoding
- 一个批量转换文本文件编码的程序(Python)
- 一个批量转换文本文件编码的程序(Python)
- 使用C#批量转换文本文件编码为UTF8
- 文件编码批量转换——转换一个目录中所有文本文件的编码
- Windows 下 使用 GitBash 批量转换文本文件编码及批量添加文本行
- python3编码转换
- python3常见编码转换
- mac下批量转换文本文件编码的shell脚本(支持子目录)
- 数据结构实验之链表一:顺序建立链表
- 翻转链表
- Java环境变量的标准配置
- spark实例:用spark-submit运行spark程序
- iOS开发(Swift):创建UINavigationView的三种方法
- Python3批量转换文本文件编码
- 数据结构实验之链表二:逆序建立链表
- DFS之素数环
- 数据结构实验之链表三:链表的逆置
- Android高级UI ImageView ImageButton RadioButton CheckBox ProgressBar属性和用法总结
- Python 对文件进行编码转换
- 学习鸟哥的Linux私房菜笔记(5)——目录
- 触摸事件和手势(UITouch)
- centos7 快速安装 mariadb(mysql)