Python中用encoding声明的文件编码和文件的实际编码之间的关系
来源:互联网 发布:android 离线数据缓存 编辑:程序博客网 时间:2024/05/21 05:07
声明的编码和实际的编码匹配的时候:声明为UTF-8编码,文件实际编码也的确是UTF-8
(1)示例代码:
#!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------"""import sysif sys.platform == "win32": import codecs from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID original_stderr = sys.stderr # If any exception occurs in this code, we'll probably try to print it on stderr, # which makes for frustrating debugging if stderr is directed to our wrapper. # So be paranoid about catching errors and reporting them to original_stderr, # so that we can at least see them. def _complain(message): print >>original_stderr, message if isinstance(message, str) else repr(message) # Work around <http://bugs.python.org/issue6058>. codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) # Make Unicode console output work independently of the current code page. # This also fixes <http://bugs.python.org/issue1602>. # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx> # and TZOmegaTZIOY # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>. try: # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx> # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); # returns INVALID_HANDLE_VALUE, NULL, or a valid handle # # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> # DWORD WINAPI GetFileType(DWORD hFile); # # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx> # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32)) STD_OUTPUT_HANDLE = DWORD(-11) STD_ERROR_HANDLE = DWORD(-12) GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32)) FILE_TYPE_CHAR = 0x0002 FILE_TYPE_REMOTE = 0x8000 GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32)) INVALID_HANDLE_VALUE = DWORD(-1).value def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, byref(DWORD())) == 0) old_stdout_fileno = None old_stderr_fileno = None if hasattr(sys.stdout, 'fileno'): old_stdout_fileno = sys.stdout.fileno() if hasattr(sys.stderr, 'fileno'): old_stderr_fileno = sys.stderr.fileno() STDOUT_FILENO = 1 STDERR_FILENO = 2 real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) if not_a_console(hStderr): real_stderr = False if real_stdout or real_stderr: # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32)) class UnicodeOutput: def __init__(self, hConsole, stream, fileno, name): self._hConsole = hConsole self._stream = stream self._fileno = fileno self.closed = False self.softspace = False self.mode = 'w' self.encoding = 'utf-8' self.name = name self.flush() def isatty(self): return False def close(self): # don't really close the handle, that would only cause problems self.closed = True def fileno(self): return self._fileno def flush(self): if self._hConsole is None: try: self._stream.flush() except Exception as e: _complain("%s.flush: %r from %r" % (self.name, e, self._stream)) raise def write(self, text): try: if self._hConsole is None: if isinstance(text, unicode): text = text.encode('utf-8') self._stream.write(text) else: if not isinstance(text, unicode): text = str(text).decode('utf-8') remaining = len(text) while remaining: n = DWORD(0) # There is a shorter-than-documented limitation on the # length of the string passed to WriteConsoleW (see # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>. retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) if retval == 0 or n.value == 0: raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value)) remaining -= n.value if not remaining: break text = text[n.value:] except Exception as e: _complain("%s.write: %r" % (self.name, e)) raise def writelines(self, lines): try: for line in lines: self.write(line) except Exception as e: _complain("%s.writelines: %r" % (self.name, e)) raise if real_stdout: sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>') else: sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>') if real_stderr: sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>') else: sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>') except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) # While we're at it, let's unmangle the command-line arguments: # This works around <http://bugs.python.org/issue2128>. GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32)) CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32)) argc = c_int(0) argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)] if not hasattr(sys, 'frozen'): # If this is an executable produced by py2exe or bbfreeze, then it will # have been invoked directly. Otherwise, unicode_argv[0] is the Python # interpreter, so skip that. argv = argv[1:] # Also skip option arguments to the Python interpreter. while len(argv) > 0: arg = argv[0] if not arg.startswith(u"-") or arg == u"-": break argv = argv[1:] if arg == u'-m': # sys.argv[0] should really be the absolute path of the module source, # but never mind break if arg == u'-c': argv[0] = u'-c' break # if you like: sys.argv = argv####################################################################### def declare_encoding_vs_real_encoding_declareUtf8RealUtf8(): """ Demo Python declare encoding vs. real file encoding """ helpInfo = """在当前Python文件的第二行,用 # -*- coding: utf-8 -*- 去声明当前文件编码是utf-8 所以,当前文件也必须是UTF-8编码的。 如此: 1. Python解析器解析当前文件,才会去按照UTF-8解析 2. 当前文件中的中文字符也是UTF-8编码 3. 然后在用decode("utf-8")去解码,才是正确 """; realUtf8Char = "我是UTF-8的中文字符串"; decodedUnicodeStr = realUtf8Char.decode("utf-8"); #在windows的cmd中,此处Unicode字符串,才能正常输出:decodedUnicodeStr= 我是UTF-8的中文字符串 print "decodedUnicodeStr=",decodedUnicodeStr; ###############################################################################if __name__=="__main__": declare_encoding_vs_real_encoding_declareUtf8RealUtf8();
(2)在Notepad++中,可以看出,当前文件我的确已经设置成了UTF-8:
(2)运行效果如下:
声明的编码和实际的编码不匹配的时候:声明为UTF-8,文件实际编码是(ANSI的)GBK
(1)示例代码:
#!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------"""#---------------------------------import---------------------------------------import sys #------------------------------------------------------------------------------if sys.platform == "win32": import codecs from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID original_stderr = sys.stderr # If any exception occurs in this code, we'll probably try to print it on stderr, # which makes for frustrating debugging if stderr is directed to our wrapper. # So be paranoid about catching errors and reporting them to original_stderr, # so that we can at least see them. def _complain(message): print >>original_stderr, message if isinstance(message, str) else repr(message) # Work around <http://bugs.python.org/issue6058>. codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) # Make Unicode console output work independently of the current code page. # This also fixes <http://bugs.python.org/issue1602>. # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx> # and TZOmegaTZIOY # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>. try: # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx> # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); # returns INVALID_HANDLE_VALUE, NULL, or a valid handle # # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> # DWORD WINAPI GetFileType(DWORD hFile); # # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx> # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32)) STD_OUTPUT_HANDLE = DWORD(-11) STD_ERROR_HANDLE = DWORD(-12) GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32)) FILE_TYPE_CHAR = 0x0002 FILE_TYPE_REMOTE = 0x8000 GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32)) INVALID_HANDLE_VALUE = DWORD(-1).value def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, byref(DWORD())) == 0) old_stdout_fileno = None old_stderr_fileno = None if hasattr(sys.stdout, 'fileno'): old_stdout_fileno = sys.stdout.fileno() if hasattr(sys.stderr, 'fileno'): old_stderr_fileno = sys.stderr.fileno() STDOUT_FILENO = 1 STDERR_FILENO = 2 real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) if not_a_console(hStderr): real_stderr = False if real_stdout or real_stderr: # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32)) class UnicodeOutput: def __init__(self, hConsole, stream, fileno, name): self._hConsole = hConsole self._stream = stream self._fileno = fileno self.closed = False self.softspace = False self.mode = 'w' self.encoding = 'utf-8' self.name = name self.flush() def isatty(self): return False def close(self): # don't really close the handle, that would only cause problems self.closed = True def fileno(self): return self._fileno def flush(self): if self._hConsole is None: try: self._stream.flush() except Exception as e: _complain("%s.flush: %r from %r" % (self.name, e, self._stream)) raise def write(self, text): try: if self._hConsole is None: if isinstance(text, unicode): text = text.encode('utf-8') self._stream.write(text) else: if not isinstance(text, unicode): text = str(text).decode('utf-8') remaining = len(text) while remaining: n = DWORD(0) # There is a shorter-than-documented limitation on the # length of the string passed to WriteConsoleW (see # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>. retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) if retval == 0 or n.value == 0: raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value)) remaining -= n.value if not remaining: break text = text[n.value:] except Exception as e: _complain("%s.write: %r" % (self.name, e)) raise def writelines(self, lines): try: for line in lines: self.write(line) except Exception as e: _complain("%s.writelines: %r" % (self.name, e)) raise if real_stdout: sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>') else: sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>') if real_stderr: sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>') else: sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>') except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) # While we're at it, let's unmangle the command-line arguments: # This works around <http://bugs.python.org/issue2128>. GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32)) CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32)) argc = c_int(0) argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)] if not hasattr(sys, 'frozen'): # If this is an executable produced by py2exe or bbfreeze, then it will # have been invoked directly. Otherwise, unicode_argv[0] is the Python # interpreter, so skip that. argv = argv[1:] # Also skip option arguments to the Python interpreter. while len(argv) > 0: arg = argv[0] if not arg.startswith(u"-") or arg == u"-": break argv = argv[1:] if arg == u'-m': # sys.argv[0] should really be the absolute path of the module source, # but never mind break if arg == u'-c': argv[0] = u'-c' break # if you like: sys.argv = argv####################################################################### #!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------------------------""" def declare_encoding_vs_real_encoding_declareUtf8RealGBK(): """ Demo Python declare encoding vs. real file encoding """ helpInfo = """在当前Python文件的第二行,用 # -*- coding: utf-8 -*- 去声明当前文件编码是utf-8 在这里故意把当前文件的本身编码转换为了GBK,即:声明的编码是UTF-8, 真正文件的编码是GBK -> 声明的编码,和真正文件编码,之间不一致 -> 实际经过测试,由于本身文件编码是GBK的,虽然声明了UTF-8编码,但是Python解析器,实际上也会自动识别,并按照真正的GBK编码去解析, 所以结果就是,当前文件,Python是按照,文件真实编码GBK去解码的。所以: 后面的,当前Python文件出现的字符串,都是GBK的 -> 对于GBK编码的字符串,去用GBK去decode,当时是正常的,可以得到Unicode的字符串的; 对于GBK编码的字符串,用UTF-8去解码,当然会出错 """; realEncodingGbkChar = "此处我实际上是GBK的中文字符串"; useGbkDecodedUnicodeChar = realEncodingGbkChar.decode("gbk"); #此处可以正常获得Unicode字符串 print "useGbkDecodedUnicodeChar=",useGbkDecodedUnicodeChar; #在Windows的cmd中输出Unicode字符串,可以正常输出:useGbkDecodedUnicodeChar= 此处我实际上是GBK的中文字符串 useUtf8DecodedUnicodeChar = realEncodingGbkChar.decode("utf-8"); #此处就会报错:UnicodeDecodeError: 'utf8' codec can't decode byte 0xb4 in position 0: invalid start byte###############################################################################if __name__=="__main__": declare_encoding_vs_real_encoding_declareUtf8RealGBK();
(2)在Notepad++中,可以看出,当前文件我是故意,已经转换为GBK了:
(3)运行效果如下:
声明的编码和实际的编码不匹配的时候:声明为UTF-8,文件实际编码是GBK
#!/usr/bin/python# -*- coding: GBK -*-"""-------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------"""#---------------------------------import---------------------------------------import sys #------------------------------------------------------------------------------if sys.platform == "win32": import codecs from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID original_stderr = sys.stderr # If any exception occurs in this code, we'll probably try to print it on stderr, # which makes for frustrating debugging if stderr is directed to our wrapper. # So be paranoid about catching errors and reporting them to original_stderr, # so that we can at least see them. def _complain(message): print >>original_stderr, message if isinstance(message, str) else repr(message) # Work around <http://bugs.python.org/issue6058>. codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) # Make Unicode console output work independently of the current code page. # This also fixes <http://bugs.python.org/issue1602>. # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx> # and TZOmegaTZIOY # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>. try: # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx> # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); # returns INVALID_HANDLE_VALUE, NULL, or a valid handle # # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx> # DWORD WINAPI GetFileType(DWORD hFile); # # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx> # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32)) STD_OUTPUT_HANDLE = DWORD(-11) STD_ERROR_HANDLE = DWORD(-12) GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32)) FILE_TYPE_CHAR = 0x0002 FILE_TYPE_REMOTE = 0x8000 GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32)) INVALID_HANDLE_VALUE = DWORD(-1).value def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, byref(DWORD())) == 0) old_stdout_fileno = None old_stderr_fileno = None if hasattr(sys.stdout, 'fileno'): old_stdout_fileno = sys.stdout.fileno() if hasattr(sys.stderr, 'fileno'): old_stderr_fileno = sys.stderr.fileno() STDOUT_FILENO = 1 STDERR_FILENO = 2 real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) if not_a_console(hStderr): real_stderr = False if real_stdout or real_stderr: # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32)) class UnicodeOutput: def __init__(self, hConsole, stream, fileno, name): self._hConsole = hConsole self._stream = stream self._fileno = fileno self.closed = False self.softspace = False self.mode = 'w' self.encoding = 'utf-8' self.name = name self.flush() def isatty(self): return False def close(self): # don't really close the handle, that would only cause problems self.closed = True def fileno(self): return self._fileno def flush(self): if self._hConsole is None: try: self._stream.flush() except Exception as e: _complain("%s.flush: %r from %r" % (self.name, e, self._stream)) raise def write(self, text): try: if self._hConsole is None: if isinstance(text, unicode): text = text.encode('utf-8') self._stream.write(text) else: if not isinstance(text, unicode): text = str(text).decode('utf-8') remaining = len(text) while remaining: n = DWORD(0) # There is a shorter-than-documented limitation on the # length of the string passed to WriteConsoleW (see # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>. retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) if retval == 0 or n.value == 0: raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value)) remaining -= n.value if not remaining: break text = text[n.value:] except Exception as e: _complain("%s.write: %r" % (self.name, e)) raise def writelines(self, lines): try: for line in lines: self.write(line) except Exception as e: _complain("%s.writelines: %r" % (self.name, e)) raise if real_stdout: sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>') else: sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>') if real_stderr: sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>') else: sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>') except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) # While we're at it, let's unmangle the command-line arguments: # This works around <http://bugs.python.org/issue2128>. GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32)) CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32)) argc = c_int(0) argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)] if not hasattr(sys, 'frozen'): # If this is an executable produced by py2exe or bbfreeze, then it will # have been invoked directly. Otherwise, unicode_argv[0] is the Python # interpreter, so skip that. argv = argv[1:] # Also skip option arguments to the Python interpreter. while len(argv) > 0: arg = argv[0] if not arg.startswith(u"-") or arg == u"-": break argv = argv[1:] if arg == u'-m': # sys.argv[0] should really be the absolute path of the module source, # but never mind break if arg == u'-c': argv[0] = u'-c' break # if you like: sys.argv = argv####################################################################### def declare_encoding_vs_real_encoding_declareGBKRealUtf8(): """ Demo Python declare encoding vs. real file encoding """ helpInfo = """在当前Python文件的第二行,用 # -*- coding: GBK -*- 去声明当前文件编码是GBK 在这里故意把当前文件的本身编码转换为了utf-8,即:声明的编码是gbk, 真正文件的编码是utf-8 -> 声明的编码,和真正文件编码,之间不一致 -> 实际经过测试,由于本身文件编码是utf-8的,声明了GBK编码,Python解析器,并不会自动识别是按照GBK编码去解析, 所以结果就是,当前文件,Python是声明的编码GBK去解码的。 """; realEncodingGbkChar = "此处我实际上是GBK的中文字符串"; # useGbkDecodedUnicodeChar = realEncodingGbkChar.decode("utf-8"); #此处可以正常获得Unicode字符串 # print "useGbkDecodedUnicodeChar=",useGbkDecodedUnicodeChar; #在Windows的cmd中输出Unicode字符串,可以正常输出:useGbkDecodedUnicodeChar= 此处我实际上是GBK的中文字符串 #useUtf8DecodedUnicodeChar = realEncodingGbkChar.decode("utf-8"); #此处就会报错:UnicodeDecodeError: 'utf8' codec can't decode byte 0xb4 in position 0: invalid start byte###############################################################################if __name__=="__main__": declare_encoding_vs_real_encoding_declareGBKRealUtf8();
(2) 运行:
直接报错
0 0
- Python中用encoding声明的文件编码和文件的实际编码之间的关系
- Python中用encoding声明的文件编码和文件的实际编码之间的关系
- Python中用encoding声明的文件编码和文件的实际编码之间的关系
- xml的encoding与文件编码关系
- 文件和文件名字的编码方式转换工具
- python的编码声明
- Python文件编码的声明方法
- python 文件头的编码声明问题
- 字符编码之间的关系
- ubuntu中用python把文件的编码改成utf8
- python 编码问题-文件编码格式和编码声明的作用
- JDBC中操作文件时的数据库、表、字段和文件的编码方式必须保持一致
- 修改myeclipse的默认编码和文件打开方式
- linux下 文件夹和文件的字符集编码方式转换
- 既然读文件要使用和文件编码一致的编码,那么javac编译文件也需要读取文件,它使用什么编码呢?
- Python 字符编码学习-ascii编码,Unicode和UTF-8之间的关系
- 几种编码之间的关系
- 关于python对程序编码的声明
- 最近邻算法(KNN)
- 抓取维基百科数据
- 【插件开发二】开发OpenFire的消息拦截器
- 非递归实现二叉树遍历(思路+代码)
- 二分查找算法及其变种
- Python中用encoding声明的文件编码和文件的实际编码之间的关系
- ViewPager Adapter FragmentStatePagerAdapter FragmentPagerAdapter 使用详解
- 迭代器
- POJ 1177 Picture(线段树+扫描线)
- Eclipse OS X Yosemite 提示– To open“Eclipse” you need to install the legacy Java SE 6 runtime的解决办法
- pdo+mysql+php
- mybatis实战教程(mybatis in action),mybatis入门到精通
- saltstack相关的一些总结
- 把以下IP存入一个txt文件,编写程序把这些IP按数值大小,从小到达排序并打印出来。 61.54.231.245 61.54.231.9 61.54.231.246 61.54.231.48