Python中用encoding声明的文件编码和文件的实际编码之间的关系

来源:互联网 发布:android 离线数据缓存 编辑:程序博客网 时间:2024/05/21 05:07

声明的编码和实际的编码匹配的时候:声明为UTF-8编码,文件实际编码也的确是UTF-8

(1)示例代码:

#!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------"""import sysif sys.platform == "win32":    import codecs    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID    original_stderr = sys.stderr    # If any exception occurs in this code, we'll probably try to print it on stderr,    # which makes for frustrating debugging if stderr is directed to our wrapper.    # So be paranoid about catching errors and reporting them to original_stderr,    # so that we can at least see them.    def _complain(message):        print >>original_stderr, message if isinstance(message, str) else repr(message)    # Work around <http://bugs.python.org/issue6058>.    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)    # Make Unicode console output work independently of the current code page.    # This also fixes <http://bugs.python.org/issue1602>.    # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>    # and TZOmegaTZIOY    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.    
try:        # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle        #        # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>        # DWORD WINAPI GetFileType(DWORD hFile);        #        # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);        GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))        STD_OUTPUT_HANDLE = DWORD(-11)        STD_ERROR_HANDLE = DWORD(-12)        GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32))        FILE_TYPE_CHAR = 0x0002        FILE_TYPE_REMOTE = 0x8000        GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))        INVALID_HANDLE_VALUE = DWORD(-1).value        def not_a_console(handle):            if handle == INVALID_HANDLE_VALUE or handle is None:                return True            return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR                    or GetConsoleMode(handle, byref(DWORD())) == 0)        old_stdout_fileno = None        old_stderr_fileno = None        if hasattr(sys.stdout, 'fileno'):            old_stdout_fileno = sys.stdout.fileno()        if hasattr(sys.stderr, 'fileno'):            old_stderr_fileno = sys.stderr.fileno()        STDOUT_FILENO = 1        STDERR_FILENO = 2        real_stdout = (old_stdout_fileno == STDOUT_FILENO)        real_stderr = (old_stderr_fileno == STDERR_FILENO)        if real_stdout:            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)            if not_a_console(hStdout):                real_stdout = False        if real_stderr:            hStderr = GetStdHandle(STD_ERROR_HANDLE)            if not_a_console(hStderr):                real_stderr = False        if real_stdout or real_stderr:            # BOOL WINAPI WriteConsoleW(HANDLE 
hOutput, LPWSTR lpBuffer, DWORD nChars,            #                           LPDWORD lpCharsWritten, LPVOID lpReserved);            WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32))            class UnicodeOutput:                def __init__(self, hConsole, stream, fileno, name):                    self._hConsole = hConsole                    self._stream = stream                    self._fileno = fileno                    self.closed = False                    self.softspace = False                    self.mode = 'w'                    self.encoding = 'utf-8'                    self.name = name                    self.flush()                def isatty(self):                    return False                def close(self):                    # don't really close the handle, that would only cause problems                    self.closed = True                def fileno(self):                    return self._fileno                def flush(self):                    if self._hConsole is None:                        try:                            self._stream.flush()                        except Exception as e:                            _complain("%s.flush: %r from %r" % (self.name, e, self._stream))                            raise                def write(self, text):                    try:                        if self._hConsole is None:                            if isinstance(text, unicode):                                text = text.encode('utf-8')                            self._stream.write(text)                        else:                            if not isinstance(text, unicode):                                text = str(text).decode('utf-8')                            remaining = len(text)                            while remaining:                                n = DWORD(0)                                # There is a shorter-than-documented limitation on the                         
       # length of the string passed to WriteConsoleW (see                                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.                                retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None)                                if retval == 0 or n.value == 0:                                    raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value))                                remaining -= n.value                                if not remaining:                                    break                                text = text[n.value:]                    except Exception as e:                        _complain("%s.write: %r" % (self.name, e))                        raise                def writelines(self, lines):                    try:                        for line in lines:                            self.write(line)                    except Exception as e:                        _complain("%s.writelines: %r" % (self.name, e))                        raise            if real_stdout:                sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')            else:                sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')            if real_stderr:                sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')            else:                sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')    except Exception as e:        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))    # While we're at it, let's unmangle the command-line arguments:    # This works around <http://bugs.python.org/issue2128>.    
GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))    CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))    argc = c_int(0)    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))    argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)]    if not hasattr(sys, 'frozen'):        # If this is an executable produced by py2exe or bbfreeze, then it will        # have been invoked directly. Otherwise, unicode_argv[0] is the Python        # interpreter, so skip that.        argv = argv[1:]        # Also skip option arguments to the Python interpreter.        while len(argv) > 0:            arg = argv[0]            if not arg.startswith(u"-") or arg == u"-":                break            argv = argv[1:]            if arg == u'-m':                # sys.argv[0] should really be the absolute path of the module source,                # but never mind                break            if arg == u'-c':                argv[0] = u'-c'                break    # if you like:    sys.argv = argv####################################################################### def declare_encoding_vs_real_encoding_declareUtf8RealUtf8():    """        Demo Python declare encoding vs. real file encoding    """    helpInfo = """在当前Python文件的第二行,用    # -*- coding: utf-8 -*-    去声明当前文件编码是utf-8    所以,当前文件也必须是UTF-8编码的。    如此:    1. Python解析器解析当前文件,才会去按照UTF-8解析    2. 当前文件中的中文字符也是UTF-8编码    3. 然后在用decode("utf-8")去解码,才是正确    """;    realUtf8Char = "我是UTF-8的中文字符串";    decodedUnicodeStr = realUtf8Char.decode("utf-8");    #在windows的cmd中,此处Unicode字符串,才能正常输出:decodedUnicodeStr= 我是UTF-8的中文字符串    print "decodedUnicodeStr=",decodedUnicodeStr; ###############################################################################if __name__=="__main__":    declare_encoding_vs_real_encoding_declareUtf8RealUtf8();

(2)在Notepad++中,可以看出,当前文件我的确已经设置成了UTF-8:

declare encoding utf8 real is utf8

(3)运行效果如下:

both is utf8 match so output cn char ok

声明的编码和实际的编码不匹配的时候:声明为UTF-8,文件实际编码是(ANSI的)GBK

(1)示例代码:

#!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------"""#---------------------------------import---------------------------------------import sys #------------------------------------------------------------------------------if sys.platform == "win32":    import codecs    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID    original_stderr = sys.stderr    # If any exception occurs in this code, we'll probably try to print it on stderr,    # which makes for frustrating debugging if stderr is directed to our wrapper.    # So be paranoid about catching errors and reporting them to original_stderr,    # so that we can at least see them.    def _complain(message):        print >>original_stderr, message if isinstance(message, str) else repr(message)    # Work around <http://bugs.python.org/issue6058>.    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)    # Make Unicode console output work independently of the current code page.    # This also fixes <http://bugs.python.org/issue1602>.    # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>    # and TZOmegaTZIOY    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.    
try:        # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle        #        # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>        # DWORD WINAPI GetFileType(DWORD hFile);        #        # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);        GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))        STD_OUTPUT_HANDLE = DWORD(-11)        STD_ERROR_HANDLE = DWORD(-12)        GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32))        FILE_TYPE_CHAR = 0x0002        FILE_TYPE_REMOTE = 0x8000        GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))        INVALID_HANDLE_VALUE = DWORD(-1).value        def not_a_console(handle):            if handle == INVALID_HANDLE_VALUE or handle is None:                return True            return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR                    or GetConsoleMode(handle, byref(DWORD())) == 0)        old_stdout_fileno = None        old_stderr_fileno = None        if hasattr(sys.stdout, 'fileno'):            old_stdout_fileno = sys.stdout.fileno()        if hasattr(sys.stderr, 'fileno'):            old_stderr_fileno = sys.stderr.fileno()        STDOUT_FILENO = 1        STDERR_FILENO = 2        real_stdout = (old_stdout_fileno == STDOUT_FILENO)        real_stderr = (old_stderr_fileno == STDERR_FILENO)        if real_stdout:            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)            if not_a_console(hStdout):                real_stdout = False        if real_stderr:            hStderr = GetStdHandle(STD_ERROR_HANDLE)            if not_a_console(hStderr):                real_stderr = False        if real_stdout or real_stderr:            # BOOL WINAPI WriteConsoleW(HANDLE 
hOutput, LPWSTR lpBuffer, DWORD nChars,            #                           LPDWORD lpCharsWritten, LPVOID lpReserved);            WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32))            class UnicodeOutput:                def __init__(self, hConsole, stream, fileno, name):                    self._hConsole = hConsole                    self._stream = stream                    self._fileno = fileno                    self.closed = False                    self.softspace = False                    self.mode = 'w'                    self.encoding = 'utf-8'                    self.name = name                    self.flush()                def isatty(self):                    return False                def close(self):                    # don't really close the handle, that would only cause problems                    self.closed = True                def fileno(self):                    return self._fileno                def flush(self):                    if self._hConsole is None:                        try:                            self._stream.flush()                        except Exception as e:                            _complain("%s.flush: %r from %r" % (self.name, e, self._stream))                            raise                def write(self, text):                    try:                        if self._hConsole is None:                            if isinstance(text, unicode):                                text = text.encode('utf-8')                            self._stream.write(text)                        else:                            if not isinstance(text, unicode):                                text = str(text).decode('utf-8')                            remaining = len(text)                            while remaining:                                n = DWORD(0)                                # There is a shorter-than-documented limitation on the                         
       # length of the string passed to WriteConsoleW (see                                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>.                                retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None)                                if retval == 0 or n.value == 0:                                    raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value))                                remaining -= n.value                                if not remaining:                                    break                                text = text[n.value:]                    except Exception as e:                        _complain("%s.write: %r" % (self.name, e))                        raise                def writelines(self, lines):                    try:                        for line in lines:                            self.write(line)                    except Exception as e:                        _complain("%s.writelines: %r" % (self.name, e))                        raise            if real_stdout:                sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')            else:                sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')            if real_stderr:                sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')            else:                sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')    except Exception as e:        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))    # While we're at it, let's unmangle the command-line arguments:    # This works around <http://bugs.python.org/issue2128>.    
GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))    CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))    argc = c_int(0)    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))    argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)]    if not hasattr(sys, 'frozen'):        # If this is an executable produced by py2exe or bbfreeze, then it will        # have been invoked directly. Otherwise, unicode_argv[0] is the Python        # interpreter, so skip that.        argv = argv[1:]        # Also skip option arguments to the Python interpreter.        while len(argv) > 0:            arg = argv[0]            if not arg.startswith(u"-") or arg == u"-":                break            argv = argv[1:]            if arg == u'-m':                # sys.argv[0] should really be the absolute path of the module source,                # but never mind                break            if arg == u'-c':                argv[0] = u'-c'                break    # if you like:    sys.argv = argv####################################################################### #!/usr/bin/python# -*- coding: utf-8 -*-"""-------------------------------------------------------------------------------[Function][Date][Author][Contact]-------------------------------------------------------------------------------"""  def declare_encoding_vs_real_encoding_declareUtf8RealGBK():    """        Demo Python declare encoding vs. 
real file encoding    """    helpInfo = """在当前Python文件的第二行,用    # -*- coding: utf-8 -*-    去声明当前文件编码是utf-8    在这里故意把当前文件的本身编码转换为了GBK,即:声明的编码是UTF-8, 真正文件的编码是GBK -> 声明的编码,和真正文件编码,之间不一致 ->    实际经过测试,由于本身文件编码是GBK的,虽然声明了UTF-8编码,但是Python解析器,实际上也会自动识别,并按照真正的GBK编码去解析,    所以结果就是,当前文件,Python是按照,文件真实编码GBK去解码的。所以:    后面的,当前Python文件出现的字符串,都是GBK的 ->    对于GBK编码的字符串,去用GBK去decode,当时是正常的,可以得到Unicode的字符串的;    对于GBK编码的字符串,用UTF-8去解码,当然会出错    """;    realEncodingGbkChar = "此处我实际上是GBK的中文字符串";    useGbkDecodedUnicodeChar = realEncodingGbkChar.decode("gbk"); #此处可以正常获得Unicode字符串    print "useGbkDecodedUnicodeChar=",useGbkDecodedUnicodeChar; #在Windows的cmd中输出Unicode字符串,可以正常输出:useGbkDecodedUnicodeChar= 此处我实际上是GBK的中文字符串    useUtf8DecodedUnicodeChar = realEncodingGbkChar.decode("utf-8"); #此处就会报错:UnicodeDecodeError: 'utf8' codec can't decode byte 0xb4 in position 0: invalid start byte###############################################################################if __name__=="__main__":    declare_encoding_vs_real_encoding_declareUtf8RealGBK();

(2)在Notepad++中,可以看出,当前文件我是故意,已经转换为GBK了:
declare utf8 real encoding is ansi gbk
(3)运行效果如下:

这里写图片描述

声明的编码和实际的编码不匹配的时候:声明为GBK,文件实际编码是UTF-8

#!/usr/bin/python
# -*- coding: GBK -*-
"""Demo (Python 2): declared source encoding GBK, real file encoding UTF-8.

On Windows the module first replaces ``sys.stdout``/``sys.stderr`` with
wrappers that make unicode console output work independently of the current
code page, and rebuilds ``sys.argv`` from the wide-character command line.
This file is meant to be saved as UTF-8 despite the GBK declaration above;
the accompanying article reports that running it fails immediately.
"""

#---------------------------------import---------------------------------------
import sys

#------------------------------------------------------------------------------
if sys.platform == "win32":
    import codecs
    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID

    original_stderr = sys.stderr

    # If any exception occurs in this code, we'll probably try to print it on stderr,
    # which makes for frustrating debugging if stderr is directed to our wrapper.
    # So be paranoid about catching errors and reporting them to original_stderr,
    # so that we can at least see them.
    def _complain(message):
        """Print *message* (or its repr if it is not a byte str) to the original stderr."""
        print >>original_stderr, message if isinstance(message, str) else repr(message)

    # Work around <http://bugs.python.org/issue6058>.
    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    # Make Unicode console output work independently of the current code page.
    # This also fixes <http://bugs.python.org/issue1602>.
    # Credit to Michael Kaplan <http://www.siao2.com/2010/04/07/9989346.aspx>
    # and TZOmegaTZIOY
    # <http://stackoverflow.com/questions/878972/windows-cmd-encoding-change-causes-python-crash/1432462#1432462>.
    try:
        # <http://msdn.microsoft.com/en-us/library/ms683231(VS.85).aspx>
        # HANDLE WINAPI GetStdHandle(DWORD nStdHandle);
        # returns INVALID_HANDLE_VALUE, NULL, or a valid handle
        #
        # <http://msdn.microsoft.com/en-us/library/aa364960(VS.85).aspx>
        # DWORD WINAPI GetFileType(DWORD hFile);
        #
        # <http://msdn.microsoft.com/en-us/library/ms683167(VS.85).aspx>
        # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode);

        GetStdHandle = WINFUNCTYPE(HANDLE, DWORD)(("GetStdHandle", windll.kernel32))
        STD_OUTPUT_HANDLE = DWORD(-11)
        STD_ERROR_HANDLE = DWORD(-12)
        GetFileType = WINFUNCTYPE(DWORD, DWORD)(("GetFileType", windll.kernel32))
        FILE_TYPE_CHAR = 0x0002
        FILE_TYPE_REMOTE = 0x8000
        GetConsoleMode = WINFUNCTYPE(BOOL, HANDLE, POINTER(DWORD))(("GetConsoleMode", windll.kernel32))
        INVALID_HANDLE_VALUE = DWORD(-1).value

        def not_a_console(handle):
            """Return True when *handle* is invalid or is not a character device in console mode."""
            if handle == INVALID_HANDLE_VALUE or handle is None:
                return True
            return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                    or GetConsoleMode(handle, byref(DWORD())) == 0)

        old_stdout_fileno = None
        old_stderr_fileno = None
        if hasattr(sys.stdout, 'fileno'):
            old_stdout_fileno = sys.stdout.fileno()
        if hasattr(sys.stderr, 'fileno'):
            old_stderr_fileno = sys.stderr.fileno()

        STDOUT_FILENO = 1
        STDERR_FILENO = 2
        real_stdout = (old_stdout_fileno == STDOUT_FILENO)
        real_stderr = (old_stderr_fileno == STDERR_FILENO)

        if real_stdout:
            hStdout = GetStdHandle(STD_OUTPUT_HANDLE)
            if not_a_console(hStdout):
                real_stdout = False

        if real_stderr:
            hStderr = GetStdHandle(STD_ERROR_HANDLE)
            if not_a_console(hStderr):
                real_stderr = False

        if real_stdout or real_stderr:
            # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars,
            #                           LPDWORD lpCharsWritten, LPVOID lpReserved);

            WriteConsoleW = WINFUNCTYPE(BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID)(("WriteConsoleW", windll.kernel32))

            class UnicodeOutput:
                """File-like replacement for stdout/stderr.

                With a console handle, text is written through WriteConsoleW;
                without one, text is forwarded to the wrapped stream, encoding
                unicode objects as UTF-8 first.
                """

                def __init__(self, hConsole, stream, fileno, name):
                    self._hConsole = hConsole
                    self._stream = stream
                    self._fileno = fileno
                    self.closed = False
                    self.softspace = False
                    self.mode = 'w'
                    self.encoding = 'utf-8'
                    self.name = name
                    self.flush()

                def isatty(self):
                    return False

                def close(self):
                    # don't really close the handle, that would only cause problems
                    self.closed = True

                def fileno(self):
                    return self._fileno

                def flush(self):
                    """Flush the wrapped stream; nothing is flushed in console mode."""
                    if self._hConsole is None:
                        try:
                            self._stream.flush()
                        except Exception as e:
                            _complain("%s.flush: %r from %r" % (self.name, e, self._stream))
                            raise

                def write(self, text):
                    """Write *text*, reporting any failure on the original stderr before re-raising."""
                    try:
                        if self._hConsole is None:
                            if isinstance(text, unicode):
                                text = text.encode('utf-8')
                            self._stream.write(text)
                        else:
                            if not isinstance(text, unicode):
                                text = str(text).decode('utf-8')
                            remaining = len(text)
                            while remaining:
                                n = DWORD(0)
                                # There is a shorter-than-documented limitation on the
                                # length of the string passed to WriteConsoleW (see
                                # <http://tahoe-lafs.org/trac/tahoe-lafs/ticket/1232>).
                                retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None)
                                if retval == 0 or n.value == 0:
                                    raise IOError("WriteConsoleW returned %r, n.value = %r" % (retval, n.value))
                                remaining -= n.value
                                if not remaining:
                                    break
                                text = text[n.value:]
                    except Exception as e:
                        _complain("%s.write: %r" % (self.name, e))
                        raise

                def writelines(self, lines):
                    """Write each item of *lines* via write()."""
                    try:
                        for line in lines:
                            self.write(line)
                    except Exception as e:
                        _complain("%s.writelines: %r" % (self.name, e))
                        raise

            if real_stdout:
                sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '<Unicode console stdout>')
            else:
                sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '<Unicode redirected stdout>')

            if real_stderr:
                sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '<Unicode console stderr>')
            else:
                sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '<Unicode redirected stderr>')
    except Exception as e:
        _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))

    # While we're at it, let's unmangle the command-line arguments:

    # This works around <http://bugs.python.org/issue2128>.
    GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
    CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))

    argc = c_int(0)
    argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))

    argv = [argv_unicode[i].encode('utf-8') for i in xrange(0, argc.value)]

    if not hasattr(sys, 'frozen'):
        # If this is an executable produced by py2exe or bbfreeze, then it will
        # have been invoked directly. Otherwise, unicode_argv[0] is the Python
        # interpreter, so skip that.
        argv = argv[1:]

        # Also skip option arguments to the Python interpreter.
        # The entries of argv are UTF-8 byte strings here, so compare them with
        # byte-string literals: mixing in unicode literals would make Python 2
        # decode the argument as ASCII and fail on any non-ASCII path/argument.
        while len(argv) > 0:
            arg = argv[0]
            if not arg.startswith("-") or arg == "-":
                break
            argv = argv[1:]
            if arg == '-m':
                # sys.argv[0] should really be the absolute path of the module source,
                # but never mind
                break
            if arg == '-c':
                argv[0] = '-c'
                break

    # if you like:
    sys.argv = argv

#######################################################################


def declare_encoding_vs_real_encoding_declareGBKRealUtf8():
    """Hold the demo literal for the "declared GBK, really UTF-8" case.

    The body only assigns two local strings; the decode/print steps are
    left commented out below, as in the article's original sample.
    """
    # Explanatory text from the article; it is not used at runtime.
    helpInfo = """在当前Python文件的第二行,用
    # -*- coding: GBK -*-
    去声明当前文件编码是GBK
    在这里故意把当前文件的本身编码转换为了utf-8,即:声明的编码是gbk, 真正文件的编码是utf-8 -> 声明的编码,和真正文件编码,之间不一致 ->
    实际经过测试,由于本身文件编码是utf-8的,声明了GBK编码,Python解析器,并不会自动识别是按照GBK编码去解析,
    所以结果就是,当前文件,Python是声明的编码GBK去解码的。
    """
    # NOTE(review): the variable name and literal text were carried over from
    # the GBK sample; in this variant the file is meant to be saved as UTF-8.
    realEncodingGbkChar = "此处我实际上是GBK的中文字符串"
    # Disabled steps from the original sample (author's notes translated):
    # useGbkDecodedUnicodeChar = realEncodingGbkChar.decode("utf-8")  # yields a unicode string
    # print "useGbkDecodedUnicodeChar=",useGbkDecodedUnicodeChar  # prints correctly in Windows cmd: useGbkDecodedUnicodeChar= 此处我实际上是GBK的中文字符串
    # useUtf8DecodedUnicodeChar = realEncodingGbkChar.decode("utf-8")  # raises: UnicodeDecodeError: 'utf8' codec can't decode byte 0xb4 in position 0: invalid start byte


###############################################################################
if __name__=="__main__":
    declare_encoding_vs_real_encoding_declareGBKRealUtf8()

(2) 运行:
直接报错

0 0
原创粉丝点击