Python 字符串编码测试

来源:互联网 发布:身份证sql判断 编辑:程序博客网 时间:2024/05/18 10:43
# -*- coding: utf-8 -*-
"""Demo of encoding/decoding Chinese text and inspecting the result with chardet.

The script targets Python 2: it calls ``decode`` on plain string literals and
``encode`` on ``u""`` literals.  ``print_function`` is imported only so that
the file uses call-style ``print``; the text printed under Python 2 is the
same as with the print statement.

Each experiment prints a hex dump of the value, chardet's guess for it, and
the text itself when the guess is UTF-8.
"""
from __future__ import print_function

import chardet


def str_detect(str):  # parameter name kept for compatibility; it shadows the builtin
    """Print a hex dump of *str*, chardet's guess, and the text if guessed UTF-8.

    Args:
        str: the value to inspect (a byte string or a unicode object).

    Any exception raised while dumping or detecting is caught and the type of
    *str* is printed instead.  A blank line is always printed last as a
    separator.
    """
    try:
        # One lowercase hex number per element, joined by colons.
        print(":".join("{:02x}".format(ord(c)) for c in str))
        guess = chardet.detect(str)
        print(guess)
        if guess['encoding'] == "utf-8":
            print(str)
    except Exception:
        # Best-effort demo: report the offending type rather than abort.
        # NOTE(review): presumably this is reached when chardet.detect is
        # handed a unicode object -- confirm against the installed chardet.
        print(type(str))
    print("")


def enc(str, enc):
    """Encode *str* with the codec named *enc* and inspect the result.

    Args:
        str: the value whose ``encode`` method is called.
        enc: codec name passed to ``encode``.

    Prints ``ERR:encode`` if the conversion raises; otherwise the encoded
    value is passed to ``str_detect``.
    """
    try:
        encoded = str.encode(enc)
    except Exception:
        print("ERR:encode")
    else:
        str_detect(encoded)


def dec(str, enc):
    """Decode *str* with the codec named *enc* and inspect the result.

    Args:
        str: the value whose ``decode`` method is called.
        enc: codec name passed to ``decode``.

    Prints ``ERR:decode`` if the conversion raises; otherwise the decoded
    value is passed to ``str_detect``.
    """
    try:
        decoded = str.decode(enc)
    except Exception:
        print("ERR:decode")
    else:
        str_detect(decoded)


print("=============================")

# 1. A plain string literal.
cn = "中文"
str_detect(cn)

# 2. A unicode literal, then the same text encoded with several codecs.
cn1 = u"中文1"
str_detect(cn1)
enc(cn1, 'utf-8')
enc(cn1, 'utf-16')
enc(cn1, 'gb2312')
enc(cn1, "ISO-8859-1")

# 3. A plain string literal decoded as UTF-8.
cn2 = "中文2"
str_detect(cn2)
dec(cn2, 'utf-8')
# NOTE(review): the next three calls pass cn1 (the unicode literal), not cn2.
# Left as in the original; confirm whether cn2 was intended.
dec(cn1, 'utf-16')
dec(cn1, 'gb2312')
dec(cn1, "ISO-8859-1")

# 4. Round trip: decode as UTF-8, encode as UTF-16, decode from UTF-16.
cn3 = "中文3"
dec_str = cn3.decode('utf-8')
str_detect(dec_str)
enc_str = dec_str.encode('utf-16')
str_detect(enc_str)
end_str = enc_str.decode('utf-16')
str_detect(end_str)

# decode(): uses the str's encoding to turn it into a unicode string.
# encode(): converts a unicode string into the requested encoding.
# Note: results can differ from system to system.

结果:

=============================
e4:b8:ad:e6:96:87
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文

4e2d:6587:31
<type 'unicode'>

e4:b8:ad:e6:96:87:31
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文1

ff:fe:2d:4e:87:65:31:00
{'confidence': 1.0, 'language': '', 'encoding': 'UTF-16'}

d6:d0:ce:c4:31
{'confidence': 0.682639754276994, 'language': 'Russian', 'encoding': 'KOI8-R'}

ERR:encode
e4:b8:ad:e6:96:87:32
{'confidence': 0.7525, 'language': '', 'encoding': 'utf-8'}
中文2

4e2d:6587:32
<type 'unicode'>

ERR:decode
ERR:decode
ERR:decode
4e2d:6587:33
<type 'unicode'>

ff:fe:2d:4e:87:65:33:00
{'confidence': 1.0, 'language': '', 'encoding': 'UTF-16'}

4e2d:6587:33
<type 'unicode'>



原创粉丝点击