Python2编码判断Demo

来源:互联网 发布:哪家4g网络好 编辑:程序博客网 时间:2024/06/10 19:12
def unicode_it(html):
    """Decode a raw HTML byte string to text using a detected encoding.

    The encoding is guessed with ``chardet.detect``.  A guess of GB2312 is
    widened to GBK (a superset of GB2312) when the page declares no charset
    at all, or declares GBK itself; any other declared charset leaves the
    GB2312 guess in place.

    NOTE(review): ``chardet`` and ``charset_pattern`` are not defined in this
    block.  ``charset_pattern`` is presumably a module-level compiled regex
    capturing the charset declared in the markup -- confirm against the rest
    of the file.

    Args:
        html: The page content.  Byte strings (``str`` on Python 2) are
            decoded; any other value is handed back untouched.

    Returns:
        The decoded text; ``html`` itself when it is not a byte string; or
        ``None`` when the detection confidence is below 0.7.

    Raises:
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this check is unchanged
    # there while still meaning "undecoded data" on Python 3.
    if not isinstance(html, bytes):
        return html

    guess = chardet.detect(html)
    if guess["confidence"] >= 0.7:
        encoding = guess["encoding"]
        if encoding.lower() == "gb2312":
            declared = charset_pattern.findall(html)
            # Searching bytes yields bytes matches on Python 3, so accept
            # either spelling of the declared charset name.
            if not declared or declared[0].lower().strip() in (b"gbk", "gbk"):
                encoding = "gbk"
        # Debug trace kept from the original; the parenthesised single-argument
        # form prints the same thing on Python 2 and is valid on Python 3.
        print(encoding)
        return html.decode(encoding)

    # Detection was not confident enough to decode safely.  The original fell
    # off the end of the function here; the None result is now explicit.
    return None