python 判断unicode字符串是汉字/数字/字母,全角/半角转换

来源:互联网 发布:软件培训计划方案 编辑:程序博客网 时间:2024/05/16 07:55

文本处理经常会遇到判断字符串是否是字母、数字或者汉字的问题。Python 的 str 模块提供了一些方法,但是不支持 unicode,所以模仿 str 的相关方法,实现了 unicode 字符串的汉字/数字/字母判断方法,以及全角/半角转换。



#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Character-class predicates and full-width/half-width conversion.

All functions operate on unicode text.  The single-character functions
(``is_Chinese``, ``is_digit``, ``is_alpha``, ``B2Q``, ``Q2B``) require an
argument of length exactly 1 and raise ``TypeError`` otherwise; the
``is_all_*`` and ``string*`` functions apply them to every character of a
string.

The module runs on both Python 2 and Python 3.
"""
import sys

# Python 2 only: keep the original module-level behaviour of switching the
# interpreter's default encoding to UTF-8.  Neither ``reload`` (as a builtin)
# nor ``sys.setdefaultencoding`` exists on Python 3, so the branch is skipped
# there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# ``unichr`` was merged into ``chr`` on Python 3.
try:
    _unichr = unichr  # noqa: F821 - builtin on Python 2
except NameError:
    _unichr = chr

# Code points used by the full-width <-> half-width mapping.
_HALFWIDTH_SPACE = 0x0020   # first convertible half-width character
_HALFWIDTH_LAST = 0x007E    # last convertible half-width character ('~')
_FULLWIDTH_SPACE = 0x3000   # the space is the one character off the formula
_FULLWIDTH_OFFSET = 0xFEE0  # full-width code point = half-width + 0xFEE0


def _check_single_char(uchar):
    """Raise ``TypeError`` unless *uchar* has length exactly 1."""
    if len(uchar) != 1:
        raise TypeError('expected a character, but a string found!')


def is_Chinese(uchar):
    """Return True if *uchar* lies in the range U+4E00..U+9FA5.

    Only that range is tested; ideographs outside it are reported as
    non-Chinese.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _check_single_char(uchar)
    return u'\u4e00' <= uchar <= u'\u9fa5'


def is_all_Chinese(ustr):
    """Return True if every character of *ustr* satisfies ``is_Chinese``.

    An empty string yields True.
    """
    return all(is_Chinese(uchar) for uchar in ustr)


def is_digit(uchar):
    """Return True if *uchar* is one of the ASCII digits '0'..'9'.

    Full-width digits are not counted as digits.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _check_single_char(uchar)
    return u'0' <= uchar <= u'9'


def is_all_digit(ustr):
    """Return True if every character of *ustr* satisfies ``is_digit``.

    An empty string yields True.
    """
    return all(is_digit(uchar) for uchar in ustr)


def is_alpha(uchar):
    """Return True if *uchar* is an ASCII letter ('A'..'Z' or 'a'..'z').

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _check_single_char(uchar)
    return u'A' <= uchar <= u'Z' or u'a' <= uchar <= u'z'


def is_all_alpha(ustr):
    """Return True if every character of *ustr* satisfies ``is_alpha``.

    An empty string yields True.
    """
    return all(is_alpha(uchar) for uchar in ustr)


def B2Q(uchar):
    """Convert one half-width character to its full-width form.

    Characters outside U+0020..U+007E are returned unchanged.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _check_single_char(uchar)
    code = ord(uchar)
    if code < _HALFWIDTH_SPACE or code > _HALFWIDTH_LAST:
        # Not a convertible half-width character: hand it back as-is.
        return uchar
    if code == _HALFWIDTH_SPACE:
        # The space does not follow the fixed-offset formula.
        code = _FULLWIDTH_SPACE
    else:
        code += _FULLWIDTH_OFFSET
    return _unichr(code)


def Q2B(uchar):
    """Convert one full-width character to its half-width form.

    If the converted code point does not land in U+0020..U+007E, the
    original character is returned unchanged.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _check_single_char(uchar)
    code = ord(uchar)
    if code == _FULLWIDTH_SPACE:
        code = _HALFWIDTH_SPACE
    else:
        code -= _FULLWIDTH_OFFSET
    if code < _HALFWIDTH_SPACE or code > _HALFWIDTH_LAST:
        # The result is not a half-width character, so the input was not a
        # convertible full-width character.
        return uchar
    return _unichr(code)


def stringQ2B(ustring):
    """Return *ustring* with every full-width character made half-width."""
    return u''.join(Q2B(uchar) for uchar in ustring)


# main function
if __name__ == '__main__':
    pass