使用pyhunspell检查多国语言词汇

来源:互联网 发布:php怎么做直播网站 编辑:程序博客网 时间:2024/04/27 18:38
import codecs

import hunspell

# Maps a numeric language id to a comma-separated list of hunspell dictionary
# base names (the file name without the .dic/.aff extension).
# NOTE(review): the keys look like Windows primary language identifiers
# (e.g. 0x0009 -> English, 0x0007 -> German) -- confirm against the caller.
hun_dicts = {
    0x0001: 'ar',
    0x0002: 'bg_BG',
    0x0005: 'cs',
    0x0006: 'da_DK',
    0x0007: 'de_DE,de_AT,de_CH',
    0x0008: 'el',
    0x0009: 'en_CA,en_US,en_AU,en_ZA,en_GB',
    0x000A: 'es',
    0x000B: 'fi_FI',
    0x000C: 'fr',
    0x000D: 'he',
    0x000E: 'hu_HU',
    0x0010: 'it_IT',
    0x0012: 'ko',
    0x0013: 'nl',
    0x0014: 'nb,nn',
    0x0015: 'pl',
    0x0016: 'pt,pt_BR',
    0x0018: 'ro_RO',
    0x0019: 'ru_RU',
    0x001B: 'sk',
    0x001C: 'sq_AL',
    0x001D: 'sv_SE',
    0x001E: 'th',
    0x001F: 'tr',
    0x0020: 'ur_PK',
    0x0021: 'id_ID',
    0x0022: 'uk',
    0x0024: 'sl',
    0x0025: 'et_EE',
    0x0026: 'lv_LV',
    0x0027: 'lt',
    0x0029: 'fa',
    0x002A: 'vi',
    0x002F: 'mk_MK',
    0x0034: 'xh_ZA',
    0x0035: 'zu_ZA',
    0x0036: 'af_ZA',
    0x0039: 'hi_IN',
    0x003e: 'ms_MY',
    0x0041: 'sw_TZ',
    0x0044: 'am_ET',
    # Disabled entry, kept from the original listing
    # (presumably no hunspell dictionary is available -- confirm).
    #0x0058:'hausa',
    0x0059: 'hr',
    0x0061: 'kk_KZ',
    0x0080: 'sh',
    0x0084: 'ta_IN',
    # Disabled entries, kept from the original listing.
    #0x0093:'yoruba',
    #0x0095:'igbo',
}


def get_hunspell(lang):
    """Build the hunspell checkers configured for a language id.

    Args:
        lang: A key of ``hun_dicts``.

    Returns:
        A list with one ``hunspell.HunSpell`` object per dictionary name
        listed for ``lang``, in the listed order, or ``None`` when ``lang``
        is not a key of ``hun_dicts``.
    """
    if lang not in hun_dicts:
        return None
    path = '/usr/share/hunspell/'
    hps = []
    # One language id may map to several regional dictionaries.
    for dict_name in hun_dicts[lang].split(','):
        aff = path + dict_name + '.aff'
        dic = path + dict_name + '.dic'
        hps.append(hunspell.HunSpell(dic, aff))
    return hps


def is_spell(word, hs_list):
    """Return True if any checker in ``hs_list`` accepts ``word``.

    Args:
        word: The text string to check. It is encoded with each dictionary's
            own encoding before being handed to that dictionary.
        hs_list: An iterable of checker objects exposing
            ``get_dic_encoding()`` and ``spell()``, such as the list returned
            by ``get_hunspell``. ``None`` (what ``get_hunspell`` returns for
            an unknown language) or an empty list means "no dictionaries".

    Returns:
        True as soon as one checker reports the word as correctly spelled,
        otherwise False.
    """
    # Tolerate the None that get_hunspell returns for unsupported languages.
    for hs in hs_list or ():
        dic_encode = hs.get_dic_encoding()
        # The encoding name 'TIS620-2533' is not a Python codec name;
        # use Python's 'tis_620' codec for it instead.
        if dic_encode == 'TIS620-2533':
            dic_encode = 'tis_620'
        try:
            encoded = word.encode(dic_encode)
        except UnicodeEncodeError:
            # The word contains characters this dictionary's charset cannot
            # represent, so this dictionary cannot contain it; keep trying
            # the remaining dictionaries instead of aborting the whole check.
            continue
        if hs.spell(encoded):
            return True
    return False

0 0
原创粉丝点击