pinyin4j使用示例(支持多音字)

来源:互联网 发布:c 动漫网站源码 编辑:程序博客网 时间:2024/04/24 19:42

pinyin4j的主页:http://pinyin4j.sourceforge.net/
pinyin4j能够根据中文字符获取其对应的拼音,而且拼音的格式可以定制。
pinyin4j是一个支持将中文转换到拼音的Java开源类库。

  1. 支持简体中文和繁体中文字符;
  2. 支持转换到汉语拼音,通用拼音, 威妥玛拼音(威玛拼法), 注音符号第二式, 耶鲁拼法和国语罗马字;
  3. 支持多音字,即可以获取一个中文字符的多种发音;
  4. 支持多种字符串输出格式,比如支持Unicode格式的字符ü和声调符号(阴平 "ˉ",阳平"ˊ",上声"ˇ",去声"ˋ")的输出。

示例代码:

[java] view plain copy
  1. public class Pinyin4jUtil {    
  2.         
  3.         /**  
  4.          * 汉字转换位汉语拼音首字母,英文字符不变,特殊字符丢失 支持多音字,生成方式如(长沙市长:cssc,zssz,zssc,cssz)  
  5.          *   
  6.          * @param chines  
  7.          *            汉字  
  8.          * @return 拼音  
  9.          */    
  10.         public static String converterToFirstSpell(String chines) {    
  11.             StringBuffer pinyinName = new StringBuffer();    
  12.             char[] nameChar = chines.toCharArray();    
  13.             HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();    
  14.             defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);    
  15.             defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);    
  16.             for (int i = 0; i < nameChar.length; i++) {    
  17.                 if (nameChar[i] > 128) {    
  18.                     try {    
  19.                         // 取得当前汉字的所有全拼    
  20.                         String[] strs = PinyinHelper.toHanyuPinyinStringArray(    
  21.                                 nameChar[i], defaultFormat);    
  22.                         if (strs != null) {    
  23.                             for (int j = 0; j < strs.length; j++) {    
  24.                                 // 取首字母    
  25.                                 pinyinName.append(strs[j].charAt(0));    
  26.                                 if (j != strs.length - 1) {    
  27.                                     pinyinName.append(",");    
  28.                                 }    
  29.                             }    
  30.                         }    
  31.                         // else {    
  32.                         // pinyinName.append(nameChar[i]);    
  33.                         // }    
  34.                     } catch (BadHanyuPinyinOutputFormatCombination e) {    
  35.                         e.printStackTrace();    
  36.                     }    
  37.                 } else {    
  38.                     pinyinName.append(nameChar[i]);    
  39.                 }    
  40.                 pinyinName.append(" ");    
  41.             }    
  42.             // return pinyinName.toString();    
  43.             return parseTheChineseByObject(discountTheChinese(pinyinName.toString()));    
  44.         }    
  45.         
  46.         /**  
  47.          * 汉字转换位汉语全拼,英文字符不变,特殊字符丢失  
  48.          * 支持多音字,生成方式如(重当参:zhongdangcen,zhongdangcan,chongdangcen  
  49.          * ,chongdangshen,zhongdangshen,chongdangcan)  
  50.          *   
  51.          * @param chines  
  52.          *            汉字  
  53.          * @return 拼音  
  54.          */    
  55.         public static String converterToSpell(String chines) {    
  56.             StringBuffer pinyinName = new StringBuffer();    
  57.             char[] nameChar = chines.toCharArray();    
  58.             HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();    
  59.             defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);    
  60.             defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);    
  61.             for (int i = 0; i < nameChar.length; i++) {    
  62.                 if (nameChar[i] > 128) {    
  63.                     try {    
  64.                         // 取得当前汉字的所有全拼    
  65.                         String[] strs = PinyinHelper.toHanyuPinyinStringArray(    
  66.                                 nameChar[i], defaultFormat);    
  67.                         if (strs != null) {    
  68.                             for (int j = 0; j < strs.length; j++) {    
  69.                                 pinyinName.append(strs[j]);    
  70.                                 if (j != strs.length - 1) {    
  71.                                     pinyinName.append(",");    
  72.                                 }    
  73.                             }    
  74.                         }    
  75.                     } catch (BadHanyuPinyinOutputFormatCombination e) {    
  76.                         e.printStackTrace();    
  77.                     }    
  78.                 } else {    
  79.                     pinyinName.append(nameChar[i]);    
  80.                 }    
  81.                 pinyinName.append(" ");    
  82.             }    
  83.             // return pinyinName.toString();    
  84.             return parseTheChineseByObject(discountTheChinese(pinyinName.toString()));    
  85.         }    
  86.         
  87.         /**  
  88.          * 去除多音字重复数据  
  89.          *   
  90.          * @param theStr  
  91.          * @return  
  92.          */    
  93.         private static List<Map<String, Integer>> discountTheChinese(String theStr) {    
  94.             // 去除重复拼音后的拼音列表    
  95.             List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>();    
  96.             // 用于处理每个字的多音字,去掉重复    
  97.             Map<String, Integer> onlyOne = null;    
  98.             String[] firsts = theStr.split(" ");    
  99.             // 读出每个汉字的拼音    
  100.             for (String str : firsts) {    
  101.                 onlyOne = new Hashtable<String, Integer>();    
  102.                 String[] china = str.split(",");    
  103.                 // 多音字处理    
  104.                 for (String s : china) {    
  105.                     Integer count = onlyOne.get(s);    
  106.                     if (count == null) {    
  107.                         onlyOne.put(s, new Integer(1));    
  108.                     } else {    
  109.                         onlyOne.remove(s);    
  110.                         count++;    
  111.                         onlyOne.put(s, count);    
  112.                     }    
  113.                 }    
  114.                 mapList.add(onlyOne);    
  115.             }    
  116.             return mapList;    
  117.         }    
  118.         
  119.         /**  
  120.          * 解析并组合拼音,对象合并方案(推荐使用)  
  121.          *   
  122.          * @return  
  123.          */    
  124.         private static String parseTheChineseByObject(    
  125.                 List<Map<String, Integer>> list) {    
  126.             Map<String, Integer> first = null// 用于统计每一次,集合组合数据    
  127.             // 遍历每一组集合    
  128.             for (int i = 0; i < list.size(); i++) {    
  129.                 // 每一组集合与上一次组合的Map    
  130.                 Map<String, Integer> temp = new Hashtable<String, Integer>();    
  131.                 // 第一次循环,first为空    
  132.                 if (first != null) {    
  133.                     // 取出上次组合与此次集合的字符,并保存    
  134.                     for (String s : first.keySet()) {    
  135.                         for (String s1 : list.get(i).keySet()) {    
  136.                             String str = s + s1;    
  137.                             temp.put(str, 1);    
  138.                         }    
  139.                     }    
  140.                     // 清理上一次组合数据    
  141.                     if (temp != null && temp.size() > 0) {    
  142.                         first.clear();    
  143.                     }    
  144.                 } else {    
  145.                     for (String s : list.get(i).keySet()) {    
  146.                         String str = s;    
  147.                         temp.put(str, 1);    
  148.                     }    
  149.                 }    
  150.                 // 保存组合数据以便下次循环使用    
  151.                 if (temp != null && temp.size() > 0) {    
  152.                     first = temp;    
  153.                 }    
  154.             }    
  155.             String returnStr = "";    
  156.             if (first != null) {    
  157.                 // 遍历取出组合字符串    
  158.                 for (String str : first.keySet()) {    
  159.                     returnStr += (str + ",");    
  160.                 }    
  161.             }    
  162.             if (returnStr.length() > 0) {    
  163.                 returnStr = returnStr.substring(0, returnStr.length() - 1);    
  164.             }    
  165.             return returnStr;    
  166.         }    
  167.         
  168.     }    
测试代码
[java] view plain copy
  1.             String str = "长沙市长";    
  2.                 
  3.             String pinyin = Pinyin4jUtil.converterToSpell(str);    
  4.             System.out.println(str+" pin yin :"+pinyin);    
  5.                 
  6.             pinyin = Pinyin4jUtil.converterToFirstSpell(str);    
  7.             System.out.println(str+" short pin yin :"+pinyin);

运行结果:

长沙市长 pin yin :zhangshashichang,changshashichang,zhangshashizhang,changshashizhang
长沙市长 short pin yin :cssc,zssz,zssc,cssz
原创粉丝点击