pinyin4j 使用示例(支持多音字)

来源:互联网 发布:php从入门到精通第4版 编辑:程序博客网 时间:2024/04/24 15:06
/** * 汉字转换拼音工具类 *  * @Project UserCenter * @Author shiming.xin * @Version 1.0 * @Date Mar 30, 2011 10:36:07 AM */public class PinYin4JCn { /**  * 汉字转换位汉语拼音首字母,英文字符不变,特殊字符丢失  * 支持多音字,生成方式如(重当参:cdc,zds,cds,zdc)  *   * @param chines  *            汉字  * @return 拼音  */ public static String converterToFirstSpell(String chines) {  StringBuffer pinyinName = new StringBuffer();  char[] nameChar = chines.toCharArray();  HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();  defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);  defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);  for (int i = 0; i < nameChar.length; i++) {   if (nameChar[i] > 128) {    try {     // 取得当前汉字的所有全拼     String[] strs = PinyinHelper.toHanyuPinyinStringArray(nameChar[i], defaultFormat);     if (strs != null) {      for (int j = 0; j < strs.length; j++) {       // 取首字母       pinyinName.append(strs[j].charAt(0));       if (j != strs.length - 1) {        pinyinName.append(",");       }      }     }     // else {     // pinyinName.append(nameChar[i]);     // }    } catch (BadHanyuPinyinOutputFormatCombination e) {     e.printStackTrace();    }   } else {    pinyinName.append(nameChar[i]);   }   pinyinName.append(" ");  }  // return pinyinName.toString();  return parseTheChineseByObject(discountTheChinese(pinyinName.toString())); } /**  * 汉字转换位汉语全拼,英文字符不变,特殊字符丢失  * 支持多音字,生成方式如(重当参:zhongdangcen,zhongdangcan,chongdangcen,chongdangshen,zhongdangshen,chongdangcan)  *   * @param chines  *            汉字  * @return 拼音  */ public static String converterToSpell(String chines) {  StringBuffer pinyinName = new StringBuffer();  char[] nameChar = chines.toCharArray();  HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();  defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);  defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);  for (int i = 0; i < nameChar.length; i++) {   if (nameChar[i] > 128) {    try {     // 取得当前汉字的所有全拼     String[] strs = 
PinyinHelper.toHanyuPinyinStringArray(nameChar[i], defaultFormat);     if (strs != null) {      for (int j = 0; j < strs.length; j++) {       pinyinName.append(strs[j]);       if (j != strs.length - 1) {        pinyinName.append(",");       }      }     }    } catch (BadHanyuPinyinOutputFormatCombination e) {     e.printStackTrace();    }   } else {    pinyinName.append(nameChar[i]);   }   pinyinName.append(" ");  }  // return pinyinName.toString();  return parseTheChineseByObject(discountTheChinese(pinyinName.toString())); } /**  * 去除多音字重复数据  *   * @param theStr  * @return  */ private static List<Map<String, Integer>> discountTheChinese(String theStr) {  // 去除重复拼音后的拼音列表  List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>();  // 用于处理每个字的多音字,去掉重复  Map<String, Integer> onlyOne = null;  String[] firsts = theStr.split(" ");  // 读出每个汉字的拼音  for (String str : firsts) {   onlyOne = new Hashtable<String, Integer>();   String[] china = str.split(",");   // 多音字处理   for (String s : china) {    Integer count = onlyOne.get(s);    if (count == null) {     onlyOne.put(s, new Integer(1));    } else {     onlyOne.remove(s);     count++;     onlyOne.put(s, count);    }   }   mapList.add(onlyOne);  }  return mapList; } /**  * 解析并组合拼音,对象合并方案(推荐使用)  *   * @return  */ private static String parseTheChineseByObject(List<Map<String, Integer>> list) {  Map<String, Integer> first = null; // 用于统计每一次,集合组合数据  // 遍历每一组集合  for (int i = 0; i < list.size(); i++) {   // 每一组集合与上一次组合的Map   Map<String, Integer> temp = new Hashtable<String, Integer>();   // 第一次循环,first为空   if (first != null) {    // 取出上次组合与此次集合的字符,并保存    for (String s : first.keySet()) {     for (String s1 : list.get(i).keySet()) {      String str = s + s1;      temp.put(str, 1);     }    }    // 清理上一次组合数据    if(temp != null && temp.size() > 0){     first.clear();    }   } else {    for (String s : list.get(i).keySet()) {     String str = s;     temp.put(str, 1);    }   }   // 保存组合数据以便下次循环使用   if(temp != null && 
temp.size() > 0){    first = temp;   }  }  String returnStr = "";  if(first != null){   // 遍历取出组合字符串   for (String str : first.keySet()) {    returnStr += (str + ",");   }  }  if(returnStr.length() > 0){   returnStr = returnStr.substring(0, returnStr.length() - 1);  }  return returnStr; } /**  * 解析并组合拼音,循环读取方案(不灵活,不推荐使用)  *   * 现在有如下几个数组: {1,2,3} {4,5} {7,8,9} {5,2,8}  * 要求写出算法对以上数组进行数据组合,如:1475,1472,1478,1485,1482....如此类推,得到的组合刚好是以上数组的最隹组合(不多不少).  * 注:要求有序组合(并非象“全排列算法”那般得到的组合是无序的):组合过程中,第一组数组排第一位、第二组排第二位、第三组排第三位....  *   * @param list  * @return  */ private static String parseTheChineseByFor(List<Map<String, Integer>> list) {  StringBuffer sbf = new StringBuffer();  int size = list.size();  switch (size) {  case 1:   for (String s : list.get(0).keySet()) {    String str = s;    sbf.append(str + ",");   }   break;  case 2:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     String str = s + s1;     sbf.append(str + ",");    }   }   break;  case 3:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     for (String s2 : list.get(2).keySet()) {      String str = s + s1 + s2;      sbf.append(str + ",");     }    }   }   break;  // 此处省略了数据组装过程,组装后的数据结构如下。  // 注:List<Map<String, Integer>> list:List存的就是有多少组数据上面的是4组  // Map就是具体的某一个数组(此处用Map主要是方便对其中数组中重复元素作处理)  // StringBuffer sbf = new StringBuffer();--用于记录组合字符的缓冲器  case 4:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     for (String s2 : list.get(2).keySet()) {      for (String s3 : list.get(3).keySet()) {       String str = s + s1 + s2 + s3;       // 此处的sbf为StringBuffer       sbf.append(str + ",");      }     }    }   }   break;  case 5:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     for (String s2 : list.get(2).keySet()) {      for (String s3 : list.get(3).keySet()) {       for (String s4 : list.get(4).keySet()) {        String str = s + s1 + s2 + s3 + s4;        
sbf.append(str + ",");       }      }     }    }   }   break;  case 6:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     for (String s2 : list.get(2).keySet()) {      for (String s3 : list.get(3).keySet()) {       for (String s4 : list.get(4).keySet()) {        for (String s5 : list.get(5).keySet()) {         String str = s + s1 + s2 + s3 + s4 + s5;         sbf.append(str + ",");        }       }      }     }    }   }   break;  case 7:   for (String s : list.get(0).keySet()) {    for (String s1 : list.get(1).keySet()) {     for (String s2 : list.get(2).keySet()) {      for (String s3 : list.get(3).keySet()) {       for (String s4 : list.get(4).keySet()) {        for (String s5 : list.get(5).keySet()) {         for (String s6 : list.get(6).keySet()) {          String str = s + s1 + s2 + s3 + s4 + s5 + s6;          sbf.append(str + ",");         }        }       }      }     }    }   }   break;  }  String str = sbf.toString();  return str.substring(0, str.length() - 1); } public static void main(String[] args) {  // 重当参差 重庆的j 刘煜,帕哈丁  String str = "重当参";  System.out.println(converterToFirstSpell(str));  System.out.println(converterToSpell(str)); }}


 

使用pinyin4j将汉字转换成拼音需要下载pinyin4j的jar包,这里使用的是pinyin4j-2.5.0.jar;下载地址:http://sourceforge.net/projects/pinyin4j/files/latest/download?source=files

历史各个版本下载地址:http://sourceforge.net/projects/pinyin4j/files/

 

本文转自:http://hi.baidu.com/stevenblake/item/fc3906eba5dc2bf0fa42ba0b