
// Source: Internet | Posted under: "ubuntu slow to respond" | Editor: Program Blog Network | Time: 2024/05/29 04:25
package com.webssky.jcseg.filter;

import java.util.HashMap;
import java.util.Map;

/**
 * A class to deal with Chinese numerics. <br />
 *
 * @author chenxin
 * @see <a href="http://www.webssky.com">http://www.webssky.com</a>
 */
public class CNNMFilter {

    /**
     * Chinese numeric chars. <br />
     * The chars were put into the lexicon file lex-cn-numeric.lex for the old
     * version; it is better to follow the current work. <br />
     * Layout relied on by the static initializer: indexes 0-8 are the plain
     * digits 1-9 and indexes 9-17 are the financial (capital) digits 1-9.
     * The remaining entries are unit chars and written forms of zero.
     */
    private static final Character[] CN_NUMERIC = {
        '一', '二', '三', '四', '五', '六', '七', '八', '九',
        '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖',
        '十', '百', '千', '拾', '佰', '仟', '万', '亿', '○', 'O', '零'};

    /** Unit chars handled with the "trailing digit" shorthand, largest first. */
    private static final char[] BIG_UNITS = {'亿', '万', '千', '百'};

    /** Numeric value of the unit at the same index of {@link #BIG_UNITS}. */
    private static final int[] BIG_UNIT_VALUES = {100000000, 10000, 1000, 100};

    /** Lookup table: Chinese numeric char to its numeric value. */
    private static final Map<Character, Integer> cnNumeric =
            new HashMap<Character, Integer>(40, 0.85f);

    static {
        // Plain and financial digits share the values 1..9.
        for ( int j = 0; j < 9; j++ ) {
            cnNumeric.put(CN_NUMERIC[j], j + 1);
            cnNumeric.put(CN_NUMERIC[j + 9], j + 1);
        }
        cnNumeric.put('两', 2);
        cnNumeric.put('十', 10);
        cnNumeric.put('拾', 10);
        cnNumeric.put('百', 100);
        cnNumeric.put('佰', 100);
        cnNumeric.put('千', 1000);
        cnNumeric.put('仟', 1000);
        cnNumeric.put('万', 10000);
        cnNumeric.put('亿', 100000000);
    }

    /**
     * Look up the numeric value of a Chinese numeric char. <br />
     * The written forms of zero ('零', '○', 'O') are not in the lookup table,
     * so -1 is returned for them as for any other unmapped char.
     *
     * @param c the char to check
     * @return the numeric value of the char (1-9, 10, 100, 1000, 10000 or
     *         100000000), or -1 when the char is not a mapped Chinese numeric
     */
    public static int isCNNumeric( char c ) {
        Integer i = cnNumeric.get(c);
        return ( i == null ) ? -1 : i.intValue();
    }

    /**
     * Turn a Chinese numeric string into an Arabic number. <br />
     * Examples: "三亿二千零六万七千五百六" gives 320067560, "一九九八" gives
     * 1998, "二零一二" gives 2012, "十五" gives 15. <br />
     * A single digit directly after 亿/万/千/百 is read as the next lower
     * unit ("三万五" gives 35000). Chars without a numeric value are ignored,
     * except that a one-char input returns the lookup result directly
     * (-1 for an unmapped char, 0 for a written zero). <br />
     * The result is an int, so values above Integer.MAX_VALUE overflow.
     *
     * @param cnn the Chinese numeric string, surrounding whitespace is trimmed
     * @param flag true to first normalize the financial unit chars
     *        ('佰', '仟', '拾') to their plain forms ('百', '千', '十')
     * @return the Arabic value, 0 for an empty string
     * @throws NullPointerException if cnn is null
     */
    public static int cnNumericToArabic( String cnn, boolean flag ) {
        cnn = cnn.trim();
        if ( cnn.length() == 1 ) {
            return digitValue(cnn.charAt(0));
        }

        if ( flag ) {
            cnn = cnn.replace('佰', '百').replace('仟', '千').replace('拾', '十');
        }

        int val = 0;
        for ( int k = 0; k < BIG_UNITS.length; k++ ) {
            int idx = cnn.lastIndexOf(BIG_UNITS[k]);
            if ( idx < 0 ) {
                continue;
            }

            // Everything before the unit is its multiplier.
            val += cnNumericToArabic(cnn.substring(0, idx), false) * BIG_UNIT_VALUES[k];
            // substring(length) yields "", so no special case for a trailing unit.
            cnn = cnn.substring(idx + 1);

            // Shorthand: one plain digit after the unit counts as the next
            // lower unit. Anything else (e.g. a lone 十) is left for the
            // following steps instead of being dropped.
            if ( cnn.length() == 1 ) {
                int d = digitValue(cnn.charAt(0));
                if ( d >= 0 && d <= 9 ) {
                    val += d * (BIG_UNIT_VALUES[k] / 10);
                    cnn = "";
                }
            }
        }

        int shi = cnn.lastIndexOf('十');
        if ( shi > -1 ) {
            if ( shi == 0 ) {
                // A leading 十 has an implied multiplier of one.
                val += 10;
            } else {
                val += cnNumericToArabic(cnn.substring(0, shi), false) * 10;
            }
            cnn = cnn.substring(shi + 1);
        }

        return val + positionalValue(cnn);
    }

    /**
     * Quick conversion for a purely positional Chinese numeric string, i.e.
     * one written digit by digit such as "一九九八" (1998) or "二零零八"
     * (2008). Chars without a numeric value are ignored.
     *
     * @param cnn the Chinese numeric string, surrounding whitespace is trimmed
     * @return the Arabic value, 0 for an empty string
     * @throws NullPointerException if cnn is null
     */
    public static int qCNNumericToArabic( String cnn ) {
        return positionalValue(cnn.trim());
    }

    /**
     * Value of a char inside a number: 0 for the written forms of zero,
     * otherwise the {@link #isCNNumeric(char)} result (-1 when unmapped).
     */
    private static int digitValue( char c ) {
        if ( c == '零' || c == '〇' || c == '○' || c == 'O' ) {
            return 0;
        }
        return isCNNumeric(c);
    }

    /**
     * Read the chars as base-10 positions, left to right, using integer
     * arithmetic only. Chars without a numeric value are skipped.
     */
    private static int positionalValue( String digits ) {
        int val = 0;
        for ( int j = 0; j < digits.length(); j++ ) {
            int d = digitValue(digits.charAt(j));
            if ( d < 0 ) {
                continue;
            }
            val = val * 10 + d;
        }
        return val;
    }
}


// 0 0