中文分词Java简单实现

来源:互联网 发布:手机怎么删除淘宝评论 编辑:程序博客网 时间:2024/05/18 13:05

中文分词Java简单实现
存档备忘:下面是一个基于词典的逆向最大匹配中文分词的简单 Java 示例。

package helloJavaWorld;//用栈存取词语class StackTest {    private Object[] stack;    //元素个数;    private int size;    //默认长度为10;    public StackTest(){        this(10);    }    //也可以自己设置长度,即容量;    public StackTest(int len){        stack = new Object[len];    }    //返回元素个数;    public int size(){        return size;    }    //返回数组长度,即容量;    public int capacity(){        return stack.length;    }    //实现动态的数组;    public void ensureCapacity(){        if(size() == capacity()){            Object[] newStack = new Object[size() * 3 / 2 + 1];            System.arraycopy(stack, 0, newStack, 0, size());            stack = newStack;        }    }    //入栈;    public void push(Object o){        size++;        ensureCapacity();        stack[size - 1] = o;    }    //判空;    public boolean isEmpty(){        return size == 0;    }    //出栈;    public Object pop(){        //首先要判空;        if(isEmpty()){            throw new ArrayIndexOutOfBoundsException("不能为空");        }        Object o = stack[--size];        stack[size] = null;        return o;    }    }    /**     * 建立Split类     * 设置词典内容     * @author zhangliang     *     */    class Split {        private String[] dictionary = {"我","是","武汉","理工大","理工大学","武汉理工大学","的","一名","二","年级","二年级","学生"};  //词典        private String input = null;        public Split(String input) {            this.input = input;        }         //分词        public void start() {            String temp = null;            StackTest stack = new StackTest(20);            for(int i=0;i<this.input.length();i++) {                temp = this.input.substring(i);                 // 每次从字符串开头截取一个字,并存到temp中                // 如果该词在词典中, 则删除该词并在原始字符串中截取该词                if(this.isInDictionary(temp)) {                    stack.push(temp);   //入栈                    this.input = this.input.replace(temp, "");                    i = -1;  // i=-1是因为要重新查找, 而要先执行循环中的i++                }            }            // 当前循环完毕,词的末尾截去一个字,继续循环, 直到词变为空            if(null != this.input && 
!"".equals(this.input)) {                this.input = this.input.substring(0,this.input.length()-1);                this.start();            }            //出栈            while (!stack.isEmpty()) {                System.out.print(stack.pop() + "  ");}        }        //判断当前词是否在词典中        public boolean isInDictionary(String temp) {            for(int i=0;i<this.dictionary.length;i++) {                if(temp.equals(this.dictionary[i])) {                    return true;                }            }            return false;        }    }    public class splitChinsesCharacter {         public static void main(String[] args){                String input = "我是武汉理工大学一名二年级的学生";  // 要匹配的字符串                new Split(input).start();            }        }
原创粉丝点击