Lucene.Net2.9(中科院分词.net 版) DEMO

来源:互联网 发布:下载隐藏软件 编辑:程序博客网 时间:2024/06/05 04:33

本文使用的中科院分词 ICTCLAS 的 .NET 版本,是吕震宇开发的 1.0 版本。

 Lucene.Net2.9

 

接口

 

ICTCLASAnalyzer.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;

namespace Demo
{
    /// <summary>
    /// Lucene.Net analyzer whose token streams are produced by
    /// <see cref="ICTCLASTokenizer"/>.
    /// </summary>
    public class ICTCLASAnalyzer : Analyzer
    {
        // Dictionary location, forwarded unchanged to every tokenizer this analyzer creates.
        private readonly string _dictPath;

        /// <summary>Creates an analyzer bound to the given dictionary path.</summary>
        /// <param name="dictPath">Path handed to each <see cref="ICTCLASTokenizer"/>.</param>
        public ICTCLASAnalyzer(string dictPath)
        {
            _dictPath = dictPath;
        }

        /// <summary>
        /// Builds a new <see cref="ICTCLASTokenizer"/> over <paramref name="reader"/>.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used here.</param>
        /// <param name="reader">Source of the text to tokenize.</param>
        /// <returns>A tokenizer over the reader's content.</returns>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            return new ICTCLASTokenizer(reader, _dictPath);
        }
    }
}


 

 

 

ICTCLASTokenizer.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using SharpICTCLAS;

namespace Demo
{
    /// <summary>
    /// Tokenizer that reads its whole input up front, segments it with SharpICTCLAS,
    /// and then hands the segmented words to Lucene one at a time via <see cref="Next"/>.
    /// </summary>
    class ICTCLASTokenizer : Tokenizer
    {
        // Second argument passed to WordSegment.Segment.
        // NOTE(review): presumably the number of candidate segmentations requested - confirm against SharpICTCLAS.
        int nKind = 2;

        // Segmentation output; only the first entry is consumed by Next().
        List<WordResult[]> result;

        // Start offset of the next token, measured in the CR/LF-stripped sentence.
        int startIndex = 0;

        // Exclusive end offset of the most recently returned token.
        int endIndex = 0;

        // Index of the next word to return. Iteration starts at 1 and stops before the last entry.
        // NOTE(review): presumably entries 0 and Length-1 are begin/end sentinels added by the segmenter - confirm.
        int i = 1;

        /// <summary>
        /// The sentence to be segmented.
        /// </summary>
        private string sentence;

        /// <summary>Constructs a tokenizer for this reader and segments its content immediately.</summary>
        /// <param name="reader">Source text; it is read to the end inside the constructor.</param>
        /// <param name="DictPath">Path passed to <c>WordSegment.InitWordSegment</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public ICTCLASTokenizer(System.IO.TextReader reader, string DictPath)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            this.input = reader;
            sentence = input.ReadToEnd();

            // NOTE(review): CR/LF pairs are removed before segmentation, so token offsets are
            // relative to the stripped text rather than to the original reader content.
            sentence = sentence.Replace("\r\n", "");

            // NOTE(review): the dictionary is initialized for every tokenizer instance;
            // consider sharing one WordSegment if this proves slow - verify it can be reused safely.
            WordSegment wordSegment = new WordSegment();
            wordSegment.InitWordSegment(DictPath);
            result = wordSegment.Segment(sentence, nKind);
        }

        /// <summary>
        /// Returns the next segmented word as a token, or null when no words remain.
        /// </summary>
        /// <returns>
        /// A token whose start offset is inclusive and whose end offset is exclusive
        /// (one past the word's last character), or null at end of stream.
        /// </returns>
        public override Token Next()
        {
            // An empty or missing segmentation result means there is nothing to emit.
            if (result == null || result.Count == 0)
            {
                return null;
            }

            WordResult[] words = result[0];
            if (words == null || i >= words.Length - 1)
            {
                return null;
            }

            string word = words[i].sWord;

            // Lucene end offsets are exclusive, so the end is start + length (not length - 1).
            endIndex = startIndex + word.Length;
            Token token = new Token(word, startIndex, endIndex);

            // The next word begins where this one ended.
            startIndex = endIndex;
            i++;
            return token;
        }
    }
}


 

 

DEMO地址: