lucene.net 2.0 中文分词后语法高亮问题

来源:互联网 发布:mac eclipse 真机调试 编辑:程序博客网 时间:2024/05/21 10:42
 

lucene.net 2.0 中文分词后语法高亮问题

Posted on 2008-09-13 19:07 chen eric 阅读(147) 评论(0) 编辑 收藏 

lucene.net 2.0 中文分词后语法高亮问题

文/birdshover  出处/博客园

lucene.net 2.0 的 src 包里自带了 Highlighter.Net-2.0.0,可以用来实现搜索关键词的高亮显示(即本文所说的“语法高亮”)。

    //定义多条件搜索分析器
  BooleanQuery bquery = new BooleanQuery();
  //定义分词器
  Analyzer analyzer  = new StandardAnalyzer();

  //多条件搜索拆分器
            MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, analyzer);
            query = parser.Parse(key);
            bquery.Add(query, BooleanClause.Occur.MUST);
                
            DateTime now = DateTime.Now;
  //初始化搜索器
  //实现分布式搜索
  List<IndexReader> indexreaders = new List<IndexReader>();
  string[] dirs = Directory.GetDirectories(dir);
  if (searchtype == SearchType.None)
  {
    foreach (string item in dirs)
    {
    //System.Web.HttpContext.Current.Response.Write(item);
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, item), "Save")));
    }
  }
  else
  {
    //System.Web.HttpContext.Current.Response.Write(searchtype.ToString());
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, searchtype.ToString()), "Save")));
  }

  MultiReader reader = new MultiReader(indexreaders.ToArray());
  indexSearcher = new IndexSearcher(reader);

  Hits hits = null;
  hits = indexSearcher.Search(bquery);
  timer = (DateTime.Now - now).TotalMilliseconds;

int count = hits.Length();

// Paging window [start, end) over the hits.
// start is clamped to 0 so that pageNo < 1 can never produce a negative hit
// index; end is capped at the number of hits.
const int hitsPerPage = 10;
int start = Math.Max(0, (pageNo - 1) * hitsPerPage);
int end = Math.Min(count, pageNo * hitsPerPage);

// Highlighter setup: fragments are scored against the parsed query and the
// fragmenter is created with a fragment size of 100.
Highlighter highlighter = new Highlighter(new QueryScorer(query));
highlighter.SetTextFragmenter(new SimpleFragmenter(100));

// Marker appended to every title before highlighting so that trailing special
// characters of the real title are not dropped by the highlighter; it is
// stripped again from the highlighted output below.
const string titleTailMarker = "+aaaaaaaaa";

// Build the highlighted title (tresult) and content excerpt (result) for each
// hit on the current page.
for (int i = start; i < end; i++)
{
    Lucene.Net.Documents.Document doc = hits.Doc(i);

    // Document.Get can return null when the field is absent; fall back to an
    // empty string so StringReader and the length checks below do not throw.
    string text = doc.Get("content") ?? string.Empty;
    string title = doc.Get("title") + titleTailMarker;

    Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("content", new System.IO.StringReader(text));
    Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
    string result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    string tresult = highlighter.GetBestFragments(titleStream, title, 0, "..");

    if (string.IsNullOrEmpty(tresult))
    {
        // Nothing was highlighted in the title: use the plain title without the marker.
        tresult = title.Substring(0, title.Length - titleTailMarker.Length);
    }
    else if (tresult.EndsWith(titleTailMarker, StringComparison.Ordinal))
    {
        // Strip the marker only when it is really there; a fragment that does
        // not reach the end of the title must not lose its last characters.
        tresult = tresult.Substring(0, tresult.Length - titleTailMarker.Length);
    }

    // No highlighted fragment in the content: show its first 100 characters instead.
    if (string.IsNullOrEmpty(result))
    {
        result = text.Length > 100 ? text.Substring(0, 100) : text;
    }

    // Signal that the excerpt is shorter than the full content.
    if (result.Length < text.Length)
    {
        result = result + "...";
    }

    // tresult and result now hold the display strings for this hit; rendering
    // them is not part of this excerpt.
}