lucene.net 2.0 中文分词后语法高亮问题
来源:互联网 发布:mac eclipse 真机调试 编辑:程序博客网 时间:2024/05/21 10:42
lucene.net 2.0 中文分词后语法高亮问题
Posted on 2008-09-13 19:07 chen eric 阅读(147) 评论(0) 编辑 收藏 | lucene.net 2.0 中文分词后语法高亮问题
文/birdshover 出处/博客园
lucene.net 2.0 src包里自带了Highlighter.Net-2.0.0,可以用来实现语法高亮。
// Searches the "title" and "content" fields of one or more on-disk indexes for
// `key`, then builds highlighted title/content snippets for the requested page.
// Relies on names declared outside this snippet: key, query, dir, searchtype,
// SearchType, indexSearcher, timer and pageNo.

// Marker appended to every title before highlighting. Per the original author's
// note it keeps trailing special characters of the real title from being
// dropped by the highlighter; it is stripped again after highlighting.
const string TitleEndMarker = "+aaaaaaaaa";
// Number of hits shown per result page.
const int PageSize = 10;
// Fragment size handed to the highlighter, also used as the fallback excerpt length.
const int FragmentSize = 100;

// Container query; the parsed user query is added as its only (mandatory) clause.
BooleanQuery bquery = new BooleanQuery();
// Analyzer used both to parse the query and to re-tokenize stored text for highlighting.
Analyzer analyzer = new StandardAnalyzer();
// Parse the user input against both searchable fields.
MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, analyzer);
query = parser.Parse(key);
bquery.Add(query, BooleanClause.Occur.MUST);
DateTime now = DateTime.Now;

// Open one reader per index directory and search all of them through a MultiReader.
List<IndexReader> indexreaders = new List<IndexReader>();
if (searchtype == SearchType.None)
{
    // Directory.GetDirectories returns paths that are already prefixed with `dir`,
    // so they must not be combined with `dir` again: doing so doubled the prefix
    // whenever `dir` was a relative path.
    foreach (string item in Directory.GetDirectories(dir))
    {
        indexreaders.Add(IndexReader.Open(Path.Combine(item, "Save")));
    }
}
else
{
    // A specific search type maps to the sub-directory of the same name.
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, searchtype.ToString()), "Save")));
}
MultiReader reader = new MultiReader(indexreaders.ToArray());
// NOTE(review): neither the readers nor the searcher are closed in this snippet;
// confirm that the owner of `indexSearcher` closes them once `hits` has been consumed.
indexSearcher = new IndexSearcher(reader);
Hits hits = indexSearcher.Search(bquery);
// Elapsed time of parsing, opening the indexes and searching, in milliseconds.
timer = (DateTime.Now - now).TotalMilliseconds;
int count = hits.Length();

// Half-open range [start, end) of the hits on the requested 1-based page.
// `start` is clamped so that pageNo < 1 yields no rows instead of a negative index.
int start = Math.Max(0, (pageNo - 1) * PageSize);
int end = Math.Min(count, pageNo * PageSize);

// Highlighter scored by the parsed query, producing fragments of FragmentSize characters.
Highlighter highlighter = new Highlighter(new QueryScorer(query));
highlighter.SetTextFragmenter(new SimpleFragmenter(FragmentSize));
for (int i = start; i < end; i++)
{
    Lucene.Net.Documents.Document doc = hits.Doc(i);
    // Document.Get returns null when the field is absent; fall back to an
    // empty string so StringReader and the Length checks below cannot throw.
    string text = doc.Get("content") ?? string.Empty;
    string rawTitle = doc.Get("title") ?? string.Empty;
    string title = rawTitle + TitleEndMarker;

    Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("content", new System.IO.StringReader(text));
    Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
    string result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    string tresult = highlighter.GetBestFragments(titleStream, title, 0, "..");

    if (string.IsNullOrEmpty(tresult))
    {
        // Nothing in the title matched: show the plain title.
        tresult = rawTitle;
    }
    else if (tresult.EndsWith(TitleEndMarker, StringComparison.Ordinal))
    {
        // Strip the marker only when the fragment really ends with it, so a
        // fragment taken from the middle of a long title is not truncated.
        tresult = tresult.Substring(0, tresult.Length - TitleEndMarker.Length);
    }

    if (string.IsNullOrEmpty(result))
    {
        // Nothing in the content matched: fall back to a leading excerpt.
        result = text.Length > FragmentSize ? text.Substring(0, FragmentSize) : text;
    }
    if (result.Length < text.Length)
    {
        // Signal that the snippet is shorter than the full content.
        result = result + "...";
    }
    // NOTE(review): `result` and `tresult` are not consumed inside this snippet;
    // presumably the rendering code was omitted from the article.
}
lucene.net 2.0 src包里自带了Highlighter.Net-2.0.0,可以用来实现语法高亮。
// Searches the "title" and "content" fields of one or more on-disk indexes for
// `key`, then builds highlighted title/content snippets for the requested page.
// Relies on names declared outside this snippet: key, query, dir, searchtype,
// SearchType, indexSearcher, timer and pageNo.

// Marker appended to every title before highlighting. Per the original author's
// note it keeps trailing special characters of the real title from being
// dropped by the highlighter; it is stripped again after highlighting.
const string TitleEndMarker = "+aaaaaaaaa";
// Number of hits shown per result page.
const int PageSize = 10;
// Fragment size handed to the highlighter, also used as the fallback excerpt length.
const int FragmentSize = 100;

// Container query; the parsed user query is added as its only (mandatory) clause.
BooleanQuery bquery = new BooleanQuery();
// Analyzer used both to parse the query and to re-tokenize stored text for highlighting.
Analyzer analyzer = new StandardAnalyzer();
// Parse the user input against both searchable fields.
MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, analyzer);
query = parser.Parse(key);
bquery.Add(query, BooleanClause.Occur.MUST);
DateTime now = DateTime.Now;

// Open one reader per index directory and search all of them through a MultiReader.
List<IndexReader> indexreaders = new List<IndexReader>();
if (searchtype == SearchType.None)
{
    // Directory.GetDirectories returns paths that are already prefixed with `dir`,
    // so they must not be combined with `dir` again: doing so doubled the prefix
    // whenever `dir` was a relative path.
    foreach (string item in Directory.GetDirectories(dir))
    {
        indexreaders.Add(IndexReader.Open(Path.Combine(item, "Save")));
    }
}
else
{
    // A specific search type maps to the sub-directory of the same name.
    indexreaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, searchtype.ToString()), "Save")));
}
MultiReader reader = new MultiReader(indexreaders.ToArray());
// NOTE(review): neither the readers nor the searcher are closed in this snippet;
// confirm that the owner of `indexSearcher` closes them once `hits` has been consumed.
indexSearcher = new IndexSearcher(reader);
Hits hits = indexSearcher.Search(bquery);
// Elapsed time of parsing, opening the indexes and searching, in milliseconds.
timer = (DateTime.Now - now).TotalMilliseconds;
int count = hits.Length();

// Half-open range [start, end) of the hits on the requested 1-based page.
// `start` is clamped so that pageNo < 1 yields no rows instead of a negative index.
int start = Math.Max(0, (pageNo - 1) * PageSize);
int end = Math.Min(count, pageNo * PageSize);

// Highlighter scored by the parsed query, producing fragments of FragmentSize characters.
Highlighter highlighter = new Highlighter(new QueryScorer(query));
highlighter.SetTextFragmenter(new SimpleFragmenter(FragmentSize));
for (int i = start; i < end; i++)
{
    Lucene.Net.Documents.Document doc = hits.Doc(i);
    // Document.Get returns null when the field is absent; fall back to an
    // empty string so StringReader and the Length checks below cannot throw.
    string text = doc.Get("content") ?? string.Empty;
    string rawTitle = doc.Get("title") ?? string.Empty;
    string title = rawTitle + TitleEndMarker;

    Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("content", new System.IO.StringReader(text));
    Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
    string result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    string tresult = highlighter.GetBestFragments(titleStream, title, 0, "..");

    if (string.IsNullOrEmpty(tresult))
    {
        // Nothing in the title matched: show the plain title.
        tresult = rawTitle;
    }
    else if (tresult.EndsWith(TitleEndMarker, StringComparison.Ordinal))
    {
        // Strip the marker only when the fragment really ends with it, so a
        // fragment taken from the middle of a long title is not truncated.
        tresult = tresult.Substring(0, tresult.Length - TitleEndMarker.Length);
    }

    if (string.IsNullOrEmpty(result))
    {
        // Nothing in the content matched: fall back to a leading excerpt.
        result = text.Length > FragmentSize ? text.Substring(0, FragmentSize) : text;
    }
    if (result.Length < text.Length)
    {
        // Signal that the snippet is shorter than the full content.
        result = result + "...";
    }
    // NOTE(review): `result` and `tresult` are not consumed inside this snippet;
    // presumably the rendering code was omitted from the article.
}
- lucene.net 2.0 中文分词后语法高亮问题
- lucene.net 2.0 中文分词后语法高亮问题
- 关于Lucene.net中文分词后的结果着色问题
- Lucene高亮显示及中文分词
- 中文分词高亮问题
- Lucene.net中文分词探究
- Lucene.net中文分词探究
- Lucene.net中文分词探究
- Lucene.net中文分词探究
- Lucene.net中文分词探究
- Lucene.net中文分词探究
- Lucene 中文分词、分页、高亮 索引TXT
- Lucene 3.6 中文分词、分页查询、高亮显示等
- Lucene基础(三)-- 中文分词及高亮显示
- lucene整合中文分词器mmseg4j和高亮highlighter
- Lucene中文分词以及关键字的高亮
- lucene---中文分词IKAnalyzer和高亮highlighter的使用
- lucene(五) 中文分词和高亮显示
- 【蛙蛙推荐】Lucene.net试用
- Lucene(Nutch)距离商业文本搜索引擎还有多远?
- BufferedImage与byte[]互转 .
- 如何学好JAVA
- Java课程设计之学习成绩管理系统
- lucene.net 2.0 中文分词后语法高亮问题
- [信息图表]手机进化简史
- lucene.net开发教程与总结(一)
- 用Lucene.net对数据库建立索引及搜索 (2)
- 推迟满足感
- FilenameFilter
- sed巧妙获取软件包名字
- Drupal 资源
- 类设计者的核查表