基于Lucene站内搜索(转载)
来源:互联网 发布:收到短信源端口 编辑:程序博客网 时间:2024/06/05 11:57
99收藏夹原来的站内收藏搜索是基于数据库的like '%...%'语句实现的,发现速度有点慢,于是想到用Lucene来实现按名字和共享收藏的链接标题来搜索站内收藏的功能。昨天下午做好了,但是结果不尽如人意。
首先我用Lucene把数据从数据库导出建立索引,代码如下,注:里面的ChinaTokene方法是调用www.sj110.com的中文分词组件
/// <summary>
/// Builds and incrementally extends a Lucene index of shared favorites
/// read from a <see cref="SqlDataReader"/>.
/// </summary>
/// <remarks>
/// The index is analyzed with <c>WhitespaceAnalyzer</c>, so Chinese text must be
/// pre-segmented into space-separated tokens (see <see cref="ChinaTokene"/>)
/// before it is added as a tokenized field.
/// </remarks>
public class IntranetIndexer
{
    // NOTE(review): the path separators look like they were lost when this listing
    // was published (presumably C:\Inetpub\wwwroot\pwqdream\endid.txt) - confirm
    // before running. The literal is kept exactly as it appeared.
    private const string EndIdFilePath = @"C:Inetpubwwwrootpwqdreamendid.txt";

    private IndexWriter writer;

    /// <summary>
    /// Opens the index stored in <paramref name="dictory"/>, creating a new one
    /// when no index exists there yet.
    /// </summary>
    /// <param name="dictory">File-system directory that holds the index.</param>
    public IntranetIndexer(string dictory)
    {
        // Ask Lucene whether an index is present instead of counting files:
        // counting throws when the folder is missing and mistakes any stray
        // file for an existing index.
        bool createNew = !IndexReader.IndexExists(dictory);
        writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), createNew);
        writer.SetUseCompoundFile(true);
    }

    /// <summary>
    /// Adds one document per row of <paramref name="dr"/> and then records the
    /// <c>this_id</c> of the last row in the end-id text file.
    /// </summary>
    /// <param name="dr">
    /// Open reader exposing the columns <c>this_id</c>, <c>this_title</c>,
    /// <c>this_url</c> and <c>this_name</c>.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="dr"/> is null.</exception>
    public void AddDataReaderToIndex(SqlDataReader dr)
    {
        if (dr == null)
        {
            throw new ArgumentNullException("dr");
        }

        // Nothing new to index: leave the end-id file untouched.
        if (!dr.HasRows)
        {
            return;
        }

        string endId = "0";
        while (dr.Read())
        {
            string title = dr["this_title"].ToString();

            Document doc = new Document();
            // Keyword fields are stored and indexed as one untokenized term.
            doc.Add(Field.Keyword("this_url", dr["this_url"].ToString()));
            doc.Add(Field.Keyword("this_name", dr["this_name"].ToString()));
            // Searchable, segmented title.
            doc.Add(Field.Text("this_title", ChinaTokene(title)));
            // Original, unsegmented title kept only for display.
            doc.Add(Field.UnIndexed("all_title", title));
            writer.AddDocument(doc);

            endId = dr["this_id"].ToString();
        }

        using (StreamWriter sw = new StreamWriter(EndIdFilePath))
        {
            sw.WriteLine(endId);
        }
    }

    /// <summary>
    /// Segments <paramref name="s"/> with the Sj110 Chinese tokenizer and joins
    /// the tokens with single spaces so that <c>WhitespaceAnalyzer</c> can split them.
    /// </summary>
    /// <param name="s">Text to segment; may be null or empty.</param>
    /// <returns>
    /// The space-separated tokens without a trailing space, or an empty string
    /// when there is nothing to segment.
    /// </returns>
    private string ChinaTokene(string s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return string.Empty;
        }

        List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
        if (results == null || results.Count == 0)
        {
            return string.Empty;
        }

        // Joining avoids the previous trailing-separator cleanup, which removed
        // the last character of the final token instead of the trailing space
        // and threw when the token list was empty.
        return string.Join(" ", results.ToArray());
    }

    /// <summary>
    /// Optimizes the index and closes the underlying writer.
    /// </summary>
    public void Close()
    {
        try
        {
            writer.Optimize();
        }
        finally
        {
            // Always close the writer, even when optimizing fails.
            writer.Close();
        }
    }
}
...{
private IndexWriter writer;
public IntranetIndexer(string dictory)
...{
if (Directory.GetFiles(dictory).Length == 0)
...{
writer = new IndexWriter(dictory, new WhitespaceAnalyzer(), true);
}
else
...{
writer = new IndexWriter(dictory,new WhitespaceAnalyzer(),false);
}
writer.SetUseCompoundFile(true);
}
public void AddDataReaderToIndex(SqlDataReader dr)
...{
if (dr.HasRows)
...{
string EndId="0";
while (dr.Read())
...{
/** *//**//*int fieldCount = dr.FieldCount;
for (int i = 0; i < fieldCount; i++)
{
Document doc = new Document();
doc.Add(Field.Text(dr.GetName(i).ToString(),ChinaTokene(dr[i].ToString())));
writer.AddDocument(doc);
EndId = dr[0].ToString();
}*/
Document doc = new Document();
doc.Add(Field.Keyword("this_url",dr["this_url"].ToString()));
doc.Add(Field.Keyword("this_name",dr["this_name"].ToString()));
doc.Add(Field.Text("this_title", ChinaTokene(dr["this_title"].ToString())));
doc.Add(Field.UnIndexed("all_title",dr["this_title"].ToString()));
writer.AddDocument(doc);
EndId = dr["this_id"].ToString();
}
using (StreamWriter sw = new StreamWriter(@"C:Inetpubwwwrootpwqdreamendid.txt"))
...{
sw.WriteLine(EndId);
}
}
}
private string ChinaTokene(string s)
...{
System.Text.StringBuilder sb = new System.Text.StringBuilder();
List<string> results = Sj110.Com.Chinese.Tokenizer.Tokenize(s);
foreach (string str in results)
...{
sb.AppendFormat("{0} ",str);
}
sb = sb.Remove(sb.Length-2,1);
return sb.ToString();
}
public void Close()
...{
writer.Optimize();
writer.Close();
}
当然,建完索引之后,要把数据库中最后一条已索引记录的ID写入文本文件,
因为下次取得DataReader之前,我们会先从这个文本文件读出上次最后一条记录的ID,再只查询ID比它大的记录。
代码如下
/// <summary>
/// Indexes every <c>userFav</c> row whose <c>this_id</c> is greater than the id
/// recorded in the end-id text file by the previous run.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void buttonAddIndex_Click(object sender, EventArgs e)
{
    // NOTE(review): the path separators in both literals look like they were lost
    // when this listing was published (presumably C:\Inetpub\wwwroot\pwqdream\...)
    // - confirm before running. The literals are kept exactly as they appeared.
    const string endIdPath = @"C:Inetpubwwwrootpwqdreamendid.txt";
    const string indexPath = @"C:Inetpubwwwrootpwqdreamindex";

    // Start from 0 when the file is missing (first run), empty, unparsable
    // or negative, so that everything gets indexed.
    int endId = 0;
    if (File.Exists(endIdPath))
    {
        using (StreamReader sr = new StreamReader(endIdPath))
        {
            if (!int.TryParse(sr.ReadLine(), out endId) || endId < 0)
            {
                endId = 0;
            }
        }
    }

    string connStr = ConfigurationManager.AppSettings[0].ToString();
    string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id";

    // using blocks release the connection, command and reader even when
    // indexing throws.
    using (SqlConnection conn = new SqlConnection(connStr))
    using (SqlCommand comm = new SqlCommand(selStr, conn))
    {
        comm.Parameters.AddWithValue("@this_id", endId);
        conn.Open();

        using (SqlDataReader dr = comm.ExecuteReader())
        {
            IntranetIndexer writer = new IntranetIndexer(indexPath);
            try
            {
                writer.AddDataReaderToIndex(dr);
            }
            finally
            {
                // Close the indexer on every path so it is not left open
                // after a failed run.
                writer.Close();
            }
        }
    }
}
...{
int endId;
using (StreamReader sr = new StreamReader(@"C:Inetpubwwwrootpwqdreamendid.txt"))
...{
try
...{
endId = Convert.ToInt32(sr.ReadLine());
if (endId < 0)
endId = 0;
}
catch
...{
endId = 0;
}
}
string connStr = ConfigurationManager.AppSettings[0].ToString();
SqlConnection conn = new SqlConnection(connStr);
string selStr = "select this_id,this_title,this_url,this_name from userFav where this_id>@this_id";
SqlCommand comm = new SqlCommand(selStr,conn);
comm.Parameters.AddWithValue("@this_id",endId);
conn.Open();
SqlDataReader dr = comm.ExecuteReader();
IntranetIndexer writer = new IntranetIndexer(@"C:Inetpubwwwrootpwqdreamindex");
writer.AddDataReaderToIndex(dr);
writer.Close();
dr.Close();
dr.Dispose();
conn.Close();
}
查询的时候用的是ajax调用后台方法,做多字段搜索,而且查询词都先分词了,结果很不理想:如果输入的是名字,那么取不到url和标题;如果输入的像标题,则取不到url和名字。另外界面也很不友好。下面是代码:
/**
 * Starts an asynchronous search of the shared favorites and shows the search
 * term in the title cell while the request is running.
 *
 * @param {string} obj        Search text typed by the user; nothing happens when it is empty.
 * @param {number} pageNumber Page of results to request.
 */
function showUserShellFav(obj, pageNumber)
{
    if (obj == "")
    {
        return;
    }

    var selValue = document.getElementById("searchSel").value;
    var t = document.getElementById("titleTd");

    // Build the title from DOM text nodes instead of concatenating the search
    // term into innerHTML, so markup typed by the user is displayed literally
    // and never interpreted.
    var highlighted = document.createElement("font");
    highlighted.setAttribute("color", "red");
    highlighted.appendChild(document.createTextNode(obj));

    t.innerHTML = "";
    t.appendChild(document.createTextNode("搜索关于--"));
    t.appendChild(highlighted);
    t.appendChild(document.createTextNode("--的共享收藏"));

    showLoad();
    PwqzcDream.test.SelectUserShellFav(obj, pageNumber, selValue, onSelectUserShellFavCom);
}
/**
 * Callback for the shared-favorites search: renders the pager and the result list.
 *
 * @param {Object} rel Response wrapper. rel.value.Tables[0].Rows holds the hits
 *                     (this_url, this_title, this_name) and rel.value.Tables[1].Rows
 *                     holds the pager fragments (html).
 */
function onSelectUserShellFavCom(rel)
{
    // Escapes text for use in HTML content and in quoted attribute values.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    var notFound = "对不起,没有找到相关的收藏!";

    hideLoad();
    var pageIndexTd = document.getElementById("pageIndexTd");
    var c = document.getElementById("contentTd");

    // A failed call carries no value; show the empty state instead of throwing.
    var tables = (rel && rel.value) ? rel.value.Tables : null;
    if (!tables)
    {
        pageIndexTd.innerHTML = "";
        c.innerHTML = notFound;
        return;
    }

    var dtContent = tables[0];
    var dtCount = tables[1];

    // The pager fragments are markup produced by the server and are inserted as-is.
    // Collect and assign once: appending to innerHTML in a loop re-parses the
    // whole cell on every iteration.
    var pagerParts = [];
    for (var r = 0; r < dtCount.Rows.length; r++)
    {
        pagerParts.push(dtCount.Rows[r].html);
    }
    pageIndexTd.innerHTML = pagerParts.join("");

    if (dtContent.Rows.length == 0)
    {
        c.innerHTML = notFound;
        return;
    }

    var loggedIn = PwqzcDream.MyDefault.IsLogin().value;
    var items = [];
    for (var row = 0; row < dtContent.Rows.length; row++)
    {
        var cur = dtContent.Rows[row];
        // url, title and name are user-supplied favorites, so escape them before
        // they reach innerHTML.
        // NOTE(review): escaping does not neutralise javascript: URLs - consider
        // validating the scheme on the server.
        var url = escapeHtml(cur.this_url);
        var title = escapeHtml(cur.this_title);
        var name = escapeHtml(cur.this_name);

        if (loggedIn)
        {
            // Logged-in users get a draggable link; the URL travels in the
            // type attribute and is opened through openLink.
            items.push("<li><a type='" + url + "' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
        else
        {
            items.push("<li><a href='" + url + "' style='color:#0088e4;' target='_blank'>" + title + "</a>  <font color='red'>" + name + "</font>收藏</li>");
        }
    }
    c.innerHTML = items.join("");
}
...{
var selValue = document.getElementById("searchSel").value;
if(obj!="")
...{
var t = document.getElementById("titleTd");
//var c = document.getElementById("contentTd");
t.innerHTML="搜索关于--<font color='red'>"+obj+"</font>--的共享收藏";
//c.innerHTML="";
showLoad();
PwqzcDream.test.SelectUserShellFav(obj,pageNumber,selValue,onSelectUserShellFavCom);
}
}
function onSelectUserShellFavCom(rel)
...{
hideLoad();
var pageIndexTd = document.getElementById("pageIndexTd");
var c = document.getElementById("contentTd");
var dtContent = rel.value.Tables[0];
var dtCount = rel.value.Tables[1];
pageIndexTd.innerHTML = "";
for(var r=0;r<dtCount.Rows.length;r++)
...{
var cu = dtCount.Rows[r];
pageIndexTd.innerHTML += cu.html;
}
if(dtContent.Rows.length>0)
...{
c.innerHTML = "";
if(PwqzcDream.MyDefault.IsLogin().value)
...{
for(var row=0;row<dtContent.Rows.length;row++)
...{
var cur = dtContent.Rows[row];
c.innerHTML+="<li><a type='"+cur.this_url+"' style='color:#0088e4;position:relative;cursor:pointer;' onmousedown='MouseDownToMove(this,event);' onmousemove='MouseMoveToMove(event);' onmouseup='MouseUpToMove(event);' onclick='openLink(this.type);'>"+cur.this_title+"</a>  <font color='red'>"+cur.this_name+"</font>收藏</li>";
}
}
else
...{
for(var row=0;row<dtContent.Rows.length;row++)
...{
var cur = dtContent.Rows[row];
c.innerHTML+="<li><a href='"+cur.this_url+"' style='color:#0088e4;' target='_blank'>"+cur.this_title+"</a>  <font color='red'>"+cur.this_name+"</font>收藏</li>";
}
}
}
else
...{
c.innerHTML = "对不起,没有找到相关的收藏!";
}
}
- 基于Lucene站内搜索(转载)
- 基于s2sh +mysql的 lucene大型网站的站内搜索实现
- 基于lucene.net 和ICTCLAS2014的站内搜索的实现1
- 基于lucene.net 和ICTCLAS2014的站内搜索的实现2
- Lucene站内搜索引擎
- 初试lucene站内搜索
- 给你的网站加上站内搜索---Spring+Hibernate基于Compass(基于Lucene)实现
- Lucene.net 盘古分词 站内搜索
- 利用Lucene打造站内搜索引擎的思路
- 利用Lucene打造站内搜索引擎的思路
- Lucene.NET----站内搜索引擎资料(推荐-arvin)
- 利用Lucene打造站内搜索引擎的思路
- Lucene.Net与盘古分词实现站内搜索
- 利用Lucene打造站内搜索引擎的思路
- 利用Lucene打造站内搜索引擎的思路
- Lucene.net站内搜索2—Lucene.Net简介和分词
- 基于Lucene/XML的站内全文检索解决方案(转载)
- Lucene:基于Java的全文检索引擎简介(转载)
- list 排序
- C#中的API32(转贴)
- 亿万用户网站MySpace的成功秘密
- JavaMail快速入门1
- 【转摘】变频器设计使用注意事项
- 基于Lucene站内搜索(转载)
- [转载]面试时经常问的c变量定义的问题.
- JavaMail API简介
- ERP实施天下无贼版
- 关于WINDOWS SDK WM_PAINT的一些想法
- 软件项目管理中的一些误区
- Select-Optionsのオプション機能のTO項目非表示化
- VC中多线程编程
- S3c2440A平台HIVE注册表+binfs的实现