Lucene 实战

来源：互联网发布：淘宝加盟被骗怎么报案编辑：程序博客网时间：2024/06/06 17:58

package timetask;import java.io.File;import java.io.PrintStream;import java.util.Date;import java.util.TimerTask;import javabean.Article;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import table.ArticleTable;import util.Constant;import util.PubFun;public class IndexTask extends TimerTask{  private static boolean isRunning = true;  public void run() {    if (!(isRunning)) return;    try {      indexF();    } catch (Exception e) {      e.printStackTrace();    }  }  private void indexF() throws Exception  {    File indexDir = new File(Constant.SEARCH_INDEX_PATH);    if (!(indexDir.exists())) indexDir.mkdir();    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);    Article[] articles = ArticleTable.loadArticlesForSearchIndex();    System.out.println("articles 共有 " + articles.length + " 条");    long startTime = new Date().getTime();    for (int i = 0; i < articles.length; ++i) {      indexWriter.addDocument(PubFun.getLuceneDoc(articles[i]));    }    indexWriter.forceMerge(1);    indexWriter.close();    long endTime = new Date().getTime();    System.out.println("这花费了" +       (endTime - startTime) +       " 毫秒来把文档增加到索引里面去!");  }}

/**
 * Reads the articles (joined with their column's html path) that should go into the
 * search index and maps each result row onto an {@code Article}.
 *
 * <p>A {@code null} note is replaced by an empty string; all other columns are copied
 * as returned by the query.
 *
 * @return one {@code Article} per result row, in query order
 * @throws Exception propagated from the underlying query helper
 */
public static Article[] loadArticlesForSearchIndex() throws Exception {
    String sql = "select a.id,title,creatime,note,b.html_path,a.content,a.author from cms_article a,cms_column b where a.col_id=b.id";
    // Second slot looks like the label used when the query fails — confirm against select().
    Object[] args = { sql, "ArticleTable===============loadArticlesForSearchIndex has error" };
    List rows = select(args);

    ArrayList<Article> articleList = new ArrayList<Article>();
    for (Object row : rows) {
        // Column order follows the select list above.
        Object[] cols = (Object[]) row;
        Article article = new Article();
        article.setId(((Long) cols[0]).longValue());
        article.setTitle((String) cols[1]);
        article.setCreatime((Date) cols[2]);
        String note = (String) cols[3];
        article.setNote(note == null ? "" : note);
        article.setHtmlPath((String) cols[4]);
        article.setContent((String) cols[5]);
        article.setAuthor((String) cols[6]);
        articleList.add(article);
    }
    return articleList.toArray(new Article[articleList.size()]);
}

 /**
  * Builds the Lucene document that represents one article in the search index.
  *
  * <p>Fields: {@code id} (stored, indexed verbatim); {@code title}, {@code note} and
  * {@code content} (stored, analyzed, with term vectors); {@code author}, {@code url},
  * {@code year}, {@code month} and {@code day} (stored only).
  *
  * <p>A {@code null} title, note or author is indexed as an empty string. The Lucene
  * {@code Field} constructor rejects {@code null} values, so without this a single
  * incomplete article would abort the whole indexing run.
  *
  * @param article the article to convert; its creation time must not be {@code null}
  * @return a new document holding the fields listed above
  */
 public static Document getLuceneDoc(Article article) {
     String title = (article.getTitle() == null) ? "" : article.getTitle();
     String note = (article.getNote() == null) ? "" : article.getNote();
     String author = (article.getAuthor() == null) ? "" : article.getAuthor();

     Field fieldId = new Field("id", String.valueOf(article.getId()),
         Field.Store.YES, Field.Index.NOT_ANALYZED);
     Field fieldTitle = new Field("title", title,
         Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     Field fieldNote = new Field("note", note,
         Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     // NOTE(review): null handling of the content depends on splitAndFilterString — confirm.
     Field fieldContent = new Field("content", splitAndFilterString(article.getContent()),
         Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

     Calendar c = Calendar.getInstance();
     c.setTime(article.getCreatime());
     Field fieldYear = new Field("year", String.valueOf(c.get(Calendar.YEAR)),
         Field.Store.YES, Field.Index.NO);
     // Calendar months are zero-based, hence the + 1.
     Field fieldMonth = new Field("month", String.valueOf(c.get(Calendar.MONTH) + 1),
         Field.Store.YES, Field.Index.NO);
     Field fieldDay = new Field("day", String.valueOf(c.get(Calendar.DAY_OF_MONTH)),
         Field.Store.YES, Field.Index.NO);

     // Page address: <site><column path>/<yyyy-MM-dd>/<id>.html
     Field fieldUrl = new Field("url",
         InitServlet.WEB_SITE_URL + article.getHtmlPath()
             + "/" + getDateTime("yyyy-MM-dd", article.getCreatime())
             + "/" + article.getId() + ".html",
         Field.Store.YES, Field.Index.NO);
     Field fieldAuthor = new Field("author", author, Field.Store.YES, Field.Index.NO);

     Document document = new Document();
     document.add(fieldId);
     document.add(fieldTitle);
     document.add(fieldNote);
     document.add(fieldAuthor);
     document.add(fieldContent);
     document.add(fieldUrl);
     document.add(fieldYear);
     document.add(fieldMonth);
     document.add(fieldDay);
     return document;
 }

package timetask;

import java.util.Timer;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

/**
 * Servlet context listener that schedules the periodic {@link IndexTask} when the web
 * application starts and cancels the timer when it stops.
 */
public class CMSListener implements ServletContextListener {
  /** Delay before the first run, in milliseconds. */
  public static final long DELAY = 2000L;

  /** Interval between runs, in milliseconds (24 hours = 86400000). */
  private static final long PERIOD = 24L * 60L * 60L * 1000L;

  private Timer timer;

  /** Cancels the timer created in {@link #contextInitialized}. */
  public void contextDestroyed(ServletContextEvent arg0) {
    timer.cancel();
  }

  /** Creates a daemon timer and schedules {@link IndexTask} at a fixed delay. */
  public void contextInitialized(ServletContextEvent arg0) {
    timer = new Timer("更新", true);
    timer.schedule(new IndexTask(), DELAY, PERIOD);
  }
}

使用 Lucene 添加搜索功能

package page.plugin;import java.io.File;import java.io.StringReader;import java.util.ArrayList;import javabean.SearchResult;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import page.inc.HtmlPage;import util.Constant;import util.InitServlet;public class SearchPage extends HtmlPage{  public String print(HttpServletRequest req, HttpServletResponse resp)    throws Exception  {    File indexDir = new File(Constant.SEARCH_INDEX_PATH);    String querrys = getStringParameter("search_txt", "", req);    int pageNo = getIntParameter("page_no", 1, req);    if (!(querrys.equals("")))    {      IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));      IndexSearcher searcher = new IndexSearcher(reader);      if (searcher != null) {        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);        String[] fields = { "title", "note", "content" };        String[] key = { querrys, querrys, querrys };        Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, key, fields, analyzer);        TopDocs topDocs = searcher.search(query, 153866);        ScoreDoc[] hits = topDocs.scoreDocs;        if (hits.length > 0) {          int sumPage = getTotalPage(hits.length, InitServlet.MESSAGE_PAGE_SIZE);          
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b style=\"color:rgb(204,102,0);\">", "</b>");          Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));          ArrayList results = new ArrayList();          SearchResult s = null;          String hightContent = null;          Document doc = null;          for (int i = (pageNo - 1) * InitServlet.MESSAGE_PAGE_SIZE; (i < pageNo * InitServlet.MESSAGE_PAGE_SIZE) && (i < hits.length); ++i) {            s = new SearchResult();            doc = searcher.doc(hits[i].doc);            String content = doc.get("content");            if ((content == null) || (content.equals(""))) {              s.setContent("");              s.setCnt(0);            } else {              s.setCnt(content.length());              tokenStream = analyzer.tokenStream("content", new StringReader(content));              hightContent = highlighter.getBestFragment(tokenStream, content);              if (hightContent == null)                s.setContent(content);              else {                s.setContent(hightContent);              }            }            s.setNote(doc.get("note"));            TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(doc.get("title")));            hightContent = highlighter.getBestFragment(tokenStream, doc.get("title"));            if (hightContent == null)              s.setTitle(doc.get("title"));            else {              s.setTitle(hightContent);            }            s.setUrl(doc.get("url"));            s.setYear(doc.get("year"));            s.setMonth(doc.get("month"));            s.setDay(doc.get("day"));            s.setAuthor(doc.get("author"));            results.add(s);          }          req.setAttribute("results", (SearchResult[])results.toArray(new SearchResult[results.size()]));          req.setAttribute("cnt", Integer.valueOf(hits.length));          req.setAttribute("sumPage", Integer.valueOf(sumPage));          
req.setAttribute("pageSize", Integer.valueOf(InitServlet.MESSAGE_PAGE_SIZE));        }      }    }    req.setAttribute("querrys", querrys);    req.setAttribute("pageNo", Integer.valueOf(pageNo));    return "/jsp/plugin/search/search.jsp";  }}

package javabean;public class SearchResult{  private String id;  private String title;  private String note;  private String content;  private String url;  private String year;  private String month;  private String day;  private String author;  private int cnt;  public String getId()  {    return this.id; }  public void setId(String id) {    this.id = id; }  public String getTitle() {    return this.title; }  public void setTitle(String title) {    this.title = title; }  public String getNote() {    return this.note; }  public void setNote(String note) {    this.note = note; }  public String getContent() {    return this.content; }  public void setContent(String content) {    this.content = content; }  public String getUrl() {    return this.url; }  public void setUrl(String url) {    this.url = url; }  public String getYear() {    return this.year; }  public void setYear(String year) {    this.year = year; }  public String getMonth() {    return this.month; }  public void setMonth(String month) {    this.month = month; }  public String getDay() {    return this.day; }  public void setDay(String day) {    this.day = day; }  public int getCnt() {    return this.cnt; }  public void setCnt(int cnt) {    this.cnt = cnt; }  public String getAuthor() {    return this.author; }  public void setAuthor(String author) {    this.author = author;  }}

/**
 * Adds or replaces a single article in the search index, keyed by its {@code id} term.
 *
 * <p>The index is opened with {@code CREATE_OR_APPEND} so that the call also works when
 * no index exists yet (plain {@code APPEND} fails on the empty directory this method has
 * just created).
 *
 * @param article the article to (re)index
 * @throws Exception if the index cannot be opened or written; uncommitted changes are
 *     rolled back and the write lock is released before the exception propagates
 */
protected void indexF(Article article) throws Exception {
    File indexDir = new File(Constant.SEARCH_INDEX_PATH);
    if (!indexDir.exists()) {
        // mkdirs() also creates missing parent directories, which mkdir() would not.
        indexDir.mkdirs();
    }

    IndexWriterConfig conf =
        new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);

    boolean committed = false;
    try {
        // updateDocument deletes any document with the same id term, then adds the new one.
        indexWriter.updateDocument(
            new Term("id", String.valueOf(article.getId())), PubFun.getLuceneDoc(article));
        // NOTE(review): merging to one segment after every single update is expensive on a
        // large index — confirm this is really wanted here.
        indexWriter.forceMerge(1);
        indexWriter.close();
        committed = true;
    } finally {
        if (!committed) {
            // Release the write lock without committing a partial update.
            indexWriter.rollback();
        }
    }
}