Lucene 建立索引数据库 实现搜索网页

来源:互联网 发布:指尖网络 编辑:程序博客网 时间:2024/05/16 08:11

本文介绍如何使用 Lucene 实现一个简单的网页搜索引擎:建立索引数据库,并处理搜索结果。

一,创建索引

 public boolean createIndex() throws IOException {
  //.......
  Directory fsDirectory = FSDirectory.getDirectory(Constant.INDEX_DIR, true);
  Analyzer analyzer =  new IK_CAnalyzer();//分析器
  IndexWriter indexWriter = new IndexWriter(fsDirectory, analyzer, true);
  for (int i = 0; i < htmls.length; i++) {
   String htmlPath = htmls[i].getAbsolutePath();
   if (htmlPath.endsWith(".html") || htmlPath.endsWith(".htm")) {
   //添加 Document
   addDocument(htmlPath, indexWriter);
   }
  }
  indexWriter.optimize();
  indexWriter.close();
 }
二,添加 Document
 public void addDocument(String htmlPath, IndexWriter indexWriter) {
  HTMLDocParser htmlParser = new HTMLDocParser(htmlPath);
  String path = htmlParser.getPath();
  String title = htmlParser.getTitle();
  Reader content = htmlParser.getContent();
  LineNumberReader reader = new LineNumberReader(content);
  String snap = "";
  try {
   snap = reader.readLine();
   String tp = reader.readLine();
   if (snap.length() < 30 & tp != null) {
    snap = snap + tp;
   }
  } catch (IOException e1) {
   e1.printStackTrace();
  }
  Document document = new Document();
  document.add(new Field("path", path, Field.Store.YES, Field.Index.NO));//文件路径
  document.add(new Field("title", title, Field.Store.YES,Field.Index.TOKENIZED));//文件标题
  document.add(new Field("snap", snap, Field.Store.YES, Field.Index.NO));//快照
  document.add(new Field("content", content));//内容
  try {
   indexWriter.addDocument(document);
  } catch (IOException e) {
   e.printStackTrace();
  }
 }
三,显示结果
public List search() {
  List searchResult = new ArrayList();
  if (false == indexManager.ifIndexExist()) {
   try {
    if (false == indexManager.createIndex()) {
     return searchResult;
    }
   } catch (IOException e) {
    e.printStackTrace();
    return searchResult;
   }
  }

  IndexSearcher indexSearcher = null;

  try {
   indexSearcher = new IndexSearcher(indexManager.getIndexDir());
  } catch (IOException ioe) {
   ioe.printStackTrace();
  }

  QueryParser queryParser = new QueryParser("content", analyzer);
  Query query = null;
  try {
   query = queryParser.parse(searchWord);
  } catch (ParseException e) {
   e.printStackTrace();
  }
  if (null != query && null != indexSearcher) {
   try {
    Hits hits = indexSearcher.search(query);
    for (int i = 0; i < hits.length(); i++) {
     SearchResultBean resultBean = new SearchResultBean();
     resultBean.setHtmlPath(hits.doc(i).get("path"));
     resultBean.setHtmlTitle(hits.doc(i).get("title"));
     resultBean.setHtmlSnap(hits.doc(i).get("snap"));
     searchResult.add(resultBean);
    }
   } catch (IOException e) {
    e.printStackTrace();
   }
  }
  return searchResult;
 }

原创粉丝点击