Lucene简单使用

来源:互联网 发布:js中两个数组合并 编辑:程序博客网 时间:2024/04/29 19:21

/* 生成索引文件*/
package com.alipay.dtcrawler.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import java.io.*;
import java.util.Date;

public class Test {
public static void main(String args[]) throws IOException {
File fileDir = new File(“D:\test_lucene\lucene”);
File indexDir = new File(“D:\test_lucene\index”);

    Analyzer luceneAnalyzer = new StandardAnalyzer();    IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);    File[] textFile = fileDir.listFiles();    long startTime = new Date().getTime();    for (int i =0;i<textFile.length;i++){        if (textFile[i].isFile() &&textFile[i].getName().endsWith(".txt")){            System.out.println("File"+textFile[i].getCanonicalPath()+"正在被索引");            String tmp = fileReaderAll(textFile[i].getCanonicalPath(), "utf-8");            System.out.println(tmp);            Document document = new Document();            Field fieldPath = new Field("path",textFile[i].getPath(),Field.Store.YES,Field.Index.NO);            Field fieldBody = new Field("body",tmp,Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_OFFSETS);            document.add(fieldPath);            document.add(fieldBody);            indexWriter.addDocument(document);        }    }    indexWriter.optimize();    indexWriter.close();    long endTime = new Date().getTime();    System.out.println("这花费了"+(endTime - startTime)+"毫秒来把文档增加到索引里面去!"+fileDir.getPath());}public static String fileReaderAll(String fileName, String charset) throws IOException {    BufferedReader reader = new BufferedReader( new InputStreamReader(            new  FileInputStream(fileName), charset));    String line = new String();    String temp = new String();    while ((line =reader.readLine())!= null){        temp += line;    }    reader.close();    return temp;}

}

/* 根据索引查询数据 */
package com.alipay.dtcrawler.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

import java.io.IOException;

public class TestLucene {
public static void main(String args[]) throws IOException {
Hits hits = null;
String tmp =”姚”;
Query query =null;

    IndexSearcher searcher = new IndexSearcher("D:\\test_lucene\\index");    Analyzer analyzer = new StandardAnalyzer();    try {        QueryParser qp = new QueryParser("body",analyzer);        query = qp.parse(tmp);    }catch (Exception e){        e.printStackTrace();    }    if (searcher !=null){        hits=searcher.search(query);        if (hits.length()>0){            System.out.println("共搜索到:"+hits.length()+"条数据;");            for (int i=0;i<hits.length();i++){                System.out.println("搜索到:"+hits.doc(i));            }        }    }}

}

0 0
原创粉丝点击