Lucene全文检索基础

来源:互联网 发布:长宏数据 编辑:程序博客网 时间:2024/05/17 06:36

原文博客:http://blog.csdn.net/tianlincao/article/details/6867127

下面贴出我练习的代码以及一些修改和注释说明:
测试使用的版本是 Lucene 2.3.0;Lucene 1.4 以及 Lucene 3.x 及以上版本的 API 与之不同,在这些版本上运行时需要相应修改下面的部分方法。
TextFileIndexer:

public class TextFileIndexer {    public static void main(String[] args) throws Exception{        File fileDir = new File("F:\\Analyzer\\Resources");        File indexDir = new File("F:\\Analyzer\\Index");        Analyzer luceneAnalyzer = new StandardAnalyzer();        // true表示如果原来已经有索引文件在索引目录下,覆盖        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);        File[] textFiles = fileDir.listFiles();        long startTime = new Date().getTime();        for(int i = 0; i < textFiles.length; i++){            if(textFiles[i].isFile() && textFiles[i].getName().endsWith(".txt")){                System.out.println("File " + textFiles[i].getCanonicalPath() + "正在被索引");                String temp = FileReaderAll(textFiles[i].getCanonicalPath(), "UTF-8");                System.out.println(temp);                Document document = new Document();                // 建立两条索引                Field FieldPath = new Field("path", textFiles[i].getPath(),                        Field.Store.YES, Field.Index.NO);                Field FieldBody = new Field("body", temp, Field.Store.YES,                        Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);                document.add(FieldPath);                document.add(FieldBody);                // 入库                indexWriter.addDocument(document);            }        }        // 整合优化索引        indexWriter.optimize();        indexWriter.close();    }    public static String FileReaderAll(String fileName, String charset) throws IOException{        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charset));        String line = new String();        String temp = new String();        while((line = reader.readLine()) != null){            temp += line;        }        reader.close();        return temp;    }}

TestQuery:

/**
 * Runs a single keyword query against the "body" field of the index stored in
 * {@code F:\Analyzer\Index} and reports the number of hits.
 */
public class TestQuery {

    /**
     * Parses the query string {@code "is"} with a {@code StandardAnalyzer} and
     * searches the index. A line is printed only when at least one hit is found.
     *
     * @param args ignored
     * @throws Exception if the query cannot be parsed or the index cannot be
     *                   opened or searched
     */
    public static void main(String[] args) throws Exception {
        String queryString = "is";
        // NOTE(review): "is" is presumably dropped as a stop word by
        // StandardAnalyzer, in which case no result line is printed — confirm.
        Analyzer analyzer = new StandardAnalyzer();
        QueryParser qp = new QueryParser("body", analyzer);
        // A parse failure propagates to the caller; searching with a null
        // query after a swallowed exception would only hide the real error.
        Query query = qp.parse(queryString);

        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\Index");
        try {
            Hits hits = searcher.search(query);
            if (hits.length() > 0) {
                System.out.println("找到:" + hits.length() + " 个结果!");
            }
        } finally {
            searcher.close();
        }
    }
}

StandardAnalyzerTest:

/**
 * Prints the tokens that {@code StandardAnalyzer} produces for a fixed sample
 * sentence, one numbered line per token.
 */
public class StandardAnalyzerTest {

    public StandardAnalyzerTest() {
    }

    /**
     * Tokenizes the sample text and writes each token to standard output.
     * Any exception raised while reading tokens has its stack trace printed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Analyzer analyzer = new StandardAnalyzer();
        StringReader input = new StringReader("lighter javaeye com is the are on");
        TokenStream tokens = analyzer.tokenStream("name", input);
        try {
            int count = 0;
            // next() returns null once the stream is exhausted.
            for (Token token = tokens.next(); token != null; token = tokens.next()) {
                count++;
                System.out.println("第" + count + "行,结果:" + token.termText());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

IndexSort:

public class IndexSort {    public static void main(String[] args) throws Exception{        IndexWriter writer = new IndexWriter("F:\\Analyzer\\indexStroe",                new StandardAnalyzer(), true);        Document doc =   new  Document();        doc.add(new  Field( "sort" , "1",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "4",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "3",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "5",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "9",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "6" ,Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        doc =   new  Document();        doc.add(new  Field( "sort" , "7",Field.Store.YES,Field.Index.TOKENIZED));        writer.addDocument(doc);        writer.close();    }}

MyScoreDocComparator:

/**
 * Orders search hits by the integer value stored in one field of each
 * document. All values are read once, when the comparator is constructed.
 */
public class MyScoreDocComparator implements ScoreDocComparator {

    /** Parsed field value per document id; slots of deleted documents stay 0. */
    private final int[] sort;

    /**
     * Loads and parses the given stored field for every live document.
     *
     * @param reader    index to read the stored field values from
     * @param fieldName name of the stored field holding an integer string
     * @throws IOException           if a document cannot be read
     * @throws NumberFormatException if a live document has no value for the
     *                               field or the value is not an integer
     */
    public MyScoreDocComparator(IndexReader reader, String fieldName) throws IOException {
        sort = new int[reader.maxDoc()];
        for (int i = 0; i < sort.length; i++) {
            // maxDoc() also counts deleted documents, and fetching a deleted
            // document is rejected by the reader, so skip those ids.
            if (reader.isDeleted(i)) {
                continue;
            }
            Document doc = reader.document(i);
            sort[i] = Integer.parseInt(doc.get(fieldName));
        }
    }

    /**
     * Compares two hits by their cached field value.
     *
     * @return 1, -1 or 0 when the value of {@code i} is greater than, less
     *         than or equal to the value of {@code j}
     */
    @Override
    public int compare(ScoreDoc i, ScoreDoc j) {
        int left = sort[i.doc];
        int right = sort[j.doc];
        if (left > right) {
            return 1;
        } else if (left < right) {
            return -1;
        }
        return 0;
    }

    /** Returns the cached field value of the given hit as an {@code Integer}. */
    @Override
    public Comparable sortValue(ScoreDoc scoreDoc) {
        return Integer.valueOf(sort[scoreDoc.doc]);
    }

    /** Reports the sort type as {@code SortField.INT}. */
    @Override
    public int sortType() {
        return SortField.INT;
    }
}

MySortComparatorSource:

/**
 * Supplies a {@code MyScoreDocComparator} for the "sort" field.
 */
public class MySortComparatorSource implements SortComparatorSource {

    /**
     * Creates the comparator for the requested field.
     *
     * @param indexReader index the comparator reads its values from
     * @param fieldName   name of the field to sort on
     * @return a new {@code MyScoreDocComparator} when {@code fieldName} is
     *         "sort", otherwise {@code null}
     * @throws IOException if the comparator cannot read the index
     */
    @Override
    public ScoreDocComparator newComparator(IndexReader indexReader, String fieldName)
            throws IOException {
        // Only the "sort" field is supported; every other field yields null.
        if (!fieldName.equals("sort")) {
            return null;
        }
        return new MyScoreDocComparator(indexReader, fieldName);
    }
}

SearchSort:

public class SearchSort {    public static void searchSort1() throws Exception{        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\indexStroe");        QueryParser qp = new QueryParser("sort", new StandardAnalyzer());        Query query = qp.parse("4");        Hits hits = searcher.search(query);        System.out.println("有" + hits.length() + "个结果");        for(int i = 0; i < hits.length(); i++){            System.out.println(hits.doc(i).get("sort"));        }    }    public static void searchSort2() throws Exception{        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\indexStroe");        // true表示可以有不连续的数值       Query query = new RangeQuery(new Term("sort", "1"), new Term("sort", "9"), true);        // 其他一些SortField的构造//        public SortField (String field, boolean reverse)//根据某个域(field)的名称构造SortField, reverse为false为升序//        public SortField (String field, int type)//        public SortField (String field, int type, boolean reverse)//        public SortField (String field, Locale locale)//        public SortField (String field, Locale locale, boolean reverse)//        public SortField (String field, SortComparatorSource comparator)//        public SortField (String field, SortComparatorSource comparator, booleanreverse)        Hits hits = searcher.search(query, new Sort(new SortField("sort", new MySortComparatorSource())));        System.out.println("有" + hits.length() + "个结果");        for(int i = 0; i < hits.length(); i++){            System.out.println(hits.doc(i).get("sort"));        }    }    public static void main(String[] args) throws Exception{        searchSort1();        searchSort2();    }}

结果:

有1个结果
4
有7个结果
1
3
4
5
6
7
9
原创粉丝点击