Lucene 4.6 in Practice


Over the past couple of days I studied Lucene and wrote a small application-level wrapper around it, modeled on the BaseDao encapsulation pattern that is commonly used in our projects. It wraps a few simple Lucene operations so they are convenient to call from project code. The wrapper code is described below.

1. Base class

This class mainly provides the following operations (a short call sketch follows the list):

  • createIndex — create an index entry
  • update — update an index entry
  • delete — delete an index entry
  • searchAll — query all matches
  • pageSearch — paged query
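The two search methods take a free-form query condition that is handed directly to Lucene's QueryParser, so callers use standard field:term syntax; an empty condition is rewritten internally to the match-all query (*:*). Below is a quick, illustrative call sketch only (not part of the original code); it uses the PersonSearchService subclass and the Chinese test data defined later in sections 2 and 3.

    // Illustrative sketch: PersonSearchService is the BaseLucene<Person> subclass from section 2.
    PersonSearchService service = new PersonSearchService();

    // queryWhere goes straight to QueryParser: "field:term" clauses, OR'd together by default.
    String all = service.queryAll("name:张三 introduce:中国", "name");

    // Paged variant of the same call: page 1, 10 hits per page.
    String page1 = service.pageQuery("name:张三", "name", 1, 10);

    // An empty condition ("") is treated internally as the match-all query "(*:*)".
    String everything = service.queryAll("", "name");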

 

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.beans.PropertyDescriptor;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.lang.reflect.ParameterizedType;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Full-text search base class.
 *
 * @author: alex
 * @time: 14-4-1 2:24 PM
 * @version: 1.0
 */
public abstract class BaseLucene<T extends Object> {

    protected static Analyzer analyzer = new IKAnalyzer();    // analyzer (IK Chinese tokenizer)
    protected static File indexDir = null;

    static {
        indexDir = new File(LuceneContants.INDEX_FILE_PATH);  // directory that stores the index files
    }

    /**
     * Open the index directory.
     * @return the directory object
     */
    private Directory getDirectory() {
        Directory directory = null;
        try {
            directory = FSDirectory.open(indexDir);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return directory;
    }

    /**
     * Obtain an index writer.
     * @param directory index directory
     * @return the index writer
     */
    private IndexWriter getIndexWriter(Directory directory) {
        IndexWriter indexWriter = null;
        try {
            // writer configuration
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
            // open mode: create or append
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, config);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return indexWriter;
    }

    /**
     * Close the directory and the index writer.
     * @param directory   index directory
     * @param indexWriter index writer
     */
    private void closeDirectoryAndIndexWriter(Directory directory, IndexWriter indexWriter) {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Create an index entry.
     * @param entity        entity of the generic type
     * @param keyWordFields names of the fields to index as keywords
     */
    public void createIndex(T entity, List<String> keyWordFields) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = getDirectory();
            indexWriter = getIndexWriter(directory);
            // assemble the document
            Document doc = getDoc(entity, keyWordFields);
            indexWriter.addDocument(doc);   // write to the index
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeDirectoryAndIndexWriter(directory, indexWriter);
        }
    }

    /**
     * Assemble a Document from an entity.
     * @param entity        entity of the generic type
     * @param keyWordFields names of the fields to index as keywords
     * @return the Document
     */
    private Document getDoc(T entity, List<String> keyWordFields) {
        FieldType ftIndex = new FieldType();   // field type for keyword fields
        ftIndex.setIndexed(true);              // indexed
        ftIndex.setStored(true);               // stored
        ftIndex.setTokenized(true);            // tokenized
        Document doc = new Document();
        Map<String, Integer> fieldsMap = getAllFields(entity, keyWordFields);
        for (Map.Entry<String, Integer> fieldMap : fieldsMap.entrySet()) {
            if (fieldMap.getValue() == LuceneContants.IS_KEY_WORD) {   // indexed keyword field
                Field field = new Field(fieldMap.getKey(),
                        getterMethod(entity, fieldMap.getKey()).toString(), ftIndex);
                doc.add(field);
            } else {                                                   // stored-only field
                StoredField storedField = new StoredField(fieldMap.getKey(),
                        getterMethod(entity, fieldMap.getKey()).toString());
                doc.add(storedField);
            }
        }
        return doc;
    }

    /**
     * Collect all entity fields via reflection.
     * @param entity        entity of the generic type
     * @param keyWordFields names of the fields to index as keywords
     * @return map of field name to keyword flag
     */
    private Map<String, Integer> getAllFields(T entity, List<String> keyWordFields) {
        Map<String, Integer> fieldsMap = new HashMap<String, Integer>();
        java.lang.reflect.Field[] fields = entity.getClass().getDeclaredFields();
        for (int i = 0; i < fields.length; i++) {
            Integer keyWordFlag = LuceneContants.NO_KEY_WORD;
            String fieldName = fields[i].getName();
            for (String keyWordField : keyWordFields) {
                if (keyWordField.equals(fieldName)) {
                    keyWordFlag = LuceneContants.IS_KEY_WORD;
                }
            }
            fieldsMap.put(fieldName, keyWordFlag);
        }
        return fieldsMap;
    }

    /**
     * Read a field value via its getter.
     * @param obj       the entity
     * @param fieldName the field name
     * @return the field value
     */
    private Object getterMethod(Object obj, String fieldName) {
        Object object = null;
        try {
            Class clazz = obj.getClass();
            PropertyDescriptor propertyDescriptor = new PropertyDescriptor(fieldName, clazz);
            Method getMethod = propertyDescriptor.getReadMethod();   // obtain the getter
            if (getMethod != null) {
                object = getMethod.invoke(obj);                      // invoke the getter
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return object;
    }

    /**
     * Query all data matched by the index.
     * @param queryWhere        query condition
     * @param defaultQueryField default field to query
     * @param keyWordFields     keyword fields that should be highlighted
     * @return a JSON string
     */
    public String searchAll(String queryWhere, String defaultQueryField, List<String> keyWordFields) {
        if ("".equals(queryWhere)) {
            queryWhere = "(*:*)";
            keyWordFields = new ArrayList<String>();
        }
        String result = null;
        IndexReader reader = null;
        IndexSearcher indexSearcher = null;
        try {
            reader = DirectoryReader.open(getDirectory());
            indexSearcher = new IndexSearcher(reader);
            Query query = new QueryParser(Version.LUCENE_46, defaultQueryField, analyzer).parse(queryWhere);
            ScoreDoc[] hits = indexSearcher.search(query, LuceneContants.QUERY_MAX_COUNT).scoreDocs;
            result = resultToJson(indexSearcher, query, hits, keyWordFields, 0, hits.length);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    /**
     * Paged query over all data matched by the index.
     * @param queryWhere        query condition
     * @param defaultQueryField default field to query
     * @param keyWordFields     keyword fields that should be highlighted
     * @param currPage          current page number
     * @param pageSize          number of records per page
     * @return a JSON string
     */
    public String pageSearch(String queryWhere, String defaultQueryField, List<String> keyWordFields,
                             int currPage, int pageSize) {
        if ("".equals(queryWhere)) {
            queryWhere = "(*:*)";
            keyWordFields = new ArrayList<String>();
        }
        String result = null;
        IndexReader reader = null;
        IndexSearcher indexSearcher = null;
        try {
            reader = DirectoryReader.open(getDirectory());
            indexSearcher = new IndexSearcher(reader);
            Query query = new QueryParser(Version.LUCENE_46, defaultQueryField, analyzer).parse(queryWhere);
            ScoreDoc[] hits = indexSearcher.search(query, LuceneContants.QUERY_MAX_COUNT).scoreDocs;
            // paging calculation
            int start = (currPage - 1) * pageSize;
            int totalCount = hits.length;
            int end = Math.min(currPage * pageSize, totalCount);
            result = resultToJson(indexSearcher, query, hits, keyWordFields, start, end);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    /**
     * Convert the matched documents into a JSON string.
     * @param indexSearcher the searcher
     * @param query         the query
     * @param hits          the matched hits
     * @param keyWordFields keyword fields that should be highlighted
     * @param start         start index
     * @param end           end index (exclusive)
     * @return a JSON string
     */
    private String resultToJson(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits,
                                List<String> keyWordFields, int start, int end) {
        JSONArray jsonArray = new JSONArray();
        try {
            Class<T> entityClass = (Class<T>) ((ParameterizedType) this.getClass()
                    .getGenericSuperclass()).getActualTypeArguments()[0];
            T entity = entityClass.newInstance();
            Map<String, Integer> fieldsMap = getAllFields(entity, keyWordFields);
            for (int i = start; i < end; i++) {
                JSONObject jsonObject = new JSONObject();
                Document hitDoc = indexSearcher.doc(hits[i].doc);
                for (Map.Entry<String, Integer> field : fieldsMap.entrySet()) {
                    if (field.getValue() == LuceneContants.IS_KEY_WORD) {   // keyword field: highlight it
                        String value = toHighlighter(query, hitDoc, field.getKey());
                        jsonObject.put(field.getKey(), value);
                    } else {
                        jsonObject.put(field.getKey(), hitDoc.get(field.getKey()));
                    }
                }
                jsonArray.add(jsonObject);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return jsonArray.toString();
    }

    /**
     * Highlight the query terms in a field value.
     * @param query the query
     * @param doc   the document
     * @param field the field name
     * @return the highlighted value
     */
    private String toHighlighter(Query query, Document doc, String field) {
        try {
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"blue\">", "</font>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(doc.get(field)));
            String highlighterStr = highlighter.getBestFragment(tokenStream, doc.get(field));
            return highlighterStr == null ? doc.get(field) : highlighterStr;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Update an index entry.
     * @param entity        entity of the generic type
     * @param keyWordFields names of the fields to index as keywords
     * @return true on success, false on failure
     */
    public boolean update(T entity, List<String> keyWordFields) {
        boolean flag = false;
        IndexWriter indexWriter = null;
        Directory directory = null;
        try {
            directory = getDirectory();
            indexWriter = getIndexWriter(directory);
            Document doc = getDoc(entity, keyWordFields);
            // update the index entry matching the entity's id
            Term term = new Term("id", String.valueOf(getterMethod(entity, "id")));
            indexWriter.updateDocument(term, doc);
            flag = true;
        } catch (Exception e) {
            flag = false;
            e.printStackTrace();
        } finally {
            closeDirectoryAndIndexWriter(directory, indexWriter);
        }
        return flag;
    }

    /**
     * Delete an index entry.
     * @param id id of the entry to delete
     * @return true on success, false on failure
     */
    public boolean delete(Integer id) {
        if (id == null || id.intValue() < 0) {
            throw new RuntimeException("Invalid argument!");
        }
        boolean flag = false;
        IndexWriter indexWriter = null;
        Directory directory = null;
        try {
            // directory where the index is stored
            directory = getDirectory();
            indexWriter = getIndexWriter(directory);
            Term term = new Term("id", String.valueOf(id));
            indexWriter.deleteDocuments(term);
            flag = true;
        } catch (Exception e) {
            flag = false;
            e.printStackTrace();
        } finally {
            closeDirectoryAndIndexWriter(directory, indexWriter);
        }
        return flag;
    }
}

Constants class:
/**
 * Constants.
 *
 * @author: alex
 * @time: 14-4-1 2:24 PM
 * @version: 1.0
 */
public class LuceneContants {
    // directory where the index files are stored
    public static final String INDEX_FILE_PATH = "D://indexFile";
    // maximum number of documents to fetch per query
    public static final int QUERY_MAX_COUNT = 100000;
    // flag: field is an indexed keyword
    public static final int IS_KEY_WORD = 1;
    // flag: field is not an indexed keyword
    public static final int NO_KEY_WORD = 0;
}

2. Service layer usage

 

import java.util.ArrayList;
import java.util.List;

/**
 * Service layer.
 *
 * @author: alex
 * @time: 14-4-2 1:35 PM
 * @version: 1.0
 */
public class PersonSearchService extends BaseLucene<Person> {

    private static List<String> keyWordFields = null;

    static {
        keyWordFields = new ArrayList<String>();
        // full-text search on name and introduce; id is indexed as well so update/delete by id term works
        keyWordFields.add("id");
        keyWordFields.add("name");
        keyWordFields.add("introduce");
    }

    /**
     * Save a person.
     * @param person the person
     */
    public void savePerson(Person person) {
        this.createIndex(person, keyWordFields);
    }

    /**
     * Update a person.
     * @param person the person entity
     * @return true on success, false on failure
     */
    public boolean updatePerson(Person person) {
        return this.update(person, keyWordFields);
    }

    /**
     * Delete a person.
     * @param id the person's id
     * @return true on success, false on failure
     */
    public boolean deletePerson(Integer id) {
        return this.delete(id);
    }

    /**
     * Query all matches for a condition.
     * @param queryWhere        query condition
     * @param defaultQueryField default search field
     * @return JSON data
     */
    public String queryAll(String queryWhere, String defaultQueryField) {
        return this.searchAll(queryWhere, defaultQueryField, keyWordFields);
    }

    /**
     * Paged query for a condition.
     * @param queryWhere        query condition
     * @param defaultQueryField default search field
     * @param currPage          current page number
     * @param pageSize          number of records per page
     * @return JSON data
     */
    public String pageQuery(String queryWhere, String defaultQueryField, int currPage, int pageSize) {
        return this.pageSearch(queryWhere, defaultQueryField, keyWordFields, currPage, pageSize);
    }
}

Entity class:
/**
 * Entity.
 *
 * @author: alex
 * @time: 14-4-2 1:32 PM
 * @version: 1.0
 */
public class Person {

    private int id;
    private String name;
    private int age;
    private String introduce;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getIntroduce() {
        return introduce;
    }

    public void setIntroduce(String introduce) {
        this.introduce = introduce;
    }
}

 

 

3. Test class

 

import junit.framework.Assert;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Test class.
 *
 * @author: alex
 * @time: 14-4-1 10:54 AM
 * @version: 1.0
 */
public class PersonSearchServiceTest {

    static PersonSearchService personSearchService = null;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        personSearchService = new PersonSearchService();
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
    }

    @Test
    public void testSavePerson() {
        Person person = new Person();
        person.setId(1);
        person.setName("张三");
        person.setAge(21);
        person.setIntroduce("张三是中国好演员!");
        personSearchService.savePerson(person);

        person.setId(2);
        person.setName("张一三");
        person.setAge(22);
        person.setIntroduce("张三是中国好替身!");
        personSearchService.savePerson(person);

        person.setId(3);
        person.setName("张三疯");
        person.setAge(23);
        person.setIntroduce("张三是中国好程序员!");
        personSearchService.savePerson(person);

        person.setId(4);
        person.setName("啊张三");
        person.setAge(24);
        person.setIntroduce("张三是中国好声音!");
        personSearchService.savePerson(person);

        person.setId(5);
        person.setName("李三");
        person.setAge(25);
        person.setIntroduce("啊啊啊啊啊啊啊啊啊啊啊啊啊!");
        personSearchService.savePerson(person);
    }

    @Test
    public void testUpdatePerson() {
        Person person = new Person();
        person.setId(4);
        person.setName("张三啊");
        person.setAge(24);
        person.setIntroduce("把啊张三改成了张三啊!");
        boolean result = personSearchService.updatePerson(person);
        Assert.assertTrue(result);
    }

    @Test
    public void testDeletePerson() {
        // delete the record with id 2
        boolean result = personSearchService.deletePerson(2);
        Assert.assertTrue(result);
    }

    @Test
    public void testQuery() {
        String queryWhere = "name:张三 introduce:中国";
        String defaultQueryField = "name";
        String result = personSearchService.queryAll(queryWhere, defaultQueryField);
        System.out.println(result);
    }

    @Test
    public void testPageQuery() {
        String queryWhere = "name:张三 introduce:中国";
        String defaultQueryField = "name";
        String result = personSearchService.pageQuery(queryWhere, defaultQueryField, 1, 2);
        System.out.println(result);
    }
}

 

 

4. Notes

This example does not sort the query results, and a few other areas still need polishing. The code is posted here for reference.
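If sorting were added, Lucene 4.6's IndexSearcher.search(Query, int, Sort) overload could replace the relevance-only call in searchAll/pageSearch. The following is only a rough sketch of that idea, not part of the original code: it assumes the sort field (here "age", chosen arbitrarily) is indexed as a single untokenized numeric value, for example via an IntField, which the getDoc method above does not currently do.

    // Sketch only: sort matches by "age" descending, then by relevance score.
    // Requires: org.apache.lucene.search.Sort, SortField, TopFieldDocs.
    // Assumes "age" is indexed as an untokenized numeric field (e.g. IntField);
    // getDoc() would need a small change before this works as written.
    Sort sort = new Sort(
            new SortField("age", SortField.Type.INT, true),   // true = descending
            SortField.FIELD_SCORE);                           // tie-break by score
    TopFieldDocs topDocs = indexSearcher.search(query, LuceneContants.QUERY_MAX_COUNT, sort);
    ScoreDoc[] hits = topDocs.scoreDocs;                      // same ScoreDoc[] shape as before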

The jars used in the example are managed with Maven during development; the pom file is shown below:

 

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>lucene</groupId>
    <artifactId>lucene</artifactId>
    <version>1.0</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-test-framework</artifactId>
            <version>4.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-smartcn</artifactId>
            <version>3.6.2</version>
        </dependency>
        <dependency>
            <groupId>IKAnalyzer</groupId>
            <artifactId>IKAnalyzer</artifactId>
            <version>2012FF_u1</version>
        </dependency>
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
        </dependency>
        <dependency>
            <groupId>net.sf.ezmorph</groupId>
            <artifactId>ezmorph</artifactId>
            <version>1.0.6</version>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.1</version>
        </dependency>
        <dependency>
            <groupId>commons-beanutils</groupId>
            <artifactId>commons-beanutils</artifactId>
            <version>1.8.3</version>
        </dependency>
        <dependency>
            <groupId>commons-collections</groupId>
            <artifactId>commons-collections</artifactId>
            <version>3.2.1</version>
        </dependency>
    </dependencies>
</project>

The run output is not shown here; interested readers can run the tests themselves.

 

 
