Lucene之——搜索实例

来源:互联网 发布:科蓝软件java面试题 编辑:程序博客网 时间:2024/06/05 19:16

转载请注明出处:http://blog.csdn.net/l1028386804/article/details/49287663

实现一个搜索功能:要求将所有包含搜索关键字的文章的标题列出来(文章的内容存储在Oracle的CLOB字段中),也就是要用Lucene实现对数据库大字段的索引和搜索。Lucene索引的创建通过Java定时任务来完成。

定时调用建立索引方法

package com.qqw.index;

import java.util.Timer;

/**
 * Stand-alone launcher that schedules the index-building task on a
 * {@link Timer}.
 *
 * @author liuyazhuang
 * @create 2015-10-20
 */
public class IndexerServer {

    /**
     * Registers the configuration file name, then schedules the singleton
     * {@link LuceneDBIndexerTask} at a fixed rate, starting immediately.
     * The repeat period is read from
     * {@link Constant#CREATE_INDEX_SLEEP_TIME} and converted with
     * {@link DataTypeUtil#toLong(Object)}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Must happen before Constant is first touched, because Constant
        // loads the configuration in its static initializers.
        Config.setConfigFileName("directory.properties");

        Timer scheduler = new Timer();
        LuceneDBIndexerTask indexTask = LuceneDBIndexerTask.getInstance();
        long periodMillis = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);
        scheduler.scheduleAtFixedRate(indexTask, 0, periodMillis);
    }
}
建立索引的核心实现
package com.qqw.index;import java.io.BufferedReader;  import java.io.File;  import java.io.IOException;  import java.io.StringWriter;  import java.sql.Connection;  import java.sql.DriverManager;  import java.sql.ResultSet;  import java.sql.SQLException;  import java.sql.Statement;  import java.text.SimpleDateFormat;  import java.util.Arrays;  import java.util.Date;  import java.util.TimerTask;    import oracle.sql.CLOB;    import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;  import org.apache.lucene.document.Field;  import org.apache.lucene.index.IndexWriter;  import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;    /**   * 建立索引的任务类   * @author liuyazhuang  * @create 2015-10-20 */  public class LuceneDBIndexerTask extends TimerTask {      //缺省索引目录    private static String DEFAULT_INDEX_DIR="C:\\IndexDB";       //临时索引目录的父目录      private File parentDir=null;       //被搜索的索引文件      private static LuceneDBIndexerTask index=new LuceneDBIndexerTask();            //构造方法      private LuceneDBIndexerTask(){          String dirStr=Constant.INDEX_STORE_DIRECTORY;          if(dirStr!=null&&!"".equals(dirStr)){              this.parentDir=new File(dirStr);          }else{              this.parentDir=new File(DEFAULT_INDEX_DIR);          }          if(!this.parentDir.exists()){              this.parentDir.mkdir();          }      }      /**      * 单实例访问接口      * @return      */      public static LuceneDBIndexerTask getInstance(){          return index;      }       /**      * 锁定目录以及文件      * 只允许单线程访问      *      */      /*public synchronized  void singleRunning(){         if(flag==false){             flag=true;             run(parentDir);          }     }*/         /**      * 为数据库字段建立索引      */      public void run()  {          
System.out.println("====LuceneDBIndexerTask$run()===============");          System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");          Connection conn=null;          Statement stmt=null;          ResultSet rs=null;        String filedir="d:\\fileIndex\\blogs";File indexDir = new File(filedir);        Analyzer analyzer = new IKAnalyzer();IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33,analyzer);conf.setOpenMode(OpenMode.CREATE);        try {               Class.forName(Constant.DB_DRIVER_STRING).newInstance();               conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);               stmt = conn.createStatement();               rs = stmt.executeQuery(Constant.DB_QUERY_STRING);               File file=new File(parentDir+File.separator+new SimpleDateFormat("yyyyMMddHHmmss").format(new Date())+File.separator);               if(!file.exists()){                   file.mkdir();               }               IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir), conf);            long startTime = new Date().getTime();              while (rs.next()) {                  Document doc = new Document();                   doc.add(new Field("ARTICLEID", rs.getString("ARTICLEID"), Field.Store.YES,Field.Index.ANALYZED));                   doc.add(new Field("TITLE", rs.getString("TITLE"), Field.Store.YES,Field.Index.ANALYZED));                   doc.add(new Field("USERNAME", rs.getString("USERNAME"), Field.Store.YES,Field.Index.ANALYZED));                   doc.add(new Field("USERID", rs.getString("USERID"), Field.Store.YES,Field.Index.ANALYZED));                   //对日期建立索引                   String createdate=new SimpleDateFormat("yyyy-MM-dd").format(rs.getTimestamp("CREATEDATE"));                   doc.add(new Field("CREATEDATE", createdate, Field.Store.YES,Field.Index.ANALYZED));                   //对大字段建立索引                   BufferedReader in=null;                   String content="";                  
 CLOB clob =  (CLOB) rs.getClob("CONTENT");                   if (clob != null) {                      //得到一个读入流                    in=new BufferedReader(clob.getCharacterStream());                    StringWriter out=new StringWriter();                    int c;                    while((c=in.read())!=-1){                        out.write(c);                     }                    content=out.toString();                  }                  doc.add(new Field("CONTENT", content, Field.Store.YES, Field.Index.ANALYZED));                  writer.addDocument(doc);              }              writer.optimize();              writer.close();                            //测试一下索引的时间                 long endTime = new Date().getTime();              System.out.println("索引文件"+file.getPath()+"建立成功...");              System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");              //判断文件目录file下的文件个数如果大于3,就将文件建立最早的文件给删除掉              checkFiles(parentDir);          } catch (IOException e) {              e.printStackTrace();          } catch (SQLException e) {              e.printStackTrace();          } catch (ClassNotFoundException e) {              e.printStackTrace();          } catch (InstantiationException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IllegalAccessException e) {// TODO Auto-generated catch blocke.printStackTrace();}finally{           try {               if(rs!=null){                 rs.close();                  }               if(stmt!=null){                   stmt.close();                   }                if(conn!=null){                   conn.close();                   }               } catch (SQLException e) {                  e.printStackTrace();              }          }      }             /**      * 判断文件目录file下的文件个数如果大于3,就将文件建立最早的文件给删除掉      */      public  void checkFiles(File dir) {          int length=dir.listFiles().length;          while(length>3){              //删除生成最早的文件           File [] 
files=dir.listFiles();           String[] names=dir.list();           Arrays.sort(names);           File deletefile=files[0];           deleteDirectory(deletefile);           length--;         }      }      /*      * 递归删除一个目录以及下面的文件      */      public boolean deleteDirectory(File path) {             if( path.exists() ) {               File[] files = path.listFiles();               for(int i=0; i<files.length; i++) {                  if(files[i].isDirectory()) {                    deleteDirectory(files[i]);                 }                  else {                      //删除文件                 files[i].delete();                  }               }             }             //删除目录          boolean hasdelete=path.delete();          if(hasdelete){              System.out.println("删除索引目录"+path);          }          return hasdelete;        }         public static void main(String[] args) {        new LuceneDBIndexerTask().run();      }    }  
配置文件管理类:
package com.qqw.index;import java.io.IOException;  import java.io.InputStream;  import java.util.Properties;  /**  *   * @author liuyazhuang * @create 2015-10-20 *  */  public class Config {      private static Config cfg = null;      private static String configFileName = null;      private Properties props;      public Config() {          props = new java.util.Properties();      }      /**      * 单例访问接口      * @return      */      public synchronized static Config getInstance() {          if (cfg == null) {              cfg = new Config();              cfg.loadConfig();              return cfg;          } else {              return cfg;          }      }      private int loadConfig() {          if (configFileName != null || configFileName.length() > 0) {              InputStream inputStream = Config.class.getClassLoader()                      .getResourceAsStream("directory.properties");              System.out.println("configFileName=" + configFileName);              try {                  props.load(inputStream);              } catch (IOException e) {                  e.printStackTrace();              }              return 1;          }          return 0;      }        public static void setConfigFileName(String cfg) {          configFileName = cfg;      }      public String getProperty(String keyName) {          return props.getProperty(keyName);      }    }  
常量配置

package com.qqw.index;/**  * 常量配置类 *  * @author liuyazhuang  * @create 2015-10-20  */  public class Constant {        // 隔多长时间建立一次索引      public static final String CREATE_INDEX_SLEEP_TIME = Config.getInstance()              .getProperty("create_index_sleep_time");      // 索引文件存放路径      public static final String INDEX_STORE_DIRECTORY = Config.getInstance()              .getProperty("index_store_directory");      //数据库驱动程序      public static final String DB_DRIVER_STRING = Config.getInstance()      .getProperty("db_driver_string");      //数据库连接URI      public static final String DB_URI_STRING = Config.getInstance()      .getProperty("db_uri_string");      //数据库连接username      public static final String DB_USERNAME= Config.getInstance()      .getProperty("db_username");       //数据库连接pwd      public static final String DB_PWD= Config.getInstance()      .getProperty("db_pwd");      //数据库查询语句db_query_str      public static final String DB_QUERY_STRING= Config.getInstance()      .getProperty("db_query_string");        }  
数据类型处理类:
package com.qqw.index;/**  * 数据类型转换工具类  * @author liuyazhuang * @create 2015-10-20 */  public class DataTypeUtil {       /**      * 将对象转换为整数型      * @param o  源对象      * @return 对应的Long值,如果出错,则返回Long.MIN_VALUE      */      public static long toLong(Object o) {          if (o == null) {              throw new IllegalArgumentException("该对象为空");          }          String s = o.toString();          try {              return Long.parseLong(s);          } catch (Exception ex) {              return Long.MAX_VALUE;          }      }  }  
配置文件
#== the directory for store lucene-index ========#
index_store_directory=D\:/lucene/indexDB/

#======== two hours ========#
#create_index_sleep_time=7200000

#======== two minutes ========#
# NOTE(review): 7200000 ms is two hours; two minutes would be 120000 - confirm the intended interval.
create_index_sleep_time=7200000

# JDBC driver class, connection URI and credentials
db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc\:oracle\:thin\:@localhost\:1521\:orcl
db_username=test
db_pwd=test

# Query whose rows are indexed
db_query_string=SELECT  * from journalarticle
核心搜索类:
package com.qqw.search;import java.io.File;  import java.io.IOException;  import java.util.ArrayList;  import java.util.HashMap;import java.util.List;  import java.util.Map;  import org.apache.lucene.document.Document;  import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.search.BooleanClause;import org.apache.lucene.search.IndexSearcher;  import org.apache.lucene.search.Query;  import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Formatter;import org.apache.lucene.search.highlight.Fragmenter;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.Scorer;import org.apache.lucene.search.highlight.SimpleFragmenter;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.FSDirectory;  import org.wltea.analyzer.lucene.IKAnalyzer;import org.wltea.analyzer.lucene.IKQueryParser;import org.wltea.analyzer.lucene.IKSimilarity;  /**  * 负责搜索的类 * @author liuyazhuang * @create 2015-10-20 */  public class LuceneDBQuery {        private static LuceneDBQuery search = new LuceneDBQuery();        // 构造方法      private LuceneDBQuery() {        }        /**      * 单实例访问接口      *       * @return      */      public static LuceneDBQuery getInstance() {          return search;      }   public List<Map<String,Object>> seacherStr(String[] indexFields,String[] searchFields,String queryString,String searchdictory,String[] highlighterFields) {List<Map<String,Object>> list = null;TopDocs topDocs = null;Query query = null;IndexSearcher searcher = null;try {searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only        BooleanClause.Occur[] flags=new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST};     query = 
IKQueryParser.parseMultiField(searchFields, queryString,flags);// 多个//在索引器中使用IKSimilarity相似度评估器searcher.setSimilarity(new IKSimilarity());// 准备高亮器Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");Scorer fragmentScorer = new QueryScorer(query);Highlighter highlighter = new Highlighter(formatter, fragmentScorer);Fragmenter fragmenter = new SimpleFragmenter(100);// 高亮范围highlighter.setTextFragmenter(fragmenter);if (searcher != null) {topDocs = searcher.search(query, 100);// 100是显示队列的SizeScoreDoc[] hits = topDocs.scoreDocs;System.out.println("共有" + searcher.maxDoc() + "条索引,命中"+ hits.length + "条");list = new ArrayList<Map<String,Object>>();for (int i = 0; i < hits.length; i++) {//长度遍历ScoreDoc scoreDoc = topDocs.scoreDocs[i];// 读取第几条记录int docSn = scoreDoc.doc;// 文档内部编号Document document = searcher.doc(docSn);Map<String,Object> map=new HashMap<String, Object>();// 高亮for (int k = 0; k < indexFields.length; k++) {//遍历所有的字段map.put(indexFields[k], document.get(indexFields[k]));for (int j = 0; j < highlighterFields.length; j++) {//遍历要高亮的字段,要高亮的字段肯定小于等于所有的字段// 如果当前属性值中没有出现关键字,则返回nullString hctemp = highlighter.getBestFragment(new IKAnalyzer(), "\""+highlighterFields[j]+"\"", document.get(highlighterFields[j]));if (hctemp == null) {hctemp = document.get(highlighterFields[j]);}map.put(highlighterFields[j], hctemp);}}list.add(map);}}}catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (InvalidTokenOffsetsException e) {// TODO Auto-generated catch blocke.printStackTrace();}return list;}// 查询分页public List<Map<String,Object>> seacherStrbyPage(String[] indexFields,String[] searchFields,String queryString,String searchdictory, int firstResult, int maxResult,String[] highlighterFields) {List<Map<String,Object>> list = null;TopDocs topDocs = null;Query query = null;IndexSearcher searcher = null;try {searcher = new 
IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only// QueryParser qp = new QueryParser(Version.LUCENE_33, fields,// new StandardAnalyzer(Version.LUCENE_33));// 有变化的地方 单个字段关联//// 使用IKQueryParser查询分析器构造Query对象// //声明BooleanClause.Occur[]数组,它表示多个条件之间的关系// BooleanClause.Occur[] flags=new// BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST};query = IKQueryParser.parseMultiField(searchFields, queryString);// 多个// //在索引器中使用IKSimilarity相似度评估器searcher.setSimilarity(new IKSimilarity());// query = IKQueryParser.parse(field, queryString);// QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_33,// fields,// new IKAnalyzer());// 有变化的地方 多个地段关联//// query = qp.parse(queryString);// 准备高亮器Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");Scorer fragmentScorer = new QueryScorer(query);Highlighter highlighter = new Highlighter(formatter, fragmentScorer);Fragmenter fragmenter = new SimpleFragmenter(100);// 高亮范围highlighter.setTextFragmenter(fragmenter);if (searcher != null) {topDocs = searcher.search(query, 100);// 100是显示队列的SizeScoreDoc[] hits = topDocs.scoreDocs;System.out.println("共有" + searcher.maxDoc() + "条索引,命中"+ hits.length + "条");list = new ArrayList<Map<String,Object>>();for (int i = firstResult - 1; i < firstResult + maxResult - 1; i++) {//按照分页的长度遍历//for (int i = 0; i < hits.length; i++) {//长度遍历ScoreDoc scoreDoc = topDocs.scoreDocs[i];// 读取第几条记录int docSn = scoreDoc.doc;// 文档内部编号Document document = searcher.doc(docSn);Map<String,Object> map=new HashMap<String, Object>();// 高亮for (int k = 0; k < indexFields.length; k++) {//遍历所有的字段map.put(indexFields[k], document.get(indexFields[k]));for (int j = 0; j < highlighterFields.length; j++) {//遍历要高亮的字段,要高亮的字段肯定小于等于所有的字段// 如果当前属性值中没有出现关键字,则返回nullString hctemp = highlighter.getBestFragment(new IKAnalyzer(), "\""+highlighterFields[j]+"\"", document.get(highlighterFields[j]));if (hctemp == null) {hctemp = 
document.get(highlighterFields[j]);}map.put(highlighterFields[j], hctemp);}}list.add(map);}}}catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (InvalidTokenOffsetsException e) {// TODO Auto-generated catch blocke.printStackTrace();}return list;}// 取得符合搜索条件的所有记录总数,以便分页 , 与上面方法类似public int getResultCount(String[] searchFields,String queryString, String searchdictory)throws Exception {TopDocs topDocs = null;Query query = null;IndexSearcher searcher = null;try {searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-onlyquery = IKQueryParser.parseMultiField(searchFields, queryString);// 多个// //在索引器中使用IKSimilarity相似度评估器searcher.setSimilarity(new IKSimilarity());if (searcher != null) {topDocs = searcher.search(query, 100);// 100是显示队列的Size}} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}// ScoreDoc[] hits = topDocs.scoreDocs;取得还是hits的lengthreturn topDocs.scoreDocs.length;}  }  
配置文件管理类:
package com.qqw.search;import java.io.IOException;  import org.jdom.Document;  import org.jdom.Element;  import org.jdom.JDOMException;  import org.jdom.input.SAXBuilder;   /**  * 配置文件的管理类 * @author liuyazhuang * @create 2015-10-20 */  public class LuceneDBQueryUtil {         public static String getIndexPath(){            String filePath = "zxt_index.xml";          String indexPath="";          SAXBuilder builder = new SAXBuilder(false);          try {              Document doc = builder.build(Thread.currentThread().getContextClassLoader().getResource(filePath));              Element rootElement = doc.getRootElement();               Element index=rootElement.getChild("index");               indexPath=index.getText();               System.out.println(indexPath);          } catch (JDOMException e) {              e.printStackTrace();          } catch (IOException e) {              e.printStackTrace();          }          return indexPath;         }  }  
通过ServletContextListener配置定时任务
package com.qqw.timer;import java.util.Timer;import javax.servlet.ServletContextEvent;import javax.servlet.ServletContextListener;import com.qqw.index.Constant;import com.qqw.index.LuceneDBIndexerTask;/**  * 定时操作 * @author liuyazhuang * @create 2015-10-20 */  public class MyListener implements ServletContextListener {  private Timer timer = null;  public void contextInitialized(ServletContextEvent event) {    timer = new Timer(true);    //设置任务计划,启动和间隔时间    timer.schedule(LuceneDBIndexerTask.getInstance(), 0,Long.valueOf(Constant.CREATE_INDEX_SLEEP_TIME));  }  public void contextDestroyed(ServletContextEvent event) {    timer.cancel();  }}
web.xml 配置
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.4"
         xmlns="http://java.sun.com/xml/ns/j2ee"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">

  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>

  <!-- Servlet configuration -->
  <servlet>
    <servlet-name>SearchServlet</servlet-name>
    <servlet-class>dataFromOracle.servlet.SearchServlet</servlet-class>
  </servlet>
  <servlet-mapping>
    <servlet-name>SearchServlet</servlet-name>
    <url-pattern>/SearchServlet</url-pattern>
  </servlet-mapping>

  <!-- Listener that schedules the index-building timer task -->
  <listener>
    <listener-class>com.qqw.timer.MyListener</listener-class>
  </listener>
</web-app>
数据库表文件
-- Create table
create table JOURNALARTICLE
(
  ARTICLEID  NUMBER(10) not null,
  TITLE      VARCHAR2(255) not null,
  USERNAME   VARCHAR2(4000) not null,
  USERID     VARCHAR2(255) not null,
  CREATEDATE TIMESTAMP(6) not null,
  CONTENT    CLOB
);
-- Create/Recreate primary, unique and foreign key constraints
alter table JOURNALARTICLE
  add constraint ARTICLEID primary key (ARTICLEID);
通过以上的代码,移植到新项目时只需要修改配置文件即可。Lucene索引的建立不需要考虑什么时候进行,只要保证数据库连接处于正常状态即可;索引字段和搜索字段都可以通过配置的形式表现出来。分页功能和高亮功能也都包含在其中。

1 0
原创粉丝点击