利用lucene对整个数据库建立索引

来源：互联网发布：全文期刊数据库编辑：程序博客网时间：2024/05/17 23:29

利用lucene对整个数据库建立索引

导言：

如果要对整个数据库做精确查询或模糊查询，我们怎么才可以做到？还是通过SQL查询吗？答案是否定的。因为，通过SQL对整个数据库做精确查询或模糊查询，速度将非常的慢；

lucene解决了这个问题。通过对表或者文本文件预先建立索引，可以很快的实现全文检索。

思路：

1、通过SQL得到所有表名的集合---->2、遍历所有的表，分别为每个表的每个记录建立索引；同时添加表的中文名以及表的说明的索引---->按Writer\analyzer\document\field的循序建索引。

IndexerDB类：

package com.model;import java.io.File;import java.sql.ResultSet;import java.sql.ResultSetMetaData;import java.util.Date;import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class IndexerDB { // 保存索引文件的地方 private static String INDEX_DIR = "F:\\MyLuceneDB2\\LuceneFileIndexDir"; private String index_dir; private File file = new File(INDEX_DIR); // 将要搜索TXT文件的地方 private String data_dir; private String DATA_DIR = "F:\\Lucene"; private String entityName; // private ApiInfo apiInfo; // private ApiInfoQuery apiInfoQuery = new ApiInfoQuery(); // private ApiInfoService apiInfoService; // private List<ApiIndicator> apiIndicators = new ArrayList<ApiIndicator>(); // private ApiIndicator apiIndicator; private StringBuilder newsb = new StringBuilder(); private ResultSet rs = null; private ResultSet tempRs = null; private ResultSetMetaData rsmd = null; // private SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer( // Version.LUCENE_30, true); private Document doc; private String tableName; public static void main(String[] args) throws Exception {  IndexerDB indexDB = new IndexerDB();  File file = new File(INDEX_DIR);  // 1  indexDB.execute(file); } // 执行对数据库的索引化 // @param file // @return // @throws Exception // public String execute(File file) throws Exception {  long start = new Date().getTime();  // 2  int numIndexed = getAllTableNameFromDBAndIndexing(file);  long end = new Date().getTime();  System.out.println("Indexing " + numIndexed + " files took "    + (end - start) + " milliseconds");  return "success"; } // 查询所有的表，并遍历所有的表； // 调用getDataFromTable(tableName)获取表的记录； // 调用indexData(writer, rs, tableName)对单张表做索引 public int getAllTableNameFromDBAndIndexing(File file) throws Exception {  ResultSet rs = null;  String sql1 = null;  String sql2 = null;  sql1 = "select [name] from [sysobjects] where [type] = 'u' order by [name]";  sql2 = "show tables";  SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(    Version.LUCENE_36, true);  IndexWriter writer = new IndexWriter(FSDirectory.open(file), analyzer,    true, IndexWriter.MaxFieldLength.LIMITED);  try {   rs = JDBCUtil.execute(sql1);  } catch (Exception e) {   rs = JDBCUtil.execute(sql2);  }  while (rs.next()) {   tableName = rs.getString(1);   System.out.println("tableName: " + rs.getString(1));   // ResultSetMetaData rsmd = rs.getMetaData();   ResultSet tempRs = getDataFromTable(tableName);   System.out.println("already get data of table");   indexData(writer, tempRs, tableName);  }  int numIndexed = writer.numDocs();  writer.optimize();  writer.close();  return numIndexed; } // 取得表的数据 public ResultSet getDataFromTable(String tableName) throws Exception {  String sql = "select * from " + tableName;  System.out.println("" + sql);  // String sql = "select * from " + "HK_STKCODE";  return JDBCUtil.execute(sql); } // 对特定表的记录，采用特定writer，索引 // 该方法是lucene的indexer的关键方法 // writer---->document---->field private void indexData(IndexWriter writer, ResultSet rs, String tableName)   throws Exception {  if (rs == null) {   return;  }  // 取得实体名  // System.out.println("tableName: " + tableName);  entityName = getEntityName(tableName);  System.out.println(" .entityName: " + entityName);  while (rs.next()) {   doc = new Document();   rsmd = rs.getMetaData();   int colsNum = rsmd.getColumnCount();   // System.out.println("colsNum: "+colsNum);   for (int i = 1; i < colsNum + 1; i++) {    String columnName = rsmd.getColumnName(i);    // System.out.println("columnName: "+columnName);    // System.out.println(" rs.getString(i): "+ rs.getString(i));    // 1、对该条记录的第一个字段进行索引    doc.add(new Field(columnName, (rs.getString(i) == null ? ""      : rs.getString(i)), Field.Store.YES,      Field.Index.ANALYZED));   }   // 2、在此处添加，表的说明(description);来至于apiinfo;根据表对应的实体名，找到表对应的apiinfo实体   // apiInfoQuery.setEntityname(entityName);   // apiInfo = apiInfoService.gainApiInfoByEntityName(entityName);   // if (apiInfo != null) {   // doc.add(new Field("apiDesc", apiInfo.toString(), Field.Store.YES,   // Field.Index.ANALYZED));   // }   // 3、在此处添加，表的中英文字段：   // 来自于apiindicator;根据表对应的实体名，去APIINFO中取找apiid,再去apiindicator中找apiindicator的实体   // List<ApiIndicator> apiIndicators = apiInfoService   // .gainApiIndicatorsByApiId(apiInfo.getId());   // // 输出指标和描述结合为字符串   // for (int i = 0; i < apiIndicators.size(); i++) {   // apiIndicator = apiIndicators.get(i);   // newsb.append(apiIndicator.getCnname() + " ")   // .append(apiIndicator.getEnname() + " ")   // .append(apiIndicator.getDescription() + "");   // newsb.append("\n");   // }   // System.out.println("newsb.toString(): " + newsb.toString());   // doc.add(new Field("outputFields", newsb.toString(),   // Field.Store.YES, Field.Index.ANALYZED));   // 4、文件名   doc.add(new Field("apiName", entityName, Field.Store.YES,     Field.Index.ANALYZED));   writer.addDocument(doc);  } } // 将表名通过字符串处理成为实体名 public String getEntityName(String tableName) {  // tableName = tableName.substring(0, tableName.indexOf(".txt"));  StringBuilder sb = new StringBuilder();  // 将"_"替换掉，如果有的话，以便处理成与API对应的实体一样的字符串。如：PUB_SEITGC.txt，RSH_RSHRPT_INDFO.txt  if (tableName.indexOf("_") != -1) {   String[] subStrings = tableName.split("_");   for (int i = 0; i < subStrings.length; i++) {    sb.append(subStrings[i]);   }   return sb.toString();  } else {   return tableName;  } } public String getIndex_dir() {  return index_dir; } public void setIndex_dir(String index_dir) {  this.index_dir = index_dir; } public String getData_dir() {  return data_dir; } public void setData_dir(String data_dir) {  this.data_dir = data_dir; } // public ApiInfoService getApiInfoService() { // return apiInfoService; // } // // public void setApiInfoService(ApiInfoService apiInfoService) { // this.apiInfoService = apiInfoService; // }}

JDBCUtil类：

// 数据库查询类//JDBCUtil用于取得连接；JDBCUtil.execute(sql)用于执行SQL，并返回resultset.package com.model;import java.sql.Connection;import java.sql.DriverManager;import java.sql.ResultSet;import java.sql.SQLException;import java.sql.Statement;public class JDBCUtil { public static Connection conn = null; // 建立连接 static void getConntion() {  try {   String driver_class = "com.mysql.jdbc.Driver";   // ?useUnicode=true&characterEncoding=utf-8   String connection_url = "jdbc:mysql://localhost:3306/sy001?useUnicode=true&characterEncoding=UTF-8";   String user_name = "root";   String db_password = "0882115303";   Class.forName(driver_class);   conn = DriverManager.getConnection(connection_url, user_name,     db_password);   conn.setAutoCommit(false);  } catch (Exception e) {   e.printStackTrace();  } } // 查询后，得到数据 public static ResultSet execute(String sql) throws Exception {  // 取得连接  getConntion();  // 写SQL  // 得到一个statment对象  Statement stmt = conn.createStatement();  // 得到一个结果集  return stmt.executeQuery(sql); } static void close() {  if (conn != null) {   try {    conn.close();   } catch (SQLException e) {    e.printStackTrace();   }  } } public static Connection getConn() {  return conn; }}