Lucene 4.10 + MySQL 5.5 创建数据库表索引(Lucene 学习序列1)

来源:互联网 发布:淘宝卖宠物用品怎么样 编辑:程序博客网 时间:2024/06/03 09:26

Lucene 4.10 + MySQL 5.5 创建数据库表索引(Lucene 学习序列1)

          Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包。它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。

     Code:

    

package com.qiuzhping.lucene;

/*
 * System Abbrev :
 * system Name  :
 * Component No  :
 * Component Name:
 * File name     :QueryDataFromDb.java
 * Author        :Peter.Qiu
 * Date          :2015-07-28
 * Description   :  <description>
 */

/* Updation record 1:
 * Updation date        :  2015-07-28
 * Updator          :  Peter.Qiu
 * Trace No:  <Trace No>
 * Updation No:  <Updation No>
 * Updation Content:  <List all contents of updation and all methods updated.>
 */

import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Random;
import java.util.UUID;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo that copies the rows of the MySQL table {@code student} into a Lucene
 * index on disk and then runs an exact-match term query against that index.
 *
 * <p>Each row becomes one document with the untokenized, stored fields
 * {@code id}, {@code name} and {@code math}.
 *
 * @author  Peter.Qiu
 * @version  [Version NO, 2015-07-28]
 * @see  [Related classes/methods]
 * @since  [product/module version]
 */
public class QueryDataFromDb {

    /** Filesystem location of the Lucene index. */
    private static final String INDEX_PATH = "C:/lucene/index02";

    /** Number of rows fetched per {@code LIMIT} page and inserted per batch. */
    private static final int PAGE_SIZE = 100000;

    /** Number of pages read by {@link #index()} and batches written by {@link #insertData()}. */
    private static final int PAGE_COUNT = 10;

    /** Index directory; set by {@link #index()} or lazily by {@link #getSearcher()}. */
    private Directory directory = null;

    /** When {@code false}, {@link #insertData()} does nothing. */
    private static boolean insertFlag = true;

    /**
     * Opens a new JDBC connection to the {@code hpsdb} schema.
     *
     * <p>NOTE(review): URL and credentials are hard-coded for the demo; move
     * them to configuration before using this anywhere real.
     *
     * @return a new connection; the caller is responsible for closing it
     * @throws SQLException if the connection cannot be established
     * @throws java.lang.ClassNotFoundException if the MySQL driver class is not on the classpath
     */
    public static Connection getConnection() throws SQLException,
            java.lang.ClassNotFoundException {
        String url = "jdbc:mysql:///hpsdb";
        Class.forName("com.mysql.jdbc.Driver");
        String userName = "root";
        String password = "123456";
        return DriverManager.getConnection(url, userName, password);
    }

    /**
     * Fills the {@code student} table with random test rows: ten multi-row
     * {@code INSERT} statements of 100000 rows each. Does nothing when
     * {@code insertFlag} is {@code false}; the flag is checked once, before
     * any connection is opened.
     *
     * <p>NOTE(review): each statement is several megabytes long, so the server
     * presumably needs a raised {@code max_allowed_packet} - confirm.
     *
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws SQLException if connecting or inserting fails
     */
    public static void insertData() throws ClassNotFoundException, SQLException {
        if (!insertFlag) {
            return;
        }
        // try-with-resources closes the statement and connection even when an insert fails.
        try (Connection conn = getConnection();
                Statement st = conn.createStatement()) {
            Random random = new Random();
            for (int batch = 0; batch < PAGE_COUNT; batch++) {
                StringBuilder sql = new StringBuilder("insert student (name,math) values");
                for (int row = 0; row < PAGE_SIZE; row++) {
                    // Separator goes in front of every tuple but the first, so
                    // no trailing comma has to be cut off afterwards.
                    if (row > 0) {
                        sql.append(',');
                    }
                    String uuid = UUID.randomUUID().toString().replace("-", "");
                    sql.append("('").append(uuid).append("',")
                            .append(random.nextInt(100)).append(')');
                }
                st.execute(sql.toString());
            }
        }
    }

    /**
     * Reads the {@code student} table in ten pages of 100000 rows and adds
     * every row to the Lucene index as one document, then prints the number of
     * documents added.
     *
     * <p>The index is opened with {@code CREATE_OR_APPEND}, so running this
     * method again against an existing index adds the same rows a second time.
     *
     * @throws SQLException if reading from the database fails
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws IOException if the index cannot be opened, written or closed
     */
    public void index() throws SQLException, ClassNotFoundException, IOException {
        directory = FSDirectory.open(new File(INDEX_PATH));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
        conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
        conf.setMaxBufferedDocs(100);

        // Clearing the flag first turns the following call into a no-op:
        // the table is expected to be populated already.
        insertFlag = false;
        insertData();

        // Starts at zero so the printed total equals the number of documents
        // added (it used to start at 1 and over-report by one).
        long count = 0;

        // Resources are released in reverse order (statement, connection,
        // writer) on both the normal and the exceptional path, exactly once.
        try (IndexWriter writer = new IndexWriter(directory, conf);
                Connection conn = getConnection();
                Statement st = conn.createStatement()) {
            for (int page = 0; page < PAGE_COUNT; page++) {
                // NOTE(review): there is no ORDER BY, so page boundaries rely
                // on the server returning rows in a stable order - confirm.
                String query = "select * from student limit " + page * PAGE_SIZE + "," + PAGE_SIZE;
                try (ResultSet result = st.executeQuery(query)) {
                    while (result.next()) {
                        Document document = new Document();
                        document.add(new StringField("id", result.getString("id"), Field.Store.YES));
                        document.add(new StringField("name", result.getString("name"), Field.Store.YES));
                        document.add(new StringField("math", result.getString("math"), Field.Store.YES));
                        writer.addDocument(document);
                        count++;
                    }
                }
            }
        }
        System.out.println("Total record : " + count);
    }

    /**
     * Opens a fresh reader on the index and wraps it in a searcher.
     *
     * <p>If {@link #index()} has not been called on this instance, the
     * directory is opened at the default index path instead of failing on a
     * {@code null} directory.
     *
     * @return a searcher over a newly opened reader; the caller must close it
     *         via {@code searcher.getIndexReader().close()}
     * @throws IOException if the directory or the index cannot be opened
     */
    public IndexSearcher getSearcher() throws IOException {
        if (directory == null) {
            directory = FSDirectory.open(new File(INDEX_PATH));
        }
        IndexReader reader = DirectoryReader.open(directory);
        return new IndexSearcher(reader);
    }

    /**
     * Runs an exact-match query on one field and prints the total hit count
     * followed by the stored fields of the top hits.
     *
     * @param field name of the indexed field to match
     * @param name exact term value to look for
     * @param num maximum number of hits to print
     * @throws IOException if the index cannot be read
     */
    public void searchByTerm(String field, String name, int num) throws IOException {
        IndexSearcher searcher = getSearcher();
        try {
            // WildcardQuery: pattern (fuzzy) matching
            // TermQuery: exact matching
            Query query = new TermQuery(new Term(field, name));
            TopDocs tds = searcher.search(query, num);
            System.out.println("count:" + tds.totalHits);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("id:" + doc.get("id"));
                System.out.println("name:" + doc.get("name"));
                System.out.println("math:" + doc.get("math"));
            }
        } finally {
            // The reader was opened only for this search; release its file handles.
            searcher.getIndexReader().close();
        }
    }

    /**
     * Builds the index, then times a single term query for {@code math == 90}.
     *
     * @param args unused
     * @throws ClassNotFoundException if the MySQL driver class is not on the classpath
     * @throws SQLException if reading from the database fails
     * @throws IOException if the index cannot be written or read
     */
    public static void main(String[] args) throws ClassNotFoundException,
            SQLException, IOException {
        QueryDataFromDb indexUtil = new QueryDataFromDb();
        indexUtil.index();
        int i = 0;
        long start = System.currentTimeMillis();
        // Look up the first two records whose math score is 90.
        System.out.println("查找前90分前2名的信息");
        indexUtil.searchByTerm("math", "90", 2);
        System.out.println(i + " Spend time:" + (System.currentTimeMillis() - start) + " ms");
    }
}

测试的结果是:

Total record : 1000001
查找前90分前2名的信息
count:36212
id:298904
name:636ab6012e4b429ea54d176f28f5fa1c
math:90
id:299156
name:085af1feb39b42f0be1c6e9f3f814526
math:90
0 Spend time:805 ms


涉及到Lucene 核心包链接:http://mirrors.hust.edu.cn/apache/lucene/java/4.10.4/

代码片段涉及到:

lucene-analyzers-common-4.10.4.jar

lucene-core-4.10.4.jar

lucene-queryparser-4.10.4.jar

mysql-connector-java-5.1.35.jar


1 0
原创粉丝点击