lucene入门demo
来源:互联网 发布:淘宝互刷平台 编辑:程序博客网 时间:2024/05/18 23:12
lucene简单入门
概念:
Document:文档
Field:域
query:查询
analyzer:分词器
一个文档 可以包含多个域。
直接上代码
pom.xml
<properties> <lucene.version>4.0.0</lucene.version> </properties> <dependencies> <!-- 搜索引擎 lucene --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>${lucene.version}</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>${lucene.version}</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>${lucene.version}</version> </dependency> <!--高亮 --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>${lucene.version}</version> </dependency> </dependencies>
D:\ftp\lucene\lucene1.txt
Students should be allowed to go out with their friends, but not allowed to drink beer.
D:\ftp\lucene\lucene2.txt
My friend Jerry went to school to see his students but found them drunk which is not allowed.
package com.team.lucene;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import java.nio.charset.StandardCharsets;import lombok.extern.slf4j.Slf4j;import org.apache.commons.io.IOUtils;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.LongField;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.index.Term;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.Filter;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Formatter;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleFragmenter;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;/** * @ClassName:LuceneTest.java * @Description:搜索引擎lucene学习 * @author gaoguangjin * @Date 2015-7-13 下午2:54:03 */@Slf4jpublic class LuceneTest { private final static String INDEX_FILE = "d:/ftp/index"; // 需要写入lucene源文件目录 private final static String FILE_PATH = "d:/ftp/lucene"; static Directory directory; static { try { File indexFile = new File(INDEX_FILE); // 将索引存放在磁盘index_file目录 directory = FSDirectory.open(indexFile); } catch (IOException e) { } } public 
static void main(String[] args) { // 删除指定的索引 String deleteIndexName = "lucene1.txt"; String updateIndexName = "lucene2.txt"; // 创建 createIndex(); search(); // 删除 deleteIndex(deleteIndexName); search(); // 更新 updateIndex(updateIndexName); search(); } /** * @Description:更新索引 相当于先删除原来的,再插入新的document。因为lucene不支持更新单个field * @param updateIndexName * @return:void */ private static void updateIndex(String updateIndexName) { IndexWriter iw = null; try { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iw = new IndexWriter(directory, iwc); Document document = new Document(); Field field1 = new StringField("path", "f:a/b/c", Field.Store.YES); Field field2 = new StringField("fileName", "更新的fileName", Field.Store.YES); Field fied3 = new TextField("contents", "students is a baby", Field.Store.YES); document.add(field1); document.add(field2); document.add(fied3); // 根据term匹配document,如果term匹配准确性不高,将会删除多个索引 Term term = new Term("fileName", updateIndexName); iw.updateDocument(term, document); /** 上一步的updte等于注视的 先删除再更新 **/ // iw.deleteDocuments(term); // iw.addDocument(document); } catch (Exception e) { log.error("删除索引失败!" + e.getLocalizedMessage()); } finally { try { // 需要提交和关闭 iw.commit(); // iw.rollback(); iw.close(); log.info("---------------更新索引-------------------"); } catch (IOException e) { log.error("关闭IndexWriter失败!" 
+ e.getLocalizedMessage()); } } } /** * @Description: 删除索引 * @param deleteIndexName * @return:void */ private static void deleteIndex(String deleteIndexName) { IndexWriter iw = null; try { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iw = new IndexWriter(directory, iwc); // 根据term匹配document,如果term匹配准确性不高,将会删除多个索引 Term term = new Term("fileName", deleteIndexName); iw.deleteDocuments(term); } catch (Exception e) { log.error("删除索引失败!" + e.getLocalizedMessage()); } finally { try { // 需要提交和关闭 iw.commit(); // iw.rollback(); iw.close(); log.info("---------------删除索引-------------------"); } catch (IOException e) { log.error("关闭IndexWriter失败!" + e.getLocalizedMessage()); } } } /** * @Description: 构建索引 * @see:Version.LUCENE_40为版本号,比如maven里面引入的是4.0.0版本的core架包 * @return:void */ private static void createIndex() { BufferedReader br = null; IndexWriter iw = null; try { // File indexFile = new File(INDEX_FILE); // Directory directory = FSDirectory.open(new File(INDEX_FILE)); // 分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); // 配置 IndexWriterConfig indexwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); // 创建新的索引文件时候 追加到已有的索引库 indexwc.setOpenMode(OpenMode.CREATE); // 写入索引 iw = new IndexWriter(directory, indexwc); // 将索引写入指定目录 File[] files = new File(FILE_PATH).listFiles(); for (File file : files) { br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); // 构建文档,文档可以指一个 HTML 页面,一封电子邮件,或者是一个文本文件。 Document docuemnt = new Document(); // field对象是用来描述一个文档的某个属性的,比如一封电子邮件的标题和内容可以用两个 Field 对象分别描述 Field pathField = new StringField("path", file.getPath(), Field.Store.YES); // 最后的修改时间,不存放到到index里面 Field modifiField = new LongField("modifiField", file.lastModified(), Field.Store.NO); // 内容不妨到index里面 // Field contentFied = new TextField("contents", br); // 内容存放到index里面 Field contentFied = new 
TextField("contents", IOUtils.toString(br), Field.Store.YES); // 文件名称 Field fileNameFied = new StringField("fileName", file.getName(), Field.Store.YES); // 将field添加到文档里面 docuemnt.add(pathField); docuemnt.add(modifiField); docuemnt.add(contentFied); docuemnt.add(fileNameFied); iw.addDocument(docuemnt); log.info("构建" + file.getAbsolutePath() + "文件索引成功!"); } } catch (Exception e) { log.error("构建索引失败!" + e.getLocalizedMessage()); } finally { try { // 一定要关闭写入索引,不然不写入的噢! iw.close(); br.close(); } catch (IOException e) { log.error("关闭输入流失败!" + e.getLocalizedMessage()); } } } /** * @Description: 进行查询 * @return:void */ private static void search() { try { // 查询条件 String queryStr = "students"; // filed对应的名称 String queryField = "contents"; // File indexFile = new File(INDEX_FILE); // Directory directory = FSDirectory.open(new File(INDEX_FILE)); // 索引文件存放路径 IndexReader indexReader = DirectoryReader.open(directory); // 检索工具 IndexSearcher indexSeacher = new IndexSearcher(indexReader); // 分词器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); /**************** 用了两种查询query 一个是QueryParser,一个是term ***********************/ // 查询解析器 QueryParser queryParser = new QueryParser(Version.LUCENE_40, queryField, analyzer); Query query = queryParser.parse(queryStr); // 根据trem去查询 // Term term = new Term("fileName", "lucene1.txt"); // Query query = new TermQuery(term); Filter filter = null; // 只取排名前一百的搜索结果,得到命中的文档 TopDocs topDocs = indexSeacher.search(query, null, 100); ScoreDoc[] scores = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scores) { // 获取命中的document的文档编号 int docnumber = scoreDoc.doc; // 根据编号查找到文档 Document document = indexSeacher.doc(docnumber); String path = document.get("path"); String contents = document.get("contents"); String modifiedtime = document.get("modifiField"); String fileName = document.get("fileName"); log.info("查询到数据path:" + path); log.info("查询到数据contents:" + contents); log.info("查询到数据modifiField:" + modifiedtime); log.info("查询到数据fileName:" + fileName); 
/********************** 下面的纯属个人乐趣 ****************************/ // 高亮功能 对查出来的结果进行高亮 Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); String contentsWithLight = highlighter.getBestFragment(analyzer, queryField, contents); log.info("带高亮的代码:" + contentsWithLight); } indexReader.close(); } catch (Exception e) { log.error("lucene查询失败!" + e.getLocalizedMessage()); } }}
0 0
- lucene简单入门demo
- Lucene入门与Demo
- lucene demo 入门配置
- lucene入门demo
- lucene-3.0.2 demo 入门
- Lucene02---Lucene入门与Demo
- Lucene 6.1.0 入门Demo
- lucene第一步--简单的入门demo
- Lucene入门之--运行Lucene-demo
- Lucene入门之--运行Lucene-demo
- Lucene入门之--运行Lucene-demo
- Lucene Demo
- lucene 入门
- lucene入门
- lucene入门
- Lucene入门
- LUCENE入门
- Lucene入门
- php设计模式之装饰器模式
- Linux启动新进程的几种方法及比较
- hadoop Unhealthy Nodes
- 两个类相互调用的问题
- linux配置NTP Server
- lucene入门demo
- ECharts使用——封装类库的使用20150713
- 【zoj】【Attack on Titans】
- CentOS 7.0编译安装Nginx+MySQL+PHP
- 大型高并发高负载web应用系统架构-数据库架构策略
- Objective-C - 改变NSMutableArray的特定元素
- dp cf C. Mr. Kitayuta, the Treasure Hunter
- linux declare
- 1、cocos2d-x Lua安装配置