lucene应用
来源:互联网 发布:毫州康美中药城淘宝店 编辑:程序博客网 时间:2024/05/16 13:01
package com.lin.util;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.util.List;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Index;import org.apache.lucene.document.Field.Store;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.apache.tika.Tika;import org.apache.tika.exception.TikaException;import org.wltea.analyzer.lucene.IKAnalyzer;public class LuceneUtil {private Log log = LogFactory.getLog(LuceneUtil.class);private IndexWriter writer;private IndexReader reader;private static Tika tika = new Tika();/** * 建立索引 * @param srcDriectory 需要建立索引的文件位置 * @param indexDirectory 索引放置位置 * @param analyzer 解析器 * @param versionlucene版本 * @param openMode打开方式(1.创建,2追加,3创建或追加) * @throws IOException * @throws TikaException */@SuppressWarnings("deprecation")public void diskIndex(File srcDriectory,File indexDirectory, Analyzer analyzer, Version version,OpenMode openMode )throws IOException, TikaException {if(!indexDirectory.exists()){indexDirectory.mkdirs();}FSDirectory fsd = FSDirectory.open(indexDirectory);IndexWriterConfig config = new IndexWriterConfig(version, analyzer);config.setOpenMode(openMode);writer = new IndexWriter(fsd, config);List<File> files = 
FileUtil.listFile(srcDriectory);Document doc = null;for (File file : files) {doc = new Document();doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));doc.add(new Field("path", file.getAbsolutePath(), Store.YES,Index.NO));doc.add(new Field("content", tikaParseFileToString(file), Store.YES,Index.ANALYZED));writer.addDocument(doc);}writer.commit();}/** * 获取查询把柄 * @param indexDirectory * @return * @throws IOException */public IndexSearcher getIndexSearch(File indexDiretory) throws IOException{Directory directory = FSDirectory.open(indexDiretory);return new IndexSearcher(reader.open(directory));}public String search(File indexDirectory,String word,Analyzer analyzer) throws IOException, ParseException{IndexSearcher indexSearch = getIndexSearch(indexDirectory);QueryParser parser = new QueryParser( "content",analyzer);Query query = parser.parse(word);TopDocs docs = indexSearch.search(query, 10);ScoreDoc[] sds = docs.scoreDocs;for(ScoreDoc sd:sds){Document document = indexSearch.doc(sd.doc);System.out.println("name==========="+document.get("name")+"path==========="+document.get("path"));}return null;}public String tikaParseFileToString(File file) throws IOException, TikaException{return tika.parseToString(file);}public static void main(String[] args)throws Exception {//new LuceneUtil().diskIndex(new File("d:\\lucene"), new File("d:\\luceneIndex"), new IKAnalyzer(), Version.LUCENE_4_10_2, OpenMode.CREATE);new LuceneUtil().search(new File("d:\\luceneIndex"),"接口",new IKAnalyzer());Tika tika = new Tika();String str = tika.parseToString(new FileInputStream("d:\\lucene\\IKAnalyzer中文分词器V2012_FF使用手册.pdf"));System.out.println(str);}}
项目依赖使用maven:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.lin.project</groupId>
  <artifactId>learn</artifactId>
  <packaging>war</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>mybatis Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <properties>
    <redis.clients.version>2.6.0</redis.clients.version>
    <spring.data.redis.version>1.4.0.RELEASE</spring.data.redis.version>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>commons-dbcp</groupId>
      <artifactId>commons-dbcp</artifactId>
      <version>1.4</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.2.7</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis-spring</artifactId>
      <version>1.2.2</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-core</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-beans</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-tx</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-aop</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-jdbc</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-webmvc</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-web</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context-support</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-orm</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.aspectj</groupId>
      <artifactId>aspectjweaver</artifactId>
      <version>1.8.2</version>
    </dependency>
    <dependency>
      <groupId>jstl</groupId>
      <artifactId>jstl</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>taglibs</groupId>
      <artifactId>standard</artifactId>
      <version>1.1.2</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.32</version>
    </dependency>
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz</artifactId>
      <version>2.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz-jobs</artifactId>
      <version>2.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.codehaus.jackson</groupId>
      <artifactId>jackson-core-asl</artifactId>
      <version>1.9.13</version>
    </dependency>
    <dependency>
      <groupId>org.codehaus.jackson</groupId>
      <artifactId>jackson-mapper-asl</artifactId>
      <version>1.9.13</version>
    </dependency>
    <dependency>
      <groupId>commons-fileupload</groupId>
      <artifactId>commons-fileupload</artifactId>
      <version>1.3.1</version>
    </dependency>
    <dependency>
      <groupId>redis.clients</groupId>
      <artifactId>jedis</artifactId>
      <version>${redis.clients.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-redis</artifactId>
      <version>${spring.data.redis.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>4.10.2</version>
    </dependency>
    <!--
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-app</artifactId>
      <version>1.6</version>
    </dependency>
    -->
  </dependencies>
  <build>
    <finalName>learn</finalName>
  </build>
</project>
添加额外tika和IKAnalyzer的jar包
http://pan.baidu.com/s/1o69fCeQ 提取码:122b
http://pan.baidu.com/s/1hq6AalY 提取码:k3xp
0 0
- Lucene应用
- lucene应用
- lucene应用
- lucene的首次应用
- lucene....highlight应用
- Lucene学习笔记(应用)
- lucene 简单应用
- 初次应用lucene
- Lucene基本应用示例
- lucene全文检索应用
- lucene.net 应用资料
- Lucene的基本应用
- Lucene中的堆应用
- Lucene原理与应用
- Lucene初步应用
- Lucene.Net3.0.3应用
- Lucene的原理和应用
- Lucene介绍及简单应用
- Android 手机定位慢的解决方法
- 第十四周项目二—带姓名的成绩单
- 第13周上机实践项目5——字符串操作(2)
- tcp通信中的bind
- BZOJ 2434 NOI2011 阿狸的打字机 fail树+树状数组
- lucene应用
- 使用 CAS 在 Tomcat 中实现单点登录
- linux下安装python
- MongoDB命令及SQL语法对比
- libevent学习__学习历程总结
- 社区发现(Community Detection)算法
- 关于VC6简单实现xp风格界面
- jquery实现跨域请求&SpringMVC解决跨域乱码问题
- JQuery中$.ajax()方法参数详解