Lucene 应用示例

来源:互联网 发布:毫州康美中药城淘宝店 编辑:程序博客网 时间:2024/05/16 13:01
package com.lin.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Small helper around Lucene 4.x and Tika: builds an on-disk index from the
 * files under a directory and runs keyword searches against that index.
 *
 * <p>Each indexed file becomes one document with three fields: {@code name}
 * (analyzed, stored), {@code path} (stored only) and {@code content}
 * (text extracted by Tika, analyzed, stored).
 */
public class LuceneUtil {

    private static final Log log = LogFactory.getLog(LuceneUtil.class);

    private static final Tika tika = new Tika();

    /** Name of the field that holds the extracted text and is searched by default. */
    private static final String FIELD_CONTENT = "content";

    /** Maximum number of hits fetched by {@link #search(File, String, Analyzer)}. */
    private static final int MAX_HITS = 10;

    /**
     * Builds (or extends) an index for every file returned by
     * {@code FileUtil.listFile(srcDriectory)}.
     *
     * <p>The writer and the directory are always released before this method
     * returns, so the index write lock is not left behind. If indexing fails
     * part-way, the uncommitted changes are rolled back.
     *
     * @param srcDriectory   directory containing the files to index
     * @param indexDirectory directory the index is written to; created if missing
     * @param analyzer       analyzer used to tokenize the {@code name} and {@code content} fields
     * @param version        Lucene compatibility version
     * @param openMode       how the index is opened (create, append, or create-or-append)
     * @throws IOException   if the index directory cannot be created or the index cannot be written
     * @throws TikaException if Tika fails to extract the text of a file
     */
    @SuppressWarnings("deprecation")
    public void diskIndex(File srcDriectory, File indexDirectory, Analyzer analyzer, Version version,
            OpenMode openMode) throws IOException, TikaException {
        if (!indexDirectory.exists() && !indexDirectory.mkdirs()) {
            throw new IOException("Cannot create index directory: " + indexDirectory.getAbsolutePath());
        }

        try (Directory directory = FSDirectory.open(indexDirectory)) {
            IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
            config.setOpenMode(openMode);

            IndexWriter indexWriter = new IndexWriter(directory, config);
            boolean committed = false;
            try {
                List<File> files = FileUtil.listFile(srcDriectory);
                for (File file : files) {
                    indexWriter.addDocument(toDocument(file));
                }
                indexWriter.commit();
                committed = true;
            } finally {
                if (committed) {
                    indexWriter.close();
                } else {
                    // rollback() discards the uncommitted documents and also closes the writer.
                    indexWriter.rollback();
                }
            }
        }
    }

    /** Converts one file into the name/path/content document stored in the index. */
    @SuppressWarnings("deprecation")
    private Document toDocument(File file) throws IOException, TikaException {
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NO));
        doc.add(new Field(FIELD_CONTENT, tikaParseFileToString(file), Store.YES, Index.ANALYZED));
        return doc;
    }

    /**
     * Opens the index in the given directory and returns a searcher over it.
     *
     * <p>The caller owns the returned searcher and should close its underlying
     * reader ({@code searcher.getIndexReader().close()}) when finished.
     *
     * @param indexDiretory directory that contains the index
     * @return a searcher over the current state of the index
     * @throws IOException if the index cannot be opened
     */
    public IndexSearcher getIndexSearch(File indexDiretory) throws IOException {
        Directory directory = FSDirectory.open(indexDiretory);
        try {
            return new IndexSearcher(DirectoryReader.open(directory));
        } catch (IOException e) {
            // Do not leak the directory handle when the index is missing or unreadable.
            directory.close();
            throw e;
        }
    }

    /**
     * Searches the {@code content} field of the index and prints the name and
     * path of up to {@value #MAX_HITS} matching documents to standard output.
     *
     * @param indexDirectory directory that contains the index
     * @param word           query text, in Lucene classic query-parser syntax
     * @param analyzer       analyzer used to parse the query; should match the one used for indexing
     * @return the printed lines, one hit per line (empty string when nothing matched)
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code word} is not a valid query
     */
    public String search(File indexDirectory, String word, Analyzer analyzer)
            throws IOException, ParseException {
        // Parse first so an invalid query never opens the index at all.
        Query query = new QueryParser(FIELD_CONTENT, analyzer).parse(word);

        StringBuilder hits = new StringBuilder();
        try (Directory directory = FSDirectory.open(indexDirectory);
                IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            TopDocs docs = searcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                String line = "name===========" + document.get("name")
                        + "path===========" + document.get("path");
                System.out.println(line);
                hits.append(line).append('\n');
            }
        }
        return hits.toString();
    }

    /**
     * Extracts the plain text of a file using Tika.
     *
     * @param file file to parse
     * @return the extracted text
     * @throws IOException   if the file cannot be read
     * @throws TikaException if Tika cannot parse the file
     */
    public String tikaParseFileToString(File file) throws IOException, TikaException {
        return tika.parseToString(file);
    }

    /**
     * Demo entry point: searches an existing index for a keyword, then dumps
     * the Tika-extracted text of a sample PDF.
     *
     * <p>Build the index first with, for example:
     * {@code new LuceneUtil().diskIndex(new File("d:\\lucene"), new File("d:\\luceneIndex"),
     * new IKAnalyzer(), Version.LUCENE_4_10_2, OpenMode.CREATE);}
     */
    public static void main(String[] args) throws Exception {
        new LuceneUtil().search(new File("d:\\luceneIndex"), "接口", new IKAnalyzer());

        try (InputStream in = new FileInputStream("d:\\lucene\\IKAnalyzer中文分词器V2012_FF使用手册.pdf")) {
            String str = tika.parseToString(in);
            System.out.println(str);
        }
    }
}

项目依赖使用 Maven 管理,pom.xml 如下:

<!-- Maven build descriptor for the "learn" web application (packaged as a WAR). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.lin.project</groupId>
  <artifactId>learn</artifactId>
  <packaging>war</packaging>
  <version>0.0.1-SNAPSHOT</version>
  <name>mybatis Maven Webapp</name>
  <url>http://maven.apache.org</url>

  <properties>
    <redis.clients.version>2.6.0</redis.clients.version>
    <spring.data.redis.version>1.4.0.RELEASE</spring.data.redis.version>
  </properties>

  <dependencies>
    <!-- Testing -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- Logging -->
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>

    <!-- Persistence -->
    <dependency>
      <groupId>commons-dbcp</groupId>
      <artifactId>commons-dbcp</artifactId>
      <version>1.4</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.2.7</version>
    </dependency>
    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis-spring</artifactId>
      <version>1.2.2</version>
    </dependency>

    <!-- Spring -->
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-core</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-beans</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-tx</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-aop</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-jdbc</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-webmvc</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-web</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-context-support</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-orm</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.springframework</groupId>
      <artifactId>spring-test</artifactId>
      <version>4.0.6.RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.aspectj</groupId>
      <artifactId>aspectjweaver</artifactId>
      <version>1.8.2</version>
    </dependency>

    <!-- JSP / JSTL -->
    <dependency>
      <groupId>jstl</groupId>
      <artifactId>jstl</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>taglibs</groupId>
      <artifactId>standard</artifactId>
      <version>1.1.2</version>
    </dependency>

    <!-- Database driver -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.32</version>
    </dependency>

    <!-- Scheduling -->
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz</artifactId>
      <version>2.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz-jobs</artifactId>
      <version>2.2.1</version>
    </dependency>

    <!-- JSON -->
    <dependency>
      <groupId>org.codehaus.jackson</groupId>
      <artifactId>jackson-core-asl</artifactId>
      <version>1.9.13</version>
    </dependency>
    <dependency>
      <groupId>org.codehaus.jackson</groupId>
      <artifactId>jackson-mapper-asl</artifactId>
      <version>1.9.13</version>
    </dependency>

    <!-- File upload -->
    <dependency>
      <groupId>commons-fileupload</groupId>
      <artifactId>commons-fileupload</artifactId>
      <version>1.3.1</version>
    </dependency>

    <!-- Redis -->
    <dependency>
      <groupId>redis.clients</groupId>
      <artifactId>jedis</artifactId>
      <version>${redis.clients.version}</version>
      <type>jar</type>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.springframework.data</groupId>
      <artifactId>spring-data-redis</artifactId>
      <version>${spring.data.redis.version}</version>
    </dependency>

    <!-- Lucene -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>4.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>4.10.2</version>
    </dependency>

    <!-- Tika is intentionally left disabled here. -->
    <!--
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-app</artifactId>
      <version>1.6</version>
    </dependency>
    -->
  </dependencies>

  <build>
    <finalName>learn</finalName>
  </build>
</project>

另外还需要手动添加 Tika 和 IKAnalyzer 的 jar 包(pom 中未通过 Maven 引入),下载地址如下:

http://pan.baidu.com/s/1o69fCeQ 提取码:122b

http://pan.baidu.com/s/1hq6AalY 提取码:k3xp

0 0
原创粉丝点击