Lucene实例(各种Query使用的例子)

来源:互联网 发布:美工主要做什么 编辑:程序博客网 时间:2024/05/19 02:23

 

在网上看了许多相关的例子,大多比较零散琐碎。这里贴出一份感觉还不错的源代码(基于 Lucene 2.x 的旧版 API),介绍了各种 Query 子类的使用:

 

 

创建索引:

 

package example.lucene;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Properties;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Reads the input files from the data directory, builds one Lucene document
 * per file and writes the resulting index into the index directory.
 *
 * @author Amol
 */
public class Indexer {

    private IndexWriter indexWriter;

    /* Location of directory where index files are stored */
    private String indexDirectory;

    /* Location of data directory */
    private String dataDirectory;

    public Indexer(String indexDirectory, String dataDirectory) {
        this.indexDirectory = indexDirectory;
        this.dataDirectory = dataDirectory;
    }

    /**
     * Creates the IndexWriter used to add documents and write the index to
     * disk. Does nothing when a writer already exists.
     *
     * @throws RuntimeException wrapping the IOException if the writer cannot
     *         be created
     */
    void createIndexWriter() {
        if (indexWriter != null) {
            return;
        }
        try {
            // Directory where the index files will be stored
            Directory fsDirectory = FSDirectory.getDirectory(indexDirectory);
            // Analyzer used to tokenize the input data
            Analyzer standardAnalyzer = new StandardAnalyzer();
            // Create a new index instead of appending to an existing one
            boolean create = true;
            IndexDeletionPolicy deletionPolicy = new KeepOnlyLastCommitDeletionPolicy();
            indexWriter = new IndexWriter(fsDirectory, standardAnalyzer, create,
                    deletionPolicy, IndexWriter.MaxFieldLength.UNLIMITED);
        } catch (IOException ie) {
            System.out.println("Error in creating IndexWriter");
            throw new RuntimeException(ie);
        }
    }

    /**
     * Loads every regular file of the data directory as a properties file,
     * turns it into a document and adds it to the index. The index is then
     * optimized and the writer is closed.
     *
     * The writer is created on demand, so calling createIndexWriter() first
     * is optional. The writer is always closed, even when indexing fails, so
     * that it does not stay open after an error.
     *
     * @throws FileNotFoundException if a data file cannot be opened
     * @throws IOException if reading a data file or writing the index fails
     * @throws RuntimeException if the data directory is missing or unreadable
     */
    void indexData() throws FileNotFoundException, IOException {
        createIndexWriter();
        try {
            for (File file : getFilesToBeIndxed()) {
                if (!file.isFile()) {
                    // Sub-directories cannot be read as properties files
                    continue;
                }
                indexWriter.addDocument(buildDocument(file));
            }
            /* Requests an "optimize" operation on the index, priming the
               index for the fastest available search */
            indexWriter.optimize();
        } finally {
            // Drop the reference first so a later call creates a fresh writer
            // even if close() itself fails.
            IndexWriter writer = indexWriter;
            indexWriter = null;
            /* Commits all changes to the index and closes all associated files. */
            writer.close();
        }
    }

    /**
     * Reads one e-mail properties file and wraps its values in a Document.
     * Properties missing from the file are indexed as empty strings.
     */
    private Document buildDocument(File file) throws IOException {
        /* Step 1. Prepare the data for indexing. Extract the data. */
        Properties properties = loadProperties(file);
        String sender = properties.getProperty("sender", "");
        String receiver = properties.getProperty("receiver", "");
        String date = properties.getProperty("date", "");
        String month = properties.getProperty("month", "");
        String subject = properties.getProperty("subject", "");
        String message = properties.getProperty("message", "");
        String emaildoc = file.getAbsolutePath();

        /* Step 2. Wrap the data in Fields and add them to a Document.
           Sender, subject and the e-mail file location are shown along with
           the search results, so their values are stored in the index. */
        Field senderField = new Field("sender", sender, Field.Store.YES, Field.Index.NOT_ANALYZED);
        Field receiverField = new Field("receiver", receiver, Field.Store.NO, Field.Index.NOT_ANALYZED);
        Field subjectField = new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED);
        if (subject.toLowerCase().indexOf("pune") != -1) {
            // Show results that contain "pune" in their subject first
            subjectField.setBoost(2.2F);
        }
        Field dateField = new Field("date", date, Field.Store.NO, Field.Index.NOT_ANALYZED);
        Field monthField = new Field("month", month, Field.Store.NO, Field.Index.NOT_ANALYZED);
        Field messageField = new Field("message", message, Field.Store.NO, Field.Index.ANALYZED);
        Field emailDocField = new Field("emailDoc", emaildoc, Field.Store.YES, Field.Index.NO);

        Document doc = new Document();
        doc.add(senderField);
        doc.add(receiverField);
        doc.add(subjectField);
        doc.add(dateField);
        doc.add(monthField);
        doc.add(messageField);
        doc.add(emailDocField);
        if (sender.toLowerCase().indexOf("job") != -1) {
            // Boost whole documents whose sender address contains "job"
            doc.setBoost(2.1F);
        }
        return doc;
    }

    /** Loads a properties file and always closes the underlying stream. */
    private Properties loadProperties(File file) throws IOException {
        Properties properties = new Properties();
        FileInputStream in = new FileInputStream(file);
        try {
            properties.load(in);
        } finally {
            in.close();
        }
        return properties;
    }

    /**
     * Lists the entries of the data directory.
     *
     * @throws RuntimeException if the data directory does not exist or its
     *         content cannot be listed
     */
    private File[] getFilesToBeIndxed() {
        File dataDir = new File(dataDirectory);
        if (!dataDir.exists()) {
            throw new RuntimeException(dataDirectory + " does not exist");
        }
        File[] files = dataDir.listFiles();
        if (files == null) {
            // listFiles() returns null for non-directories and on I/O errors
            throw new RuntimeException(dataDirectory + " is not a readable directory");
        }
        return files;
    }
}


查询类:

 

 

package example.lucene;

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixFilter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;

/**
 * Demonstrates the usage of Lucene's indexing and searching APIs.
 * The sample comes with a set of input property files that represent
 * e-mail messages of a user. The methods of this class index those files
 * and then run one example per Query type against the index.
 *
 * @author Amol Sonawane
 */
public class LuceneDemo {

    // Path to the directory where Lucene will store index files
    private static String indexDirectory = "C:\\Users\\ZEQ\\Desktop\\os-apache-lucenesearch-SampleApplication\\Lucene\\indexdir";

    // Path to the directory which contains the data files to be indexed
    private static String dataDirectory = "C:\\Users\\ZEQ\\Desktop\\os-apache-lucenesearch-SampleApplication\\Lucene\\datadir";

    private Searcher indexSearcher;

    /**
     * Builds the index, runs every example and finally closes the searcher.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or closed
     * @throws FileNotFoundException declared for backward compatibility
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        LuceneDemo luceneDemo = new LuceneDemo();
        // Create the Lucene index
        luceneDemo.createLuceneIndex();
        // Create the IndexSearcher
        luceneDemo.createIndexSearcher();
        try {
            luceneDemo.termQueryExample();
            luceneDemo.rangeQueryExample();
            luceneDemo.prefixQueryExample();
            luceneDemo.booleanQueryExample();
            luceneDemo.phraseQueryExample();
            luceneDemo.wildCardQueryExample();
            luceneDemo.fuzzyQueryExample();
            luceneDemo.queryParserExample();
            luceneDemo.fieldBoostFactorExample();
            luceneDemo.sortBySenderExample();
            luceneDemo.filterExample();
            luceneDemo.deletDocumentFromIndex();
        } finally {
            // Release the index files held by the searcher
            luceneDemo.closeIndexSearcher();
        }
    }

    /** Indexes all data files; any I/O failure is rethrown unchecked. */
    private void createLuceneIndex() {
        Indexer indexer = new Indexer(indexDirectory, dataDirectory);
        // Create IndexWriter
        indexer.createIndexWriter();
        try {
            // Index data
            indexer.indexData();
        } catch (IOException e) {
            // FileNotFoundException is an IOException and is covered here too
            throw new RuntimeException(e);
        }
    }

    /**
     * (Re)opens the searcher on the index directory. A previously opened
     * searcher is closed first so it is not leaked when the searcher is
     * recreated after the index has been modified.
     */
    private void createIndexSearcher() throws CorruptIndexException, IOException {
        closeIndexSearcher();
        indexSearcher = new IndexSearcher(indexDirectory);
    }

    /** Closes the current searcher, if any. */
    private void closeIndexSearcher() throws IOException {
        if (indexSearcher != null) {
            Searcher previous = indexSearcher;
            indexSearcher = null;
            previous.close();
        }
    }

    /**
     * Runs the query, fetching at most 20 hits, and prints the total hit
     * count followed by sender, subject and file location of every hit.
     */
    private void showSearchResults(Query query) {
        try {
            /* First parameter is the query to be executed and
               second parameter indicates the no of search results to fetch */
            TopDocs topDocs = indexSearcher.search(query, 20);
            System.out.println("Total hits " + topDocs.totalHits);
            // Get an array of references to matched documents
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Retrieve the matched document and show relevant details
                Document doc = indexSearcher.doc(scoreDoc.doc);
                System.out.println("\nSender: " + doc.getField("sender").stringValue());
                System.out.println("Subject: " + doc.getField("subject").stringValue());
                System.out.println("Email file location: " + doc.getField("emailDoc").stringValue());
            }
            System.out.println("---------------------------------------------");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Term query: searches mails that contain the word "java" in the
     * subject field.
     */
    private void termQueryExample() {
        System.out.println("TermQuery example: Search mails having the word \"java\"" + " in the subject field");
        Term term = new Term("subject", "java");
        Query query = new TermQuery(term);
        showSearchResults(query);
    }

    /**
     * Range query: searches mails received between 01/06/2009 and
     * 6/06/2009, both inclusive.
     */
    private void rangeQueryExample() {
        System.out.println("RangeQuery example: Search mails from 01/06/2009 " + "to 6/06/2009 both inclusive");
        Term begin = new Term("date", "20090601");
        Term end = new Term("date", "20090606");
        Query query = new RangeQuery(begin, end, true);
        showSearchResults(query);
    }

    /**
     * Prefix query: searches mails having the sender field prefixed by the
     * word "job".
     */
    private void prefixQueryExample() {
        System.out.println("PrefixQuery example: Search mails having sender field prefixed by the word 'job'");
        PrefixQuery query = new PrefixQuery(new Term("sender", "job"));
        showSearchResults(query);
    }

    /**
     * Boolean query: searches mails that contain both "java" and
     * "bangalore" in the subject field.
     */
    private void booleanQueryExample() {
        System.out.println("BooleanQuery: Search mails that have both 'java' " + "and 'bangalore' in the subject field ");
        Query query1 = new TermQuery(new Term("subject", "java"));
        Query query2 = new TermQuery(new Term("subject", "bangalore"));
        BooleanQuery query = new BooleanQuery();
        query.add(query1, BooleanClause.Occur.MUST);
        query.add(query2, BooleanClause.Occur.MUST);
        showSearchResults(query);
    }

    /**
     * Phrase query: searches mails that contain a given phrase in the
     * subject field. The slop of 1 is set explicitly on the query.
     */
    private void phraseQueryExample() {
        System.out.println("PhraseQuery example: Search mails that have phrase " + "'job opening j2ee' in the subject field.");
        PhraseQuery query = new PhraseQuery();
        query.setSlop(1);
        // Add terms of the phrase.
        query.add(new Term("subject", "job"));
        query.add(new Term("subject", "opening"));
        query.add(new Term("subject", "j2ee"));
        showSearchResults(query);
    }

    /**
     * Wildcard query: searches mails that have the word 'architect' in the
     * subject field by matching 'arch*'.
     */
    private void wildCardQueryExample() {
        System.out.println("WildcardQuery: Search for 'arch*' to find emails that " + "have word 'architect' in subject field.");
        Query query = new WildcardQuery(new Term("subject", "arch*"));
        showSearchResults(query);
    }

    /**
     * Fuzzy query: searches for e-mails that have a word similar to
     * 'admnistrtor' in the subject field. The word is misspelled on purpose
     * to look for a close match.
     */
    private void fuzzyQueryExample() {
        System.out.println("xxxxxxxxxxxxxxxxxxFuzzyQuery: Search for emails that have word similar "
                + "to 'admnistrtor' in the subject field. Note we have misspelled administrator here.");
        Query query = new FuzzyQuery(new Term("subject", "admnistrtor"));
        showSearchResults(query);
    }

    /**
     * Query parser: parses a user-entered expression, splitting it into
     * keywords, and searches with the resulting query.
     */
    private void queryParserExample() {
        System.out.println("oooooooooooooooooooQueryParser: Searches for mails that have given user"
                + " entered query expression in the subject field.");
        // First argument is the default field for query terms
        QueryParser queryParser = new QueryParser("subject", new StandardAnalyzer());
        try {
            /* An alternative expression that searches for e-mails containing
               'job openings' and '.net' and 'pune' would be:
               "job openings AND .net AND pune" */
            Query query = queryParser.parse("job相关书籍");
            showSearchResults(query);
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Placeholder for a sorted-results example; it only builds the query and
     * is not called from main. See sortBySenderExample() for a working
     * sorting example.
     */
    private void queryResultsSortingExample() {
        // Term(field, text): "java" is the text searched in the subject field
        Query query = new TermQuery(new Term("subject", "java"));
    }

    /**
     * Deletes from the index all the mails that were received in May 2009,
     * showing the matching mails before and after the deletion.
     */
    private void deletDocumentFromIndex() {
        try {
            // Check how many e-mails were received in May 2009. The pattern
            // has no wildcard character, so only the exact term "05" matches.
            Query query = new WildcardQuery(new Term("month", "05"));
            System.out.println("---------------------------------------------");
            System.out.println("\nSearching for mails that were received in May");
            showSearchResults(query);

            IndexReader indexReader = IndexReader.open(indexDirectory);
            try {
                indexReader.deleteDocuments(new Term("month", "05"));
            } finally {
                // Close associated index files and save deletions to disk
                indexReader.close();
            }

            // Reopen the searcher so that it sees the deletions
            createIndexSearcher();
            System.out.println("After deleting mails received in May, " + "searching for mails that were received in May");
            showSearchResults(query);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Shows the result of boosting fields: searches mails that have the word
     * 'job' in their subject field. Mails having 'pune' in the subject are
     * given more importance through the subject-field boost that Indexer
     * sets at indexing time.
     */
    void fieldBoostFactorExample() {
        System.out.println("Boosting fields and documents: Searches mails that"
                + " have 'job' in their subject field, giving more importance to "
                + " mails having 'pune' in their subject field");
        WildcardQuery query = new WildcardQuery(new Term("subject", "job*"));
        showSearchResults(query);
    }

    /**
     * Shows how to sort the results: searches mails having the word 'job' in
     * the subject and prints them by relevance, by sender in descending
     * order, and by document index order.
     */
    void sortBySenderExample() {
        SortField sortField = new SortField("sender", true);
        Sort sortBySender = new Sort(sortField);
        WildcardQuery query = new WildcardQuery(new Term("subject", "job*"));
        try {
            System.out.println("Sorting results: Search mails having the word 'job' in subject");
            System.out.println("--- Showing results sorted by relevance");
            TopDocs topDocs = indexSearcher.search(query, 20);
            printResults(topDocs);
            // Pass the sort criteria to search
            System.out.println("--- Sorting by sender names in descending order");
            topDocs = indexSearcher.search(query, null, 20, sortBySender);
            printResults(topDocs);
            System.out.println("--- Sorting by the document index order");
            topDocs = indexSearcher.search(query, null, 20, Sort.INDEXORDER);
            printResults(topDocs);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints the sender of every hit in the given result set. */
    private void printResults(TopDocs topDocs) throws CorruptIndexException, IOException {
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Retrieve the matched document and show relevant details
            Document doc = indexSearcher.doc(scoreDoc.doc);
            System.out.println("Sender: " + doc.getField("sender").stringValue());
        }
    }

    /**
     * Filter example: searches for mails that have 'job' in the subject
     * field, first without and then with a PrefixFilter on the sender field.
     *
     * NOTE: the filter restricts the results to mails whose sender address
     * starts with "jobs"; it does not exclude them, although the printed
     * message says "exclude". The message is kept unchanged so that the
     * program output stays the same.
     */
    void filterExample() {
        Term prefix = new Term("sender", "jobs");
        Filter prefixFilter = new PrefixFilter(prefix);
        WildcardQuery query = new WildcardQuery(new Term("subject", "job*"));
        try {
            System.out.println("Search for mails that have 'job' in the subject"
                    + " field, apply a filter to exclude mails that have sender"
                    + " email prefixed by 'job'");
            System.out.println("---------------------------------------------");
            System.out.println("--- Before applying prefix filter");
            TopDocs topDocs = indexSearcher.search(query, 20);
            printResults(topDocs);
            System.out.println("--- After applying prefix filter");
            topDocs = indexSearcher.search(query, prefixFilter, 20);
            printResults(topDocs);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


 

运行结果:

 

TermQuery example: Search mails having the word "java" in the subject fieldTotal hits 2Sender: jobs-alert@monster.comSubject: Job openings for Java Professionals at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc2.propertiesSender: smitha@careernet.comSubject: Job openings for Java Developer at Pune ?à???é??Email file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc1.properties---------------------------------------------RangeQuery example: Search mails from 01/06/2009 to 6/06/2009 both inclusiveTotal hits 2Sender: dailyjobs@jobsahead.comSubject: Job openings for .Net Prefesionals at PuneEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc5.propertiesSender: smitha@career.comSubject: Job openings for J2EE Technical Lead at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc4.properties---------------------------------------------PrefixQuery example: Search mails having sender field prefixed by the word 'job'Total hits 2Sender: jobs-alert@monster.comSubject: Job openings for Java Professionals at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc2.propertiesSender: jobs@naukri.comSubject: Job openings for Database Administrators at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc3.properties---------------------------------------------BooleanQuery: Search mails that have both 'java' and 'bangalore' in the subject field Total hits 1Sender: jobs-alert@monster.comSubject: Job openings for Java Professionals at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc2.properties---------------------------------------------PhraseQuery example: Search mails that have phrase 'job opening 
j2ee' in the subject field.Total hits 1Sender: nilesh@bestjobs.comSubject: Job opening for J2EE architect at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc6.properties---------------------------------------------WildcardQuery: Search for 'arch*' to find emails that have word 'architect' in subject field.Total hits 1Sender: nilesh@bestjobs.comSubject: Job opening for J2EE architect at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc6.properties---------------------------------------------xxxxxxxxxxxxxxxxxxFuzzyQuery: Search for emails that have word similar to 'admnistrtor' in the subject field. Note we have misspelled administrator here.Total hits 1Sender: jobs@naukri.comSubject: Job openings for Database Administrators at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc3.properties---------------------------------------------oooooooooooooooooooQueryParser: Searches for mails that have given user entered query expression in the subject field.Total hits 0---------------------------------------------Boosting fields and documents: Searches mails that have 'job' in their subject field, giving more importance to  mails having 'pune' in their subject fieldTotal hits 6Sender: dailyjobs@jobsahead.comSubject: Job openings for .Net Prefesionals at PuneEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc5.propertiesSender: jobs-alert@monster.comSubject: Job openings for Java Professionals at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc2.propertiesSender: jobs@naukri.comSubject: Job openings for Database Administrators at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc3.propertiesSender: nilesh@bestjobs.comSubject: 
Job opening for J2EE architect at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc6.propertiesSender: smitha@careernet.comSubject: Job openings for Java Developer at Pune ?à???é??Email file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc1.propertiesSender: smitha@career.comSubject: Job openings for J2EE Technical Lead at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc4.properties---------------------------------------------Sorting results: Search mails having the word 'job' in subject--- Showing results sorted by relevanceSender: dailyjobs@jobsahead.comSender: jobs-alert@monster.comSender: jobs@naukri.comSender: nilesh@bestjobs.comSender: smitha@careernet.comSender: smitha@career.com--- Sorting by sender names in descending orderSender: smitha@careernet.comSender: smitha@career.comSender: nilesh@bestjobs.comSender: jobs@naukri.comSender: jobs-alert@monster.comSender: dailyjobs@jobsahead.com--- Sorting by the document index orderSender: smitha@careernet.comSender: jobs-alert@monster.comSender: jobs@naukri.comSender: smitha@career.comSender: dailyjobs@jobsahead.comSender: nilesh@bestjobs.comSearch for mails that have 'job' in the subject field, apply a filter to exclude mails that have sender email prefixed by 'job'------------------------------------------------ Before applying prefix filterSender: dailyjobs@jobsahead.comSender: jobs-alert@monster.comSender: jobs@naukri.comSender: nilesh@bestjobs.comSender: smitha@careernet.comSender: smitha@career.com--- After applying prefix filterSender: jobs-alert@monster.comSender: jobs@naukri.com---------------------------------------------Searching for mails that were received in MayTotal hits 3Sender: jobs-alert@monster.comSubject: Job openings for Java Professionals at BangaloreEmail file location: 
C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc2.propertiesSender: jobs@naukri.comSubject: Job openings for Database Administrators at BangaloreEmail file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc3.propertiesSender: smitha@careernet.comSubject: Job openings for Java Developer at Pune ?à???é??Email file location: C:\Users\ZEQ\Desktop\os-apache-lucenesearch-SampleApplication\Lucene\datadir\doc1.properties---------------------------------------------After deleting mails received in May, searching for mails that were received in MayTotal hits 0---------------------------------------------


 

资源文件:

 

 

doc1.properties

 sender = smitha@careernet.com
receiver = amol@gmail.com
date=20090501
month=05
subject=Job openings for Java Developer at Pune
message= Position: Software Developer, Experience: 6 years, Skills: Java, J2EE, Spring, Hibernate Companies: Sybase, Avaya at Pune

doc2.properties

 

sender=jobs-alert@monster.com
receiver=amol@gmail.com
date=20090510
month=05
subject=Job openings for Java Professionals at Bangalore
message= Position: Senior Software Engineer, Experience: 8+ years, Skills: Java, J2EE, EJB 3.0, JSP, Servlets, Oracle, Websphere Company: AOL, HP, IBM Location: Bangalore

doc3.properties:

 

    sender=jobs@naukri.com
receiver=amol@gmail.com
date=20090520
month=05
subject=Job openings for Database Administrators at Bangalore
message= Position: Database Administrator, Experience: 8+ years, Skills: Oracle 10g, DB2 9.0 Company: Wipro Technologies Pvt Ltd Bangalore

doc4.properties

sender=smitha@career.com
receiver=amol@gmail.com
date=20090601
month=06
subject=Job openings for J2EE Technical Lead at Bangalore
message= Position: J2EE Technical Lead, Experience: 12+ years, Skills: Design and modelling experince in BI domain using Java, J2EE, EJB Company: Intute Tech Pvt Ltd Bangalore

doc5.properties:

 

sender=dailyjobs@jobsahead.com
receiver=amol@gmail.com
date=20090606
month=06
subject=Job openings for .Net Prefesionals at Pune
message= Position: Senior Software Engineer, Experience: 6+ years, Skills: C++, C#, ASP.Net Company: Geometric software pvt Ltd, Pune

 

doc6.properties

 

   sender=nilesh@bestjobs.com
receiver=amol@gmail.com
date=20090611
month=06
subject=Job opening for J2EE architect at Bangalore
message= Position: J2EE architect, Experience: 12+ years, Skills: Design and modelling experince in BI domain using Java, J2EE, EJB Company: TCS Bangalore

 

原创粉丝点击