java LuceneCrawlerSvcImpl

来源:互联网 发布:广电授权的网络机顶盒 编辑:程序博客网 时间:2024/04/19 22:22
package com.jeecms.jspgou.lucene;


import java.io.File;
import java.io.IOException;
import java.util.Date;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.baomro.shop.dao.CrawlerHistoryDao;
import com.baomro.shop.entity.CmsAcquisition;
import com.baomro.shop.entity.CrawlerHistory;
import com.jeecms.common.page.Pagination;


@Service
public class LuceneCrawlerSvcImpl implements LuceneCrawlerSvc {
public int index(Integer id,String path,String pathfile, Date start, Date end,Boolean create)
throws CorruptIndexException, LockObtainFailedException,
IOException {
Analyzer luceneAnalyzer = new IKAnalyzer();
IndexWriter indexWriter = null;
Directory indexDir = null ;
try { 
File docDir = new File(path);
if (!docDir.exists()) {  
docDir.mkdirs();  
}  
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer);
indexDir=new SimpleFSDirectory(new File(path));
if (create) {  
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);  
             } else {  
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);  
        }  
indexWriter = new IndexWriter(indexDir,config);
           int count = cDao.luceneWriteIndex(id,path,pathfile,indexWriter, start, end);
           return count;
}  finally {  
if(indexWriter!=null)
indexWriter.close();
}
}
public int indexSingle(String path,String pathfile,String contnethtml,CrawlerHistory doc,Boolean create)
throws CorruptIndexException, LockObtainFailedException,
IOException {
Analyzer luceneAnalyzer = new IKAnalyzer();
IndexWriter indexWriter = null;
Directory indexDir = null ;
try { 
File docDir = new File(path);
if (!docDir.exists()) {  
docDir.mkdirs();  
}  
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer);
indexDir=new SimpleFSDirectory(new File(path));
if (create) {  
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);  
             } else {  
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);  
        }  
indexWriter = new IndexWriter(indexDir,config);
           int count = cDao.luceneWriteIndexSingle(contnethtml,doc,pathfile,indexWriter);
           return count;
}  finally {  
if(indexWriter!=null)
indexWriter.close();
}


}
public int delindexSingle(String path,CmsAcquisition acqu)
throws CorruptIndexException, LockObtainFailedException,
IOException {
Analyzer luceneAnalyzer = new IKAnalyzer();
IndexWriter indexWriter = null;
Directory indexDir = null ;
try { 
File docDir = new File(path);
if (!docDir.exists()) {  
docDir.mkdirs();  
}  
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer);
indexDir=new SimpleFSDirectory(new File(path));
indexWriter = new IndexWriter(indexDir,config);
           int count = cDao.luceneDeleteIndexSingle(acqu,indexWriter);
           return count;
}  finally {  
if(indexWriter!=null)
indexWriter.close();
}
}
public int delindexSingleForKeyWord(String path,String  key,String value)
throws CorruptIndexException, LockObtainFailedException,
IOException {
Analyzer luceneAnalyzer = new IKAnalyzer();
IndexWriter indexWriter = null;
Directory indexDir = null ;
try { 
File docDir = new File(path);
if (!docDir.exists()) {  
docDir.mkdirs();  
}  
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer);
indexDir=new SimpleFSDirectory(new File(path));
indexWriter = new IndexWriter(indexDir,config);
           int count = cDao.luceneDeleteIndexSingleForKeyWord(key,value,indexWriter);
           return count;
}  finally {  
if(indexWriter!=null)
indexWriter.close();
}
}
public Pagination search(String path, String queryString,String shrink, String startTime, String endTime,int pageNo, int pageSize)
throws CorruptIndexException, IOException, ParseException {
try {  
            Directory dir = FSDirectory.open(new File(path));  
            IndexReader reader = IndexReader.open(dir);  
             IndexSearcher searcher = new IndexSearcher(reader);  
             TopScoreDocCollector res = TopScoreDocCollector.create((pageNo - 1) * pageSize+ pageSize, false);  
             Analyzer analyzer = new IKAnalyzer(true); 
             searcher.search(LuceneCrawler.createQuery(queryString,shrink,startTime,endTime,analyzer), res);  
  return LuceneCrawler.getResult(reader, res, pageNo,
pageSize);
         } catch (Exception e) {  
            e.printStackTrace();  
         }  
return null;
}
public Pagination searchSort(String path, String queryString,String shrink, String startTime, String endTime,int pageNo, int pageSize)
throws CorruptIndexException, IOException, ParseException {
try {  
            Directory dir = FSDirectory.open(new File(path));  
            IndexReader reader = IndexReader.open(dir);  
             IndexSearcher searcher = new IndexSearcher(reader);  
             TopScoreDocCollector res = TopScoreDocCollector.create((pageNo - 1) * pageSize+ pageSize, false);  
             Analyzer analyzer = new IKAnalyzer(true); 
             Sort a = new Sort(new SortField[]{new SortField(LuceneCrawler.INFODATE, SortField.DOUBLE, true)});
             TopDocs topDocs= searcher.search(LuceneCrawler.createQuery(queryString,shrink,startTime,endTime,analyzer), null,(pageNo - 1) * pageSize+ pageSize, a);
  return LuceneCrawler.getResultSort(topDocs,searcher, res, pageNo,
pageSize);
         } catch (Exception e) {  
            e.printStackTrace();  
         }  
return null;
}


public int luceneWriteIndexSingle(LuceneCrawler doc,IndexWriter writer) throws CorruptIndexException, IOException {
int count = 1;
writer.addDocument(LuceneCrawler.createDocument(doc));
return count;
}
public int luceneDeleteIndexSingle(LuceneCrawler doc,IndexWriter writer) throws CorruptIndexException, IOException {
int count = 1;
writer.deleteDocuments(new Term(LuceneCrawler.SID, LuceneCrawler.SID));
return count;
}
private CrawlerHistoryDao cDao;
@Autowired
public void setcDao(CrawlerHistoryDao cDao) {
this.cDao = cDao;
}




}
0 0
原创粉丝点击