操作lucene索引的工具类

来源:互联网 发布:linux c开发工程师 编辑:程序博客网 时间:2024/06/05 20:14

话不多说,说明都写在代码注释里。


/**
 * Demo utility that wraps the basic Lucene index operations for {@link Article} objects:
 * create, list, update, delete, keyword search, paginated search and highlighted search.
 *
 * <p>Call {@link #init()} before any other instance method and {@link #destory()} when done.
 * Results are printed to standard output. The instance holds mutable state (including a shared
 * {@code SimpleDateFormat}) and makes no attempt to be safe for concurrent use.
 */
public class OperatorIndex {
    /** File-system directory in which the index is stored. */
    public static final String INDEX_PATH = "D:/indexDir";

    /** Maximum number of hits fetched by {@link #searchIndex(String)}. */
    private static final int MAX_SEARCH_HITS = 100;

    /** Keyword used by the no-argument {@link #highlighterSearch()}. */
    private static final String DEFAULT_HIGHLIGHT_KEYWORD = "中国";

    // Analyzer used for indexing and for re-tokenizing text when highlighting.
    private Analyzer analyzer = null;
    // Directory on disk that holds the index files.
    private File indexFile = null;
    // Lucene directory object opened on indexFile.
    private Directory directory = null;
    // Formatter for the timestamps printed in log messages.
    SimpleDateFormat simpleDateFormat;

    /**
     * Creates the analyzer, opens the index directory and prepares the date formatter.
     *
     * @throws IOException if the index directory cannot be opened
     */
    public void init() throws IOException {
        analyzer = new IKAnalyzer();
        indexFile = new File(INDEX_PATH);
        directory = FSDirectory.open(indexFile);
        simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("##初始化完成##");
    }

    /**
     * Converts an article (for example a database row that should be searchable) into a Lucene
     * document with stored {@code id}, {@code title} and {@code content} text fields.
     *
     * @param article the article to convert
     * @return a new document holding the article's three fields
     */
    public Document createDocument(Article article) {
        Document document = new Document();
        document.add(new TextField("id", article.getId() + "", Field.Store.YES));
        document.add(new TextField("title", article.getTitle(), Field.Store.YES));
        document.add(new TextField("content", article.getContent(), Field.Store.YES));
        return document;
    }

    /**
     * Returns the current time formatted as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @return the formatted current date and time
     */
    public String getDate() {
        return simpleDateFormat.format(new Date());
    }

    /**
     * Prints every document stored in the index as an {@link Article}.
     *
     * <p>Documents without a stored {@code id} are reported and skipped; previously they caused a
     * {@code NumberFormatException} because the missing id was still parsed.
     *
     * @throws IOException if the index cannot be opened or read
     */
    public void openIndexFile() throws IOException {
        System.out.println("读取索引开始...");
        IndexReader indexReader = IndexReader.open(directory);
        try {
            // NOTE(review): maxDoc() may also count deleted documents that have not been merged
            // away yet - confirm whether those should be filtered out here.
            int maxDoc = indexReader.maxDoc();
            System.out.println("maxDoc:" + maxDoc);
            for (int i = 0; i < maxDoc; i++) {
                Document document = indexReader.document(i);
                String id = document.get("id");
                if (id == null) {
                    System.out.println("id 为空");
                    continue;
                }
                Article article = new Article();
                article.setId(Integer.parseInt(id));
                article.setTitle(document.get("title"));
                article.setContent(document.get("content"));
                System.out.println(article);
            }
        } finally {
            // Always release the reader, even when reading a document fails.
            indexReader.close();
        }
        System.out.println("读取索引结束");
    }

    /**
     * Adds a single article to the index and commits it.
     *
     * @param article the article to index
     * @throws IOException if the index cannot be written
     */
    public void createIndex(Article article) throws IOException {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.addDocument(createDocument(article));
        } finally {
            // Closing commits the change and releases the index write lock.
            indexWriter.close();
        }
        printWriteSuccess();
    }

    /**
     * Adds several articles to the index using one writer and one commit for the whole batch.
     *
     * @param articles the articles to index; {@code null} or an empty list is a no-op
     * @throws IOException if the index cannot be written
     */
    public void createIndexes(List<Article> articles) throws IOException {
        if (articles == null || articles.isEmpty()) {
            return;
        }
        IndexWriter indexWriter = openWriter();
        try {
            for (Article article : articles) {
                indexWriter.addDocument(createDocument(article));
            }
        } finally {
            indexWriter.close();
        }
        // One message per article, reported once the batch has been committed.
        for (int i = 0; i < articles.size(); i++) {
            printWriteSuccess();
        }
    }

    /**
     * Deletes every document whose {@code id} field contains the given id.
     *
     * <p>If the index directory does not exist, a failure message is printed and nothing else
     * happens.
     *
     * @param id the article id to remove
     * @throws IOException if the index cannot be written
     */
    public void deleteIndex(int id) throws IOException {
        if (!indexFile.exists()) {
            System.out.println("删除索引失败");
            return;
        }
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.deleteDocuments(new Term("id", String.valueOf(id)));
        } finally {
            indexWriter.close();
        }
        // Report success only after the deletion has been committed by close().
        System.out.println("[" + getDate() + "]" + "lucene删除索引成功");
    }

    /**
     * Deletes the index entries of all given articles. A failure for one article is reported and
     * does not stop the remaining deletions.
     *
     * @param articles the articles to remove; {@code null} or an empty list is a no-op
     */
    public void deleteIndexes(List<Article> articles) {
        if (articles == null || articles.isEmpty()) {
            return;
        }
        for (Article article : articles) {
            try {
                deleteIndex(article.getId());
            } catch (IOException e) {
                System.out.println("删除索引失败");
                e.printStackTrace();
            }
        }
    }

    /**
     * Replaces the indexed version of an article.
     *
     * <p>The old document is deleted and the new one added through a single
     * {@code IndexWriter.updateDocument} call, so both steps are committed together rather than
     * through two separate writers.
     *
     * @param article the article whose index entry should be replaced
     * @throws IOException if the index cannot be written
     */
    public void updateIndex(Article article) throws IOException {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.updateDocument(
                    new Term("id", String.valueOf(article.getId())), createDocument(article));
        } finally {
            indexWriter.close();
        }
        System.out.println("[" + getDate() + "]" + "lucene更新索引成功");
    }

    /**
     * Searches the {@code title} field for the keyword and prints up to 100 matching documents.
     *
     * @param keyword the query text; {@code null} or blank is treated as "nothing found"
     */
    public void searchIndex(String keyword) {
        if (keyword == null || keyword.trim().isEmpty()) {
            System.out.println("很遗憾!没有找到!");
            return;
        }
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            QueryParser queryParser = new QueryParser(Version.LATEST, "title",
                    new IKAnalyzer(false));
            Query query = queryParser.parse(keyword.trim());

            TopDocs topDocs = indexSearcher.search(query, MAX_SEARCH_HITS);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            if (scoreDocs == null || scoreDocs.length == 0) {
                System.out.println("很遗憾!没有找到!");
                return;
            }
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(document.toString());
                System.out.println("[title:" + document.get("title")
                        + ",content:" + document.get("content") + "]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            closeReader(indexReader);
        }
    }

    /**
     * Searches the {@code title} and {@code content} fields and prints the titles of one page of
     * results, preceded by the total hit count.
     *
     * <p>Only as many hits as are needed to reach the end of the requested page are fetched, so
     * pages beyond the first 100 hits are reachable.
     *
     * @param keyword     the query text; {@code null} or blank prints a total of 0
     * @param pageSize    number of results per page, at least 1
     * @param currentPage 1-based page number, at least 1
     * @throws IllegalArgumentException if {@code pageSize} or {@code currentPage} is below 1
     */
    public void paginationQuery(String keyword, int pageSize, int currentPage) {
        if (pageSize < 1 || currentPage < 1) {
            throw new IllegalArgumentException("pageSize and currentPage must be >= 1, got pageSize="
                    + pageSize + ", currentPage=" + currentPage);
        }
        if (keyword == null || keyword.trim().isEmpty()) {
            System.out.println("总数:" + 0);
            return;
        }
        String[] fields = { "title", "content" };
        IndexReader indexReader = null;
        try {
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(
                    Version.LATEST, fields, new IKAnalyzer(false));
            Query query = multiFieldQueryParser.parse(keyword.trim());

            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Compute the window in long arithmetic so large page numbers cannot overflow.
            long offset = (long) (currentPage - 1) * pageSize;
            int begin = (int) Math.min(offset, Integer.MAX_VALUE);
            int fetch = (int) Math.min(offset + pageSize, Integer.MAX_VALUE);

            TopDocs topDocs = indexSearcher.search(query, fetch);
            System.out.println("总数:" + topDocs.totalHits);

            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            int end = Math.min(fetch, scoreDocs.length);
            for (int i = begin; i < end; i++) {
                Document document = indexSearcher.doc(scoreDocs[i].doc);
                System.out.println("[title:" + document.get("title") + "]");
            }
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(indexReader);
        }
    }

    /**
     * Runs the highlighted search for the default demo keyword.
     */
    public void highlighterSearch() {
        highlighterSearch(DEFAULT_HIGHLIGHT_KEYWORD);
    }

    /**
     * Finds documents whose {@code content} field contains the exact term and prints each hit
     * with the matched term wrapped in {@code 【】}, followed by the full content and the score.
     *
     * @param keyword the single term to look up; it is not analyzed
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     */
    public void highlighterSearch(String keyword) {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            Term term = new Term("content", keyword);
            TermQuery termQuery = new TermQuery(term);

            TopDocs topDocs = indexSearcher.search(termQuery, Integer.MAX_VALUE);
            System.out.println("查询结果数:" + topDocs.totalHits);
            System.out.println("最大的评分" + topDocs.getMaxScore());

            // The formatter and highlighter do not depend on the individual hit; build them once.
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("【", "】");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(termQuery));

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("检索条件:" + term.toString());
                String content = document.get("content");
                System.out.println("content:" + content);

                if (content == null || "".equals(content)) {
                    continue;
                }

                // One fragment as long as the text, so the whole content is highlighted.
                highlighter.setTextFragmenter(new SimpleFragmenter(content.length()));
                // Tokenize under the real field name with the shared analyzer.
                TokenStream tokenStream = analyzer.tokenStream("content",
                        new StringReader(content));
                String bestFragment = highlighter.getBestFragment(tokenStream, content);
                System.out.println("高亮显示:" + "检索结果如下所示:");
                System.out.println(bestFragment);
                System.out.println("文件内容:" + content);
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeReader(indexReader);
        }
    }

    /**
     * Releases the analyzer and the index directory. Safe to call even if {@link #init()} was
     * never called.
     *
     * @throws IOException if the directory cannot be closed
     */
    public void destory() throws IOException {
        try {
            if (analyzer != null) {
                analyzer.close();
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
        System.out.println("销毁成功");
    }

    // Opens a writer on the index; each writer gets its own IndexWriterConfig because a config
    // instance is used for one writer only.
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(Version.LATEST, analyzer));
    }

    // Prints the timestamped "written to <index path>" success message.
    private void printWriteSuccess() {
        System.out.println("[" + getDate() + "]" + "lucene写入索引到" + "["
                + indexFile.getAbsolutePath() + "]" + "成功");
    }

    // Closes the reader if it was opened, reporting (not propagating) a failure to close.
    private static void closeReader(IndexReader indexReader) {
        if (indexReader == null) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo: rebuilds the index from five sample articles and runs each kind of query.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened, written or closed
     */
    public static void main(String[] args) throws IOException {
        OperatorIndex index = new OperatorIndex();
        index.init();
        try {
            List<Article> articles = new ArrayList<Article>();
            articles.add(new Article(1, "中国", "11111gdfjs中国"));
            articles.add(new Article(2, "我爱你中国", "11111gdfjs我爱你中国"));
            articles.add(new Article(3, "国中之国", "fdsab;1gdfjs国中之国"));
            articles.add(new Article(4, "44", "111gdsa11gdfjs中国将成为世界上最强大的国家"));
            articles.add(new Article(5, "55", "111gdas11gdfjs"));

            index.deleteIndexes(articles);
            index.createIndexes(articles);
            index.openIndexFile();
            index.searchIndex("中国");
            index.paginationQuery("中国", 1, 1);
            index.highlighterSearch();
        } finally {
            // Release the analyzer and directory even if one of the demo steps fails.
            index.destory();
        }
    }
}



Article.java 实体类:

public class Article {
private int id;
private String title;
private String content;


public int getId() {
return id;
}


public void setId(int id) {
this.id = id;
}


public String getTitle() {
return title;
}


public void setTitle(String title) {
this.title = title;
}


public String getContent() {
return content;
}


public void setContent(String content) {
this.content = content;
}


public Article(int id, String title, String content) {
super();
this.id = id;
this.title = title;
this.content = content;
}


public Article() {
// TODO Auto-generated constructor stub
}


@Override
public String toString() {
// TODO Auto-generated method stub
return "article[id:" + id + ",title:" + title + ",content:" + content
+ "]";
}


}

所有 Lucene 相关 jar 包的下载链接:http://yun.baidu.com/share/link?shareid=3712863572&uk=2787598461


0 0
原创粉丝点击