操作lucene索引的工具类

来源：互联网　发布：linux c开发工程师　编辑：程序博客网　时间：2024/06/05 20:14

话不多说，说明都写在代码注释里了。

/**
 * Small demo helper around a file-system Lucene index of {@link Article}s:
 * add, delete, update, list, search, paginate and highlight.
 *
 * <p>{@link #init()} must be called before any other instance method, because
 * it creates the analyzer, the directory and the date formatter those methods
 * use. Call {@link #destory()} when done to release them.
 *
 * <p>Not designed for concurrent use: a new {@code IndexWriter} is opened per
 * write operation and the shared {@code SimpleDateFormat} is not thread-safe.
 */
public class OperatorIndex {
    /** File-system location of the index. */
    public static final String INDEX_PATH = "D:/indexDir";

    // Analyzer used when writing documents and when highlighting.
    private Analyzer analyzer = null;
    // Index location on disk (INDEX_PATH as a File).
    private File indexFile = null;
    // Lucene directory opened on indexFile.
    private Directory directory = null;
    // Formatter for the timestamps in log lines; created in init().
    SimpleDateFormat simpleDateFormat;

    /**
     * Creates the analyzer, opens the index directory and prepares the
     * timestamp formatter.
     *
     * @throws IOException if the directory cannot be opened
     */
    public void init() throws IOException {
        analyzer = new IKAnalyzer();
        indexFile = new File(INDEX_PATH);
        directory = FSDirectory.open(indexFile);
        simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("##初始化完成##");
    }

    /**
     * Converts an article into a Lucene document with stored, analyzed
     * {@code id}, {@code title} and {@code content} fields.
     *
     * <p>NOTE(review): {@code id} is indexed as an analyzed text field, while
     * deletes and updates match it with an exact {@code Term}. This relies on
     * the analyzer emitting the number as a single token; a non-analyzed
     * string field would be the safer choice. Confirm before changing, since
     * it affects existing indexes.
     *
     * @param article the article to convert
     * @return a new document holding the article's three fields
     */
    public Document createDocument(Article article) {
        Document document = new Document();
        document.add(new TextField("id", article.getId() + "", Field.Store.YES));
        document.add(new TextField("title", article.getTitle(), Field.Store.YES));
        document.add(new TextField("content", article.getContent(),
                Field.Store.YES));
        return document;
    }

    /**
     * Returns the current time formatted as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @return the formatted timestamp
     */
    public String getDate() {
        return simpleDateFormat.format(new Date());
    }

    /**
     * Prints every document in the index as an {@link Article}.
     *
     * <p>Documents without a stored {@code id} are reported and skipped
     * instead of failing in {@code Integer.parseInt(null)}.
     *
     * <p>NOTE(review): the loop walks all document numbers below
     * {@code maxDoc()}; presumably this can include deleted documents that
     * have not been merged away yet. Verify against the Lucene version in use.
     *
     * @throws IOException if the index cannot be read
     */
    public void openIndexFile() throws IOException {
        System.out.println("读取索引开始...");
        IndexReader indexReader = IndexReader.open(directory);
        try {
            int maxDoc = indexReader.maxDoc();
            System.out.println("maxDoc:" + maxDoc);
            for (int i = 0; i < maxDoc; i++) {
                Document document = indexReader.document(i);
                String id = document.get("id");
                if (id == null) {
                    System.out.println("id 为空");
                    continue;
                }
                Article article = new Article();
                article.setId(Integer.parseInt(id));
                article.setTitle(document.get("title"));
                article.setContent(document.get("content"));
                System.out.println(article);
            }
        } finally {
            // Close the reader even when reading a document fails.
            indexReader.close();
        }
        System.out.println("读取索引结束");
    }

    /**
     * Adds one article to the index.
     *
     * @param article the article to index
     * @throws IOException if the index cannot be written
     */
    public void createIndex(Article article) throws IOException {
        // Build the document first so a bad article never opens a writer.
        Document document = createDocument(article);
        IndexWriter indexWriter = newWriter();
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close so the index write lock is released.
            indexWriter.close();
        }
        logWriteSuccess();
    }

    /**
     * Adds several articles to the index using a single writer, so the batch
     * is committed once instead of once per article.
     *
     * @param articles the articles to index; {@code null} or empty is a no-op
     * @throws IOException if the index cannot be written
     */
    public void createIndexes(List<Article> articles) throws IOException {
        if (articles == null || articles.isEmpty()) {
            return;
        }
        IndexWriter indexWriter = newWriter();
        try {
            for (Article article : articles) {
                indexWriter.addDocument(createDocument(article));
            }
        } finally {
            indexWriter.close();
        }
        logWriteSuccess();
    }

    /**
     * Deletes the documents whose {@code id} term equals the given id.
     * Prints a failure message and does nothing when the index directory does
     * not exist.
     *
     * @param id the article id to delete
     * @throws IOException if the index cannot be written
     */
    public void deleteIndex(int id) throws IOException {
        if (!indexFile.exists()) {
            System.out.println("删除索引失败");
            return;
        }
        IndexWriter indexWriter = newWriter();
        try {
            indexWriter.deleteDocuments(new Term("id", String.valueOf(id)));
        } finally {
            indexWriter.close();
        }
        // Report success only after the writer has closed without error.
        System.out.println("[" + getDate() + "]" + "lucene删除索引成功");
    }

    /**
     * Deletes the index entries of all given articles, best effort: a failure
     * on one article is reported and the remaining ones are still processed.
     *
     * @param articles the articles to delete; {@code null} or empty is a no-op
     */
    public void deleteIndexes(List<Article> articles) {
        if (articles == null || articles.isEmpty()) {
            return;
        }
        for (Article article : articles) {
            try {
                deleteIndex(article.getId());
            } catch (IOException e) {
                System.out.println("删除索引失败");
                e.printStackTrace();
            }
        }
    }

    /**
     * Replaces the indexed version of an article (matched by its {@code id}
     * term) with the given one, using the writer's own update operation so
     * the delete and the add happen through a single writer.
     *
     * @param article the new version of the article
     * @throws IOException if the index cannot be written
     */
    public void updateIndex(Article article) throws IOException {
        Document document = createDocument(article);
        IndexWriter indexWriter = newWriter();
        try {
            indexWriter.updateDocument(
                    new Term("id", String.valueOf(article.getId())), document);
        } finally {
            indexWriter.close();
        }
        logWriteSuccess();
    }

    /**
     * Searches the {@code title} field and prints up to 100 hits.
     * A {@code null} or blank keyword is reported as "not found".
     *
     * @param keyword the query text, parsed with the query parser syntax
     */
    public void searchIndex(String keyword) {
        if (keyword == null || keyword.trim().isEmpty()) {
            System.out.println("很遗憾！没有找到！");
            return;
        }
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            QueryParser queryParser = new QueryParser(Version.LATEST, "title",
                    new IKAnalyzer(false));
            Query query = queryParser.parse(keyword.trim());

            ScoreDoc[] scoreDocs = indexSearcher.search(query, 100).scoreDocs;
            if (scoreDocs == null || scoreDocs.length == 0) {
                System.out.println("很遗憾！没有找到！");
                return;
            }
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(document.toString());
                System.out.println("[title:" + document.get("title")
                        + ",content:" + document.get("content") + "]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexReader);
        }
    }

    /**
     * Searches {@code title} and {@code content} and prints the titles of one
     * page of hits.
     *
     * <p>Only as many top hits as are needed to reach the end of the requested
     * page are collected, so any page can be reached (the earlier fixed limit
     * of 100 made later pages unreachable).
     *
     * @param keyword     the query text; {@code null} or blank yields no hits
     * @param pageSize    number of hits per page, at least 1
     * @param currentPage 1-based page number, at least 1
     * @throws IllegalArgumentException if {@code pageSize} or
     *                                  {@code currentPage} is less than 1
     */
    public void paginationQuery(String keyword, int pageSize, int currentPage) {
        if (pageSize < 1 || currentPage < 1) {
            throw new IllegalArgumentException("pageSize and currentPage must be >= 1, got pageSize="
                    + pageSize + ", currentPage=" + currentPage);
        }
        if (keyword == null || keyword.trim().isEmpty()) {
            System.out.println("总数：" + 0);
            return;
        }
        String[] fields = { "title", "content" };
        IndexReader indexReader = null;
        try {
            MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(
                    Version.LATEST, fields, new IKAnalyzer(false));
            Query query = multiFieldQueryParser.parse(keyword.trim());

            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // long arithmetic avoids int overflow for large page numbers.
            long begin = (long) (currentPage - 1) * pageSize;
            int wanted = (int) Math.min(begin + pageSize, (long) Integer.MAX_VALUE);

            TopDocs topDocs = indexSearcher.search(query, wanted);
            int totalHits = topDocs.totalHits;
            System.out.println("总数：" + totalHits);

            // At most `wanted` hits come back, so the page ends where the array ends.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (long i = begin; i < scoreDocs.length; i++) {
                Document document = indexSearcher.doc(scoreDocs[(int) i].doc);
                System.out.println("[title：" + document.get("title") + "]");
            }
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexReader);
        }
    }

    /**
     * Runs {@link #highlighterSearch(String)} with the demo term "中国".
     */
    public void highlighterSearch() {
        highlighterSearch("中国");
    }

    /**
     * Finds documents whose {@code content} field contains the exact term
     * {@code keyword} and prints each hit with the term wrapped in 【 】.
     *
     * @param keyword the exact (already tokenized) term to look up
     * @throws IllegalArgumentException if {@code keyword} is {@code null}
     */
    public void highlighterSearch(String keyword) {
        if (keyword == null) {
            throw new IllegalArgumentException("keyword must not be null");
        }
        IndexReader indexReader = null;
        try {
            indexReader = IndexReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            Term term = new Term("content", keyword);
            TermQuery termQuery = new TermQuery(term);

            TopDocs topDocs = indexSearcher
                    .search(termQuery, Integer.MAX_VALUE);
            System.out.println("查询结果数：" + topDocs.totalHits);
            System.out.println("最大的评分" + topDocs.getMaxScore());

            // One highlighter serves all hits; only the fragment size varies.
            Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter("【", "】"),
                    new QueryScorer(termQuery));

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("检索条件:" + term.toString());
                String content = document.get("content");
                System.out.println("content:" + content);

                // Check before touching content.length() below.
                if (content == null || "".equals(content)) {
                    continue;
                }

                // Fragment as long as the text, so the whole content is returned.
                highlighter.setTextFragmenter(new SimpleFragmenter(content
                        .length()));
                // First argument is the field name, not the text.
                TokenStream tokenStream = analyzer.tokenStream("content",
                        new StringReader(content));
                String bestFragment = highlighter.getBestFragment(
                        tokenStream, content);
                System.out.println("高亮显示：" + "检索结果如下所示:");
                System.out.println(bestFragment);
                System.out.println("文件内容:" + content);
                // Relevance score of this hit.
                System.out.println(scoreDoc.score);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexReader);
        }
    }

    /**
     * Releases the analyzer and the directory. Safe to call when
     * {@link #init()} was never run; the directory is closed even if closing
     * the analyzer fails.
     *
     * @throws IOException if closing the directory fails
     */
    public void destory() throws IOException {
        try {
            if (analyzer != null) {
                analyzer.close();
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
        System.out.println("销毁成功");
    }

    // Opens a writer with a fresh config; a config object is created per writer.
    private IndexWriter newWriter() throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(
                Version.LATEST, analyzer));
    }

    // Prints the timestamped "index written" log line.
    private void logWriteSuccess() {
        System.out.println("[" + getDate() + "]" + "lucene写入索引到" + "["
                + indexFile.getAbsolutePath() + "]" + "成功");
    }

    // Closes a reader opened by a search method, reporting (not throwing) failures.
    private void closeQuietly(IndexReader indexReader) {
        if (indexReader == null) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo run: rebuilds the index from five sample articles, then lists,
     * searches, paginates and highlights.
     */
    public static void main(String[] args) throws IOException {
        OperatorIndex index = new OperatorIndex();
        index.init();
        try {
            List<Article> articles = new ArrayList<Article>();
            articles.add(new Article(1, "中国", "11111gdfjs中国"));
            articles.add(new Article(2, "我爱你中国", "11111gdfjs我爱你中国"));
            articles.add(new Article(3, "国中之国", "fdsab;1gdfjs国中之国"));
            articles.add(new Article(4, "44", "111gdsa11gdfjs中国将成为世界上最强大的国家"));
            articles.add(new Article(5, "55", "111gdas11gdfjs"));

            index.deleteIndexes(articles);
            index.createIndexes(articles);
            index.openIndexFile();
            index.searchIndex("中国");
            index.paginationQuery("中国", 1, 1);
            index.highlighterSearch();
        } finally {
            // Release the analyzer and directory even when a step fails.
            index.destory();
        }
    }
}

Article.java 实体类：

public class Article {
private int id;
private String title;
private String content;

public int getId() {
return id;
}

public void setId(int id) {
this.id = id;
}

public String getTitle() {
return title;
}

public void setTitle(String title) {
this.title = title;
}

public String getContent() {
return content;
}

public void setContent(String content) {
this.content = content;
}

public Article(int id, String title, String content) {
super();
this.id = id;
this.title = title;
this.content = content;
}

public Article() {
// TODO Auto-generated constructor stub
}

@Override
public String toString() {
// TODO Auto-generated method stub
return "article[id:" + id + ",title:" + title + ",content:" + content
+ "]";
}

}

所有 Lucene 相关 jar 包的下载链接：http://yun.baidu.com/share/link?shareid=3712863572&uk=2787598461

0 0