Lucene入门
来源:互联网 发布:程序员经常去的论坛 编辑:程序博客网 时间:2024/05/17 20:28
public class LuceneTest {//创建索引 @Test public void test1() throws IOException { //创建文档对象 Document document = new Document(); document.add(new StringField("id","1", Field.Store.YES)); document.add(new TextField("content","谷歌地图之父跳槽FaceBook", Field.Store.YES)); Document document1 = new Document(); document1.add(new StringField("id","2", Field.Store.YES)); document1.add(new TextField("content","谷歌地图字符加盟FaceBook", Field.Store.YES)); FSDirectory open = FSDirectory.open(new File("D:\\indexDir")); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(open, config); writer.addDocument(document); writer.addDocument(document1); writer.commit(); writer.close(); } @Test public void test2() throws IOException { Document document = new Document(); document.add(new StringField("id","1", Field.Store.YES)); document.add(new TextField("content","谷歌地图之父加盟FaceBook", Field.Store.YES)); FSDirectory open = FSDirectory.open(new File("D:\\indexDir")); Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(open, config); writer.addDocument(document); writer.commit(); writer.close(); } //创建索引 @Test public void test3() throws IOException { List<Document> list = new ArrayList<>(); FieldType type = new FieldType(); type.setIndexed(true);//是否索引 type.setStored(true);//是否存储 type.setTokenized(true);//是否分词 Document d1 = new Document(); d1.add(new Field("id","1",type)); d1.add(new TextField("content","传智播客之父跳槽FaceBook,这是真的吗?", Field.Store.YES)); Document d2 = new Document(); d2.add(new Field("id","2", type)); d2.add(new TextField("content","谷歌地图之父加盟FaceBook", Field.Store.YES)); Document d3 = new Document(); d3.add(new Field("id","3",type)); d3.add(new TextField("content","谷歌地图创始人拉斯离开谷歌加盟Facebook", Field.Store.YES)); Document d4 = new Document(); d4.add(new Field("id","4",type)); d4.add(new TextField("content","谷歌地图之父跳槽Facebook与Wave项目取消有关", Field.Store.YES)); Document d5 = new Document(); d5.add(new Field("id","5",type)); TextField field = new TextField("content", "谷歌地图之父拉斯加盟社交网站Facebook", Field.Store.YES); field.setBoost(10); d5.add(field); list.add(d1); list.add(d2); list.add(d3); list.add(d4); list.add(d5); FSDirectory open = FSDirectory.open(new File("E:\\indexDir")); Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(open, config); writer.addDocuments(list); writer.commit(); writer.close(); } //查询索引 @Test public void test4() throws ParseException, IOException { QueryParser parser = new QueryParser("content", new IKAnalyzer()); Query query = parser.parse("谷歌"); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("E:\\indexDir")))); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); System.out.println(topDocs.totalHits); ScoreDoc[] docs = topDocs.scoreDocs; for (ScoreDoc sd : docs) { System.out.println(sd.doc); Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id")); System.out.println(doc.get("content")); } } @Test public void test5() throws ParseException, IOException { QueryParser content = new QueryParser("content", new IKAnalyzer()); Query query = content.parse("跳槽"); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("e:\\indexDir")))); TopDocs docs = searcher.search(query, Integer.MAX_VALUE); System.out.println(docs.totalHits); ScoreDoc[] scoreDocs = docs.scoreDocs; for (ScoreDoc scoreDoc: scoreDocs) { System.out.println(scoreDoc.doc); Document doc = searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")+"-----------------"+doc.get("content")); } } //词条查询 @Test public void test6() throws IOException { TermQuery query = new TermQuery(new Term("content", "谷歌地图之父加盟FaceBook")); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("E:\\indexDir")))); TopDocs docs = searcher.search(query, Integer.MAX_VALUE); System.out.println(docs.totalHits); ScoreDoc[] scoreDocs = docs.scoreDocs; for (ScoreDoc sd: scoreDocs) { System.out.println(sd.doc); System.out.println(sd.score); Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id")); System.out.println(doc.get("content")); } } //模糊查询 @Test public void test7() throws IOException { WildcardQuery query = new WildcardQuery(new Term("content","*"+"吗"+"*")); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("E:\\indexDir")))); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); System.out.println("命中数目:"+topDocs.totalHits); ScoreDoc[] docs = topDocs.scoreDocs; for (ScoreDoc scoreDoc:docs) { System.out.println("内置id:"+scoreDoc.doc); System.out.println("得分:"+scoreDoc.score); Document doc = searcher.doc(scoreDoc.doc); System.out.println(doc.get("id")); System.out.println(doc.get("content")); } } @Test public void test8() throws IOException { List<Document> list = new ArrayList<>(); Document document = new Document(); document.add(new StringField("id","1", Field.Store.YES)); list.add(document); FSDirectory open = FSDirectory.open(new File("E:\\indexDir")); IKAnalyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(open, config); writer.addDocuments(list); writer.commit(); writer.close(); } //相似度查询 @Test public void test9() throws IOException { FuzzyQuery query = new FuzzyQuery(new Term("content","胡歌"),2); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("e:\\indexDir")))); TopDocs search = searcher.search(query, Integer.MAX_VALUE); System.out.println(search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; for (ScoreDoc doc: scoreDocs) { System.out.println(doc.doc); System.out.println(doc.score); Document document = searcher.doc(doc.doc); System.out.println(document.get("id")); System.out.println(document.get("content")); } } //查询索引库所有的内容 @Test public void test10() throws IOException { MatchAllDocsQuery query = new MatchAllDocsQuery(); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("e://indexDir")))); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE); ScoreDoc[] docs = topDocs.scoreDocs; for (ScoreDoc sd : docs) { System.out.println(sd.doc); System.out.println(sd.score); Document doc = searcher.doc(sd.doc); System.out.println(doc.get("id")); System.out.println(doc.get("content")); } } @Test public void test11() throws IOException { FieldType fieldType = new FieldType(); fieldType.setStored(true); fieldType.setTokenized(true); fieldType.setIndexed(true); Document document = new Document(); document.add(new Field("id","1",fieldType)); document.add(new TextField("content","我就是祁双伟", Field.Store.YES)); FSDirectory open = FSDirectory.open(new File("e:\\indexDir")); IKAnalyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(open, config); writer.updateDocument(new Term("id","4"),document); writer.commit(); writer.close(); } //删除索引 @Test public void test12() throws IOException { FSDirectory open = FSDirectory.open(new File("e:\\indexDir")); IKAnalyzer analyzer = new IKAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(open, config); writer.deleteAll(); writer.commit(); writer.close(); } //高亮显示 @Test public void test13() throws ParseException, IOException, InvalidTokenOffsetsException { QueryParser parser = new QueryParser("content",new IKAnalyzer()); Query query = parser.parse("谷歌");// MatchAllDocsQuery query = new MatchAllDocsQuery(); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); Sort sort = new Sort(new SortField("id", SortField.Type.LONG, false)); IndexSearcher searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File("E:\\indexDir")))); TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE,sort); ScoreDoc[] docs = topDocs.scoreDocs; for (ScoreDoc sd : docs) { System.out.println(sd.doc); Document document = searcher.doc(sd.doc);// System.out.println(document.get("id")); String s = highlighter.getBestFragment(new IKAnalyzer(), "content", document.get("content")); System.out.println(s+"得分是:"+sd.score); } }}
0 0
- lucene 入门
- lucene入门
- lucene入门
- Lucene入门
- LUCENE入门
- Lucene入门
- Lucene入门
- Lucene入门
- lucene入门
- Lucene 入门
- Lucene入门
- Lucene入门
- Lucene入门
- Lucene入门
- lucene入门
- Lucene入门
- Lucene入门
- Lucene入门
- android设备的编译规则
- DAY_02 HTML小白学习笔记
- android产品信息设置
- Solr部署tomcat服务器方式
- Codeforces 797E Array Queries 分块思想
- Lucene入门
- 《精彩绝伦的CSS》——布局(二)居中块状框
- 1028. List Sorting (25)-PAT甲级
- 用python生成与调用cntk模型代码演示
- Codeforces Round #409 (rated, Div. 2, based on VK Cup 2017 Round 2) A -- D
- [leetCode刷题笔记]2017.04.16
- 我的编程之旅——开篇记
- 在北京大学,得罪方正者必成“问题人”
- 网站在浏览器头部的小图标如何弄