lucene5--索引域选项

来源：互联网发布：淘宝互刷软件编辑：程序博客网时间：2024/04/18 05:09

先来看一个构造器：

  /**   * Create a field by specifying its name, value and how it will   * be saved in the index. Term vectors will not be stored in the index.   *    * @param name The name of the field   * @param value The string to process   * @param store Whether <code>value</code> should be stored in the index   * @param index Whether the field should be indexed, and if so, if it should   *  be tokenized before indexing    * @throws NullPointerException if name or value is <code>null</code>   * @throws IllegalArgumentException if the field is neither stored nor indexed    */  public Field(String name, String value, Store store, Index index) {    this(name, value, store, index, TermVector.NO);  }1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

我们向Document添加Field可以有更多的设置，那么都是什么意思呢？
name：字段名，很容易理解
value：字段值，也很容易理解
store和index怎么解释，下面就来看一下这两个选项的可选值：
Field.Store.YES或者NO(存储域选项)
设置为YES表示会把这个域中的内容完全存储到文件中，方便进行文本的还原
设置为NO表示把这个域的内容不存储到文件中，但是可以被索引，此时内容无法完全还原
Field.Index(索引选项)
Index.ANALYZED:进行分词和索引，适用于标题、内容等
Index.NOT_ANALYZED:进行索引，但是不进行分词，如身份证号、姓名、ID等，适用于精确搜索
Index.ANALYZED_NO_NORMS:进行分词和索引，但是不存储norms信息。norms中保存的是索引阶段的域/文档加权(boost)以及域长度归一化信息，用于评分
Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息（但仍然会被索引）
Index.NO:不进行索引
写个例子看看，由于pom文件与之前的一样，就不贴出了，直接看例子代码：
3.5版本：

package com.darren.lucene35;import java.io.File;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class IndexUtil {    private static final String[] ids = { "1", "2", "3" };    private static final String[] authors = { "Darren", "Tony", "Grylls" };    private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };    private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",            "I like Java" };    /**     * 建立索引     */    public static void index() {        IndexWriter indexWriter = null;        try {            // 1、创建Directory            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));            // 2、创建IndexWriter            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);            indexWriter = new IndexWriter(directory, config);            int size = ids.length;            for (int i = 0; i < size; i++) {                // 3、创建Document对象                Document document = new Document();                // 看看四个参数的意思                /**                 * Create a field by specifying its name, value and how it will be saved in the index. Term vectors will                 * not be stored in the index.                 
*                  * @param name                 *            The name of the field                 * @param value                 *            The string to process                 * @param store                 *            Whether <code>value</code> should be stored in the index                 * @param index                 *            Whether the field should be indexed, and if so, if it should be tokenized before indexing                 *                  *            public Field(String name, String value, Store store, Index index) { this(name, value,                 *            store, index, TermVector.NO); }                 */                // 4、为Document添加Field                // 对ID存储，但是不分词也不存储norms信息,这个norms中包括了创建索引的时间和权值等信息                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));                // 对Author存储，但是不分词也不存储norms信息,这个norms中包括了创建索引的时间和权值等信息                document.add(new Field("author", authors[i], Field.Store.YES, Field.Index.NOT_ANALYZED));                // 对Title存储，分词                document.add(new Field("title", titles[i], Field.Store.YES, Field.Index.ANALYZED));                // 对Content不存储，但是分词                /**                 * 注：添加内容或文件是默认是不存储的，这个查询时可以证明这个问题                 *                  * new Field(name, reader)                 *                  * 那么问题来了，如果想存文件内容怎么办呢？                 *                  * 那就把文件读出来，比如读出字符串，然后不就能按字符串的方式存储啦                 */                document.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));                // 5、通过IndexWriter添加文档到索引中                indexWriter.addDocument(document);            }        } catch (Exception e) {            e.printStackTrace();        } finally {            try {                if (indexWriter != null) {                    indexWriter.close();                }            } catch (Exception e) {                e.printStackTrace();            }        }    }    /**     * 搜索     */   
 public static void search() {        IndexReader indexReader = null;        try {            // 1、创建Directory            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));            // 2、创建IndexReader            indexReader = IndexReader.open(directory);            // 3、根据IndexReader创建IndexSearch            IndexSearcher indexSearcher = new IndexSearcher(indexReader);            // 4、创建搜索的Query            // 使用默认的标准分词器            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);            // 在content中搜索Lucene            // 创建parser来确定要搜索文件的内容，第二个参数为搜索的域            QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content", analyzer);            // 创建Query表示搜索域为content包含Lucene的文档            Query query = queryParser.parse("Lucene");            // 5、根据searcher搜索并且返回TopDocs            TopDocs topDocs = indexSearcher.search(query, 10);            // 6、根据TopDocs获取ScoreDoc对象            ScoreDoc[] scoreDocs = topDocs.scoreDocs;            for (ScoreDoc scoreDoc : scoreDocs) {                // 7、根据searcher和ScoreDoc对象获取具体的Document对象                Document document = indexSearcher.doc(scoreDoc.doc);                // 8、根据Document对象获取需要的值                System.out.println("id : " + document.get("id"));                System.out.println("author : " + document.get("author"));                System.out.println("title : " + document.get("title"));                /**                 * 看看content能不能打印出来，为什么？                 */                System.out.println("content : " + document.get("content"));            }        } catch (Exception e) {            e.printStackTrace();        } finally {            try {                if (indexReader != null) {                    indexReader.close();                }            } catch (Exception e) {                e.printStackTrace();            }        }    }}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

我在注释中留了问题，现在我们是这样添加content字段的：

document.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));1
2
1
2

测试代码如下：

package com.darren.lucene35;import org.junit.Test;public class IndexUtilTest {    @Test    public void testIndex() {        IndexUtil.index();    }    @Test    public void testSearch() {        IndexUtil.search();    }}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

现在跑一下测试看看效果，结果如下：

id : 2author : Tonytitle : Hello Lucenecontent : null1
2
3
4
1
2
3
4

为什么content为null，就是因为没有存，那么我们存一下看看

document.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));1
1

再跑一下测试，注意，要先跑索引，再跑查询

id : 2  author : Tony  title : Hello Lucene  content : Today is my first day to study Lucene  1
2
3
4
1
2
3
4

现在content有值了
索引选项与此类同，不再赘述
4.5版本：
这里先要看看3.5版本的Store和Index到底设置了什么东西，其实在Field的构造器中是这样设置的：

this.isStored = store.isStored();  this.isIndexed = index.isIndexed();  this.isTokenized = index.isAnalyzed();  this.omitNorms = index.omitNorms();  1
2
3
4
5
1
2
3
4
5

是使用的这几个属性，那么这几个属性的值是什么呢：

  public static enum Store {    /** Store the original field value in the index. This is useful for short texts     * like a document's title which should be displayed with the results. The     * value is stored in its original form, i.e. no analyzer is used before it is     * stored.     */    YES {      @Override      public boolean isStored() { return true; }    },    /** Do not store the field value in the index. */    NO {      @Override      public boolean isStored() { return false; }    };    public abstract boolean isStored();  }  /** Specifies whether and how a field should be indexed. */  public static enum Index {    /** Do not index the field value. This field can thus not be searched,     * but one can still access its contents provided it is     * {@link Field.Store stored}. */    NO {      @Override      public boolean isIndexed()  { return false; }      @Override      public boolean isAnalyzed() { return false; }      @Override      public boolean omitNorms()  { return true;  }       },    /** Index the tokens produced by running the field's     * value through an Analyzer.  This is useful for     * common text. */    ANALYZED {      @Override      public boolean isIndexed()  { return true;  }      @Override      public boolean isAnalyzed() { return true;  }      @Override      public boolean omitNorms()  { return false; }         },    /** Index the field's value without using an Analyzer, so it can be searched.     * As no analyzer is used the value will be stored as a single term. This is     * useful for unique Ids like product numbers.     */    NOT_ANALYZED {      @Override      public boolean isIndexed()  { return true;  }      @Override      public boolean isAnalyzed() { return false; }      @Override      public boolean omitNorms()  { return false; }         },    /** Expert: Index the field's value without an Analyzer,     * and also disable the indexing of norms.  
Note that you     * can also separately enable/disable norms by calling     * {@link Field#setOmitNorms}.  No norms means that     * index-time field and document boosting and field     * length normalization are disabled.  The benefit is     * less memory usage as norms take up one byte of RAM     * per indexed field for every document in the index,     * during searching.  Note that once you index a given     * field <i>with</i> norms enabled, disabling norms will     * have no effect.  In other words, for this to have the     * above described effect on a field, all instances of     * that field must be indexed with NOT_ANALYZED_NO_NORMS     * from the beginning. */    NOT_ANALYZED_NO_NORMS {      @Override      public boolean isIndexed()  { return true;  }      @Override      public boolean isAnalyzed() { return false; }      @Override      public boolean omitNorms()  { return true;  }         },    /** Expert: Index the tokens produced by running the     *  field's value through an Analyzer, and also     *  separately disable the storing of norms.  See     *  {@link #NOT_ANALYZED_NO_NORMS} for what norms are     *  and why you may want to disable them. */    ANALYZED_NO_NORMS {      @Override      public boolean isIndexed()  { return true;  }      @Override      public boolean isAnalyzed() { return true;  }      @Override      public boolean omitNorms()  { return true;  }         };1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

明明白白，清清楚楚吧，是预定义好的，对应关系非常清楚，那么我们来看看4.5版本是怎么做的：

package com.darren.lucene45;import java.io.File;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.FieldType;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class IndexUtil {    private static final String[] ids = { "1", "2", "3" };    private static final String[] authors = { "Darren", "Tony", "Grylls" };    private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };    private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",            "I like Java" };    /**     * 建立索引     */    public static void index() {        IndexWriter indexWriter = null;        try {            // 1、创建Directory            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));            // 2、创建IndexWriter            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);            indexWriter = new IndexWriter(directory, config);            int size = ids.length;            for (int i = 0; i < size; i++) {                // 3、创建Document对象                Document document = new Document();                // 看看四个参数的意思                // 4、为Document添加Field                /**                 * Create field 
with String value.                 *                  * @param name                 *            field name                 * @param value                 *            string value                 * @param type                 *            field type                 * @throws IllegalArgumentException                 *             if either the name or value is null, or if the field's type is neither indexed() nor                 *             stored(), or if indexed() is false but storeTermVectors() is true.                 * @throws NullPointerException                 *             if the type is null                 *                  *             public Field(String name, String value, FieldType type)                 */                /**                 * 注意：这里与3.5版本不同，原来的构造函数已过时                 */                /**                 * 注：这里4.5版本使用FieldType代替了原来的Store和Index，不同的Field预定义了一些FieldType                 *                  */                // 对ID存储，但是不分词也不存储norms信息                FieldType idType = TextField.TYPE_STORED;                idType.setIndexed(false);                idType.setOmitNorms(false);                document.add(new Field("id", ids[i], idType));                // 对Author存储，但是不分词                FieldType authorType = TextField.TYPE_STORED;                authorType.setIndexed(false);                document.add(new Field("author", authors[i], authorType));                // 对Title存储，分词                document.add(new Field("title", titles[i], StringField.TYPE_STORED));                // 对Content不存储，但是分词                document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));                // 5、通过IndexWriter添加文档到索引中                indexWriter.addDocument(document);            }        } catch (Exception e) {            e.printStackTrace();        } finally {            try {                if (indexWriter != null) {                    indexWriter.close();                }            } catch (Exception e) {           
     e.printStackTrace();            }        }    }    /**     * 搜索     */    public static void search() {        DirectoryReader indexReader = null;        try {            // 1、创建Directory            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));            // 2、创建IndexReader            /**             * 注意Reader与3.5版本不同：             *              * 所以使用DirectoryReader             *              * @Deprecated public static DirectoryReader open(final Directory directory) throws IOException { return             *             DirectoryReader.open(directory); }             */            indexReader = DirectoryReader.open(directory);            // 3、根据IndexReader创建IndexSearch            IndexSearcher indexSearcher = new IndexSearcher(indexReader);            // 4、创建搜索的Query            // 使用默认的标准分词器            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);            // 在content中搜索Lucene            // 创建parser来确定要搜索文件的内容，第二个参数为搜索的域            QueryParser queryParser = new QueryParser(Version.LUCENE_45, "content", analyzer);            // 创建Query表示搜索域为content包含Lucene的文档            Query query = queryParser.parse("Lucene");            // 5、根据searcher搜索并且返回TopDocs            TopDocs topDocs = indexSearcher.search(query, 10);            // 6、根据TopDocs获取ScoreDoc对象            ScoreDoc[] scoreDocs = topDocs.scoreDocs;            for (ScoreDoc scoreDoc : scoreDocs) {                // 7、根据searcher和ScoreDoc对象获取具体的Document对象                Document document = indexSearcher.doc(scoreDoc.doc);                // 8、根据Document对象获取需要的值                System.out.println("id : " + document.get("id"));                System.out.println("author : " + document.get("author"));                System.out.println("title : " + document.get("title"));                /**                 * 看看content能不能打印出来，为什么？                 */                System.out.println("content : " + document.get("content"));            }        } catch (Exception e) {         
   e.printStackTrace();        } finally {            try {                if (indexReader != null) {                    indexReader.close();                }            } catch (Exception e) {                e.printStackTrace();            }        }    }}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

4.5版本使用了FieldType来代替Store和Index，其实去看看FieldType是什么东西，就是预定义了一些值，比如StringField：

package org.apache.lucene.document;import org.apache.lucene.index.FieldInfo.IndexOptions;/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//** A field that is indexed but not tokenized: the entire *  String value is indexed as a single token.  For example *  this might be used for a 'country' field or an 'id' *  field, or any field that you intend to use for sorting *  or access through the field cache. */public final class StringField extends Field {  /** Indexed, not tokenized, omits norms, indexes   *  DOCS_ONLY, not stored. */  public static final FieldType TYPE_NOT_STORED = new FieldType();  /** Indexed, not tokenized, omits norms, indexes   *  DOCS_ONLY, stored */  public static final FieldType TYPE_STORED = new FieldType();  static {    TYPE_NOT_STORED.setIndexed(true);    TYPE_NOT_STORED.setOmitNorms(true);    TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);    TYPE_NOT_STORED.setTokenized(false);    TYPE_NOT_STORED.freeze();    TYPE_STORED.setIndexed(true);    TYPE_STORED.setOmitNorms(true);    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);    TYPE_STORED.setStored(true);    TYPE_STORED.setTokenized(false);    TYPE_STORED.freeze();  }  /** Creates a new StringField.    
*  @param name field name   *  @param value String value   *  @param stored Store.YES if the content should also be stored   *  @throws IllegalArgumentException if the field name or value is null.   */  public StringField(String name, String value, Store stored) {    super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);  }}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

整个类预定义了两种FieldType，分别是TYPE_NOT_STORED和TYPE_STORED，具体的值也是一目了然，看看和3.5版本是不是基本一样的。当然还有一些其他的FieldType，比如TextField中预定义了另外两种：

  /** Indexed, tokenized, not stored. */  public static final FieldType TYPE_NOT_STORED = new FieldType();  /** Indexed, tokenized, stored. */  public static final FieldType TYPE_STORED = new FieldType();  static {    TYPE_NOT_STORED.setIndexed(true);    TYPE_NOT_STORED.setTokenized(true);    TYPE_NOT_STORED.freeze();    TYPE_STORED.setIndexed(true);    TYPE_STORED.setTokenized(true);    TYPE_STORED.setStored(true);    TYPE_STORED.freeze();  }1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

当然，还有其他的FieldType，不再一一列出，那么我们来试一下

package com.darren.lucene45;import org.junit.Test;public class IndexUtilTest {    @Test    public void testIndex() {        IndexUtil.index();    }    @Test    public void testSearch() {        IndexUtil.search();    }}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

此时跑一下测试的testIndex()方法看看效果：

java.lang.IllegalStateException: this FieldType is already frozen and cannot be changed    at org.apache.lucene.document.FieldType.checkIfFrozen(FieldType.java:86)    at org.apache.lucene.document.FieldType.setIndexed(FieldType.java:118)    at com.darren.lucene45.IndexUtil.index(IndexUtil.java:80)    at com.darren.lucene45.IndexUtilTest.testIndex(IndexUtilTest.java:8)    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)    at java.lang.reflect.Method.invoke(Method.java:601)    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)    at org.junit.runners.ParentRunner.run(ParentRunner.java:363)    at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)    at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)    at 
org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)    at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

竟然报错了，那么怎么办呢，原因是因为预定义的值都调用了freeze()方法，这个方法设置

public void freeze() {    this.frozen = true;  }  1
2
3
1
2
3

frozen为true，而FieldType中有这样的方法：

private void checkIfFrozen() {    if (frozen) {      throw new IllegalStateException("this FieldType is already frozen and cannot be changed");    }  }  1
2
3
4
5
1
2
3
4
5

如果为true就抛异常，也就是说这些预定义的值不可修改，那没办法了，我们只好自己设置了，于是把索引方法改为这样：

   /**     * 建立索引     */    public static void index() {        IndexWriter indexWriter = null;        try {            // 1、创建Directory            Directory directory = FSDirectory.open(new File("F:/test/lucene/index"));            // 2、创建IndexWriter            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);            indexWriter = new IndexWriter(directory, config);            int size = ids.length;            for (int i = 0; i < size; i++) {                // 3、创建Document对象                Document document = new Document();                // 看看四个参数的意思                // 4、为Document添加Field                /**                 * Create field with String value.                 *                  * @param name                 *            field name                 * @param value                 *            string value                 * @param type                 *            field type                 * @throws IllegalArgumentException                 *             if either the name or value is null, or if the field's type is neither indexed() nor                 *             stored(), or if indexed() is false but storeTermVectors() is true.                 
* @throws NullPointerException                 *             if the type is null                 *                  *             public Field(String name, String value, FieldType type)                 */                /**                 * 注意：这里与3.5版本不同，原来的构造函数已过时                 */                /**                 * 注：这里4.5版本使用FieldType代替了原来的Store和Index，不同的Field预定义了一些FieldType                 *                  */                // 对ID存储，但是不分词也不存储norms信息                FieldType idType = new FieldType();                idType.setStored(true);                idType.setIndexed(false);                idType.setOmitNorms(false);                document.add(new Field("id", ids[i], idType));                // 对Author存储，但是不分词                FieldType authorType = new FieldType();                authorType.setStored(true);                authorType.setIndexed(false);                document.add(new Field("author", authors[i], authorType));                // 对Title存储，分词                document.add(new Field("title", titles[i], StringField.TYPE_STORED));                // 对Content不存储，但是分词                document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));                // 5、通过IndexWriter添加文档到索引中                indexWriter.addDocument(document);            }        } catch (Exception e) {            e.printStackTrace();        } finally {            try {                if (indexWriter != null) {                    indexWriter.close();                }            } catch (Exception e) {                e.printStackTrace();            }        }    }1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

再跑一下testIndex()方法，没有出错，得到正常的索引，然后跑一下testSearch()方法看看结果：
id : 2 author : Tony title : Hello Lucene content : null
此时的content也是null，那么改一改对content的设置：
把这句：document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED)); 改为：// 对Content存储，并且分词 document.add(new Field("content", contents[i], TextField.TYPE_STORED));
再跑测试，记住要先索引再查找，结果为：

id : 2  author : Tony  title : Hello Lucene  content : Today is my first day to study Lucene  1
2
3
4
1
2
3
4

此时得到了和3.5版本一样的测试结果，4.5版本完成
5.0版本：
5.0版本与4.5版本相比没有太大改动，先看一下代码：

package com.darren.lucene50;

import java.nio.file.FileSystems;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Lucene 5.0 demo: builds a small index of three documents and searches it.
 *
 * <p>Each document has four fields that demonstrate different store/index settings:
 * <ul>
 *   <li>{@code id}: stored, indexed as a single term, norms omitted</li>
 *   <li>{@code author}: stored, indexed as a single term</li>
 *   <li>{@code title}: stored, tokenized</li>
 *   <li>{@code content}: tokenized but NOT stored</li>
 * </ul>
 */
public class IndexUtil {

    /** Location of the on-disk index shared by {@link #index()} and {@link #search()}. */
    private static final String INDEX_PATH = "F:/test/lucene/index";

    private static final String[] ids = { "1", "2", "3" };
    private static final String[] authors = { "Darren", "Tony", "Grylls" };
    private static final String[] titles = { "Hello World", "Hello Lucene", "Hello Java" };
    private static final String[] contents = { "Hello World, I am on my way", "Today is my first day to study Lucene",
            "I like Java" };

    /**
     * Builds the index: one document per entry of the sample arrays.
     *
     * <p>Any failure is printed to standard error; nothing is thrown to the caller.
     */
    public static void index() {
        // In 5.0 FieldType replaces the old Store/Index enums, and setIndexed(boolean)
        // is replaced by setIndexOptions(IndexOptions).

        // id: stored, not tokenized, no norms.
        // FieldType is tokenized by default, so tokenization must be switched off
        // explicitly, and omitNorms must be true for norms to be dropped.
        FieldType idType = new FieldType();
        idType.setStored(true);
        idType.setIndexOptions(IndexOptions.DOCS);
        idType.setTokenized(false);
        idType.setOmitNorms(true);
        idType.freeze();

        // author: stored, not tokenized.
        FieldType authorType = new FieldType();
        authorType.setStored(true);
        authorType.setIndexOptions(IndexOptions.DOCS);
        authorType.setTokenized(false);
        authorType.freeze();

        // try-with-resources closes the writer first, then the analyzer, then the
        // directory, even when indexing fails part-way through.
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i], idType));
                document.add(new Field("author", authors[i], authorType));
                // title: stored and tokenized. TextField.TYPE_STORED is the tokenized
                // preset; StringField.TYPE_STORED would index the title as one term.
                document.add(new Field("title", titles[i], TextField.TYPE_STORED));
                // content: tokenized but not stored, so it is searchable but cannot
                // be read back from a hit.
                document.add(new Field("content", contents[i], TextField.TYPE_NOT_STORED));
                indexWriter.addDocument(document);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the {@code content} field for "Lucene" and prints the fields of up to
     * ten matching documents.
     *
     * <p>Any failure is printed to standard error; nothing is thrown to the caller.
     */
    public static void search() {
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
                DirectoryReader indexReader = DirectoryReader.open(directory);
                Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // The parser's first argument is the default field to search in.
            QueryParser queryParser = new QueryParser("content", analyzer);
            Query query = queryParser.parse("Lucene");

            TopDocs topDocs = indexSearcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println("id : " + document.get("id"));
                System.out.println("author : " + document.get("author"));
                System.out.println("title : " + document.get("title"));
                // content is indexed with TYPE_NOT_STORED, so this prints null.
                System.out.println("content : " + document.get("content"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

FieldType的使用稍有不同：5.0中去掉了setIndexed(boolean)，改用setIndexOptions(IndexOptions)来指定是否索引以及索引到什么程度（IndexOptions.NONE表示不索引）。现在TextField中预定义的值是这样的：

  /** Indexed, tokenized, not stored. */  public static final FieldType TYPE_NOT_STORED = new FieldType();  /** Indexed, tokenized, stored. */  public static final FieldType TYPE_STORED = new FieldType();  static {    TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);    TYPE_NOT_STORED.setTokenized(true);    TYPE_NOT_STORED.freeze();    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);    TYPE_STORED.setTokenized(true);    TYPE_STORED.setStored(true);    TYPE_STORED.freeze();  }1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

我们来看看IndexOptions与Index有什么不同：

/** * Controls how much information is stored in the postings lists. * @lucene.experimental */public enum IndexOptions {   // NOTE: order is important here; FieldInfo uses this  // order to merge two conflicting IndexOptions (always  // "downgrades" by picking the lowest).  /** Not indexed */  NONE,  /**    * Only documents are indexed: term frequencies and positions are omitted.   * Phrase and other positional queries on the field will throw an exception, and scoring   * will behave as if any term in the document appears only once.   */  DOCS,  /**    * Only documents and term frequencies are indexed: positions are omitted.    * This enables normal scoring, except Phrase and other positional queries   * will throw an exception.   */    DOCS_AND_FREQS,  /**    * Indexes documents, frequencies and positions.   * This is a typical default for full-text search: full scoring is enabled   * and positional queries are supported.   */  DOCS_AND_FREQS_AND_POSITIONS,  /**    * Indexes documents, frequencies, positions and offsets.   * Character offsets are encoded alongside the positions.    */  DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,}1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

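从它的选项看，可以分别控制只索引文档、索引词频、索引位置、甚至索引偏移量。需要说明的是，这些粒度在4.x中已经以FieldInfo.IndexOptions的形式存在（当时通过setIndexed(true)配合setIndexOptions使用）；5.0的变化主要是把IndexOptions提升为独立的枚举，新增了NONE来表示"不索引"，从而取代了setIndexed。其他方面和4.5版本基本一样。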

阅读全文

0 0