Solr docValues field sorting: a source code analysis

This post analyzes how sorting on docValues fields is implemented.
 
A different comparator is needed for each field type.
Sorting first uses a TopFieldCollector to collect the matching docs:
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len,
            false, needScores, needScores, true);
org.apache.lucene.search.TopFieldCollector.create(Sort, int, boolean, boolean, boolean, boolean)
org.apache.lucene.search.TopFieldCollector.create(Sort, int, FieldDoc, boolean, boolean, boolean, boolean)
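For orientation, this is roughly how the same path is driven from plain Lucene 4.x; the field name "price", the query, and numHits=10 are illustrative assumptions, not code taken from Solr:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldCollector;

static ScoreDoc[] topByPrice(IndexSearcher searcher, Query query) throws IOException {
  Sort sort = new Sort(new SortField("price", SortField.Type.INT));
  // args: sort, numHits, fillFields, trackDocScores, trackMaxScore, docsScoredInOrder
  TopFieldCollector collector = TopFieldCollector.create(sort, 10, true, false, false, false);
  searcher.search(query, collector);
  return collector.topDocs().scoreDocs;
}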
This method first creates a priority queue:
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
org.apache.lucene.search.FieldValueHitQueue.create(SortField[], int)
if (fields.length == 1) { // a single sort field: use OneComparatorFieldValueHitQueue
      return new OneComparatorFieldValueHitQueue<T>(fields, size);
    } else { // multiple sort fields: use MultiComparatorsFieldValueHitQueue
      return new MultiComparatorsFieldValueHitQueue<T>(fields, size);
    }
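In other words, a request like sort=price asc takes the first branch, while a compound sort takes the second (field name illustrative):

Sort single   = new Sort(new SortField("price", SortField.Type.INT));  // -> OneComparatorFieldValueHitQueue
Sort compound = new Sort(new SortField("price", SortField.Type.INT),
                         SortField.FIELD_DOC);                         // -> MultiComparatorsFieldValueHitQueue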
Assuming a single sort field, the queue is a OneComparatorFieldValueHitQueue, initialized in
org.apache.lucene.search.FieldValueHitQueue.OneComparatorFieldValueHitQueue.OneComparatorFieldValueHitQueue(SortField[], int)
It first calls the superclass constructor,
org.apache.lucene.search.FieldValueHitQueue.FieldValueHitQueue(SortField[], int)
which sizes the queue and allocates one comparator per sort field, plus reverseMul, a per-field sign multiplier (-1 for a descending field, +1 otherwise) that the queue applies to every comparison result:
comparators = new FieldComparator[numComparators];
    reverseMul = new int[numComparators];
 
setComparator(0, field.getComparator(size, 0));
org.apache.lucene.search.SortField.getComparator(int, int)
switch (type) { // pick the comparator for this SortField's type
    case SCORE:
      return new FieldComparator.RelevanceComparator(numHits);
    case DOC:
      return new FieldComparator.DocComparator(numHits);
    case INT: // the case taken for the int docValues field followed in this post
      return new FieldComparator.IntComparator(numHits, field, parser, (Integer) missingValue);
    // ... remaining cases elided
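Note that a reverse (descending) SortField does not change which comparator is chosen; the queue's reverseMul flips the sign of the comparison instead. SortField.setMissingValue is also what makes the missingValue != null check seen later evaluate to true. A small illustrative sketch (field name assumed):

SortField priceDesc = new SortField("price", SortField.Type.INT, true); // reverse=true -> reverseMul == -1
priceDesc.setMissingValue(0); // docs with no value for the field sort as if they held 0
Sort sort = new Sort(priceDesc);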
 
Back in SolrIndexSearcher, Lucene's IndexSearcher is then invoked:
super.search(query, luceneFilter, collector);
org.apache.lucene.search.IndexSearcher.search(Query, Filter, Collector)
org.apache.lucene.search.IndexSearcher.search(List<AtomicReaderContext>, Weight, Collector)
for (AtomicReaderContext ctx : leaves) { // search each subreader
      try {
collector.setNextReader(ctx); // point the collector at this segment's reader
      } catch (CollectionTerminatedException e) {
        // there is no doc of interest in this reader context
        // continue with the following leaf
        continue;
      }
Scorer scorer = weight.scorer(ctx, !collector.acceptsDocsOutOfOrder(), true, ctx.reader().getLiveDocs()); // create the scorer for this segment
      if (scorer != null) {
        try {
scorer.score(collector); // score and collect the segment's matches
        } catch (CollectionTerminatedException e) {
          // collection was terminated prematurely
          // continue with the following leaf
        }
      }
    }
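Both catch blocks exist because a Collector may throw CollectionTerminatedException from setNextReader or collect to abandon the current segment while letting the search continue with the next leaf. An illustrative collector (not Lucene's, names assumed) that caps the number of hits taken per segment:

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

public class CappedCollector extends Collector {
  private final Collector delegate;
  private final int maxPerSegment;
  private int collected;

  public CappedCollector(Collector delegate, int maxPerSegment) {
    this.delegate = delegate;
    this.maxPerSegment = maxPerSegment;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    delegate.setScorer(scorer);
  }

  @Override
  public void setNextReader(AtomicReaderContext context) throws IOException {
    collected = 0; // reset the per-segment budget
    delegate.setNextReader(context);
  }

  @Override
  public void collect(int doc) throws IOException {
    if (++collected > maxPerSegment) {
      throw new CollectionTerminatedException(); // caught by the loop above; the next leaf is searched
    }
    delegate.collect(doc);
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return delegate.acceptsDocsOutOfOrder();
  }
}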
Analysis of collector.setNextReader(ctx):
org.apache.lucene.search.TopFieldCollector.OneComparatorNonScoringCollector.setNextReader(AtomicReaderContext)
public void setNextReader(AtomicReaderContext context) throws IOException {
      this.docBase = context.docBase; // base docID of this segment: global docID = docBase + segment-local doc
      queue.setComparator(0, comparator.setNextReader(context)); // rebind the comparator to the new segment's values
      comparator = queue.firstComparator;
    }
Inside comparator.setNextReader(context) (FieldComparator.IntComparator in this walkthrough):
@Override
    public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
      // NOTE: must do this before calling super otherwise
      // we compute the docsWithField Bits twice!
      currentReaderValues = FieldCache.DEFAULT.getInts(context.reader(), field, parser, missingValue != null);
      return super.setNextReader(context);
    }
This lands in FieldCacheImpl's getInts, which supplies the per-document int values for the field:
public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField)
      throws IOException {
    final NumericDocValues valuesIn = reader.getNumericDocValues(field); // first check whether the field has docValues; if so, read from them directly
    if (valuesIn != null) {
      // Not cached here by FieldCacheImpl (cached instead
      // per-thread by SegmentReader):
      return new Ints() {
        @Override
        public int get(int docID) {
          return (int) valuesIn.get(docID);
        }
      };
    } else {
      final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
      if (info == null) {
        return Ints.EMPTY;
      } else if (info.hasDocValues()) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
      } else if (!info.isIndexed()) {
        return Ints.EMPTY;
      }
      return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField);
    }
  }
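Which branch runs is therefore decided at index time. A minimal Lucene 4.x indexing sketch (field name and value illustrative) that makes reader.getNumericDocValues(field) non-null; in Solr this corresponds to declaring docValues="true" on the field in schema.xml:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;

static void addProduct(IndexWriter writer, int price) throws IOException {
  Document doc = new Document();
  doc.add(new IntField("price", price, Field.Store.YES)); // indexed: supports search and the FieldCache fallback
  doc.add(new NumericDocValuesField("price", price));     // docValues: the fast path checked first above
  writer.addDocument(doc);
}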
 
Next, the process of collecting and ranking the docIDs:
scorer.score(collector);
org.apache.lucene.search.Scorer.score(Collector)
 public void score(Collector collector) throws IOException {
    assert docID() == -1; // not started
    collector.setScorer(this);
    int doc;
    while ((doc = nextDoc()) != NO_MORE_DOCS) { // nextDoc() advances to the next docID that matches the query
      collector.collect(doc); // hand the doc to the collector, which ranks it
    }
  }
org.apache.lucene.search.TopFieldCollector.OneComparatorNonScoringCollector.collect(int)
@Override
    public void collect(int doc) throws IOException {
      ++totalHits;
      if (queueFull) {
        if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
          // since docs are visited in doc Id order, if compare is 0, it means
          // this document is larger than anything else in the queue, and
          // therefore not competitive.
          return;
        }
        
        // This hit is competitive - replace bottom element in queue & adjustTop
        comparator.copy(bottom.slot, doc);
        updateBottom(doc);
        comparator.setBottom(bottom.slot);
      } else {
        // Startup transient: queue hasn't gathered numHits yet
        final int slot = totalHits - 1;
        // Copy hit into queue
        comparator.copy(slot, doc);
        add(slot, doc, Float.NaN);
        if (queueFull) {
          comparator.setBottom(bottom.slot);
        }
      }
    }
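Stripped of the comparator indirection, collect implements the classic bounded top-N pattern: fill the queue up to numHits, then replace the bottom only when a new hit beats it. A self-contained sketch of the same idea over plain ints, with the JDK PriorityQueue standing in for Lucene's FieldValueHitQueue (assumes n >= 1):

import java.util.PriorityQueue;

static int[] topN(int[] values, int n) {
  // min-heap: the head is the "bottom", i.e. the weakest hit currently in the queue
  PriorityQueue<Integer> queue = new PriorityQueue<Integer>(n);
  for (int v : values) {
    if (queue.size() < n) {
      queue.add(v);                // startup transient: queue hasn't gathered n hits yet
    } else if (v > queue.peek()) { // competitive: beats the current bottom
      queue.poll();                // drop the bottom element
      queue.add(v);                // insert the new hit; the heap re-establishes the bottom
    }                              // otherwise: not competitive, skip (the early return above)
  }
  int[] top = new int[queue.size()];
  for (int i = top.length - 1; i >= 0; i--) {
    top[i] = queue.poll();         // polls in ascending order, so fill from the end: top[0] is the best hit
  }
  return top;
}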
 
Two pieces remain on the read path: the per-hit value copy,
org.apache.lucene.search.FieldComparator.IntComparator.copy(int, int)
which copies currentReaderValues.get(doc) into the comparator's slot, and the docValues load itself,
org.apache.lucene.codecs.lucene42.Lucene42DocValuesProducer.loadNumeric(FieldInfo)
which in the Lucene 4.2 format decodes a field's numeric docValues into memory once per segment.