BroadcastShardOperationFailedException TooManyClauses[maxClauseCount is set to

来源:互联网 发布:hf线切割编程软件8.0 编辑:程序博客网 时间:2024/06/06 16:47

luncen 查询条件不能大于1024

package org.apache.lucene.search;/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import java.util.Set;import org.apache.lucene.index.AtomicReaderContext;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Term;import org.apache.lucene.search.BooleanClause.Occur;import org.apache.lucene.search.similarities.Similarity;import org.apache.lucene.util.Bits;import org.apache.lucene.util.ToStringUtils;/** A Query that matches documents matching boolean combinations of other  * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other  * BooleanQuerys.  */public class BooleanQuery extends Query implements Iterable<BooleanClause> {  private static int maxClauseCount = 10240;  /** Thrown when an attempt is made to add more than {@link   * #getMaxClauseCount()} clauses. This typically happens if   * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery    * is expanded to many terms during search.    */  public static class TooManyClauses extends RuntimeException {    public TooManyClauses() {      super("maxClauseCount is set to " + maxClauseCount);    }  }  /** Return the maximum number of clauses permitted, 1024 by default.   * Attempts to add more than the permitted number of clauses cause {@link   * TooManyClauses} to be thrown.   * @see #setMaxClauseCount(int)   */  public static int getMaxClauseCount() { return maxClauseCount; }  /**    * Set the maximum number of clauses permitted per BooleanQuery.   * Default value is 1024.   */  public static void setMaxClauseCount(int maxClauseCount) {    if (maxClauseCount < 1) {      throw new IllegalArgumentException("maxClauseCount must be >= 1");    }    BooleanQuery.maxClauseCount = maxClauseCount;  }  private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();  private final boolean disableCoord;  /** Constructs an empty boolean query. */  public BooleanQuery() {    disableCoord = false;  }  /** Constructs an empty boolean query.   *   * {@link Similarity#coord(int,int)} may be disabled in scoring, as   * appropriate. For example, this score factor does not make sense for most   * automatically generated queries, like {@link WildcardQuery} and {@link   * FuzzyQuery}.   *   * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.   */  public BooleanQuery(boolean disableCoord) {    this.disableCoord = disableCoord;  }  /** Returns true iff {@link Similarity#coord(int,int)} is disabled in   * scoring for this query instance.   * @see #BooleanQuery(boolean)   */  public boolean isCoordDisabled() { return disableCoord; }  /**   * Specifies a minimum number of the optional BooleanClauses   * which must be satisfied.   *   * <p>   * By default no optional clauses are necessary for a match   * (unless there are no required clauses).  If this method is used,   * then the specified number of clauses is required.   * </p>   * <p>   * Use of this method is totally independent of specifying that   * any specific clauses are required (or prohibited).  This number will   * only be compared against the number of matching optional clauses.   * </p>   *   * @param min the number of optional clauses that must match   */  public void setMinimumNumberShouldMatch(int min) {    this.minNrShouldMatch = min;  }  protected int minNrShouldMatch = 0;  /**   * Gets the minimum number of the optional BooleanClauses   * which must be satisfied.   */  public int getMinimumNumberShouldMatch() {    return minNrShouldMatch;  }  /** Adds a clause to a boolean query.   *   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number   * @see #getMaxClauseCount()   */  public void add(Query query, BooleanClause.Occur occur) {    add(new BooleanClause(query, occur));  }  /** Adds a clause to a boolean query.   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number   * @see #getMaxClauseCount()   */  public void add(BooleanClause clause) {    if (clauses.size() >= maxClauseCount) {      throw new TooManyClauses();    }    clauses.add(clause);  }  /** Returns the set of clauses in this query. */  public BooleanClause[] getClauses() {    return clauses.toArray(new BooleanClause[clauses.size()]);  }  /** Returns the list of clauses in this query. */  public List<BooleanClause> clauses() { return clauses; }  /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to   * make it possible to do:   * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>   */  @Override  public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }  /**   * Expert: the Weight for BooleanQuery, used to   * normalize, score and explain these queries.   *   * <p>NOTE: this API and implementation is subject to   * change suddenly in the next release.</p>   */  protected class BooleanWeight extends Weight {    /** The Similarity implementation. */    protected Similarity similarity;    protected ArrayList<Weight> weights;    protected int maxCoord;  // num optional + num required    private final boolean disableCoord;    public BooleanWeight(IndexSearcher searcher, boolean disableCoord)      throws IOException {      this.similarity = searcher.getSimilarity();      this.disableCoord = disableCoord;      weights = new ArrayList<Weight>(clauses.size());      for (int i = 0 ; i < clauses.size(); i++) {        BooleanClause c = clauses.get(i);        Weight w = c.getQuery().createWeight(searcher);        weights.add(w);        if (!c.isProhibited()) {          maxCoord++;        }      }    }    @Override    public Query getQuery() { return BooleanQuery.this; }    @Override    public float getValueForNormalization() throws IOException {      float sum = 0.0f;      for (int i = 0 ; i < weights.size(); i++) {        // call sumOfSquaredWeights for all clauses in case of side effects        float s = weights.get(i).getValueForNormalization();         // sum sub weights        if (!clauses.get(i).isProhibited()) {          // only add to sum for non-prohibited clauses          sum += s;        }      }      sum *= getBoost() * getBoost();             // boost each sub-weight      return sum ;    }    public float coord(int overlap, int maxOverlap) {      // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,      // so coord() is not applied. But when BQ cannot optimize itself away      // for a single clause (minNrShouldMatch, prohibited clauses, etc), its      // important not to apply coord(1,1) for consistency, it might not be 1.0F      return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);    }    @Override    public void normalize(float norm, float topLevelBoost) {      topLevelBoost *= getBoost();                         // incorporate boost      for (Weight w : weights) {        // normalize all clauses, (even if prohibited in case of side affects)        w.normalize(norm, topLevelBoost);      }    }    @Override    public Explanation explain(AtomicReaderContext context, int doc)      throws IOException {      final int minShouldMatch =        BooleanQuery.this.getMinimumNumberShouldMatch();      ComplexExplanation sumExpl = new ComplexExplanation();      sumExpl.setDescription("sum of:");      int coord = 0;      float sum = 0.0f;      boolean fail = false;      int shouldMatchCount = 0;      Iterator<BooleanClause> cIter = clauses.iterator();      for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {        Weight w = wIter.next();        BooleanClause c = cIter.next();        if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) {          if (c.isRequired()) {            fail = true;            Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");            sumExpl.addDetail(r);          }          continue;        }        Explanation e = w.explain(context, doc);        if (e.isMatch()) {          if (!c.isProhibited()) {            sumExpl.addDetail(e);            sum += e.getValue();            coord++;          } else {            Explanation r =              new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");            r.addDetail(e);            sumExpl.addDetail(r);            fail = true;          }          if (c.getOccur() == Occur.SHOULD) {            shouldMatchCount++;          }        } else if (c.isRequired()) {          Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");          r.addDetail(e);          sumExpl.addDetail(r);          fail = true;        }      }      if (fail) {        sumExpl.setMatch(Boolean.FALSE);        sumExpl.setValue(0.0f);        sumExpl.setDescription          ("Failure to meet condition(s) of required/prohibited clause(s)");        return sumExpl;      } else if (shouldMatchCount < minShouldMatch) {        sumExpl.setMatch(Boolean.FALSE);        sumExpl.setValue(0.0f);        sumExpl.setDescription("Failure to match minimum number "+                               "of optional clauses: " + minShouldMatch);        return sumExpl;      }            sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);      sumExpl.setValue(sum);            final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);      if (coordFactor == 1.0f) {        return sumExpl;                             // eliminate wrapper      } else {        ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),                                                           sum*coordFactor,                                                           "product of:");        result.addDetail(sumExpl);        result.addDetail(new Explanation(coordFactor,                                         "coord("+coord+"/"+maxCoord+")"));        return result;      }    }    @Override    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,        boolean topScorer, Bits acceptDocs)        throws IOException {      List<Scorer> required = new ArrayList<Scorer>();      List<Scorer> prohibited = new ArrayList<Scorer>();      List<Scorer> optional = new ArrayList<Scorer>();      Iterator<BooleanClause> cIter = clauses.iterator();      for (Weight w  : weights) {        BooleanClause c =  cIter.next();        Scorer subScorer = w.scorer(context, true, false, acceptDocs);        if (subScorer == null) {          if (c.isRequired()) {            return null;          }        } else if (c.isRequired()) {          required.add(subScorer);        } else if (c.isProhibited()) {          prohibited.add(subScorer);        } else {          optional.add(subScorer);        }      }      // NOTE: we could also use BooleanScorer, if we knew      // this BooleanQuery was embedded in another      // BooleanQuery that was also using BooleanScorer (ie,      // BooleanScorer can nest).  But this is hard to      // detect and we never do so today... (ie, we only      // return BooleanScorer for topScorer):      // Check if we can and should return a BooleanScorer      // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch      // but the same is even true of pure conjunctions...      if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) {        return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);      }            if (required.size() == 0 && optional.size() == 0) {        // no required and optional clauses.        return null;      } else if (optional.size() < minNrShouldMatch) {        // either >1 req scorer, or there are 0 req scorers and at least 1        // optional scorer. Therefore if there are not enough optional scorers        // no documents will be matched by the query        return null;      }            // simple conjunction      if (optional.size() == 0 && prohibited.size() == 0) {        float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);        return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);      }            // simple disjunction      if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {        float coord[] = new float[optional.size()+1];        for (int i = 0; i < coord.length; i++) {          coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);        }        return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);      }            // Return a BooleanScorer2      return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);    }        @Override    public boolean scoresDocsOutOfOrder() {      for (BooleanClause c : clauses) {        if (c.isRequired()) {          return false; // BS2 (in-order) will be used by scorer()        }      }            // scorer() will return an out-of-order scorer if requested.      return true;    }      }  @Override  public Weight createWeight(IndexSearcher searcher) throws IOException {    return new BooleanWeight(searcher, disableCoord);  }  @Override  public Query rewrite(IndexReader reader) throws IOException {    if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries      BooleanClause c = clauses.get(0);      if (!c.isProhibited()) {  // just return clause        Query query = c.getQuery().rewrite(reader);    // rewrite first        if (getBoost() != 1.0f) {                 // incorporate boost          if (query == c.getQuery()) {                   // if rewrite was no-op            query = query.clone();         // then clone before boost          }          // Since the BooleanQuery only has 1 clause, the BooleanQuery will be          // written out. Therefore the rewritten Query's boost must incorporate both          // the clause's boost, and the boost of the BooleanQuery itself          query.setBoost(getBoost() * query.getBoost());        }        return query;      }    }    BooleanQuery clone = null;                    // recursively rewrite    for (int i = 0 ; i < clauses.size(); i++) {      BooleanClause c = clauses.get(i);      Query query = c.getQuery().rewrite(reader);      if (query != c.getQuery()) {                     // clause rewrote: must clone        if (clone == null) {          // The BooleanQuery clone is lazily initialized so only initialize          // it if a rewritten clause differs from the original clause (and hasn't been          // initialized already).  If nothing differs, the clone isn't needlessly created          clone = this.clone();        }        clone.clauses.set(i, new BooleanClause(query, c.getOccur()));      }    }    if (clone != null) {      return clone;                               // some clauses rewrote    } else {      return this;                                // no clauses rewrote    }  }  // inherit javadoc  @Override  public void extractTerms(Set<Term> terms) {    for (BooleanClause clause : clauses) {      if (clause.getOccur() != Occur.MUST_NOT) {        clause.getQuery().extractTerms(terms);      }    }  }  @Override @SuppressWarnings("unchecked")  public BooleanQuery clone() {    BooleanQuery clone = (BooleanQuery)super.clone();    clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone();    return clone;  }  /** Prints a user-readable version of this query. */  @Override  public String toString(String field) {    StringBuilder buffer = new StringBuilder();    boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0;    if (needParens) {      buffer.append("(");    }    for (int i = 0 ; i < clauses.size(); i++) {      BooleanClause c = clauses.get(i);      if (c.isProhibited()) {        buffer.append("-");      } else if (c.isRequired()) {        buffer.append("+");      }      Query subQuery = c.getQuery();      if (subQuery != null) {        if (subQuery instanceof BooleanQuery) {  // wrap sub-bools in parens          buffer.append("(");          buffer.append(subQuery.toString(field));          buffer.append(")");        } else {          buffer.append(subQuery.toString(field));        }      } else {        buffer.append("null");      }      if (i != clauses.size()-1) {        buffer.append(" ");      }    }    if (needParens) {      buffer.append(")");    }    if (getMinimumNumberShouldMatch()>0) {      buffer.append('~');      buffer.append(getMinimumNumberShouldMatch());    }    if (getBoost() != 1.0f) {      buffer.append(ToStringUtils.boost(getBoost()));    }    return buffer.toString();  }  /** Returns true iff <code>o</code> is equal to this. */  @Override  public boolean equals(Object o) {    if (!(o instanceof BooleanQuery)) {      return false;    }    BooleanQuery other = (BooleanQuery)o;    return this.getBoost() == other.getBoost()        && this.clauses.equals(other.clauses)        && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()        && this.disableCoord == other.disableCoord;  }  /** Returns a hash code value for this object.*/  @Override  public int hashCode() {    return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()      + getMinimumNumberShouldMatch() + (disableCoord ? 17:0);  }  }

   

下面是在网上看到得一个解决方法: 

可以通过设置: 
BooleanQuery.setMaxClauseCount(10000); 
来解决问题,但是这样带来的问题是会使得内存开销加大。容易出现OutOfMemory的异常所以需要非常谨慎处理。 

Lucene在做大量term值查询时, 如果这值过多, 超1024个term的话, 会出现

TooManyClauses[maxClauseCount is set to 1024] 的异常,因此建议在term过多的情况下采用filter, 而不是query。

以下是该情形在ES中的测试。

 

 

Java代码  收藏代码
  1. Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff"true).build();  
  2.         Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)  
  3.                 .put("name", NetworkUtils.getLocalAddress().getHostName()).build();  
  4.         TransportClient tmp = new TransportClient(finalSettings);  
  5.         Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1"9300));  
  6.         //demo 100万数据  
  7.         for (int i = 0; i < 1000000; i++)  
  8.         {  
  9.             client.prepareIndex("test2""book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()  
  10.             .actionGet();  
  11.         }  
  12.         //demo 近1万个term  
  13.         String[] values = new String[10000];  
  14.         for (int i = 1; i < 10000; i++)  
  15.         {  
  16.             values[i] = String.valueOf(i);  
  17.         }  
  18.         //terms query  
  19.         //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);  
  20.         TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);  
  21. //      SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)  
  22. //              .setFrom(0).setSize(100).execute().actionGet();  
  23.         //terms filter  
  24.         SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)  
  25.                 .setFrom(0).setSize(100).execute().actionGet();  
  26.         SearchHits hits = searchResponse.getHits();  
  27.         System.out.println(hits.totalHits());  
  28.         for (SearchHit searchHit : hits)  
  29.         {  
  30.             System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));  
  31.         }  

 上述结果会发现, 用TermsQueryBuilder查询的话, 会出现TooManyClauses的异常, 因为设置了9999个term值。因此,当term过多时,建议采用filter, 而不是query. 

 http://lucene-group.group.iteye.com/group/topic/10555

http://maxrocray.iteye.com/blog/1860946

0 0
原创粉丝点击