自己修改的lucene的jjt文件

来源:互联网 发布:阿里云os系统怎么样 编辑:程序博客网 时间:2024/05/16 13:03
/* ***************** */
/* Token Definitions */
/* ***************** */

/*
 * Private (#) helper expressions, usable from every lexical state.
 * They never produce tokens on their own; they are only building blocks
 * for the token definitions below.
 */
<*> TOKEN : {
  // Best kept as a private helper: defined as a global token it would
  // interfere with many other definitions.
  <#_NUM_CHAR:   ["0"-"9"] >
  // Any escaped character, e.g. \s or \t; ~[] matches every character.
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "<", ">", "\"", "#", "%", "'", "{", "}", "~", "*", "?", "\\", "=", "," ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000" ) >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE> >
}

/*
 * Tokens of the DEFAULT state. Note that <OR> also matches ",", <COLON>
 * matches "=" (not ":"), and <SHIT> matches any one of "~", "#", "%".
 */
<DEFAULT> TOKEN : {
  <AND:       ([ "a", "A" ] [ "n", "N" ] [ "D", "d" ]) >
| <OR:        ([ "o", "O" ] [ "r", "R" ] | ",") >
| <NOT:       ([ "N", "n" ] [ "o", "O" ] [ "t", "T" ]) >
  // Note: do not define a global token with a pattern such as
  // ([A-Z]|[a-z])*; that would also restrict what other tokens can match.
| <NEAR:      ([ "N", "n" ] [ "e", "E" ] [ "A", "a" ] [ "r", "R" ]) >
| <WORDFORWARD: ([ "W", "w" ] [ "f", "F" ] (<_NUM_CHAR>)*) >
| <WORDNEAR:  ([ "W", "w" ] (<_NUM_CHAR>)*) >
| <SPACE:     ([ "S", "s" ] [ "p", "P" ] [ "A", "a" ] [ "C", "c" ] [ "e", "E" ]) >
| <GREATER:   ">" >
| <LESS:      "<" >
| <SMALLDOT:  "'" >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    ("(") >
| <RPAREN:    (")") >
| <COLON:     "=" >
| <SHIT:      [ "~", "#", "%" ] >
| <STAR:      "*" >
| <CARAT:     "^" > : Boost
| <QUOTED:    "\"" (<_QUOTED_CHAR>)* "\"" >
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: <SHIT> ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: ("[") > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

/* After "^" the lexer is in the Boost state: read one number, then return to DEFAULT. */
<Boost> TOKEN : {
  <NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

/* Inside "[ ... ]". The range separator may be "TO" (any case), <MINUS> or <SHIT>. */
<RangeIn> TOKEN : {
  <RANGEIN_TO: (([ "t", "T" ] [ "O", "o" ]) | <MINUS> | <SHIT>) >
| <RANGEIN_END: ("]") > : DEFAULT
| <RANGEIN_QUOTED: "\"" (~[ "\"" ] | "\\\"")+ "\"" >
| <RANGEIN_GOOP: (~[ "-", "~", " ", "]" ])+ >
}

/* Inside "{ ... }". */
<RangeEx> TOKEN : {
  // Some definitions can be reused, e.g. the range separator.
  <RANGEEX_TO: <RANGEIN_TO> >
| <RANGEEX_END: "}" > : DEFAULT
| <RANGEEX_QUOTED: "\"" (~[ "\"" ] | "\\\"")+ "\"" >
| <RANGEEX_GOOP: (~[ "-", "~", " ", "}" ])+ >
}

/*
 * NOTE(review): the productions below read and write parser-level state that
 * is declared outside this section (presumably between PARSER_BEGIN and
 * PARSER_END): boost, lbrace, rbrace, appendlbrace, diskycout, disnwcout, sb,
 * distanceQuery, fuzzyMinSim, phraseSlop and hasNewAPI. Confirm against the
 * parser class before changing how they are reset.
 */

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

/** Reads an optional AND/OR and returns CONJ_AND, CONJ_OR, or CONJ_NONE when absent. */
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
      <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}

/** Reads an optional "+", "-" or NOT and returns MOD_REQ, MOD_NOT, or MOD_NONE when absent. */
int Modifiers() : {
  int ret = MOD_NONE;
}
{
  [
      <PLUS>  { ret = MOD_REQ; }
    | <MINUS> { ret = MOD_NOT; }
    | <NOT>   { ret = MOD_NOT; }
  ]
  { return ret; }
}

// This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) : {
  Query q;
}
{
  q=Query(field) <EOF>
  { return q; }
}

/**
 * Parses one or more clauses joined by optional conjunctions.
 * A single clause without a modifier is returned as-is; anything else is
 * combined through getBooleanQuery(clauses).
 */
Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    if (mods == MOD_NONE) {
      firstQuery = q;
    }
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null) {
      return firstQuery;
    } else {
      return getBooleanQuery(clauses);
    }
  }
}

/**
 * Parses an optional field prefix (TERM or "*" followed by <COLON>, i.e. "=")
 * and then either a distance query, a plain term, or a parenthesised
 * sub-query with an optional "^" boost.
 */
Query Clause(String field) : {
  Query q;
  Token fieldToken=null;
}
{
  [
    LOOKAHEAD(2)
    (
        fieldToken=<TERM> <COLON> { field = discardEscapeChar(fieldToken.image); }
      | <STAR> <COLON> { field = "*"; }
    )
  ]
  (
    (
      LOOKAHEAD(3)
      (
          LOOKAHEAD(3)
          q = distanceQueryStart(field)
        | q = Term(field)
      )
    | (<LPAREN>)
      {
        lbrace += 1;
        appendlbrace += 1;
      }
      q=Query(field)
      (
          <RPAREN>
        | <EOF>
          {
            // Input ended before the group was closed: accept it only when the
            // parenthesis counters agree.
            if (lbrace != rbrace) {
              throw new ParseException("brace does not matches");
            } else {
              return q;
            }
          }
      )
      { appendlbrace = 0; }
      ( <CARAT> boost=<NUMBER> )?
    )
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
        q.setBoost(f);
      } catch (Exception ignored) {
        // Best effort: an unparsable boost, or a null q, means "no boost".
      }
    }
    // Clear the shared boost token so that a group boost such as "(a b)^2"
    // is not applied again to the term that follows the group.
    boost = null;
    return q;
  }
}

/**
 * Parses an inclusive "[a TO b]" or exclusive "{a TO b}" range with an optional
 * boost. Quoted bounds have their surrounding quotes stripped before use.
 */
Query rangeTerm(String field) :
{
  Token goop1, goop2;
  Query q;
}
{
  (
      ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP> | goop1=<RANGEIN_QUOTED> )
        [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP> | goop2=<RANGEIN_QUOTED> )
        <RANGEIN_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEIN_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        }
        if (goop2.kind == RANGEIN_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        }
        q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), true);
      }
    | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP> | goop1=<RANGEEX_QUOTED> )
        [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP> | goop2=<RANGEEX_QUOTED> )
        <RANGEEX_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEEX_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        }
        if (goop2.kind == RANGEEX_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        }
        q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), false);
      }
  )
  {
    return q;
  }
}

/**
 * Consumes the remainder of a distance expression one token at a time,
 * recursing after each operator (NEAR / Wf<n> / W<n> / SPACE), TERM or
 * parenthesis and appending its text to sb. The last alternative (an optional
 * boost, possibly empty) ends the recursion: it checks the keyword/operator
 * counts, builds the result through DistanceQueryFormat into the distanceQuery
 * field and resets the counters and sb.
 *
 * The optional boost is stored in the shared boost token (no local variable
 * shadows it), so the enclosing Clause can apply it.
 */
void distanceQuery(String field, String beforeStr) :
{
  Token term;
  Token slop=null;
}
{
  (
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
      {
        disnwcout += 1;
        sb.append(" " + slop.image.trim() + " ");
        distanceQuery(field, beforeStr);
        return;
      }
    | term=<TERM>
      {
        diskycout += 1;
        sb.append(term.image.trim());
        distanceQuery(field, beforeStr);
        return;
      }
    | (
          <LPAREN> { sb.append(" ( "); lbrace += 1; }
        | <RPAREN> { rbrace += 1; sb.append(" ) "); }
      )
      {
        distanceQuery(field, beforeStr);
        return;
      }
    | [ <CARAT> boost=<NUMBER> ]
      {
        // When both counters are non-zero there must be exactly one more
        // keyword than operators.
        if (diskycout != 0 && disnwcout != 0 && diskycout - disnwcout != 1) {
          throw new ParseException("the keyword does not matches");
        }
        DistanceQueryFormat format = new DistanceQueryFormat();
        DistanceQueryFormat.parenMatches(lbrace, rbrace);
        distanceQuery = format.formatPhraseTerm(field + ":" + beforeStr + " " + sb.toString().trim());
        lbrace = 0;
        rbrace = 0;
        diskycout = 0;
        disnwcout = 0;
        sb = new StringBuffer();
      }
  )
}

/**
 * Entry point of a distance expression: TERM followed by a distance operator,
 * optionally preceded by "(". Emits one " ( " per pending appendlbrace, hands
 * the prefix to distanceQuery() and returns the distanceQuery field it fills.
 */
Query distanceQueryStart(String field) :
{
  Token slop=null;
  Token term = null;
  // Placeholder with an empty image so the prefix can be built the same way
  // whether or not a "(" was matched.
  Token lparen = new Token();
  lparen.image = "";
}
{
  (
      term=<TERM>
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
    | lparen=<LPAREN> term=<TERM>
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
      {
        lbrace += 1;
      }
  )
  {
    diskycout += 1;
    disnwcout += 1;
    StringBuilder lb = new StringBuilder();
    for (int i = 0; i < appendlbrace; i++) {
      lb.append(" ( ");
    }
    appendlbrace = 0;
    // slop is concatenated as a Token, i.e. through its toString().
    distanceQuery(field, lb + lparen.image.trim() + " " + term.image.trim() + " " + slop);
    return distanceQuery;
  }
}

/**
 * Parses a single term (TERM, "*", prefix term, wildcard term or NUMBER) with
 * an optional fuzzy marker and an optional boost, and builds the matching
 * wildcard / prefix / fuzzy / field query.
 */
Query smpTerm(String field) :
{
  Token term, fuzzySlop=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
    (
        term=<TERM>
      | term=<STAR> { wildcard=true; }
      | term=<PREFIXTERM> { prefix=true; }
      | term=<WILDTERM> { wildcard=true; }
      | term=<NUMBER>
    )
    [ ( fuzzySlop=<FUZZY_SLOP> | <SHIT> ) { fuzzy=true; } ]
    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
    {
      String termImage = discardEscapeChar(term.image);
      if (wildcard) {
        q = getWildcardQuery(field, termImage);
      } else if (prefix) {
        // Drop the trailing "*" before unescaping.
        q = getPrefixQuery(field,
            discardEscapeChar(term.image.substring(0, term.image.length()-1)));
      } else if (fuzzy) {
        float fms = fuzzyMinSim;
        // fuzzySlop stays null when the marker was matched as a bare <SHIT>;
        // the default similarity is used in that case.
        if (fuzzySlop != null) {
          try {
            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
          } catch (Exception ignored) {
            // Marker without a usable number: keep the default similarity.
          }
        }
        if (fms < 0.0f || fms > 1.0f) {
          throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        }
        q = getFuzzyQuery(field, termImage, fms);
      } else {
        q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage);
      }
    }
  )
  {
    return q;
  }
}

/**
 * Parses a quoted phrase with an optional slop marker and an optional boost.
 * The slop defaults to phraseSlop when the marker is absent or unparsable.
 */
Query quotedTerm(String field) :
{
  Query q;
  Token term, fuzzySlop=null;
}
{
  (
    term=<QUOTED>
    [ fuzzySlop=<FUZZY_SLOP> ]
    [ <CARAT> boost=<NUMBER> ]
    {
      int s = phraseSlop;
      if (fuzzySlop != null) {
        try {
          s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
        } catch (Exception ignored) {
          // Marker without a usable number: keep the default slop.
        }
      }
      q = getFieldQuery(field, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
    }
  )
  {
    return q;
  }
}

/**
 * Parses "name > value" or "name < value" with an optional boost. The first
 * TERM is used as the field name; the field argument is not used. ">" yields
 * the range [value, Long.MAX_VALUE] and "<" yields [0, value], both inclusive.
 *
 * The optional boost is stored in the shared boost token (no local variable
 * shadows it), so Term() can apply it.
 */
Query smpRange(String field) :
{
  Token term1, term2;
  Query q;
  Token mark;
}
{
  term1=<TERM>
  ( mark=<GREATER> | mark=<LESS> )
  term2=<TERM>
  [ <CARAT> boost=<NUMBER> ]
  {
    if (mark.image.trim().matches("[>]")) {
      q = getRangeQuery(term1.image.trim(), term2.image.trim(), String.valueOf(Long.MAX_VALUE), true);
    } else {
      q = getRangeQuery(term1.image.trim(), String.valueOf(0), term2.image.trim(), true);
    }
    return q;
  }
}

/**
 * Dispatches to smpRange / smpTerm / rangeTerm / quotedTerm, then applies the
 * boost those productions may have stored in the shared boost token and
 * clears it.
 */
Query Term(String field) : {
  Query q;
}
{
  (
      (
          LOOKAHEAD(2)
          q = smpRange(field)
        | q = smpTerm(field)
      )
    | q = rangeTerm(field)
    | q = quotedTerm(field)
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      } catch (Exception ignored) {
        /* Should this be handled somehow? (defaults to "no boost", if
         * boost number is invalid)
         */
      }
      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    boost = null;
    return q;
  }
}

原创粉丝点击