/* ***************** */
/* Token Definitions */
/* ***************** */

// Private (#) helper character classes shared by every lexical state.
<*> TOKEN : {
  // Best not to turn this into a public/global token, otherwise it would
  // interfere with many other definitions.
  <#_NUM_CHAR:        ["0"-"9"] >
  // Any escaped character, e.g. \\s or \\t.  ~[] matches every character.
| <#_ESCAPED_CHAR:    "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "<", ">", "\"", "#", "%", "'", "{", "}", "~", "*", "?",
                           "\\", "=", "," ]
                        | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR:       ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE:      ( " " | "\t" | "\n" | "\r" | "\u3000" ) >
| <#_QUOTED_CHAR:     ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE> >
}

// NOTE: the order of the definitions below is significant. When two tokens
// match the same longest input, the one declared first wins (e.g. a bare "~"
// is a SHIT token, not a FUZZY_SLOP).
<DEFAULT> TOKEN : {
  <AND:           ( ["a","A"] ["n","N"] ["D","d"] ) >
| <OR:            ( ["o","O"] ["r","R"] | "," ) >
| <NOT:           ( ["N","n"] ["o","O"] ["t","T"] ) >
| <NEAR:          ( ["N","n"] ["e","E"] ["A","a"] ["r","R"] ) >
  // Note: do not define overly general patterns such as ([A-Z]|[a-z])* here;
  // that would also be too restrictive for the other tokens.
| <WORDFORWARD:   ( ["W","w"] ["f","F"] (<_NUM_CHAR>)* ) >
| <WORDNEAR:      ( ["W","w"] (<_NUM_CHAR>)* ) >
| <SPACE:         ( ["S","s"] ["p","P"] ["A","a"] ["C","c"] ["e","E"] ) >
| <GREATER:       ">" >
| <LESS:          "<" >
| <SMALLDOT:      "'" >
| <PLUS:          "+" >
| <MINUS:         "-" >
| <LPAREN:        ("(") >
| <RPAREN:        (")") >
  // In this grammar the field separator is "=", not ":".
| <COLON:         "=" >
| <SHIT:          ["~","#","%"] >
| <STAR:          "*" >
| <CARAT:         "^" > : Boost
| <QUOTED:        "\"" (<_QUOTED_CHAR>)* "\"" >
| <TERM:          <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP:    <SHIT> ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM:    ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM:      ( <_TERM_START_CHAR> | ["*","?"] ) ( <_TERM_CHAR> | ( ["*","?"] ) )* >
| <RANGEIN_START: ("[") > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

// Entered after "^"; a single number is read and the lexer returns to DEFAULT.
<Boost> TOKEN : {
  <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<RangeIn> TOKEN : {
  <RANGEIN_TO:     ( ( ["t","T"] ["O","o"] ) | <MINUS> | <SHIT> ) >
| <RANGEIN_END:    ("]") > : DEFAULT
| <RANGEIN_QUOTED: "\"" ( ~["\""] | "\\\"" )+ "\"" >
| <RANGEIN_GOOP:   ( ~["-", "~", " ", "]"] )+ >
}

<RangeEx> TOKEN : {
  // Some definitions can be reused, e.g. <RANGEIN_TO>.
  <RANGEEX_TO:     <RANGEIN_TO> >
| <RANGEEX_END:    "}" > : DEFAULT
| <RANGEEX_QUOTED: "\"" ( ~["\""] | "\\\"" )+ "\"" >
| <RANGEEX_GOOP:   ( ~["-", "~", " ", "}"] )+ >
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

/*
 * Parses an optional AND / OR conjunction.
 * Returns CONJ_AND, CONJ_OR, or CONJ_NONE when no conjunction is present.
 */
int Conjunction() :
{
  int ret = CONJ_NONE;
}
{
  [
      <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}

/*
 * Parses an optional clause modifier.
 * Returns MOD_REQ for "+", MOD_NOT for "-" or NOT, MOD_NONE otherwise.
 */
int Modifiers() :
{
  int ret = MOD_NONE;
}
{
  [
      <PLUS>  { ret = MOD_REQ; }
    | <MINUS> { ret = MOD_NOT; }
    | <NOT>   { ret = MOD_NOT; }
  ]
  { return ret; }
}

// This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) :
{
  Query q;
}
{
  q=Query(field) <EOF>
  {
    return q;
  }
}

/*
 * Parses one or more clauses. A single unmodified clause is returned as-is;
 * anything else is combined through getBooleanQuery(clauses).
 */
Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    if (mods == MOD_NONE) {
      firstQuery = q;
    }
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null) {
      return firstQuery;
    } else {
      return getBooleanQuery(clauses);
    }
  }
}

/*
 * Parses a single clause: an optional "field=" (or "*=") prefix followed by a
 * distance query, a term, or a parenthesised sub-query with an optional boost.
 *
 * boost, lbrace, rbrace and appendlbrace are not declared in this production;
 * they are presumably parser members declared outside this part of the file.
 */
Query Clause(String field) :
{
  Query q;
  Token fieldToken=null;
}
{
  [
    LOOKAHEAD(2)
    (
        fieldToken=<TERM> <COLON> { field=discardEscapeChar(fieldToken.image); }
      | <STAR> <COLON> { field="*"; }
    )
  ]
  (
    (
        LOOKAHEAD(3)
        (
            LOOKAHEAD(3)
            q = distanceQueryStart(field)
          | q = Term(field)
        )
      | (<LPAREN>)
        {
          lbrace += 1;
          appendlbrace += 1;
        }
        q=Query(field)
        (
            <RPAREN>
          | <EOF>
            {
              // End of input is accepted in place of ")" only when the
              // parenthesis counters agree.
              if (lbrace != rbrace) {
                throw new ParseException("brace does not matches");
              } else {
                return q;
              }
            }
        )
        { appendlbrace = 0; }
        (<CARAT> boost=<NUMBER>)?
    )
  )
  {
    if (boost != null) {
      try {
        float f = Float.valueOf(boost.image).floatValue();
        // avoid boosting null queries instead of relying on a swallowed
        // NullPointerException
        if (q != null) {
          q.setBoost(f);
        }
      } catch (Exception ignored) {
        // best effort: an unusable boost leaves the query unboosted
      }
      // Clear the shared token so that the boost of this clause cannot leak
      // into the next Term(), which applies any non-null boost it finds.
      boost = null;
    }
    return q;
  }
}

/*
 * Parses an inclusive "[a TO b]" or exclusive "{a TO b}" range with an
 * optional boost and builds it through getRangeQuery(field, lower, upper,
 * inclusive). Surrounding quotes of quoted bounds are stripped first.
 */
Query rangeTerm(String field) :
{
  Token goop1, goop2;
  Query q;
}
{
  (
      ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP> | goop1=<RANGEIN_QUOTED> )
        [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP> | goop2=<RANGEIN_QUOTED> )
        <RANGEIN_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEIN_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        }
        if (goop2.kind == RANGEIN_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        }
        q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), true);
      }
    | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP> | goop1=<RANGEEX_QUOTED> )
        [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP> | goop2=<RANGEEX_QUOTED> )
        <RANGEEX_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEEX_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        }
        if (goop2.kind == RANGEEX_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        }
        q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), false);
      }
  )
  { return q; }
}

/*
 * Consumes the remainder of a distance (proximity) expression one token at a
 * time, recursing after every operator, term or parenthesis and appending its
 * text to sb. When none of those tokens follows, an optional boost is read and
 * the collected text is turned into the distanceQuery member via
 * DistanceQueryFormat.formatPhraseTerm; the counters and sb are then reset.
 *
 * Throws ParseException when keyword and operator counts are both non-zero and
 * do not differ by exactly one.
 *
 * The boost is stored in the parser-level boost token (the same one every
 * other production assigns) so that Clause() can apply it; it used to be read
 * into a shadowing local and silently discarded.
 */
void distanceQuery(String field,String beforeStr) :
{
  Token term;
  Token slop=null;
}
{
  (
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
      {
        disnwcout += 1;
        sb.append(" "+slop.image.trim()+" ");
        distanceQuery(field,beforeStr);
        return;
      }
    | term=<TERM>
      {
        diskycout += 1;
        sb.append(term.image.trim());
        distanceQuery(field,beforeStr);
        return;
      }
    | (
          <LPAREN> { sb.append(" ( "); lbrace += 1; }
        | <RPAREN> { rbrace += 1; sb.append(" ) "); }
      )
      {
        distanceQuery(field,beforeStr);
        return;
      }
    | [ <CARAT> boost=<NUMBER> ]
      {
        if (diskycout != 0 && disnwcout != 0 && diskycout - disnwcout != 1) {
          throw new ParseException("the keyword does not matches");
        }
        DistanceQueryFormat format = new DistanceQueryFormat();
        DistanceQueryFormat.parenMatches(lbrace,rbrace);
        distanceQuery = format.formatPhraseTerm(field+":"+beforeStr+" "+sb.toString().trim());
        //System.out.println(field+":"+beforeStr+" "+sb.toString().trim());
        lbrace = 0;
        rbrace = 0;
        diskycout = 0;
        disnwcout = 0;
        sb = new StringBuffer();
      }
  )
}

/*
 * Recognises the head of a distance expression, "TERM op" or "( TERM op",
 * where op is NEAR, WORDFORWARD, WORDNEAR or SPACE. It then hands the rest of
 * the input to distanceQuery() and returns the distanceQuery member that call
 * builds. One " ( " is prepended for every parenthesis counted in
 * appendlbrace.
 */
Query distanceQueryStart(String field) :
{
  Token slop=null;
  Token term = null;
  Token lparen = new Token();
  lparen.image="";
}
{
  (
      term=<TERM>
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
    | lparen=<LPAREN> term=<TERM>
      (
          slop=<NEAR>
        | slop=<WORDFORWARD>
        | slop=<WORDNEAR>
        | slop=<SPACE>
      )
      { lbrace += 1; }
  )
  {
    diskycout += 1;
    disnwcout += 1;
    StringBuilder head = new StringBuilder();
    for (int i = 0; i < appendlbrace; i++) {
      head.append(" ( ");
    }
    appendlbrace = 0;
    // Use the operator's image explicitly (as distanceQuery() does) rather
    // than depending on Token.toString().
    head.append(lparen.image.trim()).append(" ")
        .append(term.image.trim()).append(" ")
        .append(slop.image);
    distanceQuery(field, head.toString());
    return distanceQuery;
  }
}

/*
 * Parses a simple term (TERM, "*", PREFIXTERM, WILDTERM or NUMBER) with an
 * optional fuzzy marker and an optional boost, and builds a wildcard, prefix,
 * fuzzy or plain field query. Wildcard takes precedence over prefix, which
 * takes precedence over fuzzy.
 *
 * Throws ParseException when the fuzzy similarity is outside [0.0, 1.0].
 */
Query smpTerm(String field) :
{
  Token term, fuzzySlop=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
    (
        term=<TERM>
      | term=<STAR> { wildcard=true; }
      | term=<PREFIXTERM> { prefix=true; }
      | term=<WILDTERM> { wildcard=true; }
      | term=<NUMBER>
    )
    [ ( fuzzySlop=<FUZZY_SLOP> | <SHIT> ) { fuzzy=true; } ]
    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
    {
      String termImage=discardEscapeChar(term.image);
      if (wildcard) {
        q = getWildcardQuery(field, termImage);
      } else if (prefix) {
        q = getPrefixQuery(field,
              discardEscapeChar(term.image.substring(0, term.image.length()-1)));
      } else if (fuzzy) {
        float fms = fuzzyMinSim;
        // A bare marker (<SHIT>) carries no number and leaves fuzzySlop null:
        // keep the default similarity explicitly instead of relying on a
        // swallowed NullPointerException.
        if (fuzzySlop != null) {
          try {
            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
          } catch (Exception ignored) {
            // unparsable similarity: fall back to fuzzyMinSim
          }
        }
        if (fms < 0.0f || fms > 1.0f) {
          throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        }
        q = getFuzzyQuery(field, termImage,fms);
      } else {
        q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage);
      }
    }
  )
  { return q; }
}

/*
 * Parses a quoted phrase with an optional slop ("~n") and optional boost.
 * The slop defaults to phraseSlop; the quotes are stripped before the phrase
 * is passed to getFieldQuery(field, text, slop).
 */
Query quotedTerm(String field) :
{
  Query q;
  Token term, fuzzySlop=null;
}
{
  (
    term=<QUOTED>
    [ fuzzySlop=<FUZZY_SLOP> ]
    [ <CARAT> boost=<NUMBER> ]
    {
      int s = phraseSlop;
      if (fuzzySlop != null) {
        try {
          s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
        } catch (Exception ignored) {
          // unparsable slop: keep phraseSlop
        }
      }
      q = getFieldQuery(field, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
    }
  )
  { return q; }
}

/*
 * Parses a comparison "name > value" or "name < value" with an optional boost.
 * The first term is used as the field name (the field argument is not used).
 * ">" builds the inclusive range [value, Long.MAX_VALUE]; "<" builds [0, value].
 *
 * The boost is stored in the parser-level boost token so that the calling
 * Term() production applies it; it used to be read into a shadowing local and
 * lost.
 */
Query smpRange(String field) :
{
  Token term1, term2;
  Query q;
  Token mark;
}
{
  term1=<TERM>
  ( mark=<GREATER> | mark=<LESS> )
  term2=<TERM>
  [ <CARAT> boost=<NUMBER> ]
  {
    if (mark.kind == GREATER) {
      q = getRangeQuery(term1.image.trim(), term2.image.trim(), String.valueOf(Long.MAX_VALUE), true);
    } else {
      q = getRangeQuery(term1.image.trim(), String.valueOf(0), term2.image.trim(), true);
    }
    return q;
  }
}

/*
 * Parses a comparison, simple term, range or quoted phrase, then applies and
 * clears the boost token that the sub-production may have set.
 */
Query Term(String field) :
{
  Query q;
}
{
  (
      (
          LOOKAHEAD(2)
          q = smpRange(field)
        | q = smpTerm(field)
      )
    | q = rangeTerm(field)
    | q = quotedTerm(field)
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      } catch (Exception ignored) {
        /* Should this be handled somehow? (defaults to "no boost", if
         * boost number is invalid)
         */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    boost = null;
    return q;
  }
}