自定义语言分析器写法示例 .jjt
来源:互联网 发布:c语言编写99乘法表三角 编辑:程序博客网 时间:2024/06/03 09:26
/**
 * JJTree grammar for a small field-based query language, e.g.
 * (cdate=[2012-2011]).
 *
 * JJTree template file created by SF JavaCC plugin 1.5.17+ wizard for JavaCC 1.5.0+
 *
 * While parsing, the productions append a normalized textual form of the
 * query to the static buffer SimpleQueryParser.sb; the node tree itself is
 * returned by start().
 */
options
{
  JDK_VERSION = "1.5";
  MULTI = false;
  VISITOR = false;
  NODE_DEFAULT_VOID = false;
  // STATIC = true: the generated parser keeps all of its state in static
  // members, so there is a single parser per JVM.
  STATIC = true;
  USER_CHAR_STREAM = false;
  UNICODE_INPUT = true;
  JAVA_UNICODE_ESCAPE = true;
}

PARSER_BEGIN(SimpleQueryParser)
package com.zyb.query;

import java.io.IOException;
import java.io.StringReader;
import java.text.Collator;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.VirtualMethod;

/**
 * Parser class generated from this grammar.
 *
 * The static fields below are written by the grammar actions during a parse.
 */
public class SimpleQueryParser
{
  /** Name of the field most recently seen in a "name =" prefix (set by termPrefix()). */
  public static String fname = "";

  /** Accumulates the normalized textual form of the query while parsing. */
  public static StringBuffer sb = new StringBuffer();

  /**
   * Re-initializes the (static) parser on the given query text and clears the
   * output accumulated by any previous parse.
   *
   * NOTE: this method does not run the grammar and does not build a Lucene
   * query yet; it always returns null. Call start() afterwards to parse.
   *
   * @param query the query text to parse next
   * @return always null
   */
  public Query parse(String query)
  {
    SimpleQueryParser.ReInit(new StringReader(query));
    // Drop the state left by an earlier parse so that results do not
    // concatenate across queries.
    sb.setLength(0);
    fname = "";
    return null;
  }

  /**
   * Demo entry point: parses a fixed sample query, dumps the node tree and
   * prints the accumulated normalized form.
   */
  public static void main(String args [])
  {
    try
    {
      SimpleQueryParser parser = new SimpleQueryParser(new StringReader("(cdate=[2012-2011])"));
      SimpleNode root = parser.start();
      root.dump("");
      System.out.println(SimpleQueryParser.sb.toString());
    }
    catch (Exception e)
    {
      e.printStackTrace();
    }
  }
}

PARSER_END(SimpleQueryParser)

/** Whitespace, line comments and block comments are skipped. */
SKIP :
{
  " "
| "\t"
| "\n"
| "\r"
| < "//" (~[ "\n", "\r" ])* ( "\n" | "\r" | "\r\n" ) >
| < "/*" (~[ "*" ])* "*" ( ~[ "/" ] (~[ "*" ])* "*" )* "/" >
}

/** Private (#) helper expressions, usable from every lexical state. */
< * > TOKEN :
{
  < #_NUM_CHAR : [ "0"-"9" ] >
  /** A backslash followed by any character (an escaped character). */
| < #_ESCAPED_CHAR : "\\" ~[ ] >
  /**
   * First character of a term: any character except the ones listed in the
   * brackets, or any escaped character.
   */
| < #_TERM_START_CHAR :
    (
      ~[ " ", "\t", "\n", "\r", "!", "(", ")", ":", "^", "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "=" ]
    | < _ESCAPED_CHAR >
    ) >
  /** Character inside a term: a term-start character or any escaped character. */
| < #_TERM_CHAR :
    (
      < _TERM_START_CHAR >
    | < _ESCAPED_CHAR >
    ) >
  /** Whitespace characters. */
| < #_WHITESPACE :
    (
      " "
    | "\t"
    | "\n"
    | "\r"
    ) >
  /**
   * Character inside double quotes: anything except the double quote and the
   * backslash themselves, or any escaped character.
   */
| < #_QUOTED_CHAR :
    (
      ~[ "\"", "\\" ]
    | < _ESCAPED_CHAR >
    ) >
  /** Separators accepted between the two ends of a range. */
| < #_RANGETO :
    "to"
  | "TO"
  | "To"
  | "tO"
  | "-"
  | "~" >
| < #_letter : ([ "a"-"z", "A"-"Z" ])+ ([ "0"-"9" ])* >
| < #DIGITS : ([ "0"-"9" ])+ >
  /**
   * Shorthand unit for numbers in the ten-thousands (original example: 9w or
   * 9W). As written it matches optional spaces, "(", digits, W or w, ")",
   * optional spaces.
   */
| < #_WILDSORT : (" ")* "(" ([ "0"-"9" ])* ([ "W", "w" ]) ")" (" ")* >
  /**
   * Digit-position range: an interval match starting from a given position
   * (original example: 999n). Same shape as _WILDSORT but with N or n.
   */
| < #_WILD : (" ")* "(" ([ "0"-"9" ])* ([ "N", "n" ]) ")" (" ")* >
  /** Date written with 4 digits. */
| < #_DATEY : ([ "0"-"9" ]){4} >
  /** Date written with 6 digits. */
| < #_DATEYM : ([ "0"-"9" ]){6} >
  /** Date written with 8 digits. */
| < #_DATEYMD : ([ "0"-"9" ]){8} >
  /** A date may take any of the three forms above. */
| < #_DATE :
    (
      < _DATEY >
    | < _DATEYM >
    | < _DATEYMD >
    ) >
}

/*
 * Token declaration order below is significant: when two expressions match
 * the same longest text, the one declared first wins.
 */
TOKEN :
{
  < OR : "or" | "OR" >
| < AND : "and" | "AND" >
| < NOT : "not" | "NOT" >
| < OPEN_PAR : "(" >
| < CLOSE_PAR : ")" >
| < OPEN_BET : "[" >
| < CLOSE_BET : "]" >
| < OPEN_BCE : "{" >
| < CLOSE_BCE : "}" >
}

/**
 * Text in double quotes: one or more quoted characters, i.e. anything other
 * than the double quote itself, escaped or not.
 */
TOKEN :
{
  < QUOTED : "\"" (< _QUOTED_CHAR >)+ "\"" >
}

TOKEN :
{
  < WORD :
    (
      (< _TERM_START_CHAR >)+ (< _TERM_CHAR >)*
    ) >
}

/**
 * Generic prefix query: a term followed by a trailing asterisk.
 * Original example: name:我*
 */
TOKEN :
{
  < PREFIXTERM : (< _TERM_START_CHAR >)+ "*" >
}

/** Two dates in square brackets joined by a range separator. */
TOKEN :
{
  < DATERANGETERM : "[" (" ")* (< _DATE >) (" ")* < _RANGETO > (" ")* < _DATE > (" ")* "]" >
}

/** A single date in square brackets. */
TOKEN :
{
  < DATETERM : "[" (" ")* (< _DATE >) (" ")* "]" >
}

/** Declaration of the query field: a field name followed by "=". */
TOKEN :
{
  < LETTER : < _letter > (" ")* ("=") >
}

TOKEN :
{
  < EQUED : "=" >
}

TOKEN :
{
  < RANGESORT :
    /** Either term-start characters or text wrapped in double quotes ... */
    (
      (
        (< _TERM_START_CHAR >)+
      )
    |
      (
        "\"" (< _QUOTED_CHAR >)+ "\""
      )
    )
    /** ... followed by the unit ... */
    (< _WILDSORT >)
    /** ... followed by a second operand of the same shape as the first. */
    (
      (
        (< _TERM_START_CHAR >)+
      )
    |
      (
        "\"" (< _QUOTED_CHAR >)+ "\""
      )
    ) >
}

TOKEN :
{
  < RANGE :
    (
      (
        (< _TERM_START_CHAR >)+
      )
    |
      (
        "\"" (< _QUOTED_CHAR >)+ "\""
      )
    )
    (< _WILD >)
    (
      (
        (< _TERM_START_CHAR >)+
      )
    |
      (
        "\"" (< _QUOTED_CHAR >)+ "\""
      )
    ) >
}

TOKEN :
{
  < WILDTERM :
    (
      < _TERM_START_CHAR >
    | [ "*", "?" ]
    )
    (
      < _TERM_CHAR >
    | ([ "*", "?" ])
    )* >
}

/**
 * Entry point of the grammar.
 *
 * Requires end of input after the expression so that trailing text that does
 * not belong to the query raises a ParseException instead of being ignored.
 *
 * @return the root node of the parse tree (this production's own node)
 */
SimpleNode start() :
{}
{
  expression() < EOF >
  {
    return jjtThis;
  }
}

/** An expression is a single find() sequence. */
void expression() :
{}
{
  find()
}

/**
 * One or more parts, each pair optionally joined by AND or OR.
 *
 * Appends " ( " before the first part and " ) " after the last one; appends
 * " and " / " or " when the corresponding operator is present. The operator
 * is optional: two adjacent parts with no operator between them are accepted
 * and nothing is appended between them.
 */
void find() :
{}
{
  {
    sb.append(" ( ");
  }
  // The first element is mandatory; what kind of element it is gets decided
  // inside part().
  part()
  (
    (
      < AND >
      {
        sb.append(" and ");
      }
    | < OR >
      {
        sb.append(" or ");
      }
    )?
    part()
  )*
  {
    sb.append(" ) ");
  }
}

/**
 * All element kinds of the language: a field prefix, a negation, a term, or a
 * parenthesized sub-expression.
 */
void part() :
{}
{
  (
    termPrefix()
  | not()
  | termType()
  |
    (
      {
        // Special form that starts with "(".
        sb.append(" ( ");
      }
      < OPEN_PAR >
      find() // recurse from the top
      < CLOSE_PAR >
      {
        sb.append(" ) ");
      }
    )
  )
}

/**
 * Matches a field name together with its "=" sign, stores the trimmed field
 * name in fname, then continues with the next element.
 */
void termPrefix() :
{
  Token t = null;
}
{
  t = < LETTER >
  {
    String termPrefix = t.image;
    System.out.println(termPrefix);
    // The token image is "<name><spaces>="; keep only the name.
    fname = termPrefix.split("=") [ 0 ].trim();
  }
  part() // recurse to find the next element
}

/** Negation: wraps the following part in " not( " ... " ) ". */
void not() :
{}
{
  < NOT >
  {
    sb.append(" not( ");
  }
  part() // recurse to find the next element
  {
    sb.append(" ) ");
  }
}

/**
 * All term kinds accepted by the parser.
 *
 * NOTE(review): WORD, QUOTED, DATETERM, RANGE and RANGESORT tokens are
 * declared above but are not accepted here. Because WORD is declared before
 * WILDTERM and matches the same text for a term without "*" or "?", a plain
 * term such as name=foo appears to be tokenized as WORD and rejected by the
 * parser - confirm whether that is intended.
 */
void termType() :
{}
{
  (
    prefixTerm()
  | dateRangeTerm()
  | wildTerm()
  )
}

/** Prefix term: appends "<fname>=<token text>". */
void prefixTerm() :
{
  Token t = null;
}
{
  t = < PREFIXTERM >
  {
    sb.append(fname + "=" + t.image);
  }
}

/** Date-range term: appends "<fname>=<token text>". */
void dateRangeTerm() :
{
  Token t = null;
}
{
  t = < DATERANGETERM >
  {
    sb.append(fname + "=" + t.image);
  }
}

/** Wildcard term: appends "<fname>=<token text>". */
void wildTerm() :
{
  Token t = null;
}
{
  t = < WILDTERM >
  {
    sb.append(fname + "=" + t.image);
  }
}