使用flex和bison创建过滤器

来源:互联网 发布:linux查看目录权限 编辑:程序博客网 时间:2024/05/18 01:06

假设我们有一个类具有一些属性。这个类有很多实例。给定一个过滤的规则,挑选符合规则的实例。

假设规则使用类似C语言的语句进行描述。

例子:

属性列表

width     int

height    int

fps         float

os           string

一种可能的过滤器:

width > 640 || height < 450


1. 构造词法器

filter.l

%{
/*
 * filter.l -- flex scanner for the filter expression language.
 *
 * Each rule fills in yylval (the YYSTYPE union declared in filter.y) when the
 * token carries a value and returns the token code from filter.tab.h.
 * Strings stored in yylval.sVal are heap-allocated with strdup(); the parser
 * actions release them with free().
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>   /* strdup */

// override input: the scanner reads from an in-memory buffer owned by the parser
extern int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead);
#undef YY_INPUT
/*
 * The type of the "result" variable flex hands to YY_INPUT is not the same in
 * every flex release, so go through a local size_t instead of passing its
 * address to a size_t* parameter directly.
 */
#define YY_INPUT(b,r,s) \
    do { \
        size_t yyBytesRead_ = 0; \
        readInputForLexer((b), &yyBytesRead_, (int)(s)); \
        (r) = yyBytesRead_; \
    } while (0)

#include "filter.tab.h"

void yyerror(char *); // need declare this func

// yytext: current match, string
// modify yylval (of type YYSTYPE; optional) and return component type
%}

%%

[\t\n ]                 ;
[0-9]+                  { yylval.iVal = atoi(yytext); return INTEGER; }
[0-9]*\.[0-9]+          { yylval.fVal = atof(yytext); return FLOAT;   }
[a-zA-Z/][a-zA-Z/0-9]*  {
                            /* NOTE(review): '/' is accepted in identifiers, so a
                             * lone '/' is scanned as ID (this rule comes first),
                             * not as the operator below -- confirm intended. */
                            yylval.sVal = strdup(yytext);
                            return ID;
                        }
[\+\-\*\/\%]            { return *yytext; }
==                      { return EQ; }
\>                      { return GT; }
\<                      { return LT; }
\>=                     { return GE; }
\<=                     { return LE; }
!=                      { return NEQ; }
\|\|                    { return OR; }
\&\&                    { return AND; }
\(                      { return '('; }
\)                      { return ')'; }
\"(\\.|[^\\"])*\"       {
                            /* yytext still carries the delimiting quotes. Drop
                             * them so the literal compares equal to a variable
                             * holding the same text. Escape sequences inside
                             * the literal are kept verbatim (not decoded). */
                            yylval.sVal = strdup(yytext + 1);
                            if (yylval.sVal != NULL)
                                yylval.sVal[yyleng - 2] = '\0';
                            return STRING;
                        }
.                       { yyerror("Bad character"); }

%%

// https://www.ibm.com/developerworks/cn/linux/sdk/lex/
// used for multiple source files. here simply return 1 to
// stop parse at EOF
int yywrap(void)
{
    return 1;
}

2. 创建yacc文件

filter.y

%{
/* Created by Zhenyong Chen, 2017/07/30
 *
 * This demo is going to create a tree structured filter.
 *     A has a list of properties
 *     There's a set of rules
 * The filter will check whether A is qualified
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include "syntax.h"

#undef YYLEX_PARAM
extern int yylex();
void yyerror(char *);

/* Root of the tree built by the most recent parse; handed to the caller by
 * parseProgram(). */
static Operand *_outputTree = NULL;

/*
 * Build the node for "<name> <op> <value>".
 *
 * name  - identifier text allocated by the scanner; it is copied into the
 *         variable node and then released here.
 * value - already-created value node; becomes the right-hand operand.
 */
static Operand *makeComparison(Operand::OperatorType op, char *name, Operand *value)
{
    Operand *node = Operand::CreateNonTermNode(op, Operand::CreateVarNode(name), value);
    free(name);
    return node;
}
%}

// http://www.gnu.org/software/bison/manual/html_node/Token-Decl.html
// override default YYSTYPE (YYSTYPE yylval in lex file)
%union {
    float fVal;
    int   iVal;
    char *sVal;
    struct Operand *synNode;
}

%token <iVal> INTEGER "int"
%token <fVal> FLOAT "float"
%token <sVal> ID
%token EQ  "=="
%token NEQ "!="
%token GT  ">"
%token LT  "<"
%token GE  ">="
%token LE  "<="
%token OR  "||"
%token AND "&&"
%token <sVal> STRING

%left '+' '-'
%left '*' '/' '%'

// tell parser the nonterminal node type
%type<synNode> program
%type<synNode> expr
%type<synNode> AndExpr
%type<synNode> OrExpr

// Release semantic values the parser discards while handling a syntax error.
// "program" is deliberately not listed: its value is the tree returned to the
// caller through _outputTree and must survive a successful parse.
%destructor { free($$); } <sVal>
%destructor { delete $$; } expr AndExpr OrExpr

%%

// $$, $1, ... are of type YYSTYPE (http://pubs.opengroup.org/onlinepubs/7908799/xcu/yacc.html)
program:
    OrExpr                  { $$ = $1; _outputTree = $$; }
    | /* empty */           { $$ = NULL; _outputTree = $$; } // could be empty program
    ;

expr:
    ID "==" STRING          { $$ = makeComparison(Operand::OP_EQ, $1, Operand::CreateValueNode($3)); free($3); }
    | ID "!=" STRING        { $$ = makeComparison(Operand::OP_NE, $1, Operand::CreateValueNode($3)); free($3); }
    | ID "==" INTEGER       { $$ = makeComparison(Operand::OP_EQ, $1, Operand::CreateValueNode($3)); }
    | ID "!=" INTEGER       { $$ = makeComparison(Operand::OP_NE, $1, Operand::CreateValueNode($3)); }
    | ID "==" FLOAT         { $$ = makeComparison(Operand::OP_EQ, $1, Operand::CreateValueNode($3)); }
    | ID "!=" FLOAT         { $$ = makeComparison(Operand::OP_NE, $1, Operand::CreateValueNode($3)); }
    | ID ">" INTEGER        { $$ = makeComparison(Operand::OP_GT, $1, Operand::CreateValueNode($3)); }
    | ID ">" FLOAT          { $$ = makeComparison(Operand::OP_GT, $1, Operand::CreateValueNode($3)); }
    | ID "<" INTEGER        { $$ = makeComparison(Operand::OP_LT, $1, Operand::CreateValueNode($3)); }
    | ID "<" FLOAT          { $$ = makeComparison(Operand::OP_LT, $1, Operand::CreateValueNode($3)); }
    | ID ">=" INTEGER       { $$ = makeComparison(Operand::OP_GE, $1, Operand::CreateValueNode($3)); }
    | ID ">=" FLOAT         { $$ = makeComparison(Operand::OP_GE, $1, Operand::CreateValueNode($3)); }
    | ID "<=" INTEGER       { $$ = makeComparison(Operand::OP_LE, $1, Operand::CreateValueNode($3)); }
    | ID "<=" FLOAT         { $$ = makeComparison(Operand::OP_LE, $1, Operand::CreateValueNode($3)); }
    | '(' OrExpr ')'        { $$ = $2; }
    ;

AndExpr:
    expr                    { $$ = $1; }
    | AndExpr "&&" expr     { $$ = Operand::CreateNonTermNode(Operand::OP_AND, $1, $3); }
    ;

OrExpr:
    AndExpr                 { $$ = $1; }
    | OrExpr "||" AndExpr   { $$ = Operand::CreateNonTermNode(Operand::OP_OR, $1, $3); }
    ;

%%

/* Report a parse/scan error on stdout. */
void yyerror(char *s)
{
    printf("Compile error: %s\n", s);
}

/* Text currently being parsed. It points at the caller's buffer and is only
 * valid while parseProgram() is running. */
static const char *_buf = NULL;
static size_t _bufLen = 0;      /* length of _buf, computed once per parse */
static size_t _readOffset = 0;  /* number of bytes already handed to the scanner */

/*
 * Input callback used by the scanner's YY_INPUT.
 *
 * Copies at most maxBytesToRead bytes of the remaining text into buffer and
 * stores the number of bytes copied in *numBytesRead.
 * Returns 0 when data was delivered, -1 when no text is left (in which case
 * *numBytesRead is 0).
 */
int readInputForLexer(char* buffer, size_t *numBytesRead, int maxBytesToRead)
{
    size_t remaining = 0;
    if (_buf != NULL && _readOffset < _bufLen) {
        remaining = _bufLen - _readOffset;
    }
    if (remaining == 0) {
        *numBytesRead = 0;
        return -1;
    }

    /* Clamp the request: never negative, never past the end of the text. */
    size_t count = (maxBytesToRead > 0) ? (size_t)maxBytesToRead : 0;
    if (count > remaining) {
        count = remaining;
    }

    memcpy(buffer, _buf + _readOffset, count);
    *numBytesRead = count;
    _readOffset += count;
    return 0;
}

/*
 * Parse a filter expression.
 *
 * text - NUL-terminated expression; only read during this call.
 * Returns the root of the syntax tree (owned by the caller, release with
 * delete), or NULL when text is NULL, empty, or fails to parse.
 *
 * NOTE(review): the scanner's own buffered state is not reset here. If this
 * function is to be called repeatedly (especially after a failed parse),
 * check whether a yyrestart() call is needed -- confirm against flex docs.
 */
Operand *parseProgram(const char *text)
{
    if (text == NULL)
        return NULL;

    /* The scanner reads straight from the caller's text; no copy is needed
     * because the text outlives yyparse(). */
    _buf = text;
    _bufLen = strlen(text);
    _readOffset = 0;
    _outputTree = NULL;

    const int status = yyparse();

    _buf = NULL;
    _bufLen = 0;
    _readOffset = 0;

    Operand *ret = _outputTree;
    _outputTree = NULL;

    if (status != 0) {
        /* Never hand back a tree from a parse that did not succeed. */
        delete ret;
        ret = NULL;
    }
    return ret;
}

3. 语法树

syntax.h

#if !defined(SYNTAX_H_)
#define SYNTAX_H_

#include <cstddef>   // NULL
#include <string>

/**
 * A node of the filter syntax tree.
 *
 * A node is either a leaf (integer, float, string literal, or variable name)
 * or a non-terminal that applies eOperator to up to two child nodes.
 * Nodes are created through the static Create* factories; a non-terminal
 * deletes its children in its destructor.
 */
class Operand {
public:
    /// Operator carried by a non-terminal node.
    enum OperatorType {
        OP_AND = 1,
        OP_OR,
        OP_EQ,
        OP_NE,
        OP_GT,
        OP_LT,
        OP_GE,
        OP_LE,
    };

    /// Kind of node; selects which of the fields below is meaningful.
    enum OperandType {
        OP_NONTERM = 0,
        OP_INT,
        OP_FLOAT,
        OP_STRING,
        OP_VAR,
    };

    enum OperandType eType;

    // if OP_NONTERM
    enum OperatorType eOperator;
    Operand *aOperands[2];

    // if OP_VAR
    std::string varName;

    // otherwise, values
    int iVal;
    float fVal;
    std::string sVal;

    // create a leaf node: int/float/string
    static Operand *CreateValueNode(int ival);
    static Operand *CreateValueNode(float fval);
    static Operand *CreateValueNode(const char *sval);
    // create a leaf node that refers to a variable by name
    static Operand *CreateVarNode(const char *name);

    // create a non-terminal node
    static Operand *CreateNonTermNode(enum OperatorType opType, Operand *operand1 = NULL, Operand *operand2 = NULL);

    virtual ~Operand();

    /// Print this node and its children, indented by `margin` spaces.
    void Dump(int margin);

private:
    Operand();

    // Not copyable: a copy would share aOperands with the original and both
    // destructors would delete the same children. Declared, never defined.
    Operand(const Operand &);
    Operand &operator=(const Operand &);
};

#endif /* SYNTAX_H_ */

syntax.cpp

#include "syntax.h"Operand::Operand(){    eType = OP_INT;    iVal = 0;}Operand::~Operand(){    if(eType == OP_NONTERM) {        for(int i=0; i<sizeof(aOperands)/sizeof(aOperands[0]); i++) {            if(aOperands[i] != NULL) {                delete aOperands[i];                aOperands[i] = NULL;            }        }    }}Operand *Operand::CreateValueNode(int ival){    Operand *op = new Operand();    op->eType = OP_INT;    op->iVal = ival;    return op;}Operand *Operand::CreateValueNode(float fval){    Operand *op = new Operand();    op->eType = OP_FLOAT;    op->fVal = fval;        return op;}Operand *Operand::CreateValueNode(const char *sval){    Operand *op = new Operand();    op->eType = OP_STRING;    op->sVal = sval;    return op;}Operand *Operand::CreateVarNode(const char *name){    Operand *op = new Operand();    op->eType = OP_VAR;    op->varName = name;        return op;}Operand *Operand::CreateNonTermNode(enum OperatorType opType,                                    Operand *operand1/* = NULL*/,                                    Operand *operand2/* = NULL*/){    Operand *op = new Operand();    op->eType = OP_NONTERM;    op->eOperator = opType;    op->aOperands[0] = operand1;    op->aOperands[1] = operand2;        return op;}static void print_margin(int margin){    while(margin > 0) {        printf(" ");        margin--;    }}const char *operatorTypeToString(enum Operand::OperatorType t){    switch(t) {    case Operand::OP_AND:        return "&&";    case Operand::OP_OR:        return "||";    case Operand::OP_EQ:        return "==";    case Operand::OP_NE:        return "!=";    case Operand::OP_GT:        return ">";    case Operand::OP_LT:        return "<";    case Operand::OP_GE:        return ">=";    case Operand::OP_LE:        return "<=";    default:        return "ERROR";    }}void Operand::Dump(int margin){    if(eType == OP_NONTERM) {        print_margin(margin);        // print operator        printf("%s {\n", 
operatorTypeToString(eOperator));        for(int i=0; i<sizeof(aOperands)/sizeof(aOperands[0]); i++) {            if(aOperands[i] != NULL) {                aOperands[i]->Dump(margin + 4);            }        }        print_margin(margin);        printf("}\n");    }    else    {        print_margin(margin);        switch(eType) {        case OP_INT:            printf("%d\n", iVal);            break;        case OP_FLOAT:            printf("%f\n", fVal);            break;        case OP_STRING:            printf("%s\n", sVal.c_str());            break;        case OP_VAR:            printf("%s\n", varName.c_str());            break;        default:            break;        }    }}

4. 使用方法

main.cpp

#include <stdio.h>#include "syntax.h"#include <string>#include <list>extern Operand *parseProgram(const char *text);// evaluate synstax treestruct compoundValue {    int type; // 0: void; 1: int; 2: float; 3: std::string; 4: bool        int iVal;    float fVal;    std::string sVal;    bool bVal;};void printValue(struct compoundValue &v){    if(v.type == 1) {        printf("(int)%d\n", v.iVal);    }    else if(v.type == 2) {        printf("(float)%f\n", v.fVal);    }    else if(v.type == 3) {        printf("(string)%s\n", v.sVal.c_str());    }    else if(v.type == 4) {        printf("(bool)%s\n", v.bVal ? "true" : "false");    }    else {        // should not reach here    }}// define several variablesstruct variable {    std::string name;    struct compoundValue value;};std::list<struct variable> gVariableList;void initVarList(){    struct variable v;    v.name = "width";    v.value.type = 1; // int    v.value.iVal = 640;    gVariableList.push_back(v);        v.name = "height";    v.value.type = 1; // int    v.value.iVal = 360;    gVariableList.push_back(v);        v.name = "fps";    v.value.type = 2; // float    v.value.fVal = 25.0;    gVariableList.push_back(v);    v.name = "os";    v.value.type = 3; // std::string    v.value.sVal = "Android";    gVariableList.push_back(v);    v.name = "enabled";    v.value.type = 4; // bool    v.value.bVal = true;    gVariableList.push_back(v);}int get_variable_type(std::string varName){    std::list<struct variable>::iterator it;    for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {        struct variable &v = *it;        if(v.name.compare(varName) == 0) {            return v.value.type;        }    }    return -1; // should not reach here}int get_variable_value_i(std::string varName){    std::list<struct variable>::iterator it;    for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {        struct variable &v = *it;        if(v.name.compare(varName) == 0) {            return v.value.iVal;        }    }    
return 0; // should not reach here}float get_variable_value_f(std::string varName){    std::list<struct variable>::iterator it;    for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {        struct variable &v = *it;        if(v.name.compare(varName) == 0) {            return v.value.fVal;        }    }        return 0.0f; // should not reach here}std::string get_variable_value_s(std::string varName){    std::list<struct variable>::iterator it;    for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {        struct variable &v = *it;        if(v.name.compare(varName) == 0) {            return v.value.sVal;        }    }        return ""; // should not reach here}bool get_variable_value_b(std::string varName){    std::list<struct variable>::iterator it;    for(it=gVariableList.begin(); it!=gVariableList.end(); it++) {        struct variable &v = *it;        if(v.name.compare(varName) == 0) {            return v.value.bVal;        }    }        return false; // should not reach here}struct compoundValue eval_syntax(Operand &op){    struct compoundValue v;    // traverse the tree    if(op.eType == Operand::OP_INT) {        v.type = 1;        v.iVal = op.iVal;    }    else if(op.eType == Operand::OP_FLOAT) {        v.type = 2;        v.fVal = op.fVal;    }    else if(op.eType == Operand::OP_STRING) {        v.type = 3;        v.sVal = op.sVal;    }    else if(op.eType == Operand::OP_VAR) {        v.type = get_variable_type(op.varName);        if(v.type == 1) {            v.iVal = get_variable_value_i(op.varName);        }        else if(v.type == 2) {            v.fVal = get_variable_value_f(op.varName);        }        else if(v.type == 3) {            v.sVal = get_variable_value_s(op.varName);        }        else if(v.type == 4) {            v.bVal = get_variable_value_b(op.varName);        }    }    else if(op.eType == Operand::OP_NONTERM) {        if(op.eOperator == Operand::OP_AND) {            struct compoundValue left, right;            left = 
eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            v.bVal = left.bVal && right.bVal;        }        else if(op.eOperator == Operand::OP_OR) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            v.type = 4;            if(left.bVal) {                v.bVal = true;            }            else {                right = eval_syntax(*op.aOperands[1]);                v.bVal = right.bVal;            }        }        else if(op.eOperator == Operand::OP_EQ) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal == right.iVal);            else if(left.type == 2)                v.bVal = (left.fVal == right.fVal);            else if(left.type == 3)                v.bVal = left.sVal.compare(right.sVal) == 0;            else if(left.type == 4)                v.bVal = (left.bVal == right.bVal);        }        else if(op.eOperator == Operand::OP_NE) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal != right.iVal);            else if(left.type == 2)                v.bVal = (left.fVal != right.fVal);            else if(left.type == 3)                v.bVal = left.sVal.compare(right.sVal) != 0;            else if(left.type == 4)                v.bVal = (left.bVal != right.bVal);        }        else if(op.eOperator == Operand::OP_GT) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal > right.iVal);            else if(left.type == 
2)                v.bVal = (left.fVal > right.fVal);        }        else if(op.eOperator == Operand::OP_LT) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal < right.iVal);            else if(left.type == 2)                v.bVal = (left.fVal < right.fVal);        }        else if(op.eOperator == Operand::OP_GE) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal >= right.iVal);            else if(left.type == 2)                v.bVal = (left.fVal >= right.fVal);        }        else if(op.eOperator == Operand::OP_LE) {            struct compoundValue left, right;            left = eval_syntax(*op.aOperands[0]);            right = eval_syntax(*op.aOperands[1]);            v.type = 4;            if(left.type == 1)                v.bVal = (left.iVal <= right.iVal);            else if(left.type == 2)                v.bVal = (left.fVal <= right.fVal);        }    }        return v;}int main(int argc, const char **agrv){    initVarList();    const char *s = "width > 640 || height < 450";    printf("Input: \n%s\n", s);    Operand *p = parseProgram(s);    if(p != NULL) {        p->Dump(0);        struct compoundValue v = eval_syntax(*p);        delete p;        printf("Evaluated:\n");        printValue(v);    }    return 0;}

5. 编译


#!/bin/bash
# Build the filter demo.
#
# 1. bison -d writes filter.tab.c and filter.tab.h (the scanner includes the
#    header), flex writes lex.yy.c.
# 2. g++ compiles the generated .c files together with the .cpp sources;
#    filter.y's actions use the C++ Operand class, so everything is built
#    with the C++ compiler.
#
# Exits with status 1 as soon as any step fails.

bison -d filter.y && flex filter.l || exit 1

g++ -o filter *.c *.cpp || exit 1

exit 0


输出结果:

Input: 
width > 640 || height < 450
|| {
    > {
        width
        640
    }
    < {
        height
        450
    }
}
Evaluated:
(bool)true