随想录(c编译器的实现)

来源:互联网 发布:知柏地黄丸 多少钱 编辑:程序博客网 时间:2024/05/19 03:42

【 声明:版权所有,欢迎转载,请勿用于商业用途。 联系信箱:feixiaoxing @163.com】


    无意中在github上发现一个很有意思的项目,比如这里https://github.com/hukun01/CMMInterpreter。它利用flex实现了字符串的识别,利用bison实现了ast语法树的构建,最后直接利用ast进行计算和识别。


    比如,它的lex文件是这么写的,

/***********************************//* File: cmm.l    version 2.0      *//* Flex version for CMM            *//* CMM Interpreter Construction    *//***********************************/%option noyywrap%{#include "globals.h"#include "util.h"#include "scan.h"#include "cmm.tab.h"%}digit[0-9]int_num{digit}+real_num{digit}+"."{digit}*letter[a-zA-Z]identifier{letter}+({letter}|{digit}|_)*({letter}+|{digit}+)|{letter}+whitespace[ \t]*%%"if"{ return IF;}"else"{ return ELSE;}"while"{ return WHILE;}"int"{ yylval.dataType = Int; return INT;} "real"{ yylval.dataType = Real; return REAL;}"bool"{ yylval.dataType = Bool; return BOOL;}"read"{ return READ;}"write"{ return WRITE;}"main"{ return MAIN;}{int_num}{ yylval.intval = atoi(yytext); return INT_VALUE;}{real_num}{ yylval.realval = atof(yytext); return REAL_VALUE;}{identifier}{ yylval.idName = copyString(yytext); return ID;}{whitespace}{/*Do nothing.*/}"+"{ return PLUS;}"-"{ return SUB;}"*"{ return MUL;}"/"{ return DIV;}"<"{ yylval.binaryOperator = 1; return REL_OP;}">"{ yylval.binaryOperator = 2; return REL_OP;}"<="{ yylval.binaryOperator = 3; return REL_OP;}">="{ yylval.binaryOperator = 4; return REL_OP;}"=="{ yylval.binaryOperator = 5; return REL_OP;}"<>"{ yylval.binaryOperator = 6; return REL_OP;}"&&"{ yylval.binaryOperator = 7; return REL_OP;}"||"{ yylval.binaryOperator = 8; return REL_OP;}"="{ return ASSIGN;}"("{ return LPAREN;}")"{ return RPAREN;}";"{ return SEMI;}"{"{ return LBRACE;}"}"{ return RBRACE;}"["{ return LBRACKET;}"]"{ return RBRACKET;}","{ return COMMA;}"//"{ char c = input();  while(c != '\n')  {if (c == EOF) break;  c = input();  }  lineno++;}"/*"{char c;int flag = 1;do{c = input();entry1:if (c == EOF) break;if (c == '\n') lineno++;if (c == '*'){c = input();if (c == '/')flag = 0;elsegoto entry1;}} while (flag);}\n{lineno++;}.{yyerror("Mystery character %s\n", yytext);return ERROR;}%%/* 用于语法分析时初始化词法分析接口 */void iniLexer(void){static int firstTime = TRUE;lineno = 0;if (firstTime){firstTime = FALSE;lineno++;yyin = source;yyout = listing;}}/* 词法分析器专用 TokenType getToken(void){static int firstTime = TRUE;TokenType currentToken;if (firstTime){firstTime = FALSE;lineno++;yyin = source;yyout = listing;}currentToken = yylex();strncpy(tokenString,yytext,MAXTOKENLEN);if (TraceScan) {fprintf(listing,"\t%d: ",lineno);printToken(currentToken);}return currentToken;}*/
    它的yacc文件是这么写的,

/***********************************//* File: cmm.y                     *//* Bison grammar file about CMM    *//* CMM Interpreter Construction    *//***********************************//*预计有1个移进/归约冲突*/%expect 1%{#include "globals.h"#include "util.h"#include "scan.h"#include "parse.h"static TreeNode * savedTree; /* stores syntax tree for later return */%}%union{struct treeNode * node;int intval;double realval;char * idName;int binaryOperator;int dataType;}%token <intval> INT_VALUE%token <realval> REAL_VALUE%token <idName> ID%token <dataType> INT REAL BOOL/* 优先级声明 */%right ASSIGN%left PLUS SUB%left MUL DIV%nonassoc <binaryOperator> REL_OP%nonassoc UMINUS/* 声明文法中用到的tokens */%token IF ELSE WHILE READ WRITE MAIN%token LPAREN RPAREN SEMI LBRACE RBRACE LBRACKET RBRACKET COMMA%token ASSIGN%token NEWLINE ERROR%type <node> stmt_list stmt%type <node> if_stmt decl_stmt compound_stmt while_stmt assign_stmt read_stmt write_stmt%type <node> exp factor bin_exp%type <dataType> type_spec%start program%% /* CMM文法 */program: stmt_list{ savedTree = $1;};stmt_list:{ $$ = NULL; }| stmt_list stmt {TreeNode * t = $1;if (t != NULL){while (t->sibling != NULL){ t = t->sibling;}t->sibling = $2;$$ = $1;}else $$ = $2;};stmt: if_stmt { $$ = $1; }| decl_stmt SEMI { $$ = $1; }| compound_stmt { $$ = $1; }| while_stmt { $$ = $1; }| assign_stmt SEMI { $$ = $1; }| read_stmt SEMI { $$ = $1; }| write_stmt SEMI { $$ = $1; }| error  { $$ = NULL; };compound_stmt: LBRACE stmt_list RBRACE{$$ = newStmtNode(CompoundK);$$->child[0] = $2;$$->lineno = lineno;};decl_stmt: type_spec ID {$$ = newStmtNode(DeclK);$$->attr.name = $2;/* 数组长度为0代表非数组 */$$->arrayLength = 0;$$->type = $1;$$->lineno = lineno;}| type_spec ID LBRACKET INT_VALUE RBRACKET{$$ = newStmtNode(DeclK);$$->attr.name = $2;$$->arrayLength = $4;$$->type = $1;if($$->type == Int) {int int_array_temp[$4];$$->array.intArray = int_array_temp;}else if($$->type == Real) {double real_array_temp[$4];$$->array.realArray = real_array_temp;}$$->lineno = lineno;};type_spec: INT { $$ = $1; }| REAL { $$ = $1; }| BOOL{ $$ = $1; };if_stmt     : IF LPAREN bin_exp RPAREN stmt{$$ = newStmtNode(IfK);$$->child[0] = $3;$$->child[1] = $5;$$->child[2] = NULL;$$->lineno = lineno;}| IF LPAREN bin_exp RPAREN stmt ELSE stmt{$$ = newStmtNode(IfK);$$->child[0] = $3;$$->child[1] = $5;$$->child[2] = $7;$$->lineno = lineno;};while_stmt: WHILE LPAREN bin_exp RPAREN stmt{$$ = newStmtNode(WhileK);$$->child[0] = $3;$$->child[1] = $5;$$->lineno = lineno;};assign_stmt : ID ASSIGN exp{$$ = newStmtNode(AssignK);$$->attr.name = $1;$$->child[0] = $3;$$->child[1] = NULL;$$->lineno = lineno;}| ID LBRACKET exp RBRACKET ASSIGN exp{$$ = newStmtNode(AssignK);$$->attr.name = $1;/* child[1]不为NULL 表示引用的变量是一个数组元素 *//* child[0] 是数组元素的下标 */$$->child[0] = $3;$$->child[1] = $6;$$->lineno = lineno;};read_stmt   : READ LPAREN ID RPAREN{$$ = newStmtNode(ReadK);$$->attr.name = $3;$$->child[0] = NULL;$$->lineno = lineno;}| READ LPAREN ID LBRACKET exp RBRACKET RPAREN{$$ = newStmtNode(ReadK);$$->attr.name = $3;$$->child[0] = $5;$$->lineno = lineno;};write_stmt  : WRITE LPAREN exp RPAREN{$$ = newStmtNode(WriteK);$$->child[0] = $3;$$->lineno = lineno;};exp: factor{$$ = $1; }| bin_exp{$$ = $1; };factor: INT_VALUE{$$ = newExpNode(IntValueK);$$->value.int_val = $1;$$->type = Int;$$->lineno = lineno;}| REAL_VALUE{$$ = newExpNode(RealValueK);$$->value.real_val = $1;$$->type = Real;$$->lineno = lineno;}| LPAREN exp RPAREN{$$ = $2; }| ID{$$ = newExpNode(IdK);$$->attr.name = $1;/* child[0]为NULL代表该变量不是数组,用作解释时 */$$->child[0] = NULL;$$->lineno = lineno;}| ID LBRACKET exp RBRACKET{$$ = newExpNode(IdK);$$->attr.name = $1;$$->child[0] = $3;$$->lineno = lineno;}| error { $$ = NULL; };bin_exp     : /* 关系运算符 */  exp REL_OP exp{$$ = newExpNode(OpK);$$->child[0] = $1;$$->child[1] = $3;$$->attr.op = $2;$$->lineno = lineno;}/* 算数运算符 */| exp PLUS exp{$$ = newExpNode(OpK);$$->child[0] = $1;$$->child[1] = $3;$$->attr.op = PLUS;$$->lineno = lineno;}| exp SUB exp{$$ = newExpNode(OpK);$$->child[0] = $1;$$->child[1] = $3;$$->attr.op = SUB;$$->lineno = lineno;}| exp MUL exp{$$ = newExpNode(OpK);$$->child[0] = $1;$$->child[1] = $3;$$->attr.op = MUL;$$->lineno = lineno;}| exp DIV exp{$$ = newExpNode(OpK);$$->child[0] = $1;$$->child[1] = $3;$$->attr.op = DIV;$$->lineno = lineno;}| SUB exp %prec UMINUS{$$ = newExpNode(OpK);$$->child[0] = $2;$$->child[1] = NULL;$$->attr.op = UMINUS;$$->lineno = lineno;};%%int yyerror(char * message){fprintf(listing,"Syntax error at line %d: %s\n",lineno,message);fprintf(listing,"Current token: %s",tokenString);printToken(yychar);Error = TRUE;return 0;}/* 与主函数交互的语法分析函数 */TreeNode * parse(void){iniLexer();yyparse();return savedTree;}
    编译也非常简单,

#The makefile for interpreter based on Flex and BisonCC = gccFLAG = -winterpreter: main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.o$(CC) $(FLAG) -o ../bin\&test/interpreter main.o util.o cmm.tab.o cmm.lex.o symtab.o analyze.omain.o: main.c parse.h analyze.h$(CC) $(FLAG) -c main.ccmm.tab.o:cmm.tab.c parse.h$(CC) $(FLAG) -c cmm.tab.ccmm.tab.c:cmm.ybison -d cmm.ycmm.lex.o:cmm.lex.c cmm.tab.h$(CC) $(FLAG) -c cmm.lex.ccmm.lex.c:cmm.lflex -o cmm.lex.c cmm.lsymtab.o:symtab.c symtab.h globals.h$(CC) $(FLAG) -c symtab.canalyze.o:analyze.c globals.h symtab.h analyze.h$(CC) $(FLAG) -c analyze.cutil.o:util.c$(CC) $(FLAG) -c util.cclean:-rm *.o
    最后测试的时候,就是直接利用./interpreter [file_name]就可以了。

    有兴趣的同学可以把这个项目下载下来,多看看,了解一下如果利用flex&bison快速实现一个脚本语言。


原创粉丝点击