Tiny C语言编译程序之词法分析Scanner
来源:互联网 发布:中国网络信息安全联盟 编辑:程序博客网 时间:2024/06/15 17:24
Tiny C语言编译程序之词法分析Scanner
约定:
- 仅允许整数类型,不允许实数类型
- 标识符仅由大小写英文字母(共52个字母)组成,其识别按最长匹配原则
- 整数后紧跟非数字,或标识符后紧跟非字母认为是一个新Token开始
- 由{ }括起来符号串都认为是注释部分,该部分在词法分析时被过滤掉
- 识别出的Token由两个变量currentToken和tokenString表示:currentToken代表Token的类别,其类型是名为TokenType的枚举类型,在文件globals.h中定义;tokenString代表Token在程序中出现的形式,即其原始文本。例如,整数10的currentToken值为NUM,tokenString值为"10";标识符i的currentToken值为ID,tokenString值为"i"
画出识别符合TINY C语言构词规则的DFA,然后用直接编码的方法构造词法分析器。
词法分析器scan.c
/****************************************************/
/* File: scan.c                                     */
/* The scanner implementation for the TINY compiler */
/****************************************************/

#include "globals.h"
#include "util.h"
#include "scan.h"

/* Standard headers used directly by this file (isdigit/isalpha,
 * fgets/fprintf/EOF, strcmp/strlen). Harmless if globals.h already
 * pulls them in.
 */
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* states in scanner DFA */
typedef enum
{
  START, INASSIGN, INCOMMENT, INNUM, INID, DONE
} StateType;

/* lexeme of identifier or reserved word */
char tokenString[MAXTOKENLEN+1];

/* BUFLEN = length of the input buffer for source code lines */
#define BUFLEN 256

static char lineBuf[BUFLEN]; /* holds the current line */
static int linepos = 0;      /* current position in lineBuf */
static int bufsize = 0;      /* current size of buffer string */
static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF */

/* getNextChar fetches the next character from lineBuf, reading in a
 * new line from `source` if lineBuf is exhausted. Blanks are NOT
 * skipped here; getToken discards them in its START state.
 *
 * Returns the character as a non-negative int (converted through
 * unsigned char), or EOF once the input is exhausted. Going through
 * unsigned char keeps bytes >= 0x80 from turning negative, which would
 * be undefined behavior in isdigit()/isalpha() and would make byte
 * 0xFF indistinguishable from EOF.
 *
 * Side effects: increments lineno on every read attempt (including the
 * one that hits end of file) and echoes the freshly read line to
 * `listing` when EchoSource is set.
 */
static int getNextChar(void)
{
  if (linepos >= bufsize)
  {
    lineno++;
    if (fgets(lineBuf, BUFLEN-1, source) == NULL)
    {
      EOF_flag = TRUE;
      return EOF;
    }
    if (EchoSource) fprintf(listing, "%4d: %s", lineno, lineBuf);
    bufsize = (int) strlen(lineBuf);
    linepos = 0;
  }
  return (unsigned char) lineBuf[linepos++];
}

/* ungetNextChar backtracks one character in lineBuf.
 * Nothing is pushed back once EOF has been seen: getNextChar did not
 * advance linepos in that case, so there is nothing to undo.
 */
static void ungetNextChar(void)
{
  if (!EOF_flag) linepos--;
}

/* lookup table of reserved words */
static const struct
{
  const char *str;
  TokenType tok;
} reservedWords[MAXRESERVED]
  = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
     {"repeat",REPEAT},{"until",UNTIL},{"read",READ},
     {"write",WRITE}};

/* lookup an identifier to see if it is a reserved word;
 * uses linear search.
 * Returns the reserved word's token type on an exact (case-sensitive)
 * match, otherwise ID.
 */
static TokenType reservedLookup(const char *s)
{
  int i;
  for (i = 0; i < MAXRESERVED; i++)
  {
    if (strcmp(s, reservedWords[i].str) == 0)
      return reservedWords[i].tok;
  }
  return ID;
}

/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* function getToken returns the
 * next token in source file.
 *
 * Runs the scanner DFA from START until DONE. The token's lexeme is
 * left in the global tokenString, always NUL-terminated; lexemes
 * longer than MAXTOKENLEN characters are truncated to fit. When
 * TraceScan is set, the line number and token are written to `listing`.
 */
TokenType getToken(void)
{
  /* index for storing into tokenString */
  int tokenStringIndex = 0;
  /* holds current token to be returned; every path that reaches DONE
   * assigns it, the initializer only guards against reading garbage */
  TokenType currentToken = ERROR;
  /* current state - always begins at START */
  StateType state = START;
  /* flag to indicate save to tokenString */
  int save;

  while (state != DONE)
  {
    int c = getNextChar();
    save = TRUE;
    switch (state)
    {
      case START:
        /* dispatch on the first character: digit, letter, ':',
         * blank/tab/newline, '{', or a single-character
         * operator/delimiter */
        if (isdigit(c))
          state = INNUM;
        else if (isalpha(c))
          state = INID;
        else if (c == ':')
          state = INASSIGN;
        else if ((c == ' ') || (c == '\t') || (c == '\n'))
          save = FALSE;
        else if (c == '{')
        {
          save = FALSE;
          state = INCOMMENT;
        }
        else
        {
          state = DONE;
          switch (c)
          {
            case EOF:
              save = FALSE;
              currentToken = ENDFILE;
              break;
            case '=':
              currentToken = EQ;
              break;
            case '<':
              currentToken = LT;
              break;
            case '+':
              currentToken = PLUS;
              break;
            case '-':
              currentToken = MINUS;
              break;
            case '*':
              currentToken = TIMES;
              break;
            case '/':
              currentToken = OVER;
              break;
            case '(':
              currentToken = LPAREN;
              break;
            case ')':
              currentToken = RPAREN;
              break;
            case ';':
              currentToken = SEMI;
              break;
            default:
              currentToken = ERROR;
              break;
          }
        }
        break;
      case INCOMMENT:
        /* only '}' or EOF (unterminated comment at end of input)
         * changes the state; comment text is never saved */
        save = FALSE;
        if (c == '}')
          state = START;
        else if (c == EOF)
        {
          state = DONE;
          currentToken = ENDFILE;
        }
        break;
      case INASSIGN:
        /* ':' must be followed by '='; anything else is an error */
        state = DONE;
        if (c == '=')
          currentToken = ASSIGN;
        else
        {
          /* backup in the input */
          ungetNextChar();
          save = FALSE;
          currentToken = ERROR;
        }
        break;
      case INNUM:
        if (!isdigit(c))
        {
          /* backup in the input */
          ungetNextChar();
          save = FALSE;
          state = DONE;
          currentToken = NUM;
        }
        break;
      case INID:
        /* a non-letter ends the identifier: push it back and finish */
        if (!isalpha(c))
        {
          /* backup in the input */
          ungetNextChar();
          save = FALSE;
          state = DONE;
          currentToken = ID;
        }
        break;
      case DONE:
      default: /* should never happen */
        fprintf(listing, "Scanner Bug: state= %d\n", (int) state);
        state = DONE;
        currentToken = ERROR;
        break;
    }
    /* Strictly '<': tokenString has MAXTOKENLEN+1 slots and the last
     * one must stay free for the terminating '\0' written below. */
    if ((save) && (tokenStringIndex < MAXTOKENLEN))
      tokenString[tokenStringIndex++] = (char) c;
    if (state == DONE)
    {
      tokenString[tokenStringIndex] = '\0';
      if (currentToken == ID)
        currentToken = reservedLookup(tokenString);
    }
  }
  if (TraceScan)
  {
    fprintf(listing, "\t%d: ", lineno);
    printToken(currentToken, tokenString);
  }
  return currentToken;
} /* end getToken */
对于Tiny语言编写的Sample程序源代码如下:
{ Sample program in TINY language - computes factorial}read x; { input an integer }if 0 < x then { don't compute if x <= 0 } fact := 1; repeat fact := fact * x; x := x - 1 until x = 0; write fact { output factorial of x }end
词法分析主要为后面的各个阶段提供方法getToken;经过词法分析后每行的关键字、标识符以及数字如下:
TINY COMPILATION: SAMPLE.tny 1: { Sample program 2: in TINY language - 3: computes factorial 4: } 5: read x; { input an integer } 5: reserved word: read 5: ID, name= x 5: ; 6: if 0 < x then { don't compute if x <= 0 } 6: reserved word: if 6: NUM, val= 0 6: < 6: ID, name= x 6: reserved word: then 7: fact := 1; 7: ID, name= fact 7: := 7: NUM, val= 1 7: ; 8: repeat 8: reserved word: repeat 9: fact := fact * x; 9: ID, name= fact 9: := 9: ID, name= fact 9: * 9: ID, name= x 9: ; 10: x := x - 1 10: ID, name= x 10: := 10: ID, name= x 10: - 10: NUM, val= 1 11: until x = 0; 11: reserved word: until 11: ID, name= x 11: = 11: NUM, val= 0 11: ; 12: write fact { output factorial of x } 12: reserved word: write 12: ID, name= fact 13: end 13: reserved word: end 14: EOF
备注
内容为课堂所学及网上参考,仅供参考.
阅读全文
1 0
- Tiny C语言编译程序之词法分析Scanner
- Tiny C语言编译程序之语法分析Parser
- Tiny语言编译器开发之词法分析
- Tiny语言编译器开发之词法分析(利…
- 编译原理之Tiny C 词法分析器
- 编译程序初级,词法分析
- c语言 词法分析
- PL/0语言编译程序整理实现:(3)、词法分析
- 使用flex对tiny语言进行词法分析
- 【编译原理】TINY编译器之词法分析(二)
- 自己动手写编译器之TINY编译器词法分析
- UCC--C语言词法分析
- C语言实现词法分析
- 编译器之词法分析c
- 《编译原理及编译程序构造》-词法分析
- Tiny语言编译器之语义分析
- Python实现的C语言词法分析
- C语言,词法分析的“贪心法”
- 最常用的12种设计模式小结
- 【设计模式】—— 代理模式Proxy
- 关于z-index与transform同时使用时的一些感想
- STM32F1系列PB3,PB4,PA13,PA14,PA15用作普通IO口的特殊配置
- Redis 简介与安装
- Tiny C语言编译程序之词法分析Scanner
- 递归、作用域详解及作用域面试题
- 部分Java面试题总结回顾
- Bootstrap排版
- 单调栈的理解和应用
- Java Servlet完全教程
- Android里merge和include标签的使用及区别
- Bootstrap表单
- Windows编程入门-保存设备环境