词法分析器设计

来源：互联网　发布：高频彩票算法大揭秘　编辑：程序博客网　时间：2024/05/22 15:10

词法分析是一个读单词的过程：从输入的源程序中识别出各个具有独立意义的单词，即基本保留字、标识符、常数、运算符、分隔符五大类。程序语言的单词符号一般分为五种：关键字（保留字/基本字）：if、while、begin……；标识符：常量名、变量名……；常数：34、56.78、true、'a'……；运算符：+、-、*、/、<、and、or……；界限符：, ; ( ) { } /* ……。

方法：

词法分析器的设计方法有如下四个步骤：

1.写出该语言的词法规则。

2.把词法规则转换为相应的状态转换图。

3.把各转换图的初态连在一起，构成识别该语言的自动机。

4.设计扫描器；把扫描器作为语法分析的一个过程，当语法分析需要一个单词时，就调用扫描器。扫描器从初态出发，当识别一个单词后便进入终态，送出二元式。

针对该程序设计的DFA 图大致如下：

核心代码（这段是分析代码。其他的定义代码，识别字母代码，识别数字代码，等等就不一一列举了，源程序中有）：

/**      * 初始化并读取源代码文件      * 扫描程序开始执行，直到读取文件结束符EOF      * @throws Exception      */     private void scanning(String originalFile) throws Exception {         this.sourceFile = new BufferedReader(new FileReader(originalFile));                  this.initial();         while(!isEOF) {             getToken();         }         System.out.println("========================> end scanning ...");     }          /**      * 获取下一个字符      * @return      * @throws Exception      */     private char getNextChar() throws Exception {         char nextChar = '\0';                  if(!(charPos < bufSize)) {             if((eachLine = sourceFile.readLine()) != null) {                 lineNum++;                 System.out.println(lineNum + ": " + eachLine);                 lineBuf = eachLine.toCharArray();                 bufSize = eachLine.length();                 charPos = 0;                 nextChar = lineBuf[charPos++];             } else {                 isEOF = true;                 nextChar = '\0';             }         } else {             nextChar = lineBuf[charPos++];         }         return nextChar;     }          /**      * 【按步长（step）】取消获取下一个字符      */     private void unGetNextChar(int step) {         if(!isEOF) {             charPos -= step;         }     }          /**      * 获取一个Token      * @return      * @throws Exception      */     private String getToken() throws Exception {         String tokenStr = "";         String currentToken = "";         int currentState = Start;         boolean isSave;                  // 不同时为EOF和Done状态         while(currentState != Done && !isEOF) {             char c = getNextChar();             isSave = true;                          switch(currentState) {                 case Start:                     if(isDigit(c)) {                         currentState = Num;                     } else if(isLetter(c) || c == '.') { //点号是为了处理头文件iostream.h的格式                         currentState = ID;                     } else if(c == ' ' || c 
== '\t' || c == '\n') {                         isSave = false;                     } else if(c == '!') {                         currentState = NE;                     } else if(c == '=') {                         currentState = EQ;                     } else if(c == '<') {                         currentState = NM;                     } else if(c == '>') {                         currentState = NL;                     } else if(c == '/') {                         currentState = Coms;                         isSave = false;                     } else if(c == '"') {                         currentState = Str;                     } else {                         currentState = Done; //                      if(isSingle(c)) { //                          currentToken = "" + c; //                          currentState = Done; //                          isSave = false; //                      }                     }                     break;                 case Num:                     if(!isDigit(c)) {                         currentState = Done;                         unGetNextChar(1);                         isSave = false;                     }                     break;                 case ID:                     if(!isLetter(c) && !isDigit(c)) {                         currentState = Done;                         unGetNextChar(1);                         isSave = false;                     }                     break;                 case NE:                     if(c != '=') {                         currentState = Special;                         unGetNextChar(2);                         isSave = false;                     } else {                         currentState = Done;                     }                     break;                 case NM:                     if(c != '=' && c != '<') {                         currentState = Special;                         unGetNextChar(2);                         isSave = false;                     } else {          
               currentState = Done;                     }                     break;                 case NL:                     if(c != '=' && c != '>') {                         currentState = Special;                         unGetNextChar(2);                         isSave = false;                     } else {                         currentState = Done;                     }                     break;                 case EQ:                     if(c != '=') {                         currentState = Special;                         unGetNextChar(2);                         isSave = false;                     } else {                         currentState = Done;                     }                     break;                 case Str:                     if(c == '"') {                         currentState = Done;                     }                      break;                 case Coms:                     isSave = false;                     if(c == '/') {                         currentState = LineCom;                     } else if(c == '*') {                         currentState = MulCom1;                     } else {                         currentState = Special;                         unGetNextChar(1);                     }                     break;                 case LineCom:                     isSave = false;                     if(c == '\n') {                         currentState = Done;                     }                     break;                 case MulCom2:                     isSave = false;                     if(c == '*') {                         currentState = MulCom2;                     } else if(c == '/') {                         currentState = Done;                     } else {                         currentState = MulCom1;                     }                     break;                 case Special:                     if(c == '!' 
|| c == '=' || c == '<' || c == '>') { //                  if(isSpecialSingle(c)) {                         currentToken = "" + c;                         currentState = Done;                         isSave = false;                     } else {                         currentToken = "Error";                         currentState = Done;                     }                     break;                 default:                     System.out.println(lineNum + " >> Scanner Bug : state = " + currentState);                     currentState = Done;                     currentToken = "Error";                     break;             }             if(isSave) {                 tokenStr += c;             }             if(currentState == Done) {                 currentToken = tokenStr;                 printToken(currentToken);             }         }         return currentToken;     }

运行结果如下：

待翻译代码：

#include"iostream.h"

main()

{

int i;

cin>>i;

i=i+1;

if(i>=3) cout<<"chenggong";

else cout<<"shibai";

}

显示结果：

需要源码的朋友

0 0