编译原理-词法分析器

来源：互联网发布：淘宝海外代购流程编辑：程序博客网时间：2024/04/30 10:27

编译原理的词法分析器是我们编译原理课程的实验内容，语法分析器和语义分析等做完了再贴出来。

程序用 C++ 编写，比较粗糙，报错部分还需要改善。欢迎大家批评指正，有疑问请留言，大家一起进步！

头文件 morpholopy.h

//
//  morpholopy.h
//
//  State codes shared by the finite-state machines in wordAnalyze.cpp
//  (checkNum and checkID). Every running state is positive so the scanners
//  can loop with `while (state > 0)`; the two terminal states are <= 0.
//
#ifndef morpholopy_h
#define morpholopy_h

// Terminal states.
#define STATE_DONE 0      // token recognised successfully
#define STATE_ERROR (-1)  // malformed token (parenthesised so it is safe in any expression)

// NUM part: states of the numeric-literal automaton.
#define STATE_BEGIN 12          // initial state (also the initial state of checkID)
#define STATE_INTEGER 13        // inside the integer part
#define STATE_DECIMAL_BEGIN 14  // just consumed '.', a digit must follow
#define STATE_DECIMAL 15        // inside the fractional part
#define STATE_E 16              // just consumed 'E', expects a sign or a digit
#define STATE_SYMBOL 17         // just consumed the exponent sign, a digit must follow
#define STATE_POWER 18          // inside the exponent digits

// Identifier part: state used by checkID after the first character.
#define STATE_CHECK 19

#endif /* morpholopy_h */

wordAnalyze.cpp

#pragma warning(disable : 4996)#include <iostream>#include <stdio.h>#include <ctype.h>#include <iostream>  #include <fstream> #include <stdlib.h>#include "morpholopy.h"using namespace std;static int wcount = 0;struct Word {char type[50];char value[50];};char* checkRelops(char* ch, Word result[]);char* checkID(char* ch, Word result[]);char* checkNum(char* p, Word words[]);char* checkOP(char* ch, Word word[]);char* checkEqual(char* ch, Word result[]);void setResult_OP(Word word[], char op1, char op2);char* checkLimit(char* ch, Word result[]);char* checkAnnotate(char* ch, Word word[]);void display(Word word[]);//char key[][10] = ｛"auto","double","int","struct","break","else","long","switch","case","enum","register","typedef","char","extern","return","union","const","float","short","unsigned","continue","for","signed","void","default","goto","sizeof","volatile","do","if", "while","static"};char key[][10] = { "auto", "double", "int", "struct", "break", "else", "long", "switch", "case", "enum", "register", "typedef", "char", "extern", "return", "union", "const", "float", "short", "unsigned", "continue", "for", "signed", "void", "default", "goto","sizeof", "volatile", "do", "if", "while", "static" };int main(int argc, const char * argv[]) {char word[1024];Word result[1024];ifstream ifs;ifs.open("text.txt");if (!ifs.is_open()){cout << "Error opening file";exit(1);}while (!ifs.eof()){ifs.read(word, 256);int readCount=ifs.gcount();word[readCount] = '\0';char* start = &word[0];char* end = start;while (*(start) != '\0') {if (*start == ' '){end = end + 1;start = end;}else if ((*start == '/' && *(start + 1) == '*') || (*start == '/' && *(start + 1) == '/')){end = checkAnnotate(start, result);start = end;}else if (*start == '_' || isalpha(*start)){end = checkID(start, result);start = end;}else if (isdigit(*start)){end = checkNum(start, result);start = end;}else if (*start == '<' || *start == '>' || *start == '='){end = checkRelops(start, result);if (end == start) {end = 
checkEqual(start, result);}start = end;}else if (*start == '+' || *start == '-' || *start == '*' || *start == '/' || *start == '%' || *start == '&' || *start == '|' || *start == '^' || *start == '~' || *start == '.' || *start == ':' || *start == '?'){end = checkOP(start, result);start = end;}else if (*start == '{' || *start == '}' || *start == '<' || *start == '>' || *start == '[' || *start == ']' || *start == '(' || *start == ')' || *start == '@' || *start == '#' || *start == ',' || *start == ';' || *start == '"'){end = checkLimit(start, result);start = end;}else{end = end + 1;start = end;}}}display(result);/*for (int i = 0; i < wcount; i++) {cout << "<" << result[i].type << "," << result[i].value << ">" << endl;}*/int a = 0;cin >> a;cout << a << endl;return 0;}char* checkLimit(char* ch, Word result[]){char* temp = ch;char* afterFirstRef;char* beforeLastRef;if (*ch == '"'){afterFirstRef = ch;ch++;int forword=9;while (forword!=0){if (*ch == '"'){beforeLastRef = ch;forword = 0;//保存第一个引号char* write_ch = &result[wcount].value[0];strcpy(&result[wcount].type[0], "limit");*write_ch = *ch;write_ch++;*write_ch = '\0';wcount++;//保存中间的字符串常量//char* write_ch2 = &result[wcount].value[0];strcpy(&result[wcount].type[0], "const-ref");//*write_ch2 = *ch;afterFirstRef++;int refSize = beforeLastRef - afterFirstRef;for (int i = 0; i < refSize ; i++){result[wcount].value[i] = *afterFirstRef;afterFirstRef++;}result[wcount].value[refSize] = '\0';//write_ch2++;//*write_ch2 = '\0';wcount++;//保存第二个引号char* write_ch3 = &result[wcount].value[0];strcpy(&result[wcount].type[0], "limit");*write_ch3 = *ch;write_ch3++;*write_ch3 = '\0';wcount++;ch++;}else{if (*ch == '\0')//把“\0”当作结束符return temp;//保存引号中间的常量ch++;}}}else{char* write_ch = &result[wcount].value[0];strcpy(&result[wcount].type[0], "limit");*write_ch = *ch;write_ch++;*write_ch = '\0';wcount++;ch++;}return ch;}char* checkEqual(char* ch, Word result[]){strcpy(&result[wcount].type[0], "=");strcpy(&result[wcount].value[0], " 
");wcount++;;ch++;return ch;}char* checkRelops(char* ch, Word result[]){char* start = ch;while (true) {if (*ch == '<') {ch++;if (*ch == '=') {strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], "<=");wcount++;ch++;return ch;}else if (*ch == '>'){strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], "<>");wcount++;ch++;return ch;//不等于}else{//小于strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], "<");wcount++;return ch;}}else if (*ch == '=') {ch++;if (*ch == '='){//等等于strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], "==");wcount++;ch++;return ch;}elsereturn start;}else if (*ch == '>') {ch++;if (*ch == '=') {//大于等于strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], ">=");wcount++;ch++;return ch;}else{//大于strcpy(&result[wcount].type[0], "relop");strcpy(&result[wcount].value[0], ">");wcount++;return ch;}}elsereturn start;}}char* checkNum(char* p, Word words[]) {int state = 12;char* write_p = &words[wcount].value[0];//char* pre_p = p;//向前看一个字符while (state > 0){switch (state) {case STATE_BEGIN:if ((*p <= '9' && *p >= '0')) {state = STATE_INTEGER;}else {state = STATE_ERROR;//格式出错}break;case STATE_INTEGER:if ((*p <= '9' && *p >= '0')) {state = STATE_INTEGER;}else if (*p == '.') {state = STATE_DECIMAL_BEGIN;}else if (*p == 'E') {state = STATE_E;}else {state = STATE_DONE;//进入终结状态}break;case STATE_DECIMAL_BEGIN:if ((*p <= '9' && *p >= '0')) {state = STATE_DECIMAL;}else {state = STATE_ERROR;//格式出错}break;case STATE_DECIMAL:if ((*p <= '9' && *p >= '0')) {//state = STATE_DECIMAL;}else if (*p == 'E') {state = STATE_E;}else {state = STATE_DONE;//进入终结状态}break;case STATE_E:if ((*p <= '9' && *p >= '0')) {state = STATE_POWER;}else if (*p == '+' || *p == '-') {state = STATE_SYMBOL;}else {state = STATE_ERROR;//格式出错}break;case STATE_SYMBOL:if ((*p <= '9' && *p >= '0')) {state = STATE_POWER;}else {state = STATE_ERROR;//格式出错}break;case STATE_POWER:if ((*p <= '9' && *p >= '0')) 
{//state = STATE_POWER;}else {state = STATE_DONE;//进入终结状态}break;}if (state == STATE_DONE) {//正确终结strcpy(&words[wcount].type[0], "num");*write_p = '\0';break;}else if (state == STATE_ERROR) {//格式出错strcpy(&words[wcount].type[0], "NUM_ERROR");*write_p = '\0';break;}else {//当前字符匹配*write_p = *p;++write_p;++p;}}//存储结果的数组的下标移向下一位wcount++;return p;}char* checkID(char* ch, Word result[]){char* write_p = &result[wcount].value[0];int state = 12;while (state > 0) {switch (state) {case STATE_BEGIN:if ((*ch) == '_' || isalpha(*ch)) {state = STATE_CHECK;}break;case STATE_CHECK:if ((*ch) == '_' || isalpha(*ch) || isdigit(*ch)) {state = STATE_CHECK;}elsestate = STATE_DONE;break;default:break;}if (state == STATE_DONE) {strcpy(&result[wcount].type[0], "id");*write_p = '\0';break;}else{*write_p = *ch;++write_p;++ch;}}for (int i = 0; i < sizeof(key); i++) {if (!strcmp(result[wcount].value, key[i])){strcpy(&result[wcount].type[0], "key");}}wcount++;return ch;}char* checkOP(char* ch, Word word[]){ch = ch + 1;if (*ch == *(ch - 1) && *ch != '+' && *ch != '-' && *ch != '*' && *ch != '/' && *ch != '%' && *ch != '~' && *ch != '^' && *ch != '.' && *ch != '?' 
&& *ch != ':'){setResult_OP(word, *ch, *ch);ch++;}else{setResult_OP(word, *(ch - 1), '\0');}strcpy(&word[wcount].type[0], "op");wcount++;return ch;}char* checkAnnotate(char* ch, Word word[]){if (*ch == '/' && *(ch + 1) == '*'){char* temp = ch;ch = ch + 2;int state = 9;while (state != 0){if (*ch == '*' && *(ch + 1) == '/'){ch = ch + 2;state = 0;}else{if (*ch == '\0')//这里写文件结束符{strcpy(&word[wcount].type[0], "ERROR");strcpy(&word[wcount].value[0], "the annotate is not finished...");wcount++;return temp+2;}ch++;}}return ch;}else if (*ch == '/' && *(ch + 1) == '/'){ch = ch + 2;int state = 9;while (state != 0){if (*ch == '\n' || *ch == '\0'){ch = ch + 1;state = 0;}else{/*if (*ch == '\n'){strcpy(&word[wcount].type[0], "AN_ERROR");return ++ch;}*/ch++;}}return ch;}else{return ch;}}void setResult_OP(Word word[], char op1, char op2){if (op2 != '\0'){word[wcount].value[0] = op1;word[wcount].value[1] = op2;word[wcount].value[2] = '\0';}else{word[wcount].value[0] = op1;word[wcount].value[1] = op2;}}void display(Word word[]){for (int i = 0; i <wcount; i++){cout << "<" << word[i].type << "," << word[i].value << ">" << endl;}}

测试文件：text.txt 和.cpp文件放在同一目录下

void setResult_OP(Word word[], char op1, char op2){if (op2 != '\0'){word[wcount].value[0] = op1;}/* this is an annotation ...*/int a=10;a=a+80;float s=2E.2;//单行注释if(a==s){printf("a equal s");}}

输出结果：

0 0