0_基于C++语言的词法分析器(未经测试,尚在改进中)

来源:互联网 发布:linux 查看主机arp 编辑:程序博客网 时间:2024/03/28 23:45

// lexicals_analyzer.cpp
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#ifdef _WIN32
#include <windows.h>
#endif
using namespace std;

/**
 * Very small lexical pre-processor for C/C++-like source text.
 *
 * The constructor reads the whole file once and builds a normalized buffer in
 * which tokens are separated by single spaces:
 *   - '\n', '\t', ' ' and ';' are separators (';' itself is discarded);
 *   - line comments and block comments are removed;
 *   - every operator/punctuation character accepted by text_single() becomes
 *     a one-character token of its own;
 *   - string and character literals are copied verbatim, including any
 *     spaces and escape sequences they contain.
 *
 * get_next_tocken() then walks that buffer and hands out one token per call.
 */
class lexicals_analyzer
{
private:
    std::string lexicals;             // normalized text, tokens separated by ' '
    std::string tocken;               // storage behind the pointer returned last
    std::string::size_type cursor;    // read position of get_next_tocken()

    /** Returns true when ch is one of the single-character operator tokens. */
    static bool text_single(int ch)
    {
        static const char specials[] = ".,!=+-*/%&~()[]{}><";
        for (const char *p = specials; *p != '\0'; ++p)
        {
            if (ch == *p)
                return true;
        }
        return false;
    }

    /**
     * Appends one separating space unless the buffer is empty or already ends
     * with a space. The emptiness test matters: the buffer has no last
     * character to inspect before the first token has been stored.
     */
    void ensure_separator()
    {
        if (!lexicals.empty() && lexicals[lexicals.size() - 1] != ' ')
            lexicals += ' ';
    }

    /** Stores ch as a stand-alone token surrounded by separators. */
    void add_operator(char ch)
    {
        ensure_separator();
        lexicals += ch;
        lexicals += ' ';
    }

    /**
     * Copies a quoted literal whose opening quote has just been read.
     * A backslash escapes exactly the next character, so the literal "a\\"
     * ends at its final quote while "a\"b" does not end at the inner one.
     *
     * @return false when the input ended before the closing quote.
     */
    bool copy_literal(FILE *in, char quote)
    {
        lexicals += quote;
        bool escaped = false;
        int ch;
        while ((ch = getc(in)) != EOF)
        {
            if (!escaped && ch == quote)
            {
                lexicals += quote;
                lexicals += ' ';
                return true;
            }
            lexicals += static_cast<char>(ch);
            escaped = !escaped && ch == '\\';
        }
        return false;
    }

    /** Discards input up to and including the next newline (or up to EOF). */
    void skip_line_comment(FILE *in)
    {
        int ch;
        while ((ch = getc(in)) != EOF && ch != '\n')
        {
        }
        ensure_separator();
    }

    /** Discards input up to and including the next "*" "/" pair (or up to EOF). */
    void skip_block_comment(FILE *in)
    {
        bool after_star = false;
        int ch;
        while ((ch = getc(in)) != EOF)
        {
            if (after_star && ch == '/')
                break;
            after_star = (ch == '*');
        }
        ensure_separator();
    }

    /** Reads the stream to its end and fills the normalized buffer. */
    void scan(FILE *in)
    {
        // getc() returns int; keeping it as int is what lets EOF be told
        // apart from a data byte with value 0xFF.
        int ch;
        while ((ch = getc(in)) != EOF)
        {
            switch (ch)
            {
            case '\n':
            case '\t':
            case ' ':
            case ';':
                ensure_separator();
                break;

            case '"':
            case '\'':
                // A literal always starts a new token so that
                // get_next_tocken() can recognise it by its first character.
                ensure_separator();
                if (!copy_literal(in, static_cast<char>(ch)))
                    return;     // unterminated literal: keep what was read
                break;

            case '/':
            {
                const int next = getc(in);
                if (next == '/')
                {
                    skip_line_comment(in);
                }
                else if (next == '*')
                {
                    skip_block_comment(in);
                }
                else
                {
                    // Plain '/' operator: hand the look-ahead character back
                    // so that it is classified like any other character.
                    if (next != EOF)
                        ungetc(next, in);
                    add_operator('/');
                }
                break;
            }

            default:
                if (text_single(ch))
                    add_operator(static_cast<char>(ch));
                else
                    lexicals += static_cast<char>(ch);
                break;
            }
        }
    }

public:
    /**
     * Reads and normalizes the file at file_path.
     *
     * If the file cannot be opened (or file_path is NULL) the message
     * "Open file wrong" is written to cerr and the process exits with
     * status 1.
     */
    lexicals_analyzer(const char *file_path) : cursor(0)
    {
        FILE *fp = (file_path != NULL) ? fopen(file_path, "r") : NULL;
        if (fp == NULL)
        {
            std::cerr << "Open file wrong" << std::endl;
            exit(1);
        }
        scan(fp);
        fclose(fp);
    }

    /**
     * Returns the next token as a NUL-terminated string, or NULL when no
     * token is left. A token that starts with a quote character extends to
     * the matching unescaped closing quote, so literals containing spaces
     * stay in one piece.
     *
     * The returned pointer refers to storage owned by this object and is
     * overwritten by the next call.
     */
    char *get_next_tocken()
    {
        const std::string::size_type text_end = lexicals.size();

        while (cursor < text_end && lexicals[cursor] == ' ')
            ++cursor;
        if (cursor == text_end)
            return NULL;

        const std::string::size_type start = cursor;
        const char first = lexicals[cursor];
        if (first == '"' || first == '\'')
        {
            bool escaped = false;
            ++cursor;
            while (cursor < text_end)
            {
                const char c = lexicals[cursor++];
                if (!escaped && c == first)
                    break;
                escaped = !escaped && c == '\\';
            }
        }
        else
        {
            while (cursor < text_end && lexicals[cursor] != ' ')
                ++cursor;
        }

        tocken.assign(lexicals, start, cursor - start);
        return &tocken[0];
    }

    /** Returns the number of characters stored in the normalized buffer. */
    int get_l_length() const
    {
        return static_cast<int>(lexicals.size());
    }
};

正在学习编译原理,这是自己随手写的练习代码,尚需改进,欢迎提建议。


// main#include "lexicals_analyzer.cpp"using namespace std;int main(){lexicals_analyzer la("a.cpp");char *a;while ((a = la.get_next_tocken()) != NULL){cout << a << endl;}system("pause");return 0;}


0 0
原创粉丝点击