简单词法分析器实现

来源:互联网 发布:淘宝宜家代购真假辨别 编辑:程序博客网 时间:2024/05/21 19:23
编写词法分析器有两种方法:一种是通过DFA对单词进行识别,另一种是直接编写程序进行识别。本程序采用DFA对单词进行识别。
DFA的实现思想与书上基本一致:在程序中,用按行展开的一维数组表示二维的状态转换矩阵(下标为 当前状态*每行宽度+输入字符类别),用另一个一维数组表示终态集。
一个词法分析器要实现的功能主要包括以下几点:

能够识别标识符、关键字、数字和运算符,对注释进行过滤,同时还能识别出程序错误。

使用说明:

本程序从当前目录下的in.txt文件读取输入,输出为一系列二元式。
#include<stdio.h>#include<string.h>#include<stdlib.h>//用指针而不是二维数组表示,这样就不用指定字符串长度,只是不能修改指针所指向字符串的内容char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"};char buffer[20];//存储当前识别出的单词char *identifier[50];char *num[50];int ident_num;//标志符数int number;//数字数int judgement_num(char c) {if (c >= '0' && c <= '9') return 0;else if (c == '.') return 1;return -1;}int judge(char c){if(c=='_') return 2;else if(c>='0'&&c<='9') return 1;else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;return -1;}//next_i,next_j分别代表着状态转换表move的当前状态与接收到的字符//width代表每一状态可能遇到的状态数,length代表终态集大小int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){    int len=0,next_i,next_j,tmp;    char next_char;    memset(buffer,0,sizeof(buffer));    next_char=getchar();    next_i=begin;    while((next_j=judge(next_char))!=-1){tmp=next_i;next_i=move[next_i*width+next_j];if(next_i==-1){            printf("move[%d][%d]has not next state\n",tmp,next_j);            return -1;}buffer[len++]=next_char;        next_char=getchar();    }    ungetc(next_char,stdin);    buffer[len]='\0';    for(int i=0;i<length;i++){if(next_i==final[i])  return 0;    }    return -1;}void is_comment(){    char next_char;    char tmp[5];    memset(tmp,0,sizeof(tmp));    tmp[0]=getchar();    next_char=getchar();    if(next_char=='*'){while((next_char=getchar())!=EOF){            if(next_char=='*'&&getchar()=='/') return;}printf("The comment is error\n");    }    else if(next_char=='/'){        while((next_char=getchar())!='\n');    }    else if(next_char=='='){        printf("(/=,_)\n");    }    else{printf("(/,_)\n");ungetc(next_char,stdin);    }}void is_other(){    char next_char,c=getchar();    if(c=='+'||c=='-'||c=='*'||c=='%'){        next_char=getchar();        if(next_char=='=') printf("(%c=,_)\n",c);        else {printf("(%c,_)\n",c);ungetc(next_char,stdin);        }    }    else if(c=='>'||c=='<'){next_char=getchar();        if(next_char=='=') 
printf("(rlop,%c=)\n",c);        else {printf("(rlop,%c)\n",c);ungetc(next_char,stdin);        }    }    else if(c=='!'){next_char=getchar();        if(next_char=='=') printf("(!=,_)\n");        else {printf("(not,_)\n");ungetc(next_char,stdin);        }    }    else if(c=='|'){next_char=getchar();        if (next_char == '|')printf("(or,||)\n");else {ungetc(next_char, stdin);}    }    else if(c=='&'){next_char=getchar();        if (next_char == '&')printf("(and,&&)\n");else {ungetc(next_char, stdin);}    }    else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){        printf("(%c,_)\n",c);    }}void is_digit(){    int begin=0;    int move[]={1,-1,1,2,2,-1};int final[]={1,2};int result=-1;result=DFA(begin,move,2,final,2,judgement_num);if(result==-1){printf("digit DFA error\n");exit(-1);}else if(result==0){        //printf("%s\n",buffer);        for(int i=0;i<number;i++){if(strcmp(buffer,num[i])==0){                printf("(num,%s)\n",buffer);return;}        }        num[number]=(char*)malloc(sizeof(buffer));        strcpy(num[number++],buffer);        printf("(num,%s)\n",buffer);        return;}}void is_letter(){int i;int begin=0;int move[] ={1,-1,1,1,1,1};int final[]={1,2};int result=-1;result=DFA(begin,move,3,final,1,judge);    if(result==-1){printf("letter DFA error\n");exit(-1);    }    else if(result==0){int len=sizeof(key)/sizeof(char *);//如为关键字        for(i=0;i<len;i++){if(strcmp(buffer,key[i])==0){                printf("(key,%s)\n",key[i]);                return;}        }        //如为标志符        for(i=0;i<ident_num;i++){//在当前标志符表中已经存在if(strcmp(buffer,identifier[i])==0){printf("(%id,%s)\n",identifier[i]);return;}        }        //如不存在,则写入        identifier[ident_num]=(char*)malloc(sizeof(buffer));        strcmp(identifier[ident_num++],buffer);        printf("(id,%s)\n",buffer);        return;    }}void init() {ident_num=0;    number=0;}void work() {char c;    while((c=getchar())!=EOF){ungetc(c,stdin);if(judge(c)==2||judge(c)==0) 
is_letter();else if(judge(c)==1) is_digit();else if(c=='/') is_comment();else is_other();    }}int main() {    freopen("in.txt","r",stdin);    init();    work();    return 0;}


0 0