词法分析器

来源:互联网 发布:买一个域名要多少钱 编辑:程序博客网 时间:2024/05/01 08:25

作者:朱鑫

邮箱:zhuxin@nwsuaf.edu.cn

/*
 * Lexical analyzer for C source text.
 *
 * Reads "in.txt" line by line and classifies each token as a keyword,
 * an operator, a delimiter, an identifier or an integer constant.
 * The token stream is echoed to stdout and written to "out.txt"; the
 * distinct identifiers go to "IDOut.txt" and the distinct constants to
 * "ConstOut.txt". Everything after a "//" on a line is ignored.
 *
 * Token codes
 *   -1  error (unrecognized character)
 *
 *   Keywords
 *    1 auto      2 break     3 case      4 char      5 const     6 continue
 *    7 default   8 do        9 double   10 else     11 enum     12 extern
 *   13 float    14 for      15 goto     16 if       17 int      18 long
 *   19 register 20 return   21 short    22 signed   23 static   24 sizeof
 *   25 struct   26 switch   27 typedef  28 union    29 unsigned 30 void
 *   31 volatile 32 while
 *
 *   Operators
 *   33 =    34 ==
 *   35 +    36 ++   37 +=
 *   38 -    39 --   40 -=   41 ->
 *   42 *    43 *=
 *   44 /    45 /=
 *   46 %    47 %=
 *   48 (    49 )    50 [    51 ]    52 ,
 *   53 >    54 >>   55 >>=  56 >=
 *   57 <    58 <<   59 <<=  60 <=
 *   61 !    62 !=
 *   63 ^    64 ^=
 *   65 &    66 &&   67 &=
 *   68 |    69 ||   70 |=
 *   71 ~    72 ? and :      73 .
 *
 *   Delimiters
 *   74 {    75 }    76 ;    77 \ (backslash)
 *
 *   78 line comment marker (//)
 *   79 identifier
 *   80 constant (run of decimal digits)
 *
 * Known limitations: block comments, string/character literals and
 * floating-point constants are not recognized as single tokens.
 */
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

// Keyword table. The index of a keyword IS its token code, so the order must
// match the code table above (slot 0 is unused so that codes start at 1).
string Keyword[] = {"", "auto", "break", "case", "char", "const", "continue", "default", "do",
                    "double", "else", "enum", "extern", "float", "for", "goto", "if",
                    "int", "long", "register", "return", "short", "signed", "static", "sizeof",
                    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
                   };
const int kKeywordCount = sizeof(Keyword) / sizeof(Keyword[0]) - 1;

const int kTableCapacity = 1000;        // maximum distinct entries per table
const int kErrorCode = -1;
const int kCommentCode = 78;
const int kIdentifierCode = 79;
const int kConstantCode = 80;

string ID[kTableCapacity];              // symbol table (distinct identifiers)
int indexOfID = 0;
string Const[kTableCapacity];           // constant table (distinct constants)
int indexOfConst = 0;

bool IsLetter(char ch);                     // true for a letter or an underscore
bool IsDigit(char ch);                      // true for a decimal digit
int Reserve(const string& strToken);        // keyword code, or 0 if not a keyword
void insertID(const string& strToken);      // add to the symbol table
void insertConst(const string& strToken);   // add to the constant table

// Returns the character at pos, or '\0' when pos is past the end of line.
static char peekChar(const string& line, string::size_type pos)
{
    return pos < line.size() ? line[pos] : '\0';
}

// If the next character is `expected`, appends it to token and advances pos.
static bool consumeIf(const string& line, string::size_type& pos, char expected, string& token)
{
    if (peekChar(line, pos) != expected)
    {
        return false;
    }
    token += expected;
    ++pos;
    return true;
}

// True for the characters that separate tokens. '\r' is included so that
// files with CRLF line endings do not yield a bogus error token per line.
static bool isSkippable(char ch)
{
    return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
}

// Scans one token starting at line[pos] (which must be a valid, non-blank
// position). Stores the lexeme in token, leaves pos just past the lexeme and
// returns the token code. Identifiers and constants are recorded in their
// tables as a side effect.
static int scanToken(const string& line, string::size_type& pos, string& token)
{
    const char ch = line[pos++];
    token.assign(1, ch);

    if (IsLetter(ch))
    {
        while (IsLetter(peekChar(line, pos)) || IsDigit(peekChar(line, pos)))
        {
            token += line[pos++];
        }
        int code = Reserve(token);
        if (code == 0)
        {
            code = kIdentifierCode;
            insertID(token);
        }
        return code;
    }

    if (IsDigit(ch))
    {
        while (IsDigit(peekChar(line, pos)))
        {
            token += line[pos++];
        }
        insertConst(token);
        return kConstantCode;
    }

    // Operators and delimiters: always try the longest match first.
    switch (ch)
    {
    case '=':
        return consumeIf(line, pos, '=', token) ? 34 : 33;
    case '+':
        if (consumeIf(line, pos, '+', token)) return 36;
        if (consumeIf(line, pos, '=', token)) return 37;
        return 35;
    case '-':
        if (consumeIf(line, pos, '-', token)) return 39;
        if (consumeIf(line, pos, '=', token)) return 40;
        if (consumeIf(line, pos, '>', token)) return 41;
        return 38;
    case '*':
        return consumeIf(line, pos, '=', token) ? 43 : 42;
    case '/':
        if (consumeIf(line, pos, '=', token)) return 45;
        if (consumeIf(line, pos, '/', token)) return kCommentCode;
        return 44;
    case '%':
        return consumeIf(line, pos, '=', token) ? 47 : 46;
    case '(':
        return 48;
    case ')':
        return 49;
    case '[':
        return 50;
    case ']':
        return 51;
    case ',':
        return 52;
    case '>':
        // Each accepted character is consumed exactly once, so the character
        // following ">=" or ">>=" is not skipped.
        if (consumeIf(line, pos, '>', token))
        {
            return consumeIf(line, pos, '=', token) ? 55 : 54;
        }
        if (consumeIf(line, pos, '=', token)) return 56;
        return 53;
    case '<':
        if (consumeIf(line, pos, '<', token))
        {
            return consumeIf(line, pos, '=', token) ? 59 : 58;
        }
        if (consumeIf(line, pos, '=', token)) return 60;
        return 57;
    case '!':
        return consumeIf(line, pos, '=', token) ? 62 : 61;
    case '^':
        return consumeIf(line, pos, '=', token) ? 64 : 63;
    case '&':
        if (consumeIf(line, pos, '&', token)) return 66;
        if (consumeIf(line, pos, '=', token)) return 67;
        return 65;
    case '|':
        if (consumeIf(line, pos, '|', token)) return 69;
        if (consumeIf(line, pos, '=', token)) return 70;
        return 68;
    case '~':
        return 71;
    case '?':
    case ':':
        // The code table lists "? :" together under 72, so both halves of
        // the conditional operator share that code.
        return 72;
    case '.':
        return 73;
    case '{':
        return 74;
    case '}':
        return 75;
    case ';':
        return 76;
    case '\\':
        return 77;
    default:
        return kErrorCode;
    }
}

// Writes one token record to stdout and to the token file. The padding before
// the closing parenthesis differs between one-character and longer lexemes;
// that is the established output format and is kept as is.
static void emitToken(ofstream& out, int lineAt, int code, const string& token)
{
    const bool single = token.length() <= 1;
    cout << "(    " << lineAt << "," << "\"" << code << "\"" << "," << "\"" << token << "\""
         << (single ? "    )" : "  )") << '\n';
    out << "( " << lineAt << "," << "\"" << code << "\"" << "," << "\"" << token << "\""
        << (single ? " )" : "  )") << '\n';
}

// Appends value to table[0..count) unless it is already present. Reports on
// stderr and drops the value when the table is full instead of overflowing.
static void insertUnique(string table[], int& count, const string& value, const char* tableName)
{
    string* const end = table + count;
    if (find(table, end, value) != end)
    {
        return;
    }
    if (count >= kTableCapacity)
    {
        cerr << tableName << " table is full (" << kTableCapacity
             << " entries); dropping \"" << value << "\"" << endl;
        return;
    }
    table[count++] = value;
}

int main(void)
{
    ifstream in("in.txt");              // source text to analyze
    if (!in)
    {
        cerr << "File open or create error!" << endl;
        return 1;
    }

    // Output files are only created once the input is known to exist.
    ofstream out("out.txt");            // token stream
    ofstream IDOut("IDOut.txt");        // symbol table
    ofstream ConstOut("ConstOut.txt");  // constant table
    if (!out || !IDOut || !ConstOut)
    {
        cerr << "File open or create error!" << endl;
        return 1;
    }

    string line;
    string token;
    int lineAt = 0;                     // 1-based number of the current line
    while (getline(in, line))
    {
        ++lineAt;
        string::size_type pos = 0;
        for (;;)
        {
            while (pos < line.size() && isSkippable(line[pos]))
            {
                ++pos;
            }
            // An embedded NUL ends the line, as does running off the end.
            if (pos >= line.size() || line[pos] == '\0')
            {
                break;
            }

            const int code = scanToken(line, pos, token);
            emitToken(out, lineAt, code, token);
            if (code == kCommentCode)
            {
                break;                  // the rest of the line is a comment
            }
        }
    }

    for (int i = 0; i < indexOfID; i++)
    {
        IDOut << i << "\t" << ID[i] << '\n';
    }
    for (int i = 0; i < indexOfConst; i++)
    {
        ConstOut << i << "\t" << Const[i] << '\n';
    }
    return 0;
}

// Returns true when ch is an ASCII letter or an underscore.
bool IsLetter(char ch)
{
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}

// Returns true when ch is an ASCII decimal digit.
bool IsDigit(char ch)
{
    return ch >= '0' && ch <= '9';
}

// Returns the token code (1..kKeywordCount) of strToken if it is a keyword,
// otherwise 0. The table is ordered by token code, not alphabetically
// ("static" precedes "sizeof"), so it is scanned linearly; a binary search
// over it fails to find "static".
int Reserve(const string& strToken)
{
    for (int code = 1; code <= kKeywordCount; ++code)
    {
        if (Keyword[code] == strToken)
        {
            return code;
        }
    }
    return 0;
}

// Records strToken in the symbol table if it is not already there.
void insertID(const string& strToken)
{
    insertUnique(ID, indexOfID, strToken, "Symbol");
}

// Records strToken in the constant table if it is not already there.
void insertConst(const string& strToken)
{
    insertUnique(Const, indexOfConst, strToken, "Constant");
}