词法分析主程序

来源：互联网 | 发布：刀剑乱舞极化数据 | 编辑：程序博客网 | 时间：2024/05/02 16:31
/**词法分析主程序**/TkWord * tk_hashtable[MAXKEY];  //单词哈希表DynArray tktable;               //单词动态数组，单词表中放置标识符，包括变量名，函数名，结构定义名DynString tkstr;                //单词字符串DynString sourcestr;            //单词源码字符串int tkvalue;                    //单词值(单词为整型变量)char ch;                        //当前取到的源码字符int token;                      //单词编码int line_num;                   //行号/**从源文件中读取一个字符**/void getch(){    ch=getc(fin);       /**文件尾返回EOF，其他返回实际字节值**/}/**空白字符处理忽略空格，Tab和回车**/void skip_white_space(){    while(ch==' '||ch=='\t'||ch=='\r')    {        if(ch=='\r')            /**windows下.c文件就是文本文件，而windows下文件行末结束符是\r\n，是俩个字符**/        {            getch();            if(ch!='\n')                return;            line_num++;        }        printf("%c",ch);        getch();    }}/**解析注释**/void parse_comment(){    getch();    do    {        do        {            if(ch=='\n'||ch=='*'||ch==CH_EOF)                break;            else                getch();        }while(1);        if(ch=='\n')        /**读到换行符，行数加一**/        {            line_num++;            getch();        }        else if(ch=='*')    /**可能出现注释结束符，先往前读一个字符**/        {            getch();            if(ch=='/')      /**跳出循环**/            {                getch();                return;            }        }        else        {            error("一直到文件尾未看到配对的注释结束符");            return ;        }    }while(1);}/**预处理，忽略空白字符及注释**/void preprocess(){    while(1)    {        if(ch==' '||ch=='\t'||ch=='\r')/**忽略空白字符**/            skip_white_space();        else if(ch=='/')        {            getch();                   /**向前多看一个字符**/            if(ch=='*')                /**是注释就忽略**/                parse_comment();            else            {                ungetc(ch,fin);        /**不是注释就把字符回退到输入流**/                ch='/';                break;                 /**并结束循环**/            }        }        else            break;    }}/**解析标识符**//**判断c是否为字母或下划线**/int is_nodigit(char c){    return 
(c>='a'&&c<='z')||(c>='A'&&c<='Z')||c=='_';}/**判断c是否是数字**/int is_digit(char c){    return c>='0'&&c<='9';}/**解析标识符**/void parse_identifier(){    dynstring_reset(&tkstr);    dynstring_chcat(&tkstr,ch);    getch();    while(is_nodigit(ch)||is_digit(ch))    {        dynstring_chcat(&tkstr,ch);        getch();    }    dynstring_chcat(&tkstr,'\0');}/**解析整数**//**解析整型常量**/void parse_num(){    dynstring_reset(&tkstr);    dynstring_reset(&sourcestr);    do{        dynstring_chcat(&tkstr,ch);        dynstring_chcat(&sourcestr,ch);        getch();    }while(is_digit(ch));    if(ch=='.')    {        do{        dynstring_chcat(&tkstr,ch);        dynstring_chcat(&sourcestr,ch);        getch();        }while(is_digit(ch));    }    dynstring_chcat(&tkstr,'\0');    dynstring_chcat(&sourcestr,'\0');    tkvalue=atoi(tkstr.data);               /**把字符型转换为整型**/}/**解析字符串**//**解析字符常量，字符串常量sep:字符常量界符标识为单引号    字符串常量界符标识为双引号**/void parse_string(char sep){    char c;    dynstring_reset(&tkstr);    dynstring_reset(&sourcestr);    dynstring_chcat(&sourcestr,sep);    getch();    for(;;)    {        if(ch==sep)            break;                    /**界符匹配**/        else if(ch=='\\')        {            dynstring_chcat(&sourcestr,ch);            getch();            switch(ch)                /**解析转义字符**/            {            case '0':                c='\0';                break;            case 'a':                c='\a';                break;            case 'b':                 /**退格**/                c='\b';                break;            case 't':                c='\t';                break;            case 'n':                c='\n';                break;            case 'v':               /**垂直制表**/                c='\v';                break;            case 'f':               /**换页**/                c='\f';                break;            case 'r':                c='\r';                break;            case '\"':                c='\"';                break;            case '\'':      
          c='\'';                break;            case '\\':                c='\\';                break;            default:                c=ch;                if(c>='!'&&c<='~')                    warning("非法转义字符:\'\\%c\'",c);                else                    warning("非法转义字符:\'\\0x%x\'",c);                break;            }            dynstring_chcat(&tkstr,c);            dynstring_chcat(&sourcestr,ch);            getch();        }        else        {            dynstring_chcat(&tkstr,ch);            dynstring_chcat(&sourcestr,ch);            getch();        }    }    dynstring_chcat(&tkstr,'\0');    dynstring_chcat(&sourcestr,sep);    dynstring_chcat(&sourcestr,'\0');    getch();}/**取单词**/void get_token(){    preprocess();    switch(ch)    {        case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case'g':        case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case'n':        case 'o':case 'p':case 'q':case 'r':case 's':case 't':        case 'u':case 'v':case 'w':case 'x':case 'y':case 'z':        case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case'G':        case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case'N':        case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':        case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z':        case '_':            {                TkWord *tp;                parse_identifier();                tp=tkword_insert(tkstr.data);                token=tp->tkcode;                break;            }        case '0':case '1':case '2':case '3':        case '4':case '5':case '6':case '7':        case '8':case '9':            parse_num();            token=TK_CINT;            break;        case '+':            getch();            token=TK_PLUS;            break;        case '-':            getch();            if(ch=='>')            {                token=TK_POINTSO;                getch();            }            else                token=TK_MINUS;            break;        case '/':            
token=TK_DIVIDE;            getch();            break;        case '%':            token=TK_MOD;            getch();            break;        case '=':            getch();            if(ch=='=')            {                token=TK_EQ;                getch();            }            else                token=TK_ASSIGN;            break;        case '!':            getch();            if(ch=='=')            {                token=TK_NEQ;                getch();            }            else                error("暂不支持'!'（非操作符）");            break;        case '<':            getch();            if(ch=='=')            {                token=TK_LEQ;                getch();            }            else                token=TK_LF;            break;        case '>':            getch();            if(ch=='=')            {                token=TK_GEQ;                getch();            }            else                token=TK_GT;            break;        case '.':            getch();            if(ch=='.')            {                getch();                if(ch!='.')                    error("省略号拼写错误");                else                    token=TK_ELLIPSIS;                getch();            }            else            {                token=TK_DOT;            }            break;        case '&':            token=TK_AND;            getch();            break;        case ';':            token=TK_SEMICOLON;            getch();            break;        case ']':            token=TK_CLOSEBR;            getch();            break;        case '}':            token=TK_END;            getch();            break;        case ')':            token=TK_CLOSEPA;            getch();            break;        case '[':            token=TK_OPENBR;            getch();            break;        case '{':            token=TK_BEGIN;            getch();            break;        case '(':            token=TK_OPENPA;            getch();            break;        case ',':            
token=TK_COMMA;            getch();            break;        case '*':            token=TK_STAR;            getch();            break;        case '\'':            parse_string(ch);            token=TK_CCHAR;            tkvalue=*(char *)tkstr.data;    /**强制转换成字符的ascll码**/            break;        case '\"':            parse_string(ch);            token=TK_CSTR;            break;        case EOF:            token=TK_EOF;            break;        default:            error("不认识的字符:0x%02x",ch);/**把ch的值用16进制表示，%02x表示不足2位，高位就用0补充**/            getch();            break;    }}
0 0