C++词法分析(C#描述)

来源:互联网 发布:漂亮的谐音网络用语 编辑:程序博客网 时间:2024/05/17 22:04

近来开始学习编译原来,这是词法分析最基础的,在网上看到很多高手的作品,我也照着做了一个,有错别介意


using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.IO;namespace 词法分析成功改造3{    class Program    {        static void Main(string[] args)        {            StreamReader reader = new StreamReader("D:/test.txt");            string wholeText = reader.ReadToEnd();            Console.WriteLine(wholeText + "\n\n");            LexicalAnalysis manage = new LexicalAnalysis();            Console.WriteLine(manage.Analysis(wholeText));            Console.WriteLine("\n\n" + manage.ErrorNO());            Console.WriteLine(LexicalAnalysis.AllWrongText);            Console.Read();        }        #region 词法分析        class LexicalAnalysis        {            /// <summary>            /// 记录字符位置            /// </summary>            int i = 0, j = 0;                              /// <summary>            /// 行号            /// </summary>            public static int LineNo = 1;                                     /// <summary>            /// 记录识别出的单词               /// </summary>            string token = "";                          /// <summary>            /// 记录每一次得到的完整信息            /// </summary>            string text2 = "";            /// <summary>            /// 用来记录错误信息            /// </summary>            string text3 = "";                                  /// <summary>            /// 记录异常信息            /// </summary>            string wrong = "";                            /// <summary>            /// 错误字段的个数            /// </summary>            public static int errors = 0;                             /// <summary>            /// 记录错误的详细信息            /// </summary>            public static string AllWrongText = "";                             /// <summary>            /// 标记成对出现的界符            /// </summary>            int flag1 = 0, flag2 = 0, flag3, flag4, flag5, flag6 = 0;                                   /// <summary>            /// 关键字(部分)            /// </summary>            string[] keyWords = new string[32]{"auto", "double", "int","struct", "break", "else",                "long","switch","case","enum","register","typedef","char","extern","return",                "union","const","float","short","unsigned","continue","for","signed","void","default",                "goto","sizeof","volatile","do","if","while","static" };            /// <summary>            /// 运算符            /// </summary>            string[] operatorWords = new string[28]{"+","-","*","/","%",">","<",">=","<=","==", "-=",                "+=","*=","/=","!=","=","%=","&","&&","|","||","!","++","--","~","<<",">>","?:"};            /// <summary>            /// 边界符            /// </summary>            string[] boundWords = new string[15] { "{", "}", "[", "]", ";", ",", ".",                 "(", ")", ":", "\"", "#", ">", "<", "\'" };            /// <summary>            /// 词法分析            /// </summary>            /// <param name="str">所要分析的句子</param>            /// <returns>经过分析之后的句子</returns>            public string Analysis(string str)            {                string space = "";                         //空格数                if (str.Length == 0)                           //判断字符串是否为空                    return "";                try                {                    while (str[i] != '\0')  //读入字符判断,空格、字母、数字、界符                    {                        if (str[i] == ' ' || str[i] == '\t' || str[i] == '\r')                        {                            i++;                                     //跳过无意义的字符                        }                        else if (str[i] == '\n')//如果是换行符,则行号加1                        {                            LineNo++;                            i++;                        }                        else if (IsLetter(str[i]))//如果是字母                        {                            i = RecogniseLetter(str, i);                            for (j = 0; j < keyWords.Length; j++)                            {                                if (token.CompareTo(keyWords[j]) == 0)                                {                                    break;                                }                            }                            if (j >= keyWords.Length)//是普通的标志符                            {                                for (int m = 0; m < 12 - token.Length; m++)                                {                                    space = space + " ";                                }                                text2 = text2 + LineNo.ToString() + ":  " + token + space + "标识符" + "\r\n";                                token = "";                                space = "";                            }                            if (j < keyWords.Length)//是保留字                            {                                for (int m = 0; m < 12 - token.Length; m++)                                    space = space + " ";                                text2 = text2 + LineNo.ToString() + ":  " + keyWords[j] + space + "关键字" + "\r\n"; ;                                token = "";                                space = "";                            }                        }//end else if                        else if (IsDigit(str[i]))//如果是数字                        {                            i = RecogniseDigit(str, i);                            for (int m = 0; m < 12 - token.Length; m++)                            {                                space = space + " ";                            }                            text2 = text2 + LineNo.ToString() + ":  " + token + space + "常量" + "\r\n";                            token = "";                            space = "";                        }//end else if                        else if (IsBound(str[i]))                  //识别界符                        {                            i = RecogniseBound(str, i);                            for (int m = 0; m < 12 - token.Length; m++)                            {                                space = space + " ";                            }                            text2 = text2 + LineNo.ToString() + ":  " + token + space + "界符" + "\r\n"; ;                            token = "";                            space = "";                        }//end else if                        else if (IsOperator(str[i]))                        {                            i = RecogniseOperator(str, i);                            for (int m = 0; m < 12 - token.Length; m++)                            {                                space = space + " ";                            }                            text2 = text2 + LineNo.ToString() + ":  " + token + space + "运算符" + "\r\n";                            token = "";                            space = "";                        }//end else if                        else                        {                            error(0); i++;                        }                    }//end while                }//end try                catch (DivideByZeroException e1)                {                    wrong = e1.Message;                }                catch (IndexOutOfRangeException e2)                {                    wrong = e2.Message;                }                catch (Exception e)                {                    wrong = e.Message;                }                return text2;            }            /// <summary>            /// 判断字符是不是字母            /// </summary>            /// <param name="ch">待定字符</param>            /// <returns>是否</returns>            public bool IsLetter(char ch)            {                if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))                {                    return true;                }                else                    return false;            }            /// <summary>            /// 判断字符是不是数字            /// </summary>            /// <param name="ch">待定字符</param>            /// <returns>是否</returns>            public bool IsDigit(char ch)            {                if (ch >= '0' && ch <= '9')                {                    return true;                }                else                    return false;            }            /// <summary>            /// 判断字符是不是下划线            /// </summary>            /// <param name="ch">待定字符</param>            /// <returns>是否</returns>            public bool IsUnderline(char ch)            {                if (ch == '_')                    return true;                else return false;            }            /// <summary>            /// 判断字符是不是边界符            /// </summary>            /// <param name="ch">待定字符</param>            /// <returns></returns>            public bool IsBound(char ch)            {                for (int j = 0; j < boundWords.Length; j++)                {                    if (ch.CompareTo(boundWords[j][0]) == 0)                    {                        return true;                    }                }                return false;            }            /// <summary>            /// 判断字符是不是操作符            /// </summary>            /// <param name="ch">待定字符</param>            /// <returns>是否</returns>            public bool IsOperator(char ch)            {                for (int i = 0; i < operatorWords.Length; i++)                {                    if (ch == operatorWords[i][0])                    {                        return true;                    }                }                return false;            }            /// <summary>            /// 识别整个操作符            /// </summary>            /// <param name="str">字符串</param>            /// <param name="i">字符串中字符的位置</param>            /// <returns>字符位置</returns>            private int RecogniseOperator(string str, int i)            {                char state = '0';                string temp = "";                while (state != '2')                {                    switch (state)                    {                        case '0':                            temp += str[i];                            i++;                            state = '1';                            break;                        case '1':                            //判断为双个运算符                             if (str.Substring(i - 1, 2) == "++" || str.Substring(i - 1, 2) == "--" || str.Substring(i - 1, 2) == "<<" || str.Substring(i - 1, 2) == ">>" || str.Substring(i - 1, 2) == "+=" || str.Substring(i - 1, 2) == "-=" || str.Substring(i - 1, 2) == "*=" || str.Substring(i - 1, 2) == "/=" || str.Substring(i - 1, 2) == "!=" || str.Substring(i - 1, 2) == "%=")                            {                                temp += str[i];                                i++;                                state = '2';                            }                            //三目运算符                            if (str[i - 1] == '?' && str[i] == ':')                            {                                temp += str[i];                                i++;                                state = '2';                            }                            //不是运算符                            else                            {                                state = '2';                            }                            break;                    }//end switch                }//end while                token = temp;                return i;            }            /// <summary>            /// 识别普通的标识符            /// </summary>            /// <param name="str">用来识别的字符串</param>            /// <param name="i">字符串中字符的位置</param>            /// <returns>字符位置</returns>            public int RecogniseLetter(string str, int i)                          //识别单词            {                char state = '0';                string temp = "";                             //记录单词                while (state != '2')                {                    switch (state)                    {                        case '0':                            if (IsLetter(str[i]))                            {                                state = '1';                                temp = temp + str[i];                                i++;                            }                            break;                        case '1':                            if (IsLetter(str[i]) || IsDigit(str[i]) || IsUnderline(str[i]))                            {                                state = '1';                                temp = temp + str[i];                                i++;                            }                            else                                state = '2';                            break;                    }//end switch                }//end while                token = temp;                   //记录识别的字符串                return i;            }            /// <summary>            /// 识别整个常熟            /// </summary>            /// <param name="str">待测字符串</param>            /// <param name="i">字符串中字符的位置</param>            /// <returns>字符位置</returns>            public int RecogniseDigit(string str, int i)                                //识别常数            {                char state = '0';                string temp = "";                while (state != '2')                {                    switch (state)                    {                        case '0':                            if (IsDigit(str[i]))                            {                                temp += str[i];                                state = '1';                                i++;                            }                            break;                        case '1':                            if (IsDigit(str[i]))                            {                                temp += str[i];                                state = '1';                                i++;                            }                            else if (str[i] == '.' && IsDigit(str[i + 1])) //有小数点的数                                                     {                                temp += str[i];                                state = '1';                                i++;                            }                            else                                 state = '2';                            break;                    }//end switch                                  }//end while                token = temp;                return i;            }            /// <summary>            /// 识别标识符            /// </summary>            /// <param name="str">待测字符串</param>            /// <param name="i">字符串中字符的位置</param>            /// <returns>字符位置</returns>            public int RecogniseBound(string str, int i)            {                string temp = "";                for (int k = 0; k < boundWords.Length; k++)                {                    if (str[i].CompareTo(boundWords[k][0]) == 0)                    {                        temp += str[i];                        i++;                        break;                    }                    else                         continue;                }//end for                token = temp;                //这些标志数是用来等会做错误判断用的(没有成双出现就是错误)                if (token == "{" || token == "}")                    flag1++;                else if (token == "[" || token == "]")                    flag2++;                else if (token == "(" || token == ")")                    flag3++;                else if (token == "<" || token == ">")                    flag4++;                else if (token == "\'")                    flag5++;                else if (token == "\"")                    flag6++;                return i;            }            /// <summary>            /// 得到已经识别的字符串            /// </summary>            /// <param name="str">字符串</param>            /// <param name="k">字符位置</param>            /// <returns>字符位置</returns>            public int GetetToken(string str, int k)                                   {                switch (k)                {                    case 1:                        for (int i = 0; i < keyWords.Length; i++)  //关键字                        {                            if (str == keyWords[i])                                return i;                        }                        break;                    case 2:                        for (int i = 0; i < operatorWords.Length; i++)  //运算符                        {                            if (str == operatorWords[i])                                return i + 32;   //+32的原因是keyWords中有32个关键字                        }                        break;                    case 3:                        for (int i = 0; i < boundWords.Length; i++) //界符                        {                            if (str == boundWords[i])                                return i + 60;   //+60的原因是keyWords+operatorWords有60个                        }                        break;                }//end switch                return 0;            }            /// <summary>            /// 错误个数            /// </summary>            /// <returns>错误信息</returns>            public string ErrorNO()                                                  {                error(1);                text3 = errors.ToString() + "   errors";                return text3;            }            /// <summary>            /// 错误信息            /// </summary>            /// <param name="k"></param>            public void error(int k)                                                 {                switch (k)                {                    case 0:    //输入了非法字符                        AllWrongText = AllWrongText + LineNo.ToString() + ": " + "非法字符" + "\r\n";                          errors++;                        break;                    case 1:   //界符不匹配                        if (flag1 % 2 != 0)                        {                            AllWrongText = AllWrongText + "{ 不匹配" + "\r\n";                            errors++;                        }                        if (flag2 % 2 != 0)                        {                            AllWrongText = AllWrongText + "[ 不匹配" + "\r\n";                            errors++;                        }                        if (flag3 % 2 != 0)                        {                            AllWrongText = AllWrongText + "( 不匹配" + "\r\n";                            errors++;                        }                        if (flag4 % 2 != 0)                        {                            AllWrongText = AllWrongText + "< 不匹配" + "\r\n";                            errors++;                        }                        if (flag5 % 2 != 0)                        {                            AllWrongText = AllWrongText + "' 不匹配" + "\r\n";                            errors++;                        }                        if (flag6 % 2 != 0)                        {                            AllWrongText = AllWrongText + " \" 不匹配" + "\r\n";                            errors++;                        }                        break;                }//end switch            }        }        #endregion    }}


原创粉丝点击