编译原理--词法分析

来源:互联网 发布:mysql怎么启动 编辑:程序博客网 时间:2024/04/30 12:24

词法分析

实验目的:设计、编制并调试一个词法分析程序,  加深对词法分析原理的理解。

实验环境:   Codeblocks  12.11

实验内容:

1.待分析的简单词法:

(1)关键字:if   else   break   int  float,包括C语言的32个关键字,并且全部是小写;

(2)运算符:算术、关系、逻辑、赋值、界符;

(3)标识符(ID)定义:

        ID = ( _ | letter ) ( letter | digit )*

(4)整数(NUM) 定义:

        NUM = digit digit* (同时识别形如 digit digit* . digit* 的float数)

(5)单行注释和块注释

2.各种单词符号对应的种别码

单词符号

种别码

单词符号

种别码

auto

1

(_|letter)(letter|digit)*

33

break

2

digit digit*

34

case

3

digit(digit)*.(digit)*

35

char

4

算术运算符:+

36

const

5

            -

37

continue

6

            *

38

default

7

            /

39

do

8

            %

40

double

9

            ++

41

else

10

            --

42

enum

11

关系运算符:>

43

extern

12

            <

44

float

13

            <>

45

for

14

            >=

46

goto

15

            <=

47

if

16

            ==

48

int

17

            !=

49

long

18

逻辑运算符:&&

50

register

19

            ||

51

return

20

            !

52

short

21

赋值运算符:+=

53

signed

22

            -=

54

sizeof

23

            *=

55

static

24

            /=

56

struct

25

            %=

57

switch

26

界符:{

58

typedef

27

      }

59

union

28

      [

60

unsigned

29

      ]

61

void

30

      (

62

volatile

31

      )

63

while

32

      "

64

头文件

83

      '

65

 

 

      =

66

 

 

      ;

67

 

 

      #

68



                                Main()函数的流程图


 

算法描述:

打开文件,依次读取文件的一个字符,若该字符为’\n’则行计数器加一;

若是字母或者下划线,那么一定是标识符或者关键字,则调用id()函数;若是数字,则调用dgt()函数识别整数或小数;

若是'/',则调用note()函数:note()读取下一个字符,若为'/'或'*',则分别按行注释、块注释处理,否则按运算符调用punc()函数;

若是’ ’、’\t’则忽略然后继续读取下一个字符;

其余的则为运算符一类或者错误符号,调用函数punc()来判断;循环这个过程直至读到文件末尾。

并且把词法分析过程中的结果输出在dos窗口,同时写入文件中保存。


#include <stdio.h>

#include <stdlib.h>

#include <ctype.h>

#include <string.h>

/* Returns the token code of a lexeme: 1..32 for a C keyword, 33 for any
   other identifier. */
int guanjz(char ch1[]);

/* fp is the source file being scanned; fp1 receives the token listing. */
FILE *fp,*fp1;

/* Line counter: the current line number in the source file, starting at 1. */
int hanjsq=1;

/* The character most recently read from fp, shared between main() and the
   scanner routines.  It is an int rather than a char because it receives the
   result of fgetc()/getc(): a plain char cannot tell EOF apart from the byte
   0xFF (or can never equal EOF where char is unsigned), and negative char
   values must not be passed to the <ctype.h> functions. */
int ch;

/* File names typed in by the user for the input and the output file. */
char infile[100],outfile[100];

void id();      /* scans an identifier, keyword or header name */

void dgt();     /* scans an integer or decimal number */

void punc();    /* scans an operator or delimiter (punctuation) */

void note();    /* skips a line or block comment */

void Cbegin();  /* opens the files and prints the banner */

void Cend();    /* prints the closing banner */

int main (void)   {

    Cbegin();

    while(!feof(fp)) // 没有读到文件末尾

    {

        ch=fgetc(fp);

        if(ch==EOF)

            break;

        if(ch==10) //\n

            hanjsq++;

        else if(isalpha(ch)|| ch=='_') // 第一个是字母或者是下划线 (只能是标识符或者关键字)

            id();

        else  if(isdigit(ch)) // 数字

            dgt();

        else if(ch=='/') // 注释

            note();

        else if(ch=='\t'||ch==' ')

            continue;

        else

            punc(); //运算符

    }

    Cend();

    return 0;

}   /***************扫描符号******************/

void punc()    {

    char letter[40][4]= {"+","-","*","/","%","++","--",

                         ">","<","<>",">=","<=","==","!=",

                         "&&","||","!",

                         "+=","-=","*=","/=","%=",

                         "{","}","[","]","(",")","","'","=",";","#"

                        };

    char a[4];

    int i=0;

    a[i++] = ch;

    if(ch=='"')

    {

        printf("line:%d\t\t%c\t\t64\n",hanjsq,ch);

        fprintf(fp1,"line:%d\t\t%c\t\t64\n",hanjsq,ch);

        return;

    }

    ch=fgetc(fp);

    if(ch=='+'||ch=='-'||ch=='>'||ch=='='||ch=='&'||ch=='|')

    {

        a[i++]=ch;

        a[i]='\0';

        for(i=0; i<=32; i++)

            if(!strcmp(a,letter[i]))

            {

                printf("line:%d\t\t%s\t\t%d\n",hanjsq,a,36+i);

                fprintf(fp1,"line:%d\t\t%s\t\t%d\n",hanjsq,a,36+i);

                return ;

            }

         fseek(fp,-1,1);

    }

    else

        fseek(fp,-1,1);

    a[1]='\0';

    for(i=0; i<=32; i++)

        if(!strcmp(a,letter[i]))

        {

            printf("line:%d\t\t%s\t\t%d\n",hanjsq,a,36+i);

            fprintf(fp1,"line:%d\t\t%s\t\t%d\n",hanjsq,a,36+i);

            return ;

        }

    if(32<i)

    {

        printf("line:%d\t\t%s\t\t-1\n",hanjsq,a);

        fprintf(fp1,"line:%d\t\t%s\t\t-1\n",hanjsq,a);

    }

}   /***************扫描数字******************/

void dgt()

{

    char letter[20]; //规定每个数最长20位

    int i=0;

    letter[i++]=ch;

    bool tag=false;

    while(!feof(fp)) //ch!= ' ' && ch!= '\n' && ch!= '\t'

    {

        ch=fgetc(fp);

        if('.'==ch || isdigit(ch))

        {

            letter[i++]=ch;

            if('.'==ch )

                tag=true;

        }

        else

        {

            fseek(fp,-1,1);

            letter[i]='\0';

            if(tag)

            {

                printf("line:%d\t\t%s\t\t35\n",hanjsq,letter); // 小数形式的种别码为35

                fprintf(fp1,"line:%d\t\t%s\t\t35\n",hanjsq,letter); // 小数形式的种别码为35

 

            }

            else

            {

                printf("line:%d\t\t%s\t\t34\n",hanjsq,letter); // 整数的种别码为11

                fprintf(fp1,"line:%d\t\t%s\t\t34\n",hanjsq,letter); // 整数的种别码为11

 

            }

            return ;   }    }   }

/***********标识符和关键字的判断*****************/

void id()   {

    // 第一个是字母或者是下划线 (只能是标识符或者关键字)

    char letter[1000]; //规定每个标识符最长100

    int i=0;

    letter[i++]=ch;

    while(!feof(fp))

    {

        ch=getc(fp);

        if('.'==ch && (ch=getc(fp))=='h' )

        {

            // 判断是否为头文件

            letter[i++]='.';

            letter[i++]='h';

            letter[i]='\0';

            printf("line:%d\t\t%s\t\t83\n",hanjsq,letter);

            fprintf(fp1,"line:%d\t\t%s\t\t83\n",hanjsq,letter);

            return;

        }

        else if(isalpha(ch) || isdigit(ch) )

        {

            letter[i++] =ch;

 

        }

        else if (!isalpha(ch) && !isdigit(ch)) // 标识符结束

        {

            fseek(fp,-1,1);

            letter[i]='\0';

            printf("line:%d\t\t%s\t\t%d\n",hanjsq,letter,guanjz(letter));

            fprintf(fp1,"line:%d\t\t%s\t\t%d\n",hanjsq,letter,guanjz(letter));

            return;

        }

    }

}

/***************判断是否为关键字******************/

/*
 * Classifies a lexeme as keyword or identifier.
 *
 * ch1: NUL-terminated lexeme to look up.
 * Returns the keyword's token code, which is its 1-based position in the
 * table below (1 for "auto" ... 32 for "while"), or 33 when the lexeme is
 * not a keyword and is therefore an ordinary identifier.
 */
int guanjz(char ch1[])
{
    /* The 32 C keywords, in token-code order. */
    static const char *const keywords[] = {
        "auto", "break", "case", "char", "const", "continue",
        "default", "do", "double", "else", "enum", "extern",
        "float", "for", "goto", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void",
        "volatile", "while"
    };
    const int total = (int)(sizeof keywords / sizeof keywords[0]);
    int idx = 0;

    while (idx < total)
    {
        if (strcmp(keywords[idx], ch1) == 0)
            return idx + 1;     /* codes are 1-based */
        idx++;
    }

    return 33;  /* token code of an ordinary identifier */
}

/* ---------- Comment handling ---------- */

void note()   {

    ch=getc(fp);

    if(ch=='/') // 行注释

    {

        while(1)

        {

            ch=getc(fp);

            if(ch=='\n')

            {

                hanjsq++;

                return;

            }

        }

    }

    else if(ch=='*') // 块注释

    {

        while(1)

        {

            ch=getc(fp);

            if(ch=='\n')

            {

                hanjsq++;

            }

            else if(ch=='*')

            {

                ch=fgetc(fp);

                return;

            }

        }

    }

    else

        punc(); // 那么就应该是标点符号   }

/*
 * Start-up: asks for the input and output file names, opens the input file
 * for reading (fp) and the output file for writing (fp1), then prints the
 * banner and the column header of the result listing.
 * Terminates the program with a failure status if a name cannot be read or
 * a file cannot be opened.
 */
void Cbegin()
{
    printf("\t********************请输入读文件名*********************\n");

    /* The field width keeps the name inside the 100-byte buffer. */
    if (scanf("%99s", infile) != 1)
    {
        printf("Cannot read input file name.\n");
        exit(EXIT_FAILURE);
    }

    fp = fopen(infile, "r");
    if (fp == NULL)
    {
        printf("Cannot open reading file.\n");
        exit(EXIT_FAILURE);
    }

    printf("\t********************请输入写文件名*********************\n");

    if (scanf("%99s", outfile) != 1)
    {
        printf("Cannot read output file name.\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    fp1 = fopen(outfile, "w");
    if (fp1 == NULL)
    {
        printf("Cannot open writing file.\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    printf("\n\t\t**************************************************\n");
    printf("\n\t\t\t\t开始进行词法分析\n");
    printf("\n\t\t**************************************************\n");
    printf("\n\n行号\t\t字符串\t\t种别码\n");
}

void Cend()

{printf("\n\n\t**********************************************************\n\n");

    printf("\t\t\t词法分析结束\n\n");

    printf("\t\t******************************************\n\n");

    printf("\t\t\t分析结果保留在文件:\n\t\t%s\n",outfile);

    printf("\t\t******************************************\n\n");

    printf("\t\t\t欢迎下次使用,谢谢!\n");  printf("\t**********************************************************\n\n");

 

}

运行部分截图:

源程序:


词法分析结果(部分):


总结:

1.将程序看做一个大的字符串,将字符串从左到右进行扫描和分解,根据语言的词法规则,识别出一个一个具有独立意义的单词。在设计、编写和调试词法分析程序的过程中,加深了对词法分析原理的理解。

2.读取程序时,使用的是文件形式,在程序中是根据是否读到文件末尾来判断文件是否读取完毕,所以在文件的末尾一定要加上换行符,这样文件才能正确的读取完毕。

3.种别码的设置需要提前和词法规则协调,使标识符和对应种别码成某种对应的关系,充分利用数组角标来判断出单词符号的种别码。

一定要先整体构思好,才能够尽量一次性写完整。


1 0
原创粉丝点击