lcc源代码解析之sym.c

来源:互联网 发布:阿里云学生机续费40.8 编辑:程序博客网 时间:2024/05/18 02:42

lcc是一款小巧的工业级编译器,代码精简,代码开源,相比gcc更适合编译器初学者阅读。

你可以在这里搞到代码:https://github.com/drh/lcc

但是,怎么说呢,这个代码防盗性较强,几乎没有注释,

我在阅读源码的过程中,参考了其他前辈关于lcc的文章,以及官方推荐的书籍《A Retargetable C Compiler: Design and Implementation》。

同时,为了给其他想学习编译器相关知识的同学提供些帮助,我在阅读源代码的过程中用中文对源代码做了较为详细的注释。

中间有理解不对的地方,欢迎指正:jinn.yette@gmail.com

本文解析编译器中一个很重要的模块——符号表(sym.c),它贯穿整个编译过程,前端和后端都会用到。

符号的结构定义在c.h中,如下

struct symbol {char *name; //符号的名称,大多数情况是源程序的符号int scope; //符号作用域,常量CONSTANT,标号LABEL,全局GLOBAL,参数PARAM还是局部变量LOCAL,在第i层生成的local变量,其scope等于LOCAL+iCoordinate src; //符号定义处的位置:文件名,行号和列号/*up字段比较重要,它将符号表中所有符号链接成一个链表,最后进入符号表的那个符号为首从后向前遍历该链表可以访问当前作用域内的所有符号,包括被内嵌符号隐藏的符号这就提供了除hash方式外另外一个符号表查询方式。*/Symbol up;List uses;//如果uses保存一个Coordiante链表,则可表明一个符号的所有使用信息,也可置nullint sclass; //符号的扩展存储类型,AUTO/REGISTER/STATIC/EXTERN/TYPEDEF/ENUM,常量和标号不使用该域unsigned structarg:1;//结构参数标志unsigned addressed:1;//地址访问的变量unsigned computed:1; //地址树的标志.addrtree函数处理unsigned temporary:1;//生成的临时变量标志unsigned generated:1;//生成的符号标志unsigned defined:1; //符号被定义了,避免声明多次Type type;//变量或者常量的类型float ref; //标号或变量的引用计数/*以上各项对于所有符号表的所有符号通用,常量和标号函数需要使用下面union中的一些域*/union { //保存标号struct {int label; //全局分配唯一的标号,这时name保存标号字符串Symbol equatedto;} l;struct {unsigned cfields:1;unsigned vfields:1;Table ftab;/* omit */Field flist;} s;int value;Symbol *idlist;struct {Value min, max;} limits;//保存常量的结构struct {Value v;//保存实际的常量值Symbol loc; //指向符号表的入口} c;struct {Coordinate pt;int label;int ncalls;Symbol *callee;} f;int seg; //全局变量或静态变量给出定义的段Symbol alias;struct {Node cse; //前端生成多次引用公共表达式的临时变量的DAG节点int replace;Symbol next;} t;} u;Xsymbol x;//后端使用的符号扩展,为变量分配的寄存器,调试信息数据等};

代码主要在sym.c文件中,如下

#include "c.h"
#include <stdio.h>

static char rcsid[] = "$Id: sym.c,v 1.1 2002/08/28 23:12:47 drh Exp $";

/*
 * equalp(x) - true when member x of the local Value `v` equals member x of
 * the constant stored in the hash entry `p`.  Only usable where both `v`
 * and `p` are in scope (see constant()).
 */
#define equalp(x) v.x == p->sym.u.c.v.x

/* A symbol table for one scope level. */
struct table {
    int level;          /* scope level of this table */
    Table previous;     /* table of the enclosing (outer) scope */
    struct entry {
        struct symbol sym;
        struct entry *link;     /* next entry on the same hash chain */
    } *buckets[256];    /* heads of the hash chains */
    /*
     * Head of the list, linked through symbol.up, of all symbols in this
     * scope and its enclosing scopes.
     */
    Symbol all;
};
#define HASHSIZE NELEMS(((Table)0)->buckets)

static struct table
    cns = { CONSTANTS },
    ext = { GLOBAL },
    ids = { GLOBAL },
    tys = { GLOBAL };
Table constants   = &cns;
Table externals   = &ext;   /* identifiers declared extern */
Table identifiers = &ids;   /* ordinary identifiers */
Table globals     = &ids;   /* initially the same table as identifiers */
Table types       = &tys;   /* type tags */
Table labels;
int level = GLOBAL;
static int tempid;
List loci, symbols;

/* newtable - allocate a zeroed table in the given arena. */
Table newtable(int arena) {
    Table new;

    NEW0(new, arena);
    return new;
}

/*
 * table - create a table for scope `level`, allocated in the FUNC arena,
 * whose enclosing table is tp.  The new table inherits tp's `all` list
 * when tp is non-null.
 */
Table table(Table tp, int level) {
    Table new = newtable(FUNC);

    new->previous = tp;
    new->level = level;
    if (tp)
        new->all = tp->all;
    return new;
}

/*
 * foreach - call apply(p, cl) for every symbol p of scope lev reachable
 * from tp.  The global coordinate `src` is set to each symbol's definition
 * point during the call and restored afterwards.
 */
void foreach(Table tp, int lev, void (*apply)(Symbol, void *), void *cl) {
    assert(tp);
    /* Walk outwards until we reach a table that is not deeper than lev. */
    while (tp && tp->level > lev)
        tp = tp->previous;
    if (tp && tp->level == lev) {
        Symbol p;
        Coordinate sav;

        sav = src;
        for (p = tp->all; p && p->scope == lev; p = p->up) {
            src = p->src;
            (*apply)(p, cl);
        }
        src = sav;
    }
}

/*
 * enterscope - enter the next nested scope.  The temporary counter is
 * reset when the level becomes LOCAL.
 */
void enterscope(void) {
    if (++level == LOCAL)
        tempid = 0;
}

/*
 * exitscope - leave the current scope and return to the enclosing one,
 * popping the types and identifiers tables if they belong to this level.
 */
void exitscope(void) {
    /* Per the original annotation: rmtypes (types.c) removes from the type
       cache the tagged types defined in the current scope. */
    rmtypes(level);
    if (types->level == level)
        types = types->previous;
    if (identifiers->level == level) {
        if (Aflag >= 2) {
            int n = 0;
            Symbol p;

            for (p = identifiers->all; p && p->scope == level; p = p->up)
                if (++n > 127) {
                    warning("more than 127 identifiers declared in a block\n");
                    break;
                }
        }
        identifiers = identifiers->previous;
    }
    assert(level >= GLOBAL);
    --level;
}

/*
 * install - allocate a symbol for name in arena and enter it into *tpp at
 * scope `level`.  If level is positive and deeper than *tpp's level, a new
 * table is created first and *tpp is updated to point at it.
 * Returns the new symbol.
 */
Symbol install(const char *name, Table *tpp, int level, int arena) {
    Table tp = *tpp;
    struct entry *p;
    /* The hash is derived from the pointer value of name, not its characters. */
    unsigned h = (unsigned long)name&(HASHSIZE-1);

    assert(level == 0 || level >= tp->level);
    if (level > 0 && tp->level < level)
        tp = *tpp = table(tp, level);
    NEW0(p, arena);
    p->sym.name = (char *)name;
    p->sym.scope = level;
    p->sym.up = tp->all;
    tp->all = &p->sym;
    p->link = tp->buckets[h];
    tp->buckets[h] = p;
    return &p->sym;
}

/*
 * relocate - move the entry for name from table src to table dst and
 * return its symbol.  The entry must exist in src (asserted).
 */
Symbol relocate(const char *name, Table src, Table dst) {
    struct entry *p, **q;
    Symbol *r;
    unsigned h = (unsigned long)name&(HASHSIZE-1);

    for (q = &src->buckets[h]; *q; q = &(*q)->link)
        if (name == (*q)->sym.name)
            break;
    assert(*q);
    /*
     Remove the entry from src's hash chain
      and from its list of all symbols.
    */
    p = *q;
    *q = (*q)->link;
    for (r = &src->all; *r && *r != &p->sym; r = &(*r)->up)
        ;
    assert(*r == &p->sym);
    *r = p->sym.up;
    /*
     Insert the entry into dst's hash chain
      and into its list of all symbols.
      Return the symbol-table entry.
    */
    p->link = dst->buckets[h];
    dst->buckets[h] = p;
    p->sym.up = dst->all;
    dst->all = &p->sym;
    return &p->sym;
}

/*
 * lookup - search tp and then each enclosing table for name.
 * Returns the symbol, or NULL when it is not found.
 * Names are compared by pointer, so the caller must pass the same pointer
 * that was installed (presumably an interned string - confirm with string()).
 */
Symbol lookup(const char *name, Table tp) {
    struct entry *p;
    unsigned h = (unsigned long)name&(HASHSIZE-1);

    assert(tp);
    do
        for (p = tp->buckets[h]; p; p = p->link)
            if (name == p->sym.name)
                return &p->sym;
    while ((tp = tp->previous) != NULL);
    return NULL;
}

/* genlabel - reserve n consecutive label numbers and return the first. */
int genlabel(int n) {
    static int label = 1;

    label += n;
    return label - n;
}

/*
 * findlabel - return the symbol for label number lab in the labels table,
 * creating it (and announcing it to the back end) if it does not exist.
 */
Symbol findlabel(int lab) {
    struct entry *p;
    unsigned h = lab&(HASHSIZE-1);

    for (p = labels->buckets[h]; p; p = p->link)
        if (lab == p->sym.u.l.label)
            return &p->sym;     /* found */
    NEW0(p, FUNC);              /* not found: create it */
    p->sym.name = stringd(lab);
    p->sym.scope = LABELS;
    p->sym.up = labels->all;
    /* `all` always points at the most recently added symbol, the head of
       the up chain. */
    labels->all = &p->sym;
    p->link = labels->buckets[h];
    labels->buckets[h] = p;
    p->sym.generated = 1;
    p->sym.u.l.label = lab;
    (*IR->defsymbol)(&p->sym);  /* tell the back end */
    return &p->sym;
}

/*
 * constant - look up the constant of type ty and value v in the constants
 * table, adding it if it is not there yet.  Returns its symbol.
 */
Symbol constant(Type ty, Value v) {
    struct entry *p;
    unsigned h = v.u&(HASHSIZE-1);
    /* little.endian is 1 when the first byte of an int holding 1 is 1. */
    static union { int x; char endian; } little = { 1 };

    ty = unqual(ty);    /* strip const and volatile */
    for (p = constants->buckets[h]; p; p = p->link)
        if (eqtype(ty, p->sym.type, 1))
            switch (ty->op) {
            case INT:      if (equalp(i)) return &p->sym; break;
            case UNSIGNED: if (equalp(u)) return &p->sym; break;
            case FLOAT:
                if (v.d == 0.0) {
                    /* Zeros compare equal with ==, so also compare one end
                       byte of the float representation, selected by byte
                       order (presumably the byte holding the sign, to keep
                       +0.0 and -0.0 apart - confirm). */
                    float z1 = v.d, z2 = p->sym.u.c.v.d;
                    char *b1 = (char *)&z1, *b2 = (char *)&z2;
                    if (z1 == z2
                    && (!little.endian && b1[0] == b2[0]
                    ||   little.endian && b1[sizeof (z1)-1] == b2[sizeof (z2)-1]))
                        return &p->sym;
                } else if (equalp(d))
                    return &p->sym;
                break;
            case FUNCTION: if (equalp(g)) return &p->sym; break;
            case ARRAY:
            case POINTER:  if (equalp(p)) return &p->sym; break;
            default: assert(0);
            }
    NEW0(p, PERM);
    p->sym.name = vtoa(ty, v);
    p->sym.scope = CONSTANTS;
    p->sym.type = ty;
    p->sym.sclass = STATIC;
    p->sym.u.c.v = v;
    p->link = constants->buckets[h];
    p->sym.up = constants->all;
    constants->all = &p->sym;
    constants->buckets[h] = p;
    if (ty->u.sym && !ty->u.sym->addressed)
        (*IR->defsymbol)(&p->sym);  /* tell the back end */
    p->sym.defined = 1;
    return &p->sym;
}

/* intconst - convenience wrapper: find or create the int constant n. */
Symbol intconst(int n) {
    Value v;

    v.i = n;
    return constant(inttype, v);
}

/*
 * genident - create a generated identifier with storage class scls, type
 * ty and scope lev.  Its name is a fresh label number.  The symbol is
 * allocated in FUNC when lev >= LOCAL, otherwise in PERM.
 */
Symbol genident(int scls, Type ty, int lev) {
    Symbol p;

    NEW0(p, lev >= LOCAL ? FUNC : PERM);
    p->name = stringd(genlabel(1));
    p->scope = lev;
    p->sclass = scls;
    p->type = ty;
    p->generated = 1;
    /* Only globals are announced here; per the original annotation,
       parameters and locals are announced to the back end elsewhere. */
    if (lev == GLOBAL)
        (*IR->defsymbol)(p);
    return p;
}

/*
 * temporary - create a temporary of storage class scls and type ty.
 * It is named from the tempid counter, placed at scope LOCAL or deeper,
 * and marked both temporary and generated.
 */
Symbol temporary(int scls, Type ty) {
    Symbol p;

    NEW0(p, FUNC);
    p->name = stringd(++tempid);
    p->scope = level < LOCAL ? LOCAL : level;
    p->sclass = scls;
    p->type = ty;
    p->temporary = 1;
    p->generated = 1;
    return p;
}

/*
 * newtemp - create a temporary whose type is obtained from the type suffix
 * tc and size via btot, announce it to the back end as a local and mark it
 * defined.
 */
Symbol newtemp(int sclass, int tc, int size) {
    Symbol p = temporary(sclass, btot(tc, size));

    (*IR->local)(p);
    p->defined = 1;
    return p;
}

/* allsymbols - return tp->all, the head of tp's list of all symbols. */
Symbol allsymbols(Table tp) {
    return tp->all;
}

/* locus - append cp to loci and tp's current `all` head to symbols. */
void locus(Table tp, Coordinate *cp) {
    loci    = append(cp, loci);
    symbols = append(allsymbols(tp), symbols);
}

/*
 * use - record a use of p at coordinate src by appending a PERM-allocated
 * copy of src to p->uses.  The parameter shadows the global `src`.
 */
void use(Symbol p, Coordinate src) {
    Coordinate *cp;

    NEW(cp, PERM);
    *cp = src;
    p->uses = append(cp, p->uses);
}

/* findtype - find type ty in identifiers */
Symbol findtype(Type ty) {
    Table tp = identifiers;
    int i;
    struct entry *p;

    assert(tp);
    do
        for (i = 0; i < HASHSIZE; i++)
            for (p = tp->buckets[i]; p; p = p->link)
                if (p->sym.type == ty && p->sym.sclass == TYPEDEF)
                    return &p->sym;
    while ((tp = tp->previous) != NULL);
    return NULL;
}

/* mkstr - make a string constant */
Symbol mkstr(char *str) {
    Value v;
    Symbol p;

    v.p = str;
    p = constant(array(chartype, strlen(v.p) + 1, 0), v);
    if (p->u.c.loc == NULL)
        p->u.c.loc = genident(STATIC, p->type, GLOBAL);
    return p;
}

/* mksymbol - make a symbol for name, install in &globals if sclass==EXTERN */
Symbol mksymbol(int sclass, const char *name, Type ty) {
    Symbol p;

    if (sclass == EXTERN)
        p = install(string(name), &globals, GLOBAL, PERM);
    else {
        NEW0(p, PERM);
        p->name = string(name);
        p->scope = GLOBAL;
    }
    p->sclass = sclass;
    p->type = ty;
    (*IR->defsymbol)(p);
    p->defined = 1;
    return p;
}

/* vtoa - return string for the constant v of type ty */
char *vtoa(Type ty, Value v) {
    /* The unused local `char buf[50]` was removed here; the article notes
       that upstream lcc dropped it as well. */
    ty = unqual(ty);
    switch (ty->op) {
    case INT:      return stringd(v.i);
    /* NOTE(review): "%U" is not a standard printf conversion; presumably
       stringf is the project's own formatter and accepts it - confirm. */
    case UNSIGNED: return stringf((v.u&~0x7FFF) ? "0x%X" : "%U", v.u);
    case FLOAT:    return stringf("%g", (double)v.d);
    case ARRAY:
        /* Character arrays are returned as-is: v.p is the string itself. */
        if (ty->type == chartype || ty->type == signedchar
        ||  ty->type == unsignedchar)
            return v.p;
        return stringf("%p", v.p);
    case POINTER:  return stringf("%p", v.p);
    case FUNCTION: return stringf("%p", v.g);
    }
    assert(0); return NULL;
}



0 0
原创粉丝点击