Studying note of GCC-3.4.6 source (19 cont)

来源:互联网 发布:java发送邮件带附件 编辑:程序博客网 时间:2024/06/14 10:31

Field op_stack in cpp_reader supports the multiple-include optimization (using #if !defined and #endif to enclose an #include directive); it holds the tokens of the #if or #elif expression.

 

cpp_create_reader (continue)

 

199    /* The expression parser stack.  */

200    _cpp_expand_op_stack (pfile);

200 

201    /* Initialize the buffer obstack.  */

202    _obstack_begin (&pfile->buffer_ob, 0, 0,

203                 (void *(*) (long)) xmalloc,

204                 (void (*) (void *)) free);

205 

206    _cpp_init_files (pfile);

207 

208    _cpp_init_hashtable (pfile, table);

209 

210    return pfile;

211   }

 

The stack is initialized and expanded by _cpp_expand_op_stack.

 

970  struct op *

971  _cpp_expand_op_stack (cpp_reader *pfile)                                            in cppexp.c

972  {

973    size_t old_size = (size_t) (pfile->op_limit - pfile->op_stack);

974    size_t new_size = old_size * 2 + 20;

975 

976    pfile->op_stack = xrealloc (pfile->op_stack, new_size * sizeof (struct op));

977    pfile->op_limit = pfile->op_stack + new_size;

978 

979    return pfile->op_stack + old_size;

980  }

 

At first, the size of the stack is 20 entries (on the first call old_size is 0, so new_size = 0 * 2 + 20); each later call roughly doubles it. The definition of op is given below.

 

31    struct op                                                                                            in cppexp.c

32    {

33      const cpp_token *token;    /* The token forming op (for diagnostics).  */

34      cpp_num value;         /* The value logically "right" of op.  */

35      enum cpp_ttype op;

36    };

 

The token field of op holds the token that forms the operator, for use in diagnostics. The value field records the token’s value (if it has one), which is key for the semantic action. The op field holds the token type, which is also key for semantic analysis.

Go deeper into op.

 

165  #define PREV_WHITE (1 << 0) /* If whitespace before this token.  */         in cpplib.h

166  #define DIGRAPH        (1 << 1) /* If it was a digraph.  */

167  #define STRINGIFY_ARG   (1 << 2) /* If macro argument to be stringified.  */

168  #define PASTE_LEFT   (1 << 3) /* If on LHS of a ## operator.  */

169  #define NAMED_OP    (1 << 4) /* C++ named operators.  */

170  #define NO_EXPAND  (1 << 5) /* Do not macro-expand this token.  */

171  #define BOL         (1 << 6) /* Token at beginning of line.  */

 

The macros above are used in the flags field of cpp_token to indicate characteristics of the token. The definition of cpp_token is as below.

 

175  struct cpp_token                                                                                         in cpplib.h

176  {

177    fileline line;              /* Logical line of first char of token.  */

178    unsigned short col;            /* Column of first char of token.  */

179    ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */

180    unsigned char flags;          /* flags - see above */

181 

182    union

183    {

184      cpp_hashnode *node;            /* An identifier.  */

185      const cpp_token *source;       /* Inherit padding from this token.  */

186      struct cpp_string str;      /* A string, or number.  */

187      unsigned int arg_no;      /* Argument no. for a CPP_MACRO_ARG.  */

188    } val;

189  };

 

The value field of op is defined as below. Notice that cpp_num_part in the struct is the widest integer type in the machine.

 

4929 struct cpp_num                                                                                         in cpplib.h

4930 {

4931   cpp_num_part high;

4932   cpp_num_part low;

4933   bool unsignedp;  /* True if value should be treated as unsigned.  */

4934   bool overflow;   /* True if the most recent calculation overflowed.  */

4935 };

 

At line 179 above, cpp_ttype is defined as below. Pay attention to the accompanying definitions of OP and TK.

 

143  #define OP(e, s) e,                                                                                    in cpplib.h

144  #define TK(e, s) e,

145  enum cpp_ttype

146  {

147    TTYPE_TABLE

148    N_TTYPES

149  };

150  #undef OP

151  #undef TK

 

The content of TTYPE_TABLE at line 147 is as below; it contains all possible token types.

 

60    #define TTYPE_TABLE                          \                                                in cpplib.h

61      OP(CPP_EQ = 0,      "=")               \

62      OP (CPP_NOT,         "!")                \

63      OP (CPP_GREATER,       ">") /* compare */  \

64      OP (CPP_LESS,              "<")               \

65      OP (CPP_PLUS,              "+") /* math */       \

66      OP (CPP_MINUS,           "-")                \

67      OP (CPP_MULT,             "*")               \

68      OP (CPP_DIV,          "/")                \

69     OP (CPP_MOD,              "%")                     \

70      OP (CPP_AND,        "&")       /* bit ops */    \

71     OP (CPP_OR,           "|")                \

72      OP (CPP_XOR,        "^")               \

73      OP (CPP_RSHIFT,    ">>")                    \

74      OP (CPP_LSHIFT,    "<<")                    \

75      OP (CPP_MIN,        "<?")      /* extension */       \

76      OP (CPP_MAX,              ">?")                    \

77    \

78      OP (CPP_COMPL,           "~")               \

79      OP (CPP_AND_AND,      "&&")    /* logical */    \

80      OP (CPP_OR_OR,           "||")               \

81      OP (CPP_QUERY,           "?")               \

82      OP (CPP_COLON,           ":")                \

83      OP (CPP_COMMA,         ",")  /* grouping */ \

84      OP (CPP_OPEN_PAREN,       "(")                \

85      OP (CPP_CLOSE_PAREN,     ")")                \

86      TK(CPP_EOF,          SPELL_NONE)            \

87      OP (CPP_EQ_EQ,           "==")      /* compare */  \

88      OP (CPP_NOT_EQ,  "!=")                     \

89      OP (CPP_GREATER_EQ,       ">=")                    \

90      OP (CPP_LESS_EQ, "<=")                    \

91    \

92      /* These two are unary + / - in preprocessor expressions.  */ \

93      OP (CPP_PLUS_EQ,       "+=")      /* math */       \

94      OP (CPP_MINUS_EQ,     "-=")                     \

95    \

96      OP (CPP_MULT_EQ,       "*=")                    \

97      OP (CPP_DIV_EQ,   "/=")                     \

98      OP (CPP_MOD_EQ, "%=")                   \

99      OP (CPP_AND_EQ, "&=")     /* bit ops */    \

100    OP (CPP_OR_EQ,           "|=")                     \

101    OP (CPP_XOR_EQ,  "^=")                    \

102    OP (CPP_RSHIFT_EQ,    ">>=")                  \

103    OP (CPP_LSHIFT_EQ,    "<<=")                  \

104    OP (CPP_MIN_EQ,  "<?=")    /* extension */       \

105    OP (CPP_MAX_EQ, ">?=")                  \

106    /* Digraphs together, beginning with CPP_FIRST_DIGRAPH.  */      \

107    OP (CPP_HASH,             "#") /* digraphs */  \

108    OP (CPP_PASTE,            "##")                    \

109    OP (CPP_OPEN_SQUARE,    "[")                \

110     OP (CPP_CLOSE_SQUARE,   "]")                \

111     OP (CPP_OPEN_BRACE,       "{")               \

112     OP (CPP_CLOSE_BRACE,     "}")               \

113     /* The remainder of the punctuation. Order is not significant.  */ \

114     OP (CPP_SEMICOLON,  ";")  /* structure */ \

115     OP (CPP_ELLIPSIS, "...")                     \

116     OP (CPP_PLUS_PLUS,    "++")      /* increment */       \

117     OP (CPP_MINUS_MINUS,     "--")                     \

118     OP (CPP_DEREF,           "->")       /* accessors */ \

119     OP (CPP_DOT,         ".")                \

120    OP (CPP_SCOPE,           "::")               \

121    OP (CPP_DEREF_STAR, "->*")                   \

122    OP (CPP_DOT_STAR,     ".*")                     \

123    OP (CPP_ATSIGN,   "@")  /* used in Objective-C */ \

124  \

125    TK (CPP_NAME,            SPELL_IDENT)    /* word */                    \

126    TK (CPP_AT_NAME,   SPELL_IDENT)    /* @word - Objective-C */       \

127    TK (CPP_NUMBER, SPELL_LITERAL) /* 34_be+ta  */                   \

128  \

129    TK (CPP_CHAR,             SPELL_LITERAL) /* 'char' */                     \

130    TK (CPP_WCHAR,          SPELL_LITERAL) /* L'char' */                   \

131    TK (CPP_OTHER,           SPELL_LITERAL) /* stray punctuation */          \

132  \

133    TK (CPP_STRING,   SPELL_LITERAL) /* "string" */                 \

134    TK (CPP_WSTRING,       SPELL_LITERAL) /* L"string" */               \

135    TK (CPP_OBJC_STRING,   SPELL_LITERAL)  /* @"string" - Objective-C */      \

136    TK (CPP_HEADER_NAME,   SPELL_LITERAL) /* <stdio.h> in #include */    \

137  \

138    TK (CPP_COMMENT,     SPELL_LITERAL) /* Only if output comments.  */ \

139                                       /* SPELL_LITERAL happens to DTRT.  */ \

140    TK (CPP_MACRO_ARG, SPELL_NONE)     /* Macro argument.  */        \

141    TK (CPP_PADDING,       SPELL_NONE)     /* Whitespace for cpp0.  */

 

In the definition, OP should be the abbreviation of “operator”, and TK should be the abbreviation of “token”.

cpp_reader uses hash tables to manage the files being compiled and the directories in which header files are searched for. _cpp_init_files initializes these hash tables.

 

930  void

931  _cpp_init_files (cpp_reader *pfile)                                                              in cppfiles.c

932  {

933    pfile->file_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,

934                                  NULL, xcalloc, free);

935    pfile->dir_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,

936                                 NULL, xcalloc, free);

937    allocate_file_hash_entries (pfile);

938  }

 

The last step of cpp_create_reader is to invoke _cpp_init_hashtable, shown below.

Here, we see that ident_hash is passed as the argument table at the invocation. It has entries for all identifiers: either macros defined by #define commands (type NT_MACRO), assertions created with #assert (NT_ASSERTION), or neither of the above (NT_VOID). Builtin macros like __LINE__ are flagged NODE_BUILTIN. Poisoned identifiers are flagged NODE_POISONED. NODE_OPERATOR (C++ only) indicates an identifier that behaves like an operator such as "xor". NODE_DIAGNOSTIC is for speed in lex_token: it indicates a diagnostic may be required for this node. Currently this only applies to __VA_ARGS__ and poisoned identifiers.

 

47    void

48    _cpp_init_hashtable (cpp_reader *pfile, hash_table *table)                      in cpphash.c

49    {

50      struct spec_nodes *s;

51   

52      if (table == NULL)

53      {

54        pfile->our_hashtable = 1;

55        table = ht_create (13);   /* 8K (=2^13) entries.  */

56        table->alloc_node = (hashnode (*) (hash_table *)) alloc_node;

57   

58        _obstack_begin (&pfile->hash_ob, 0, 0,

59                        (void *(*) (long)) xmalloc,

60                    (void (*) (void *)) free);

61      }

62   

63      table->pfile = pfile;

64      pfile->hash_table = table;

65   

66      /* Now we can initialize things that use the hash table.  */

67      _cpp_init_directives (pfile);

68      _cpp_init_internal_pragmas (pfile);

69   

70      s = &pfile->spec_nodes;

71      s->n_defined      = cpp_lookup (pfile, DSC("defined"));

72      s->n_true           = cpp_lookup (pfile, DSC ("true"));

73      s->n_false          = cpp_lookup (pfile, DSC ("false"));

74      s->n__VA_ARGS__ = cpp_lookup (pfile, DSC ("__VA_ARGS__"));

75      s->n__VA_ARGS__->flags |= NODE_DIAGNOSTIC;

76    }

 

C/C++ defines a series of preprocessor directives. _cpp_init_directives ensures that a hashnode for each directive is present in the hash_table belonging to cpp_reader.

 

1983 void

1984 _cpp_init_directives (cpp_reader *pfile)                                                      in cpplib.c

1985 {

1986   unsigned int i;

1987   cpp_hashnode *node;

1988

1989   for (i = 0; i < (unsigned int) N_DIRECTIVES; i++)

1990   {

1991     node = cpp_lookup (pfile, dtable[i].name, dtable[i].length);

1992     node->is_directive = 1;

1993     node->directive_index = i;

1994   }

1995 }

 

At line 1991 above, dtable is initialized according to the content of DIRECTIVE_TABLE in following way.

 

179    #define D(name, t, origin, flags) \                                                             in cpplib.c

180    { do_##name, (const uchar *) #name, \

181      sizeof #name - 1, origin, flags },

182    static const directive dtable[] =

183    {

184    DIRECTIVE_TABLE

185    };

186    #undef D

187    #undef DIRECTIVE_TABLE

 

The node that records the details of a directive has the following definition. Note that the handler field points to the function that handles the directive.

 

84      struct directive                                                                                         in cpplib.c

85      {

86        directive_handler handler;      /* Function to handle directive.  */

87        const uchar *name;         /* Name of directive.  */

88        unsigned short length;     /* Length of name.  */

89        unsigned char origin;      /* Origin of directive.  */

90        unsigned char flags;     /* Flags describing this directive.  */

91      };

 

Then DIRECTIVE_TABLE is expanded by the macro D, which is defined at line 179 above. Taking the first line as an example, after expansion it becomes: { do_define, (const uchar *) "define", sizeof "define" - 1, KANDR, IN_I },

 

143    #define DIRECTIVE_TABLE                         \                                         in cpplib.c

144    D(define,     T_DEFINE = 0,     KANDR,     IN_I)        /* 270554 */ \

145    D(include,    T_INCLUDE, KANDR,     INCL | EXPAND)  /* 52262 */ \

146    D(endif,       T_ENDIF,      KANDR,     COND)     /* 45855 */ \

147    D(ifdef,              T_IFDEF,       KANDR,     COND | IF_COND) /* 22000 */ \

148    D(if,            T_IF,             KANDR, COND | IF_COND | EXPAND) /* 18162 */ \

149    D(else,         T_ELSE,        KANDR,     COND)     /* 9863 */ \

150    D(ifndef,     T_IFNDEF,    KANDR,     COND | IF_COND) /* 9675 */ \

151    D(undef,      T_UNDEF,     KANDR,     IN_I)        /* 4837 */ \

152    D(line,         T_LINE,        KANDR,     EXPAND)        /* 2465 */ \

153    D(elif,         T_ELIF,         STDC89,    COND | EXPAND)  /* 610 */ \

154    D(error,              T_ERROR,     STDC89,    0)              /* 475 */ \

155    D(pragma,   T_PRAGMA,  STDC89,    IN_I)       /* 195 */ \

156    D(warning,  T_WARNING, EXTENSION, 0)          /* 22 */ \

157    D(include_next,   T_INCLUDE_NEXT,    EXTENSION, INCL | EXPAND)  /* 19 */ \

158    D(ident,       T_IDENT,      EXTENSION, IN_I)        /* 11 */ \

159    D(import,    T_IMPORT,    EXTENSION, INCL | EXPAND)  /* 0 ObjC */ \

160    D(assert,      T_ASSERT,    EXTENSION, 0)             /* 0 SVR4 */    \

161    D(unassert,   T_UNASSERT,      EXTENSION, 0)             /* 0 SVR4 */    \

162    D(sccs,        T_SCCS,        EXTENSION, 0)             /* 0 SVR4? */

 

The second column of the above lines is expanded into an enum type elsewhere in cpplib.c in a similar way. The third and fourth columns are macros that are already defined. The macros in the third column indicate the origin of the directives; the following macros are defined:

KANDR: directives come from traditional (K&R) C

STDC89: directives come from the 1989 C standard

EXTENSION: directives are extensions

Macros in the fourth column indicate characteristics of the directives; the following macros are defined:

COND: indicates a conditional

IF_COND: an opening conditional

INCL: means to treat "..." and <...> as q-char and h-char sequences respectively

IN_I: means this directive should be handled even if -fpreprocessed is in effect (these are the directives with callback hooks)

EXPAND: is set on directives that are always macro-expanded

Next, _cpp_init_internal_pragmas registers the #pragmas that the preprocessor itself handles. [4] gives a detailed explanation.

#pragma GCC dependency

#pragma GCC dependency allows you to check the relative dates of the current file and another file. If the other file is more recent than the current file, a warning is issued. This is useful if the current file is derived from the other file, and should be regenerated. The other file is searched for using the normal include search path. Optional trailing text can be used to give more information in the warning message.

       #pragma GCC dependency "parse.y"

       #pragma GCC dependency "/usr/include/time.h" rerun fixincludes

#pragma GCC poison

Sometimes, there is an identifier that you want to remove completely from your program, and make sure that it never creeps back in. To enforce this, you can poison the identifier with this pragma. #pragma GCC poison is followed by a list of identifiers to poison. If any of those identifiers appears anywhere in the source after the directive, it is a hard error. For example,

       #pragma GCC poison printf sprintf fprintf

       sprintf (some_string, "hello");

will produce an error.

If a poisoned identifier appears as part of the expansion of a macro which was defined before the identifier was poisoned, it will not cause an error. This lets you poison an identifier without worrying about system headers defining macros that use it. For example,

       #define strrchr rindex

       #pragma GCC poison rindex

       strrchr (some_string, 'h');

will not produce an error.

#pragma GCC system_header

This pragma takes no arguments. It causes the rest of the code in the current file to be treated as if it came from a system header (all warnings, other than those generated by ‘#warning’, are suppressed while GCC is processing a system header. Macros defined in a system header are immune to a few warnings wherever they are expanded. This immunity is granted on an ad-hoc basis, when we find that a warning generates lots of false positives because of code in macros defined in system headers).

#pragma once is a non-standard but widely supported preprocessor directive designed to cause the current source file to be included only once in a single compilation. http://en.wikipedia.org/wiki/Pragma_once gives more details.

 

1048 void

1049 _cpp_init_internal_pragmas (cpp_reader *pfile)                                            in cpplib.c

1050 {

1051   /* Pragmas in the global namespace.  */

1052   cpp_register_pragma (pfile, 0, "once", do_pragma_once);

1053

1054   /* New GCC-specific pragmas should be put in the GCC namespace.  */

1055   cpp_register_pragma (pfile, "GCC", "poison", do_pragma_poison);

1056   cpp_register_pragma (pfile, "GCC", "system_header", do_pragma_system_header);

1057   cpp_register_pragma (pfile, "GCC", "dependency", do_pragma_dependency);

1058 }

 

GCC defines struct pragma_entry to represent a #pragma.

 

49    typedef void (*pragma_cb) (cpp_reader *);                                                   in cpplib.c

50    struct pragma_entry

51    {

52      struct pragma_entry *next;

53      const cpp_hashnode *pragma;   /* Name and length.  */

54      int is_nspace;

55      union {

56        pragma_cb handler;

57        struct pragma_entry *space;

58      } u;

59    };

 

Note that for an ordinary #pragma, handler at line 56 is used: it is a function pointer that implements the #pragma. For an entry that represents a pragma namespace (such as GCC in #pragma GCC poison), space at line 57 is used instead to link all the #pragmas registered within that namespace, and is_nspace at line 54 is set.

The compiler records each #pragma in a struct pragma_entry, and all #pragmas are saved in the pragmas field of parse_in, so that a #pragma can be handled as soon as it is seen.

 

1005 void

1006 cpp_register_pragma (cpp_reader *pfile, const char *space,                          in cpplib.c

1007                   const char *name, pragma_cb handler)

1008 {

1009   struct pragma_entry **chain = &pfile->pragmas;

1010   struct pragma_entry *entry;

1011   const cpp_hashnode *node;

1012

1013   if (!handler)

1014     abort ();

1015

1016   if (space)

1017   {

1018     node = cpp_lookup (pfile, U space, strlen (space));

1019     entry = lookup_pragma_entry (*chain, node);

1020     if (!entry)

1021       entry = insert_pragma_entry (pfile, chain, node, NULL);

1022     else if (!entry->is_nspace)

1023       goto clash;

1024     chain = &entry->u.space;

1025   }

1026

1027   /* Check for duplicates.  */

1028   node = cpp_lookup (pfile, U name, strlen (name));

1029   entry = lookup_pragma_entry (*chain, node);

1030   if (entry)

1031   {

1032     if (entry->is_nspace)

1033 clash:

1034       cpp_error (pfile, CPP_DL_ICE,

1035                "registering /"%s/" as both a pragma and a pragma namespace",

1036                NODE_NAME (node));

1037     else if (space)

1038       cpp_error (pfile, CPP_DL_ICE, "#pragma %s %s is already registered",

1039                space, name);

1040     else

1041       cpp_error (pfile, CPP_DL_ICE, "#pragma %s is already registered", name);

1042   }

1043   else

1044     insert_pragma_entry (pfile, chain, node, handler);

1045 }

 

Because there are only a few kinds of #pragma, a simple null-terminated list is good enough. Notice that for directives like #pragma GCC dependency, GCC forms a namespace; dependency, poison, and system_header are within this namespace. Thus the node corresponding to GCC is a branch containing the nodes for its contents.

 

965  static struct pragma_entry *

966  lookup_pragma_entry (struct pragma_entry *chain, const cpp_hashnode *pragma) in cpplib.c

967  {

968    while (chain && chain->pragma != pragma)

969      chain = chain->next;

970 

971    return chain;

972  }

 

In insert_pragma_entry, notice that argument pragma is of type cpp_hashnode which is the identifier for the directive in ident_hash table.

 

977  static struct pragma_entry *

978  insert_pragma_entry (cpp_reader *pfile, struct pragma_entry **chain,             in cpplib.c

979                     const cpp_hashnode *pragma, pragma_cb handler)

980  {

981    struct pragma_entry *new;

982 

983    new = (struct pragma_entry *)

984      _cpp_aligned_alloc (pfile, sizeof (struct pragma_entry));

985    new->pragma = pragma;

986    if (handler)

987    {

988      new->is_nspace = 0;

989      new->u.handler = handler;

990    }

991    else

992    {

993      new->is_nspace = 1;

994      new->u.space = NULL;

995    }

996 

997    new->next = *chain;

998    *chain = new;

999    return new;

1000 }

 

Back in _cpp_init_hashtable, spec_nodes of cpp_reader records special identifiers for the language. These are the “defined”, “true”, “false”, and “__VA_ARGS__” nodes, which are unique throughout the system, so we just use pointers in spec_nodes as below.

 

247  struct spec_nodes                                                                                       in cpphash.h

248  {

249    cpp_hashnode *n_defined;        /* defined operator */

250    cpp_hashnode *n_true;                    /* C++ keyword true */

251    cpp_hashnode *n_false;                   /* C++ keyword false */

252    cpp_hashnode *n__VA_ARGS__;    /* C99 vararg macros */

253  };

 

587  #define DSC(str) (const uchar *)str, sizeof str - 1                                          in cpphash.h

 

With the above DSC definition, unique nodes for “defined”, “true”, “false”, and “__VA_ARGS__” are generated as the last step of the function.

 

原创粉丝点击