Studying note of GCC-3.4.6 source (78)

来源：互联网发布：经期记录软件编辑：程序博客网时间：2024/05/22 14:35

5.6.1.1.2. Number

The preprocessor knows little about numbers beyond which characters are digits. This is acceptable, and it keeps the preprocessor flexible. But when the lexer receives the digit sequence returned by the preprocessor, it needs to know how to interpret it. Routine cpp_classify_number sets flags according to the literal spelling of the number.

143 unsigned int

144 cpp_classify_number (cpp_reader *pfile, const cpp_token *token) in cppexp.c

145 {

146 const uchar *str = token->val.str.text;

147 const uchar *limit;

148 unsigned int max_digit, result, radix;

149 enum {NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON} float_flag;

150

151 /* If the lexer has done its job, length one can only be a single

152 digit. Fast-path this very common case. */

153 if (token->val.str.len == 1)

154 return CPP_N_INTEGER | CPP_N_SMALL | CPP_N_DECIMAL;

155

156 limit = str + token->val.str.len;

157 float_flag = NOT_FLOAT;

158 max_digit = 0;

159 radix = 10;

160

161 /* First, interpret the radix. */

162 if (*str == '0')

163 {

164 radix = 8;

165 str++;

166

167 /* Require at least one hex digit to classify it as hex. */

168 if ((*str == 'x' || *str == 'X')

169 && (str[1] == '.' || ISXDIGIT (str[1])))

170 {

171 radix = 16;

172 str++;

173 }

174 }

175

176 /* Now scan for a well-formed integer or float. */

177 for (;;)

178 {

179 unsigned int c = *str++;

180

181 if (ISDIGIT (c) || (ISXDIGIT (c) && radix == 16))

182 {

183 c = hex_value (c);

184 if (c > max_digit)

185 max_digit = c;

186 }

187 else if (c == '.')

188 {

189 if (float_flag == NOT_FLOAT)

190 float_flag = AFTER_POINT;

191 else

192 SYNTAX_ERROR ("too many decimal points in number");

193 }

194 else if ((radix <= 10 && (c == 'e' || c == 'E'))

195 || (radix == 16 && (c == 'p' || c == 'P')))

196 {

197 float_flag = AFTER_EXPON;

198 break;

199 }

200 else

201 {

202 /* Start of suffix. */

203 str--;

204 break;

205 }

206 }

207

208 if (float_flag != NOT_FLOAT && radix == 8)

209 radix = 10;

210

211 if (max_digit >= radix)

212 SYNTAX_ERROR2 ("invalid digit \"%c\" in octal constant", '0' + max_digit);

213

214 if (float_flag != NOT_FLOAT)

215 {

216 if (radix == 16 && CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, c99))

217 cpp_error (pfile, CPP_DL_PEDWARN,

218 "use of C99 hexadecimal floating constant");

219

220 if (float_flag == AFTER_EXPON)

221 {

222 if (*str == '+' || *str == '-')

223 str++;

224

225 /* Exponent is decimal, even if string is a hex float. */

226 if (!ISDIGIT (*str))

227 SYNTAX_ERROR ("exponent has no digits");

228

229 do

230 str++;

231 while (ISDIGIT (*str));

232 }

233 else if (radix == 16)

234 SYNTAX_ERROR ("hexadecimal floating constants require an exponent");

235

236 result = interpret_float_suffix (str, limit - str);

237 if (result == 0)

238 {

239 cpp_error (pfile, CPP_DL_ERROR,

240 "invalid suffix \"%.*s\" on floating constant",

241 (int) (limit - str), str);

242 return CPP_N_INVALID;

243 }

244

245 /* Traditional C didn't accept any floating suffixes. */

246 if (limit != str

247 && CPP_WTRADITIONAL (pfile)

248 && ! cpp_sys_macro_p (pfile))

249 cpp_error (pfile, CPP_DL_WARNING,

250 "traditional C rejects the \"%.*s\" suffix",

251 (int) (limit - str), str);

252

253 result |= CPP_N_FLOATING;

254 }

255 else

256 {

257 result = interpret_int_suffix (str, limit - str);

258 if (result == 0)

259 {

260 cpp_error (pfile, CPP_DL_ERROR,

261 "invalid suffix \"%.*s\" on integer constant",

262 (int) (limit - str), str);

263 return CPP_N_INVALID;

264 }

265

266 /* Traditional C only accepted the 'L' suffix.

267 Suppress warning about 'LL' with -Wno-long-long. */

268 if (CPP_WTRADITIONAL (pfile) && ! cpp_sys_macro_p (pfile))

269 {

270 int u_or_i = (result & (CPP_N_UNSIGNED|CPP_N_IMAGINARY));

271 int large = (result & CPP_N_WIDTH) == CPP_N_LARGE;

272

273 if (u_or_i || (large && CPP_OPTION (pfile, warn_long_long)))

274 cpp_error (pfile, CPP_DL_WARNING,

275 "traditional C rejects the \"%.*s\" suffix",

276 (int) (limit - str), str);

277 }

278

279 if ((result & CPP_N_WIDTH) == CPP_N_LARGE

280 && ! CPP_OPTION (pfile, c99)

281 && CPP_OPTION (pfile, warn_long_long))

282 cpp_error (pfile, CPP_DL_PEDWARN,

283 "use of C99 long long integer constant");

284

285 result |= CPP_N_INTEGER;

286 }

287

288 if ((result & CPP_N_IMAGINARY) && CPP_PEDANTIC (pfile))

289 cpp_error (pfile, CPP_DL_PEDWARN,

290 "imaginary constants are a GCC extension");

291

292 if (radix == 10)

293 result |= CPP_N_DECIMAL;

294 else if (radix == 16)

295 result |= CPP_N_HEX;

296 else

297 result |= CPP_N_OCTAL;

298

299 return result;

300

301 syntax_error:

302 return CPP_N_INVALID;

303 }

In C++, there are several representations of a numeric constant; for instance, 1234, 0x4D2, 02322, 1.234e3 and 0x4.d2p8 all stand for the decimal value 1234. Of course, the last two are regarded as floating-point numbers: one uses 10 as the exponent base (e), the other uses 2 (p). Further, these constants can carry a suffix that restricts their properties. A floating-point constant can use f/F (single precision, float), l/L (long double), and i/I or j/J (imaginary constant, a GCC extension). An integer constant can use l/L (long), ll/LL (long long), u/U (unsigned), and i/I or j/J (imaginary constant, a GCC extension). Here, cpp_classify_number validates the suffix and sets CPP_N_SMALL, CPP_N_MEDIUM, or CPP_N_LARGE accordingly.

5.6.1.1.2.1. Case of integer

An integer constant is handled by the following function, which builds an INTEGER_CST node.

490 static tree

491 interpret_integer (const cpp_token *token, unsigned int flags) in c-lex.c

492 {

493 tree value, type;

494 enum integer_type_kind itk;

495 cpp_num integer;

496 cpp_options *options = cpp_get_options (parse_in);

497

498 integer = cpp_interpret_integer (parse_in, token, flags);

499 integer = cpp_num_sign_extend (integer, options->precision);

500 value = build_int_2_wide (integer.low, integer.high);

If it’s an integer number, it must first be validated for the target machine. Structure cpp_num is then used to collect the result from cpp_interpret_integer.

604 struct cpp_num in cpplib.h

605 {

606 cpp_num_part high;

607 cpp_num_part low;

608 bool unsignedp; /* True if value should be treated as unsigned. */

609 bool overflow; /* True if the most recent calculation overflowed. */

610 };

In this definition, the fields high and low have the widest unsigned integer type on the host machine. For Linux/x86, this type is unsigned long.

602 typedef unsigned HOST_WIDE_INT cpp_num_part; in cpplib.h

The argument type of cpp_interpret_integer is the set of flags that cpp_classify_number worked out above.

311 cpp_num

312 cpp_interpret_integer (cpp_reader *pfile, const cpp_token *token, in cppexp.c

313 unsigned int type)

314 {

315 const uchar *p, *end;

316 cpp_num result;

317

318 result.low = 0;

319 result.high = 0;

320 result.unsignedp = !!(type & CPP_N_UNSIGNED);

321 result.overflow = false;

322

323 p = token->val.str.text;

324 end = p + token->val.str.len;

325

326 /* Common case of a single digit. */

327 if (token->val.str.len == 1)

328 result.low = p[0] - '0';

329 else

330 {

331 cpp_num_part max;

332 size_t precision = CPP_OPTION (pfile, precision);

333 unsigned int base = 10, c = 0;

334 bool overflow = false;

335

336 if ((type & CPP_N_RADIX) == CPP_N_OCTAL)

337 {

338 base = 8;

339 p++;

340 }

341 else if ((type & CPP_N_RADIX) == CPP_N_HEX)

342 {

343 base = 16;

344 p += 2;

345 }

346

347 /* We can add a digit to numbers strictly less than this without

348 needing the precision and slowness of double integers. */

349 max = ~(cpp_num_part) 0;

350 if (precision < PART_PRECISION)

351 max >>= PART_PRECISION - precision;

352 max = (max - base + 1) / base + 1;

353

354 for (; p < end; p++)

355 {

356 c = *p;

357

358 if (ISDIGIT (c) || (base == 16 && ISXDIGIT (c)))

359 c = hex_value (c);

360 else

361 break;

362

363 /* Strict inequality for when max is set to zero. */

364 if (result.low < max)

365 result.low = result.low * base + c;

366 else

367 {

368 result = append_digit (result, c, base, precision);

369 overflow |= result.overflow;

370 max = 0;

371 }

372 }

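PART_PRECISION above is the size in bits of the type of high and low (i.e., sizeof (cpp_num_part) * CHAR_BIT, not merely sizeof (long)). The precision held in pfile is initialized in cpp_create_reader to the bit size of long. The code at lines 349 to 352 computes the threshold max: any value of result.low strictly below max can be multiplied by base and have one more digit added without exceeding precision.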

So while the result is still below max, it is safe to shift in the current digit directly. Otherwise, the digit must be handled carefully by append_digit.

397 static cpp_num

398 append_digit (cpp_num num, int digit, int base, size_t precision) in cppexp.c

399 {

400 cpp_num result;

401 unsigned int shift = 3 + (base == 16);

402 bool overflow;

403 cpp_num_part add_high, add_low;

404

405 /* Multiply by 8 or 16. Catching this overflow here means we don't

406 need to worry about add_high overflowing. */

407 overflow = !!(num.high >> (PART_PRECISION - shift));

408 result.high = num.high << shift;

409 result.low = num.low << shift;

410 result.high |= num.low >> (PART_PRECISION - shift);

411

412 if (base == 10)

413 {

414 add_low = num.low << 1;

415 add_high = (num.high << 1) + (num.low >> (PART_PRECISION - 1));

416 }

417 else

418 add_high = add_low = 0;

419

420 if (add_low + digit < add_low)

421 add_high++;

422 add_low += digit;

423

424 if (result.low + add_low < result.low)

425 add_high++;

426 if (result.high + add_high < result.high)

427 overflow = true;

428

429 result.low += add_low;

430 result.high += add_high;

431

432 /* The above code catches overflow of a cpp_num type. This catches

433 overflow of the (possibly shorter) target precision. */

434 num.low = result.low;

435 num.high = result.high;

436 result = num_trim (result, precision);

437 if (!num_eq (result, num))

438 overflow = true;

439

440 result.unsignedp = num.unsignedp;

441 result.overflow = overflow;

442 return result;

443 }

As it uses the fields high and low to hold the interpreted number, it first checks whether the extra digit will cause the result to overflow. Notice that shift is 4 only for base 16; otherwise it is 3. For base 10, however, shifting the number left by 3 bits only multiplies it by 8 instead of 10, so lines 414 to 415 add the remaining 2 times the number. These additions may carry or overflow: lines 420 to 425 propagate carries from the low part into add_high, and lines 426 to 427 detect overflow of the high part.

Obviously, the resulting number may exceed the precision specified by pfile (parameter precision). Routine num_trim trims the result according to precision.

1004 static cpp_num

1005 num_trim (cpp_num num, size_t precision) in cppexp.c

1006 {

1007 if (precision > PART_PRECISION)

1008 {

1009 precision -= PART_PRECISION;

1010 if (precision < PART_PRECISION)

1011 num.high &= ((cpp_num_part) 1 << precision) - 1;

1012 }

1013 else

1014 {

1015 if (precision < PART_PRECISION)

1016 num.low &= ((cpp_num_part) 1 << precision) - 1;

1017 num.high = 0;

1018 }

1019

1020 return num;

1021 }

In any case, if the trimmed result is not the same as the value before trimming, overflow has occurred. After returning from append_digit, since that was the last digit that could be added the fast way for the specified precision, max is set to 0 at line 370 in cpp_interpret_integer, which forces the condition at line 364 to be false for all following digits.

cpp_interpret_integer (continue)

374 if (overflow)

375 cpp_error (pfile, CPP_DL_PEDWARN,

376 "integer constant is too large for its type");

377 /* If too big to be signed, consider it unsigned. Only warn for

378 decimal numbers. Traditional numbers were always signed (but

379 we still honor an explicit U suffix); but we only have

380 traditional semantics in directives. */

381 else if (!result.unsignedp

382 && !(CPP_OPTION (pfile, traditional)

383 && pfile->state.in_directive)

384 && !num_positive (result, precision))

385 {

386 if (base == 10)

387 cpp_error (pfile, CPP_DL_WARNING,

388 "integer constant is so large that it is unsigned");

389 result.unsignedp = true;

390 }

391 }

392

393 return result;

394 }

The tree node of INTEGER_CST also uses high and low fields of HOST_WIDE_INT, in two's complement format, to hold the number. For a signed number, sign extension is therefore required. It is done by cpp_num_sign_extend.

1038 cpp_num

1039 cpp_num_sign_extend (cpp_num num, size_t precision) in cppexp.c

1040 {

1041 if (!num.unsignedp)

1042 {

1043 if (precision > PART_PRECISION)

1044 {

1045 precision -= PART_PRECISION;

1046 if (precision < PART_PRECISION

1047 && (num.high & (cpp_num_part) 1 << (precision - 1)))

1048 num.high |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1049 }

1050 else if (num.low & (cpp_num_part) 1 << (precision - 1))

1051 {

1052 if (precision < PART_PRECISION)

1053 num.low |= ~(~(cpp_num_part) 0 >> (PART_PRECISION - precision));

1054 num.high = ~(cpp_num_part) 0;

1055 }

1056 }

1057

1058 return num;

1059 }

Then the validated number is stored into the node value created by build_int_2_wide at line 500 in interpret_integer.

Next, a type must be associated with the result. As we have seen, type nodes for the integer types are created when the compiler starts up. The best-fitting type (the one containing as few bits as possible) is selected.

interpret_integer (continue)

502 /* The type of a constant with a U suffix is straightforward. */

503 if (flags & CPP_N_UNSIGNED)

504 itk = narrowest_unsigned_type (value, flags);

505 else

506 {

507 /* The type of a potentially-signed integer constant varies

508 depending on the base it's in, the standard in use, and the

509 length suffixes. */

510 enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);

511 enum integer_type_kind itk_s = narrowest_signed_type (value, flags);

512

513 /* In both C89 and C99, octal and hex constants may be signed or

514 unsigned, whichever fits tighter. We do not warn about this

515 choice differing from the traditional choice, as the constant

516 is probably a bit pattern and either way will work. */

517 if ((flags & CPP_N_RADIX) != CPP_N_DECIMAL)

518 itk = MIN (itk_u, itk_s);

519 else

520 {

521 /* In C99, decimal constants are always signed.

522 In C89, decimal constants that don't fit in long have

523 undefined behavior; we try to make them unsigned long.

524 In GCC's extended C89, that last is true of decimal

525 constants that don't fit in long long, too. */

526

527 itk = itk_s;

528 if (itk_s > itk_u && itk_s > itk_long)

529 {

530 if (!flag_isoc99)

531 {

532 if (itk_u < itk_unsigned_long)

533 itk_u = itk_unsigned_long;

534 itk = itk_u;

535 warning ("this decimal constant is unsigned only in ISO C90");

536 }

537 else if (warn_traditional)

538 warning ("this decimal constant would be unsigned in ISO C90");

539 }

540 }

541 }

542

543 if (itk == itk_none)

544 /* cpplib has already issued a warning for overflow. */

545 type = ((flags & CPP_N_UNSIGNED)

546 ? widest_unsigned_literal_type_node

547 : widest_integer_literal_type_node);

548 else

549 type = integer_types[itk];

550

551 if (itk > itk_unsigned_long

552 && (flags & CPP_N_WIDTH) != CPP_N_LARGE

553 && ! in_system_header && ! flag_isoc99)

554 pedwarn ("integer constant is too large for \"%s\" type",

555 (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");

556

557 TREE_TYPE (value) = type;

558

559 /* Convert imaginary to a complex type. */

560 if (flags & CPP_N_IMAGINARY)

561 value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);

562

563 return value;

564 }

Routines narrowest_unsigned_type and narrowest_signed_type are similar. An integer constant is regarded as CPP_N_SMALL without a suffix, as CPP_N_MEDIUM with suffix ‘l/L’, and otherwise as CPP_N_LARGE (refer to cpp_classify_number).

442 static enum integer_type_kind

443 narrowest_unsigned_type (tree value, unsigned int flags) in c-lex.c

444 {

445 enum integer_type_kind itk;

446

447 if ((flags & CPP_N_WIDTH) == CPP_N_SMALL)

448 itk = itk_unsigned_int;

449 else if ((flags & CPP_N_WIDTH) == CPP_N_MEDIUM)

450 itk = itk_unsigned_long;

451 else

452 itk = itk_unsigned_long_long;

453

454 /* int_fits_type_p must think the type of its first argument is

455 wider than its second argument, or it won't do the proper check. */

456 TREE_TYPE (value) = widest_unsigned_literal_type_node;

457

458 for (; itk < itk_none; itk += 2 /* skip unsigned types */)

459 if (int_fits_type_p (value, integer_types[itk]))

460 return itk;

461

462 return itk_none;

463 }

At line 459, integer_types is laid out so that signed and unsigned types appear alternately, which is why the loop at line 458 advances itk by 2. Then at line 456, notice that the type of value is deliberately set to widest_unsigned_literal_type_node (it’s unsigned long long on a 32-bit x86 system), which is the largest unsigned integer type the system can support. This affects the result of int_fits_type_p.

4243 int

4244 int_fits_type_p (tree c, tree type) in tree.c

4245 {

4246 tree type_low_bound = TYPE_MIN_VALUE (type);

4247 tree type_high_bound = TYPE_MAX_VALUE (type);

4248 int ok_for_low_bound, ok_for_high_bound;

4249

4250 /* Perform some generic filtering first, which may allow making a decision

4251 even if the bounds are not constant. First, negative integers never fit

4252 in unsigned types, */

4253 if ((TREE_UNSIGNED (type) && tree_int_cst_sgn (c) < 0)

4254 /* Also, unsigned integers with top bit set never fit signed types. */

4255 || (! TREE_UNSIGNED (type)

4256 && TREE_UNSIGNED (TREE_TYPE (c)) && tree_int_cst_msb (c)))

4257 return 0;

4258

4259 /* If at least one bound of the type is a constant integer, we can check

4260 ourselves and maybe make a decision. If no such decision is possible, but

4261 this type is a subtype, try checking against that. Otherwise, use

4262 force_fit_type, which checks against the precision.

4263

4264 Compute the status for each possibly constant bound, and return if we see

4265 one does not match. Use ok_for_xxx_bound for this purpose, assigning -1

4266 for "unknown if constant fits", 0 for "constant known *not* to fit" and 1

4267 for "constant known to fit". */

4268

4269 ok_for_low_bound = -1;

4270 ok_for_high_bound = -1;

4271

4272 /* Check if C >= type_low_bound. */

4273 if (type_low_bound && TREE_CODE (type_low_bound) == INTEGER_CST)

4274 {

4275 ok_for_low_bound = ! tree_int_cst_lt (c, type_low_bound);

4276 if (! ok_for_low_bound)

4277 return 0;

4278 }

4279

4280 /* Check if c <= type_high_bound. */

4281 if (type_high_bound && TREE_CODE (type_high_bound) == INTEGER_CST)

4282 {

4283 ok_for_high_bound = ! tree_int_cst_lt (type_high_bound, c);

4284 if (! ok_for_high_bound)

4285 return 0;

4286 }

4287

4288 /* If the constant fits both bounds, the result is known. */

4289 if (ok_for_low_bound == 1 && ok_for_high_bound == 1)

4290 return 1;

4291

4292 /* If we haven't been able to decide at this point, there nothing more we

4293 can check ourselves here. Look at the base type if we have one. */

4294 else if (TREE_CODE (type) == INTEGER_TYPE && TREE_TYPE (type) != 0)

4295 return int_fits_type_p (c, TREE_TYPE (type));

4296

4297 /* Or to force_fit_type, if nothing else. */

4298 else

4299 {

4300 c = copy_node (c);

4301 TREE_TYPE (c) = type;

4302 return !force_fit_type (c, 0);

4303 }

4304 }

As we have seen before, TYPE_MIN_VALUE and TYPE_MAX_VALUE of a type node represent the minimum and maximum values the type can hold. If the constant is out of this range, the type isn’t suitable. Besides, a negative value never fits an unsigned type, and an unsigned number whose most significant bit is set never fits a signed type.

Note that we reach the code below line 4292 only when the type doesn’t have both TYPE_MIN_VALUE and TYPE_MAX_VALUE set as integer constants (e.g., typedef declaration of int).

If narrowest_unsigned_type can’t find a suitable type, the largest type the system supports is used (the comment at line 544 points out that cpplib has already issued a warning about the overflow). The type found is set as the type of the parsed number at line 557 in interpret_integer, and this number node is returned in the value field of cp_token.