面试题strtoi实现（一）—— 函数的简单实现

来源：互联网发布：日本人社交软件编辑：程序博客网时间：2024/06/04 23:26

大师级程序地址： http://www.opensource.apple.com/source/xnu/xnu-1456.1.26/bsd/libkern/strtol.c

[cpp] view plaincopyprint?

/*
 * Return non-zero when c is an ASCII upper-case letter ('A'..'Z').
 * Locale-independent replacement for the <ctype.h> routine.
 */
static inline int
isupper(char c)
{
	return (c >= 'A' && c <= 'Z');
}
/*
 * Return non-zero when c is an ASCII letter, either case.
 * Locale-independent replacement for the <ctype.h> routine.
 */
static inline int
isalpha(char c)
{
	return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
}
/*
 * Return non-zero when c is one of the six standard C white-space
 * characters: space, horizontal tab, newline, vertical tab, form feed
 * or carriage return.
 *
 * The earlier version tested '\n' twice ('\12' is the octal spelling of
 * the same character), so '\r', '\v' and '\f' were never recognised.
 */
static inline int
isspace(char c)
{
	return (c == ' ' || c == '\t' || c == '\n' ||
	    c == '\v' || c == '\f' || c == '\r');
}
/*
 * Return non-zero when c is an ASCII decimal digit ('0'..'9').
 * Locale-independent replacement for the <ctype.h> routine.
 */
static inline int
isdigit(char c)
{
	return (c >= '0' && c <= '9');
}
/*
 * Convert a string to a long integer.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 *
 * nptr   - NUL-terminated string to parse.
 * endptr - if not null, receives a pointer to the first character that
 *          was not consumed, or nptr itself when no digit was converted.
 * base   - 2..36, or 0 to infer the base from a 0x / 0b / 0 prefix.
 *
 * Returns the converted value.  On overflow the result is clamped to
 * LONG_MAX (or LONG_MIN for negative input); errno is not touched.
 * An unsupported base converts nothing and returns 0.
 */
long
strtol(const char *nptr, char **endptr, int base)
{
	const char *s = nptr;
	unsigned long acc;
	unsigned long cutoff;
	int c;
	int neg = 0, any, cutlim;

	/* Only base 0 (auto-detect) and bases 2..36 are meaningful. */
	if (base < 0 || base == 1 || base > 36) {
		if (endptr != 0)
			*endptr = (char *)nptr;
		return (0);
	}

	/*
	 * Skip white space and pick up leading +/- sign if any.
	 * If base is 0, allow 0x for hex, 0b for binary and 0 for octal,
	 * else assume decimal; if base is already 16 (or 2), allow the
	 * matching prefix.  Characters are read through unsigned char so
	 * that c is never negative.
	 */
	do {
		c = (unsigned char)*s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = (unsigned char)*s++;
	} else if (c == '+')
		c = (unsigned char)*s++;

	/*
	 * A prefix only counts when a digit of that base follows it;
	 * otherwise the leading "0" is the number and parsing stops at
	 * the 'x' / 'b'.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = (unsigned char)s[1];
		s += 2;
		base = 16;
	} else if ((base == 0 || base == 2) &&
	    c == '0' && (*s == 'b' || *s == 'B') &&
	    (s[1] == '0' || s[1] == '1')) {
		c = (unsigned char)s[1];
		s += 2;
		base = 2;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;

	/*
	 * Compute the cutoff value between legal numbers and illegal
	 * numbers.  That is the largest legal value, divided by the
	 * base.  An input number that is greater than this value, if
	 * followed by a legal input character, is too big.  One that
	 * is equal to this value may be valid or not; the limit
	 * between valid and invalid numbers is then based on the last
	 * digit.  For instance, if the range for longs is
	 * [-2147483648..2147483647] and the input base is 10,
	 * cutoff will be set to 214748364 and cutlim to either
	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
	 * the number is too big, and we will return a range error.
	 *
	 * Set any if any `digits' consumed; make it negative to indicate
	 * overflow.
	 */
	cutoff = neg ? -(unsigned long)LONG_MIN : (unsigned long)LONG_MAX;
	cutlim = (int)(cutoff % (unsigned long)base);
	cutoff /= (unsigned long)base;
	for (acc = 0, any = 0;; c = (unsigned char)*s++) {
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		/*
		 * The any < 0 test keeps the overflow state sticky: acc is
		 * frozen once overflow is seen, so a later small digit must
		 * not be allowed to reset any to 1.
		 */
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= (unsigned long)base;
			acc += (unsigned long)c;
		}
	}
	if (any < 0) {
		/* Out of range: clamp.  ERANGE is not reported via errno. */
		acc = neg ? (unsigned long)LONG_MIN : (unsigned long)LONG_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (char *)(any ? s - 1 : nptr);
	return ((long)acc);
}
/*
 * Convert a string to an unsigned long integer.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 *
 * nptr   - NUL-terminated string to parse.
 * endptr - if not null, receives a pointer to the first character that
 *          was not consumed, or nptr itself when no digit was converted.
 * base   - 2..36, or 0 to infer the base from a 0x / 0b / 0 prefix.
 *
 * Returns the converted value; a leading '-' negates the result in
 * unsigned arithmetic.  On overflow the result is ULONG_MAX; errno is
 * not touched.  An unsupported base converts nothing and returns 0.
 */
unsigned long
strtoul(const char *nptr, char **endptr, int base)
{
	const char *s = nptr;
	unsigned long acc;
	unsigned long cutoff;
	int c;
	int neg = 0, any, cutlim;

	/* Only base 0 (auto-detect) and bases 2..36 are meaningful. */
	if (base < 0 || base == 1 || base > 36) {
		if (endptr != 0)
			*endptr = (char *)nptr;
		return (0);
	}

	/*
	 * Skip white space, then an optional sign.  Characters are read
	 * through unsigned char so that c is never negative.
	 */
	do {
		c = (unsigned char)*s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = (unsigned char)*s++;
	} else if (c == '+')
		c = (unsigned char)*s++;

	/*
	 * A 0x / 0b prefix only counts when a digit of that base follows
	 * it; otherwise the leading "0" is the number and parsing stops
	 * at the 'x' / 'b'.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = (unsigned char)s[1];
		s += 2;
		base = 16;
	} else if ((base == 0 || base == 2) &&
	    c == '0' && (*s == 'b' || *s == 'B') &&
	    (s[1] == '0' || s[1] == '1')) {
		c = (unsigned char)s[1];
		s += 2;
		base = 2;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;

	/*
	 * cutoff is the largest value that can still be multiplied by
	 * base without overflowing; cutlim is the largest digit that may
	 * follow when acc is exactly cutoff.
	 */
	cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
	cutlim = (int)((unsigned long)ULONG_MAX % (unsigned long)base);
	for (acc = 0, any = 0;; c = (unsigned char)*s++) {
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		/*
		 * The any < 0 test keeps the overflow state sticky: acc is
		 * frozen once overflow is seen, so a later small digit must
		 * not be allowed to reset any to 1.
		 */
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= (unsigned long)base;
			acc += (unsigned long)c;
		}
	}
	if (any < 0) {
		/* Out of range: saturate.  ERANGE is not reported via errno. */
		acc = ULONG_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (char *)(any ? s - 1 : nptr);
	return (acc);
}

/* NOTE(review): the two physical lines below are a whitespace-collapsed duplicate of the formatted listing that precedes them (the four ctype helpers, strtol and strtoul). In this collapsed form identifiers are fused (`intisupper`, `longstrtol`, `elsebreak`) and the `//errno = ERANGE;` line comment on the second line swallows everything that follows it, so this copy does not compile as-is; read the formatted listing instead. */
static inline intisupper(char c){    return (c >= 'A' && c <= 'Z');}static inline intisalpha(char c){    return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));}static inline intisspace(char c){    return (c == ' ' || c == '\t' || c == '\n' || c == '\12');}static inline intisdigit(char c){    return (c >= '0' && c <= '9');}/* * Convert a string to a long integer. * * Ignores `locale' stuff.  Assumes that the upper and lower case * alphabets and digits are each contiguous. */longstrtol(nptr, endptr, base)const char *nptr;char **endptr;register int base;{register const char *s = nptr;register unsigned long acc;register int c;register unsigned long cutoff;register int neg = 0, any, cutlim;/* * Skip white space and pick up leading +/- sign if any. * If base is 0, allow 0x for hex and 0 for octal, else * assume decimal; if base is already 16, allow 0x. */do {c = *s++;} while (isspace(c));if (c == '-') {neg = 1;c = *s++;} else if (c == '+')c = *s++;if ((base == 0 || base == 16) &&    c == '0' && (*s == 'x' || *s == 'X')) {c = s[1];s += 2;base = 16;} else if ((base == 0 || base == 2) &&    c == '0' && (*s == 'b' || *s == 'B')) {c = s[1];s += 2;base = 2;}if (base == 0)base = c == '0' ? 8 : 10;/* * Compute the cutoff value between legal numbers and illegal * numbers.  That is the largest legal value, divided by the * base.  An input number that is greater than this value, if * followed by a legal input character, is too big.  One that * is equal to this value may be valid or not; the limit * between valid and invalid numbers is then based on the last * digit.  For instance, if the range for longs is * [-2147483648..2147483647] and the input base is 10, * cutoff will be set to 214748364 and cutlim to either * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated * a value > 214748364, or equal but the next digit is > 7 (or 8), * the number is too big, and we will return a range error. * * Set any if any `digits' consumed; make it negative to indicate * overflow. 
*/cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;cutlim = cutoff % (unsigned long)base;cutoff /= (unsigned long)base;for (acc = 0, any = 0;; c = *s++) {if (isdigit(c))c -= '0';else if (isalpha(c))c -= isupper(c) ? 'A' - 10 : 'a' - 10;elsebreak;if (c >= base)break;if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim)any = -1;else {any = 1;acc *= base;acc += c;}}if (any < 0) {acc = neg ? LONG_MIN : LONG_MAX;//errno = ERANGE;} else if (neg)acc = -acc;if (endptr != 0)*endptr = (char *)(any ? s - 1 : nptr);return (acc);}/* * Convert a string to an unsigned long integer. * * Ignores `locale' stuff.  Assumes that the upper and lower case * alphabets and digits are each contiguous. */unsigned longstrtoul(nptr, endptr, base)const char *nptr;char **endptr;register int base;{register const char *s = nptr;register unsigned long acc;register int c;register unsigned long cutoff;register int neg = 0, any, cutlim;/* * See strtol for comments as to the logic used. */do {c = *s++;} while (isspace(c));if (c == '-') {neg = 1;c = *s++;} else if (c == '+')c = *s++;if ((base == 0 || base == 16) &&    c == '0' && (*s == 'x' || *s == 'X')) {c = s[1];s += 2;base = 16;} else if ((base == 0 || base == 2) &&    c == '0' && (*s == 'b' || *s == 'B')) {c = s[1];s += 2;base = 2;}if (base == 0)base = c == '0' ? 8 : 10;cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;for (acc = 0, any = 0;; c = *s++) {if (isdigit(c))c -= '0';else if (isalpha(c))c -= isupper(c) ? 'A' - 10 : 'a' - 10;elsebreak;if (c >= base)break;if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim)any = -1;else {any = 1;acc *= base;acc += c;}}if (any < 0) {acc = ULONG_MAX;//errno = ERANGE;} else if (neg)acc = -acc;if (endptr != 0)*endptr = (char *)(any ? s - 1 : nptr);return (acc);}

我们来分析下strtol程序逻辑吧。程序里看到了几个inline函数，这个是实现ctype.h里面字符类型判断

的函数，为后面扫描字符时做准备。使用inline函数的好处是高效，适用于函数代码短小的情况。

register关键字只是建议编译器把变量存放在寄存器中（编译器可以忽略这个建议）；寄存器中的变量访问比内存中的变量快，适合需要频繁访问的变量。

里面除了定义了用于扫描字串的指针s（指向当前扫描字符的下一个），还定义了保存当前扫描字符的变量c。

（1）当base为0或者16时，程序能处理前导空格符及正负号之后的字串以0x或0X开头的情况（16进制）。

（2）当base为0或者2时，程序能处理前导空格符及正负号之后的字串以0b或0B开头的情况（2进制）。

（3）似乎程序并没有专门处理base为8、且前导空格符及正负号之后的字串以'0'开头的情况（8进制）。

事实上，任何进制数的前导0，都是被“隐式”处理了的。因为累加和的计算方式是当前的和乘以base，再加上下一个被计入

的字符数值。如果发现了前导0，扫描后，当前累加和还是0，于是 sum * base + [下一个字符数值] = [下一个字符数值]。

（4）这里有个疑问：这部分程序的目的是想让c被赋值为“处理前导空格、制表符及正负号之后的第一个字符”，

而s指向c后面的字符。如果输入是"0x"或者"0X"怎么办？我们知道，c为'\0'，那s呢？此处是否涉及到越界？

（5）看见那个cutoff没？那个可是个unsigned long类型的，所以能够“容纳”LONG_MAX或者-LONG_MIN（即LONG_MIN的绝对值）。与我

上一篇文章程序中overflow的意图是一样的。cutlim为cutoff除以base所得余数；之后cutoff变为所能达到

最大值的绝对值除以base所得的”商“。看到这个商和余数，联想到了什么？

如果已经扫描到的数已经大于这个“最大商”，那么，接下来就不能出现合法字符（数字，字母），否则溢出。

因为：sum * base + [下一个字符数值] ≥ sum * base ≥ (“最大商” + 1) * base > “最大商” * base + “最大余数” = 当前符号下所对应的最大数值

如果已经扫描到的数等于这个“最大商”，那么接下来扫描到的合法字符（数字，字母）不能超过此时的“最大余数”。

sum * base + [下一个字符数值] ≤ “最大商” * base + “最大余数” = 当前符号可表示的最大数的绝对值

如果已经扫描到的数小于这个“最大商”，那么接下来扫描到的任何合法字符（数字，字母）都不会导致溢出，因为合法字符的数值不超过 base - 1。

sum * base + [下一个字符数值] ≤ (“最大商” - 1) * base + (base - 1) < “最大商” * base ≤ 当前符号可表示的最大数的绝对值

由此，我们得出了扫描数溢出的条件：已经扫描到的数值(sum) > “最大商”，或者（sum == “最大商” 并且

当前扫描的字符数值 > “最大余数”）。

（6）看看for循环里的迭代表达式c = *s++，感觉存在上述疑问（4）中提出的问题：c为结束符'\0'时，s指向哪里？

另外，判断溢出时，条件any < 0 是必要的么？是必要的：一旦溢出，acc就不再更新；如果没有这个条件，后面再扫描到一个较小的合法字符时会进入else分支，把any重新置为1，溢出标记就丢失了。

（7）退出循环后，any < 0 对应着数值溢出的情况。对endptr是否为NULL进行判断，如果为NULL，说明并

未指向有效的内存，当然不能引用。如果any不为0 （上面看到是1），说明已经扫描到了有效字符，s -1 指

向c，对于扫描完所有字符的情况，此时c为'\0' ；对于包含非法字符的情况，则c为找到的第一个非法字符。

（8）错误代码是存放在全局变量errno中的（本程序中对errno的赋值被注释掉了），与我前一篇定义错误码并作为strtoi函数返回值是不一样的。

大家也可参考另一类似程序：http://blog.csdn.net/ammana_babi/article/details/1473336

在网上闲逛，无意中看到了JULY大神写的一篇关于strtoInt的文章。该文从题目分析开始，先拿出大家

容易写出的”错误程序”。然后深入分析，找出该如何去进行错误检查和处理，如何可以优化程序。除了

点赞，我不知道还能做些啥，分享给大家，希望大家有空一定要去看看。网址：JULY的strtoInt

到此，文章要结束了。大家还记得兄弟篇的第一篇么？在此也附上网址。

面试题strtoi实现（一）—— 函数的简单实现

0 0