atoi函数的实现二: 测试各实现的正确性

来源:互联网 发布:adobe flash软件下载 编辑:程序博客网 时间:2024/06/01 08:03

linux内核的atoi测试

v_JULY_v君的问题非常好(请见文章的评论)! 每次都让我思考. 现将linux内核的atoi测试代码贴出来, 为了区别于C标准库的atoi函数, 我把被测试的函数名改为matoi:

/*
 * Stand-alone user-space harness around the Linux kernel's string-to-integer
 * helpers, used to observe how the tidspbridge atoi() (renamed matoi() here so
 * it does not clash with the C library's atoi) behaves on out-of-range input.
 */
#include <ctype.h>
#include <string.h>
#include <stdio.h>

/* http://lxr.free-electrons.com/source/lib/kstrtox.h#L4 */
#define KSTRTOX_OVERFLOW        (1U << 31)
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);

/* http://lxr.free-electrons.com/source/arch/powerpc/boot/types.h#L12 */
typedef int                     s32;
typedef unsigned int            u32;
typedef unsigned long long      u64;

/* http://lxr.free-electrons.com/source/include/linux/kernel.h#L29 */
#ifndef ULLONG_MAX
#define ULLONG_MAX      (~0ULL)
#endif
#define unlikely(cond) (cond)

/*
 * ASCII-only lower-casing by setting bit 0x20, as the kernel's _tolower()
 * does. A local helper is used because <ctype.h> only exposes _tolower as a
 * non-standard extension, which is not available under strict ISO C modes.
 * Only meaningful for the comparisons made in this file ('x', 'a'..'f').
 */
static inline char ascii_lower(const char c)
{
    return (char)(c | 0x20);
}

/* http://lxr.free-electrons.com/source/lib/kstrtox.c#L23 */
/*
 * Resolve an automatic radix and skip an optional "0x" prefix.
 *
 * @s:    string to inspect (not modified)
 * @base: in/out radix; when it is 0 on entry it is replaced by 16 ("0x"
 *        followed by a hex digit), 8 (other leading '0') or 10
 *
 * Returns a pointer to the first character that belongs to the number,
 * i.e. @s advanced past "0x"/"0X" when the radix is 16 and the prefix exists.
 */
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
{
    if (*base == 0) {
        if (s[0] == '0') {
            if (ascii_lower(s[1]) == 'x' && isxdigit((unsigned char)s[2]))
                *base = 16;
            else
                *base = 8;
        } else {
            *base = 10;
        }
    }
    if (*base == 16 && s[0] == '0' && ascii_lower(s[1]) == 'x')
        s += 2;
    return s;
}

/* http://lxr.free-electrons.com/source/lib/kstrtox.c#L47 */
/*
 * Convert non-negative integer string representation in explicitly given radix
 * to an integer.
 * Return number of characters consumed maybe or-ed with overflow bit.
 * If overflow occurs, result integer (incorrect) is still returned.
 *
 * Don't you dare use this function.
 */
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
{
    unsigned long long res = 0;
    unsigned int rv = 0;
    int overflow = 0;

    while (*s) {
        unsigned int val;

        if ('0' <= *s && *s <= '9')
            val = *s - '0';
        else if ('a' <= ascii_lower(*s) && ascii_lower(*s) <= 'f')
            val = ascii_lower(*s) - 'a' + 10;
        else
            break;

        /* A digit that is not valid in this radix ends the number. */
        if (val >= base)
            break;
        /*
         * Check for overflow only if we are within range of
         * it in the max base we support (16)
         */
        if (unlikely(res & (~0ull << 60))) {
            /*
             * The kernel writes this as div_u64(ULLONG_MAX - val, base);
             * plain 64-bit division is the user-space equivalent.
             * base cannot be 0 here: val >= base already broke out above.
             */
            if (res > (ULLONG_MAX - val) / base)
                overflow = 1;
        }
        res = res * base + val;
        rv++;
        s++;
    }
    *p = res;
    if (overflow)
        rv |= KSTRTOX_OVERFLOW;
    return rv;
}

/* http://lxr.free-electrons.com/source/lib/vsprintf.c#L44 */
/**
 * simple_strtoull - convert a string to an unsigned long long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use
 *
 * This function is obsolete. Please use kstrtoull instead.
 *
 * The overflow bit reported by _parse_integer() is masked off and otherwise
 * ignored, so an out-of-range value is returned wrapped, without any error.
 */
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
{
    unsigned long long result;
    unsigned int rv;

    cp = _parse_integer_fixup_radix(cp, &base);
    rv = _parse_integer(cp, base, &result);
    /* FIXME */
    cp += (rv & ~KSTRTOX_OVERFLOW);

    if (endp)
        *endp = (char *)cp;

    return result;
}

/* http://lxr.free-electrons.com/source/lib/vsprintf.c#L83 */
/**
 * simple_strtoul - convert a string to an unsigned long
 * @cp: The start of the string
 * @endp: A pointer to the end of the parsed string will be placed here
 * @base: The number base to use
 *
 * This function is obsolete. Please use kstrtoul instead.
 *
 * The unsigned long long result is narrowed to unsigned long on return.
 */
unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
{
    return (unsigned long)simple_strtoull(cp, endp, base);
}

/* http://lxr.free-electrons.com/source/drivers/staging/tidspbridge/rmgr/dbdcd.c#L950 */
/*
 *  ======== atoi ========
 *  Purpose:
 *      This function converts strings in decimal or hex format to integers.
 *
 *  Leading white space is skipped. A leading '+' or '-' forces base 10 and
 *  is then skipped WITHOUT being applied, so the magnitude is parsed as an
 *  unsigned number; a trailing 'h'/'H' selects base 16; otherwise the radix
 *  is auto-detected. The unsigned result is narrowed to s32 with no range
 *  check. This mirrors the code under test and is kept on purpose: the
 *  harness exists to show that behaviour.
 */
static s32 matoi(const char *psz_buf)
{
    const char *pch = psz_buf;
    s32 base = 0;

    /* <ctype.h> functions require a value representable as unsigned char. */
    while (isspace((unsigned char)*pch))
        pch++;

    if (*pch == '-' || *pch == '+') {
        base = 10;
        pch++;
    } else if (*pch && tolower((unsigned char)pch[strlen(pch) - 1]) == 'h') {
        base = 16;
    }

    return (s32)simple_strtoul(pch, NULL, base);
}

/* Print one input string together with the value matoi() produces for it. */
void test(const char *str)
{
    printf("%s : %d\n", str, matoi(str));
}

/* Run matoi() over values at and beyond the limits of a 32-bit int. */
int main(void)
{
    test("2147483647");
    test("2147483648");
    test("-2147483648");
    test("-2147483649");
    test("10522545459");
    test("-10522545459");
    return 0;
}

修改的地方在第75行, 原来的代码为:

if (res > div_u64(ULLONG_MAX - val, base))

而div_u64调用的div_u64_rem函数中包含汇编代码编译不过(原因尚未可知, 有待进一步研究), 所以我把这段程序去掉了.

程序的输出结果(很显然, 对于溢出的情况, 程序没有处理):

2147483647 : 2147483647
2147483648 : -2147483648
10522545459 : 1932610867
-2147483648 : -2147483648
-2147483649 : -2147483647
-10522545459 : 1932610867

Nut/OS的atoi测试

以下是测试代码(在Ubuntu 10.04.1, gcc 4.4.3上编译通过; 为了区别于C标准库的函数, 函数名strtol更改为mstrtol, atoi更改为matoi2):

/*
 * Interactive harness for the Nut/OS strtol()/atoi() implementation. The
 * functions are renamed mstrtol()/matoi2() so they do not clash with the C
 * library. Each line read from standard input is converted and printed;
 * the line "quit" (or end of input) terminates the program.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#define CONST      const

/*
 * Convert the initial part of a string to a long.
 *
 * nptr:   string to convert
 * endptr: when not NULL, receives a pointer to the first character that was
 *         not consumed, or nptr itself when no digit was consumed
 * base:   radix; 0 selects 16 for a "0x"/"0X" prefix, 8 for a leading '0',
 *         and 10 otherwise
 *
 * Returns the converted value. When the range check below detects overflow
 * the result is LONG_MAX or LONG_MIN and errno is set to ERANGE.
 *
 * NOTE: the two range checks in the accumulation loop are reproduced exactly
 * as in the code under test. They are the defect the surrounding article
 * demonstrates (it refers to them as lines 79 and 89) and are deliberately
 * left unchanged so the published test results stay reproducible.
 */
long mstrtol(CONST char *nptr, char **endptr, int base)
{
    register CONST char *s;
    register long acc, cutoff;
    register int c;
    register int neg, any, cutlim;

    /*
     * Skip white space and pick up leading +/- sign if any.
     * If base is 0, allow 0x for hex and 0 for octal, else
     * assume decimal; if base is already 16, allow 0x.
     */
    s = nptr;
    do {
        c = (unsigned char) *s++;
    } while (isspace(c));
    if (c == '-') {
        neg = 1;
        /* Go through unsigned char so <ctype.h> never sees a negative value. */
        c = (unsigned char) *s++;
    } else {
        neg = 0;
        if (c == '+')
            c = (unsigned char) *s++;
    }
    if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X')) {
        c = (unsigned char) s[1];
        s += 2;
        base = 16;
    }
    if (base == 0)
        base = c == '0' ? 8 : 10;

    /*
     * Compute the cutoff value between legal numbers and illegal
     * numbers.  That is the largest legal value, divided by the
     * base.  An input number that is greater than this value, if
     * followed by a legal input character, is too big.  One that
     * is equal to this value may be valid or not; the limit
     * between valid and invalid numbers is then based on the last
     * digit.  For instance, if the range for longs is
     * [-2147483648..2147483647] and the input base is 10,
     * cutoff will be set to 214748364 and cutlim to either
     * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
     * a value > 214748364, or equal but the next digit is > 7 (or 8),
     * the number is too big, and we will return a range error.
     *
     * Set any if any `digits' consumed; make it negative to indicate
     * overflow.
     */
    cutoff = neg ? LONG_MIN : LONG_MAX;
    cutlim = cutoff % base;
    cutoff /= base;
    if (neg) {
        if (cutlim > 0) {
            cutlim -= base;
            cutoff += 1;
        }
        cutlim = -cutlim;
    }
    for (acc = 0, any = 0;; c = (unsigned char) *s++) {
        if (isdigit(c))
            c -= '0';
        else if (isalpha(c))
            c -= isupper(c) ? 'A' - 10 : 'a' - 10;
        else
            break;
        if (c >= base)
            break;
        /* After an overflow keep consuming digits but stop accumulating. */
        if (any < 0)
            continue;
        if (neg) {
            /*
             * KNOWN DEFECT ("line 79" in the article), kept as-is: the
             * digit test is also applied when acc is already beyond
             * cutoff, so such a value followed by a small digit is not
             * rejected and the multiplication below overflows.
             */
            if ((acc < cutoff || acc == cutoff) && c > cutlim) {
                any = -1;
                acc = LONG_MIN;
                errno = ERANGE;
            } else {
                any = 1;
                acc *= base;
                acc -= c;
            }
        } else {
            /* KNOWN DEFECT ("line 89" in the article), kept as-is; see above. */
            if ((acc > cutoff || acc == cutoff) && c > cutlim) {
                any = -1;
                acc = LONG_MAX;
                errno = ERANGE;
            } else {
                any = 1;
                acc *= base;
                acc += c;
            }
        }
    }
    if (endptr != 0)
        *endptr = (char *) (any ? s - 1 : nptr);
    return (acc);
}

/*
 * Convert a decimal string to int by calling mstrtol() with base 10 and
 * narrowing the long result to int.
 */
int matoi2(CONST char *str)
{
    return ((int) mstrtol(str, (char **) NULL, 10));
}

/*
 * Read one line from standard input into buf.
 *
 * buf: destination buffer
 * n:   capacity of buf in bytes, including the terminating '\0'
 *
 * At most n - 1 characters are stored; the newline is consumed but not
 * stored, and buf is always NUL-terminated when n > 0. Characters of an
 * over-long line that do not fit are left in the input stream.
 *
 * Returns the number of characters stored, or -1 when end of input is
 * reached before any character was read (or when n is 0), so that callers
 * looping on ">= 0" terminate at EOF instead of spinning forever.
 */
int mgetline(char *buf, size_t n)
{
    size_t idx = 0;
    int c = 0;

    /* With n == 0 there is no room even for the terminator; --n would wrap. */
    if (n == 0)
        return -1;

    while (--n > 0 && (c = getchar()) != EOF && c != '\n') {
        buf[idx++] = (char) c;
    }
    buf[idx] = '\0';

    if (c == EOF && idx == 0)
        return -1;
    return (int) idx;
}

#define MAX_LINE 200

/* Convert and print every input line until "quit" or end of input. */
int main(void)
{
    char buf[MAX_LINE];

    while (mgetline(buf, MAX_LINE) >= 0) {
        if (strcmp(buf, "quit") == 0)
            break;
        printf("matoi2=%d\n", matoi2(buf));
    }
    return 0;
}

程序的测试结果:

10522545459
matoi2=2147483647
-10522545459
matoi2=-2147483648

程序貌似对溢出的处理是正确的, 真的吗? 请注意代码的第79和第89行. 现在我把测试数据换成"10522545454", 与"10522545459"区别在于最后一个字符.

10522545454
matoi2=1932610862
-10522545454
matoi2=-1932610862

bingo! 正中下怀! 对于字串"10522545454", 在读取最后的数字字符'4'时, 整数1052254545已经大于2147483647/10了, 说明已经溢出, 不应该再判断字串的最后一位4是否大于2147483647%10, 所以第79行应该改为(89行修改方法类似):

            if (acc < cutoff || (acc == cutoff && c > cutlim)) {

修改过后的代码测试正常:

10522545459
matoi2=2147483647
-10522545459
matoi2=-2147483648
10522545454
matoi2=2147483647
-10522545454
matoi2=-2147483648
quit

关于此bug, 我已经邮件通知En-Nut-Discussion.

以下为邮件回复的截图, Uwe Bonnes说: 可以打个补丁到分支. 不过他把单词reasonable给拼错了.


References:

Linux Cross Reference

Nut/OS API