结合redis设计与实现的redis源码学习-26-工具函数（Util.h/.c）

来源：互联网发布：钉钉软件的坏处编辑：程序博客网时间：2024/05/21 07:13

Redis 将许多通用的工具函数独立出来，放在 Util.h/Util.c 中，包括字符串模式匹配、内存大小字符串的转换、字符串与整数的互相转换、获取绝对路径等。这些函数都是 Redis 作者自己实现的，这里逐个认真学习，看看它们高效在哪里，以便在今后的工作中借鉴使用。
因为 Util 中的函数在逻辑上彼此独立，所以这里只看 .c 文件。
Util.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h> /* strcasecmp() is declared here by POSIX. */
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <unistd.h>
#include <sys/time.h>
#include <float.h>
#include <stdint.h>
#include <errno.h>

/* SHA1 implementation (project header). The guard lets the self-contained
 * helpers of this file be compiled on their own when the header is not on
 * the include path; where __has_include is unavailable the header is simply
 * required, exactly as before. */
#if defined(__has_include)
#  if __has_include("sha1.h")
#    include "sha1.h"
#  endif
#else
#  include "sha1.h"
#endif

/* Glob-style pattern matching on length-delimited buffers.
 *
 * Supported syntax:
 *   *        any sequence of bytes, including the empty one
 *   ?        exactly one byte
 *   [abc]    one byte out of a set; [^abc] negates the set; [a-z] is a range
 *   \x       the byte x taken literally
 *
 * pattern / patternLen: the pattern and its length in bytes.
 * string / stringLen:   the subject and its length in bytes.
 * nocase:               non-zero to compare case-insensitively.
 *
 * Returns 1 on match, 0 otherwise.
 *
 * Neither buffer has to be NUL terminated: every read is bounded by the
 * corresponding length. Bytes are compared as unsigned char so that ranges
 * and tolower() behave the same whether plain char is signed or not. */
int stringmatchlen(const char *pattern, int patternLen,
                   const char *string, int stringLen, int nocase)
{
    while (patternLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse a run of consecutive '*' into the last one. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* A trailing '*' matches whatever is left. */
            /* Try the rest of the pattern at every remaining position of
             * the string. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                                   string, stringLen, nocase))
                    return 1;
                string++;
                stringLen--;
            }
            return 0; /* The rest of the pattern matched nowhere. */
        case '?':
            if (stringLen == 0)
                return 0; /* '?' needs one byte to consume. */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match = 0;

            if (stringLen == 0)
                return 0; /* A class needs one byte to consume. */
            pattern++;
            patternLen--;
            negate = patternLen > 0 && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back one byte so that the
                     * common advance after the switch lands exactly on the
                     * end of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (patternLen >= 2 && pattern[0] == '\\') {
                    /* Escaped member. It is compared exactly, even when
                     * nocase is set. */
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = (unsigned char)pattern[0];
                    int end = (unsigned char)pattern[2];
                    int c = (unsigned char)string[0];

                    /* Accept reversed ranges such as [z-a]. */
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0;
            string++;
            stringLen--;
            break;
        }
        case '\\':
            /* Skip the backslash unless it is the last pattern byte, in
             * which case it stands for itself. */
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* A literal needs one byte to consume. */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0;
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0;
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is exhausted: only trailing '*' may remain. */
            while (patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    return patternLen == 0 && stringLen == 0;
}

/* Convenience wrapper around stringmatchlen() for NUL terminated strings. */
int stringmatch(const char *pattern, const char *string, int nocase) {
    return stringmatchlen(pattern, (int)strlen(pattern),
                          string, (int)strlen(string), nocase);
}

/* Convert a string representing an amount of memory into a number of bytes,
 * so for instance memtoll("1Gb") returns 1073741824, that is 1024*1024*1024.
 *
 * The input is an optional '-', decimal digits, and an optional unit matched
 * case-insensitively: "b" (x1), "k" (x1000), "kb" (x1024), "m" (x1000^2),
 * "mb" (x1024^2), "g" (x1000^3), "gb" (x1024^3).
 *
 * If 'err' is not NULL it is set to 1 on parsing error and to 0 otherwise.
 * On error the return value is 0 whether or not 'err' is NULL. Unknown
 * units, inputs without any digit, numbers of 128 characters or more, and
 * values that do not fit a long long (before or after applying the unit) are
 * all reported as errors. */
long long memtoll(const char *p, int *err) {
    const char *u;
    char buf[128];
    long long mul; /* unit multiplier */
    long long val;
    size_t digits;
    char *endptr;

    if (err) *err = 0;

    /* Search the first non digit character. */
    u = p;
    if (*u == '-') u++;
    while (*u && isdigit((unsigned char)*u)) u++;

    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000LL*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024LL*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000LL*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024LL*1024*1024;
    } else {
        if (err) *err = 1;
        return 0;
    }

    /* Copy the sign and digits (without the unit) into a buffer so that
     * strtoll() can convert them. */
    digits = (size_t)(u - p);
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return 0;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';

    errno = 0;
    val = strtoll(buf,&endptr,10);
    /* endptr == buf: nothing was converted (empty input, a lone '-', or
     * only a unit). ERANGE: the digits do not fit a long long. */
    if (endptr == buf || *endptr != '\0' || errno == ERANGE) {
        if (err) *err = 1;
        return 0;
    }

    /* mul is always >= 1, so these divisions are safe; they reject products
     * that would overflow a long long. */
    if (val > LLONG_MAX / mul || val < LLONG_MIN / mul) {
        if (err) *err = 1;
        return 0;
    }
    return val*mul;
}

/* Return the number of decimal digits needed to print 'v' in radix 10.
 * Small values are resolved with a handful of comparisons; values of
 * 10^12 and above recurse on v / 10^12. */
uint32_t digits10(uint64_t v) {
    if (v < 10) return 1;
    if (v < 100) return 2;
    if (v < 1000) return 3;
    if (v < 1000000000000ULL) {
        if (v < 100000000ULL) {
            if (v < 1000000) {
                if (v < 10000) return 4;
                return 5 + (v >= 100000);
            }
            return 7 + (v >= 10000000ULL);
        }
        if (v < 10000000000ULL) {
            return 9 + (v >= 1000000000ULL);
        }
        return 11 + (v >= 100000000000ULL);
    }
    return 12 + digits10(v / 1000000000000ULL);
}

/* Like digits10() but for signed values: a negative number counts one extra
 * character for the minus sign. */
uint32_t sdigits10(int64_t v) {
    if (v < 0) {
        /* Negate in unsigned arithmetic so that the most negative value is
         * handled without signed overflow. */
        uint64_t uv = (uint64_t)0 - (uint64_t)v;
        return digits10(uv) + 1; /* +1 for the minus. */
    }
    return digits10((uint64_t)v);
}

ll2string

/* Write the decimal representation of 'svalue' into 'dst', NUL terminated.
 *
 * Returns the number of characters written, not counting the terminator, or
 * 0 when 'dstlen' is too small to hold the digits, the optional minus sign
 * and the terminator.
 *
 * Digits are produced two at a time from the least significant end by
 * looking up every value in 0..99 in a table of two-character pairs. The
 * conversion runs on the unsigned magnitude; the sign is added last. */
int ll2string(char *dst, size_t dstlen, long long svalue) {
    static const char pairs[201] =
        "0001020304050607080910111213141516171819"
        "2021222324252627282930313233343536373839"
        "4041424344454647484950515253545556575859"
        "6061626364656667686970717273747576777879"
        "8081828384858687888990919293949596979899";
    int const is_negative = svalue < 0;
    unsigned long long magnitude;

    /* LLONG_MIN has no positive counterpart, so it is special-cased. */
    if (!is_negative) {
        magnitude = svalue;
    } else if (svalue == LLONG_MIN) {
        magnitude = ((unsigned long long) LLONG_MAX)+1;
    } else {
        magnitude = -svalue;
    }

    /* Total characters needed, minus sign included. */
    uint32_t const total = digits10(magnitude)+is_negative;
    if (total >= dstlen) return 0;

    /* Fill the buffer backwards, starting from the terminator. */
    char *out = dst + total;
    *out-- = '\0';
    while (magnitude >= 100) {
        const char *pair = pairs + (magnitude % 100) * 2;
        magnitude /= 100;
        *out-- = pair[1];
        *out-- = pair[0];
    }

    /* One or two leading digits are left. */
    if (magnitude < 10) {
        *out = '0' + (uint32_t) magnitude;
    } else {
        const char *pair = pairs + (uint32_t) magnitude * 2;
        out[0] = pair[1];
        out[-1] = pair[0];
    }

    if (is_negative) dst[0] = '-';
    return total;
}

string2ll

/* Parse the 'slen' bytes at 's' as a decimal long long.
 *
 * Returns 1 and stores the result in '*value' (when 'value' is not NULL) if
 * the whole buffer is a canonical representation of a number that fits a
 * long long; returns 0 otherwise. Canonical means: an optional leading '-',
 * then digits with no leading zero, the single string "0" being the only
 * one allowed to start with '0'. Empty input, a lone "-", "-0", a leading
 * '+', spaces and any trailing garbage are all rejected. */
int string2ll(const char *s, size_t slen, long long *value) {
    size_t pos = 0;
    int is_negative = 0;
    unsigned long long acc;

    if (slen == 0)
        return 0;

    /* The only valid number starting with '0' is "0" itself. */
    if (slen == 1 && s[0] == '0') {
        if (value != NULL) *value = 0;
        return 1;
    }

    if (s[0] == '-') {
        is_negative = 1;
        pos = 1;
        /* A minus sign alone is not a number. */
        if (slen == 1)
            return 0;
    }

    /* The first digit must be 1-9. */
    if (s[pos] < '1' || s[pos] > '9')
        return 0;
    acc = s[pos]-'0';
    pos++;

    /* Accumulate the remaining digits, bailing out before any step that
     * would overflow the unsigned accumulator. */
    for (; pos < slen; pos++) {
        char ch = s[pos];

        if (ch < '0' || ch > '9')
            break;
        if (acc > (ULLONG_MAX / 10))
            return 0;
        acc *= 10;
        if (acc > (ULLONG_MAX - (ch-'0')))
            return 0;
        acc += ch-'0';
    }

    /* Stopping early means a non-digit byte was found. */
    if (pos < slen)
        return 0;

    if (is_negative) {
        /* The largest accepted magnitude is that of LLONG_MIN. */
        if (acc > ((unsigned long long)(-(LLONG_MIN+1))+1))
            return 0;
        if (value != NULL) *value = -acc;
    } else {
        if (acc > LLONG_MAX)
            return 0;
        if (value != NULL) *value = acc;
    }
    return 1;
}

getRandomHexChars

/* Generate the Redis "Run ID", a SHA1-sized random number that identifies a given execution of Redis, so that if you are talking with an instance having run_id == A, and you reconnect and it has run_id == B, you can be sure that it is either a different instance or it was restarted. 生成redis运行id，这是一个sha1大小的随机数，用于标识给定的Redis执行情况*/void getRandomHexChars(char *p, unsigned int len) {    char *charset = "0123456789abcdef";    unsigned int j;    /* Global state. */    static int seed_initialized = 0;    static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */    static uint64_t counter = 0; /* The counter we hash with the seed. */    if (!seed_initialized) {        /* Initialize a seed and use SHA1 in counter mode, where we hash the same seed with a progressive counter. For the goals of this function we just need non-colliding strings, there are no cryptographic security needs. 在计数器模式下初始化一个种子并使用SHA1，在那里我们用一个累进计数器对相同的种子进行哈希处理*/        FILE *fp = fopen("/dev/urandom","r");        if (fp && fread(seed,sizeof(seed),1,fp) == 1)            seed_initialized = 1;        if (fp) fclose(fp);    }    if (seed_initialized) {        while(len) {            unsigned char digest[20];            SHA1_CTX ctx;            unsigned int copylen = len > 20 ? 20 : len;            SHA1Init(&ctx);            SHA1Update(&ctx, seed, sizeof(seed));            SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter));            SHA1Final(digest, &ctx);            counter++;            memcpy(p,digest,copylen);            /* Convert to hex digits. 
*/            for (j = 0; j < copylen; j++) p[j] = charset[p[j] & 0x0F];            len -= copylen;            p += copylen;        }    } else {        /* If we can't read from /dev/urandom, do some reasonable effort in order to create some entropy, since this function is used to generate run_id and cluster instance IDs 如果我们不能正常读取数据，创建一些熵*/        char *x = p;        unsigned int l = len;        struct timeval tv;        pid_t pid = getpid();        /* Use time and PID to fill the initial array. */        gettimeofday(&tv,NULL);        if (l >= sizeof(tv.tv_usec)) {            memcpy(x,&tv.tv_usec,sizeof(tv.tv_usec));            l -= sizeof(tv.tv_usec);            x += sizeof(tv.tv_usec);        }        if (l >= sizeof(tv.tv_sec)) {            memcpy(x,&tv.tv_sec,sizeof(tv.tv_sec));            l -= sizeof(tv.tv_sec);            x += sizeof(tv.tv_sec);        }        if (l >= sizeof(pid)) {            memcpy(x,&pid,sizeof(pid));            l -= sizeof(pid);            x += sizeof(pid);        }        /* Finally xor it with rand() output, that was already seeded with time() at startup, and convert to hex digits. */        for (j = 0; j < len; j++) {            p[j] ^= rand();            p[j] = charset[p[j] & 0x0F];        }    }}

getAbsolutePath / pathIsBaseName

/* Given the filename, return the absolute path as an SDS string, or NULL * if it fails for some reason. Note that "filename" may be an absolute path * already, this will be detected and handled correctly. * * The function does not try to normalize everything, but only the obvious * case of one or more "../" appearning at the start of "filename" * relative path. */sds getAbsolutePath(char *filename) {    char cwd[1024];    sds abspath;    sds relpath = sdsnew(filename);    relpath = sdstrim(relpath," \r\n\t");    if (relpath[0] == '/') return relpath; /* Path is already absolute. */    /* If path is relative, join cwd and relative path. */    if (getcwd(cwd,sizeof(cwd)) == NULL) {        sdsfree(relpath);        return NULL;    }    abspath = sdsnew(cwd);    if (sdslen(abspath) && abspath[sdslen(abspath)-1] != '/')        abspath = sdscat(abspath,"/");    /* At this point we have the current path always ending with "/", and     * the trimmed relative path. Try to normalize the obvious case of     * trailing ../ elements at the start of the path.     *     * For every "../" we find in the filename, we remove it and also remove     * the last element of the cwd, unless the current cwd is "/". */    while (sdslen(relpath) >= 3 &&           relpath[0] == '.' && relpath[1] == '.' && relpath[2] == '/')    {        sdsrange(relpath,3,-1);        if (sdslen(abspath) > 1) {            char *p = abspath + sdslen(abspath)-2;            int trimlen = 1;            while(*p != '/') {                p--;                trimlen++;            }            sdsrange(abspath,0,-(trimlen+1));        }    }    /* Finally glue the two parts together. */    abspath = sdscatsds(abspath,relpath);    sdsfree(relpath);    return abspath;}/* Return true if the specified path is just a file basename without any * relative or absolute path. This function just checks that no / or \ * character exists inside the specified path, that's enough in the * environments where Redis runs. 
*/int pathIsBaseName(char *path) {    return strchr(path,'/') == NULL && strchr(path,'\\') == NULL;}

阅读全文

0 0