Memcached源码分析（二）-jenkins hash函数

来源：互联网 | 发布：最终幻想15剧情知乎 | 编辑：程序博客网 | 时间：2024/06/08 06:59
通过启动时的-o hash_algorithm可以配置memcached的hash算法，支持两种算法：jenkins, murmur3，默认是jenkins。
hash源码里面有几点比较有意思，直接在代码里标明。
大端序部分与小端序部分基本一致，就不再标明。
jenkins_hash.c
#if HASH_LITTLE_ENDIAN == 1uint32_t jenkins_hash(  const void *key,       /* the key to hash */  size_t      length)    /* length of the key */{  // 由于a,b,c都是4byte的，因此下面每次循环最大处理12byte  uint32_t a,b,c;                                          /* internal state */  // union里面有个const变量，不用马上初始化，通过i直接操作，免除了数据强转  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */  /* Set up the internal state */  // 0xdeadbeef是一个魔术数，标记软件崩溃或死锁，但这里好像并没有特别的意义，反汇编的时候方便查找？后面+0似乎也没有意义，知道的博友还望告知一下，不胜感激！  a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;  u.ptr = key;  // 此处的HASH_LITTLE_ENDIAN是多余的，其值必然为1。u.i&0x3是检查地址低两位是否是00，如果是的话可以认为是4字节内存对齐的，可以一次处理4byte。  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {    const uint32_t *k = key;                           /* read 32-bit chunks */#ifdef VALGRIND    const uint8_t  *k8;#endif /* ifdef VALGRIND */    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */    while (length > 12)    {      a += k[0];      b += k[1];      c += k[2];      mix(a,b,c);      length -= 12;      k += 3;    }    /*----------------------------- handle the last (probably partial) block */    /*     * "k[2]&0xffffff" actually reads beyond the end of the string, but     * then masks off the part it's not allowed to read.  Because the     * string is aligned, the masked-off tail is in the same word as the     * rest of the string.  Every machine with memory protection I've seen     * does it on word boundaries, so is OK with this.  But VALGRIND will     * still catch it and complain.  The masking trick does make the hash     * noticably faster for short strings (like English words).     
*/#ifndef VALGRIND    switch(length)    {    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;    case 8 : b+=k[1]; a+=k[0]; break;    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;    case 6 : b+=k[1]&0xffff; a+=k[0]; break;    case 5 : b+=k[1]&0xff; a+=k[0]; break;    case 4 : a+=k[0]; break;    case 3 : a+=k[0]&0xffffff; break;    case 2 : a+=k[0]&0xffff; break;    case 1 : a+=k[0]&0xff; break;    case 0 : return c;  /* zero length strings require no mixing */    }#else /* make valgrind happy */    k8 = (const uint8_t *)k;    switch(length)    {    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */    case 9 : c+=k8[8];                   /* fall through */    case 8 : b+=k[1]; a+=k[0]; break;    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */    case 5 : b+=k8[4];                   /* fall through */    case 4 : a+=k[0]; break;    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */    case 1 : a+=k8[0]; break;    case 0 : return c;  /* zero length strings require no mixing */    }#endif /* !valgrind */// 此处的HASH_LITTLE_ENDIAN同样是多余的，其值必然为1。u.i&0x1是检查地址最低位是否是0，如果是0，可以认为是2字节内存对齐的，可以一次处理2byte。  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {    const uint16_t *k = key;                           /* read 16-bit chunks */    const uint8_t  *k8;    /*--------------- all but last block: aligned reads and different mixing */    while (length > 12)    {      a += k[0] + (((uint32_t)k[1])<<16);      b += k[2] + (((uint32_t)k[3])<<16);      c += k[4] + (((uint32_t)k[5])<<16);      mix(a,b,c);      length -= 12;      k += 6;    }    /*----------------------------- handle the last 
(probably partial) block */    k8 = (const uint8_t *)k;    switch(length)    {    case 12: c+=k[4]+(((uint32_t)k[5])<<16);             b+=k[2]+(((uint32_t)k[3])<<16);             a+=k[0]+(((uint32_t)k[1])<<16);             break;    case 11: c+=((uint32_t)k8[10])<<16;     /* @fallthrough */    case 10: c+=k[4];                       /* @fallthrough@ */             b+=k[2]+(((uint32_t)k[3])<<16);             a+=k[0]+(((uint32_t)k[1])<<16);             break;    case 9 : c+=k8[8];                      /* @fallthrough */    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);             a+=k[0]+(((uint32_t)k[1])<<16);             break;    case 7 : b+=((uint32_t)k8[6])<<16;      /* @fallthrough */    case 6 : b+=k[2];             a+=k[0]+(((uint32_t)k[1])<<16);             break;    case 5 : b+=k8[4];                      /* @fallthrough */    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);             break;    case 3 : a+=((uint32_t)k8[2])<<16;      /* @fallthrough */    case 2 : a+=k[0];             break;    case 1 : a+=k8[0];             break;    case 0 : return c;  /* zero length strings require no mixing */    }  // 此处的HASH_LITTLE_ENDIAN同样是多余的，其值必然为1。地址最低位是1，一次只能处理1byte。  } else {                        /* need to read the key one byte at a time */    const uint8_t *k = key;    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */    while (length > 12)    {      a += k[0];      a += ((uint32_t)k[1])<<8;      a += ((uint32_t)k[2])<<16;      a += ((uint32_t)k[3])<<24;      b += k[4];      b += ((uint32_t)k[5])<<8;      b += ((uint32_t)k[6])<<16;      b += ((uint32_t)k[7])<<24;      c += k[8];      c += ((uint32_t)k[9])<<8;      c += ((uint32_t)k[10])<<16;      c += ((uint32_t)k[11])<<24;      mix(a,b,c);      length -= 12;      k += 12;    }    /*-------------------------------- last block: affect all 32 bits of (c) */    switch(length)                   /* all the case statements fall through */    {    case 12: c+=((uint32_t)k[11])<<24;    case 11: 
c+=((uint32_t)k[10])<<16;    case 10: c+=((uint32_t)k[9])<<8;    case 9 : c+=k[8];    case 8 : b+=((uint32_t)k[7])<<24;    case 7 : b+=((uint32_t)k[6])<<16;    case 6 : b+=((uint32_t)k[5])<<8;    case 5 : b+=k[4];    case 4 : a+=((uint32_t)k[3])<<24;    case 3 : a+=((uint32_t)k[2])<<16;    case 2 : a+=((uint32_t)k[1])<<8;    case 1 : a+=k[0];             break;    case 0 : return c;  /* zero length strings require no mixing */    }  }  final(a,b,c);  return c;             /* zero length strings require no mixing */}#elif HASH_BIG_ENDIAN == 1/* * hashbig(): * This is the same as hashword() on big-endian machines.  It is different * from hashlittle() on all machines.  hashbig() takes advantage of * big-endian byte ordering. */uint32_t jenkins_hash( const void *key, size_t length){  uint32_t a,b,c;  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */  /* Set up the internal state */  a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;  u.ptr = key;  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {    const uint32_t *k = key;                           /* read 32-bit chunks */#ifdef VALGRIND    const uint8_t  *k8;#endif /* ifdef VALGRIND */    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */    while (length > 12)    {      a += k[0];      b += k[1];      c += k[2];      mix(a,b,c);      length -= 12;      k += 3;    }    /*----------------------------- handle the last (probably partial) block */    /*     * "k[2]<<8" actually reads beyond the end of the string, but     * then shifts out the part it's not allowed to read.  Because the     * string is aligned, the illegal read is in the same word as the     * rest of the string.  Every machine with memory protection I've seen     * does it on word boundaries, so is OK with this.  But VALGRIND will     * still catch it and complain.  The masking trick does make the hash     * noticably faster for short strings (like English words).     
*/#ifndef VALGRIND    switch(length)    {    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;    case 8 : b+=k[1]; a+=k[0]; break;    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;    case 4 : a+=k[0]; break;    case 3 : a+=k[0]&0xffffff00; break;    case 2 : a+=k[0]&0xffff0000; break;    case 1 : a+=k[0]&0xff000000; break;    case 0 : return c;              /* zero length strings require no mixing */    }#else  /* make valgrind happy */    k8 = (const uint8_t *)k;    switch(length)                   /* all the case statements fall through */    {    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;    case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */    case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */    case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */    case 8 : b+=k[1]; a+=k[0]; break;    case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */    case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */    case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */    case 4 : a+=k[0]; break;    case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */    case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */    case 1 : a+=((uint32_t)k8[0])<<24; break;    case 0 : return c;    }#endif /* !VALGRIND */  } else {                        /* need to read the key one byte at a time */    const uint8_t *k = key;    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */    while (length > 12)    {      a += ((uint32_t)k[0])<<24;      a += ((uint32_t)k[1])<<16;      a += ((uint32_t)k[2])<<8;      a += ((uint32_t)k[3]);      b += ((uint32_t)k[4])<<24;      b += ((uint32_t)k[5])<<16;      b += ((uint32_t)k[6])<<8;      b += ((uint32_t)k[7]);      c += ((uint32_t)k[8])<<24;      c += 
((uint32_t)k[9])<<16;      c += ((uint32_t)k[10])<<8;      c += ((uint32_t)k[11]);      mix(a,b,c);      length -= 12;      k += 12;    }    /*-------------------------------- last block: affect all 32 bits of (c) */    switch(length)                   /* all the case statements fall through */    {    case 12: c+=k[11];    case 11: c+=((uint32_t)k[10])<<8;    case 10: c+=((uint32_t)k[9])<<16;    case 9 : c+=((uint32_t)k[8])<<24;    case 8 : b+=k[7];    case 7 : b+=((uint32_t)k[6])<<8;    case 6 : b+=((uint32_t)k[5])<<16;    case 5 : b+=((uint32_t)k[4])<<24;    case 4 : a+=k[3];    case 3 : a+=((uint32_t)k[2])<<8;    case 2 : a+=((uint32_t)k[1])<<16;    case 1 : a+=((uint32_t)k[0])<<24;             break;    case 0 : return c;    }  }  final(a,b,c);  return c;}#else /* HASH_XXX_ENDIAN == 1 */#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN#endif /* HASH_XXX_ENDIAN == 1 */
jenkins_hash 会先判断 key 首地址的对齐情况：小端序版本区分 4 字节对齐、2 字节对齐和未对齐三种情况，分别按 4 字节、2 字节、1 字节为单位读取数据；大端序版本只区分 4 字节对齐和逐字节读取两种情况。对齐时一次读取更多字节，可以加速处理过程。
0 0