Memcached源码分析(二)-jenkins hash函数
来源:互联网 发布:最终幻想15剧情 知乎 编辑:程序博客网 时间:2024/06/08 06:59
通过启动时的-o hash_algorithm可以配置memcached的hash算法,支持两种算法:jenkins, murmur3,默认是jenkins。
hash源码里面有几点比较有意思,直接在代码里标明。
大端序部分与小端序部分基本一致,就不再标明。
jenkins_hash.c
#if HASH_LITTLE_ENDIAN == 1uint32_t jenkins_hash( const void *key, /* the key to hash */ size_t length) /* length of the key */{ // 由于a,b,c都是4byte的,因此下面每次循环最大处理12byte uint32_t a,b,c; /* internal state */ // union里面有个const变量,不用马上初始化,通过i直接操作,免除了数据强转 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ /* Set up the internal state */ // 0xdeadbeef是一个魔术数,标记软件崩溃或死锁,但这里好像并没有特别的意义,反汇编的时候方便查找?后面+0似乎也没有意义,知道的博友还望告知一下,不胜感激! a = b = c = 0xdeadbeef + ((uint32_t)length) + 0; u.ptr = key; // 此处的HASH_LITTLE_ENDIAN是多余的,其值必然为1。u.i&0x3是检查地址低两位是否是00,如果是的话可以认为是4字节内存对齐的,可以一次处理4byte。 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { const uint32_t *k = key; /* read 32-bit chunks */#ifdef VALGRIND const uint8_t *k8;#endif /* ifdef VALGRIND */ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; b += k[1]; c += k[2]; mix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the * string is aligned, the masked-off tail is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). 
*/#ifndef VALGRIND switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff; a+=k[0]; break; case 6 : b+=k[1]&0xffff; a+=k[0]; break; case 5 : b+=k[1]&0xff; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; case 0 : return c; /* zero length strings require no mixing */ }#else /* make valgrind happy */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ case 9 : c+=k8[8]; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ case 5 : b+=k8[4]; /* fall through */ case 4 : a+=k[0]; break; case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ case 1 : a+=k8[0]; break; case 0 : return c; /* zero length strings require no mixing */ }#endif /* !valgrind */// 此处的HASH_LITTLE_ENDIAN同样是多余的,其值必然为1。u.i&0x1是检查地址最低位是否是0,如果是0,可以认为是2字节内存对齐的,可以一次处理2byte。 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = key; /* read 16-bit chunks */ const uint8_t *k8; /*--------------- all but last block: aligned reads and different mixing */ while (length > 12) { a += k[0] + (((uint32_t)k[1])<<16); b += k[2] + (((uint32_t)k[3])<<16); c += k[4] + (((uint32_t)k[5])<<16); mix(a,b,c); length -= 12; k += 6; } /*----------------------------- handle the last (probably partial) block */ k8 = (const uint8_t *)k; switch(length) { case 12: c+=k[4]+(((uint32_t)k[5])<<16); b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ 
case 10: c+=k[4]; /* @fallthrough@ */ b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 9 : c+=k8[8]; /* @fallthrough */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; case 5 : b+=k8[4]; /* @fallthrough */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; case 0 : return c; /* zero length strings require no mixing */ } // 此处的HASH_LITTLE_ENDIAN同样是多余的,其值必然为1。地址最低位是1,一次只能处理1byte。 } else { /* need to read the key one byte at a time */ const uint8_t *k = key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; a += ((uint32_t)k[1])<<8; a += ((uint32_t)k[2])<<16; a += ((uint32_t)k[3])<<24; b += k[4]; b += ((uint32_t)k[5])<<8; b += ((uint32_t)k[6])<<16; b += ((uint32_t)k[7])<<24; c += k[8]; c += ((uint32_t)k[9])<<8; c += ((uint32_t)k[10])<<16; c += ((uint32_t)k[11])<<24; mix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ switch(length) /* all the case statements fall through */ { case 12: c+=((uint32_t)k[11])<<24; case 11: c+=((uint32_t)k[10])<<16; case 10: c+=((uint32_t)k[9])<<8; case 9 : c+=k[8]; case 8 : b+=((uint32_t)k[7])<<24; case 7 : b+=((uint32_t)k[6])<<16; case 6 : b+=((uint32_t)k[5])<<8; case 5 : b+=k[4]; case 4 : a+=((uint32_t)k[3])<<24; case 3 : a+=((uint32_t)k[2])<<16; case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; case 0 : return c; /* zero length strings require no mixing */ } } final(a,b,c); return c; /* zero length strings require no mixing */}#elif HASH_BIG_ENDIAN == 1/* * hashbig(): * This is the same as hashword() on big-endian machines. It is different * from hashlittle() on all machines. hashbig() takes advantage of * big-endian byte ordering. 
*/uint32_t jenkins_hash( const void *key, size_t length){ uint32_t a,b,c; union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ /* Set up the internal state */ a = b = c = 0xdeadbeef + ((uint32_t)length) + 0; u.ptr = key; if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { const uint32_t *k = key; /* read 32-bit chunks */#ifdef VALGRIND const uint8_t *k8;#endif /* ifdef VALGRIND */ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ while (length > 12) { a += k[0]; b += k[1]; c += k[2]; mix(a,b,c); length -= 12; k += 3; } /*----------------------------- handle the last (probably partial) block */ /* * "k[2]<<8" actually reads beyond the end of the string, but * then shifts out the part it's not allowed to read. Because the * string is aligned, the illegal read is in the same word as the * rest of the string. Every machine with memory protection I've seen * does it on word boundaries, so is OK with this. But VALGRIND will * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). 
*/#ifndef VALGRIND switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; case 5 : b+=k[1]&0xff000000; a+=k[0]; break; case 4 : a+=k[0]; break; case 3 : a+=k[0]&0xffffff00; break; case 2 : a+=k[0]&0xffff0000; break; case 1 : a+=k[0]&0xff000000; break; case 0 : return c; /* zero length strings require no mixing */ }#else /* make valgrind happy */ k8 = (const uint8_t *)k; switch(length) /* all the case statements fall through */ { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ case 8 : b+=k[1]; a+=k[0]; break; case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ case 4 : a+=k[0]; break; case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ case 1 : a+=((uint32_t)k8[0])<<24; break; case 0 : return c; }#endif /* !VALGRIND */ } else { /* need to read the key one byte at a time */ const uint8_t *k = key; /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ while (length > 12) { a += ((uint32_t)k[0])<<24; a += ((uint32_t)k[1])<<16; a += ((uint32_t)k[2])<<8; a += ((uint32_t)k[3]); b += ((uint32_t)k[4])<<24; b += ((uint32_t)k[5])<<16; b += ((uint32_t)k[6])<<8; b += ((uint32_t)k[7]); c += ((uint32_t)k[8])<<24; c += ((uint32_t)k[9])<<16; c += ((uint32_t)k[10])<<8; c += ((uint32_t)k[11]); mix(a,b,c); length -= 12; k += 12; } /*-------------------------------- last block: affect all 32 bits of (c) */ switch(length) /* all the case statements fall through */ 
{ case 12: c+=k[11]; case 11: c+=((uint32_t)k[10])<<8; case 10: c+=((uint32_t)k[9])<<16; case 9 : c+=((uint32_t)k[8])<<24; case 8 : b+=k[7]; case 7 : b+=((uint32_t)k[6])<<8; case 6 : b+=((uint32_t)k[5])<<16; case 5 : b+=((uint32_t)k[4])<<24; case 4 : a+=k[3]; case 3 : a+=((uint32_t)k[2])<<8; case 2 : a+=((uint32_t)k[1])<<16; case 1 : a+=((uint32_t)k[0])<<24; break; case 0 : return c; } } final(a,b,c); return c;}#else /* HASH_XXX_ENDIAN == 1 */#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN#endif /* HASH_XXX_ENDIAN == 1 */
jenkins_hash会先判断key首地址的对齐情况:4字节对齐时按32位读取,2字节对齐时按16位读取(仅小端序版本),否则逐字节读取,以此加速处理过程。
0 0
- Memcached源码分析(二)-jenkins hash函数
- jenkins hash函数源码
- Memcached源码分析之状态机(二)
- Memcached源码分析之Hash表扩容
- Memcached源码分析之Hash表操作
- Memcached源码分析之二
- memcached源码分析之线程池机制(二)
- memcached源码分析之线程池机制(二)
- memcached源码分析之线程池机制(二)
- 源码分析Memcached-Java-Client一致性hash算法
- memcached源码分析(一): memcached.c主函数分析 执行流程
- memcached源码分析(一): memcached.c主函数分析 执行流程
- HashMap源码之hash()函数分析(JDK 1.8)
- Memcached源码分析 - Memcached源码分析之HashTable(4)
- Jenkins源码分析
- 源码安装lnmp redis jenkins memcached
- memcached源码分析(assoc.c)
- Memcached源码分析之状态机(一)
- 3G行业面试出的Java编程题
- linux uvc usb camera driver
- Android事件分发机制完全解析,带你从源码的角度彻底理解(下)
- Android提供第三方jar包时资源打包
- nodejs 发送邮件配置
- Memcached源码分析(二)-jenkins hash函数
- 微软掷豪金投资海底数据电缆
- unity3d移动平台性能优化(14):渲染路径设置
- macvim 配置
- 第三题
- leetcode--Search in Rotated Sorted Array
- java中常用的工具类
- 事件驱动机制在单片机软件中的应用
- poj_Feel Good_单调栈