redis zipmap

来源:互联网 发布:zozoc软件 编辑:程序博客网 时间:2024/06/05 21:32

原帖地址:  http://lobert.iteye.com/blog/1858823 

在阅读此文件源码之前,先来看文件头部的英文注释,以下是本人理解后的翻译:
该文件实现了一种将字符串映射到字符串的数据结构,其查找复杂度为O(n),设计目标是极高的内存效率。Redis的hash类型在元素数量较少时就使用这种数据结构,当元素数量达到一定值后再转换为真正的哈希表。鉴于很多时候Redis hash是用来表示由少量字段组成的对象,这在内存使用上是一个很大的优势。
它的zipmap的格式为:
<zmlen><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world"
<zmlen>占1个字节,保存当前zipmap中键值对的个数。当个数大于或等于254时,这个值不再使用,需要遍历整个zipmap才能得出个数。
<len>是其后字符串(键或值)的长度。<len>被编码为一个单字节值或一个5字节值:如果第一个字节(作为unsigned 8位值)介于0和252之间,它就是单字节长度;如果它是253,则后面跟着一个四字节的无符号整数(主机字节序)。值255用于标记散列的结束,特殊值254用于标记空闲空间,可用于添加新的键/值对。(注:以上数值出自原注释;按下文代码,ZIPMAP_BIGLEN为254,即实际上小于254的长度用单字节表示,首字节为254时后面跟四字节长度。)
<free>是value字符串之后未使用的空闲字节数,在修改key所关联的value时产生。例如,先将“foo”设置为“bar”,之后再将“foo”设置为“hi”,就会留下1个空闲字节;如果之后value再次变长,可以直接利用这些空闲字节,甚至在大小合适时用来存放一对新的键/值。
<free>总是一个unsigned 8位数,因为如果一次更新操作之后空闲字节过多,zipmap会被重新分配,以确保它尽可能小。

通过注释可以清楚地看到,此结构的最大优点就是节省内存。由此也基本了解了其结构的组成,下面分析源码也会轻松很多。

C代码 复制代码收藏代码
  1. /* Create a new empty zipmap. */ 
  2. unsignedchar *zipmapNew(void) { 
  3.     unsignedchar *zm = zmalloc(2); 
  4.  
  5.     zm[0] = 0;/* Length */ 
  6.     zm[1] = ZIPMAP_END; 
  7.    return zm; 

新建一个空的zipmap,其结构如图:

C代码 复制代码收藏代码
  1. /* Decode the encoded length pointed by 'p' */ 
  2. static unsignedint zipmapDecodeLength(unsigned char *p) { 
  3.     unsignedint len = *p; 
  4.  
  5.    if (len < ZIPMAP_BIGLEN) return len; 
  6.     memcpy(&len,p+1,sizeof(unsignedint)); 
  7.     memrev32ifbe(&len);//大小端转换 
  8.    return len; 
  9.  
  10. /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
  11. * the amount of bytes required to encode such a length. */ 
  12. static unsignedint zipmapEncodeLength(unsigned char *p, unsigned int len) { 
  13.    if (p == NULL) { 
  14.        return ZIPMAP_LEN_BYTES(len); 
  15.     }else
  16.        if (len < ZIPMAP_BIGLEN) { 
  17.             p[0] = len; 
  18.            return 1; 
  19.         }else
  20.             p[0] = ZIPMAP_BIGLEN; 
  21.             memcpy(p+1,&len,sizeof(len)); 
  22.             memrev32ifbe(p+1); 
  23.            return 1+sizeof(len); 
  24.         } 
  25.     } 

//上为解码,下为编码(将长度写入p,返回编码所占的字节数;p为NULL时只返回所需的字节数)。当key/value的长度小于ZIPMAP_BIGLEN(254)时,<len>只占1个字节;当长度大于等于ZIPMAP_BIGLEN时,<len>的首字节为ZIPMAP_BIGLEN,其后再用4个字节保存实际长度。(这样短字符串就能省下这四个字节)

C代码 复制代码收藏代码
  1. /* Search for a matching key, returning a pointer to the entry inside the
  2. * zipmap. Returns NULL if the key is not found.
  3. *
  4. * If NULL is returned, and totlen is not NULL, it is set to the entire
  5. * size of the zimap, so that the calling function will be able to
  6. * reallocate the original zipmap to make room for more entries. */ 
  7. static unsignedchar *zipmapLookupRaw(unsigned char *zm, unsigned char *key, unsignedint klen, unsigned int *totlen) { 
  8.     unsignedchar *p = zm+1, *k = NULL;//开始+1,跳过length 
  9.     unsignedint l,llen; 
  10.  
  11.    while(*p != ZIPMAP_END) { 
  12.         unsignedchar free; 
  13.  
  14.        /* Match or skip the key */ 
  15.         l = zipmapDecodeLength(p);//取得key的长度     
  16.     llen = zipmapEncodeLength(NULL,l);//取得key占用的字节数        
  17.    if (key != NULL && k == NULL && l == klen && !memcmp(p+llen,key,l)) { 
  18.             /* Only return when the user doesn't care
  19.              * for the total length of the zipmap. */ 
  20.            if (totlen != NULL) { 
  21.                 k = p; 
  22.             }else
  23.                return p; 
  24.             } 
  25.         } 
  26.         p += llen+l; 
  27.        /* Skip the value as well */ 
  28.         l = zipmapDecodeLength(p);//取得value的长度        
  29.     p += zipmapEncodeLength(NULL,l);//取得value占用的字节数 
  30.         free = p[0]; 
  31.         p += l+1+free;/* +1 to skip the free byte */ 
  32.     } 
  33.    if (totlen != NULL) *totlen = (unsigned int)(p-zm)+1; 
  34.    return k; 

//查找key,注意totlen的值:只要totlen不为NULL,无论是否找到key,函数都会遍历到zipmap末尾,并把*totlen设为整个zipmap所占的字节数(包含结束符);找到key时返回值指向该key所在entry的起始位置,没找到则返回NULL。如果totlen为NULL,则找到key后立即返回。

C代码 复制代码收藏代码
  1. static unsignedlong zipmapRequiredLength(unsigned int klen, unsigned int vlen) { 
  2.     unsignedint l; 
  3.  
  4.     l = klen+vlen+3;//注意此处为何要加3? (klen和vlen本身要占用1字节,还有1字节是留给free的)   
  5.    if (klen >= ZIPMAP_BIGLEN) l += 4;//这里加4,是因为上面编码方法中所明    
  6.    if (vlen >= ZIPMAP_BIGLEN) l += 4; 
  7.    return l; 
  8.  
  9. /* Return the total amount used by a key (encoded length + payload) */ 
  10. static unsignedint zipmapRawKeyLength(unsigned char *p) { 
  11.     unsignedint l = zipmapDecodeLength(p); 
  12.    return zipmapEncodeLength(NULL,l) + l; 
  13. //返回key总字节数 
  14.  
  15. /* Return the total amount used by a value
  16. * (encoded length + single byte free count + payload) */ 
  17. static unsignedint zipmapRawValueLength(unsigned char *p) { 
  18.     unsignedint l = zipmapDecodeLength(p); 
  19.     unsignedint used; 
  20.      
  21.     used = zipmapEncodeLength(NULL,l); 
  22.     used += p[used] + 1 + l; 
  23.    return used; 
  24. //返回value总字节数,包含free字节 
  25. /* If 'p' points to a key, this function returns the total amount of
  26. * bytes used to store this entry (entry = key + associated value + trailing
  27. * free space if any). */ 
  28. static unsignedint zipmapRawEntryLength(unsigned char *p) { 
  29.     unsignedint l = zipmapRawKeyLength(p); 
  30.    return l + zipmapRawValueLength(p+l); 
  31. //返回key和value总共所占的字节 
  32. staticinline unsigned char *zipmapResize(unsignedchar *zm, unsigned int len) { 
  33.     zm = zrealloc(zm, len); 
  34.     zm[len-1] = ZIPMAP_END; 
  35.    return zm; 
  36. //重置zm 

C代码 复制代码收藏代码
  1. /* Set key to value, creating the key if it does not already exist.
  2. * If 'update' is not NULL, *update is set to 1 if the key was
  3. * already preset, otherwise to 0. */ 
  4. unsignedchar *zipmapSet(unsigned char *zm, unsignedchar *key, unsigned int klen, unsignedchar *val, unsigned int vlen,int *update) { 
  5.     unsignedint zmlen, offset; 
  6.     unsignedint freelen, reqlen = zipmapRequiredLength(klen,vlen); 
  7.     unsignedint empty, vempty; 
  8.     unsignedchar *p; 
  9.     
  10.     freelen = reqlen; 
  11.    if (update) *update = 0; 
  12.     p = zipmapLookupRaw(zm,key,klen,&zmlen); 
  13.    if (p == NULL) { 
  14.        /* Key not found: enlarge */ 
  15.         zm = zipmapResize(zm, zmlen+reqlen); 
  16.         p = zm+zmlen-1; 
  17.         zmlen = zmlen+reqlen; 
  18.  
  19.        /* Increase zipmap length (this is an insert) */ 
  20.        if (zm[0] < ZIPMAP_BIGLEN) zm[0]++; 
  21.     }else
  22.        /* Key found. Is there enough space for the new value? */ 
  23.        /* Compute the total length: */ 
  24.        if (update) *update = 1; 
  25.         freelen = zipmapRawEntryLength(p); 
  26.        if (freelen < reqlen) { 
  27.             /* Store the offset of this key within the current zipmap, so
  28.              * it can be resized. Then, move the tail backwards so this
  29.              * pair fits at the current position. */ 
  30.             offset = p-zm; 
  31.             zm = zipmapResize(zm, zmlen-freelen+reqlen); 
  32.             p = zm+offset; 
  33.  
  34.             /* The +1 in the number of bytes to be moved is caused by the
  35.              * end-of-zipmap byte. Note: the *original* zmlen is used. */ 
  36.             memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1)); 
  37.             zmlen = zmlen-freelen+reqlen; 
  38.             freelen = reqlen; 
  39.         } 
  40.     } 
  41.  
  42.     /* We now have a suitable block where the key/value entry can
  43.      * be written. If there is too much free space, move the tail
  44.      * of the zipmap a few bytes to the front and shrink the zipmap,
  45.      * as we want zipmaps to be very space efficient. */ 
  46.     empty = freelen-reqlen; 
  47.    if (empty >= ZIPMAP_VALUE_MAX_FREE) { 
  48.         /* First, move the tail <empty> bytes to the front, then resize
  49.          * the zipmap to be <empty> bytes smaller. */ 
  50.         offset = p-zm; 
  51.         memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1)); 
  52.         zmlen -= empty; 
  53.         zm = zipmapResize(zm, zmlen); 
  54.         p = zm+offset; 
  55.         vempty = 0; 
  56.     }else
  57.         vempty = empty; 
  58.     } 
  59.  
  60.    /* Just write the key + value and we are done. */ 
  61.    /* Key: */ 
  62.     p += zipmapEncodeLength(p,klen); 
  63.     memcpy(p,key,klen); 
  64.     p += klen; 
  65.    /* Value: */ 
  66.     p += zipmapEncodeLength(p,vlen); 
  67.     *p++ = vempty; 
  68.     memcpy(p,val,vlen); 
  69.    return zm; 

此方法图解如下:



文件中还有几个方法如zipmapGet,zipmapNext等,如果zipmapSet搞懂,其它方法便无障碍。

C代码 复制代码收藏代码
  1. /* Return the number of entries inside a zipmap */ 
  2. unsignedint zipmapLen(unsigned char *zm) { 
  3.     unsignedint len = 0; 
  4.    if (zm[0] < ZIPMAP_BIGLEN) { 
  5. //早在注释时就说过,如果size大小超过了ZIPMAP_BIGLEN,那么zipmap的第一个字节将不会记录size,size需要遍历才能得出 
  6.         len = zm[0]; 
  7.     }else
  8.         unsignedchar *p = zipmapRewind(zm); 
  9.        while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++; 
  10.  
  11.        /* Re-store length if small enough */ 
  12.        if (len < ZIPMAP_BIGLEN) zm[0] = len; 
  13.     } 
  14.    return len; 

//为什么在记录zipmap长度时不效仿记录key/value长度的方法,以至于如果取个数都需要遍历一遍?

不过根据我的实际应用经验,很少会直接去取hash的size。

0 0
原创粉丝点击