SQLite文件格式初步分析之varint

来源:互联网 发布:php图片管理系统 编辑:程序博客网 时间:2024/06/08 00:25

        varint为变长整数。长度为19个字节。最大可表示64位整数。

    使用varint表示整数的原因:大多数情况下,整数数值比较小,如果使用64位整数保存的话,会浪费空间。

 

        Varint格式如下(源于源码util.c)

A = 0xxxxxxx    7位数据,一个标志位(这里就是高位的0)

B = 1xxxxxxx    7位数据,一个标志位(这里就是高位的1)

C = xxxxxxxx    8位数据

 

   7 bits - A

  14 bits - BA

  21 bits - BBA

  28 bits - BBBA

  35 bits - BBBBA

  42 bits - BBBBBA

  49 bits - BBBBBBA

  56 bits - BBBBBBBA

  64 bits - BBBBBBBBC

 

    对于前8个字节,以A为结尾,通过判断字节是否为A来来确定是否到达varint尾部。

    对于9个字节组成的varint来说,如果前8个字节都为B,则说明此varint9个字节长。如果为8B,则说明下一个字节就是varint的尾部。SQLite的作者设计果然精巧。

 

以下是源码分析:

/*** 从p[0]开始的内存处读取64位变长整数。** 返回varint的字节数。varint值保存在*v中。*/u8 sqlite3GetVarint(const unsigned char *p, u64 *v){  u32 a,b,s;  a = *p;  /* a: p0 (unmasked) */  //varint为一个字节时  if (!(a&0x80)) //如果a<=127=0x7F=1111111b  {//值在0到127之间    *v = a; //直接取值 之后返回    return 1;  }  p++;  b = *p;  /* b: p1 (unmasked) */  //varint为两个字节时  if (!(b&0x80))//判断第二个字节是否为A。如果是,则此varint占用两个字节 格式为BA  {    a &= 0x7f; //a为第一个字节 取低7位    a = a<<7; //a左移7位 为B的7位腾出空间    a |= b; //A的低7位+B的低7位    *v = a;    return 2;  }  /* Verify that constants are precomputed correctly */  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );  p++;  a = a<<14;  a |= *p; //此后a为BXA或BXB  /* a: p0<<14 | p2 (unmasked) */  if (!(a&0x80)) //判断第三个字节是否为A。如果是,则此varint占用三个字节,格式为BBA。  {    a &= SLOT_2_0; //SLOT_2_0=111111100000001111111 清空X位    b &= 0x7f;    b = b<<7;    a |= b; //填充X位 合成BBA    *v = a;    return 3;  }  //</略过分析>  /* CSE1 from below */  a &= SLOT_2_0;  p++;  b = b<<14;  b |= *p;  /* b: p1<<14 | p3 (unmasked) */  if (!(b&0x80))  {    b &= SLOT_2_0;    /* moved CSE1 up */    /* a &= (0x7f<<14)|(0x7f); */    a = a<<7;    a |= b;    *v = a;    return 4;  }  /* a: p0<<14 | p2 (masked) */  /* b: p1<<14 | p3 (unmasked) */  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */  /* moved CSE1 up */  /* a &= (0x7f<<14)|(0x7f); */  b &= SLOT_2_0;  s = a;  /* s: p0<<14 | p2 (masked) */  p++;  a = a<<14;  a |= *p;  /* a: p0<<28 | p2<<14 | p4 (unmasked) */  if (!(a&0x80))  {    /* we can skip these cause they were (effectively) done above    ** while calculating s */    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */    /* b &= (0x7f<<14)|(0x7f); */    b = b<<7;    a |= b;    s = s>>18;    *v = ((u64)s)<<32 | a;    return 5;  }  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */  s = s<<7;  s |= b;  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */  p++;  b = b<<14;  b |= *p;  /* b: p1<<28 | p3<<14 | p5 (unmasked) */  if (!(b&0x80))  {    /* we can skip this cause it was (effectively) done above in calc'ing s */    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */    a &= SLOT_2_0;    a = a<<7;    a |= b;    s = s>>18;    *v = ((u64)s)<<32 | a;    return 6;  }  p++;  a = a<<14;  a |= *p;  /* a: p2<<28 | p4<<14 | p6 (unmasked) */  if (!(a&0x80))  {    a &= SLOT_4_2_0;    b &= SLOT_2_0;    b = b<<7;    a |= b;    s = s>>11;    *v = ((u64)s)<<32 | a;    return 7;  }  /* CSE2 from below */  a &= SLOT_2_0;  p++;  b = b<<14;  b |= *p;  /* b: p3<<28 | p5<<14 | p7 (unmasked) */  if (!(b&0x80))  {    b &= SLOT_4_2_0;    /* moved CSE2 up */    /* a &= (0x7f<<14)|(0x7f); */    a = a<<7;    a |= b;    s = s>>4;    *v = ((u64)s)<<32 | a;    return 8;  }  //</略过分析>    //前8个字节都为B,所以此varint有9个字节,格式为BBBBBBBBC。  p++;  a = a<<15;  a |= *p;  /* a: p4<<29 | p6<<15 | p8 (unmasked) */  /* moved CSE2 up */  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */  b &= SLOT_2_0;  b = b<<8;  a |= b;  s = s<<4;  b = p[-4];  b &= 0x7f;  b = b>>3;  s |= b;  *v = ((u64)s)<<32 | a;  return 9;}


0 0
原创粉丝点击