GZIP相关操作

来源:互联网 发布:transmit for mac 编辑:程序博客网 时间:2024/06/07 14:05

GZIP压缩

zlib是一个通用的压缩开源库,提供了在内存中压缩和解压的函数,包括对解压后数据的校验。目前版本的zlib只支持deflate方法,但是其它的方法将会被添加进来并且拥有同样的接口。

—— zlib manual
    deflate算法在rfc1951中有详细的说明。

    zlib同时又是一种数据格式,使用zlib库压缩后的数据会在deflate数据的头和尾添加信息,形成zlib格式的数据。

   gzip也是一种数据压缩格式,可以大体分为头部,数据部和尾部三个部分,其中头部和尾部主要是一些文档属性和校验信息(rfc1952),数据部主要是用deflate方法压缩得到的数据。
   
    zlib库默认的压缩方法并不是gzip的,而是zlib的,因此使用zlib压缩得到gzip格式的数据有两种方法:

  1. 使用zlib提供的gz***系列函数可以直接把想要的内容写入一个磁盘gzip文件;
  2. 如果想在内存中生成gzip格式的数据,可以在初始化的时候调用deflateInit2函数,并指定为gzip格式(windowBits取MAX_WBITS + 16),代码如下:

 

  z_stream d_stream;
  d_stream.zalloc = NULL;
  d_stream.zfree = NULL;
  d_stream.opaque = NULL;
  int ret = deflateInit2(&d_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, DEFAULT_WINDOWSIZE, DEFAULT_MEMLEVEL,  Z_DEFAULT_STRATEGY);

  if (Z_OK != ret)
  {
    printf("init deflate error/n");
    return ret;
  }

    之后,用deflate压缩出来的数据就是gzip的了

 

HTTP的角度

1 客户端 http Request  Header上带上 Accept-Encoding:gzip,deflate

2服务器若是支持gzip压缩则在http response header

部分返回Content-Encoding: gzip 或者Content-Type: application/x-gzip

3body部分用gzip解压缩 则得到网页内容.

传说中iebug 在处理js css压缩的时候有bug,我不理解 挺简单的怎么会有bug.

gzip的角度

gzip是一种数据格式 默认且目前仅使用deflate算法压缩data部分

zlib也是一种数据格式,使用deflate算法压缩数据部分.

deflate是一种压缩算法,结合了LZ77和huffman编码,可以看作huffman编码的一种加强

 

zlib是一个开源库, 提供deflate压缩和对应的inflate解压缩.

不过zlib的deflate/inflate默认是处理zlib格式数据.必须使用

deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY);

初始化(windowBits取负值)才是处理raw deflate data.(这一点在zlib manual没有提,faq中提到,困扰了我好久,还是同事L帮我调试发现)

至于gzip格式解析 对着RFC写就可以了.

参见RFC 1950 关于zlib http://www.faqs.org/rfcs/rfc1950.html

      RFC 1951 关于deflate http://www.faqs.org/rfcs/rfc1951.html

      RFC 1952 关于gzip http://www.faqs.org/rfcs/rfc1952.html 


nt  CGzip::Ungzip(const std::string & inStr , std::string &outStr){
    static int nFileCount=0;
        nFileCount++;
    string strZipFileName="test";
//    CConvert::StrToFile(inStr,strZipFileName+CConvert::toString<int>(nFileCount)+"H.gzip"clip_image001;
    if(inStr.length()<11){
        return -1;   
    }
    //process gzip header
    unsigned int skipCt = 10;
    unsigned int skipZeroCt = 0;
    unsigned char ID1 = inStr[0];
    unsigned char ID2 = inStr[1];
    unsigned char XFL=inStr[8];
    bool bFEXTRA = false ;
    bool bFNAME = false ;
    bool bFCOMMENT = false ;
    bool bFHCRC = false ;
    unsigned int XLEN = 0;
   
    if( (ID1!=31) && (ID2!=139)){
        return -1;  //
gzip头部
        }
    unsigned char CM = inStr[2];
    if(CM!=clip_image002{
        return -1; //
现在都只处理 deflate压缩的
    }
    unsigned char FLG = inStr[3];
    if( (FLG & GZIP_HEAD_FEXTRA) != 0){
        bFEXTRA = true ;
        skipCt += 2;
        XLEN = inStr[10]+ inStr[11]*256 ;//
按照小端字节序列处理
        skipCt += XLEN;
    }
    if( (FLG & GZIP_HEAD_FNAME) != 0){
        bFNAME = true;
        skipZeroCt++;
    }
    if( (FLG & GZIP_HEAD_FCOMMENT) != 0){
        bFCOMMENT = true;
        skipZeroCt++;   
    }
   
    size_t passedZeroCt = 0;
   
    size_t iStep = skipCt ;
    for( size_t iStep =  skipCt ; iStep<inStr.length(); iStep++){
            if(passedZeroCt>=skipZeroCt){
                break;   
            }
            if(inStr[iStep]==''clip_image001{
                passedZeroCt++;   
            }
       
    }
    skipCt = iStep ;
    if( (FLG & GZIP_HEAD_FHCRC) != 0){
        bFHCRC = true;
        skipCt+=2 ;
    }
        string coreStr = inStr.substr(skipCt,inStr.length()-8-skipCt);
           return CGzip::Inflate(coreStr,outStr);
   
   
   
}

 

int  CGzip:clip_image003ogzip(const std::string & inStr , std::string &outStr){
    char pAddHead[10];
    unsigned long crc = 0;
    // gzip header
    static const char deflate_magic[2] = {'37', '/213'};
    snprintf(pAddHead, 10,
            "%c%c%c%c%c%c%c%c%c%c", deflate_magic[0],
            deflate_magic[1], Z_DEFLATED, 0 /* flags */,
            0, 0, 0, 0 /* 4 chars for mtime */,
            0 /* xflags */, 0xff);
    string addHead(pAddHead,10);
    //gzip's raw deflate body
    if(CGzip:clip_image003eflate(inStr,outStr)<0){
        return - 1;   
    }
    //gzip trailer
    crc = crc32(crc, (const Bytef*)inStr.data(), inStr.length());
    char  tailBuf[8];
    memcpy(tailBuf, &crc, 4);
    int isize=inStr.size();
    memcpy(tailBuf,&isize,4);
    string tailStr(tailBuf , 8 );
    outStr = addHead + outStr+tailStr;     //
    return outStr.length(); //

 

 

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <zlib.h>
/* Compress data
 *
 * Compresses ndata bytes at data into zdata using deflateInit() defaults,
 * i.e. the output carries zlib's default wrapper.
 *
 * data/ndata : input buffer and its length
 * zdata      : output buffer
 * nzdata     : in: capacity of zdata; out: number of bytes written
 *              (only updated when 0 is returned)
 *
 * Returns 0 on success, -1 on bad arguments or a zlib error, or a positive
 * count of input bytes left unconsumed when the Z_NO_FLUSH phase stopped
 * with input remaining.
 *
 * The deflate state is now released on every exit path; previously each
 * early return leaked it.
 * NOTE(review): avail_in/avail_out are uInt in zlib, so sizes above the uInt
 * range would presumably be truncated on assignment -- confirm callers stay
 * below that.
 */
int zcompress(Bytef *data, uLong ndata,
Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    int err = 0;
    int remaining = 0;

    if(!data || ndata == 0) return -1;

    c_stream.zalloc = (alloc_func)0;
    c_stream.zfree = (free_func)0;
    c_stream.opaque = (voidpf)0;
    if(deflateInit(&c_stream, Z_DEFAULT_COMPRESSION) != Z_OK) return -1;

    c_stream.next_in = data;
    c_stream.avail_in = ndata;
    c_stream.next_out = zdata;
    c_stream.avail_out = *nzdata;

    while (c_stream.avail_in != 0 && c_stream.total_out < *nzdata)
    {
        if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(c_stream.avail_in != 0)
    {
        /* Input was not fully consumed: report how much is left. */
        remaining = c_stream.avail_in;
        deflateEnd(&c_stream);
        return remaining;
    }

    /* Flush whatever zlib still holds internally and close the stream. */
    for (;;) {
        if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(deflateEnd(&c_stream) != Z_OK) return -1;
    *nzdata = c_stream.total_out;
    return 0;
}

/* Compress gzip data
 *
 * NOTE: despite the name, the stream is initialised with windowBits
 * = -MAX_WBITS, which per the zlib manual yields *raw* deflate data with no
 * gzip (or zlib) header or trailer. That matches gzdecompress() in this
 * file, which inflates with the same windowBits.
 *
 * data/ndata : input buffer and its length
 * zdata      : output buffer
 * nzdata     : in: capacity of zdata; out: number of bytes written
 *              (only updated when 0 is returned)
 *
 * Returns 0 on success, -1 on bad arguments or a zlib error, or a positive
 * count of input bytes left unconsumed when the Z_NO_FLUSH phase stopped
 * with input remaining.
 *
 * The deflate state is now released on every exit path; previously each
 * early return leaked it.
 */
int gzcompress(Bytef *data, uLong ndata,
Bytef *zdata, uLong *nzdata)
{
    z_stream c_stream;
    int err = 0;
    int remaining = 0;

    if(!data || ndata == 0) return -1;

    c_stream.zalloc = (alloc_func)0;
    c_stream.zfree = (free_func)0;
    c_stream.opaque = (voidpf)0;
    if(deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                    -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1;

    c_stream.next_in = data;
    c_stream.avail_in = ndata;
    c_stream.next_out = zdata;
    c_stream.avail_out = *nzdata;

    while (c_stream.avail_in != 0 && c_stream.total_out < *nzdata)
    {
        if(deflate(&c_stream, Z_NO_FLUSH) != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(c_stream.avail_in != 0)
    {
        /* Input was not fully consumed: report how much is left. */
        remaining = c_stream.avail_in;
        deflateEnd(&c_stream);
        return remaining;
    }

    /* Flush whatever zlib still holds internally and close the stream. */
    for (;;) {
        if((err = deflate(&c_stream, Z_FINISH)) == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            deflateEnd(&c_stream);
            return -1;
        }
    }
    if(deflateEnd(&c_stream) != Z_OK) return -1;
    *nzdata = c_stream.total_out;
    return 0;
}

/* Uncompress data
 *
 * Inflates nzdata bytes at zdata into data using inflateInit() defaults
 * (the counterpart of zcompress() in this file).
 *
 * zdata/nzdata : compressed input and its length
 * data         : output buffer
 * ndata        : in: capacity of data; out: number of bytes written
 *                (only updated when 0 is returned)
 *
 * Returns 0 when the loop ends -- at end of stream, when the input is used
 * up, or when the output capacity is reached -- and -1 on a zlib error.
 * A return of 0 therefore does not by itself prove the stream was complete.
 *
 * The inflate state is now released on the error path as well.
 */
int zdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
    int err = 0;
    z_stream d_stream; /* decompression stream */

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    if(inflateInit(&d_stream) != Z_OK) return -1;

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        /* One byte in, one byte out per call: the loop bounds above then
         * guarantee neither buffer is overrun. */
        d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
        err = inflate(&d_stream, Z_NO_FLUSH);
        if(err == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}

/* HTTP gzip decompress
 *
 * Inflates an HTTP response body. The stream is opened with windowBits 47
 * (32 + 15), which per the zlib manual enables automatic zlib/gzip header
 * detection. If the very first bytes are rejected with Z_DATA_ERROR, the
 * body is assumed to be headerless deflate data: the stream is reset, a
 * synthetic two-byte zlib header (dummy_head) is fed in, and the real input
 * is replayed from its start.
 *
 * Fixes over the previous version:
 *  - the fallback called inflate() again without inflateReset(), so zlib
 *    stayed in its error state and the retry could only fail;
 *  - next_in was left pointing into dummy_head instead of being restored
 *    to the caller's data;
 *  - the inflate state leaked on every error return.
 *
 * zdata/nzdata : compressed input and its length
 * data         : output buffer
 * ndata        : in: capacity of data; out: number of bytes written
 *                (only updated when 0 is returned)
 *
 * Returns 0 when the loop ends (end of stream, input used up, or output
 * capacity reached), -1 on a zlib error.
 */
int httpgzdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
    int err = 0;
    int dummy_used = 0;   /* the fallback is attempted at most once */
    uLong consumed = 0;   /* bytes of zdata handed to and accepted by zlib */
    z_stream d_stream = {0}; /* decompression stream */
    static char dummy_head[2] =
    {
        0x8 + 0x7 * 0x10,
        (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
    };

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    if(inflateInit2(&d_stream, 47) != Z_OK) return -1;

    while (d_stream.total_out < *ndata && consumed < nzdata) {
        d_stream.next_in = zdata + consumed;
        d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
        err = inflate(&d_stream, Z_NO_FLUSH);
        consumed += 1 - d_stream.avail_in;
        if(err == Z_STREAM_END) break;
        if(err == Z_OK) continue;

        /* Retry only for a header-level data error seen before any output;
         * anything else, or a second failure, is final. */
        if(err != Z_DATA_ERROR || dummy_used || d_stream.total_out != 0)
        {
            inflateEnd(&d_stream);
            return -1;
        }
        dummy_used = 1;
        if(inflateReset(&d_stream) != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
        d_stream.next_in = (Bytef*) dummy_head;
        d_stream.avail_in = sizeof(dummy_head);
        d_stream.next_out = data;
        d_stream.avail_out = 1;
        if(inflate(&d_stream, Z_NO_FLUSH) != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
        /* Replay the caller's bytes from the beginning as the deflate body. */
        consumed = 0;
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}

/* Uncompress gzip data
 *
 * NOTE: despite the name, the stream is initialised with windowBits
 * = -MAX_WBITS, which per the zlib manual means *raw* deflate input with no
 * gzip or zlib header. It is the counterpart of gzcompress() in this file.
 *
 * The previous version reacted to Z_DATA_ERROR by feeding a dummy zlib
 * header and calling inflate() again. With no inflateReset() that retry
 * could only fail, and a zlib header has no meaning in raw mode, so the
 * branch always ended in "return -1". It is removed; the result is the
 * same, and the inflate state is now released on the error path.
 *
 * zdata/nzdata : compressed input and its length
 * data         : output buffer
 * ndata        : in: capacity of data; out: number of bytes written
 *                (only updated when 0 is returned)
 *
 * Returns 0 when the loop ends (end of stream, input used up, or output
 * capacity reached), -1 on a zlib error.
 */
int gzdecompress(Byte *zdata, uLong nzdata,
Byte *data, uLong *ndata)
{
    int err = 0;
    z_stream d_stream = {0}; /* decompression stream */

    d_stream.zalloc = (alloc_func)0;
    d_stream.zfree = (free_func)0;
    d_stream.opaque = (voidpf)0;
    d_stream.next_in = zdata;
    d_stream.avail_in = 0;
    d_stream.next_out = data;
    if(inflateInit2(&d_stream, -MAX_WBITS) != Z_OK) return -1;

    while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
        d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
        err = inflate(&d_stream, Z_NO_FLUSH);
        if(err == Z_STREAM_END) break;
        if(err != Z_OK)
        {
            inflateEnd(&d_stream);
            return -1;
        }
    }
    if(inflateEnd(&d_stream) != Z_OK) return -1;
    *ndata = d_stream.total_out;
    return 0;
}

#ifdef _DEBUG_ZSTREAM
#define BUF_SIZE 65535
/* Round-trip demo: compress a sample string with gzcompress() and expand it
 * again with gzdecompress(), printing the sizes and buffers.
 *
 * Fixes: the decompressor is now given the compressed length (nzdata), not
 * the original length; uLong values are printed with %lu; the line endings
 * are real newlines ("\n", which had been mangled to "/n"); main returns a
 * value.
 */
int main()
{
    const char *data = "kjdalkfjdflkjdlkfjdklfjdlkfjlkdjflkdjflddajfkdjfkdfaskf;ldsfk;ldakf;ldskfl;dskf;ld";
    uLong ndata = strlen(data);
    Bytef zdata[BUF_SIZE];
    uLong nzdata = BUF_SIZE;
    Bytef odata[BUF_SIZE];
    uLong nodata = BUF_SIZE;

    memset(zdata, 0, BUF_SIZE);
    //if(zcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
    if(gzcompress((Bytef *)data, ndata, zdata, &nzdata) == 0)
    {
        /* zdata is binary; %s prints it only up to the first zero byte. */
        fprintf(stdout, "nzdata:%lu %s\n", nzdata, zdata);
        /* Zero-fill so the inflated text is NUL-terminated for %s below. */
        memset(odata, 0, BUF_SIZE);
        //if(zdecompress(zdata, nzdata, odata, &nodata) == 0)
        if(gzdecompress(zdata, nzdata, odata, &nodata) == 0)
        {
            fprintf(stdout, "%lu %s\n", nodata, odata);
        }
    }
    return 0;
}

/*
 * Get the uncompressed size recorded in a gzip file.
 *
 * RFC 1952 stores ISIZE -- the original length modulo 2^32 -- as the last
 * four bytes of the file, least significant byte first.
 *
 * NOTE(review): this is a second stand-alone example; it defines main()
 * again and cannot share a translation unit with the demo main() above.
 *
 * Fixes: the #include directive was split across two lines; the file is
 * opened in binary mode; exactly four bytes are read (the old call asked
 * fread for four ints into a single int); the value is decoded as unsigned
 * little-endian regardless of host byte order; the output ends in a real
 * newline.
 */
#include <stdio.h>
int main()
{
    FILE *pFile = NULL;
    unsigned char isize[4];
    unsigned long nUnCompressSize = 0;

    pFile = fopen("test.gz", "rb");
    if (!pFile)
        goto Exit0;

    /* ISIZE occupies the final four bytes of the file. */
    if (fseek(pFile, -4, SEEK_END))
        goto Exit0;

    if (fread(isize, 1, sizeof(isize), pFile) != sizeof(isize))
        goto Exit0;

    nUnCompressSize = (unsigned long)isize[0]
                    | ((unsigned long)isize[1] << 8)
                    | ((unsigned long)isize[2] << 16)
                    | ((unsigned long)isize[3] << 24);
    printf("uncompress file size %lu \n", nUnCompressSize);

Exit0:
    if (pFile)
    {
        fclose(pFile);
    }

    return 0;
}

#endif
原创粉丝点击