一篇逆向压缩的算法

来源：互联网发布：2013年最火的网络歌曲编辑：程序博客网时间：2024/04/30 20:45

代码出自pediy. LTT的题目,Aker解决.

现在将代码贴出来，不过我还没有完全看明白，我会继续跟踪学习的。

#include <Windows.h>
#include <stdio.h>
#include <string.h>

/**
 * Adaptive binary range encoder that writes into a caller-supplied buffer.
 *
 * One shared 11-bit probability (Prob) models every bit. Each encoded bit
 * splits the current interval [Low, Low + uiSom) in proportion to Prob;
 * whenever the interval width drops below 2^24 the top byte of Low is shifted
 * out towards the output buffer.
 *
 * The members use the built-in types that the Windows typedefs
 * BYTE / PBYTE / UINT / UINT64 alias, so the public signatures are unchanged
 * for existing callers while the class itself no longer needs <Windows.h>.
 *
 * The caller owns the output buffer and must make it large enough; no bounds
 * checking is performed on writes.
 */
class CBitEncoder
{
    unsigned char* m_pbuf;      // caller-owned output buffer
    unsigned int m_len;         // number of bytes written to m_pbuf so far

    unsigned int Prob;          // share of the interval given to symbol 0, scaled by 2^11
    unsigned int _cacheSize;    // 1 once a byte is pending in _cache, 0 before the first shift
    unsigned char _cache;       // last byte shifted out of Low, not yet written

    unsigned long long Low;     // lower bound of the interval; bit 32 holds a pending carry
    unsigned int uiSom;         // width of the current interval

    void WriteByte(unsigned char b) { m_pbuf[m_len++] = b; }

    // Resets the coder state (not the buffer pointer/length).
    void Init()
    {
        Prob = (1 << 10);       // start at 1/2
        Low = 0;
        uiSom = 0xFFFFFFFF;
        _cacheSize = 0;
        _cache = 0;
    }

    // Emits the pending cache byte (plus carry) and moves the top byte of Low
    // into the cache.
    void ShiftLow()
    {
        const unsigned int carry = static_cast<unsigned int>((Low >> 32) & 0x000000ff);
        if (_cacheSize)
        {
            if (_cache == 0xff && carry)
            {
                // The pending byte wraps to 0x00, so the +1 has to ripple back
                // through the bytes that were already written: every trailing
                // 0xFF becomes 0x00 and the first other byte is incremented.
                // The index is checked before the buffer is touched.
                unsigned int pos = m_len;
                while (pos > 0 && m_pbuf[pos - 1] == 0xff)
                    m_pbuf[--pos] = 0;
                if (pos > 0)
                    ++m_pbuf[pos - 1];
                // pos == 0 would be a carry out of the very first byte. Low plus
                // the interval width never exceeds the initial interval, so a
                // valid coder state cannot produce it; nothing is written then.
            }
            WriteByte(static_cast<unsigned char>(carry + _cache));
            _cacheSize--;
        }
        _cache = static_cast<unsigned char>((Low >> 24) & 0x000000ff);
        _cacheSize++;
        Low <<= 8;
        Low &= 0xffffffff;      // drops the carry bit that was just consumed
    }

public:
    // Leaves the encoder in a defined state even before SetOutputBufPtr();
    // encoding still requires a buffer to be set first.
    CBitEncoder() : m_pbuf(0), m_len(0) { Init(); }

    // Starts a new stream that is written to buf (caller-owned).
    void SetOutputBufPtr(unsigned char* buf)
    {
        Init();
        m_pbuf = buf;
        m_len = 0;
    }

    // Number of bytes written so far.
    int GetLength() { return static_cast<int>(m_len); }

    // Writes out the pending cache byte and the four bytes still held in Low.
    // The bound is re-evaluated each pass, so this always makes five ShiftLow
    // calls: the first one writes nothing when no byte was pending yet.
    void FlushData()
    {
        for (unsigned int i = 0; i < _cacheSize + 4u; i++)
            ShiftLow();
    }

    // Encodes one bit: symbol == 0 selects the lower part of the interval,
    // any other value the upper part.
    void bEncode(unsigned int symbol)
    {
        // Width assigned to symbol 0: roughly half the interval while Prob is
        // near 0x400, more or less as Prob moves above or below it.
        unsigned int u = (this->uiSom >> 11) * this->Prob;

        if (symbol == 0)
        {
            this->uiSom = u;
            this->Prob += ((1 << 11) - this->Prob) >> 5;    // move 1/32 of the way towards 2^11
        }
        else
        {
            this->Low += u;
            this->uiSom -= u;
            this->Prob -= (this->Prob) >> 5;                // shrink by 1/32; settles at 0x1f
        }

        if (this->uiSom < (1 << 24))
        {
            // The top 8 bits of the width are zero: shift one byte out and
            // scale the interval up by 256.
            this->ShiftLow();
            this->uiSom <<= 8;
        }
    }
};
//--------------- Encoder above, decoder below

/**
 * Adaptive binary range decoder, the counterpart of CBitEncoder.
 *
 * It keeps the same interval width (uiSom) and probability (Prob) as the
 * encoder and tracks the offset of the encoded value inside the interval
 * (uiAny) to decide each bit.
 *
 * The members use the built-in types that the Windows typedefs
 * BYTE / PBYTE / UINT alias, so the public signatures are unchanged for
 * existing callers while the class itself no longer needs <Windows.h>.
 *
 * The stream carries no length. Callers detect the end by watching
 * GetRemainByte() turn negative, which happens when the decoder asks for a
 * byte beyond the supplied input.
 */
class CBitDecoder
{
    unsigned char* m_psrc;      // next unread input byte (caller-owned buffer)
    int m_srclen;               // input bytes left; negative once reads went past the end

    unsigned int uiSom;         // width of the current interval
    unsigned int uiAny;         // encoded value relative to the interval's lower bound
    unsigned int Prob;          // share of the interval given to symbol 0, scaled by 2^11

    // Returns the next input byte. Requests beyond the end of the input are
    // still counted (so GetRemainByte() goes negative) but yield 0 instead of
    // reading outside the caller's buffer.
    unsigned char ReadByte()
    {
        if (m_srclen-- > 0)
            return *m_psrc++;
        return 0;
    }

    // Resets the model and loads the first four bytes of the stream.
    void Init()
    {
        Prob = (1 << 10);       // start at 1/2
        uiAny = 0;
        uiSom = 0xFFFFFFFF;
        for (int i = 0; i < 4; i++)
            uiAny = (uiAny << 8) | this->ReadByte();
    }

public:
    // Leaves the decoder in a defined state even before SetSrcBuf().
    CBitDecoder() : m_psrc(0), m_srclen(0), uiSom(0xFFFFFFFF), uiAny(0), Prob(1 << 10) {}

    // Starts decoding the srclen bytes at psrc (caller-owned buffer).
    void SetSrcBuf(unsigned char* psrc, int srclen)
    {
        m_psrc = psrc;
        m_srclen = srclen;
        Init();
    }

    // Unread input bytes; negative when the decoder requested more bytes than
    // were supplied.
    int GetRemainByte() { return m_srclen; }

    // Decodes one bit: false for symbol 0, true for symbol 1.
    bool bDecode()
    {
        bool b;
        // Width assigned to symbol 0 (initially 0x001fffff * 2^10).
        unsigned int u = (this->uiSom >> 11) * this->Prob;
        if (this->uiAny < u)
        {
            this->uiSom = u;                                // keep the lower part
            this->Prob += ((1 << 11) - this->Prob) >> 5;
            b = false;
        }
        else
        {
            this->uiSom -= u;                               // keep the upper part
            this->uiAny -= u;
            this->Prob -= (this->Prob) >> 5;
            b = true;
        }
        // Each pass through this branch consumes one input byte, so the less
        // often it runs the better the data compressed.
        if (this->uiSom < (1 << 24))
        {
            this->uiAny = (this->uiAny << 8) | this->ReadByte();
            this->uiSom <<= 8;
        }
        return b;
    }
};

// Compresses srclen bytes from psrc into buf and returns the number of bytes
// written. Every source byte is fed to the encoder bit by bit, least
// significant bit first. buf is caller-owned and must be large enough.
int Test_Encode(PBYTE psrc, int srclen, PBYTE buf)
{
    CBitEncoder encoder;
    encoder.SetOutputBufPtr(buf);

    for (int pos = 0; pos < srclen; ++pos)
    {
        UINT pending = psrc[pos];
        // Peel off the lowest bit eight times.
        for (int left = 8; left > 0; --left)
        {
            encoder.bEncode(pending & 1);
            pending >>= 1;
        }
    }

    encoder.FlushData();
    return encoder.GetLength();
}

// Decompresses the srclen bytes at psrc into buf and returns the number of
// complete bytes produced. Bits are assembled least significant bit first.
// Decoding stops as soon as the decoder reports a negative remaining-byte
// count; a partially assembled byte is discarded at that point. buf is
// caller-owned and is not bounds-checked.
int Test_Decode(PBYTE psrc, int srclen, PBYTE buf)
{
    CBitDecoder decoder;
    decoder.SetSrcBuf(psrc, srclen);

    int produced = 0;
    for (;;)
    {
        BYTE value = 0;
        int bit = 0;
        do
        {
            if (decoder.bDecode())
                value |= static_cast<BYTE>(1 << bit);
            if (decoder.GetRemainByte() < 0)
                return produced;
        } while (++bit < 8);

        buf[produced++] = value;
    }
}

void main()
{
    char* psrc = "exam by LiuTaoTao 20070728";
    int srclen = strlen(psrc);
    BYTE buf[3000];
    int len = Test_Encode((PBYTE)psrc, srclen, buf);
    printf("Srclen = %d Encode len = %d /n", srclen, len);

    BYTE outbuf[3000];
    int len2 = Test_Decode(buf, len, outbuf);
    printf("Decode len from %d to %d /n", len, len2);
    if (len2 >= srclen && !memcmp(psrc, outbuf, srclen))
        printf("OK/n");
    else
        printf("Error/n");
}
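// This compression scheme is simple and fast. According to the author, the worst case grows the data by 4 bytes.
// NOTE(review): 4 bytes is the fixed flush overhead; input that keeps defeating the single adaptive probability may expand further - confirm.
// Another drawback: the decoder cannot reliably tell when decoding is finished, so it sometimes produces a few extra bytes.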