C语言实现RLE（游程编码）压缩算法（粘贴就能用）

来源：互联网发布：js监听scroll事件编辑：程序博客网时间：2024/05/16 06:51

RLE算法是最简单的压缩算法，作为学生党做作业不可避免的要去网上找RLE算法的代码，然而网上所有RLE压缩算法的代码都不好使，笔者在网上代码的基础上略加修改，使之粘贴即可使用。

源码来自：http://blog.csdn.net/calcular/article/details/46804919
算法思想来自：http://blog.csdn.net/orbit/article/details/7062218
感谢原作者。

原代码的问题在于，当一个字符超过127次重复之后，编码会出现错误。原作者可能并没有用足够的样本去测试，导致这个bug的出现。

废话不多说，先上代码。

#include <stdbool.h> /* bool / true / false when this snippet is compiled as C */

/*
 * Run-length codec with a one-byte token in front of every segment.
 *
 *   token bit 7 = 1 : the next single byte is repeated X times
 *   token bit 7 = 0 : the next X bytes are stored verbatim
 *   token bits 0..6 : X - 1, so X is in the range 1..128
 *
 * The encoder only emits a repeat segment for runs of three or more equal
 * bytes; anything shorter is stored verbatim.
 */

/*
 * Reports whether the three bytes starting at `in` are all equal.
 *
 * in   - current position in the input
 * rest - number of bytes that follow *in (rest + 1 bytes are readable)
 *
 * Returns false without touching memory when fewer than three bytes are
 * available (rest < 2).
 */
bool IsRepeat3(unsigned char *in, int rest)
{
    if (rest < 2)
        return false;
    return in[0] == in[1] && in[0] == in[2];
}

/*
 * Returns the length of the verbatim segment that starts at `in`.
 *
 * in   - current position in the input
 * rest - number of bytes that follow *in
 *
 * The segment stops right before the next run of three equal bytes, at the
 * end of the input, or after 128 bytes, whichever comes first. When at most
 * three bytes remain (rest <= 2) all of them are returned as one segment;
 * the caller is expected to have ruled out a run of three beforehand.
 */
int GetNoRepeat3(unsigned char *in, int rest)
{
    if (rest <= 2)
        return rest + 1;

    int len = 0;
    while (!IsRepeat3(in + len, rest - len)) {
        len++;
        if (len >= 128)
            return len;          /* a token cannot describe more than 128 bytes */
        if (rest - len == 0)
            return len + 1;      /* only the last byte is left: take it as well */
    }
    return len;
}

/*
 * Compresses `insize` bytes from `inbuf` into `outbuf1`.
 *
 * inbuf   - input data
 * insize  - number of input bytes
 * outbuf1 - destination buffer
 * outsize - capacity of the destination buffer in bytes
 *
 * Returns the number of bytes written, or -1 when the destination buffer is
 * too small. An input of zero (or negative) size produces 0.
 */
int Rle_Encode(unsigned char *inbuf, int insize, unsigned char *outbuf1, int outsize)
{
    unsigned char *src = inbuf;
    unsigned char *out = outbuf1;
    int rest = insize - 1;       /* bytes that follow *src */
    int outrest = outsize;

    while (rest >= 0) {
        if (IsRepeat3(src, rest)) {
            unsigned char value = *src;
            int run = 1;

            /* `run <= rest` keeps src[run] inside the input buffer, so the
             * scan never looks at the byte after the last input byte. */
            while (run < 128 && run <= rest && src[run] == value)
                run++;

            if (outrest < 2)
                return -1;
            *out++ = (unsigned char)((run - 1) | 128);
            *out++ = value;
            outrest -= 2;

            src += run;
            rest -= run;
        } else {
            int num = GetNoRepeat3(src, rest);
            int i;

            if (outrest < num + 1)
                return -1;
            *out++ = (unsigned char)(num - 1);
            for (i = 0; i < num; i++)
                *out++ = src[i];

            src += num;
            rest -= num;
            outrest -= num + 1;
        }
    }
    return outsize - outrest;
}

/*
 * Expands data produced by Rle_Encode.
 *
 * inbuf   - compressed data
 * insize  - number of compressed bytes
 * outbuf  - destination buffer
 * outsize - capacity of the destination buffer in bytes
 *
 * Returns the number of bytes written, or -1 when the destination buffer is
 * too small or the compressed data ends in the middle of a segment.
 */
int Rle_Decode(unsigned char *inbuf, int insize, unsigned char *outbuf, int outsize)
{
    const unsigned char *in = inbuf;
    unsigned char *out = outbuf;
    int inrest = insize;
    int outrest = outsize;
    int i;

    /* Strictly greater than zero: once every input byte has been consumed
     * there is no further token to read. */
    while (inrest > 0) {
        int len = (*in & 127) + 1;

        if (outrest < len)
            return -1;

        if (*in & 128) {
            /* Repeat segment: token + one value byte. */
            if (inrest < 2)
                return -1;
            unsigned char value = in[1];
            for (i = 0; i < len; i++)
                *out++ = value;
            in += 2;
            inrest -= 2;
        } else {
            /* Verbatim segment: token + len payload bytes. */
            if (inrest < 1 + len)
                return -1;
            in++;
            for (i = 0; i < len; i++)
                *out++ = *in++;
            inrest -= 1 + len;
        }
        outrest -= len;
    }
    return outsize - outrest;
}

简单说下算法的原理。普通的RLE是把aabbccdd变成2a2b2c2d，这样当压缩abcd这样的数据时，不但压缩不了，还会产生冗余，所以我们需要高级点的RLE来解决这个问题。

我们知道，RLE编码是由一个个length-code字节对组成的。这个算法将length字节的最高位作为标志位：标志位为1，说明后面的一个byte连续重复X次（aaaaa这样）；标志位为0，说明后面紧跟X个按原样存储的bytes（abcd这种）。length字节的低7位存放整数X-1（显然，X最大为128）。当标志位为1时，X的意义和普通的RLE是一样的；当标志位为0时，X表示接下来按原样存储的byte个数。需要注意的是，上面的代码只把连续3个及以上的相同字节编码为重复段，因此原样存储的数据中可能出现两个相邻的相同字节（例如aab）。

举例：aaaabcdefg
首先是四个连续的a，所以X=4，X-1=3，标志位为1，故length字节应为1000 0011=0x83，code字节为a，至此我们有0x83 a。
然后是6个相邻byte互不相同的字节，所以X=6，X-1=5，标志位为0，length字节为0x05，后面跟着6个bytes，bcdefg。至此，编码结束，结果为0x83 a 0x05 b c d e f g

最坏的情况，此算法会带来1/128的冗余，即使如此，比普通RLE 100%的冗余还是好上不少的。

阅读全文

0 0