算术编码(Arithmetic Coding)源代码

来源:互联网 发布:lol服务器ip 端口 编辑:程序博客网 时间:2024/05/16 08:28

 Ian H. Witten、Radford M. Neal和John G. Cleary在1987年发表了一篇启发性的论文。论文中描述了一种基于整数运算的通用算术编码器,而且还给出了由计算错误导致的效率低下的分析。以下源代码来自于这篇论文:《基于算术编码的数据压缩》(Arithmetic Coding For Data Compression)。该论文的下载地址:http://www.sachingarg.com/compression/entropy_coding/acm87_arithmetic_coding.pdf

  编码函数和解码函数的伪代码:

/* ARITHMETIC ENCODING ALGORITHM. */

/* Call encode_symbol repeatedly for each symbol in the message.           */
/* Ensure that a distinguished "terminator" symbol is encoded last, then   */
/* transmit any value in the range [low, high).                            */

encode_symbol(symbo1, cum_freq)
    range = high - low
    high = low + range*cum_freq[symbol-1]
    low   = low + range*cum-freq[symbol]


/* ARITHMETIC DECODING ALGORITHM. */

/* "Value" is the number that has been received.                           */
/* Continue calling decode-symbol until the terminator symbol is returned. */

decode_symbol(cum_freq)
    find symbol such that
        cum_freq[symbol] <= (value-low)/(high-low) < cum_freq[symbol-1]
                            /* This ensures that value lies within the new */
                            /* [low, high) range that will be calculated by */
                            /* the following lines of code.                 */

    range = high - low
    high = low + range*cum_freq[symbol-1]
    low   = low + range*cum_freq[symbol]
    return symbol

  算术编码器和解码器的C语言实现:

arithmetic_coding.h
─────────────────────────────────────────
/* DECLARATIONS USED FOR ARITHMETIC ENCODING AND DECODING */


/* SIZE OF ARITHMETIC CODE VALUES. */

#define Code_value_bits 16              /* Number of bits in a code value   */
typedef long code_value;                /* Type of an arithmetic code value */

#define Top_value (((long)1<<Code_value_bits)-1)      /* Largest code value */


/* HALF AND QUARTER POINTS IN THE CODE VALUE RANGE. */

#define First_qtr (Top_value/4+1)       /* Point after first quarter        */
#define Half      (2*First_qtr)         /* Point after first half           */
#define Third_qtr (3*First_qtr)         /* Point after third quarter        */
─────────────────────────────────────────

model.h
─────────────────────────────────────────
/* INTERFACE TO THE MODEL. */

/* THE SET OF SYMBOLS THAT MAY BE ENCODED. */

#define No_of_chars 256                 /* Number of character symbols      */
#define EOF_symbol (No_of_chars+1)      /* Index of EOF symbol              */

#define No_of_symbols (No_of_chars+1)   /* Total number of symbols          */

/* TRANSLATION TABLES BETWEEN CHARACTERS AND SYMBOL INDEXES. */

int char_to_index[No_of_chars];         /* To index from character          */
unsigned char index_to_char[No_of_symbols+1]; /* To character from index    */

/* CUMULATIVE FREQUENCY TABLE. */

#define Max_frequency 16383             /* Maximum allowed frequency count */
                                        /*   2^14 - 1                       */
int cum_freq[No_of_symbols+1];          /* Cumulative symbol frequencies    */
─────────────────────────────────────────

encode.c
─────────────────────────────────────────
/* MAIN PROGRAM FOR ENCODING. */

#include <stdio.h>
#include "model.h"

main()
{   start_model();                             /* Set up other modules.     */
    start_outputing_bits();
    start_encoding();
    for (;;) {                                 /* Loop through characters. */
        int ch; int symbol;
        ch = getc(stdin);                      /* Read the next character. */
        if (ch==EOF) break;                    /* Exit loop on end-of-file. */
        symbol = char_to_index[ch];            /* Translate to an index.    */
        encode_symbol(symbol,cum_freq);        /* Encode that symbol.       */
        update_model(symbol);                  /* Update the model.         */
    }
    encode_symbol(EOF_symbol,cum_freq);        /* Encode the EOF symbol.    */
    done_encoding();                           /* Send the last few bits.   */
    done_outputing_bits();
    exit(0);
}
─────────────────────────────────────────

arithmetic_encode.c
─────────────────────────────────────────
/* ARITHMETIC ENCODING ALGORITHM. */

#include "arithmetic_coding.h"

static void bit_plus_follow();   /* Routine that follows                    */

/* CURRENT STATE OF THE ENCODING. */

static code_value low, high;    /* Ends of the current code region          */
static long bits_to_follow;     /* Number of opposite bits to output after */
                                /* the next bit.                            */

/* START ENCODING A STREAM OF SYMBOLS. */

start_encoding()
{   low = 0;                            /* Full code range.                 */
    high = Top_value;
    bits_to_follow = 0;                 /* No bits to follow next.          */
}

/* ENCODE A SYMBOL. */

encode_symbol(symbol,cum_freq)
    int symbol;                 /* Symbol to encode                         */
    int cum_freq[];             /* Cumulative symbol frequencies            */
{   long range;                 /* Size of the current code region          */
    range = (long)(high-low)+1;
    high = low +                                /* Narrow the code region   */
      (range*cum_freq[symbol-1])/cum_freq[0]-1; /* to that allotted to this */
    low = low +                                 /* symbol.                  */
      (range*cum_freq[symbol])/cum_freq[0];
    for (;;) {                                  /* Loop to output bits.     */
        if (high<Half) {
            bit_plus_follow(0);                 /* Output 0 if in low half. */
        }
        else if (low>=Half) {                   /* Output 1 if in high half.*/
            bit_plus_follow(1);
            low -= Half;
            high -= Half;                       /* Subtract offset to top. */
        }
        else if (low>=First_qtr                 /* Output an opposite bit   */
              && high<Third_qtr) {              /* later if in middle half. */
            bits_to_follow += 1;
            low -= First_qtr;                   /* Subtract offset to middle*/
            high -= First_qtr;
        }
        else break;                             /* Otherwise exit loop.     */
        low = 2*low;
        high = 2*high+1;                        /* Scale up code range.     */
    }
}

/* FINISH ENCODING THE STREAM. */

done_encoding()
{   bits_to_follow += 1;                       /* Output two bits that      */
    if (low<First_qtr) bit_plus_follow(0);     /* select the quarter that   */
    else bit_plus_follow(1);                   /* the current code range    */
}                                              /* contains.                 */

/* OUTPUT BITS PLUS FOLLOWING OPPOSITE BITS. */

static void bit_plus_follow(bit)
    int bit;
{   output_bit(bit);                           /* Output the bit.           */
    while (bits_to_follow>0) {
        output_bit(!bit);                      /* Output bits_to_follow     */
        bits_to_follow -= 1;                   /* opposite bits. Set        */
    }                                          /* bits_to_follow to zero.   */
}
─────────────────────────────────────────

decode.c
─────────────────────────────────────────
/* MAIN PROGPAM FOR DECODING. */

#include <stdio.h>
#include "model.h"

main ()
{   start_model();                              /* Set up other modules.    */
    start_inputing_bits();
    start_decoding();
    for (;;) {                                  /* Loop through characters. */
        int ch; int symbol;
        symbol = decode_symbol(cum_freq);       /* Decode next symbol.      */
        if (symbol==EOF_symbol) break;          /* Exit loop if EOF symbol. */
        ch = index_to_char[symbol];             /* Translate to a character.*/
        putc(ch,stdout);                        /* Write that character.    */
        update_model(symbol);                   /* Update the model.        */
    }
    exit(0);
}
─────────────────────────────────────────

arithmetic_decode.c
─────────────────────────────────────────
/* ARITHMETIC DECODING ALGORITHM. */

#include "arithmetic_coding.h"

/* CURRENT STATE OF THE DECODING. */

static code_value value;        /* Currently-seen code value                */
static code_value low, high;    /* Ends of current code repion              */

/* START DECODING A STREAM OF SYMBOLS. */

start_decoding()
{   int i;
    value = 0;                                  /* Input bits to fill the   */
    for (i = 1; i<=Code_value_bits; i++) {      /* code value.              */
        value = 2*value+input_bit();
    }
    low = 0;                                    /* Full code range.         */
    high = Top_value;
}

/* DECODE THE NEXT SYMBOL. */

int decode_symbol(cum_freq)
    int cum_freq[];             /* Cumulative symbol frequencies            */
{   long range;                 /* Size of current code region              */
    int cum;                    /* Cumulative frequency calculated          */
    int symbol;                 /* Symbol decoded */
    range = (long)(high-low)+1;
    cum =                                       /* Find cum freq for value. */
      (((long)(value-low)+1)*cum_freq[0]-1)/range;
    for (symbol = 1; cum_freq[symbol]>cum; symbol++) ; /* Then find symbol. */
    high = low +                                /* Narrow the code region   */
      (range*cum_freq[symbol-1])/cum_freq[0]-1; /* to that allotted to this */
    low = low +                                 /* symbol.                  */
      (range*cum_freq[symbol])/cum_freq[0];
    for (;;) {                                  /* Loop to get rid of bits. */
        if (high<Half) {
            /* nothing */                       /* Expand low half.         */
        }
        else if (low>=Half) {                   /* Expand high half.        */
            value -= Half;
            low -= Half;                        /* Subtract offset to top. */
            high -= Half;
        }
        else if (low>=First_qtr                 /* Expand middle half.      */
              && high<Third_qtr) {
            value -= First_qtr;
            low -= First_qtr;                   /* Subtract offset to middle*/
            high -= First_qtr;
        }
        else break;                             /* Otherwise exit loop.     */
        low = 2*low;
        high = 2*high+1;                        /* Scale up code range.     */
        value = 2*value+input_bit();            /* Move in next input blt. */
    }
    return symbol;
}

bit_input.c
─────────────────────────────────────────
/* BIT INPUT ROUTINES. */

#include <stdio.h>
#include "arithmetic_coding.h"

/* THE BIT BUFFER. */

static int buffer;              /* Bits waiting to be input                 */
static int bits_to_go;          /* Number of bits still in buffer           */
static int garbage_bits;        /* Number of bits past end-of-file          */

/* INITIALIZE BIT INPUT. */

start_inputing_bits()
{   bits_to_go = 0;                             /* Buffer starts out with   */
    garbage_bits = 0;                           /* no bits in it.           */
}

/* INPUT A BIT. */

int input_bit()
{   int t;
    if (bits_to_go==0) {                        /* Read the next byte if no */
        buffer = getc(stdin);                   /* bits are left in buffer. */
        if (buffer==EOF) {
            garbage_bits += 1;                      /* Return arbitrary bits*/
            if (garbage_bits>Code_value_bits-2) {   /* after eof, but check */
                fprintf(stderr,"Bad input file/n"); /* for too many such.   */
                exit(-1);
            }
        }
        bits_to_go = 8;
    }
    t = buffer&1;                               /* Return the next bit from */
    buffer >>= 1;                               /* the bottom of the byte. */
    bits_to_go -= 1;
    return t;
}
─────────────────────────────────────────

bit_output.c
─────────────────────────────────────────
/* BIT OUTPUT ROUTINES. */

#include <stdio.h>

/* THE BIT BUFFER. */

static int buffer;               /* Bits buffered for output                */
static int bits_to_go;           /* Number of bits free in buffer           */

/* INITIALIZE FOR BIT OUTPUT. */

start_outputing_bits()
{   buffer = 0;                                 /* Buffer is empty to start */
    bits_to_go = 8;                             /* with.                    */
}

/* OUTPUT A BIT. */

output_bit(bit)
    int bit;
{   buffer >>= 1;                               /* Put bit in top of buffer.*/
    if (bit) buffer |= 0x80;
    bits_to_go -= 1;
    if (bits_to_go==0) {                        /* Output buffer if it is   */
        putc(buffer,stdout);                    /* now full.                */
        bits_to_go = 8;
    }
}

/* FLUSH OUT THE LAST BITS. */

done_outputing_bits()
{   putc(buffer>>bits_to_go,stdout);
}
─────────────────────────────────────────

  静态模型和自适应模型的程序调用:

fixed_model.c
─────────────────────────────────────────
/* THE FIXED SOURCE MODEL */

#include "model.h"

int freq[No_of_symbols+1] = (
   0,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1, 124,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,

/*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
1236,   1, 21,   9,   3,   1, 25, 15,   2,   2,   2,   1, 79, 19, 60,   1,

/* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
15, 15,   8,   5,   4,   7,   5,   4,   4,   6,   3,   2,   1,   1,   1,   1,

/* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
   1, 24, 15, 22, 12, 15, 10,   9, 16, 16,   8,   6, 12, 23, 13, 11,

/* P    Q    R    S    T    U    V    W    X    Y    Z    [    /    ]    ^    _ */
14,   1, 14, 28, 29,   6,   3, 11,   1,   3,   1,   1,   1,   1,   1,   3,

/* '    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
   1, 491, 85, 173, 232, 744, 127, 110, 293, 418,   6, 39, 250, 139, 429, 446,

/* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~      */
111,   5, 388, 375, 531, 152, 57, 97, 12, 101,   5,   2,   1,   2,   3,   1,

   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
   1
};

/* INITIALIZE THE MODEL. */

start_model()
{   int 1;
    for (i = 0; i<No_of_chars; i++) {           /* Set up tables that       */
        char_to_index[i] = i+1;                 /* translate between symbol */
        index_to_char[i+1] = i;                 /* indexes and characters. */
    }
    cum_freq[No_of_symbols] = 0;
    for (i = No_of_symbols; i>0; i--) {        /* Set up cumulative        */
        cum_freq[i-1] = cum_freq[i] + freq[i]; /* frequency counts.        */
    }
    if (cum_freq[0] > Max_frequency) abort();   /* Check counts within limit*/
}

/* UPDATE THE MODEL TO ACCOUNT FOR A NEW SYMBOL. */

update_model(symbol)
    int symbol;
{                                               /* Do nothing. */
}
─────────────────────────────────────────

adaptive_model.c
─────────────────────────────────────────
/* THE ADAPTIVE SOURCE MODEL */

#include "modol.h"

int freq[No_of_symbols+1];       /* symbol frequencies                      */

/* INITIALIZE THE MODEL. */

start_model()
{   int i;
    for (i = 0; i<No_of_chars; i++) {           /* Set up tables that       */
        char_to_index[i] = i+1;                 /* translate between symbol */
        index_to_char[i+1] = i;                 /* indexes and characters. */
    }
    for (i = 0; i<=No_of_symbols; i++) {        /* Set up initial frequency */
        freq[i] = 1;                            /* counts to be one for all */
        cum_freq[i] = No_of_symbols-i;          /* symbols.                 */
    }
    freq[0] = 0;                                /* Freq[0] must not be the */
}                                               /* same as freq[1].         */

/* UPDATE THE MODEL TO ACCOUNT FOR A NEW SYMBOL. */

update_model(symbol)
    int symbol;                 /* Index of new symbol                      */
{   int i;                      /* New index for symbol                     */
    if (cum_freq[0]==Max_frequency) {           /* See if frequency counts */
        int cum;                                /* are at their maximum.    */
        cum = 0;
        for (i = No_of_symbols; i>=0; i--) {    /* If so, halve all the     */
            freq[i] = (freq[i]+1)/2;            /* counts (keeping them     */
            cum_freq[i] = cum;                  /* non-zero).               */
            cum += freq[i];
        }
    }
    for (i = symbol; freq[i]==freq[i-1]; i--) ; /* Find symbol's new index. */
    if (i<symbol) {
        int ch_i, ch_symbol;
        ch_i = index_to_char[i];                /* Update the translation   */
        ch_symbol = index_to_char[symbol];      /* tables if the symbol has */
        index_to_char[i] = ch_symbol;           /* moved.                   */
        index_to_char[symbol] = ch_i;
        char_to_index[ch_i] = symbol;
        char_to_index[ch_symbol] = i;
    }
    freq[i] += 1;
    while (i>0) {                               /* Increment the frequency */
        i -= 1;                                 /* count for the symbol and */
        cum_freq[i] += 1;                       /* update the cumulative    */
    }                                           /* frequencies.             */
}

 

 

 

 

 

 

 

原创粉丝点击