赫夫曼编码详解

来源:互联网 发布:小说程序源码 编辑:程序博客网 时间:2024/06/06 04:37

赫夫曼编码的原理:

用bit存储信息,一般是使用固定长度的bit来表示每一个信息单元,比如ascii用8bit来表示一个字符

为了压缩存储空间,可以使用更少的bit来存储信息

如果文本文件全文已知(不能是流文件),则可以根据每个字符出现的频率,临时编码

出现频率较高的字符编码较短,出现频率较低的字符编码较长,这样可以压缩文本文件的大小

由于字符编码长度不一,为了区分开各个编码,任何一个字符的编码都不能是其他字符编码的前缀

所以通过赫夫曼树来构造编码,赫夫曼树是一棵带权路径长度之和最小的二叉树。

其特性保证了每个字符不但有唯一的编码,而且由于字符只出现在叶子结点上,任何一个字符的编码都不会是其他字符编码的前缀。

以下分三个文件实现赫夫曼编码,其中包括一个栈。 另外通过一个文本文件来提供输入




</pre><pre name="code" class="cpp">
</pre><pre name="code" class="cpp">/**************************** stack.c ***********************************************/#include <stdio.h>#define STACK_MAX 100int stack[STACK_MAX + 1]; // 0 is not usedint top = 0, bottom = 0;void push(int a){if (top >= STACK_MAX) {   printf("stack full!\n");   return;}stack[++top] = a;}void pop(int *a){if (top == bottom) {   printf("stack empty\n");   return;}if (a == NULL) top--;else *a = stack[top--];}void print_stack(){int i;for (i=bottom+1; i<=top; i++) {   printf("%d ", stack[i]);}printf("\n");}/******************************** stack.c ***********************************************//*********************** stack.h ***************************/void push(int);void pop(int *);void print_stack()/**************** stack.h ********************************//**************************** haffman.c ************************************/#include <stdlib.h>#include <stdio.h>#include "stack.h"#define HEAP_MAX 100/* save the waight(how many in the txt) of every charactor */char ascii[128] = {0};/* node of heap array*/typedef struct _node{char ch;int waight;struct _node *lchild;struct _node *rchild;}HaffmanNode;/* haffman heap contains a pointer array and a size*/typedef struct {HaffmanNode *heap[HEAP_MAX+1];// 0 is not usedint current_size;}HaffmanHeap;/* insert a node to the heap, including searching the position the new*//* node should be put from bottom to top , adding the heap size*//* the input parameter new_node should not be null, and it should not*//* be freed after this function */int haffman_heap_insert(HaffmanHeap *haffman_heap, HaffmanNode* new_node){int i, ci;if (new_node == NULL) {   printf("new node is NULL\n");   return -1;}/* at first the new node is supposed to be located in the end of the array*//* then it should climb from the bottom, by comparing to the waight of it's*//* temp parents*/i = ++(haffman_heap->current_size);ci = i/2;/* if the waight of its parents is larger, the parents should go down*//* to the lower level, 
then loop */while (ci != 0 && haffman_heap->heap[ci]->waight > new_node->waight) {    /* move the node in ci to the lower level: i*/    haffman_heap->heap[i] = haffman_heap->heap[ci];    i = ci;    ci /= 2;}/* finally , the new node should be located at i*/haffman_heap->heap[i] = new_node;return 0;}/* get the smallest node in the heap and output by parameter get_node *//* the most smallest node is always the root of the heap. after remove*//* the root, the heap should be re-adjust to a heap*/int haffman_heap_get(HaffmanHeap *haffman_heap, HaffmanNode **get_node){HaffmanNode *y;int i, ci;if (haffman_heap->current_size == 0) {   printf("heap empty!\n");   return -1;}/* output the root of heap*/*get_node = haffman_heap->heap[1];/* adjust the heap by relocated the node in the end of the array*/y = haffman_heap->heap[haffman_heap->current_size--];/* firstly, supposed the the node should be locate at the root*/i = 1;ci = 2;/* if the supposed position has child(ren). a comparison should be*//* done between its waight and the waight of the child, or the*//* smaller one of the child(ren) */while(ci <= haffman_heap->current_size) {   if (ci < haffman_heap->current_size &&     haffman_heap->heap[ci+1]->waight < haffman_heap->heap[ci]->waight)     ci++;   /* if the waight of its child(ren) is bigger than its, the location */   /* of the node is found*/   if(haffman_heap->heap[ci]->waight > y->waight) break;   /* else locate its child in the position*/   haffman_heap->heap[i] = haffman_heap->heap[ci];   /* go down and loop*/   i = ci;   ci *= 2;}/* finally, locate the node which was at the end of the array in the new*//* location to make the heap*/haffman_heap->heap[i] = y;return 0;}/* output the code of every character in the tree*/void output_haffman_tree(HaffmanNode *root){if (root) {   /* if the node is a leaf, output the character and print the stack*/   /* from the bottom to the top. i.e. 
first in first out*/   if (root->lchild == NULL && root->rchild == NULL) {    printf("%c: ", root->ch);    print_stack();   }   /* before turn to the lchild, push a 0*/   push(0);   output_haffman_tree(root->lchild);   /* before turn to the rchild, push a 1*/   push(1);   output_haffman_tree(root->rchild);}/* after the travelling of every node, pop the 0/1 of this node*/pop(NULL);}/* count the times of every character int the filename*//* and save them in the globle varible ascii[] */int get_waight(char *filename){FILE *fp = NULL;char ch;fp = fopen(filename,"r");if (fp == NULL) {   printf("open file error!\n");   return -1;}while ((ch = fgetc(fp)) != EOF) ascii[ch]++;return 0;}int main(int argc, char *args[]){int i;int j;HaffmanHeap haffman_heap;HaffmanNode *new_node;HaffmanNode *get_node1 = NULL;HaffmanNode *get_node2 = NULL;if (argc < 2) {   printf("usage: %s file\n", args[0]);   return 0;}if (get_waight(args[1])) {   printf("open file error1\n");   return 0;}haffman_heap.current_size = 0;/* insert every character in the text and constrct a heap*/for (i=0; i<128; i++) {   if (ascii[i] != 0) {    new_node = (HaffmanNode *)malloc(sizeof(HaffmanNode));    if (new_node == NULL) {printf("error\n"); return -1;};    new_node->waight = ascii[i];    new_node->ch = i;    new_node->lchild = NULL;    new_node->rchild = NULL;    if (haffman_heap_insert(&haffman_heap, new_node)) {     printf("insert error\n");     return -1;    }    new_node = NULL;   }}/* get the two smallest node, then construct a new one whose waight*//* is the sumary of the two, then insert it to the heap*/while (haffman_heap.current_size > 1) {   new_node = (HaffmanNode *)malloc(sizeof(HaffmanNode));   haffman_heap_get(&haffman_heap, &get_node1);   haffman_heap_get(&haffman_heap, &get_node2);   if (get_node1 == NULL || get_node2 == NULL) {printf("error\n");return -1;}   new_node->waight = get_node1->waight + get_node2->waight;   new_node->lchild = get_node1;   new_node->rchild = get_node2;   
haffman_heap_insert(&haffman_heap, new_node);}/* when there is only one node in the heap, the huffman tree is constructed over*/haffman_heap_get(&haffman_heap, &new_node);output_haffman_tree(new_node);return 0;}/******************** haffman.c ************************************//********************* input *********************/hello!!! south africa world cup!!!wakawaka!!/********************* input *********************/

/**************************** stack.c ***********************************************/#include <stdio.h>#define STACK_MAX 100int stack[STACK_MAX + 1]; // 0 is not usedint top = 0, bottom = 0;void push(int a){if (top >= STACK_MAX) {   printf("stack full!\n");   return;}stack[++top] = a;}void pop(int *a){if (top == bottom) {   printf("stack empty\n");   return;}if (a == NULL) top--;else *a = stack[top--];}void print_stack(){int i;for (i=bottom+1; i<=top; i++) {   printf("%d ", stack[i]);}printf("\n");}/******************************** stack.c ***********************************************//*********************** stack.h ***************************/void push(int);void pop(int *);void print_stack()/**************** stack.h ********************************//**************************** haffman.c ************************************/#include <stdlib.h>#include <stdio.h>#include "stack.h"#define HEAP_MAX 100/* save the waight(how many in the txt) of every charactor */char ascii[128] = {0};/* node of heap array*/typedef struct _node{char ch;int waight;struct _node *lchild;struct _node *rchild;}HaffmanNode;/* haffman heap contains a pointer array and a size*/typedef struct {HaffmanNode *heap[HEAP_MAX+1];// 0 is not usedint current_size;}HaffmanHeap;/* insert a node to the heap, including searching the position the new*//* node should be put from bottom to top , adding the heap size*//* the input parameter new_node should not be null, and it should not*//* be freed after this function */int haffman_heap_insert(HaffmanHeap *haffman_heap, HaffmanNode* new_node){int i, ci;if (new_node == NULL) {   printf("new node is NULL\n");   return -1;}/* at first the new node is supposed to be located in the end of the array*//* then it should climb from the bottom, by comparing to the waight of it's*//* temp parents*/i = ++(haffman_heap->current_size);ci = i/2;/* if the waight of its parents is larger, the parents should go down*//* to the lower level, then loop */while (ci != 0 && 
haffman_heap->heap[ci]->waight > new_node->waight) {    /* move the node in ci to the lower level: i*/    haffman_heap->heap[i] = haffman_heap->heap[ci];    i = ci;    ci /= 2;}/* finally , the new node should be located at i*/haffman_heap->heap[i] = new_node;return 0;}/* get the smallest node in the heap and output by parameter get_node *//* the most smallest node is always the root of the heap. after remove*//* the root, the heap should be re-adjust to a heap*/int haffman_heap_get(HaffmanHeap *haffman_heap, HaffmanNode **get_node){HaffmanNode *y;int i, ci;if (haffman_heap->current_size == 0) {   printf("heap empty!\n");   return -1;}/* output the root of heap*/*get_node = haffman_heap->heap[1];/* adjust the heap by relocated the node in the end of the array*/y = haffman_heap->heap[haffman_heap->current_size--];/* firstly, supposed the the node should be locate at the root*/i = 1;ci = 2;/* if the supposed position has child(ren). a comparison should be*//* done between its waight and the waight of the child, or the*//* smaller one of the child(ren) */while(ci <= haffman_heap->current_size) {   if (ci < haffman_heap->current_size &&     haffman_heap->heap[ci+1]->waight < haffman_heap->heap[ci]->waight)     ci++;   /* if the waight of its child(ren) is bigger than its, the location */   /* of the node is found*/   if(haffman_heap->heap[ci]->waight > y->waight) break;   /* else locate its child in the position*/   haffman_heap->heap[i] = haffman_heap->heap[ci];   /* go down and loop*/   i = ci;   ci *= 2;}/* finally, locate the node which was at the end of the array in the new*//* location to make the heap*/haffman_heap->heap[i] = y;return 0;}/* output the code of every character in the tree*/void output_haffman_tree(HaffmanNode *root){if (root) {   /* if the node is a leaf, output the character and print the stack*/   /* from the bottom to the top. i.e. 
first in first out*/   if (root->lchild == NULL && root->rchild == NULL) {    printf("%c: ", root->ch);    print_stack();   }   /* before turn to the lchild, push a 0*/   push(0);   output_haffman_tree(root->lchild);   /* before turn to the rchild, push a 1*/   push(1);   output_haffman_tree(root->rchild);}/* after the travelling of every node, pop the 0/1 of this node*/pop(NULL);}/* count the times of every character int the filename*//* and save them in the globle varible ascii[] */int get_waight(char *filename){FILE *fp = NULL;char ch;fp = fopen(filename,"r");if (fp == NULL) {   printf("open file error!\n");   return -1;}while ((ch = fgetc(fp)) != EOF) ascii[ch]++;return 0;}int main(int argc, char *args[]){int i;int j;HaffmanHeap haffman_heap;HaffmanNode *new_node;HaffmanNode *get_node1 = NULL;HaffmanNode *get_node2 = NULL;if (argc < 2) {   printf("usage: %s file\n", args[0]);   return 0;}if (get_waight(args[1])) {   printf("open file error1\n");   return 0;}haffman_heap.current_size = 0;/* insert every character in the text and constrct a heap*/for (i=0; i<128; i++) {   if (ascii[i] != 0) {    new_node = (HaffmanNode *)malloc(sizeof(HaffmanNode));    if (new_node == NULL) {printf("error\n"); return -1;};    new_node->waight = ascii[i];    new_node->ch = i;    new_node->lchild = NULL;    new_node->rchild = NULL;    if (haffman_heap_insert(&haffman_heap, new_node)) {     printf("insert error\n");     return -1;    }    new_node = NULL;   }}/* get the two smallest node, then construct a new one whose waight*//* is the sumary of the two, then insert it to the heap*/while (haffman_heap.current_size > 1) {   new_node = (HaffmanNode *)malloc(sizeof(HaffmanNode));   haffman_heap_get(&haffman_heap, &get_node1);   haffman_heap_get(&haffman_heap, &get_node2);   if (get_node1 == NULL || get_node2 == NULL) {printf("error\n");return -1;}   new_node->waight = get_node1->waight + get_node2->waight;   new_node->lchild = get_node1;   new_node->rchild = get_node2;   
haffman_heap_insert(&haffman_heap, new_node);}/* when there is only one node in the heap, the huffman tree is constructed over*/haffman_heap_get(&haffman_heap, &new_node);output_haffman_tree(new_node);return 0;}/******************** haffman.c ************************************//********************* input *********************/hello!!! south africa world cup!!!wakawaka!!/********************* input *********************/

0 0