赫夫曼编码和实现

来源:互联网 发布:淘宝网店怎么关闭 编辑:程序博客网 时间:2024/05/17 13:10

赫夫曼编码

 

上一节课我们已经谈了赫夫曼树的基本原理和构造方式,而赫夫曼编码可以很有效地压缩数据(通常可以节省20%~90%的空间,具体压缩率依赖于数据的特性)。

 

名词解释:定长编码,变长编码,前缀码

定长编码:每个字符的编码长度都相同,例如ASCII编码

变长编码:各个字符的编码长度不一致,可以根据字符的出现频率来调节(出现频率越高的字符使用越短的编码)

前缀码:所谓的前缀码,就是没有任何码字是其他码字的前缀

#pragma once#ifndef _PQUEUE_H#define _PQUEUE_H#include "huffman.h"//We modify the data type to hold pointers to Huffman tree nodes#define TYPE htNode *#define MAX_SZ 256typedef struct _pQueueNode {TYPE val;unsigned int priority;struct _pQueueNode *next;}pQueueNode;typedef struct _pQueue {unsigned int size;pQueueNode *first;}pQueue;void initPQueue(pQueue **queue);void addPQueue(pQueue **queue, TYPE val, unsigned int priority);TYPE getPQueue(pQueue **queue);#endif
#include "pQueue.h"

#include <stdio.h>
#include <stdlib.h>

/*
 * Allocates size bytes or terminates the program.
 * Running out of memory is treated as fatal so that no caller ever
 * dereferences a NULL allocation.
 */
static void *allocOrExit(size_t size)
{
    void *block = malloc(size);

    if (block == NULL) {
        fprintf(stderr, "\nOut of memory.\n");
        exit(EXIT_FAILURE);
    }
    return block;
}

/*
 * Creates an empty priority queue.
 *
 * queue: out parameter; receives the newly allocated queue.
 */
void initPQueue(pQueue **queue)
{
    pQueue *created = allocOrExit(sizeof *created);

    created->first = NULL;
    created->size = 0;
    *queue = created;
}

/*
 * Inserts val into the queue, keeping the list sorted by ascending
 * priority (smallest priority at the head).
 *
 * A new element is placed in front of existing elements that have the
 * same priority. The order in which ties are resolved determines the
 * shape of the Huffman tree built on top of this queue, so it must not be
 * changed casually.
 *
 * If the queue already holds MAX_SZ elements nothing is inserted and
 * "Queue is full." is printed.
 */
void addPQueue(pQueue **queue, TYPE val, unsigned int priority)
{
    pQueue *q = *queue;

    if (q->size == MAX_SZ) {
        printf("\nQueue is full.\n");
        return;
    }

    pQueueNode *node = allocOrExit(sizeof *node);
    node->priority = priority;
    node->val = val;

    /*
     * Empty queue. Both fields are checked, and size is reset to 1, so a
     * queue whose bookkeeping went out of sync is brought back to a
     * consistent state.
     */
    if (q->size == 0 || q->first == NULL) {
        node->next = NULL;
        q->first = node;
        q->size = 1;
        return;
    }

    /*
     * Walk the links until reaching the first element whose priority is
     * not smaller than the new one, then splice the new node in front of
     * it. Reaching the end of the list appends the node.
     */
    pQueueNode **link = &q->first;
    while (*link != NULL && (*link)->priority < priority) {
        link = &(*link)->next;
    }
    node->next = *link;
    *link = node;
    q->size++;
}

/*
 * Removes and returns the element at the head of the queue, i.e. the one
 * with the smallest priority.
 *
 * The list node that carried the element is released here; the element
 * itself now belongs to the caller.
 *
 * Returns NULL and prints "Queue is empty." when there is nothing to
 * remove (TYPE is a pointer type, so NULL is a usable error value).
 */
TYPE getPQueue(pQueue **queue)
{
    pQueue *q = *queue;
    TYPE returnValue = NULL;

    if (q->size > 0 && q->first != NULL) {
        pQueueNode *head = q->first;

        returnValue = head->val;
        q->first = head->next;
        q->size--;
        free(head);
    } else {
        printf("\nQueue is empty.\n");
    }
    return returnValue;
}
/*
 * huffman.h - data structures and entry points of the Huffman coder.
 *
 * The include guard alone protects against double inclusion; its name no
 * longer starts with an underscore followed by an upper-case letter,
 * because such identifiers are reserved for the implementation.
 */
#ifndef HUFFMAN_H
#define HUFFMAN_H

/* A Huffman tree node. Leaves have both children set to NULL. */
typedef struct _htNode {
    char symbol;                    /* the symbol stored in a leaf */
    struct _htNode *left, *right;   /* children reached by '0' / '1' */
} htNode;

/*
 * The tree is wrapped in a structure so that fields such as "size" can be
 * added later without changing every function signature.
 */
typedef struct _htTree {
    htNode *root;
} htTree;

/* One entry of the code table (linked list implementation). */
typedef struct _hlNode {
    char symbol;            /* the symbol being encoded */
    char *code;             /* its code as a string of '0'/'1' characters */
    struct _hlNode *next;   /* next entry, or NULL at the end */
} hlNode;

/* Encapsulation of the list: head and tail of the code table. */
typedef struct _hlTable {
    hlNode *first;
    hlNode *last;
} hlTable;

/*
 * Counts how often each symbol occurs in inputString and builds the
 * Huffman tree from those counts. The tree is heap-allocated.
 */
htTree * buildTree(char *inputString);

/*
 * Walks the tree and returns a heap-allocated table holding the code of
 * every leaf ('0' for each step to the left, '1' for each step to the
 * right).
 */
hlTable * buildTable(htTree *huffmanTree);

/* Prints stringToEncode followed by the code of each of its symbols. */
void encode(hlTable *table, char *stringToEncode);

/*
 * Interprets stringToDecode as a sequence of '0'/'1' characters, follows
 * them through the tree and prints the decoded symbols.
 */
void decode(htTree *tree, char *stringToDecode);

#endif /* HUFFMAN_H */

#include <stdlib.h>#include <stdio.h>#include <string.h>#include "huffman.h"#include "pQueue.h"void traverseTree(htNode *treeNode, hlTable **table, int k, char code[256]){//If we reach the end we introduce the code in the tableif(treeNode->left == NULL && treeNode->right == NULL){code[k] = '\0';hlNode *aux = (hlNode *)malloc(sizeof(hlNode));aux->code = (char *)malloc(sizeof(char)*(strlen(code)+1));strcpy(aux->code,code);aux->symbol = treeNode->symbol;aux->next = NULL;if((*table)->first == NULL){(*table)->first = aux;(*table)->last = aux;}else{(*table)->last->next = aux;(*table)->last = aux;}}//We concatenate a 0 for each step to the leftif(treeNode->left!=NULL){code[k]='0';traverseTree(treeNode->left,table,k+1,code);}//We concatenate a 1 for each step to the rightif(treeNode->right!=NULL){code[k]='1';traverseTree(treeNode->right,table,k+1,code);}}hlTable * buildTable(htTree * huffmanTree){//We initialize the tablehlTable *table = (hlTable *)malloc(sizeof(hlTable));table->first = NULL;table->last = NULL;//Auxiliary variableschar code[256];//k will memories the level on which the traversal isint k=0;//We traverse the tree and calculate the codestraverseTree(huffmanTree->root,&table,k,code);return table;}htTree * buildTree(char *inputString){//The array in which we calculate the frequency of the symbols//Knowing that there are only 256 posibilities of combining 8 bits//(256 ASCII characters)int * probability = (int *)malloc(sizeof(int)*256);//We initialize the arrayfor(int i=0; i<256; i++)probability[i]=0;//We consider the symbol as an array index and we calculate how many times each symbol appearsfor(int i=0; inputString[i]!='\0'; i++)probability[(unsigned char) inputString[i]]++;//The queue which will hold the tree nodespQueue * huffmanQueue;initPQueue(&huffmanQueue);//We create nodes for each symbol in the stringfor(int i=0; i<256; i++)if(probability[i]!=0){htNode *aux = (htNode *)malloc(sizeof(htNode));aux->left = NULL;aux->right = NULL;aux->symbol = (char) 
i;addPQueue(&huffmanQueue,aux,probability[i]);}//We free the array because we don't need it anymorefree(probability);//We apply the steps described in the article to build the treewhile(huffmanQueue->size!=1){int priority = huffmanQueue->first->priority;priority+=huffmanQueue->first->next->priority;htNode *left = getPQueue(&huffmanQueue);htNode *right = getPQueue(&huffmanQueue);htNode *newNode = (htNode *)malloc(sizeof(htNode));newNode->left = left;newNode->right = right;addPQueue(&huffmanQueue,newNode,priority);}//We create the treehtTree *tree = (htTree *) malloc(sizeof(htTree));tree->root = getPQueue(&huffmanQueue);return tree;}void encode(hlTable *table, char *stringToEncode){hlNode *traversal;printf("\nEncoding\nInput string : %s\nEncoded string : \n",stringToEncode);//For each element of the string traverse the table//and once we find the symbol we output the code for itfor(int i=0; stringToEncode[i]!='\0'; i++){traversal = table->first;while(traversal->symbol != stringToEncode[i])traversal = traversal->next;printf("%s",traversal->code);}printf("\n");}void decode(htTree *tree, char *stringToDecode){htNode *traversal = tree->root;printf("\nDecoding\nInput string : %s\nDecoded string : \n",stringToDecode);//For each "bit" of the string to decode//we take a step to the left for 0//or ont to the right for 1for(int i=0; stringToDecode[i]!='\0'; i++){if(traversal->left == NULL && traversal->right == NULL){printf("%c",traversal->symbol);traversal = tree->root;}if(stringToDecode[i] == '0')traversal = traversal->left;if(stringToDecode[i] == '1')traversal = traversal->right;if(stringToDecode[i]!='0'&&stringToDecode[i]!='1'){printf("The input string is not coded correctly!\n");return;}}if(traversal->left == NULL && traversal->right == NULL){printf("%c",traversal->symbol);traversal = tree->root;}printf("\n");}
#include <stdio.h>#include <stdlib.h>#include "huffman.h"int main(void){//We build the tree depending on the stringhtTree *codeTree = buildTree("beep boop beer!");//We build the table depending on the Huffman treehlTable *codeTable = buildTable(codeTree);//We encode using the Huffman tableencode(codeTable,"beep boop beer!");//We decode using the Huffman tree//We can decode string that only use symbols from the initial stringdecode(codeTree,"0011111000111");//Output : 0011 1110 1011 0001 0010 1010 1100 1111 1000 1001return 0;}




原创粉丝点击