Huffman编码

来源:互联网 发布:阿里云pop3设置 编辑:程序博客网 时间:2024/04/26 15:20

     Huffman编码是最流行的基于统计学的数据压缩方法,下面我们简单介绍它的实现步骤:
1. 将信源符号按照概率递减顺序排列;
2. 取两个概率最小的符号分别分配以“0”和“1”,然后把它们的概率相加,并作为一个新的符号的概率,与其它未分配符号按照(1)重新排列;
3. 重复(1)(2)过程,直至所有概率相加得1;
4. 寻找从每一个信源符号到概率为1处的路径,记录下路径上的“0”和“1”;
5. 写出每一个符号的“0”和“1”序列(从树根到信源符号节点)。

缺点:

    从理论上讲,采用Huffman编码可以获得最佳编码效果,但是在实际中,由于计算机中存储和处理的最小数据单位是比特(bit),因此在某些情况下,实际的压缩编码效果往往达不到理论的压缩比。例如:信源符号{X, Y},其对应的概率为{2/3, 1/3},则根据理论计算(概率为 $p$ 的符号的最佳码长为 $-\log_2 p$),符号X, Y的最佳码长分别是:X: $-\log_2(2/3) \approx 0.585$ (bit),Y: $-\log_2(1/3) \approx 1.585$ (bit)。
    这表明,要获得最佳效果,符号{X, Y}的码字长度应分别为0.585 bit和1.585 bit,而计算机不可能有非整数位出现,只能按整数位进行,即采用Huffman编码对{X, Y}进行编码,得{X, Y}的码字分别为0和1,也就是两符号的信息编码长度都为1。可见,对于大概率符号X并未赋予较短的码字,实际编码效果没有达到理论编码效果。由上述分析可见,Huffman编码的主要缺点在于其编码方法是对每个符号进行编码,每个符号的码长只能是整数。为此提出算术编码,以解决计算机中必须以整数位进行编码的问题。

编码实现:

头文件

// Huffman.h: interface for the Huffman class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_)
#define AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#include <string>

/*********************** Data structures ***********************/

// One node of the Huffman tree. The tree is stored as an array of these
// nodes; parent/lchild/rchild are indices into that array, and the value 0
// means "no such node" (array slot 0 is never used as a real node).
typedef struct
{
    unsigned int weight; // weight of the node
    unsigned int parent; // index of the parent node, 0 if none
    unsigned int lchild; // index of the left child, 0 if none
    unsigned int rchild; // index of the right child, 0 if none
} HuffTreeNode, *HuffTree;

// One entry of the symbol table: a character, its weight and its code.
typedef struct
{
    char c;              // the symbol
    unsigned int weight; // weight (occurrence count) of the symbol
    char *code;          // NUL-terminated string of '0'/'1' characters
} CharMapNode, *CharMap;

/*********************** Class definition ***********************/

// Huffman encoder/decoder working on a text string and a string of '0'/'1'
// characters. The symbol table is 1-based: valid entries are chars[1..n].
class Huffman
{
private:
    // Picks, among huffTree[1..n], the two parentless nodes with the smallest
    // weights and stores their indices in s1 and s2.
    void select(int n, int &s1, int &s2);

    HuffTree huffTree; // Huffman tree
    CharMap chars;     // symbol table
    int n;             // number of symbols in the table
    std::string text;  // plain text
    std::string code;  // encoded text as '0'/'1' characters

public:
    // Reads the symbol count and the (character, weight) pairs from std::cin.
    void InputCharsWeight();
    // Builds the symbol table from the characters found in the plain text.
    void CountCharsWeight();
    // Rebuilds the plain text from the encoded string using the symbol table.
    void Decode();
    // Loads the plain text from the named file.
    void ReadTextFromFile(char *filename);
    // Loads the encoded string from the named file.
    void ReadCodeFromFile(char *filename);
    // Writes the plain text to the named file.
    void SaveTextToFile(char *filename);
    // Writes the encoded string to the named file.
    void SaveCodeToFile(char *filename);
    // Prints the encoded string to std::cout.
    void PrintCode();
    // Builds the Huffman tree from the weights and fills in each symbol's code.
    void MakeCharMap();
    // Prints the plain text to std::cout.
    void PrintText();
    // Prints every symbol together with its code.
    void PrintCharCode();
    // Prints every symbol together with its weight.
    void PrintCharWeight();
    // Installs m as the symbol table, with entries m[1..number].
    void SetCharMap(CharMap m, int number);
    // Encodes the plain text into the encoded string using the symbol table.
    void Encode();

    Huffman();
    virtual ~Huffman();
};

#endif // !defined(AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_)

 

Huffman类

// Huffman.cpp: implementation of the Huffman class.////////////////////////////////////////////////////////////////////////#include "Huffman.h"#include <iostream>#include <fstream>using namespace std;//////////////////////////////////////////////////////////////////////// Construction/Destruction//////////////////////////////////////////////////////////////////////Huffman::Huffman(){huffTree = NULL;chars = NULL;n = 0;}Huffman::~Huffman(){}//对Text串进行哈弗曼编码void Huffman::Encode(){code = "";for (string::size_type i = 0; i != text.size(); ++i){for (int j = 1; j <= n; ++j)if (chars[j].c == text[i])code += chars[j].code;  //code为数组名,可表示数组存放的地址}}//设置字符表void Huffman::SetCharMap(CharMap m, int number){chars = m;n = number;}//在huffTree[1..n]中选择parent为0且weight最小的两个节点,其序号为s1,s2void Huffman::select(int n, int &s1, int &s2){s1 = s2 = 0;for (int i = 1; i <= n; ++i){if (huffTree[i].parent != 0)continue;if (s1 == 0)s1 = i;else if (s2 == 0){//此处采用的策略,使得整个过程中s1的权值小于s2的权值if (huffTree[i].weight < huffTree[s1].weight){s2 = s1;s1 = i;}elses2 = i;}else{if (huffTree[i].weight < huffTree[s1].weight){s2 = s1;s1 = i;}else if (huffTree[i].weight < huffTree[s2].weight)s2 = i;}}}void Huffman::PrintCharWeight(){for (int i = 1; i <= n; ++i){/*switch (chars[i].c){case '\t':cout << "\\t";break;case '\n':cout << "\\n";break;default:*/cout << chars[i].c;//break;//}cout << "——" << chars[i].weight << endl;}}void Huffman::PrintCharCode(){for (int i = 1; i <= n; ++i){/*switch (chars[i].c){case '\t':cout << "\\t";break;case '\n':cout << "\\n";break;default:*/cout << chars[i].c;//break;//}cout << "——" << chars[i].code << endl;}}//输出文本串void Huffman::PrintText(){cout << text << endl;}//输出0-1编码void Huffman::PrintCode(){cout << code << endl;}//根据各字符的权值建立字符-编码表void Huffman::MakeCharMap(){if (n <= 1)return;int m = 2 * n - 1;//哈弗曼树所需节点数huffTree = new HuffTreeNode[m+1];//0号单元未使用//初始化int i;for (i = 1; i <= n; ++i)   //从1开始{huffTree[i].weight = chars[i].weight;huffTree[i].parent = 0;huffTree[i].lchild = 
0;huffTree[i].rchild = 0;}for (i = n + 1; i <= m; ++i){huffTree[i].weight = 0;huffTree[i].parent = 0;huffTree[i].lchild = 0;huffTree[i].rchild = 0;}//建哈弗曼树for (i = n + 1; i <= m; ++i){int s1,s2;select(i - 1, s1, s2);huffTree[s1].parent = huffTree[s2].parent = i;huffTree[i].lchild = s1;huffTree[i].rchild = s2;huffTree[i].weight = huffTree[s1].weight + huffTree[s2].weight;}//从叶子到根节点逆向求每个字符的哈弗曼编码char *cd = new char[n];//分配求编码的工作空间(每个字符编码结果最长n-1再加上'\0')cd[n-1] = '\0';//编码结束符for(i = 1; i <= n; ++i)//逐个字符求哈弗曼编码{int start = n - 1;int c,f;//从叶子到根逆向求编码for (c = i, f = huffTree[i].parent; f != 0; c = f, f = huffTree[f].parent){if (huffTree[f].lchild == c)//左孩子编码为0cd[--start] = '0';else//右孩子编码为1cd[--start] = '1';}chars[i].code = new char[n - start];//为第i个字符编码分配空间strcpy(chars[i].code,&cd[start]);}delete cd;}//从文件读入原文void Huffman::ReadTextFromFile(char *filename){ifstream infile(filename);if(!infile){cerr << "无法打开文件!" <<endl;return;}char c;while(infile.get(c)){text += c;}}//将编码存入文件void Huffman::SaveCodeToFile(char *filename){ofstream outfile(filename);if (!outfile){cerr << "保存文件出错!" << endl;return;}outfile << code;}//从文件读入编码void Huffman::ReadCodeFromFile(char *filename){ifstream infile(filename);if (!infile){cerr << "无法打开文件!" 
<<endl;return;}infile >> code;}//将0-1编码串解码void Huffman::Decode(){text = "";string::size_type i,count;for (i = 0; i < code.size(); i += count){//每个字符的编码结果最长n-1,从1至n-1依次尝试for (count = 1; count < n; ++count){for (int j = 1; j <= n; ++j)if (code.substr(i, count) == chars[j].code)//code.substr(a,b)表示对字符串code截取从第a个到第b个,依次与chars[].code比较{text += chars[j].c;  //text表示原文存放的数组名,作指针使用goto next;}}next:;}}//统计原文中各字符的权值void Huffman::CountCharsWeight(){if (text.empty())return;if (chars != NULL)delete chars;int i = 0;n = 0;chars = new CharMapNode[2];chars[1].c = text[i];chars[1].weight = 1;++n;for (i = 1; i != text.size(); ++i){int j;for (j = 1; j <= n; ++j)//遍历当前字符表,如果已存在该字符,权值+1{if (text[i] == chars[j].c){++chars[j].weight;break;}}if (j > n)//该字符不存在,添加该字符{++n;CharMap newchars = new CharMapNode[n + 1];memcpy(newchars, chars, n * sizeof(CharMapNode));delete chars;chars = newchars;chars[n].c = text[i];chars[n].weight = 1;}}}//输入字符和对应权值void Huffman::InputCharsWeight(){cout << "请输入字符集大小n(n>1):" << endl;cin >> n;if (chars != NULL)delete chars;chars = new CharMapNode[n+1];//0号单元未使用cout << "请输入字符和权值:" << endl;for (int i = 1; i <= n; ++i) {cin.ignore();//清除输入缓冲区cin.get(chars[i].c);//输入单个字符,可以是空白符cin >> chars[i].weight;}}void Huffman::SaveTextToFile(char *filename){ofstream outfile(filename);if (!outfile){cerr << "保存文件出错!" << endl;return;}outfile << text;}

 

主函数

#include <iostream>#include "Huffman.h"using namespace std;int main(){Huffman huffman;huffman.ReadTextFromFile("text.txt");/****************第一步输入字符和对应权值******************/cout << "请选择:  1.程序自动统计字符和权值(推荐)  2.用户输入" << endl;int r;do{cin >> r;}while((r != 1) && (r != 2));if (r == 1)huffman.CountCharsWeight();elsehuffman.InputCharsWeight();cout << "字符及对应权值:" << endl;huffman.PrintCharWeight(); //计算每个字符对应的权值system("pause");cout << endl;/****************第二步建哈弗曼树,输出字符与编码的对应关系******************/huffman.MakeCharMap();  //实现哈弗曼编码,对应存入chars[i].c-chars[i].codecout << "字符及对应的编码:" << endl;huffman.PrintCharCode();  //打印每个字符及其对应的编码,即chars[i].c-chars[i].codesystem("pause");cout << endl;/****************第三步对字符进行编码,将结果输出并存入文件******************/cout << "对原文进行编码:" << endl;cout << "原文:" << endl;huffman.PrintText();  //输出文本串huffman.Encode();  //对文本串进行编码cout << "编码:" << endl;huffman.PrintCode();huffman.SaveCodeToFile("code.txt");system("pause");cout << endl;/****************第四步从文件读入0、1代码串解码后输出并存入文件******************/cout << "对编码进行解码:" << endl;huffman.ReadCodeFromFile("code.txt");cout << "编码:" << endl;huffman.PrintCode();huffman.Decode();cout << "原文:" << endl;huffman.PrintText();huffman.SaveTextToFile("resulttext.txt");cout << "\n Over ^_^" << endl;system("pause");return 0;}


输出界面

 
 
 

 

原创粉丝点击