Huffman Tree

来源:互联网 发布:用中文域名的大公司 编辑:程序博客网 时间:2024/05/21 17:30

Huffman Tree

标签(空格分隔): 算法、Huffman


Huffman Tree & Huffman Coding

Given a sequence of weights, build a Huffman tree from that sequence. Or, given a string (containing only the letters ‘a’ to ‘z’), build a Huffman tree from the frequency of each character.

Something you need to know about Huffman Coding:

When you have a piece of information to encode, you want to find the best way to code it, and Huffman coding is a good choice because it produces a prefix code with the minimum total weighted length. Now I am going to show you how to build a Huffman tree and derive the codes. Suppose {2, 4, 5, 7} is the weight sequence you are given, and you want to build the optimal binary tree.

step_1: find the two smallest weights in the sequence;
step_2: make the smaller one the left child and the other the right child; the weight of their parent is the sum of the weights of its two children;
step_3: remove the two nodes from the sequence and put the new subtree (represented by its parent) at the back of the sequence;
step_4: go back to step_1 until only one tree is left in the sequence — that tree is the Huffman tree.

这里写图片描述

When you get a string as input, you need to remove all the spaces from it, count the frequency of every letter that appears in the string, and turn the counts into a weight sequence (in alphabetical order). Then build the Huffman tree in the same way. For example, for ‘abbbccdddddeeeeff’ you should transform the string into the weight sequence {1, 3, 2, 5, 4, 2}.

这里写图片描述

代码

// HuffmanTree.hpp#ifndef HUFFMANTREE_HPP#define HUFFMANTREE_HPP#include <iostream>#include <string>#include <list>#include <algorithm>#include <stack>#include <map>using namespace std;namespace HFM { struct Node {   char alpha; //该节点的字符   int weight; //权   int height; //节点在树的深度   string code; //Huffman编码后的编码   Node *left, *right;    Node(char a = '\0', int w = 0, Node* l = NULL, Node* r = NULL):   alpha(a), weight(w), left(l), right(r), height(0) { code = ""; } }; class HuffmanCode {  public:   explicit HuffmanCode(const string &str); //输入一串字符   explicit HuffmanCode(int w[], int n); //输入一个权序列   void DisplayCode(); //显示   void BuildTree(list<Node*>& s); //根据所给信息建树   void coding(Node* r, string c); //建树后的编码   static bool cmp1(Node* l, Node* r); //建树过程中节点的排序   static bool cmp2(Node* l, Node* r); //对leaves进行排序(方便输出)   //用栈来销毁树   ~HuffmanCode() {     if (root != NULL) {       stack<Node*> s;       s.push(root);       while (!s.empty()) {        Node* p = s.top();        s.pop();        if (p->right != NULL) s.push(p->right);        if (p->left != NULL) s.push(p->left);        delete p;       }      root = NULL;     }   }  private:   string origin; //原始输入文本   bool type; //决定是哪种输入方式   list<Node*> leaves; //存储叶节点   Node* root; //树的根 };}// HuffmanTree.cpp#include "HuffmanTree.hpp"using namespace HFM;HuffmanCode::HuffmanCode(int w[], int n) {  type = false; //false代表权序列输入  root = NULL;   origin = "";  list<Node*> sequence; //该链表储存建树过程中的节点   //根据权序列创建节点  for (int i = 0; i < n; i++) {     Node* temp = new Node('\0', w[i], NULL, NULL); //'\0'代表无代表字符    sequence.push_back(temp);  }  sequence.sort(cmp1); //对节点进行升序排序  BuildTree(sequence); //建树  coding(root, ""); //建树后编码}HuffmanCode::HuffmanCode(const string &str) {  type = true; //true代表字符串输入  origin = "";  int size = str.size();  int ch[26] = {0}; //0-25代表a-z,采用桶思想统计权  root = NULL;  list<Node*> sequence; //该链表储存建树过程中的节点   //统计字符权重  for (int i = 0; i < size; i++) {    if (str[i] >= 'a' && str[i] <= 'z') {      ch[str[i] - 97]++;  
       origin += str[i];    }    }  //创建节点  for (int i = 0; i < 26; i++) {    if (ch[i] > 0) {      Node* temp = new Node(97 + i, ch[i], NULL, NULL);      sequence.push_back(temp);    }  }  sequence.sort(cmp1); //对节点进行升序排序  BuildTree(sequence); //建树  coding(root, ""); //编码}//先按权排序,若权重相等,按字典序排序bool HuffmanCode::cmp1(Node* l, Node* r) {  if (l->weight < r->weight)    return true;  else if (l->weight == r->weight && (l->alpha < r->alpha)           && l->alpha != '\0' && r->alpha != '\0')     return true;  else    return false;}//先按权排序,若权重相等,按照深度排序,若深度相等,则按字典序排序(大的在前,如111 > 110)bool HuffmanCode::cmp2(Node* l, Node* r) {  if (l->weight < r->weight)    return true;  else if ((l->weight == r->weight) &&            l->height > r->height)     return true;  else if ((l->weight == r->weight) && l->height == r->height )     if (l->code > r->code)      return true;   return false;}void HuffmanCode::DisplayCode() {  leaves.sort(cmp2); //对叶节点进行排序,方便输出  if (type == false) { //权序列建树的输出    list<Node*>::iterator it = leaves.begin();    int w = 0;    for (; it != leaves.end(); it++) {      cout << "Weight = " << (*it)->weight << "; "           << "Code = " << (*it)->code << endl;      w += (*it)->weight * ((*it)->code).size();        }    cout << "Huffman's codeLength = " << w << endl;  } else { //字符串建树的输出    list<Node*>::iterator it = leaves.begin();    map<char, string> c;    int w = 0;    for (; it != leaves.end(); it++) {      cout << '(' << (*it)->alpha << ')' << " " <<"Weight = " << (*it)->weight << "; "           << "Code = " << (*it)->code << endl;      w += (*it)->weight * ((*it)->code).size();      c[(*it)->alpha] = (*it)->code;    }    cout << "Huffman's codeLength = " << w << endl;    cout << "Origin Text: " << origin << endl;    cout << "Huffman's Code: ";    for (int i = 0; i < origin.size(); i++) {      cout << c[origin[i]];    }    cout << endl;  }}//用递归的思想,从下往上建树,模仿手动建树的过程void HuffmanCode::BuildTree(list<Node*>& s) {  if (s.size() == 0)    return ;  else if (s.size() 
== 1) {    root = s.front(); //递归出口  } else {    Node* left = s.front(); //取出第一个节点(权最小) 因为链表中的节点已经排序了    s.pop_front(); //从链表中删除    Node* right = s.front(); //取出删除后的第一个节点(权最小)    s.pop_front(); //从链表中删除    Node* newOne = new Node('\0', left->weight + right->weight, left, right);     //取出的两个节点相加得到新的父节点    s.push_back(newOne); //讲新节点压入链表    s.sort(cmp1); //再次排序    BuildTree(s); //递归建树  }}//递归编码void HuffmanCode::coding(Node* r, string c) {  if (r->left != NULL) {    string s = c + "1"; //往左为1    coding(r->left, s); //递归  }  if (r->right != NULL) {    string s = c + "0"; //往右为0    coding(r->right, s); //递归  }  if (r->left == NULL && r->right == NULL) { //递归出口     r->code = c;     r->height = c.size();    leaves.push_back(r); //找到叶节点  }}// main.cpp#include <iostream>#include <cstring>#include <string>#include "BeautifulTree.hpp"using namespace std;using namespace HFM;void test_HFM() {    cout << "---------- test_HFM ----------\n# TEST_1\n";    int weight[] = { 7, 4, 5, 2};    int count = 4;    HuffmanCode HFC(weight, count);    HFC.DisplayCode();    cout << "\nTEST_2\n";    string input = "you are the apple in my eyes";    HuffmanCode HFC_2(input);    HFC_2.DisplayCode();}void hard_test() {    cout << "---------- test_HFM ----------\n# TEST_1\n";    int count, weight_arr[20] = {0};    cin >> count;    for (int i = 0; i < count; i++) cin >> weight_arr[i];    HuffmanCode HFC(weight_arr, count);    HFC.DisplayCode();    cout << "\nTEST_2\n";    string input = "";    cin >> input;    HuffmanCode HFC_2(input);    HFC_2.DisplayCode();}int main() {        int t;    cin >> t;    if (t == 0) {        test_HFM();    } else {        hard_test();    }    system("pause");    return 0;} #endif

Standard Output

Format_1 [if construct with {2, 3, 4, 5, 7} and 5]
  Weight = 2; Code = 011
  Weight = 3; Code = 010
  Weight = 4; Code = 11
  Weight = 5; Code = 10
  Weight = 7; Code = 00
  Huffman's codeLength = 47

Format_2 [if construct with string 'you are the apple in my eyes']
  (h) Weight = 1; Code = 00011
  (i) Weight = 1; Code = 00010
  (l) Weight = 1; Code = 00001
  (m) Weight = 1; Code = 00000
  (n) Weight = 1; Code = 1111
  (o) Weight = 1; Code = 1110
  (r) Weight = 1; Code = 1101
  (s) Weight = 1; Code = 1100
  (t) Weight = 1; Code = 0111
  (u) Weight = 1; Code = 0110
  (a) Weight = 2; Code = 0011
  (p) Weight = 2; Code = 0010
  (y) Weight = 3; Code = 010
  (e) Weight = 5; Code = 10
  Huffman's codeLength = 79
  Origin Text: youaretheappleinmyeyes
  Huffman's Code: 0101110011000111101100111000111000110010001000001100001011110000001010010101100

HuffmanTree 正确性证明

需先证明以下两个引理
(1) If w1 ≤ w2 ≤ … ≤ wn are the weights of an optimal binary tree, then there is an optimal binary tree for these weights in which w1 and w2 are siblings on the deepest level.
(2) If T is an optimal binary tree with weights w1 ≤ w2 ≤ … ≤ wn in which w1 and w2 are siblings, and we replace the parent of w1 and w2 (together with its two children) by a single leaf of weight w1+w2, then the resulting tree T’ is optimal for the weights w1+w2, w3, w4, …, wn.

Proof:
(1)
当我们用Huffman方法来建最优二叉树时,w1和w2当然会是兄弟(brothers)。如果他们不在树的最深处,我们会有u>z,但是x(w1)和y(w2)是最小权节点,故与假设矛盾

这里写图片描述

(2)
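Let $l_i$ be the depth of the leaf with weight $w_i$ in $T$. Since $w_1$ and $w_2$ are siblings, $l_1 = l_2$, and
$$W(T) = w_1 l_1 + w_2 l_2 + w_3 l_3 + \dots + w_n l_n.$$
If we replace the parent of $w_1$ and $w_2$ with a leaf of weight $w_1 + w_2$, that leaf lies at depth $l_1 - 1$, so
$$W(T') = (w_1 + w_2)(l_1 - 1) + w_3 l_3 + \dots + w_n l_n,$$
and therefore $W(T) = W(T') + w_1 + w_2$.
Suppose $T'$ is not an optimal binary tree. Then there is a tree $T''$ for the weights $w_1 + w_2, w_3, \dots, w_n$ with $W(T'') < W(T')$.
Expanding the leaf $w_1 + w_2$ of $T''$ back into a parent with the two children $w_1$ and $w_2$ gives a tree for $w_1, w_2, \dots, w_n$ whose weight is $W(T'') + w_1 + w_2$.
But $W(T) = W(T') + w_1 + w_2 > W(T'') + w_1 + w_2$,
which means that $T$ is not an optimal binary tree.
This contradicts the assumption, so $T'$ must be optimal.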

原创粉丝点击