赫夫曼树的创建

来源：互联网发布：淘宝如何不打电话注册编辑：程序博客网时间：2024/06/16 11:41

今天在看《数据结构》（严蔚敏版），第6章第6节，讲到创建赫夫曼树的时候，给出的算法没有实现Select()函数，即选出数组中最小的两个权值的节点。于是我自己用c++实现了一个版本，考虑到算法的效率问题，我选择了创建一个小顶堆，每次从堆顶提取出最小的权值节点，这样比暴力for循环求最小权值节点的效率要高，特别是在树的节点很多时。而建堆的操作也使得节点会被调整顺序，这也使得我定义的描述的节点的结构体跟书上不一样，下面来看。
描述构建赫夫曼树的算法书上说的很多了，我这里主要解释一下我的思路：

存储：

如何表示带权的节点？定义结构体RawNode，data表示权值，pos表示其在原vector或数组中的从0开始的位置。为什么需要pos这个域呢？
因为最终描述赫夫曼树时，HTNode结构使用了基于线性连续存储区域的孩子双亲表示法，所以要通过pos记录他们之间的寻址关系。
赫夫曼树，采用孩子双亲表示法，记录线性存储区中的相对地址，不用指针。与书上一样。

流程：

用原带权节点vector初始化赫夫曼树，此时树中所有的叶子节点左右孩子都为0，双亲为-1（表示尚未挂接到任何双亲节点）。
在vector上原地建一个小顶堆，节点的相对关系已经改变。所以用变量pos_in_wght记录原vector的size，表示下一个新建的节点的地址。
取出堆上的最小的两个带权节点sm1，sm2，并将其从堆中删除；
新建节点sm3的权值为sm1和sm2的和，位置pos为pos_in_wght，此后pos_in_wght自加一，表示下一个新建节点的位置。
将新的节点加入到堆中，使用push_heap接口使堆维持其状态。
新建一个赫夫曼树节点，正确处理其与sm1，sm2，sm3的父子关系，然后加入到树hoff中。
回到第3步（取出堆上最小的两个带权节点）重复执行，直到堆中只剩下一个节点。
将最后一个节点的双亲指向-1，表示它为根节点。

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

/*
 * A weighted item that is still waiting to be merged.
 *   data - the weight
 *   pos  - index of the matching HTNode inside the tree vector
 * The heap reorders RawNodes, so pos is what ties a RawNode back to its
 * tree node after the reordering.
 */
typedef struct rnode
{
    int data;
    int pos;
} RawNode;

/*
 * Huffman tree node, stored in a flat vector. All links are vector indices,
 * not pointers.
 *   parent      - index of the parent, -1 when the node has none (root)
 *   left, right - indices of the children; a leaf stores 0 in both.
 * An internal node always has two distinct children, so "left == 0 &&
 * right == 0" identifies a leaf even though 0 is also a valid index.
 */
typedef struct hnode
{
    int weight;
    int parent, left, right;
} HTNode, *HoffmanTree;

/* Print one "| weight | parent | left right |" row per tree node. */
void PrintHuffman(const vector<HTNode> &hoff)
{
    for (vector<HTNode>::const_iterator it = hoff.begin(); it != hoff.end(); ++it)
    {
        cout << "| " << it->weight << " | " << it->parent << " | "
             << it->left << " " << it->right << " |\n";
    }
}

/* Print `note` on its own line, then every weight of `w` on the next line. */
void PrintVector(const vector<RawNode> &w, const string &note)
{
    cout << note << '\n';
    for (vector<RawNode>::size_type i = 0; i < w.size(); ++i)
    {
        cout << w[i].data << " ";
    }
    cout << '\n';
}

/*
 * Heap ordering predicate. The std heap algorithms keep the "largest"
 * element on top, so comparing with '>' turns them into a min-heap on data.
 */
bool compare(const RawNode &a, const RawNode &b)
{
    return a.data > b.data;
}

/* Remove and return the smallest-weight element of a heap ordered by compare.
 * The heap must not be empty. */
static RawNode PopSmallest(vector<RawNode> &heap)
{
    pop_heap(heap.begin(), heap.end(), compare); // moves the minimum to the back
    const RawNode smallest = heap.back();
    heap.pop_back();
    return smallest;
}

/*
 * Build a Huffman tree from a list of weights.
 *
 * wght - in: one RawNode per leaf; only `data` is read. `pos` is overwritten
 *        with the element's index, so callers need not (but may) fill it in.
 *        The vector is consumed as a work heap: on return it holds a single
 *        element describing the root (or stays empty for empty input).
 * hoff - out: cleared, then filled with 2*n-1 nodes for n leaves. Nodes
 *        0..n-1 are the leaves in input order; each merge appends one
 *        internal node, so the root is the last element.
 *
 * When several weights are equal, which of them is merged first depends on
 * the standard library's heap implementation.
 */
void Build_HoffmanTree(vector<RawNode> &wght, vector<HTNode> &hoff)
{
    // All links are absolute indices into hoff, so start from an empty tree.
    hoff.clear();
    if (wght.empty())
        return;

    const vector<RawNode>::size_type leaves = wght.size();
    hoff.reserve(2 * leaves - 1);

    // 1. One leaf per weight. pos is recorded here, before heapifying,
    //    because make_heap is about to shuffle the RawNodes.
    for (vector<RawNode>::size_type i = 0; i < leaves; ++i)
    {
        wght[i].pos = static_cast<int>(i);

        HTNode leaf;
        leaf.weight = wght[i].data;
        leaf.parent = -1;
        leaf.left = 0;
        leaf.right = 0;
        hoff.push_back(leaf);
    }

    // 2. Turn the weights into a min-heap in place.
    make_heap(wght.begin(), wght.end(), compare);

    // Index the next internal node will get in hoff.
    int pos_in_wght = static_cast<int>(hoff.size());

    // 3. Merge the two lightest nodes until only the root is left.
    while (wght.size() > 1)
    {
        const RawNode sm1 = PopSmallest(wght);
        const RawNode sm2 = PopSmallest(wght);

        RawNode sm3;
        sm3.data = sm1.data + sm2.data;
        sm3.pos = pos_in_wght++;
        wght.push_back(sm3);
        push_heap(wght.begin(), wght.end(), compare);

        hoff[sm1.pos].parent = sm3.pos;
        hoff[sm2.pos].parent = sm3.pos;

        HTNode merged;
        merged.weight = sm3.data;
        merged.parent = -1; // set when this node is merged in a later round
        merged.left = sm1.pos;
        merged.right = sm2.pos;
        hoff.push_back(merged);
    }

    // The last node created is the root.
    hoff.back().parent = -1;
}

/*
 * Demo driver. Define HUFFMAN_NO_DEMO_MAIN to leave it out when the
 * functions above are linked into a program that has its own main().
 */
#ifndef HUFFMAN_NO_DEMO_MAIN
int main()
{
    const int weights[] = {5, 7, 2, 4};
    const int count = static_cast<int>(sizeof(weights) / sizeof(weights[0]));

    vector<RawNode> w;
    for (int i = 0; i < count; ++i)
    {
        RawNode a;
        a.data = weights[i];
        a.pos = i;
        w.push_back(a);
    }

    vector<HTNode> Huffman;
    Build_HoffmanTree(w, Huffman);
    PrintHuffman(Huffman);
    return 0;
}
#endif

打印结果如下：

| 5 | 5 | 0 0 |
| 7 | 6 | 0 0 |
| 2 | 4 | 0 0 |
| 4 | 4 | 0 0 |
| 6 | 5 | 2 3 |
| 11 | 6 | 0 4 |
| 18 | -1 | 1 5 |

0 0