小项目-文件压缩(哈夫曼树)

来源:互联网 发布:虚拟股票操作软件 编辑:程序博客网 时间:2024/06/16 18:46

先回顾一下哈夫曼树
huffman树即最优二叉树,是带权路径长度最短的二叉树。哈夫曼树的构建使用贪心算法。
这里写图片描述
每次选择该集合中权值最小的两个作为叶子结点,父亲节点的权值为叶子节点权值之和。然后又将其父亲重新放进此集合里。重复前面的做法,直到完成哈夫曼树的建立。
每次都要在集合中找出2个权值最小的。这里我们就可以建立一个小堆:每次取出堆顶的最小值后向下调整堆,把新的父亲结点插入堆后向上调整堆就行了。
那么哈夫曼树是怎么实现文件压缩的呢?
1.统计文件中字符出现的个数
2.每个字符的个数作为权值构建哈夫曼树,这样每个字符对应的权值为叶子结点,然后获取每个叶子节点的哈夫曼编码。
3.进行压缩,哈夫曼编码满8位写进压缩文件。
4.解压缩。
这里写图片描述
代码实现

// heap.h
// Generic binary heap stored in a std::vector. With the default comparator
// (Less) the smallest element is on top, i.e. it is a min-heap.
#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include <cassert>
#include <iostream>
#include <utility>
#include <vector>
// Kept for backward compatibility: the headers that include this file rely on
// unqualified standard-library names (string, cout, ...).
using namespace std;

// Default ordering functor: returns true when l should sit closer to the top
// of the heap than r.
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

// Binary heap ordered by Compare.
//   T       - element type (copied into the heap)
//   Compare - default-constructible functor; compare(a, b) == true means that
//             a must be above b. Its operator() may be non-const.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the n elements starting at a (a may be null when n == 0).
    Heap(T* a, size_t n)
    {
        _a.reserve(n);
        for (size_t i = 0; i < n; i++)
        {
            _a.push_back(a[i]);
        }
        // Start at the last node that has a child. The index is computed in
        // signed arithmetic so that 0 or 1 elements yield -1 and skip the
        // loop, instead of relying on unsigned wrap-around.
        for (int i = static_cast<int>(_a.size()) / 2 - 1; i >= 0; i--)
        {
            _AdjustDown(i);
        }
    }

    // Inserts a copy of x.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(static_cast<int>(_a.size()) - 1);
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        _AdjustDown(0);
    }

    // Returns the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_a.empty());
        return _a[0];
    }

    const T& Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    // Number of stored elements.
    size_t Size() const
    {
        return _a.size();
    }

protected:
    // Sinks the element at index root until neither child has to be above it.
    void _AdjustDown(int root)
    {
        Compare compare;
        const int size = static_cast<int>(_a.size());
        int parent = root;
        int child = parent * 2 + 1;
        while (child < size)
        {
            // Pick whichever child should be closer to the top.
            if (child + 1 < size && compare(_a[child + 1], _a[child]))
            {
                child++;
            }
            if (!compare(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[parent], _a[child]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Lifts the element at index root while it has to be above its parent.
    void _AdjustUp(int root)
    {
        Compare compare;
        int child = root;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!compare(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[parent], _a[child]);
            child = parent;
        }
    }

protected:
    vector<T> _a;
};

// Small smoke test. Declared inline because it is defined in a header and
// would otherwise be a duplicate definition when included from several
// translation units.
inline void TestHeap()
{
    int a[] = { 1, 3, 9, 3, 54, 87, 21, 15, 8 };
    int n = sizeof(a) / sizeof(a[0]);
    Heap<int> hp(a, n);
    hp.Push(2);
}
//huffman.h#define _CRT_SECURE_NO_WARNINGS 1#pragma once#include"heap.h"template<class T>struct HuffmanTreeNode{    T _w;    HuffmanTreeNode<T>* _left;    HuffmanTreeNode<T>* _right;    HuffmanTreeNode<T>* _parent;    HuffmanTreeNode(const T& x)        :_w(x)        , _left(NULL)        , _right(NULL)        , _parent(NULL)    {}};template<class T>class HuffmanTree{    typedef HuffmanTreeNode<T> Node;public:    HuffmanTree()        :_root(NULL)    {}    ~HuffmanTree()    {        _Destory(_root);        _root = NULL;    }    HuffmanTree(T* a, size_t n,const T& invalid)    {        struct Compare        {            bool operator()(Node* l, Node* r)            {                return l->_w < r->_w;            }        };        Heap<Node* ,Compare> minHeap;        for (size_t i = 0; i < n; i++)        {            if (a[i] != invalid )            {                minHeap.Push(new Node(a[i]));            }        }        //贪心算法        while (minHeap.Size()>1)        {            Node* left = minHeap.Top();            minHeap.Pop();            Node* right = minHeap.Top();            minHeap.Pop();            Node* parent = new Node(left->_w + right->_w);            parent->_left = left;            parent->_right = right;            left->_parent = parent;            right->_parent= parent;            minHeap.Push(parent);        }        _root = minHeap.Top();    }    Node*& GetRoot()    {        return _root;    }public:    void _Destory(Node* root)    {        if (root == NULL)        {            return;        }        _Destory(root->_left);        _Destory(root->_right);        delete root;    }protected:    Node* _root;};/*void TestHuffManTree(){    int a[] = { 0, 4, 2, 1, 3, 0};    int n = sizeof(a) / sizeof(a[0]);    HuffmanTree<int> t(a, n, 0);    cout << endl;}*/
//FileCompress.h#define _CRT_SECURE_NO_WARNINGS 1#include"heap.h"#include"HuffManTree.h"#include<string>#include<assert.h>#include<algorithm>typedef long long longtype;struct CharInfo{    longtype _count;//字符出现的次数    string _code;//字符的哈夫曼编码    char _ch;//字符    CharInfo(const longtype x=0)        :_count(x)    {}    bool operator!=(const CharInfo& info)    {        return _count != info._count;    }    CharInfo operator+(const CharInfo& info)    {    return CharInfo(_count + info._count);    }    bool operator <(const CharInfo& info)    {        return _count < info._count;    }};struct CountInfo{    char _ch;    longtype _count;    CountInfo()        :_count(0)    {}};class FileCompess{public:    FileCompess()    {        for (int i = 0; i < 256; i++)        {            _infos[i]._ch = i;            _infos[i]._count = 0;        }    }    void Compess(const char* filename)    {        //统计字符个数        assert(filename);        FILE* fout = fopen(filename, "rb");        assert(fout);        char ch = fgetc(fout);        while (!feof(fout))        {            _infos[(unsigned char)ch]._count++;            ch = fgetc(fout);        }    //构建哈夫曼树        CharInfo invalid;        invalid._count = 0;        HuffmanTree<CharInfo> tree(_infos, 256, invalid);        //获取哈夫曼编码        GetHuffmanCode(tree.GetRoot());        //压缩        string compressfile = filename;        compressfile += ".huffman";        FILE* fin = fopen(compressfile.c_str(), "wb");        assert(fin);        fseek(fout, 0, SEEK_SET);//将指针偏移到文件开始        ch = fgetc(fout);        int pos = 0;        char value = 0;        while (!feof(fout))        {            string& code = _infos[(unsigned char)ch]._code;            for (size_t i = 0; i < code.size(); i++)            {                value <<= 1;                if (code[i] == '1')                {                    value |= 1;                }                else                {                    value |= 0;                }                   pos++;      
          if (pos == 8)                {                    fputc(value, fin);                    pos = 0;                    value = 0;                }            }            ch = fgetc(fout);        }        if (pos)        {            value <<= (8 - pos);            fputc(value, fin);        }        //写配置文件为解压缩        string configfile = filename;        configfile += ".config";        FILE* fcon = fopen(configfile.c_str(), "wb");        assert(fcon);        CountInfo info;        for (size_t i = 0; i < 256; i++)        {            if (_infos[i]._count)            {                info._ch = _infos[i]._ch;                info._count = _infos[i]._count;                fwrite(&info, sizeof(info), 1, fcon);            }        }        CountInfo info2;        info2._count = -1;        fwrite(&info2, sizeof(info2), 1, fcon);         fclose(fout);        fclose(fin);        fclose(fcon);    }    void UnCompess(const char* filename)    {    //读配置文件        string configfile = filename;        configfile += ".config";        FILE* confile = fopen(configfile.c_str(), "rb");        CountInfo info;        while (1)        {            fread(&info, sizeof(info), 1, confile);            if (info._count == -1)            {                break;            }            _infos[(unsigned char)info._ch]._ch = info._ch;            _infos[(unsigned char)info._ch]._count = info._count;        }        string uncompressfile(filename);        size_t pos = uncompressfile.rfind('.');        assert(pos != string::npos);        uncompressfile = uncompressfile.substr(0,pos);        uncompressfile += ".unhaffman";        //还原的文件        FILE* fin = fopen(uncompressfile.c_str(), "wb");        assert(fin);        //压缩的文件        string file = filename;        file += ".huffman";        FILE* fout = fopen(file.c_str(), "rb");        assert(fout);        //重建哈夫曼树        CharInfo invalid;        invalid._count = 0;        HuffmanTree<CharInfo>tree(_infos, 256, invalid);        
HuffmanTreeNode<CharInfo>* root = tree.GetRoot();        HuffmanTreeNode<CharInfo>* cur = root;        longtype count = root->_w._count;        char value = fgetc(fout);         while (!feof(fout))        {            int pos = 7;            char test = 1;            while (pos >= 0)            {                if (value & (test << pos))//找出读出字符的每一位                {                    cur = cur->_right;                }                else                {                    cur = cur->_left;                }                if (cur->_left == NULL&&cur->_right == NULL)                {                    fputc(cur->_w._ch, fin);                    cur = root;                    count--;                }                pos--;            }            if (count == 0)//循环从这退出,当压缩的时候,最后不够8位就会补0,count是字符的个数,当解压缩的count==0时,说明已解压缩完成,就不会把无效的字符解压说出来。            {                break;            }            value = fgetc(fout);                }        fclose(fout);        fclose(fin);    }protected:    //构建哈夫曼编码    void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root)    {        if (root == NULL)        {            return;        }       if (root->_left == NULL&&root->_right == NULL)        {            HuffmanTreeNode<CharInfo>* cur = root;            HuffmanTreeNode<CharInfo>* parent = cur->_parent;            string& code = _infos[(unsigned char)root->_w._ch]._code;            while (parent)            {                if (cur == parent->_left)                {                    code += '0';                }                else                {                    code += '1';                }                cur = parent;                parent = cur->_parent;            }            reverse(code.begin(), code.end());        }       GetHuffmanCode(root->_left);       GetHuffmanCode(root->_right);    }protected:    CharInfo _infos[256];};void test(){    //TestHeap();    //TestHuffManTree();    FileCompess t;    t.Compess("x.jpg");    //cout << "压缩成功" << endl;    
//t.Compess("xs.jpg");     //t.UnCompess("xs.jpg");    t.UnCompess("x.jpg");    cout << "解压成功" << endl;}

注:在做的时候哈夫曼树一定要先测试好,要不然调试时就是一大坑。还有,要用到的仿函数都要一一实现。记得一定要把字符强转成unsigned char再作为数组下标,否则下标可能为负数,程序会崩掉。这里我用的是二进制方式读写文件,所以用feof来判断文件结束。写配置文件时考虑到精度的丢失,用了结构体写入和读出。

0 0
原创粉丝点击