小项目-文件压缩(哈夫曼树)
来源:互联网 发布:虚拟股票操作软件 编辑:程序博客网 时间:2024/06/16 18:46
先回顾一下哈夫曼树
huffman树即最优二叉树,是带权路径长度最短的二叉树。哈夫曼树的构建使用贪心算法。
每次选择该集合中权值最小的两个作为叶子结点,父亲节点的权值为叶子节点权值之和。然后又将其父亲重新放进此集合里。重复前面的做法,直到完成哈夫曼树的建立。
每次都要在集合中找出2个权值最小的。这里我们就可以建立一个小堆:每次取出堆顶(最小值)后向下调整堆,插入新结点后向上调整堆就行了。
那么哈夫曼树是怎么实现文件压缩的呢?
1.统计文件中字符出现的个数
2.每个字符的个数作为权值构建哈夫曼树,这样每个字符对应的权值为叶子结点,然后获取每个叶子节点的哈夫曼编码。
3.进行压缩,哈夫曼编码满8位写进压缩文件。
4.解压缩。
代码实现
//heap.h
#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include <assert.h>
#include <iostream>
#include <utility>
#include<vector>
using namespace std;

// Default ordering functor. With this comparison the smallest element ends
// up at the top of the heap (min-heap).
template<class T>
struct Less
{
	bool operator()(const T& l, const T& r) const
	{
		return l < r;
	}
};

// Binary heap stored in a vector.
//
// T       - element type.
// Compare - functor returning true when its first argument must sit closer
//           to the top than its second. It must be default-constructible,
//           because a fresh instance is created inside every adjustment.
template<class T, class Compare = Less<T> >
class Heap
{
public:
	// Creates an empty heap.
	Heap()
	{}

	// Builds a heap from the first n elements of a.
	// A null pointer or n == 0 yields an empty heap.
	Heap(T* a, size_t n)
	{
		if (a != NULL && n > 0)
		{
			_a.assign(a, a + n);
		}

		// Sift down every internal node, starting with the last one.
		// The index is computed in signed arithmetic on purpose: the former
		// "(size - 2) / 2" wrapped around in size_t for fewer than 2 elements.
		for (int i = static_cast<int>(_a.size() / 2) - 1; i >= 0; i--)
		{
			_AdjustDown(i);
		}
	}

	// Inserts x and restores the heap order.
	void Push(const T& x)
	{
		_a.push_back(x);
		_AdjustUp(static_cast<int>(_a.size()) - 1);
	}

	// Removes the top element. Calling this on an empty heap is a programming
	// error: it asserts in debug builds and is a no-op otherwise.
	void Pop()
	{
		assert(!_a.empty());
		if (_a.empty())
		{
			return;
		}

		using std::swap;
		swap(_a.front(), _a.back());
		_a.pop_back();
		_AdjustDown(0);
	}

	// Returns the top element. The heap must not be empty.
	T& Top()
	{
		assert(!_a.empty());
		return _a[0];
	}

	// Read-only access to the top element. The heap must not be empty.
	const T& Top() const
	{
		assert(!_a.empty());
		return _a[0];
	}

	// Number of stored elements.
	size_t Size() const
	{
		return _a.size();
	}

protected:
	// Moves the element at index root down until neither child has to sit
	// above it.
	void _AdjustDown(int root)
	{
		using std::swap;
		Compare compare;
		const int size = static_cast<int>(_a.size());
		int parent = root;
		int child = parent * 2 + 1;
		while (child < size)
		{
			// Choose whichever child must be closer to the top.
			if (child + 1 < size && compare(_a[child + 1], _a[child]))
			{
				child++;
			}
			if (!compare(_a[child], _a[parent]))
			{
				break;
			}
			swap(_a[parent], _a[child]);
			parent = child;
			child = parent * 2 + 1;
		}
	}

	// Moves the element at index root up until its parent may stay above it.
	void _AdjustUp(int root)
	{
		using std::swap;
		Compare compare;
		int child = root;
		while (child > 0)
		{
			const int parent = (child - 1) / 2;
			if (!compare(_a[child], _a[parent]))
			{
				break;
			}
			swap(_a[parent], _a[child]);
			child = parent;
		}
	}

protected:
	vector<T> _a;
};

// Manual smoke test. Declared inline so that including this header from
// several translation units does not produce duplicate definitions.
inline void TestHeap()
{
	int a[] = { 1, 3, 9, 3, 54, 87, 21, 15, 8 };
	int n = sizeof(a) / sizeof(a[0]);
	Heap<int> hp(a, n);
	hp.Push(2);
}
//huffman.h#define _CRT_SECURE_NO_WARNINGS 1#pragma once#include"heap.h"template<class T>struct HuffmanTreeNode{ T _w; HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; HuffmanTreeNode<T>* _parent; HuffmanTreeNode(const T& x) :_w(x) , _left(NULL) , _right(NULL) , _parent(NULL) {}};template<class T>class HuffmanTree{ typedef HuffmanTreeNode<T> Node;public: HuffmanTree() :_root(NULL) {} ~HuffmanTree() { _Destory(_root); _root = NULL; } HuffmanTree(T* a, size_t n,const T& invalid) { struct Compare { bool operator()(Node* l, Node* r) { return l->_w < r->_w; } }; Heap<Node* ,Compare> minHeap; for (size_t i = 0; i < n; i++) { if (a[i] != invalid ) { minHeap.Push(new Node(a[i])); } } //贪心算法 while (minHeap.Size()>1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_w + right->_w); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent= parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node*& GetRoot() { return _root; }public: void _Destory(Node* root) { if (root == NULL) { return; } _Destory(root->_left); _Destory(root->_right); delete root; }protected: Node* _root;};/*void TestHuffManTree(){ int a[] = { 0, 4, 2, 1, 3, 0}; int n = sizeof(a) / sizeof(a[0]); HuffmanTree<int> t(a, n, 0); cout << endl;}*/
//FileCompress.h#define _CRT_SECURE_NO_WARNINGS 1#include"heap.h"#include"HuffManTree.h"#include<string>#include<assert.h>#include<algorithm>typedef long long longtype;struct CharInfo{ longtype _count;//字符出现的次数 string _code;//字符的哈夫曼编码 char _ch;//字符 CharInfo(const longtype x=0) :_count(x) {} bool operator!=(const CharInfo& info) { return _count != info._count; } CharInfo operator+(const CharInfo& info) { return CharInfo(_count + info._count); } bool operator <(const CharInfo& info) { return _count < info._count; }};struct CountInfo{ char _ch; longtype _count; CountInfo() :_count(0) {}};class FileCompess{public: FileCompess() { for (int i = 0; i < 256; i++) { _infos[i]._ch = i; _infos[i]._count = 0; } } void Compess(const char* filename) { //统计字符个数 assert(filename); FILE* fout = fopen(filename, "rb"); assert(fout); char ch = fgetc(fout); while (!feof(fout)) { _infos[(unsigned char)ch]._count++; ch = fgetc(fout); } //构建哈夫曼树 CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> tree(_infos, 256, invalid); //获取哈夫曼编码 GetHuffmanCode(tree.GetRoot()); //压缩 string compressfile = filename; compressfile += ".huffman"; FILE* fin = fopen(compressfile.c_str(), "wb"); assert(fin); fseek(fout, 0, SEEK_SET);//将指针偏移到文件开始 ch = fgetc(fout); int pos = 0; char value = 0; while (!feof(fout)) { string& code = _infos[(unsigned char)ch]._code; for (size_t i = 0; i < code.size(); i++) { value <<= 1; if (code[i] == '1') { value |= 1; } else { value |= 0; } pos++; if (pos == 8) { fputc(value, fin); pos = 0; value = 0; } } ch = fgetc(fout); } if (pos) { value <<= (8 - pos); fputc(value, fin); } //写配置文件为解压缩 string configfile = filename; configfile += ".config"; FILE* fcon = fopen(configfile.c_str(), "wb"); assert(fcon); CountInfo info; for (size_t i = 0; i < 256; i++) { if (_infos[i]._count) { info._ch = _infos[i]._ch; info._count = _infos[i]._count; fwrite(&info, sizeof(info), 1, fcon); } } CountInfo info2; info2._count = -1; fwrite(&info2, sizeof(info2), 1, fcon); fclose(fout); 
fclose(fin); fclose(fcon); } void UnCompess(const char* filename) { //读配置文件 string configfile = filename; configfile += ".config"; FILE* confile = fopen(configfile.c_str(), "rb"); CountInfo info; while (1) { fread(&info, sizeof(info), 1, confile); if (info._count == -1) { break; } _infos[(unsigned char)info._ch]._ch = info._ch; _infos[(unsigned char)info._ch]._count = info._count; } string uncompressfile(filename); size_t pos = uncompressfile.rfind('.'); assert(pos != string::npos); uncompressfile = uncompressfile.substr(0,pos); uncompressfile += ".unhaffman"; //还原的文件 FILE* fin = fopen(uncompressfile.c_str(), "wb"); assert(fin); //压缩的文件 string file = filename; file += ".huffman"; FILE* fout = fopen(file.c_str(), "rb"); assert(fout); //重建哈夫曼树 CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo>tree(_infos, 256, invalid); HuffmanTreeNode<CharInfo>* root = tree.GetRoot(); HuffmanTreeNode<CharInfo>* cur = root; longtype count = root->_w._count; char value = fgetc(fout); while (!feof(fout)) { int pos = 7; char test = 1; while (pos >= 0) { if (value & (test << pos))//找出读出字符的每一位 { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL&&cur->_right == NULL) { fputc(cur->_w._ch, fin); cur = root; count--; } pos--; } if (count == 0)//循环从这退出,当压缩的时候,最后不够8位就会补0,count是字符的个数,当解压缩的count==0时,说明已解压缩完成,就不会把无效的字符解压说出来。 { break; } value = fgetc(fout); } fclose(fout); fclose(fin); }protected: //构建哈夫曼编码 void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root) { if (root == NULL) { return; } if (root->_left == NULL&&root->_right == NULL) { HuffmanTreeNode<CharInfo>* cur = root; HuffmanTreeNode<CharInfo>* parent = cur->_parent; string& code = _infos[(unsigned char)root->_w._ch]._code; while (parent) { if (cur == parent->_left) { code += '0'; } else { code += '1'; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } GetHuffmanCode(root->_left); GetHuffmanCode(root->_right); }protected: CharInfo _infos[256];};void test(){ //TestHeap(); 
//TestHuffManTree(); FileCompess t; t.Compess("x.jpg"); //cout << "压缩成功" << endl; //t.Compess("xs.jpg"); //t.UnCompess("xs.jpg"); t.UnCompess("x.jpg"); cout << "解压成功" << endl;}
注:在做的时候哈夫曼树一定先要测试好,要不然就是你调试的一大坑。还有要用到的仿函数都要一一实现。记得一定要强转成unsigned char,否则程序会崩掉。这里我用的是二进制读写文件,所以用feof来判断文件结束。写配置文件考虑到精度的丢失,用了结构体写进读出。
0 0
- 小项目-文件压缩(哈夫曼树)
- 文件压缩(小项目)
- Java小程序之哈夫曼树与文件压缩和解压缩(二)文件压缩篇
- Java小程序之哈夫曼树与文件压缩和解压缩(三)文件解压篇
- 【小项目】用Huffman树实现文件压缩并解压
- Java小程序之哈夫曼树与文件压缩和解压缩(一)哈夫曼树构造篇
- 【项目】哈夫曼树的应用:文件压缩
- 文件压缩项目
- 【数据结构】文件压缩项目
- [项目]文件压缩
- 项目:文件压缩与解压
- 【项目】HuffMan编码--文件压缩
- 项目:文件压缩及解压缩
- 文件压缩(哈夫曼树实现)
- 项目 - 小玩文件
- hive压缩之小文件合并
- hive压缩之小文件合并
- 文件压缩(压缩单个文件)
- 【leetcode】对撞指针应用之回文字符串判断(忽略大小写,以及出数字外其他字符)
- FastDFS
- 双系统之Ubuntu16.04的安装(适用于w10,w8,w7)
- vue2.0 开发实践总结之入门篇
- 【python图像处理】直线和曲线的拟合与绘制(curve_fit()详解)
- 小项目-文件压缩(哈夫曼树)
- Elasticsearch修改mapping
- Mybatis foreach 的3中遍历方式
- Disruptor深入解读
- mybatis思维导图,让mybatis不再难懂(二)
- WSAEventSelect模型详解
- HTTP协议详解(真的很经典)
- OpenCV Error: Insufficient memory
- nginx启动,停止