【c++】Huffman实现文件压缩

来源:互联网 发布:3dmax2012数据错误 编辑:程序博客网 时间:2024/06/05 02:48

1.需求分析

利用小堆,huffman编码,文件流操作,二进制文件的读写实现对普通文件的压缩和解压过程。



2.能力要求

A.熟悉对文件的读写操作。

B.熟悉小堆的原理。

C.熟悉HuffmanTree的实现原理、

D.会编码的获取。

E.对编码信息处理和存储。

F.最重要一点,要理解压缩和解压缩的过程。


3.模块介绍。

A设计的存储结构。.

B.压缩过程.

C.解压缩过程。

D.编码的获取。.

E.HuffmanTree的实现。

F.小堆的实现。

G.读文件的操作。


4.压缩和解压缩的原理。

压缩过程:利用数据结构,对文件里面的字符进行统计。以字符出现的频率构建小堆,再利用小堆,构建HuffmanTree。以HuffmanTree左右孩子遍历,左为0,右为1.将统计出来的结果存放到自己设计的结构中。在通过对字符编码进行位进制压缩,可以实现压缩过程。

 

解压缩过程:利用字符和统计次数写配置文件,解压缩就可以通过配置文件建立一颗HuffmanTree,在根据压缩文件内容遍历HuffmanTree,叶子节点即为原来的一个字符。


5.模块具体分析。

A设计的存储结构。


typedef long longtype;
struct FileInfo
{
unsigned char _ch;           //字符
longtype _count;                 //计数器
string _code;               //Huffman编码

}


B.压缩过程.

1.先把文件内容读进来,并进行字符统计。

2.统计字符出现字数,存入_count。

3.依据每个节点的_count值,构建相应的huffman树。

4.将编码压缩,存入文件。

5.把字符和出现次数按照(字符,次数)的形式,每行的保存到配置文件里。


C.解压缩过程。


1.读取配置文件里的内容,构建HuffmanTree。

2.根据压缩文件,和重建的huffman树解压.

3.根据压缩文件字符编码解压文件.


D.编码的获取


void huffmancode(HuffManNode<FileInfo>* root, string& code)


E.HuffmanTree的实现。

struct HuffManNode
{
HuffManNode<T> *_parent;
HuffManNode<T> *_right;
HuffManNode<T> *_left;


T _weight;

}


class HuffManTree
{
public:
typedef HuffManNode<T> Node;

//仿函数
template<class T>
struct Compare
{
bool operator()(Node*& L, Node*& R)
{
return L->_weight < R->_weight;
}
};


private:
Node* _root;



F.小堆的实现。


Heap(const T* array, size_t size)
{
_array.reserve(size);
for (size_t i = 0; i < size; ++i)
{
_array.push_back(array[i]);
}


for (int begin = _array.size() / 2 - 1;
begin >= 0; begin--)
{
_AdjustDown(begin);
}
}

G.读文件的操作。


bool readline(FILE* str, string& code)



小堆代码:

#pragma once#include <vector>#include <algorithm>template<class T>class Less{public:bool operator()(const T& l, const T& r){return l < r;}};template<class T>class Greater{public:bool operator()(const T& l, const T& r){return l > r;}};// 小堆//template<class T, class Compare>template<class T, template<class> class Compare = Less>class Heap{public:Heap(){}Heap(const T* array, size_t size){_array.reserve(size);for (size_t i = 0; i < size; ++i){_array.push_back(array[i]);}for (int begin = _array.size() / 2 - 1;begin >= 0; begin--){_AdjustDown(begin);}}Heap(vector<T>& array){_array.swap(array);for (int begin = _array.size() / 2 - 1;begin >= 0; begin--){_AdjustDown(begin);}}void Push(const T& x){_array.push_back(x);//?_AdjustUp(_array.size() - 1);}void Pop(){_array[0] = _array[_array.size() - 1];_array.pop_back();_AdjustDown(0);}bool Empty(){return _array.empty();}const T& Top(){return _array[0];}protected:void _AdjustDown(int root)//size{int left = root * 2 + 1;int right = left + 1;// 左孩子已不存在,则root为叶子节点,不用再调整while (left < _array.size()){// 找左右孩子里面小的那个int key = left;if (right < _array.size()&& Compare<T>()(_array[right], _array[left])){key = right;}// 如果min小则跟根节点交换//Compare<T> com;//if (_array[min] < _array[root])if (Compare<T>()(_array[key], _array[root])){swap(_array[key], _array[root]);root = key;left = 2 * root + 1;right = left + 1;}else{break;}}}void _AdjustUp(int child){while (1){int root = (child - 1) / 2;//if (_array[root] > _array[child])if (Compare<T>()(_array[root], _array[child])){swap(_array[root], _array[child]);child = root;}else{break;}if (root == 0)break;}}private:vector<T> _array;};

HuffmanTree代码:

#pragma once#include"heap.h"template <class T>struct HuffManNode{HuffManNode<T> *_parent;HuffManNode<T> *_right;HuffManNode<T> *_left;T _weight;HuffManNode(T weight): _parent(NULL), _right(NULL), _left(NULL), _weight(weight){}};template <class T>class HuffManTree{public:typedef HuffManNode<T> Node;//仿函数template<class T>struct Compare{bool operator()(Node*& L, Node*& R){return L->_weight < R->_weight;}};public:void CreatHuffmanTree(const T* array, size_t size, const T& invalid){_CreatHuffmanTree(array, size, invalid);}HuffManTree():_root(NULL){}~HuffManTree(){_Destory(_root);}Node* GetRoot(){return _root;}protected:void _CreatHuffmanTree(const T* array, size_t size, const T& invalid){//将数据存到最小堆中(构建结点)Heap<Node*, Compare<T>> H;for (int i = 0; i < size; i++){if (array[i] != invalid){Node* node = new Node(array[i]);H.Push(node);}}//取堆中的最小两个值,作为节点构建Huffman树。Node* parent = H.Top();while (H.Size() > 1){Node* left = H.Top();H.Pop();Node* right = H.Top();H.Pop();parent = new Node(left->_weight + right->_weight);//构建父节点parent->_left = left;parent->_right = right;left->_parent = parent;right->_parent = parent;H.Push(parent);}_root = parent;}bool _Destory(Node*& root){if (root){_Destory(root->_left);_Destory(root->_right);delete root;root = NULL;}return true;}private:Node* _root;};

文件压缩实现:

#pragma once//#include"HuffMan.h"//#include<string>//#include<assert.h>typedef long longtype;struct FileInfo{unsigned char _ch;           //字符longtype _count;                 //计数器string _code;               //Huffman编码FileInfo(longtype count = 0):_ch(0), _count(count), _code(NULL){}FileInfo operator+(const FileInfo& info) const{FileInfo tmp;tmp._count = _count + info._count;return tmp;}bool operator!=(const FileInfo& info) const{return _count != info._count;}bool operator<(const FileInfo& info) const{return _count < info._count;}};ostream& operator<<(ostream& os, const FileInfo& info){os << info._ch << ":" << info._count;return os;}class FileCompress{public:FileCompress(){cout << "aaaaa" << endl;/*for (int i = 0; i < 256; ++i){_info[i]._ch = i;_info[i]._count = 0;}*/}void huffmancode(HuffManNode<FileInfo>* root, string& code){if (root == NULL){return;// root->_right;}huffmancode(root->_left, code + '0');huffmancode(root->_right, code + '1');if (root->_left == NULL&&root->_right == NULL){_info[root->_weight._ch]._code = code;   //生成并保存huffman编码}code.empty();//清空上次所存数据,为下次做准备}//文件压缩void filecompress(const char* filename){//1.先把文件内容读进来,并进行字符统计FILE* read = fopen(filename, "rb");if (read == NULL){return;}char ch = fgetc(read);//2.统计字符出现字数,存入_countwhile (ch != EOF){_info[(unsigned char)ch]._count++;//ch = fgetc(read);}//3.依据每个节点的_count值,构建相应的huffman树HuffManTree<FileInfo> tree;FileInfo invalid(0);tree.CreatHuffmanTree(_info, 256, invalid);string code;huffmancode(tree.GetRoot(), code);//4.将编码压缩,存入文件string compressfile = filename;compressfile += ".compress";FILE* fout = fopen(compressfile.c_str(), "wb");assert(fout);fseek(fout, 0, SEEK_SET);//fout文件指针,0位数,seek_set表示文件开头int index = 0;unsigned char comch = 0;ch = fgetc(read);while (ch != EOF){string& code = _info[(unsigned char)ch]._code;//位进制压缩for (int i = 0; i < 256; i++){comch << 1;if (code[i] == '1'){comch |= 1;}if (++index == 8){fputc(comch, fout);index = 0;comch = 0;}}ch = fgetc(read);}if (index != 0){comch << (8 - index);fputc(comch, fout);}//5.把字符和出现次数按照(字符,次数)的形式,每行的保存到配置文件里string configfile = filename;configfile = ".config";FILE* config = fopen(configfile.c_str(), "w");assert(config);string info;char count[20];for (int i = 0; i < 256; i++){info.clear();if (_info[i] != invalid){info = _info[i]._ch;info += ',';itoa(_info[ch]._count, count, 10);info += count;info += '\n';fputs(info.c_str(), config);}}fclose(read);fclose(fout);//压缩fclose(config);//配置}void uncompressfile(const char* filename){//读取配置文件里的内容string unfile = filename;unfile += ".comm";FILE* file = fopen(unfile.c_str(), "rb");assert(file);string code;char ch = 0;while (readline(file, code)){//若读到空行,则为‘\0’if (!code.empty()){ch = code[0];_info[(unsigned char)ch]._count = atoi(code.substr(2).c_str());code.clear();}else{code = '\n';}}//根据配置文件构建huffman树HuffManTree<FileInfo> tree;FileInfo invalid(0);tree.CreatHuffmanTree(_info, 256, invalid);HuffManNode<FileInfo>* root = tree.GetRoot();//根据压缩文件,和重建的huffman树解压string  uncompress = filename;uncompress += ".uncompress";FILE* fin = fopen(uncompress.c_str(), "wb");assert(fin);string  compress = filename;compress += ".compress";FILE* fout = fopen(compress.c_str(), "rb");assert(fout);//根据压缩文件字符编码解压文件HuffManNode<FileInfo>* str = root;int pos = 8;ch = fgetc(fin);assert(ch);longtype count = root->_weight._count;while (1){if (pos == 0){pos = 8;ch = fgetc(fin);}--pos;if (ch & 1 << pos)str = str->_right;elsestr = str->_left;if (str->_left == NULL && str->_right == NULL){fputc(str->_weight._ch, fout);str = root;if (--count == 0)break;}}fclose(fout);fclose(fin);}bool readline(FILE* str, string& code){assert(str);char ch = fgetc(str);if (ch == EOF){return false;}while (ch != '\n'&&ch != EOF){code += ch;ch = fgetc(str);}return true;}protected:FileInfo _info[256];};


0 0
原创粉丝点击