文件压缩

来源：互联网发布：android图案解锁源码编辑：程序博客网时间：2024/06/18 04:01

文件压缩原理：
1. 建立最小堆
2. 利用最小堆的数据特性建立huffman树结构
3. 统计要压缩的文件中字符出现次数
4. 以字符出现次数为树的权重建立哈夫曼树
5. 遍历huffman树获取字符编码和行数（真正要存储的数据）
6. 创建一个文件（test.compress）存储压缩数据（行数、配置信息）
7. 将字符编码（用字符在huffman树从根节点到叶子结点的路径编码）以位图方式存储在test.compress文件中
文件解压原理：
1. 根据压缩文件test.compress内的配置信息重建huffman树
2. 根据行数跳过配置信息，读取压缩后的数据本体
3. 根据压缩位图中的二进制信息遍历重建的huffman树
4. 建立解压后的文件test.UnCompress，存储遍历得到的字符
这里写图片描述

源码：
heap.hpp

#pragma once

#include <assert.h>
#include <stddef.h>

#include <iostream>
#include <utility>
#include <vector>

// Kept only for backward compatibility: headers that include this one use
// unqualified standard names (string, cout, ...) and rely on this directive.
using namespace std;

// Comparator that orders elements ascending; Heap<T, Less<T> > is a min-heap.
template<class T>
struct Less
{
    bool operator()(const T& left, const T& right) const
    {
        return left < right;
    }
};

// Comparator that orders elements descending; Heap<T, Great<T> > is a max-heap.
template<class T>
struct Great
{
    bool operator()(const T& left, const T& right) const
    {
        return left > right;
    }
};

// Binary heap stored in a vector.
//
// Compare()(a, b) returning true means "a must be closer to the top than b".
// A default-constructed Compare is used for every comparison, so the
// comparator has to be default-constructible and stateless.
template<class T, class Compare = Less<T> >
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the first `size` elements of `array`.
    // A null `array` or a zero `size` yields an empty heap.
    Heap(const T* array, size_t size)
    {
        if (array != NULL && size > 0)
        {
            _heap.assign(array, array + size);
        }
        // Sift down every non-leaf node, last one first, INCLUDING index 0.
        // Counting down with an unsigned index avoids the underflow that
        // (size - 2) / 2 produces for size < 2.
        for (size_t root = _heap.size() / 2; root-- > 0; )
        {
            BuildDown(root);
        }
    }

    // Inserts `data`; only the path from the new leaf to the root is touched.
    void Push(const T& data)
    {
        _heap.push_back(data);
        BuildUp(_heap.size() - 1);
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_heap.empty());
        using std::swap;
        swap(_heap.front(), _heap.back());
        _heap.pop_back();
        BuildDown(0);  // no-op when the heap has just become empty
    }

    bool Empty() const
    {
        return _heap.empty();
    }

    size_t Size() const
    {
        return _heap.size();
    }

    // Returns a copy of the top element. The heap must not be empty.
    T Top() const
    {
        assert(!_heap.empty());
        return _heap[0];
    }

private:
    // Moves the element at `child` up while it compares before its parent.
    void BuildUp(size_t child)
    {
        using std::swap;
        while (child > 0)
        {
            const size_t parent = (child - 1) / 2;
            if (!Compare()(_heap[child], _heap[parent]))
            {
                return;
            }
            swap(_heap[child], _heap[parent]);
            child = parent;
        }
    }

    // Moves the element at `root` down until neither child compares before it.
    void BuildDown(size_t root)
    {
        using std::swap;
        const size_t size = _heap.size();
        size_t parent = root;
        size_t child = parent * 2 + 1;
        while (child < size)
        {
            // Pick whichever child should sit closer to the top.
            if (child + 1 < size && Compare()(_heap[child + 1], _heap[child]))
            {
                ++child;
            }
            if (!Compare()(_heap[child], _heap[parent]))
            {
                return;
            }
            swap(_heap[child], _heap[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

private:
    vector<T> _heap;
};

// Small smoke test of the heap. Declared inline because it lives in a header
// and would otherwise be defined once per translation unit.
inline void test()
{
    int arr[] = {12, 18, 24, 3, 32, 74, 5};
    Heap<int, Less<int> > h(arr, sizeof(arr) / sizeof(arr[0]));
    h.Push(1);
    h.Pop();
}

HuffmanTree.hpp

#include "heap.hpp"template<class T>struct HuffmanNode{    HuffmanNode(const T& weight)        :_Lchild(NULL)        ,_Rchild(NULL)        ,_weight(weight)    {}    //HuffmanNode* _Parent;    HuffmanNode* _Lchild;    HuffmanNode* _Rchild;    T _weight;};template<class T>class Tree{    typedef HuffmanNode<T> Node;public:    Tree()    {}    Tree(T* array,size_t size,const T& invalid)    {        CreatHuffmanTree(array,size,invalid);        //invalid防止只有一个节点的情况    }    ~Tree()    {        Destory(_pRoot);    }    Node* Root()    {        return _pRoot;    }private:    void CreatHuffmanTree(T* array,size_t size,const T& invalid)    {        struct Compare//重载比较器        {            bool operator()(const Node* left,const Node* right)            {                return left->_weight<right->_weight;                    }        };        Heap<Node*,Compare> h;        for(size_t i=0; i<size; i++)        {                if (array[i] != invalid)                {                    Node* pRoot=new Node(array[i]);                    h.Push(pRoot);                }        }        if (h.Size())        {            while (h.Size()>1)            {                Node *Left=h.Top();                h.Pop();                Node *Right=h.Top();                h.Pop();                Node* Parent=new Node(Left->_weight+Right->_weight);                Parent->_Lchild=Left;                Parent->_Rchild=Right;                h.Push(Parent);            }            _pRoot=h.Top();        }        else            return ;        //int tmp=(int)_pRoot->_weight._ch;        //int tmp1=(int)_pRoot->_Lchild->_weight._ch;        //int tmp2=(int)_pRoot->_Rchild->_weight._ch;    }    void Destory(Node* _pRoot)    {        if (_pRoot)        {            Destory(_pRoot->_Lchild);            Destory(_pRoot->_Rchild);            delete _pRoot;            _pRoot=NULL;        }    }private:    Node* _pRoot;};

file_compress.h

#define  _CRT_SECURE_NO_WARNINGS#include "HuffmanTree.hpp"#include <string>#include <windows.h>#include <cstdlib>struct File_Info{    unsigned char _ch;//字符    long long _count;//出现次数    string _code;//字符编码    File_Info(const long long& count=0)        :_count(count)    {}    File_Info(const char ch)        :_ch(ch)    {}    File_Info operator+(const File_Info& f)const    {        return _count+f._count;    }    bool operator<(const File_Info& f)const    {        return _count<f._count;    }    bool operator!=(const File_Info& f)const    {        return _count!=f._count;    }};class FileCompress{    typedef HuffmanNode<File_Info> Node;public:    void Compress(const char* filename)//文件压缩    {        FILE* pfr=fopen(filename,"r");        if (pfr==NULL)        {            perror("fail to open the file");        }        //统计文件中字符出现次数        for (int i=0; i<256; i++)        {            _Info[i]._ch = i;//所有字符种类        }        int ch = fgetc(pfr);        while (ch!=EOF)        {            _Info[ch]._count++;            ch=fgetc(pfr);        }        fclose(pfr);//重置文件指针        FILE* fpr=fopen(filename,"r");        Tree<File_Info> tree(_Info, 256, File_Info());//以File_Info结构体为树的权重建立哈夫曼树        Node* root=tree.Root();        string code;        int line=0;        _GetTreeNode(root,code,line);//获取字符编码和行号        //添加文件后缀,并重置文件指针        fseek(pfr,0,SEEK_SET);        string write(filename);        write=write+".compress";//存放压缩后数据        FILE* pfw=fopen(write.c_str(),"w");        fputs(write.c_str(),pfw);//写入文件名        fputc('\n',pfw);        char Line[4];//存储行数        _itoa(line,Line,10);//转化行数为十进制字符放入文件        fputs(Line,pfw);        fputc('\n',pfw);        for(int i=0; i<256; i++)//写入文件配置信息        {            if(_Info[i]._count)//排除没有出现的字符            {                fputc(_Info[i]._ch,pfw);                char arr[126];                _itoa(_Info[i]._count,arr,10);//转化_count为十进制字符放入文件                fputc(' ',pfw);                fputs(arr,pfw);//              
fputc(':',pfw);//              fputs(_Info[i]._code.c_str(),pfw);                fputc('\n',pfw);            }        }        unsigned char data = 0;        int pos=7;        ch=fgetc(pfr);        while (ch!=EOF)//写入编码信息        {            const char*ptr=_Info[ch]._code.c_str();            while (*ptr)            {                if (pos>=0)                {                    data=data | ((*ptr-'0')<<pos);                    pos--;                    ptr++;                }                else if(pos<0)                {                    fputc(data,pfw);//读满8位写入                    pos=7;                    data=0;                }            }            ch=fgetc(pfr);//读取下一个字符        }        //最后一个字符不管有没有写满都要放进去        fputc(data,pfw);        //fputc(EOF,pfw);//写入文件结束符        fputs("\r\n",pfw);        fclose(pfr);        fclose(pfw);        cout<<"压缩完成"<<endl;    }    //文件解压    void Extract(const char* filename)    {        assert(filename);        FILE* pfr=fopen(filename,"rb");        File_Info _Info[256];        string code;        string write(filename);        _GetLine(write.c_str(),_Info,code);//重写树节点        int index=write.rfind(".",write.size());        if (write.substr(index+1,write.size())!="Compress")//检查文件后缀是否正确        {            return ;        }        write=write.substr(0, index);//除去文件后缀        write=write+".UnCompress";        FILE* pfw=fopen(write.c_str(),"wb");//添加新的后缀并打开解压文件        Tree<File_Info> newtree(_Info,256,File_Info());        HuffmanNode<File_Info> *root=newtree.Root();        HuffmanNode<File_Info> *cur=root;        if (root==NULL)        {            return ;        }        long long charcount=root->_weight._count;//字符总数，控制循环条件        int pos=8;        const char* ch=code.c_str();//压缩后的数据        while (charcount)//解压        {            --pos;            unsigned char v=1;            int tmp=((*ch)&(v<<pos));            if (tmp)            {                cur=cur->_Rchild;            }            else if (tmp==0)            
{                cur=cur->_Lchild;            }            if (cur->_Lchild==NULL && cur->_Rchild==NULL)            {                fputc(cur->_weight._ch,pfw);//找到编码对应叶节点的字符信息_ch                cur=root;                if (--charcount==0)//总字符数                {                    break;                }                           }            if (pos==0)            {                pos=8;                ch++;            }        }        fclose(pfr);        fclose(pfw);        cout<<"解压结束"<<endl;    }protected:    void _GetTreeNode(Node* root, string code,int& line)//获取字符编码和行数    {        if (root==NULL)            return;        _GetTreeNode(root->_Lchild, code+'0',line);        _GetTreeNode(root->_Rchild, code+'1',line);        if (root->_Lchild==NULL && root->_Rchild==NULL)        {            _Info[root->_weight._ch]._code=code.c_str();            if (root->_weight._count!=0)            {                line++;//行数为叶节点个数            }        }    }    void _GetLine(string filename,File_Info* _Info,string& code)    {        FILE* pfr=fopen(filename.c_str(),"r");        unsigned char ch=fgetc(pfr);        while(ch!='\n')//最后一位'\n'已被读取        {            ch=fgetc(pfr);        }        string Line;//存储行数        for (int i=0; i<4;i++)//行数最位4多        {            ch=fgetc(pfr);//读行数            if (ch=='\n')                break;            Line.push_back(ch);        }        int line=atoi(Line.c_str());        //将行数转化为整型        while (line--)        {            string buf;     //读取每一行信息            char ch=fgetc(pfr);            while (ch!='\n')            {                buf.push_back(ch);                ch=fgetc(pfr);            }            //建立叶节点信息            int index=buf.rfind(" ",buf.size());    //找到space对应下标            unsigned char c=buf[0];                 //第一个字符为_ch            _Info[c]._ch=c;            string str=buf.substr(index,buf.size());//space之后为出现次数_count            int count=atoi(str.c_str());            _Info[c]._count=count;        }        
unsigned char h=fgetc(pfr);//读取压缩后的数据本体        while (h!=EOF)        {            code.push_back(h);            h=fgetc(pfr);        }        //char *rdbuf=new char[1024];        //int rdsize=fread(rdbuf,1,1024,pfr);        //while (rdsize!=0)        //{        //  break;        //  code+=fread(rdbuf,1,1024,pfr);        //}        //const unsigned char *cur=(const unsigned char *)code.c_str();        fclose(pfr);    }private:    File_Info _Info[256];};

阅读全文

1 0