用哈夫曼编码实现文件压缩和解压

来源:互联网 发布:超星软件打不开 编辑:程序博客网 时间:2024/06/06 03:11

     放假了把这个改一下,发现确实用单字节压缩的压缩率要高一些,暂时没去管为什么,不过放假静下心来写的话确实效率高很多。

    新版详见:http://blog.csdn.net/tookkke/article/details/50575103



今天脑洞大开突然想写一下,明明都要考试了,唉,怎么就管不住这手啊 

    总之呢,就是根据每种编码的出现频率把等长的编码换成变长的,据说理论上压缩比率是比较高的,可是为什么经检验我这个大部分时候压缩出来的比源文件还大呢?

    哈夫曼编码的时候要先建一棵字典树,解码时按照当前一位是0还是1往下走,走到叶子节点就找到了原编码。生成这棵树用的是一种贪心法:每次从优先队列里取出两个出现频率最小的节点,连在一个新的节点上,再把这个新节点加入优先队列,它的频率是两个节点频率之和。

    我这里把整棵生成的树都保存在了新文件中,同时保存的还有压缩后变长编码的总长度(以 bit 为单位,不能保证一定是8的倍数)。原文件按两个字节一组来编码,共65536种状态;因为不能保证原文件的字节数一定是偶数,所以新文件末尾又用了两个字节来保存多出来的那一个字节(如果有的话)。

这是压缩程序,得到的文件名是原名+.kcps(随便写写也没那么多注释了)

/*
 * Huffman file compressor.
 *
 * The input is treated as a sequence of 16-bit words (65536 symbols).  The
 * output file "<input>.kcps" has the following layout, all fields in the
 * byte order and type sizes of the machine that wrote it:
 *
 *   tree     nodes 1 .. 2*MAX_WORD-1, each two ints (son[0], son[1])
 *   bit_num  unsigned long long, number of valid bits in the code stream
 *   cps      the code stream, packed LSB-first into bytes
 *   exBYTE   unsigned short; the trailing odd byte of the input, or
 *            MAX_BYTE when the input length is even
 */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <queue>
#include <vector>

#define MAX_WORD (65536)
#define MAX_BYTE (256)
#define MAX_CODE_BITS (64) /* a code must fit into one ULL */

typedef unsigned char BYTE;
typedef unsigned short WORD;
typedef unsigned long DWORD;
typedef unsigned long long ULL;

ULL cnt[MAX_WORD * 2];       // number of each WORD (leaves) / subtree weight (inner nodes)
WORD exBYTE = MAX_BYTE;      // if exBYTE exists, it will be less than MAX_BYTE
ULL bit_num;                 // total number of bits in the code stream
FILE *infp, *outfp;
ULL kkke[MAX_WORD][2];       // per symbol: [0] = code bits, [1] = code length
int tree_size = MAX_WORD * 2;

// Node i < MAX_WORD is an inner node; node i >= MAX_WORD is the leaf for
// symbol i - MAX_WORD.  Index 0 is unused.
struct the_tree {
    int son[2];
} tree[MAX_WORD * 2];        // root is 1

// Orders node indices so that the priority queue yields the lightest node first.
struct cmp {
    bool operator()(int a, int b) const { return cnt[a] > cnt[b]; }
};

// Work item of the iterative tree walk in dfs().
struct dfs_frame {
    int node;
    ULL code;
    int depth;
};

// Writes count items of the given size to outfp; terminates on a short write.
static void write_or_die(const void *data, std::size_t size, std::size_t count)
{
    if (std::fwrite(data, size, count, outfp) != count) {
        std::printf("fail to write new file\n");
        std::exit(1);
    }
}

// Terminates when a symbol that must be emitted has a code longer than one ULL.
static void require_code_fits(ULL length)
{
    if (length > MAX_CODE_BITS) {
        std::printf("fail to encode: code longer than 64 bits\n");
        std::exit(1);
    }
}

// Asks for the input file name and opens it together with "<name>.kcps".
void open_file()
{
    char filename[256];
    char outfilename[sizeof(filename) + 8];   // room for ".kcps"

    std::printf("please input the original file: ");
    // The field width keeps the read inside filename[].
    if (std::scanf("%255s", filename) != 1) {
        std::printf("fail to open original file\n");
        std::exit(1);
    }
    std::strcpy(outfilename, filename);
    std::strcat(outfilename, ".kcps");

    if ((infp = std::fopen(filename, "rb")) == NULL) {
        std::printf("fail to open original file\n");
        std::exit(1);
    }
    if ((outfp = std::fopen(outfilename, "wb")) == NULL) {
        std::printf("fail to create new file\n");
        std::exit(1);
    }
}

// Closes both files; a failed close of the output means data may be lost,
// so it is reported through the exit status as well.
void close_file()
{
    if (std::fclose(infp)) {
        std::printf("fail to close original file\n");
        std::exit(1);
    }
    if (std::fclose(outfp)) {
        std::printf("fail to close new file\n");
        std::exit(1);
    }
}

// First pass over the input: counts every 16-bit word and remembers a
// trailing odd byte in exBYTE.
void read_data()
{
    BYTE buf[sizeof(WORD)];
    std::size_t len;

    while ((len = std::fread(buf, sizeof(BYTE), sizeof(buf), infp)) != 0) {
        if (len < sizeof(buf)) {
            // Take the byte that was actually read rather than decoding it
            // out of a half-filled WORD.
            exBYTE = buf[0];
            break;
        }
        WORD a;
        std::memcpy(&a, buf, sizeof(a));
        cnt[a + MAX_WORD]++;
    }
}

// Builds the Huffman tree greedily: repeatedly joins the two lightest nodes
// under a new inner node.  Inner nodes are allocated from MAX_WORD-1 down to
// 1, so the last one created (the root) is node 1.
void build_tree()
{
    std::priority_queue<int, std::vector<int>, cmp> q;

    for (int i = MAX_WORD + MAX_WORD - 1; i >= MAX_WORD; i--)
        q.push(i);

    for (int i = MAX_WORD - 1; i; i--) {
        tree[i].son[0] = q.top();
        q.pop();
        tree[i].son[1] = q.top();
        q.pop();
        cnt[i] = cnt[tree[i].son[0]] + cnt[tree[i].son[1]];
        q.push(i);
    }
}

// Assigns a code to every leaf below node k.  a is the code accumulated so
// far and depth its length in bits; going to son[0] appends a 0 bit, going to
// son[1] appends a 1 bit.  Implemented with an explicit stack because the
// tree can be tens of thousands of levels deep when many symbols are unused.
void dfs(int k, ULL a, int depth)
{
    std::vector<dfs_frame> pending;
    dfs_frame start;
    start.node = k;
    start.code = a;
    start.depth = depth;
    pending.push_back(start);

    while (!pending.empty()) {
        const dfs_frame cur = pending.back();
        pending.pop_back();

        if (cur.node >= MAX_WORD) {
            kkke[cur.node - MAX_WORD][0] = cur.code;
            kkke[cur.node - MAX_WORD][1] = static_cast<ULL>(cur.depth);
            continue;
        }
        for (int bit = 1; bit >= 0; bit--) {
            dfs_frame next;
            next.node = tree[cur.node].son[bit];
            next.code = (cur.code << 1) | static_cast<ULL>(bit);
            next.depth = cur.depth + 1;
            pending.push_back(next);
        }
    }
}

/**********************/
/*****  tree      *****/
/*****  bit_num   *****/
/*****  cps       *****/
/*****  exBYTE    *****/
/**********************/
// Second pass: writes the tree, the bit count, the packed code stream and
// the trailing-byte marker, in that order.
void output()
{
    write_or_die(&tree[1], sizeof(tree[1]), static_cast<std::size_t>(tree_size - 1));

    bit_num = 0;
    for (int i = 0; i < MAX_WORD; i++) {
        if (cnt[i + MAX_WORD] == 0)
            continue;   // unused symbols contribute no bits
        require_code_fits(kkke[i][1]);
        bit_num += cnt[i + MAX_WORD] * kkke[i][1];
    }
    write_or_die(&bit_num, sizeof(bit_num), 1);

    if (std::fseek(infp, 0, SEEK_SET) != 0) {
        std::printf("fail to open original file\n");
        std::exit(1);
    }

    ULL written = 0;            // bits emitted so far
    BYTE packed = 0;            // byte currently being filled, LSB first
    BYTE buf[sizeof(WORD)];
    while (std::fread(buf, sizeof(BYTE), sizeof(buf), infp) == sizeof(buf)) {
        WORD a;
        std::memcpy(&a, buf, sizeof(a));

        const ULL code = kkke[a][0];
        require_code_fits(kkke[a][1]);
        // Emit the code from its most significant bit down.
        for (int i = static_cast<int>(kkke[a][1]) - 1; i >= 0; i--) {
            if ((code >> i) & 1ULL)
                packed |= static_cast<BYTE>(1u << (written % 8));
            written++;
            if (written % 8 == 0) {
                write_or_die(&packed, sizeof(packed), 1);
                packed = 0;
            }
        }
    }
    if (written % 8)
        write_or_die(&packed, sizeof(packed), 1);

    write_or_die(&exBYTE, sizeof(exBYTE), 1);
}

int main()
{
    open_file();
    read_data();
    build_tree();
    dfs(1, 0ULL, 0);
    output();
    close_file();
    return 0;
}

然后是解压程序,得到的文件名是(new)+原名,如果有.kcps就去掉

/*
 * Huffman file decompressor for ".kcps" files.
 *
 * Expected input layout, all fields in the byte order and type sizes of the
 * machine that wrote the file:
 *
 *   tree     nodes 1 .. 2*MAX_WORD-1, each two ints (son[0], son[1])
 *   bit_num  unsigned long long, number of valid bits in the code stream
 *   cps      the code stream, packed LSB-first into bytes
 *   exBYTE   unsigned short; the trailing odd byte of the original file, or
 *            MAX_BYTE when the original length was even
 */
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <queue>

#define MAX_WORD (65536)
#define MAX_BYTE (256)

typedef unsigned char BYTE;
typedef unsigned short WORD;
typedef unsigned long DWORD;
typedef unsigned long long ULL;

WORD exBYTE = MAX_BYTE;      // if exBYTE exists, it will be less than MAX_BYTE
ULL bit_num;                 // number of valid bits in the code stream
FILE *infp, *outfp;
int tree_size = MAX_WORD * 2;

// Node i < MAX_WORD is an inner node; node i >= MAX_WORD is the leaf for
// symbol i - MAX_WORD.  Index 0 is unused.
struct the_tree {
    int son[2];
} tree[MAX_WORD * 2];        // root is 1

// Reports an unreadable or inconsistent input file and terminates.
static void corrupt_file()
{
    std::printf("compressed file is corrupted\n");
    std::exit(1);
}

// Writes count items of the given size to outfp; terminates on a short write.
static void write_or_die(const void *data, std::size_t size, std::size_t count)
{
    if (std::fwrite(data, size, count, outfp) != count) {
        std::printf("fail to write new file\n");
        std::exit(1);
    }
}

// Asks for the compressed file name, opens it, and creates the output file.
// The output name is the input name with a trailing ".kcps" removed and
// "(new)" placed in front of the file name part, so a name that contains a
// directory still refers to that directory.
void open_file()
{
    static const char suffix[] = ".kcps";
    static const char prefix[] = "(new)";
    char filename[256];
    char outfilename[sizeof(filename) + sizeof(prefix)];

    std::printf("please input the compressed file: ");
    // The field width keeps the read inside filename[].
    if (std::scanf("%255s", filename) != 1) {
        std::printf("fail to open compressed file\n");
        std::exit(1);
    }
    if ((infp = std::fopen(filename, "rb")) == NULL) {
        std::printf("fail to open compressed file\n");
        std::exit(1);
    }

    // Drop ".kcps" only when it is really the extension, not when the text
    // merely occurs somewhere inside the name.
    const std::size_t len = std::strlen(filename);
    const std::size_t suffix_len = sizeof(suffix) - 1;
    if (len >= suffix_len && std::strcmp(filename + len - suffix_len, suffix) == 0)
        filename[len - suffix_len] = '\0';

    // Locate the file name part after the last path separator.
    char *base = filename;
    for (char *p = filename; *p != '\0'; p++) {
        if (*p == '/' || *p == '\\')
            base = p + 1;
    }

    const std::size_t dir_len = static_cast<std::size_t>(base - filename);
    std::memcpy(outfilename, filename, dir_len);
    outfilename[dir_len] = '\0';
    std::strcat(outfilename, prefix);
    std::strcat(outfilename, base);

    if ((outfp = std::fopen(outfilename, "wb")) == NULL) {
        std::printf("fail to create new file\n");
        std::exit(1);
    }
}

// Closes both files; a failed close of the output means data may be lost,
// so it is reported through the exit status as well.
void close_file()
{
    if (std::fclose(infp)) {
        std::printf("fail to close compressed file\n");
        std::exit(1);
    }
    if (std::fclose(outfp)) {
        std::printf("fail to close new file\n");
        std::exit(1);
    }
}

/**********************/
/*****  tree      *****/
/*****  bit_num   *****/
/*****  cps       *****/
/*****  exBYTE    *****/
/**********************/
// Reads the stored tree and the bit count, and rejects a tree whose inner
// nodes point outside the node array.
void read_data()
{
    const std::size_t nodes = static_cast<std::size_t>(tree_size - 1);

    if (std::fread(&tree[1], sizeof(tree[1]), nodes, infp) != nodes)
        corrupt_file();
    if (std::fread(&bit_num, sizeof(bit_num), 1, infp) != 1)
        corrupt_file();

    // The indices come straight from the file; check them once here so the
    // decoding loop can follow them without further bounds checks.
    for (int i = 1; i < MAX_WORD; i++) {
        for (int s = 0; s < 2; s++) {
            if (tree[i].son[s] < 1 || tree[i].son[s] >= tree_size)
                corrupt_file();
        }
    }
}

// Decodes the bit stream by walking the tree from the root, writing one
// 16-bit word per leaf reached, then restores the trailing odd byte.
void output()
{
    BYTE packed = 0;
    int now_node = 1;

    // bit_num is 64 bits wide, so the counter has to be as well.
    for (ULL i = 0; i < bit_num; i++) {
        if (i % 8 == 0) {
            if (std::fread(&packed, sizeof(packed), 1, infp) != 1)
                corrupt_file();
        }
        now_node = tree[now_node].son[(packed >> (i % 8)) & 1];
        if (now_node >= MAX_WORD) {
            const WORD b = static_cast<WORD>(now_node - MAX_WORD);
            write_or_die(&b, sizeof(b), 1);
            now_node = 1;
        }
    }

    if (std::fread(&exBYTE, sizeof(exBYTE), 1, infp) != 1)
        corrupt_file();
    if (exBYTE < MAX_BYTE) {
        // The marker is stored as a WORD but stands for a single byte of the
        // original file, so exactly one byte is written back.
        const BYTE last = static_cast<BYTE>(exBYTE);
        write_or_die(&last, sizeof(last), 1);
    }
}

int main()
{
    open_file();
    read_data();
    output();
    close_file();
    return 0;
}


然后是怨念,为什么要考试了我却还没复习的心情啊啊啊啊啊啊啊啊,为什么这个压缩程序实际上还把源文件放大了啊啊啊啊啊啊啊,难道只能当成加密程序用了吗???

高兴的是其实有一次压缩把10000K压成了9000K,还是挺开心的,虽然只有一次...



2 1
原创粉丝点击