中文压缩和解码程序设计与实现(huffman)
来源:互联网 发布:js toggle 切换状态 编辑:程序博客网 时间:2024/06/13 10:05
本项目是利用huffman算法进行中文压缩和解码的设计与实现,huffman算法被证明是最优的结构,可以用于数据压缩
源码
/****************************************************************************************this function is about to compress a file with chinese code**by using huffman method**NEWPLAN @ UESTC 2014.5****************************************************************************************/#include <stdio.h>#include <ctype.h>#include <stdlib.h>#include <string.h>#include <assert.h>#define SUCCESS1#define FAILURE 0#define DICTIONRAY0#define INDEX 1/*used to calculate the frequence of character has shown*******/int array_char[256]= {0};unsigned char amounts=0;/*******define a strcuture to hold the tree node***************/#pragma pack(1)typedef struct huff{ struct huff *lchild, *rchild, *parent; int wight; unsigned char code;} HUFF_NODE,*HUFF_NODE_PTR;#pragma pack()/*HEAD of tree*/HUFF_NODE_PTR HUFF_HEADER=NULL;/*dictionary of characters maybe not been used all just for convenient*/char* HUFF_DICTIONARY[256]= {0};HUFF_NODE_PTR huff_pt[256];void help(void);void version(void);void output(char*);void initilize(void);int claculate(FILE*);int create_dictiony(void);void compress(const char*);int file_write(char*,FILE*);void write_dictionary(char*);FILE* file_open(const char*);int create_tree(HUFF_NODE_PTR* );void decompress_func(const char*);void decode (HUFF_NODE_PTR,FILE*);HUFF_NODE_PTR tree_from_dic(char**);HUFF_NODE_PTR huff_malloc(unsigned char i);void swap_func(unsigned char *,unsigned char*);HUFF_NODE_PTR adjusty(HUFF_NODE_PTR code[] ,size_t);//********************************************************// Method: main// FullName: main// Access: public// Returns: int// Qualifier: the entry function// Parameter: int argc the parameter count// Parameter: char const * argv[] array of hold parameter//********************************************************int main(int argc, char const *argv[]){ initilize(); if(argc<2) { printf("can`t be empty file or operation\n"); return FAILURE; } // if((file_index=file_open(argv[argc-1]))==NULL) if (!stricmp(argv[1],"--help")) { help(); return SUCCESS; } else if (!stricmp(argv[1],"--version")) { version(); return SUCCESS; } else if (!stricmp(argv[1],"-c")) { compress(argv[2]); } else if(!stricmp(argv[1],"-z")) { decompress_func(argv[2]); } else help(); return SUCCESS;}//************************************// Method: initilize// FullName: initilize// Access: public// Returns: void// Qualifier: inites// Parameter: void//************************************void initilize(void){ int calcount=0; //memset(array_char,'0',sizeof(array_char)); while(calcount<256) { array_char[calcount]=0; huff_pt[calcount]=NULL; HUFF_DICTIONARY[calcount]=0; calcount++; } return ;}//************************************// Method: file_open// FullName: file_open// Access: public// Returns: FILE*// Qualifier: open the input file// Parameter: const char * file_name//************************************FILE* file_open(const char* file_name){ FILE* file_index=NULL; if((file_index=fopen(file_name,"r"))==NULL) return NULL; return file_index;}//******************************************************************// Method: claculate// FullName: claculate// Access: public// Returns: int// Qualifier: calculate the frequence of character in file_index// Parameter: FILE * file_index//****************************************************************int claculate(FILE* file_index){ unsigned char temp=0; while(!feof(file_index)) { /*eat file*/ if(fread(&temp,sizeof(unsigned char),1,file_index)) array_char[temp]++; } return SUCCESS;}//************************************// Method: create_dictiony// FullName: create_dictiony// Access: public// Returns: int// Qualifier:create dictionary// Parameter: void//************************************int create_dictiony(void){ int i=0; char *u=NULL; unsigned char j=0; HUFF_NODE_PTR head,currents=NULL; create_tree(&head); HUFF_HEADER=head; while(i<256) { if(huff_pt[i]!=NULL) { currents=huff_pt[i]; j=currents->code; u=malloc(sizeof(char)); assert(u!=NULL); memset(u,0,sizeof(char)); while(currents->parent!=NULL) { u=realloc(u,2*sizeof(char)+strlen(u)); assert(u!=NULL); if(currents->parent->lchild==currents) strcat(u,"0"); else if(currents->parent->rchild==currents) strcat(u,"1"); else { printf("error in code\n"); system("pasue"); } currents=currents->parent; } /*reverse*/ strrev(u); HUFF_DICTIONARY[j]=u; } i++; } return SUCCESS;}//************************************// Method: create_tree// FullName: create_tree// Access: public// Returns: int// Qualifier:create tree of huffman// Parameter: HUFF_NODE_PTR * head//************************************int create_tree(HUFF_NODE_PTR* head){ size_t n=256,i,j; HUFF_NODE_PTR temp[256]; memset(temp,0,sizeof(temp)); for (i = 0,j=0; i < n; i++) { if(array_char[i]) { temp[j]=huff_malloc((unsigned char)i); temp[j]->wight=array_char[i]; /*temp[j]->code is the code that contain the info of read_file*/ temp[j]->code=i; huff_pt[i]=temp[j]; j++; } } *head=adjusty(temp,j); amounts=j; return SUCCESS;}//************************************// Method: file_write// FullName: file_write// Access: public// Returns: int// Qualifier:create function// Parameter: char * file_name// Parameter: char * file_stream// Parameter: size_t num// Parameter: int flags//************************************int file_write(char* file_name,FILE* file_inputs){ FILE* fp=NULL; int counts=0; unsigned char readdata,writedata=0,*TempPoint; char* file_w=file_name; fp=fopen(file_w,"ab+"); if(!fp) { printf("can`t open file %s and exit FAILURE!\n", file_name); exit(FAILURE); } while(!feof(file_inputs)) { if(fread(&readdata,sizeof(unsigned char),1,file_inputs)) TempPoint=(unsigned char*)HUFF_DICTIONARY[readdata]; while(*TempPoint) { writedata=writedata<<1; writedata|=(*TempPoint-'0'); TempPoint++; counts++; if(counts==8) { fwrite(&writedata,sizeof(unsigned char),1,fp); counts=0; writedata=0; } } } if(counts) { writedata<<=(8-counts); fwrite(&writedata,sizeof(unsigned char),1,fp); } writedata=(unsigned char)((8-counts)%8); fwrite(&writedata,sizeof(unsigned char),1,fp); fclose(fp); return SUCCESS;}//************************************// Method: huff_malloc// FullName: huff_malloc// Access: public// Returns: HUFF_NODE_PTR// Qualifier:// Parameter: unsigned char i//************************************HUFF_NODE_PTR huff_malloc(unsigned char i){ HUFF_NODE_PTR temp=(HUFF_NODE_PTR)malloc(sizeof(HUFF_NODE)); assert (temp!=NULL); temp->code=i; temp->parent=NULL; temp->lchild=NULL; temp->rchild=NULL; temp->wight=0; return temp;}//************************************// Method: adjusty// FullName: adjusty// Access: public// Returns: HUFF_NODE_PTR// Qualifier:// Parameter: HUFF_NODE_PTR code[]// Parameter: size_t n//************************************HUFF_NODE_PTR adjusty(HUFF_NODE_PTR code[],size_t n){ assert (n<=256); int i,j=0,k=0; /* index j replcaethe lest,and index k replace the second*/ HUFF_NODE_PTR temp=NULL; while(n) { for(i=0; i<n; i++) { if(code[k]->wight>code[i]->wight) { if(code[j]->wight>code[i]->wight) { k=j; j=i; } else k=i; } } /*finished the merge*/ if(j==k) { if(n==1) break; k++; i=k; while(i<n) { if(code[k]->wight>code[i]->wight) k=i; i++; } } temp=huff_malloc(0); /*merge two child*/ temp->lchild=code[j]; temp->rchild=code[k]; /*child pointer point to parent*/ code[j]->parent=temp; code[k]->parent=temp; temp->wight=code[j]->wight+code[k]->wight; if(j<k) { code[j]=temp; if(k!=(--n)) code[k]=code[n]; } else { code[k]=temp; if(j!=(--n)) code[j]=code[n]; } k=0; j=0; } return temp;}//************************************// Method: decode// FullName: decode// Access: public// Returns: void// Qualifier:// Parameter: HUFF_NODE_PTR head// Parameter: FILE * fp//************************************void decode (HUFF_NODE_PTR head,FILE* fp){ unsigned char read_code,tempcode=0,rewards=0,tp=0; int count=8,model=0; int flags=0; HUFF_NODE_PTR decode_head=head; unsigned char CHINESE[3]= {0,0,0}; while(!feof(fp)) {backup: count=8; /*get a 8 bits code to find the leaves*/ fread(&tempcode,sizeof(unsigned char),1,fp); /* if arrive to last second character you should exit now ! */ if(feof(fp)) break; read_code=tempcode; while(decode_head) { /*go to the leave note!,should to decode now*/ if(!decode_head->lchild) { fread(&rewards,sizeof(unsigned char),1,fp); fread(&tp,sizeof(unsigned char),1,fp); /*arrive to second character*/ model++; model%=2; CHINESE[model]=decode_head->code; if(CHINESE[(model-1+2)%2]>0xa0) { if(!model) swap_func(&CHINESE[0],&CHINESE[1]); printf("%s",CHINESE); CHINESE[0]=CHINESE[1]=0; } else if (CHINESE[model]<0xa0) { printf("%c",CHINESE[model]); } decode_head=head; if(feof(fp)) { ungetc((char)tp,fp); ungetc((char)rewards,fp); flags=1; break; } fseek(fp,-2,SEEK_CUR); } /*left child should be 1,and else the opposite*/ if(read_code&0x80) { decode_head=decode_head->rchild; } else decode_head=decode_head->lchild; read_code<<=1; count--; /*if get here means that: a unsigned char temp has been used out and we should start again!*/ if(!count) goto backup; } if(flags) break; } assert(count>=rewards); while(count-rewards) { if(read_code&0x80) { decode_head=decode_head->rchild; } else decode_head=decode_head->lchild; read_code<<=1; count--; if(!decode_head->lchild) { model++; model%=2; CHINESE[model]=decode_head->code; if(CHINESE[(model-1+2)%2]>0xa0) { if(!model) swap_func(&CHINESE[0],&CHINESE[1]); printf("%s",CHINESE); CHINESE[0]=CHINESE[1]=0; } else if (CHINESE[model]<0xa0) { printf("%c",CHINESE[model]); } decode_head=head; } } return;}//************************************// Method: swap_func// FullName: swap_func// Access: public// Returns: void// Qualifier:// Parameter: unsigned char * A// Parameter: unsigned char * B//************************************void swap_func(unsigned char *A,unsigned char *B){ unsigned temp=*A; *A=*B; *B=temp; return;}//************************************// Method: write_dictionary// FullName: write_dictionary// Access: public// Returns: void// Qualifier: write to file keeping message// of dictionary// Parameter: char * file_name//************************************void write_dictionary(char* file_name){ int counts=0,length; FILE* fp=fopen(file_name,"ab+"); //char* file_w=(char *)malloc(strlen(file_name)+3); assert(fp!=NULL); /*write to logs*/ /*write the amount of characters*/ fwrite(&amounts,sizeof(unsigned char),1,fp); for(; counts<256;) { /*if(current dictionary is not null that means you should write it to dictionary, causing this is a pointer pointing to a series of decode here we should write the character ,decode,and its length*/ if(HUFF_DICTIONARY[counts]) { fwrite(&counts,sizeof(unsigned char),1,fp); length=strlen(HUFF_DICTIONARY[counts]); fwrite(&length,sizeof(int),1,fp); fwrite(HUFF_DICTIONARY[counts],sizeof(unsigned char),length,fp); } counts++; } fclose(fp); return ;}//************************************// Method: output// FullName: output// Access: public// Returns: void// Qualifier:decode and output// Parameter: char * file_name//************************************void output(char* file_name){ int ch_counts=0,length=0; unsigned char ch_index=0; HUFF_NODE_PTR head_index=NULL; FILE* fp=fopen(file_name,"rb+"); assert(fp!=NULL); fread(&ch_counts,sizeof(unsigned char),1,fp); while(ch_counts--) { fread(&ch_index,sizeof(unsigned char),1,fp); fread(&length,sizeof(int),1,fp); free(HUFF_DICTIONARY[ch_index]); HUFF_DICTIONARY[ch_index]=(char*)malloc(length+1); memset(HUFF_DICTIONARY[ch_index],0,length+1); fread(HUFF_DICTIONARY[ch_index],sizeof(char),length,fp); } printf("\ndecode start...\n"); //create_tree(&head_index); head_index = tree_from_dic(HUFF_DICTIONARY); decode(head_index,fp); printf("\ntranslation has been finished\n"); return ;}void version(void){printf("\nCOPYRIGHT @ NEWPLAN IN UESTC\n"); printf("CURRENT VERSION IS NEWPLAN.0.1 THANKS FOR SUPPORT US!\n\n");return;}void help(void){printf("/*********************************************" "*********************************\n**by using huffman method\n" "**NEWPLAN @ UESTC 2014.5\n*******************************" "***********************************************/ \n"); printf("choose function to operate!\n"); printf("FORMAT:\n"); printf("NEWPLAN [parameter] [FILE]\n"); printf("this is about compress file to a new file with FORMAT : file_name.n \n"); printf("parameter:\n"); printf("\t-c\t\tcheck for compress file\n"); printf("\t-z\t\tcheck for decompress file\n"); printf("\t--help\t\tcheck for documents\n"); printf("\t--version\tcheck for version\n");return;}void compress(const char* file_s){char* fw=NULL;FILE* file_index=file_open(file_s);fw=(char*)malloc(strlen(file_s)+3);assert ((file_index!=NULL)&&(fw!=NULL));memset(fw,0,strlen(file_s)+3);strcat(fw,file_s);strcat(fw,".n"); claculate(file_index); create_dictiony(); write_dictionary(fw); fclose(file_index); file_index=file_open(file_s); file_write(fw,file_index);return;}void decompress_func(const char* file_s){char* fw=(char*)malloc(strlen(file_s)+3);assert(fw!=NULL);memset(fw,0,strlen(file_s)+3);strcat(fw,file_s);strcat(fw,".n");output(fw);return;}HUFF_NODE_PTR tree_from_dic(char** dic_array){int cycle=0;char* ch_ptr=NULL;HUFF_NODE_PTR temp=NULL;HUFF_HEADER=huff_malloc(0);for (cycle = 0; cycle < 256; cycle++){if(dic_array[cycle]){temp=HUFF_HEADER;ch_ptr=dic_array[cycle];while(*ch_ptr){if(*ch_ptr=='0'){if(!temp->lchild){temp->lchild=huff_malloc(0);temp->lchild->parent=temp;}temp=temp->lchild;}else{if(!temp->rchild){temp->rchild=huff_malloc(0);temp->rchild->parent=temp;}temp=temp->rchild;}ch_ptr++;}temp->code=cycle;}}return HUFF_HEADER;}
1 0
- 中文压缩和解码程序设计与实现(huffman)
- Huffman树实现与应用(编码解码压缩解压缩)
- Huffman 编解码算法实现与压缩效率分析
- Huffman编解码实现文本压缩
- 数据压缩实验三 Huffman编解码算法实现与压缩效率分析
- 《数据压缩》实验报告三·Huffman编解码算法实现与压缩效率分析
- 数据压缩原理 实验三 Huffman编解码算法实现与压缩效率分析
- 实验三Huffman编解码算法实现与压缩效率分析
- 实验三 Huffman编解码算法实现与压缩效率分析
- 数据压缩试验三 Huffman 编解码算法实现与压缩效率分析
- 数据压缩 实验三 Huffman编解码算法实现与压缩效率分析
- 数据压缩 实验三 Huffman编解码算法实现与压缩效率分析
- Huffman编码与解码的实现
- Huffman编码与解码_C语言实现
- Java实现-Huffman编码与解码
- C#实现Huffman编码和解码
- huffman编解码算法实验与压缩效率分析
- 哈夫曼(Huffman)编码与解码
- 关于sqlyog的配置以及java length
- Greenplum中分布式数据库存储及查询处理
- 数据结构大作业——并查集:检查网络
- 使用BlockingQueue实现生产者消费者模式
- 用时间做种子生成随机数
- 中文压缩和解码程序设计与实现(huffman)
- java内存管理
- 二叉树遍历的6种遍历方法
- 地址空间
- 位域的使用
- C++强制类型转换
- hdu 1010 Tempter of the Bone
- 扑克序列——第五届蓝桥杯省赛C语言A组第6题
- 什么是#pragma?