中文压缩和解码程序设计与实现(huffman)

来源:互联网 发布:js toggle 切换状态 编辑:程序博客网 时间:2024/06/13 10:05

本项目利用 Huffman 算法实现中文文本的压缩与解码。Huffman 编码已被证明是最优的前缀编码,可用于无损数据压缩。

源码

/*
 * Huffman compressor / decompressor for byte-oriented text (for example
 * double-byte encoded Chinese).
 *
 *   NEWPLAN -c FILE    compress FILE into FILE.n
 *   NEWPLAN -z FILE    decode FILE.n and print the text on stdout
 *
 * Archive layout (FILE.n), as produced by write_dictionary() + file_write():
 *   1 byte      number of dictionary entries (256 entries wrap to 0)
 *   per entry   1 byte symbol, one native `int` code length, then that many
 *               '0'/'1' characters
 *   payload     code bits packed most-significant bit first; the last byte
 *               is padded with zero bits
 *   1 byte      number of padding bits in the last payload byte
 *
 * NEWPLAN @ UESTC 2014.5
 */
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define SUCCESS 1
#define FAILURE 0
#define DICTIONRAY 0
#define INDEX 1

/* Longest possible code: a Huffman tree with 256 leaves is at most 255 deep. */
enum { MAX_CODE_LENGTH = 255 };

/* Occurrence count of every byte value in the input file. */
int array_char[256] = {0};
/* Number of distinct byte values seen; stored in one byte, so 256 wraps to 0. */
unsigned char amounts = 0;

/* One node of the Huffman tree. */
#pragma pack(1)
typedef struct huff
{
    struct huff *lchild,
                *rchild,
                *parent;
    int wight;              /* weight: summed occurrence count of the subtree */
    unsigned char code;     /* byte value represented by a leaf */
} HUFF_NODE, *HUFF_NODE_PTR;
#pragma pack()

/* Root of the current tree. */
HUFF_NODE_PTR HUFF_HEADER = NULL;
/* Code string ("0101...") per byte value; NULL for byte values not in use. */
char *HUFF_DICTIONARY[256] = {0};
/* Leaf node per byte value; NULL for byte values not in use. */
HUFF_NODE_PTR huff_pt[256];

void help(void);
void version(void);
void output(char*);
void initilize(void);
int claculate(FILE*);
int create_dictiony(void);
void compress(const char*);
int file_write(char*, FILE*);
void write_dictionary(char*);
FILE* file_open(const char*);
int create_tree(HUFF_NODE_PTR*);
void decompress_func(const char*);
void decode(HUFF_NODE_PTR, FILE*);
HUFF_NODE_PTR tree_from_dic(char**);
HUFF_NODE_PTR huff_malloc(unsigned char i);
void swap_func(unsigned char*, unsigned char*);
HUFF_NODE_PTR adjusty(HUFF_NODE_PTR code[], size_t);

/* Case-insensitive string equality; portable stand-in for stricmp(). */
static int equals_ignore_case(const char *a, const char *b)
{
    while (*a != '\0' && *b != '\0')
    {
        if (tolower((unsigned char)*a) != tolower((unsigned char)*b))
            return 0;
        a++;
        b++;
    }
    return *a == *b;
}

/* malloc() that terminates the program instead of returning NULL. */
static void *checked_alloc(size_t size)
{
    void *mem = malloc(size);
    if (mem == NULL)
    {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/* Builds "<file_s>.n" in freshly allocated memory; the caller frees it. */
static char *archive_name(const char *file_s)
{
    size_t size = strlen(file_s) + 3;   /* ".n" plus the terminator */
    char *name = checked_alloc(size);
    snprintf(name, size, "%s.n", file_s);
    return name;
}

/*
 * Program entry point.
 *
 * argv[1] selects the operation (--help, --version, -c, -z; matched without
 * regard to case) and argv[2] names the file for -c / -z.  Any other option
 * prints the help text.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the operation or its file
 * operand is missing.
 */
int main(int argc, char const *argv[])
{
    int want_compress, want_decompress;

    initilize();
    if (argc < 2)
    {
        printf("can`t be empty file or operation\n");
        return EXIT_FAILURE;
    }
    if (equals_ignore_case(argv[1], "--help"))
    {
        help();
        return EXIT_SUCCESS;
    }
    if (equals_ignore_case(argv[1], "--version"))
    {
        version();
        return EXIT_SUCCESS;
    }

    want_compress = equals_ignore_case(argv[1], "-c");
    want_decompress = equals_ignore_case(argv[1], "-z");
    if (!want_compress && !want_decompress)
    {
        help();
        return EXIT_SUCCESS;
    }
    /* -c and -z need a file operand; argv[2] is NULL when it is missing. */
    if (argc < 3)
    {
        printf("can`t be empty file or operation\n");
        return EXIT_FAILURE;
    }
    if (want_compress)
        compress(argv[2]);
    else
        decompress_func(argv[2]);
    return EXIT_SUCCESS;
}

/* Resets the frequency table, the leaf table and the dictionary. */
void initilize(void)
{
    int i;

    for (i = 0; i < 256; i++)
    {
        array_char[i] = 0;
        huff_pt[i] = NULL;
        HUFF_DICTIONARY[i] = NULL;
    }
}

/*
 * Opens the input file for reading.
 * Binary mode is used so that every byte is counted and encoded unchanged.
 * Returns the stream, or NULL when the file cannot be opened.
 */
FILE* file_open(const char* file_name)
{
    return fopen(file_name, "rb");
}

/*
 * Counts how often each byte value occurs in file_index, adding to
 * array_char.  Reads until end of file or a read error.
 * Returns SUCCESS, or FAILURE when file_index is NULL.
 */
int claculate(FILE* file_index)
{
    unsigned char byte = 0;

    if (file_index == NULL)
        return FAILURE;
    while (fread(&byte, sizeof byte, 1, file_index) == 1)
        array_char[byte]++;
    return SUCCESS;
}

/*
 * Builds the Huffman tree from array_char and fills HUFF_DICTIONARY with the
 * code string of every byte value that occurs.
 *
 * A code is the path from the root to the leaf: '0' for a left branch and
 * '1' for a right branch.  The path is collected by walking from the leaf up
 * to the root and writing the string back to front.
 *
 * Returns SUCCESS.
 */
int create_dictiony(void)
{
    HUFF_NODE_PTR head = NULL;
    int symbol;

    create_tree(&head);
    HUFF_HEADER = head;

    for (symbol = 0; symbol < 256; symbol++)
    {
        HUFF_NODE_PTR leaf = huff_pt[symbol];
        HUFF_NODE_PTR node;
        size_t depth = 0, pos;
        char *bits;

        if (leaf == NULL)
            continue;

        for (node = leaf; node->parent != NULL; node = node->parent)
            depth++;

        if (depth == 0)
        {
            /* Only one distinct byte in the input: the leaf has no parent.
               Give it a one-bit code so that its occurrences are encoded. */
            bits = checked_alloc(2);
            bits[0] = '0';
            bits[1] = '\0';
        }
        else
        {
            bits = checked_alloc(depth + 1);
            bits[depth] = '\0';
            pos = depth;
            for (node = leaf; node->parent != NULL; node = node->parent)
            {
                if (node->parent->lchild == node)
                    bits[--pos] = '0';
                else if (node->parent->rchild == node)
                    bits[--pos] = '1';
                else
                {
                    /* Parent does not link back to this child. */
                    printf("error in code\n");
                    exit(EXIT_FAILURE);
                }
            }
        }

        free(HUFF_DICTIONARY[leaf->code]);
        HUFF_DICTIONARY[leaf->code] = bits;
    }
    return SUCCESS;
}

/*
 * Creates one leaf per byte value with a non-zero count, records the leaves
 * in huff_pt, merges them with adjusty() and stores the result in *head.
 * Also sets `amounts` to the number of leaves (truncated to one byte).
 * Returns SUCCESS.
 */
int create_tree(HUFF_NODE_PTR* head)
{
    size_t n = 256, i, j;
    HUFF_NODE_PTR temp[256];

    memset(temp, 0, sizeof(temp));
    for (i = 0, j = 0; i < n; i++)
    {
        if (array_char[i])
        {
            temp[j] = huff_malloc((unsigned char)i);
            temp[j]->wight = array_char[i];
            temp[j]->code = (unsigned char)i;
            huff_pt[i] = temp[j];
            j++;
        }
    }
    *head = adjusty(temp, j);
    amounts = (unsigned char)j;
    return SUCCESS;
}

/*
 * Encodes file_inputs with HUFF_DICTIONARY and appends the packed bits to
 * file_name, followed by one byte holding the number of padding bits in the
 * last payload byte.
 *
 * Returns SUCCESS, or FAILURE when file_inputs is NULL, a byte has no
 * dictionary entry, or writing fails.  Exits the program when file_name
 * cannot be opened.
 */
int file_write(char* file_name, FILE* file_inputs)
{
    FILE* fp = NULL;
    int counts = 0;         /* bits currently collected in writedata */
    int status = SUCCESS;
    unsigned char readdata = 0, writedata = 0;

    if (file_inputs == NULL)
        return FAILURE;

    fp = fopen(file_name, "ab");
    if (!fp)
    {
        printf("can`t open file %s and exit FAILURE!\n", file_name);
        exit(EXIT_FAILURE);
    }

    while (fread(&readdata, sizeof readdata, 1, file_inputs) == 1)
    {
        const char *bits = HUFF_DICTIONARY[readdata];

        if (bits == NULL)
        {
            /* The input contains a byte that was not counted earlier. */
            fprintf(stderr, "no code for byte 0x%02x\n", (unsigned)readdata);
            status = FAILURE;
            break;
        }
        for (; *bits != '\0'; bits++)
        {
            writedata = (unsigned char)((writedata << 1) | (*bits - '0'));
            if (++counts == 8)
            {
                fwrite(&writedata, sizeof writedata, 1, fp);
                counts = 0;
                writedata = 0;
            }
        }
    }

    if (counts)
    {
        /* Left-align the remaining bits; the low bits are padding. */
        writedata = (unsigned char)(writedata << (8 - counts));
        fwrite(&writedata, sizeof writedata, 1, fp);
    }
    writedata = (unsigned char)((8 - counts) % 8);
    fwrite(&writedata, sizeof writedata, 1, fp);

    if (ferror(fp))
        status = FAILURE;
    if (fclose(fp) != 0)
        status = FAILURE;
    return status;
}

/*
 * Allocates a tree node with the given byte value, zero weight and no
 * links.  Exits the program when memory is exhausted.
 */
HUFF_NODE_PTR huff_malloc(unsigned char i)
{
    HUFF_NODE_PTR temp = checked_alloc(sizeof *temp);

    temp->code = i;
    temp->parent = NULL;
    temp->lchild = NULL;
    temp->rchild = NULL;
    temp->wight = 0;
    return temp;
}

/*
 * Merges the n nodes in code[] into one Huffman tree by repeatedly joining
 * the two lightest nodes.  code[] is used as scratch space.
 *
 * Returns the root created by the last merge, or NULL when n < 2 (no merge
 * takes place).
 */
HUFF_NODE_PTR adjusty(HUFF_NODE_PTR code[], size_t n)
{
    size_t i, j = 0, k = 0;     /* j: lightest node, k: second lightest */
    HUFF_NODE_PTR temp = NULL;

    assert(n <= 256);
    while (n)
    {
        for (i = 0; i < n; i++)
        {
            if (code[k]->wight > code[i]->wight)
            {
                if (code[j]->wight > code[i]->wight)
                {
                    k = j;
                    j = i;
                }
                else
                    k = i;
            }
        }
        /* j == k means nothing was lighter than code[0]; look for the
           second lightest among the remaining nodes. */
        if (j == k)
        {
            if (n == 1)
                break;
            k++;
            for (i = k; i < n; i++)
            {
                if (code[k]->wight > code[i]->wight)
                    k = i;
            }
        }

        temp = huff_malloc(0);
        temp->lchild = code[j];
        temp->rchild = code[k];
        code[j]->parent = temp;
        code[k]->parent = temp;
        temp->wight = code[j]->wight + code[k]->wight;

        /* Put the merged node in the lower slot and fill the other slot
           with the last node, shrinking the working set by one. */
        if (j < k)
        {
            code[j] = temp;
            if (k != (--n))
                code[k] = code[n];
        }
        else
        {
            code[k] = temp;
            if (j != (--n))
                code[j] = code[n];
        }
        k = 0;
        j = 0;
    }
    return temp;
}

/*
 * Decodes the payload that follows the dictionary in fp and writes every
 * decoded byte to stdout in order.
 *
 * fp must be positioned at the first payload byte.  The final byte of the
 * stream is the padding count; a two-byte look-ahead identifies the last
 * payload byte so that its padding bits are skipped and the padding count
 * itself is never decoded.
 *
 * head is the tree root; a set bit selects the right child, a clear bit the
 * left child.  Decoding stops with a message on stderr when the stream does
 * not match the tree.
 */
void decode(HUFF_NODE_PTR head, FILE* fp)
{
    HUFF_NODE_PTR node = head;
    int current, next, after;

    if (head == NULL || fp == NULL)
        return;

    current = getc(fp);
    next = getc(fp);
    /* With fewer than two bytes left there is no payload at all. */
    while (current != EOF && next != EOF)
    {
        int bits = 8;
        int mask;

        after = getc(fp);
        if (after == EOF)
        {
            /* `current` is the last payload byte, `next` the padding count. */
            if (next > 7)
            {
                fprintf(stderr, "corrupt archive: bad padding count\n");
                return;
            }
            bits = 8 - next;
        }

        for (mask = 0x80; bits > 0; bits--, mask >>= 1)
        {
            node = (current & mask) ? node->rchild : node->lchild;
            if (node == NULL)
            {
                fprintf(stderr, "corrupt archive: invalid code\n");
                return;
            }
            if (node->lchild == NULL && node->rchild == NULL)
            {
                putchar(node->code);
                node = head;
            }
        }

        current = next;
        next = after;
    }
}

/* Exchanges the two bytes pointed to by A and B. */
void swap_func(unsigned char *A, unsigned char *B)
{
    unsigned char temp = *A;

    *A = *B;
    *B = temp;
}

/*
 * Creates (or truncates) file_name and writes the dictionary header: the
 * entry count followed by symbol, code length and code string of every
 * entry in HUFF_DICTIONARY.
 *
 * Truncating keeps an archive left over from an earlier run from ending up
 * in front of the new data.  Exits the program when the file cannot be
 * opened.
 */
void write_dictionary(char* file_name)
{
    int counts;
    FILE* fp = fopen(file_name, "wb");

    if (fp == NULL)
    {
        printf("can`t open file %s and exit FAILURE!\n", file_name);
        exit(EXIT_FAILURE);
    }

    fwrite(&amounts, sizeof amounts, 1, fp);
    for (counts = 0; counts < 256; counts++)
    {
        const char *bits = HUFF_DICTIONARY[counts];
        unsigned char symbol;
        int length;

        if (bits == NULL)
            continue;
        /* Write the symbol from a real byte object so the result does not
           depend on the byte order of `int`. */
        symbol = (unsigned char)counts;
        length = (int)strlen(bits);
        fwrite(&symbol, sizeof symbol, 1, fp);
        fwrite(&length, sizeof length, 1, fp);
        fwrite(bits, sizeof(char), (size_t)length, fp);
    }

    if (ferror(fp))
        fprintf(stderr, "error while writing %s\n", file_name);
    fclose(fp);
}

/*
 * Reads the dictionary from the archive file_name, rebuilds the tree and
 * prints the decoded text on stdout.  Problems with the archive are
 * reported on stderr and end the function early.
 */
void output(char* file_name)
{
    unsigned char count_byte = 0, ch_index = 0;
    int ch_counts, length = 0;
    HUFF_NODE_PTR head_index = NULL;
    FILE* fp = fopen(file_name, "rb");

    if (fp == NULL)
    {
        fprintf(stderr, "can`t open file %s\n", file_name);
        return;
    }
    if (fread(&count_byte, sizeof count_byte, 1, fp) != 1)
    {
        fprintf(stderr, "corrupt archive: missing dictionary\n");
        fclose(fp);
        return;
    }

    ch_counts = count_byte;
    if (ch_counts == 0)
    {
        /* The count is stored in one byte, so 256 entries wrap to 0.  The
           archive of an empty input holds only one more byte (the padding
           count); anything longer is the wrapped 256-entry case. */
        long here = ftell(fp);

        if (here >= 0 && fseek(fp, 0L, SEEK_END) == 0)
        {
            long end = ftell(fp);

            if (end - here > 1)
                ch_counts = 256;
            fseek(fp, here, SEEK_SET);
        }
    }

    while (ch_counts-- > 0)
    {
        char *bits;

        if (fread(&ch_index, sizeof ch_index, 1, fp) != 1
                || fread(&length, sizeof length, 1, fp) != 1
                || length < 0 || length > MAX_CODE_LENGTH)
        {
            fprintf(stderr, "corrupt archive: bad dictionary entry\n");
            fclose(fp);
            return;
        }
        bits = checked_alloc((size_t)length + 1);
        memset(bits, 0, (size_t)length + 1);
        if (fread(bits, sizeof(char), (size_t)length, fp) != (size_t)length)
        {
            fprintf(stderr, "corrupt archive: truncated dictionary\n");
            free(bits);
            fclose(fp);
            return;
        }
        free(HUFF_DICTIONARY[ch_index]);
        HUFF_DICTIONARY[ch_index] = bits;
    }

    printf("\ndecode start...\n");
    head_index = tree_from_dic(HUFF_DICTIONARY);
    decode(head_index, fp);
    printf("\ntranslation has been finished\n");
    fclose(fp);
}

/* Prints the copyright and version banner. */
void version(void)
{
    printf("\nCOPYRIGHT @ NEWPLAN IN UESTC\n");
    printf("CURRENT VERSION IS NEWPLAN.0.1 THANKS FOR SUPPORT US!\n\n");
}

/* Prints the usage text. */
void help(void)
{
    printf("/*********************************************"
           "*********************************\n**by using huffman method\n"
           "**NEWPLAN @ UESTC 2014.5\n*******************************"
           "***********************************************/ \n");
    printf("choose function to operate!\n");
    printf("FORMAT:\n");
    printf("NEWPLAN [parameter] [FILE]\n");
    printf("this is about compress file to a new file with FORMAT : file_name.n \n");
    printf("parameter:\n");
    printf("\t-c\t\tcheck for compress file\n");
    printf("\t-z\t\tcheck for decompress file\n");
    printf("\t--help\t\tcheck for documents\n");
    printf("\t--version\tcheck for version\n");
}

/*
 * Compresses file_s into "<file_s>.n".
 *
 * The input is read twice: once to count byte frequencies and once, after
 * the dictionary has been written, to emit the encoded payload.
 */
void compress(const char* file_s)
{
    char* fw = NULL;
    FILE* file_index = NULL;

    if (file_s == NULL)
    {
        printf("can`t be empty file or operation\n");
        return;
    }
    file_index = file_open(file_s);
    if (file_index == NULL)
    {
        fprintf(stderr, "can`t open file %s\n", file_s);
        return;
    }

    fw = archive_name(file_s);
    claculate(file_index);
    create_dictiony();
    write_dictionary(fw);

    /* Second pass over the same input. */
    rewind(file_index);
    if (file_write(fw, file_index) != SUCCESS)
        fprintf(stderr, "error while writing %s\n", fw);

    fclose(file_index);
    free(fw);
}

/* Decodes "<file_s>.n" and prints the result on stdout. */
void decompress_func(const char* file_s)
{
    char* fw = NULL;

    if (file_s == NULL)
    {
        printf("can`t be empty file or operation\n");
        return;
    }
    fw = archive_name(file_s);
    output(fw);
    free(fw);
}

/*
 * Rebuilds the decoding tree from a dictionary of code strings.
 *
 * For every non-NULL dic_array[c] the string is followed from a fresh root:
 * '0' goes to the left child and any other character to the right child,
 * creating nodes as needed; the node reached at the end is labelled with c.
 *
 * Stores the root in HUFF_HEADER and returns it.
 */
HUFF_NODE_PTR tree_from_dic(char** dic_array)
{
    int cycle;

    HUFF_HEADER = huff_malloc(0);
    for (cycle = 0; cycle < 256; cycle++)
    {
        const char *ch_ptr = dic_array[cycle];
        HUFF_NODE_PTR temp = HUFF_HEADER;

        if (ch_ptr == NULL)
            continue;
        for (; *ch_ptr != '\0'; ch_ptr++)
        {
            HUFF_NODE_PTR *branch =
                (*ch_ptr == '0') ? &temp->lchild : &temp->rchild;

            if (*branch == NULL)
            {
                *branch = huff_malloc(0);
                (*branch)->parent = temp;
            }
            temp = *branch;
        }
        temp->code = (unsigned char)cycle;
    }
    return HUFF_HEADER;
}


1 0
原创粉丝点击