PAT Huffman Codes 哈夫曼树,C语言实现

来源:互联网 发布:new event js 位置 编辑:程序博客网 时间:2024/05/22 08:24

题目链接:https://pintia.cn/problem-sets/900290821590183936/problems/914044227287445504
本小白刚刚起步,最近在刷浙江大学 PAT 上的题目,做到了哈夫曼树的这一题。绞尽脑汁之后,把网上搜到的 C 代码(有的 C++ 代码也许可以实现,但是我还不会 C++)拿到 PTA 上去跑,也没有一份能通过。于是下决心一定要自己做出来,把每个细节都考虑到,一步步实现,终于做出来了:每个测试点都正确,成就感满满的。一激动就开了个 CSDN 账号,来分享一下 C 语言实现的 Huffman Codes,仅供像我这样的小白参考一下。

原题:
05-树9 Huffman Codes(30 分)

In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

我的原代码:

/*
 * PTA 05-Tree9 "Huffman Codes".
 *
 * Reads N symbols with their frequencies, builds a Huffman tree with a
 * min-heap to obtain the optimal weighted path length (WPL), then checks M
 * student code tables: a table is accepted when its total encoded length
 * equals the optimal WPL and no code is a prefix of another.
 *
 * Implementation note from the original author: heap slots hold whole
 * struct TreeNode values. Nodes must always be copied as complete structs
 * (H->Elements[i] = *T), never field by field (H->Elements[i].weight =
 * T->weight), otherwise the left/right pointers are lost, the tree falls
 * apart and the program crashes.
 *
 * Define HUFFMAN_NO_MAIN to compile the helpers without the stdin driver
 * (for example, to link them into a unit test).
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    MAX_SYMBOLS = 100, /* capacity of the per-symbol arrays in main */
    CODE_BUF = 200     /* bytes reserved for one code string, including NUL */
};

typedef struct TreeNode *HuffmanTree;
struct TreeNode {
    int weight;
    HuffmanTree left, right;
};

typedef struct HeapStruct *MinHeap;
struct HeapStruct {
    HuffmanTree Elements; /* array of nodes stored by value; slot 0 is a sentinel */
    int Size;             /* number of nodes currently stored (slots 1..Size) */
    int Capacity;         /* maximum number of nodes */
};

/* Recursively frees a tree whose nodes were all obtained from DeleteMin. */
static void FreeTree(HuffmanTree T)
{
    if (T == NULL)
        return;
    FreeTree(T->left);
    FreeTree(T->right);
    free(T);
}

/* Releases a heap created by MinHeap_Create (the node array and the header). */
static void MinHeap_Destroy(MinHeap H)
{
    if (H == NULL)
        return;
    free(H->Elements);
    free(H);
}

/*
 * Allocates an empty min-heap able to hold Maxsize nodes.
 * Returns NULL if Maxsize is negative or an allocation fails.
 */
MinHeap MinHeap_Create(int Maxsize)
{
    if (Maxsize < 0)
        return NULL;

    MinHeap H = malloc(sizeof *H);
    if (H == NULL)
        return NULL;

    /* One extra slot: index 0 holds the sentinel. */
    H->Elements = malloc(((size_t)Maxsize + 1) * sizeof *H->Elements);
    if (H->Elements == NULL) {
        free(H);
        return NULL;
    }
    H->Size = 0;
    H->Capacity = Maxsize;
    H->Elements[0].weight = -1; /* sentinel: smaller than any expected weight */
    H->Elements[0].left = NULL;
    H->Elements[0].right = NULL;
    return H;
}

/*
 * Inserts a copy of *T (including its child pointers) into the heap.
 * If the heap is already full a diagnostic is written to stderr and the
 * heap is left unchanged.
 */
void Insert(MinHeap H, HuffmanTree T)
{
    int i;

    if (H->Size == H->Capacity) {
        /* stderr, so the judged stdout stream is never polluted */
        fprintf(stderr, "最小堆已满");
        return;
    }
    /*
     * Sift up: move larger parents down until the right slot is found.
     * The i > 1 bound keeps the loop finite even if a weight is smaller
     * than the sentinel.
     */
    for (i = ++H->Size; i > 1 && H->Elements[i / 2].weight > T->weight; i /= 2)
        H->Elements[i] = H->Elements[i / 2];
    H->Elements[i] = *T;
}

/*
 * Builds a min-heap of Maxsize leaf nodes. Leaf i gets the weight
 * Weight[(unsigned char)CH[i]]. Returns NULL if the heap cannot be allocated.
 */
MinHeap BuildMinHeap(int Weight[], int Maxsize, char CH[])
{
    MinHeap H = MinHeap_Create(Maxsize);
    if (H == NULL)
        return NULL;

    for (int i = 0; i < Maxsize; i++) {
        struct TreeNode leaf;
        /* unsigned char: a plain char index could be negative */
        leaf.weight = Weight[(unsigned char)CH[i]];
        leaf.left = NULL;
        leaf.right = NULL;
        Insert(H, &leaf);
    }
    return H;
}

/*
 * Removes the minimum-weight node from the heap and returns a newly
 * malloc'ed copy of it; the caller owns the returned node.
 * Returns NULL (after a diagnostic on stderr) if the heap is empty, or NULL
 * if the copy cannot be allocated (the heap is then left unchanged).
 */
HuffmanTree DeleteMin(MinHeap H)
{
    int Parent, Child;
    struct TreeNode last;
    HuffmanTree MinItem;

    if (H->Size == 0) {
        fprintf(stderr, "最小堆已空");
        return NULL;
    }
    MinItem = malloc(sizeof *MinItem);
    if (MinItem == NULL)
        return NULL;

    *MinItem = H->Elements[1];
    last = H->Elements[H->Size--]; /* take the last node, then shrink the heap */

    /* Sift down: find where the former last node belongs. */
    for (Parent = 1; Parent * 2 <= H->Size; Parent = Child) {
        Child = Parent * 2;
        if (Child != H->Size &&
            H->Elements[Child].weight > H->Elements[Child + 1].weight)
            Child++; /* pick the smaller of the two children */
        if (last.weight <= H->Elements[Child].weight)
            break;
        H->Elements[Parent] = H->Elements[Child];
    }
    H->Elements[Parent] = last;
    return MinItem;
}

/*
 * Consumes the heap and returns the root of the Huffman tree built from its
 * nodes (Size - 1 merges of the two smallest nodes). The caller owns the
 * tree. Returns NULL if the heap is empty or an allocation fails.
 */
HuffmanTree Huffman(MinHeap H)
{
    int merges = H->Size - 1;

    for (int i = 0; i < merges; i++) {
        /* Insert() copies the node by value, so a local is sufficient. */
        struct TreeNode merged;

        merged.left = DeleteMin(H);
        merged.right = DeleteMin(H);
        if (merged.left == NULL || merged.right == NULL) {
            FreeTree(merged.left);
            FreeTree(merged.right);
            return NULL;
        }
        merged.weight = merged.left->weight + merged.right->weight;
        Insert(H, &merged);
    }
    return DeleteMin(H);
}

/*
 * Returns the weighted path length of the subtree rooted at T, where T is at
 * depth Depth: the sum over all leaves of depth * weight.
 * A NULL subtree contributes 0.
 */
int Calwpl(HuffmanTree T, int Depth)
{
    if (T == NULL)
        return 0;
    if (T->left == NULL && T->right == NULL)
        return Depth * T->weight;
    return Calwpl(T->left, Depth + 1) + Calwpl(T->right, Depth + 1);
}

/*
 * Returns 1 if one of the two strings is a prefix of the other (equal
 * strings included), 0 otherwise.
 */
int isPreFix(const char a[], const char b[])
{
    /*
     * Compare characters, not pointers, and stop at the first terminator so
     * that identical strings never read past their end.
     */
    while (*a != '\0' && *b != '\0' && *a == *b) {
        a++;
        b++;
    }
    return *a == '\0' || *b == '\0';
}

/*
 * Returns 1 if any of the first n codes in s is a prefix of another one,
 * 0 otherwise.
 */
int HasPreFix(char s[][CODE_BUF], int n)
{
    for (int i = 0; i < n; i++) {
        /* j starts at i + 1: a code must not be compared with itself */
        for (int j = i + 1; j < n; j++) {
            if (isPreFix(s[i], s[j]))
                return 1;
        }
    }
    return 0;
}

#ifndef HUFFMAN_NO_MAIN
/*
 * Driver: reads the test case from stdin and prints "Yes" or "No" for each
 * submission. Returns non-zero on malformed input or allocation failure.
 */
int main(void)
{
    int n, i, k;
    int freq[256] = {0};
    char CH[MAX_SYMBOLS];

    if (scanf("%d", &n) != 1 || n < 1 || n > MAX_SYMBOLS)
        return 1;
    for (i = 0; i < n; i++) {
        int f;
        if (scanf(" %c %d", &CH[i], &f) != 2)
            return 1;
        freq[(unsigned char)CH[i]] = f;
    }

    MinHeap H = BuildMinHeap(freq, n, CH);
    if (H == NULL)
        return 1;
    HuffmanTree T = Huffman(H);
    MinHeap_Destroy(H);
    if (T == NULL)
        return 1;
    int wpl = Calwpl(T, 0);
    FreeTree(T);

    if (scanf("%d", &k) != 1)
        return 1;
    while (k-- > 0) {
        char str[MAX_SYMBOLS][CODE_BUF];
        int thiswpl = 0;
        int too_long = 0;

        for (i = 0; i < n; i++) {
            char sym;
            int c;

            /*
             * The leading space skips the newline left by the previous read.
             * The width 199 is CODE_BUF - 1 and keeps the read in bounds.
             */
            if (scanf(" %c %199s", &sym, str[i]) != 2)
                return 1;
            /*
             * Discard whatever is left of an over-long code so the following
             * reads stay aligned; such a submission is rejected below.
             */
            while ((c = getchar()) != EOF && !isspace(c))
                too_long = 1;
            thiswpl += freq[(unsigned char)sym] * (int)strlen(str[i]);
        }
        if (!too_long && wpl == thiswpl && !HasPreFix(str, n))
            printf("Yes\n");
        else
            printf("No\n");
    }
    return 0;
}
#endif /* HUFFMAN_NO_MAIN */