浙大数据结构-HuffMan Code

来源:互联网 发布:门口的野蛮人 知乎 编辑:程序博客网 时间:2024/05/18 14:24

树9 Huffman Codes


In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] … c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 ‘0’s and ‘1’s.

Output Specification:

For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.


Sample Input:


7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

基本思路

一.建立一个小顶堆(最小堆):
1.1 向堆中插入元素;
1.2 从堆顶删除并返回当前最小的元素

二.通过小顶堆,建立一棵 Huffman Tree
2.1 每次从小顶堆中取出两个最小值,作为左右子结点,并把两数之和作为新结点放回小顶堆,反复循环,直到构成 Huffman Tree;
2.2 使用递归的方法计算 WPL,即每个叶子结点的 深度 * 频率 之和,这个值就是编码后字符串的总长度;

三.验证输入的例子是否相同;
3.1 判定正确的条件: 编码总长度与 Huffman 树的 WPL 相等,同时该编码是前缀码(任何一个字符的编码都不是另一个字符编码的前缀)
3.2 WPL : 单个字符编码长度*对应频率,然后求和;
3.3 按每个字符的编码逐位建立一棵编码树(0 走左, 1 走右); 每插入一个字符编码,应该满足:1.编码结束处是叶子节点,2.路径上经过的节点和结束节点都没有被别的字符的编码占用(可以在节点里添加参数flag判断)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <iostream>

using namespace std;

#define MAXSIZE 164   /* capacity of the heap array; slot 0 holds a sentinel */
#define MinData -100  /* sentinel key; assumed smaller than every real frequency */

/* Huffman tree node; `data` is the weight (frequency or sum of children). */
typedef struct _node {
    int data;
    struct _node *left;
    struct _node *right;
} Node;

/* Array-based binary min-heap of Node values, 1-indexed (datas[0] is a sentinel). */
typedef struct _heap {
    int size;
    Node datas[MAXSIZE];
} Heap;

/*
 * Node of the binary code trie used to validate a submission.
 * Flag == 1 marks the end of some character's code.
 * Nodes are obtained with malloc, so every field must be set explicitly
 * (see NewJNode); no in-class initializers are relied upon.
 */
typedef struct _Node {
    int Flag;
    struct _Node *left;
    struct _Node *right;
} JNode;

Heap *BuildHeap(int arr[], int size);
void Insert(Heap *MinHeap, Node *node);
Node *DeleteMin(Heap *minHeap);
void PrintHeap(Heap *minHeap);
Node *CreateNode(int V);
Node *BuildHuffman(Heap *minHeap);
int calculate(Node *root);
int calculate(Node *root, int depth);

/*
 * Reads N "character frequency" pairs from standard input and stores the
 * frequencies, in input order, into A[0..N-1]. The characters are discarded.
 */
void ReadData(int N, int A[])
{
    char s = '\0';
    int value = 0;
    for (int i = 0; i < N; ++i) {
        cin >> s;
        cin >> value;
        A[i] = value;
    }
}

/* Debug helper: prints the weights of a Huffman tree in pre-order. */
void printTree(Node *node)
{
    if (node != NULL) {
        printf("%d ", node->data);
        printTree(node->left);
        printTree(node->right);
    }
}

/*
 * Releases an entire code trie.
 * Children are freed before their parent (post-order) so no node is touched
 * after it has been freed, and internal nodes are released as well as leaves.
 */
void freeTree(JNode *node)
{
    if (node == NULL) {
        return;
    }
    freeTree(node->left);
    freeTree(node->right);
    free(node);
}

/* Allocates a trie node with Flag == 0 and no children; exits on OOM. */
static JNode *NewJNode(void)
{
    JNode *fresh = (JNode *)malloc(sizeof(JNode));
    if (fresh == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    fresh->Flag = 0;
    fresh->left = NULL;
    fresh->right = NULL;
    return fresh;
}

/*
 * Inserts one code into the trie rooted at J and reports whether the set of
 * codes inserted so far is still prefix-free.
 *
 * S   - the code; '0' descends left, any other character descends right
 * J   - root of the trie for the current submission
 * len - number of characters of S to consume
 *
 * Returns false if the path runs through a node that already ends another
 * code (an earlier code is a prefix of this one, or the codes are equal), or
 * if the final node already has children (this code is a prefix of an
 * earlier one). Otherwise marks the final node and returns true.
 */
bool Judge(char S[], JNode *J, int len)
{
    for (int i = 0; i < len; ++i) {
        JNode **next = (S[i] == '0') ? &J->left : &J->right;
        if (*next == NULL) {
            *next = NewJNode();
        } else if ((*next)->Flag == 1) {
            return false;
        }
        J = *next;
    }
    J->Flag = 1;
    return J->left == NULL && J->right == NULL;
}

/*
 * Reads the frequencies, computes the optimal weighted path length with a
 * Huffman tree, then for each submission prints "Yes" when its codes are
 * prefix-free and their total encoded length equals that optimum,
 * otherwise "No".
 */
int main()
{
    int n, m;
    /* The heap stores at most MAXSIZE - 1 elements (slot 0 is the sentinel). */
    if (scanf("%d", &n) != 1 || n < 1 || n > MAXSIZE - 1) {
        return 1;
    }

    int arrs[MAXSIZE];  /* fixed-size buffer instead of a non-standard VLA */
    ReadData(n, arrs);

    Heap *MinHeap = BuildHeap(arrs, n);
    Node *rootHuffman = BuildHuffman(MinHeap);
    int wpl = calculate(rootHuffman);
    /* Tree nodes are separate allocations, so the heap array is no longer needed. */
    free(MinHeap);

    if (scanf("%d", &m) != 1) {
        return 1;
    }

    char temp[125] = "\0";
    char c = '\0';
    for (int j = 0; j < m; j++) {
        int count = 0;
        bool ok = true;
        JNode *jnode = NewJNode();
        for (int k = 0; k < n; ++k) {
            cin >> c;
            /* Bound the extraction so an over-long token cannot overflow temp. */
            cin.width((streamsize)sizeof temp);
            cin >> temp;
            int l = (int)strlen(temp);
            /* Codes are expected in the same order as the frequencies. */
            count += l * arrs[k];
            /* Keep consuming the remaining lines even after a failure. */
            if (ok) {
                ok = Judge(temp, jnode, l);
            }
        }
        freeTree(jnode);
        /* Correct only if prefix-free AND of optimal total length. */
        if (ok && (count == wpl)) {
            cout << "Yes" << endl;
        } else {
            cout << "No" << endl;
        }
    }
    return 0;
}

/*
 * Inserts a copy of *node into the min-heap, sifting it up from the new last
 * slot. The heap stores the node by value; the caller keeps ownership of the
 * `node` allocation itself. A full heap is reported on stderr and left
 * unchanged.
 */
void Insert(Heap *MinHeap, Node *node)
{
    if (MinHeap->size >= MAXSIZE - 1) {
        fprintf(stderr, "heap is full\n");
        return;
    }
    int i = ++(MinHeap->size);
    /* i > 1 stops at the root even if the sentinel is not smaller than the key. */
    for (; i > 1 && MinHeap->datas[i / 2].data > node->data; i /= 2) {
        MinHeap->datas[i] = MinHeap->datas[i / 2];
    }
    MinHeap->datas[i] = *node;
}

/*
 * Allocates a min-heap and inserts one leaf node per entry of arr[0..n-1].
 * The caller owns the returned heap and releases it with free().
 */
Heap *BuildHeap(int arr[], int n)
{
    Heap *minHeap = (Heap *)malloc(sizeof(struct _heap));
    if (minHeap == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    minHeap->size = 0;
    /* Sentinel in slot 0, written in place instead of via a leaked node. */
    minHeap->datas[0].data = MinData;
    minHeap->datas[0].left = NULL;
    minHeap->datas[0].right = NULL;

    for (int i = 0; i < n; i++) {
        Node *node = CreateNode(arr[i]);
        Insert(minHeap, node);
        free(node);  /* Insert copied the value; the temporary is done. */
    }
    return minHeap;
}

/* Allocates a childless tree node with weight V; exits on OOM. */
Node *CreateNode(int V)
{
    Node *node = (Node *)malloc(sizeof(struct _node));
    if (node == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->data = V;
    node->left = node->right = NULL;
    return node;
}
void PrintHeap(Heap * minHeap) {    int length= minHeap->size;    for(int i=1;i<=length;i++){        printf("%d ",minHeap->datas[i].data) ;    }     printf("\n"); } Node * DeleteMin(Heap * minHeap){    int size =  minHeap->size;    int resultsize=(minHeap->size--);    Node * datas = minHeap->datas;    Node lastnode =  datas[size];    //删除的时候严重查看这里     Node * firstnode=CreateNode(datas[1].data);    firstnode->left= datas[1].left;    firstnode->right= datas[1].right;//  Node * firstnode= &datas[1];     int parent,child;    for(parent=1;parent*2 <=resultsize;parent=child){        child=parent*2;        if(child!=resultsize&& datas[child].data>datas[child+1].data){            child++;        }        if(lastnode.data<=datas[child].data) break;         else        {            minHeap->datas[parent]= minHeap->datas[child];          }    }    minHeap->datas[parent]=lastnode;//  printf("delete =%d\n",firstnode->data);    return firstnode;} Node * BuildHuffman(Heap * minHeap){//  printf("BuildHuffman\n");     int size = minHeap->size;    Node * newNode;    for(int i=0;i<size-1;i++){        newNode = CreateNode(0);        newNode->left =DeleteMin(minHeap);          newNode->right =DeleteMin(minHeap);        newNode->data=newNode->left->data + newNode->right->data;        Insert(minHeap,newNode);    }     newNode = CreateNode(0);     newNode=DeleteMin(minHeap);      return newNode;}int calculate(Node * root){    return calculate(root,0);}int calculate(Node * node,int depth){    if(node->left==NULL&&node->right==NULL){        return depth*(node->data);    }else{        return calculate(node->left,depth+1)+calculate(node->right,depth+1);    }}