05-树9 Huffman Codes（30 point(s)）最小堆、哈夫曼树、dfs

来源：互联网发布：百姓自动发帖软件编辑：程序博客网时间：2024/06/03 23:02

05-树9 Huffman Codes（30 point(s)）

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer $N$ ( $2 \leq N \leq 63$ ), then followed by a line that contains all the $N$ distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer $M$ ( $\leq 1000$ ), then followed by $M$ student submissions. Each student submission consists of $N$ lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

思路：先建立最小堆，用最小堆生成哈夫曼树，计算出编码占用的最小空间。输入学生的编码时先看长度是否超过n-1，然后计算占用空间，最后检查任意两个编码是否相同，某个是否是另一个的前缀

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;

/*
 * Huffman Codes checker.
 *
 * Reads N symbols with frequencies, computes the optimal (Huffman) total
 * encoded length with a binary min-heap, then for each of M student
 * submissions prints "Yes" if the submitted code table has that optimal
 * total length and no code is a prefix of (or equal to) another one,
 * and "No" otherwise.
 */

const int MAX_SYMBOLS = 63;   /* upper bound on N from the problem statement */
const int MAX_CODE_LEN = 63;  /* upper bound on the length of a single code  */

typedef struct hnod *HNOD;
typedef struct hfm *HFM;

/* Huffman tree node: leaves carry a symbol, internal nodes only a weight. */
typedef struct hfm {
    char c;    /* symbol (meaningful for leaves only) */
    int f;     /* frequency / accumulated weight      */
    HFM l, r;  /* children; both NULL for a leaf      */
} hfm;

/* Binary min-heap of tree-node pointers, stored 1-based in nods[1..curn]. */
typedef struct hnod {
    HFM *nods;  /* array of node pointers, slot 0 unused */
    int curn;   /* number of elements currently stored   */
    int maxn;   /* capacity                               */
} hnod;

int n, wt, m;
int sz[256];  /* frequency of each symbol, indexed by its unsigned char value */
HNOD minheap;
HFM T;

/* malloc wrapper that terminates the program on allocation failure. */
static void *xmalloc(size_t bytes) {
    void *p = malloc(bytes);
    if (!p) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return p;
}

/* Allocate an empty min-heap with room for n elements (1-based storage). */
void buildmh() {
    minheap = (HNOD)xmalloc(sizeof(struct hnod));
    minheap->nods = (HFM *)xmalloc((n + 1) * sizeof(HFM));
    minheap->curn = 0;
    minheap->maxn = n;
}

/* Insert node h into the min-heap, sifting it up from the last slot. */
void insmh(HFM h) {
    int cld = ++minheap->curn;
    int par = cld / 2;
    while (par > 0 && h->f < minheap->nods[par]->f) {
        minheap->nods[cld] = minheap->nods[par];
        cld = par;
        par = cld / 2;
    }
    minheap->nods[cld] = h;
}

/* Remove and return the node with the smallest weight from the heap. */
HFM del() {
    HFM top = minheap->nods[1];
    /* Take the last element and sift it down from the root. */
    HFM last = minheap->nods[minheap->curn--];
    int par = 1;
    for (;;) {
        int cld = par * 2;
        if (cld > minheap->curn) {
            break;
        }
        /* Pick the lighter of the two children. */
        if (cld < minheap->curn &&
            minheap->nods[cld + 1]->f < minheap->nods[cld]->f) {
            cld++;
        }
        if (last->f <= minheap->nods[cld]->f) {
            break;
        }
        minheap->nods[par] = minheap->nods[cld];
        par = cld;
    }
    minheap->nods[par] = last;
    return top;
}

/*
 * Build the Huffman tree by repeatedly merging the two lightest nodes
 * until a single tree remains in the heap; returns its root.
 */
HFM buildhfm() {
    while (minheap->curn > 1) {
        HFM h1 = del();
        HFM h2 = del();
        HFM h3 = (HFM)xmalloc(sizeof(struct hfm));
        h3->c = '\0';
        h3->f = h1->f + h2->f;
        h3->l = h1;
        h3->r = h2;
        insmh(h3);
    }
    return minheap->nods[1];
}

/* Weighted path length of the subtree rooted at h, whose root is at depth w. */
int dfs(HFM h, int w) {
    if (!h->l) {
        return h->f * w;
    }
    return dfs(h->l, w + 1) + dfs(h->r, w + 1);
}

/* Release every node of the tree rooted at h. */
static void freetree(HFM h) {
    if (!h) {
        return;
    }
    freetree(h->l);
    freetree(h->r);
    free(h);
}

/*
 * Return true when codes a and b are compatible in a prefix code, i.e.
 * they differ somewhere within the length of the shorter one (so they are
 * not equal and neither is a prefix of the other).
 */
bool ck(const char a[], const char b[]) {
    size_t la = strlen(a);
    size_t lb = strlen(b);
    size_t shorter = la < lb ? la : lb;
    size_t i = 0;
    while (i < shorter && a[i] == b[i]) {
        i++;
    }
    return i != shorter;
}

int main() {
    /* cs[][] below is sized for at most MAX_SYMBOLS codes, so reject other N. */
    if (scanf("%d", &n) != 1 || n < 1 || n > MAX_SYMBOLS) {
        return 0;
    }
    buildmh();

    for (int i = 0; i < n; i++) {
        char tc;
        int tf;
        /* The leading blank skips any run of whitespace (spaces, CR, LF). */
        if (scanf(" %c %d", &tc, &tf) != 2) {
            return 0;
        }
        sz[(unsigned char)tc] = tf;
        HFM h = (HFM)xmalloc(sizeof(struct hfm));
        h->c = tc;
        h->f = tf;
        h->l = NULL;
        h->r = NULL;
        insmh(h);
    }

    T = buildhfm();
    wt = dfs(T, 0);  /* minimal total encoded length */

    if (scanf("%d", &m) != 1) {
        m = 0;
    }
    for (int i = 0; i < m; i++) {
        bool f = true;
        int tw = 0;
        char cs[MAX_SYMBOLS][MAX_CODE_LEN + 1];

        /* Always consume all n lines so the next submission stays aligned. */
        for (int j = 0; j < n; j++) {
            char tc;
            /* Width 63 == MAX_CODE_LEN keeps the read inside cs[j]. */
            if (scanf(" %c %63s", &tc, cs[j]) != 2) {
                freetree(T);
                free(minheap->nods);
                free(minheap);
                return 0;
            }
            int len = (int)strlen(cs[j]);
            /* With n leaves the deepest possible leaf is at depth n-1. */
            if (len > n - 1) {
                f = false;
            }
            if (f) {
                tw += len * sz[(unsigned char)tc];
            }
        }

        /*
         * A prefix code can never beat the Huffman cost, so any total other
         * than wt is either too long or not a prefix code.
         */
        if (tw != wt) {
            f = false;
        }

        /* Pairwise check: no two codes equal, none a prefix of another. */
        for (int j = 0; f && j < n; j++) {
            for (int k = j + 1; f && k < n; k++) {
                f = ck(cs[j], cs[k]);
            }
        }

        printf("%s\n", f ? "Yes" : "No");
    }

    freetree(T);
    free(minheap->nods);
    free(minheap);
    return 0;
}

阅读全文

0 0