poj 2804 词典（字典树或者快排+二分）

来源：互联网发布：png软件下载编辑：程序博客网时间：2024/05/17 03:57

2804:词典

总时间限制:: 3000ms
内存限制:: 65536kB

描述

你旅游到了一个国外的城市。那里的人们说的外国语言你不能理解。不过幸运的是，你有一本词典可以帮助你。

输入

首先输入一个词典，词典中包含不超过100000个词条，每个词条占据一行。每一个词条包括一个英文单词和一个外语单词，两个单词之间用一个空格隔开。而且在词典中不会有某个外语单词出现超过两次。词典之后是一个空行，然后给出一个由外语单词组成的文档，文档不超过100000行，而且每行只包括一个外语单词。输入中出现单词只包括小写字母，而且长度不会超过10。

输出

在输出中，你需要把输入文档翻译成英文，每行输出一个英文单词。如果某个外语单词不在词典中，就把这个单词翻译成“eh”。

样例输入

dog ogdaycat atcaypig igpayfroot ootfrayloops oopslayatcayittenkayoopslay

样例输出

catehloops

提示

输入比较大，推荐使用C语言的I / O函数。

这道题目开始做的时候想到的是字典树；后来因为这道题输入有点特殊，就一直不知道从何下手建树，后来看到别人处理输入的方法；一下子就来了灵感，套用了以前的模板，把代码敲好提交到百练上就a了，后来在我们学校的oj提交；直接被一个数据难倒了。。。（百练的数据有点水啊），后来就一直再想解决办法；开始的那个程序，如果与给的单词部分匹配直到给的单词结束就输出结果了，但是字典树里面还有，不是完全匹配；当时因为a了就没有考虑到这种情况；一直卡在这里；今天看了别人的ac自动机的代码；又给了我一点提示；在

节点中设置一个单词完结的标记；这样的处理，开始我也想了好久，还是有点水啊；一直出现异常的结束；后来终于写好了，感觉还是有点投机取巧的意思，在两个oj上都a了，但是还可以优化；有时候运行还是有问题，我也不知道是什么原因；

下面是代码；能ac的，还能够优化啊！！

#include <cstdio>#include <cstring>#include <cstdlib>typedef struct node //节点的结构体{    char eng[12];    int count; //标记单词是否结束    struct node * next[26];}node;int flag;void Insert(node *T,char *f,char *e) //插入{    node *p,*q;    p=T;    int len,k;    len=strlen(f);    if(len==0) return ;    for(int i=0;i<len;i++)    {        k=f[i]-'a';        if(p->next[k]==NULL)        {            q=(node*)malloc(sizeof(node)); //增加新节点            for(int i=0;i<26;i++)            {                q->count=0;                strcpy(q->eng,e);                q->next[i]=NULL;            }            p->next[k]=q;            p=q;        }        else            p=p->next[k];    }    p->count++;}void Search(node *T,char *s)//查找{     node *q;     q=T;     int k,i=0,len;     int flag=0;     for(i=0;i<26;i++)     {         k=s[i]-'a';         q=q->next[k];         if(q==NULL)        {            flag=1;            printf("eh\n");            break;        }         if(q->count>0)//单词结束的标记        {             printf("%s\n",q->eng);             flag=1;             break;        }     }}void Release(node *T)//销毁{    for(int i=0;i<26;i++)        if(T->next[i]!=NULL)            Release(T->next[i]);        free(T);}int main(){     //freopen("1.txt","r",stdin);     char english[20],forigen[20];     node *T;     T=(node *)malloc(sizeof(node));     T->count=0;     for(int i=0;i<26;i++)        T->next[i]=NULL;    while(1)      {        english[0]=getchar();        if(english[0]=='\n') break;        scanf("%s %s",english+1,forigen);        Insert(T,forigen,english);        getchar();    }    while(scanf("%s",forigen)!=EOF)    {        flag=0;        Search(T,forigen);    }    Release(T);    return 0;}

第一次ac的水代码；

void Search(char *f){   node *q;   int len,k;   q=T;   len=strlen(f);    for(int i=0;i<len;i++)    {        k=f[i]-'a';        q=q->next[k];        if(q==NULL)        {            printf("eh\n");            flag=1;            break;        }    }    if(flag==0) printf("%s\n",q->eng);//部分匹配就直接输出了}

别人写的字典树代码，好巧妙值得学习；把单词最后一个字符分开处理，做标志，这样就能避免我先前所犯的错误；看了别人的代码，茅塞顿开啊；

#include "stdio.h"#include "string.h"#include "math.h"#include "algorithm"#include "iostream"using namespace std;typedef struct  node{      struct node *next[26];      char   ans[12];}node;char arr1[12],arr2[12],str[25];node *T;void Diction(){     node *q,*p=T;     int i,j,k,len;     len=strlen(arr2);     for(i=0;i<len-1;i++)     {          k=arr2[i]-'a';          if(p->next[k]==NULL){                q=(node*)malloc(sizeof(node));                p->next[k]=q;q->ans[0]=0;                for(j=0;j<26;j++)                   q->next[j]=NULL;                p=q;          }else{                p=p->next[k];          }     }     k=arr2[i]-'a';//最后一个字符     if(p->next[k]==NULL){         q=(node*)malloc(sizeof(node));         p->next[k]=q;strcpy(q->ans,arr1);//把arr数组复制给最后一个字符的节点；其他节点都是0           for(j=0;j<26;j++)              q->next[j]=NULL;     }else{           p=p->next[k];           strcpy(p->ans,arr1);     }}void search()//查询{       node *p=T;       int i,len,k;       len=strlen(str);       for(i=0;i<len-1;i++)       {              k=str[i]-'a';              if(p->next[k]==NULL){                   printf("eh\n");                    break;              }else{                   p=p->next[k];              }       }       if(i==len-1)//最后一个字符       {             k=str[i]-'a';             if(p->next[k]==NULL){                   printf("eh\n");             }else{                   p=p->next[k];                   if(p->ans[0]!=0)//前面插入那里做的标志                       puts(p->ans);                   else                       printf("eh\n");             }       }}int main(){     //freopen("input.txt","r",stdin);     int i;     T=(node*)malloc(sizeof(node));     for(i=0;i<26;i++)         T->next[i]=NULL;     while(1)     {          gets(str);          if(strcmp(str,"")==0)             break;          sscanf(str,"%s %s",arr1,arr2);          Diction();     }     while(scanf("%s",str)!=EOF)              search();         return 0;}

还有一种思路就是直接用快排+二分的组合，这种用法也很常见；在这个题目中用到了fgets,sscanf这两个函数，都是第一次用；还用了c语言库里面的qsort和bsearch，然后自己又写了一次二分；比写字典树难度还是小了很多，很多细节上还是把握的不好啊；

下面是ac的代码，直接调用库函数写的；

#include <cstdio>#include <cstring>#include <cstdlib>#include <algorithm>using namespace std;const int maxn=100000+10;struct node //节点的结构体{    char eng[12];    char fore[12];};node dictionary[maxn];int cmp(const void *a,const void *b){ //比较函数    return strcmp(((node *)a)->fore,((node *)b)->fore);}int search(const void *a,const void *b){ //二分查找函数    return strcmp((char *)a,((node *)b)->fore);}int main(){     //freopen("1.txt","r",stdin);     char english[30],forigen[20];     int count=0,flag;     node *p;    while(fgets(english,29,stdin)&&english[0]!='\n')    {        sscanf(english,"%s%s",dictionary[count].eng,dictionary[count].fore);        count++;    }    qsort(dictionary,count,sizeof(node),cmp);   while(scanf("%s",forigen)!=EOF)    {        p=NULL;        p=(node*)bsearch(forigen,dictionary,count,sizeof(node),search);        if(p)         printf("%s\n",p->eng);        else            printf("eh\n");}    return 0;}

自己写得二分查找；

#include <cstdio>#include <cstring>#include <cstdlib>#include <algorithm>using namespace std;const int maxn=100000+10;struct node //节点的结构体{    char eng[12];    char fore[12];};node dictionary[maxn];bool Cmp(node one, node two){    return strcmp(one.fore, two.fore) < 0;}int bsearch(char *s,int n) //二分查找{    int mid,start,end;    start=0;end=n-1;    int flag=0;    while(start<=end)    {        mid=(start+end)/2;        flag=strcmp(s,dictionary[mid].fore);        if(flag==0)            return mid;        if(flag<0)        {            end=mid-1;        }        else        {            start=mid+1;        }    }    return -1;}int main(){     //freopen("1.txt","r",stdin);     char english[30],forigen[20];     int count=0,flag;     node *p;    while(fgets(english,29,stdin)&&english[0]!='\n')    {        sscanf(english,"%s%s",dictionary[count].eng,dictionary[count].fore);        count++;    }   sort(dictionary,dictionary+count,Cmp);    while(scanf("%s",forigen)!=EOF)    {      flag=bsearch(forigen,count);        if(flag!=-1)        {            printf("%s\n",dictionary[flag].eng);        }        else            printf("eh\n");    }    return 0;}

在平时做题中还是要多积累；

总结：这个题目可以用多种思路去做，平时练习可以多去尝试一下，熟悉一下库函数；fgets，sscanf对输入格式的控制；qsort中cmp函数的格式，还用bsearch里面search函数的写法和格式，传递的参数；对于字典树在模板的基础上还要根据题意灵活处理，像这道题的话，设置一个参数表示结束就会很方便；

题不在多，在精！！

6 0

poj 2804 词典 （字典树 或者 快排+二分）

2804:词典

poj 2804 词典（字典树或者快排+二分）