AC自动机学习记录

来源:互联网 发布:淘宝买家怎么升级 编辑:程序博客网 时间:2024/06/06 15:02

学完了Trie树和KMP算法,我们就可以继续学习AC自动机了。。。

AC自动机,全名Aho-Corasick自动机(Aho-Corasick string matching,并不是“所有题都可以AC”的意思= =,否则OI比赛还有什么意义),用于多模板(多模式串)的字符串匹配。其基本做法是在Trie树上建立fail指针(基本相当于KMP算法中的next数组),然后在Trie上进行类似KMP的匹配即可,不多说了,直接上习题。

COGS 1913 AC自动机的裸题,直接套模板。code:

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

// Reads n lowercase patterns and one text from ACautomata.in and writes, for
// every pattern, "<pattern> <occurrence count>" to ACautomata.out.  Matching
// is done with an Aho-Corasick automaton: a trie of the patterns plus fail
// links computed by a breadth-first pass.

const int MAX_PATTERNS=100;      // patterns are stored 1-based, hence the +1 rows
const int MAX_PATTERN_LEN=500;   // buffer size per pattern, including the '\0'

struct trie_node
{
    trie_node *next[26],*fail;
    int num;                     // id of the first pattern ending here, 0 if none
    trie_node()
    {
        num=0; fail=NULL;
        for (int i=0;i<=25;++i) next[i]=NULL;
    }
}*root;

char str[MAX_PATTERNS+1][MAX_PATTERN_LEN],art[10000000];
int owner[MAX_PATTERNS+1];       // owner[i]: id whose counter pattern i reports
int tong[MAX_PATTERNS+1];        // tong[id]: occurrences; tong[0] is never incremented
trie_node *queue[MAX_PATTERNS*MAX_PATTERN_LEN+2];   // 1-based BFS queue

// Inserts str[num] into the trie.  Identical patterns share one terminal
// node, so later duplicates are mapped to the first id through owner[];
// the original code overwrote the id and left the earlier copy at zero.
void insert(int num)
{
    trie_node *p=root;
    owner[num]=0;
    for (int i=0;str[num][i];++i)
    {
        int c=str[num][i]-'a';
        if (c<0||c>25) return;   // not a lowercase letter: cannot be indexed
        if (!p->next[c]) p->next[c]=new trie_node();
        p=p->next[c];
    }
    if (p==root) return;         // empty pattern: never mark the root
    if (!p->num) p->num=num;
    owner[num]=p->num;
}

// Breadth-first construction of the fail links.  root->fail stays NULL, which
// is what terminates every fail-chain walk.
void work()
{
    int head=0,tail=1;
    queue[1]=root;
    while (head<tail)
    {
        trie_node *now=queue[++head];
        for (int i=0;i<=25;++i)
        {
            trie_node *child=now->next[i];
            if (!child) continue;
            trie_node *p=now->fail;
            while (p&&!p->next[i]) p=p->fail;
            child->fail=p?p->next[i]:root;
            queue[++tail]=child;
        }
    }
}

int main()
{
    int n,i,len,x;
    trie_node *now,*p;
    freopen("ACautomata.in","r",stdin);
    freopen("ACautomata.out","w",stdout);
    if (scanf("%d",&n)!=1||n<0||n>MAX_PATTERNS) return 0;
    root=new trie_node();
    for (i=1;i<=n;++i)
    {
        // The width must stay one below MAX_PATTERN_LEN.
        if (scanf("%499s",str[i])!=1) str[i][0]='\0';
        insert(i);
    }
    work();
    memset(tong,0,sizeof(tong));
    if (scanf("%9999999s",art)!=1) art[0]='\0';
    len=strlen(art);
    now=root;
    for (i=0;i<len;++i)
    {
        x=art[i]-'a';
        if (x<0||x>25) { now=root; continue; }   // no pattern can span this character
        while (now&&!now->next[x]) now=now->fail;
        if (!now) { now=root; continue; }
        now=now->next[x];
        // Every node on the fail chain is a suffix of the current match.
        for (p=now;p;p=p->fail)
            if (p->num) tong[p->num]++;
    }
    for (i=1;i<=n;++i)
        printf("%s %d\n",str[i],tong[owner[i]]);
    fclose(stdin);
    fclose(stdout);
    return 0;
}
NOI 2011 COGS 1376 BZOJ 2434 阿狸的打字机

一道比较有思维复杂度的题目,首先我们考虑朴素的做法,我们在y节点的每一个节点上都沿fail指针遍历,每找到一个x的结尾节点ans+1.我们可以逆向思维一下,这样做其实等价于找以x的结尾节点为fail指针的且是y的一部分的个数,于是我们可以逆向fail指针建立fail树,我们观察dfs序后发现每棵子树的dfs序是连续的,于是我们可以采用树状数组来动态维护每个节点子节点的个数。code:

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

// Typewriter problem: the input string consists of lowercase letters (append),
// 'B' (delete the last letter) and 'P' (print the current string).  Each query
// (x, y) asks how often the x-th printed string occurs in the y-th one.
//
// Approach: build the trie of all printed strings, build fail links, reverse
// them into a "fail tree", number that tree in dfs order and answer the
// queries offline while replaying the keystrokes with a Fenwick tree that
// marks the nodes on the current root-to-cursor path.

struct trie_node
{
    trie_node *next[26],*fail,*fath;
    int l,r;       // dfs-order interval covered by this node's fail-tree subtree
    int ppoint;    // head of this node's fail-tree child list (0 = empty)
    trie_node()
    {
        l=r=0; ppoint=0; fail=fath=NULL;
        for (int i=0;i<=25;++i) next[i]=NULL;
    }
}*root;

const int MAXN=1000000;

trie_node *vp[MAXN];       // vp[e]: child endpoint of fail-tree edge e
int pnext[MAXN];           // pnext[e]: next edge in the same child list
trie_node *fnum[MAXN];     // fnum[k]: trie node of the k-th printed string

// Queries grouped by y as linked lists.  These arrays used to be called
// point/next/v/num; a global named `next` is ambiguous with std::next once
// `using namespace std` is in effect, so they were renamed.
int qhead[MAXN],qnext[MAXN],qx[MAXN],qid[MAXN];
int ansi[MAXN],SIZE;

trie_node *queue[MAXN];
int t=0,n,numi,edge=0;
int bit[10000000];
char xl[MAXN];

int lowbit(int x) { return x&(-x); }

// Fenwick tree over dfs indices 1..SIZE.
void bit_ins(int x,int a)
{
    for (;x<=SIZE;x+=lowbit(x)) bit[x]+=a;
}

int bit_sum(int x)
{
    int ans=0;
    for (;x;x-=lowbit(x)) ans+=bit[x];
    return ans;
}

// Replays the first len keystrokes starting at node p to build the trie and
// records the node reached at every 'P' in fnum[].
void insert(trie_node *p,int len)
{
    for (int i=0;i<len;++i)
    {
        char c=xl[i];
        if (c>='a'&&c<='z')
        {
            int k=c-'a';
            if (!p->next[k])
            {
                p->next[k]=new trie_node();
                p->next[k]->fath=p;
            }
            p=p->next[k];
        }
        else if (c=='P')
            fnum[++t]=p;
        else if (c=='B')
        {
            if (p->fath) p=p->fath;   // a 'B' on the empty string has nothing to delete
        }
    }
}

// Breadth-first construction of the fail links; root->fail stays NULL.
void work()
{
    int head=0,tail=1;
    queue[1]=root;
    while (head<tail)
    {
        trie_node *now=queue[++head];
        for (int i=0;i<=25;++i)
        {
            trie_node *child=now->next[i];
            if (!child) continue;
            trie_node *p=now->fail;
            while (p&&!p->next[i]) p=p->fail;
            child->fail=p?p->next[i]:root;
            queue[++tail]=child;
        }
    }
}

// Adds the reversed fail link (p->fail -> p) for every node in p's trie subtree.
void make_fail(trie_node *p)
{
    if (p->fail!=NULL)
    {
        edge++;
        pnext[edge]=p->fail->ppoint;
        p->fail->ppoint=edge;
        vp[edge]=p;
    }
    for (int i=0;i<=25;++i)
        if (p->next[i])
            make_fail(p->next[i]);
}

// Numbers the fail tree in dfs order; [p->l, p->r] is p's subtree.
void dfs(trie_node *p)
{
    p->l=++numi;
    for (int i=p->ppoint;i!=0;i=pnext[i])
        dfs(vp[i]);
    p->r=numi;
}

// Replays the keystrokes from node p.  At the y-th 'P' the Fenwick tree holds
// +1 on every node of the current string's trie path, so a range sum over
// x's fail-tree interval is the answer to query (x, y).
void solve(trie_node *p)
{
    int len=strlen(xl),printed=0;
    for (int i=0;i<len;++i)
    {
        char c=xl[i];
        if (c=='P')
        {
            printed++;
            for (int j=qhead[printed];j;j=qnext[j])
            {
                trie_node *x=fnum[qx[j]];
                ansi[qid[j]]=bit_sum(x->r)-bit_sum(x->l-1);
            }
        }
        else if (c=='B')
        {
            if (p->fath)              // mirrors the guard in insert()
            {
                bit_ins(p->l,-1);
                p=p->fath;
            }
        }
        else
        {
            p=p->next[c-'a'];
            bit_ins(p->l,1);
        }
    }
}

int main()
{
    int l,i,e=0,x,y;
    root=new trie_node();
    if (scanf("%999999s",xl)!=1) return 0;
    l=strlen(xl);
    insert(root,l);
    work();
    make_fail(root);
    dfs(root);
    SIZE=numi;
    scanf("%d",&n);
    for (i=1;i<=n;++i)
    {
        scanf("%d%d",&x,&y);
        ++e; qx[e]=x; qid[e]=i;
        qnext[e]=qhead[y]; qhead[y]=e;
    }
    solve(root);
    for (i=1;i<=n;++i)
        printf("%d\n",ansi[i]);
    return 0;
}

JSOI 2007 BZOJ 1030 文本生成器

AC自动机上的DP,我们可以使用记忆化搜索,如果找到了以某个节点为结尾的位置,就加上剩余长度的26次方。code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<vector>
#define P 10007
using namespace std;

// Counts, modulo P, the uppercase strings of length l that contain at least
// one of the n given words.  Memoised search over (automaton node, remaining
// length): as soon as a transition completes a word, every way of filling the
// remaining positions counts, i.e. 26^(remaining) strings.

struct trie_node
{
    trie_node *next[26],*fail;
    bool f;        // a word ends exactly at this node
    int num;       // 1-based BFS index assigned by work(); row index of memo
    trie_node()
    {
        num=0; f=false; fail=NULL;
        for (int i=0;i<=25;++i) next[i]=NULL;
    }
}*root,*queue[100000];

char s[1024];      // the original 50-byte buffer overflowed on longer words
int l,n,node_count;
vector<int> mi;                 // mi[k] = 26^k mod P
// memo[node][rest], -1 = not computed.  Sized from the real node count and l;
// the fixed [100][110000] tables were indexed [node][length] and overflowed
// as soon as the trie had 100 or more nodes.
vector<vector<int> > memo;

// Inserts the word currently held in s.
void insert()
{
    trie_node *p=root;
    for (int i=0;s[i];++i)
    {
        int c=s[i]-'A';
        if (c<0||c>25) return;  // not an uppercase letter: cannot be indexed
        if (!p->next[c]) p->next[c]=new trie_node();
        p=p->next[c];
    }
    if (p!=root) p->f=true;
}

// Breadth-first fail-link construction; also numbers the nodes 1..node_count.
void work()
{
    int head=0,tail=1;
    queue[1]=root;
    root->num=1;
    while (head<tail)
    {
        trie_node *now=queue[++head];
        for (int i=0;i<=25;++i)
        {
            trie_node *child=now->next[i];
            if (!child) continue;
            trie_node *p=now->fail;
            while (p&&!p->next[i]) p=p->fail;
            child->fail=p?p->next[i]:root;
            queue[++tail]=child;
            child->num=tail;
        }
    }
    node_count=tail;
}

// True when the string spelled by p ends with some word, i.e. p or one of
// its fail ancestors below the root is a word end.
bool judge(trie_node *p)
{
    for (;p!=root;p=p->fail)
        if (p->f) return true;
    return false;
}

// Automaton transition from p on letter j, following fail links on mismatch.
trie_node *step(trie_node *p,int j)
{
    while (p&&!p->next[j]) p=p->fail;
    return p?p->next[j]:root;
}

// Number (mod P) of ways to append `rest` letters at state p such that the
// appended part completes at least one word.
int dfs(trie_node *p,int rest)
{
    if (rest==0) return 0;
    if (memo[p->num][rest]!=-1) return memo[p->num][rest];
    int ans=0;
    for (int j=0;j<=25;++j)
    {
        trie_node *to=step(p,j);
        if (judge(to)) ans=(ans+mi[rest-1])%P;
        else ans=(ans+dfs(to,rest-1))%P;
    }
    memo[p->num][rest]=ans;
    return ans;
}

int main()
{
    int i,ans;
    root=new trie_node();
    if (scanf("%d%d",&n,&l)!=2||l<0) return 0;
    mi.assign(l+1,1);
    for (i=1;i<=l;++i)
        mi[i]=(mi[i-1]*26)%P;
    for (i=1;i<=n;++i)
    {
        if (scanf("%1023s",s)!=1) break;
        insert();
    }
    work();
    memo.assign(node_count+1,vector<int>(l+1,-1));
    ans=dfs(root,l);
    printf("%d\n",ans);
    return 0;
}

其实上面这道题也可以使用AC自动机上的矩阵乘法:matrix.num[i][j]表示从编号为i的节点走一步走到编号为j的节点且不经过任何一个单词结尾的方法数。然后求该矩阵的L次幂(L为文本长度),把从根节点出发到各节点的方案数求和,得到不含任何单词的文本数,再用26^L减去它即可。注意在这里,如果一个节点没有编号为x的子节点,要一直沿fail指针走直到找到有编号为x子节点的节点,这样也算一步能走过来(有点像AC自动机失配时的找法)。code:

#include<iostream>#include<cstdio>#include<cstring>#define P 10007using namespace std;struct trie_node{trie_node *next[26],*fail;int num; bool f;trie_node() {int i; for (i=0;i<=25;++i) next[i]=NULL; num=0;f=false;}}*root,*queue[100000];struct hp{int num[65][65];}matrix,ans,null;char s[1000000];int mi[1010000];int t=1,size;void insert(trie_node *p,int i,int len){if (p->next[(int)(s[i])-'A']==NULL)  {    p->next[(int)(s[i])-'A']=new trie_node();        p->next[(int)(s[i])-'A']->num=++t;    p=p->next[(int)(s[i])-'A'];  }else  p=p->next[(int)(s[i])-'A'];if (i==len-1)  {    p->f=true;    return;  }insert(p,i+1,len);}void work(){int i,head,tail; trie_node *now,*p;head=0; tail=1; queue[tail]=root;while (head<tail)  {    head++; now=queue[head];    for (i=0;i<=25;++i)      {        if (now->next[i]!=NULL)          {            p=now->fail;    while (p&&!p->next[i]) p=p->fail;    if (p)       {    now->next[i]->fail=p->next[i];    if (p->next[i]->f)      now->next[i]->f=true;  }else  now->next[i]->fail=root;if (!now->f&&!now->next[i]->f)  matrix.num[now->num][now->next[i]->num]++;    queue[++tail]=now->next[i];           }        else          {            p=now->fail;            while (p&&!p->next[i]) p=p->fail;            if (p&&!now->f&&!p->next[i]->f)              matrix.num[now->num][p->next[i]->num]++;            if (!p)              matrix.num[now->num][root->num]++;          }      }  }}hp cheng(hp a,hp b){int i,j,k;hp c;for (i=1;i<=size;++i)  for (j=1;j<=size;++j)    {      c.num[i][j]=0;      for (k=1;k<=size;++k)        c.num[i][j]=(c.num[i][j]+a.num[i][k]*b.num[k][j])%P;    }return c;}hp mult(hp mat,int t){hp temp;int i,j;if (t==0) return null;if (t==1) return mat;temp=mult(mat,t/2);if (t%2==0) return cheng(temp,temp);else return cheng(cheng(temp,temp),mat);}int main(){int ansi=0,len,n,l,i,j;freopen("textgen.in","r",stdin);freopen("textgen.out","w",stdout);scanf("%d%d",&n,&l);mi[0]=1;for (i=1;i<=l;++i)  mi[i]=(mi[i-1]*26)%P;root=new trie_node(); root->num=1;for 
(i=1;i<=n;++i)  {    scanf("%s",&s);    len=strlen(s);    insert(root,0,len);      }    size=t;    work();    ans=mult(matrix,l);    for (i=1;i<=size;++i)      ansi=(ansi+ans.num[1][i])%P;    ansi=(mi[l]-ansi+P)%P;    printf("%d\n",ansi);    fclose(stdin);    fclose(stdout);}
POJ 2778 DNA测序

几乎与上一道题一模一样,不过要注意的是快速矩阵幂的时候注意不要开太多临时变量,否则会炸栈 code:

#include<iostream>#include<cstdio>#include<cstring>#define P 100000using namespace std;struct trie_node{trie_node *next[4],*fail;bool f; int num;trie_node(){fail=NULL; f=false; num=0; int i; for (i=0;i<=3;++i) next[i]=NULL;}}*root,*queue[101];struct hp{long long num[101][101];}matrix,ans,temp;int t=1,n,l,size;char s[100];int index(char c){if (c=='A') return 0;if (c=='T') return 1;if (c=='G') return 2;if (c=='C') return 3;}void insert(trie_node *p,int i,int len){if (p->next[index(s[i])]==NULL)  {    p->next[index(s[i])]=new trie_node();    p->next[index(s[i])]->num=++t;    p=p->next[index(s[i])];  }else  p=p->next[index(s[i])];if (i==len-1)  {    p->f=true;    return;  }insert(p,i+1,len);}void work(){int head,tail,i;trie_node *now,*p;head=0; tail=1; queue[1]=root;while (head<tail)  {    head++; now=queue[head];    for (i=0;i<=3;++i)      if (now->next[i]!=NULL)        {          p=now->fail;  while (p&&!p->next[i]) p=p->fail;  if (p)    {      now->next[i]->fail=p->next[i];      if (p->next[i]->f)        now->next[i]->f=true;    }  else    now->next[i]->fail=root;  queue[++tail]=now->next[i];  if (!now->f&&!now->next[i]->f)    matrix.num[now->num][now->next[i]->num]++;        }      else        {          p=now->fail;          while (p&&!p->next[i]) p=p->fail;          if (p&&!now->f&&!p->next[i]->f)            matrix.num[now->num][p->next[i]->num]++;          if (!p&&!now->f)            matrix.num[now->num][root->num]++;        }  }}void mult(hp a,hp b,hp &c){int i,j,k;for (i=1;i<=size;++i)  for (j=1;j<=size;++j)    {      c.num[i][j]=0;      for (k=1;k<=size;++k)        c.num[i][j]=(c.num[i][j]+a.num[i][k]*b.num[k][j])%P;    }}void power(int l){int i,j;if (l==1)  {    for (i=1;i<=size;++i)      for (j=1;j<=size;++j)        ans.num[i][j]=matrix.num[i][j];  }else  {    power(l/2);    mult(ans,ans,temp);    if (l%2) mult(temp,matrix,ans);    else swap(ans,temp);  }}int main(){int i,len,ansi=0;scanf("%d%d",&n,&l); root=new trie_node(); root->num=1;for (i=1;i<=n;++i)  
{    scanf("%s",&s);    len=strlen(s);    insert(root,0,len);  }size=t;work();power(l);for (i=1;i<=size;++i)  ansi=((long long)(ansi)+ans.num[1][i])%P;printf("%d\n",ansi);}
ZOJ 3228 Searching the String

一道蛮简单的题,记录一下每个单词结尾的上次出现位置,更新不重复的答案时判断一下是否i>=p->last+p->len即可;由于有重复的插入,重新对输入编一下号即可。code:

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

// For every test case: read a text, then n queries "type word".  Type 0 asks
// for the number of occurrences of the word in the text with overlaps
// allowed, type 1 for the number of non-overlapping occurrences.  All words
// are matched in one Aho-Corasick pass.

struct trie_node
{
    trie_node *next[26],*fail;
    int num;     // id of the first query whose word ends here, 0 if none
    int last;    // text index where the last counted non-overlapping match ended
    int len;     // length of the word ending here
    trie_node()
    {
        for (int i=0;i<=25;++i) next[i]=NULL;
        fail=NULL; len=num=0; last=-1;
    }
}*root,*queue[600002];   // 1-based BFS queue; the original 600001 slots were
                         // one short for 600000 nodes plus the root

char s[100],art[100001];
int tong[100001][2];     // tong[id][0]: overlapping, tong[id][1]: non-overlapping
int qst[100001];         // query type (0 or 1)
int numi[100001];        // numi[i]: id whose counters query i reports

// Inserts the word held in s for query `num`.  Repeated words share a node,
// so later queries are redirected to the first id through numi[].
void insert(int num)
{
    trie_node *p=root;
    int len=0;
    numi[num]=0;         // tong[0] is never incremented, so this reports 0
    for (;s[len];++len)
    {
        int c=s[len]-'a';
        if (c<0||c>25) return;   // not a lowercase letter: cannot be indexed
        if (!p->next[c]) p->next[c]=new trie_node();
        p=p->next[c];
    }
    if (p==root) return;
    if (p->num==0) p->num=num;
    numi[num]=p->num;
    p->len=len;
}

// Breadth-first construction of the fail links; root->fail stays NULL.
void work()
{
    int head=0,tail=1;
    queue[tail]=root;
    while (head<tail)
    {
        trie_node *now=queue[++head];
        for (int i=0;i<=25;++i)
        {
            trie_node *child=now->next[i];
            if (!child) continue;
            trie_node *p=now->fail;
            while (p&&!p->next[i]) p=p->fail;
            child->fail=p?p->next[i]:root;
            queue[++tail]=child;
        }
    }
}

// Scans the text once and updates both counters of every word that ends at
// the current position.
void ac()
{
    int l=strlen(art);
    trie_node *now=root;
    for (int i=0;i<l;++i)
    {
        int x=art[i]-'a';
        if (x<0||x>25) { now=root; continue; }   // no word can span this character
        while (now&&!now->next[x]) now=now->fail;
        if (!now) { now=root; continue; }
        now=now->next[x];
        for (trie_node *p=now;p;p=p->fail)
        {
            if (!p->num) continue;
            tong[p->num][0]++;
            // A match ending at i starts at i-len+1; it does not overlap the
            // previously counted one exactly when i >= last + len.
            if (i>=p->last+p->len)
            {
                tong[p->num][1]++;
                p->last=i;
            }
        }
    }
}

// Frees the whole trie rooted at p.
void clear(trie_node *p)
{
    for (int i=0;i<=25;++i)
        if (p->next[i])
            clear(p->next[i]);
    delete p;
}

int main()
{
    int n,i,t=0;
    while (scanf("%100000s",art)==1)
    {
        memset(tong,0,sizeof(tong));
        t++;
        if (scanf("%d",&n)!=1) break;
        root=new trie_node();
        for (i=1;i<=n;++i)
        {
            if (scanf("%d%99s",&qst[i],s)!=2) s[0]='\0';
            insert(i);
        }
        work();
        ac();
        printf("Case %d\n",t);
        for (i=1;i<=n;++i)
            printf("%d\n",tong[numi[i]][qst[i]]);
        printf("\n");
        clear(root);
    }
    return 0;
}



0 0
原创粉丝点击