两天AC自动机的学习 HDOJ2222 HDOJ 2896

来源：互联网发布：centos安装java sdk 编辑：程序博客网时间：2024/06/15 03:48

暑假集训两天了，最近作息无规律，暴饮暴食，肚子涨气，消化不良，运动过度，以至于身体非常不适，所以这两天生活学习效率不高。我足足看了一天半才看懂了AC自动机的标程，今天用了一下午才A了一道AC自动机的模板题，但还是很开心的，因为又学会了一种新的中等算法题。

　　之所以想学这个是因为，之前有两次比赛，一次是师大校赛，一次是湘潭邀请赛，我都遇到过此类的题，第一次用的是暴力，半年以后用的是高级了一点点的KMP算法，但是结果都是Time Limit Exceeded。因此，暑假第一件事就是先学AC自动机。这周才刚开始吧，剩下一周就是熟悉好自动机和进一步加深此类题目的难度，以便应付更风骚一点的自动机吧。没想到今天就被一个好傻的问题困扰了好久：gets()的输入与标志单词结尾的flag问题耗了我老久时间。

　　下面将自动机的要点总结与两道标程分享一下，方便大家的学习与我以后的复习。（学AC自动机的前提是KMP算法。）

AC自动机的主体思想还是KMP；只不过是KMP在trie上的应用罢了。此次两天的学习虽然只学会了模板，但同时加深了自己对BFS，TRIE，KMP的理解。一题抵多题啊。希望在这周里能做点强化的自动机，提高AC的水平。在建立模板库的同时把自动机背掉。以后能在有限的时间内做出此类题。因为我深知，此类题往往关系到比赛中是拿铜还是拿银。

AC自动机专辑http://www.notonlysuccess.com/?p=607

总结：

一：TRIE（）

　　　　1：FLAG标志单词的结尾；所以当该单词在长句中出现一次后就将flag赋为0，避免重复；

　　　　2：next[]数组如果是全部字符的话要开到128，index要减去31。如果是字母要开到26，index减去'a'或'A'；　　　　 trie[]数组就统一开到500010吧，具体原因不知道也懒得知道了。

二：insert()

　　　　1:直接建一个树。每个结点注意初始化，单词末尾flag++。

三：bfs()

　　　　1：用广搜（BFS）过一遍所有结点，从根结点往下一层层地过。

　　　　2：和KMP算法一样标记fail。

　　　　在now结点中：先找可行的p结点。（可行就是p.next[t]有值。如果没值p=trie[p].fail。直到找到或P为根结点。）

　　　　如果找到了可行的P结点：将now的下个结点的fail=p的下一个结点。即trie[q].fail= trie[p].next[t];

四：ac_auto（）

　　　　1：将s字符串过一次。如果s的当前字符与trie[root].next[s-'a']一样。则将s下移，root下移。

　　　　　　　　　　　　　　　　如果他们不一样。则寻找可行的root节点，并更新。
　　　　2：如果找到了一个单词结尾的标志。还需沿fail往回找一遍，找出所有作为后缀出现的子单词。

五：主函数是：先建个trie树，再用KMP标记fail指针，最后用ac自动机在trie上过一遍就行了。

杭电2222：http://acm.hdu.edu.cn/showproblem.php?pid=2222

题目大意：给N组数据，每组包含M个单词，和一个长句L；问这M个单词中有多少个在L中出现。

#include <stdio.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
// Capacity of the node pool trie[] (also used as the size of the BFS queue).
const int N = 500010;

// One node of the trie / Aho-Corasick automaton, addressed by its index in
// trie[]. A child slot holding 0 means "no child".
struct Trie {
    int flag;      // incremented once for every inserted pattern ending here
    int fail;      // index of the failure node; -1 means "no failure node"
    int next[26];  // child index for each letter 'a'..'z'; 0 = absent

    // Reset the node: no pattern ends here, no failure link, no children.
    void init()
    {
        int slot = 26;
        while (slot-- > 0) {
            next[slot] = 0;
        }
        fail = -1;
        flag = 0;
    }
} trie[N];

int len = 0;        // index of the most recently allocated trie node (0 = only the root exists)
int que[N];         // queue of node indices used by bfs()
int n;              // number of patterns left to read in the current test case
char str[1000010];  // input buffer shared by the patterns and the text

// Insert the NUL-terminated pattern s into the trie, taking new nodes from
// trie[] as needed, and count it at the node where it ends.
// Each character selects a child slot by subtracting 'a'.
void inline insert( char* s )
{
    int cur = 0;  // start at the root
    for (; *s; ++s) {
        const int slot = *s - 'a';
        int child = trie[cur].next[slot];
        if (child == 0) {
            // No edge for this letter yet: allocate the next free node.
            child = ++len;
            trie[child].init();
            trie[cur].next[slot] = child;
        }
        cur = child;
    }
    ++trie[cur].flag;
}

// Compute the failure link of every trie node. Nodes are visited in
// breadth-first order from the root, so a node's own link is already set
// when its children are processed.
void bfs()
{
    int front = 0, back = 0;
    que[back] = 0;  // the queue starts with the root only
    for (; front <= back; ++front) {
        const int cur = que[front];
        for (int c = 0; c < 26; ++c) {
            const int child = trie[cur].next[c];
            if (child == 0)
                continue;
            // Follow failure links upward from the parent until a node with
            // an edge for c is found, or the chain runs out (-1).
            int f = trie[cur].fail;
            while (f != -1 && trie[f].next[c] == 0)
                f = trie[f].fail;
            // Chain exhausted: fall back to the root; otherwise link to the
            // node reached over that edge.
            trie[child].fail = (f == -1) ? 0 : trie[f].next[c];
            que[++back] = child;
        }
    }
}

// Run the text s through the automaton and return how many inserted
// patterns occur in it. Each pattern is counted at most once (identical
// patterns inserted k times contribute k).
//
// Requires insert() for all patterns followed by bfs(). Counted nodes get
// flag = -1, so the trie must be rebuilt before it is searched again from
// scratch.
int ac_auto( char* s )
{
    int ans = 0, root = 0;
    for (; *s; ++s) {
        const int t = *s - 'a';
        if (t < 0 || t >= 26) {
            // A character outside 'a'..'z' has no slot in next[]; restart
            // from the root instead of indexing out of bounds.
            root = 0;
            continue;
        }
        if (trie[root].next[t]) {
            root = trie[root].next[t];
        } else {
            // No direct edge: follow failure links until some node has an
            // edge for t, or fall back to the root.
            int p = trie[root].fail;
            while (p != -1 && !trie[p].next[t])
                p = trie[p].fail;
            root = (p == -1) ? 0 : trie[p].next[t];
        }
        // Every node on the failure chain of the current state is a suffix of
        // the text read so far, so the whole chain must be inspected even when
        // the current state itself ends no pattern. flag == -1 marks a node
        // whose chain has already been counted; the walk can stop there
        // because every earlier walk also ran up to the root or to a marked
        // node.
        for (int p = root; p != 0 && trie[p].flag != -1; p = trie[p].fail) {
            ans += trie[p].flag;
            trie[p].flag = -1;
        }
    }
    return ans;
}

// Driver for HDOJ 2222: for each test case read n patterns and one text,
// build the automaton and print how many patterns occur in the text.
//
// Input is read with width-bounded scanf("%s") instead of gets(): gets() has
// no bound on str, was removed from the language standard, and keeps a
// trailing '\r' on CRLF input, which would turn into a negative trie index.
// Patterns and text are whitespace-free lowercase words, so token-wise
// reading yields the same strings.
int main(){
    int test;
    if (scanf("%d", &test) != 1)
        return 0;
    while (test-- > 0) {
        if (scanf("%d", &n) != 1)
            break;
        // Reuse the node pool: only the root has to be reset up front, other
        // nodes are re-initialised when insert() allocates them.
        len = 0;
        trie[0].init();
        while (n-- > 0) {
            if (scanf("%1000009s", str) != 1)
                break;
            insert(str);
        }
        bfs();
        if (scanf("%1000009s", str) != 1)
            str[0] = '\0';  // missing text: search an empty string
        printf("%d\n", ac_auto(str));
    }
    return 0;
}

HDOJ 2896: http://acm.hdu.edu.cn/showproblem.php?pid=2896

题目大意：给N个病毒代码，M个网站。求：这些网站中分别包含哪些病毒，并依次列出各个网站的病毒，和包含病毒网站的个数。

#include <stdio.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
// mark[w][k] is set to 1 when pattern number k is found on web site w;
// mark[w][0] is set to 1 when any pattern at all is found on site w.
int mark[10008][505];
// Capacity of the node pool trie[] (also used as the size of the BFS queue).
const int N = 500010;

// One node of the trie / Aho-Corasick automaton, addressed by its index in
// trie[]. A child slot holding 0 means "no child".
struct Trie {
    int flag;       // incremented once for every inserted pattern ending here
    int fail;       // index of the failure node; -1 means "no failure node"
    int next[127];  // child index per character code minus 31; 0 = absent
    int num;        // sequence number of the pattern ending here, 0 if none

    // Reset the node: no pattern ends here, no failure link, no children.
    void init()
    {
        int slot = 127;
        while (slot-- > 0) {
            next[slot] = 0;
        }
        num = 0;
        fail = -1;
        flag = 0;
    }
} trie[N];

int len = 0;      // index of the most recently allocated trie node (0 = only the root exists)
int que[N];       // queue of node indices used by bfs()
int n;            // number of patterns
int tim = 0;      // running pattern counter; insert() stores ++tim as the pattern's number
int web = 0;      // number of the web site currently being scanned (row of mark[])
char str[10010];  // input buffer shared by the patterns and the site texts

// Insert the NUL-terminated pattern s into the trie, taking new nodes from
// trie[] as needed. The end node is counted in flag and receives the next
// pattern number (++tim) in num.
// Each character selects a child slot by subtracting 31 from its code.
void inline insert( char* s )
{
    int cur = 0;  // start at the root
    for (; *s; ++s) {
        const int slot = *s - 31;
        int child = trie[cur].next[slot];
        if (child == 0) {
            // No edge for this character yet: allocate the next free node.
            child = ++len;
            trie[child].init();
            trie[cur].next[slot] = child;
        }
        cur = child;
    }
    ++trie[cur].flag;
    trie[cur].num = ++tim;
}

// Compute the failure link of every trie node. Nodes are visited in
// breadth-first order from the root, so a node's own link is already set
// when its children are processed.
void bfs()
{
    int front = 0, back = 0;
    que[back] = 0;  // the queue starts with the root only
    for (; front <= back; ++front) {
        const int cur = que[front];
        for (int c = 0; c < 127; ++c) {
            const int child = trie[cur].next[c];
            if (child == 0)
                continue;
            // Follow failure links upward from the parent until a node with
            // an edge for c is found, or the chain runs out (-1).
            int f = trie[cur].fail;
            while (f != -1 && trie[f].next[c] == 0)
                f = trie[f].fail;
            // Chain exhausted: fall back to the root; otherwise link to the
            // node reached over that edge.
            trie[child].fail = (f == -1) ? 0 : trie[f].next[c];
            que[++back] = child;
        }
    }
}

void ac_auto( char* s )
{
    int root= 0, t, p;
    while( *s )
    {
        t= *s- 31;
        if( trie[root].next[t] )
            root= trie[root].next[t];
        else
        {
            p= trie[root].fail;
            while( p!= -1 && !trie[p].next[t] ) p= trie[p].fail;//不断地寻找可行的父节点P //P为ROOT 或 P的.next[t]有值则跳出
            if( p== -1 ) root= 0;             //P为ROOT:       更新ROOT
            else root= trie[p].next[t];       //P的.next[t]有值:       更新ROOT
        }
        p= root;
        while( p!= 0 && trie[p].flag )
        {
            //if( trie[p].flag )
            //ans+= trie[p].flag;
            //trie[p].flag= 0;
            //cout<<endl<<web<<"+"<<trie[p].num<<endl;
            mark[web][trie[p].num]=1;
            p= trie[p].fail;
            mark[web][0]=1;
        }
        s++;
    }
}
// Driver for HDOJ 2896: read n patterns, build the automaton, then scan m
// web sites. For every site containing at least one pattern print
// "web <i>:" followed by the matching pattern numbers in ascending order,
// and finally print the number of such sites.
//
// mark[][] has static storage and therefore starts zeroed, so no clearing
// loop is needed. Every %s read is bounded by the size of str.
int main()
{
    if (scanf("%d", &n) != 1)
        return 0;
    len = 0;
    trie[0].init();
    for (int i = 1; i <= n; i++) {
        if (scanf("%10009s", str) != 1)
            break;
        insert(str);
    }
    bfs();

    int m = 0;
    if (scanf("%d", &m) != 1)
        m = 0;
    int sum = 0;
    web = 0;
    for (int i = 1; i <= m; i++) {
        web++;  // row of mark[] for this site; always equal to i
        if (scanf("%10009s", str) != 1)
            break;
        ac_auto(str);
        if (mark[web][0]) {
            sum++;
            printf("web %d:", i);
            // Pattern numbers run from 1 to n.
            for (int j = 1; j <= n; j++) {
                if (mark[web][j])
                    printf(" %d", j);
            }
            printf("\n");
        }
    }
    printf("total: %d\n", sum);
    return 0;
}

总结：