poj2778 DNA Sequence AC自动机+矩阵快速幂

来源:互联网 发布:php恶意代码检测工具 编辑:程序博客网 时间:2024/06/07 12:00

DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | Total Submissions: 11721 | Accepted: 4471

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains a DNA genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

  给M个串,问有多少个长度为N的串不包含给出的M个串中的任何一个,答案对100000取模。

  首先把M个串建立AC自动机,然后构造一个大小为ac.sz*ac.sz的矩阵,mat[i][j]代表从节点i走一步到达节点j的合法路径数,其中i和j都必须是合法节点(该节点本身以及它的fail链上的节点都不是任何给定串的结尾)。这样就得到了一步转移矩阵,走N步就是把这个矩阵求N次方,答案是结果矩阵第一行(即从根节点出发)的所有元素之和。

#include<iostream>
#include<queue>
#include<cstring>
#include<cstdio>
#include<cmath>
#include<set>
#include<map>
#include<vector>
#include<stack>
#include<algorithm>
#define INF 0x3f3f3f3f
#define eps 1e-9
#define MAXN 60010
#define MAXM 2000010
#define MAXNODE 105
#define MOD 100000
#define SIGMA_SIZE 4
typedef long long LL;
using namespace std;

// M: number of forbidden segments, N: required sequence length (T is unused).
int T,M,N;
// Input buffer for one forbidden segment (read with a width limit of 14).
char str[15];

// Aho-Corasick automaton over the alphabet {A, C, T, G}.
// Capacity is MAXNODE nodes; the caller must keep the total number of
// inserted characters below that (at most 10 * 10 + 1 = 101 nodes under the
// stated problem limits).
struct AC{
    // ch[u][c]: child / goto transition, f[u]: failure link,
    // val[u]: non-zero when reaching u means a forbidden segment was matched,
    // sz: number of nodes currently in use (node 0 is the root).
    int ch[MAXNODE][SIGMA_SIZE],f[MAXNODE],val[MAXNODE],sz;

    // Resets the automaton to a single empty root node.
    void init(){
        memset(ch[0],0,sizeof(ch[0]));
        val[0]=0;
        sz=1;
    }

    // Maps a nucleotide letter to its column index, or -1 for any other
    // character (the original fell off the end of the function here).
    int idx(char c){
        switch(c){
            case 'A':return 0;
            case 'C':return 1;
            case 'T':return 2;
            case 'G':return 3;
        }
        return -1;
    }

    // Inserts pattern s into the trie and marks its last node as forbidden.
    // The parameter v is kept for interface compatibility; the terminal node
    // is always marked with 1.
    void insert(char *s,int v){
        (void)v;
        // A pattern with a character outside the alphabet can never occur in
        // an A/C/T/G sequence, so it is skipped instead of indexing ch[][-1].
        for(int i=0;s[i];i++)
            if(idx(s[i])<0) return;
        int u=0;
        for(int i=0;s[i];i++){
            int c=idx(s[i]);
            if(!ch[u][c]){
                memset(ch[sz],0,sizeof(ch[sz]));
                val[sz]=0;
                ch[u][c]=sz++;
            }
            u=ch[u][c];
        }
        val[u]=1;
    }

    // Builds failure links by BFS and completes every missing transition, so
    // that ch[u][c] is defined for all nodes and letters afterwards.
    void get_fail(){
        queue<int> q;
        f[0]=0;
        for(int c=0;c<SIGMA_SIZE;c++){
            int u=ch[0][c];
            if(u){
                f[u]=0;
                q.push(u);
            }
        }
        while(!q.empty()){
            int r=q.front();
            q.pop();
            for(int c=0;c<SIGMA_SIZE;c++){
                int u=ch[r][c];
                if(!u){
                    // No real child: reuse the transition of the failure node.
                    ch[r][c]=ch[f[r]][c];
                    continue;
                }
                q.push(u);
                f[u]=ch[f[r]][c];
                // A node is forbidden if any suffix of it is a forbidden segment.
                val[u]|=val[f[u]];
            }
        }
    }
}ac;

// Square matrix of path counts modulo MOD; only the top-left
// ac.sz x ac.sz corner is used by the operators below.
struct Mat{
    LL mat[MAXNODE][MAXNODE];
    // Sets every entry to zero.
    void init(){
        memset(mat,0,sizeof(mat));
    }
}ans;

// Matrix product modulo MOD over the ac.sz x ac.sz corner.
// Operands are taken by const reference to avoid copying ~88 KB per argument.
Mat operator * (const Mat &a,const Mat &b){
    int sz=ac.sz;
    Mat ret;
    ret.init();
    for(int k=0;k<sz;k++)
        for(int i=0;i<sz;i++){
            if(!a.mat[i][k]) continue;   // skip zero entries
            for(int j=0;j<sz;j++)
                ret.mat[i][j]=(ret.mat[i][j]+a.mat[i][k]*b.mat[k][j])%MOD;
        }
    return ret;
}

// Matrix power a^n modulo MOD by binary exponentiation.
// For n <= 0 the identity matrix is returned.
Mat operator ^ (const Mat &a,int n){
    Mat ret,t=a;
    int sz=ac.sz;
    ret.init();                          // no uninitialised entries remain
    for(int i=0;i<sz;i++) ret.mat[i][i]=1;
    while(n>0){                          // n>0 also terminates for negative n
        if(n&1) ret=ret*t;
        t=t*t;
        n>>=1;
    }
    return ret;
}

// Fills the global matrix ans with one-step transition counts:
// ans.mat[u][v] is the number of letters that lead from node u to node v
// where neither u nor v is forbidden. Expects ans to be zeroed and
// ac.get_fail() to have been called.
void get_mat(){
    for(int u=0;u<ac.sz;u++){
        if(ac.val[u]) continue;
        for(int c=0;c<SIGMA_SIZE;c++){
            int v=ac.ch[u][c];
            if(!ac.val[v]) ans.mat[u][v]++;
        }
    }
}

// Reads test cases "M N" followed by M segments from standard input until the
// input ends, and prints for each case the number of length-N sequences that
// contain no segment, modulo MOD.
int main(){
    // The debugging redirect freopen("in.txt","r",stdin) was removed so the
    // program reads the real standard input.
    while(scanf("%d%d",&M,&N)==2){
        ac.init();
        for(int i=0;i<M;i++){
            if(scanf("%14s",str)!=1) break;   // width keeps str[15] in bounds
            ac.insert(str,1);
        }
        ac.get_fail();
        ans.init();
        get_mat();
        ans=(ans^N);
        LL cnt=0;
        // Row 0 holds the counts of all legal walks of length N from the root.
        for(int i=0;i<ac.sz;i++) cnt=(cnt+ans.mat[0][i])%MOD;
        // cnt < MOD, so it fits in int; this avoids the MSVC-only "%I64d".
        printf("%d\n",static_cast<int>(cnt));
    }
    return 0;
}


0 0
原创粉丝点击