POJ 2778-DNA Sequence(AC自动机+构建邻接矩阵+矩阵快速幂)

来源:互联网 发布:非农数据软件 编辑:程序博客网 时间:2024/04/30 12:09
DNA Sequence
Time Limit: 1000MS Memory Limit: 65536K Total Submissions: 15118 Accepted: 5826

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

Source

POJ Monthly--2006.03.26,dodo


题目意思:

有M个含有疾病的DNA序列,求出用AGCT四种字符构成长度为N的DNA序列,使之不含有疾病序列的总数。

解题思路:

这个题阿,自己看的时候完全木有思路,也不明白怎么就扯到矩阵上去了,搜了很多题解终于弄明白了…Orz真·弱渣渣…
推荐看这个人的!特别详细明白!

把ac自动机看成一个有向图,构建一个邻接矩阵,那么matrix[i][j]表示从结点i走一步到达结点j的走法数(即i到j的边数),这个矩阵的n次幂matrix^n[i][j]表示从i恰好走n步到达j的路径有几条。

下面搬运一下:

这个和矩阵有什么关系呢???
①插入字符串,构建trie图。

•上图是例子{“ACG”,”C”},构建trie图后如图所示,从每个结点出发都有4条边(A,T,C,G)
•从状态0出发走一步有4种走法:
  –走A到状态1(安全);
  –走C到状态4(危险);
  –走T到状态0(安全);
  –走G到状态0(安全);
•所以当n=1时,答案就是3
•当n=2时,就是从状态0出发走2步,就形成一个长度为2的字符串,只要路径上没有经过危险结点,有几种走法,那么答案就是几种。依此类推走n步就形成长度为n的字符串。
②建立trie图的邻接矩阵M:

2 1 0 0 1

2 1 1 0 0

1 1 0 1 1

2 1 0 0 1

2 1 0 0 1

M[i,j]表示从结点i到j只走一步有几种走法。

那么M的n次幂就表示从结点i到j走n步有几种走法。

③去掉危险结点,也就是去掉危险结点的行和列。结点3和4是单词结尾所以危险,结点2的fail指针指向4,当匹配”AC”时也就匹配了”C”,所以2也是危险的。

矩阵变成M:

2 1

2 1

④计算M[][]的n次幂,然后 Σ(M[0,i]) mod 100000 就是答案。

由于n很大,可以使用二分来计算矩阵的幂


下面的代码是kuangbin巨巨的~

#include <iostream>
#include <stdio.h>
#include <algorithm>
#include <string.h>
#include <queue>
using namespace std;

// All counts are reported modulo this value (fixed by the problem statement).
const int MOD = 100000;

/// Square matrix of counts modulo MOD, stored in a fixed 110x110 buffer.
/// Only the top-left n x n corner is meaningful.
struct Matrix
{
    int mat[110][110], n;

    /// Leaves the matrix uninitialised; assign before use.
    Matrix() {}

    /// Builds an _n x _n zero matrix (requires 0 <= _n <= 110).
    Matrix(int _n)
    {
        n = _n;
        for (int i = 0; i < n; i++)
            for (int j = 0; j < n; j++)
                mat[i][j] = 0;
    }

    /// Returns (*this) * b with every entry reduced modulo MOD.
    /// Both operands must have the same dimension and entries in [0, MOD).
    Matrix operator*(const Matrix &b) const
    {
        Matrix ret = Matrix(n);
        for (int i = 0; i < n; i++)
        {
            // Each product is below MOD^2 = 1e10 and at most 110 of them are
            // summed, so a row of long long accumulators cannot overflow and
            // one reduction per cell is enough.
            long long acc[110] = {0};
            for (int k = 0; k < n; k++)
            {
                const long long lhs = mat[i][k];
                if (lhs == 0)
                    continue; // the transition matrix is sparse; skip empty terms
                for (int j = 0; j < n; j++)
                    acc[j] += lhs * b.mat[k][j];
            }
            for (int j = 0; j < n; j++)
                ret.mat[i][j] = (int)(acc[j] % MOD);
        }
        return ret;
    }
};

/// Aho-Corasick automaton over the alphabet {A, C, G, T}, completed into a
/// trie graph in which every state has all four outgoing transitions.
/// Capacity is 110 states; the problem's limits (at most 10 patterns of at
/// most 10 characters) need at most 101, and larger inputs are not checked.
struct Trie
{
    int next[110][4], fail[110];
    bool end[110]; // true if reaching this state means a forbidden pattern was matched
    int root, L;   // L is the number of states allocated so far

    /// Allocates a fresh state with no children and returns its index.
    int newnode()
    {
        for (int i = 0; i < 4; i++)
            next[L][i] = -1;
        end[L++] = false;
        return L - 1;
    }

    /// Resets the automaton to a single root state.
    void init()
    {
        L = 0;
        root = newnode();
        fail[root] = root; // never followed by build(), but keep it defined
    }

    /// Maps a nucleotide letter to 0..3, or -1 for any other character.
    int getch(char ch)
    {
        switch (ch)
        {
        case 'A':
            return 0;
        case 'C':
            return 1;
        case 'G':
            return 2;
        case 'T':
            return 3;
        default:
            return -1;
        }
    }

    /// Inserts the forbidden pattern s (a NUL-terminated string).
    /// A pattern containing a character outside ACGT can never occur in a DNA
    /// sequence, so it is ignored instead of indexing with an invalid letter.
    void insert(char s[])
    {
        int len = strlen(s);
        for (int i = 0; i < len; i++)
            if (getch(s[i]) < 0)
                return;

        int now = root;
        for (int i = 0; i < len; i++)
        {
            const int c = getch(s[i]);
            if (next[now][c] == -1)
                next[now][c] = newnode();
            now = next[now][c];
        }
        end[now] = true;
    }

    /// Computes the failure links breadth-first and fills every missing
    /// transition. A state whose failure state is dangerous becomes dangerous
    /// too, because a forbidden pattern is then a suffix of its string.
    void build()
    {
        queue<int> Q;
        for (int i = 0; i < 4; i++)
            if (next[root][i] == -1)
                next[root][i] = root;
            else
            {
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        while (!Q.empty())
        {
            int now = Q.front();
            Q.pop();
            // fail[now] is shallower, so BFS order has already finalised it.
            if (end[fail[now]] == true)
                end[now] = true;
            for (int i = 0; i < 4; i++)
            {
                if (next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else
                {
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                }
            }
        }
    }

    /// Returns the L x L matrix whose entry [i][j] is the number of letters
    /// that lead from safe state i to safe state j in one step. Rows and
    /// columns of dangerous states stay zero, which removes them from the graph.
    Matrix getMatrix()
    {
        Matrix res = Matrix(L);
        for (int i = 0; i < L; i++)
        {
            if (end[i])
                continue;
            for (int j = 0; j < 4; j++)
                if (end[next[i][j]] == false)
                    res.mat[i][next[i][j]]++;
        }
        return res;
    }
};

Trie ac;
char buf[20];

/// Returns a raised to the n-th power modulo MOD by repeated squaring.
/// A non-positive n yields the identity matrix.
Matrix pow_M(Matrix a, int n)
{
    Matrix ret = Matrix(a.n);
    for (int i = 0; i < ret.n; i++)
        ret.mat[i][i] = 1;
    Matrix tmp = a;
    while (n > 0) // "> 0" so that a negative exponent cannot loop forever
    {
        if (n & 1)
            ret = ret * tmp;
        tmp = tmp * tmp;
        n >>= 1;
    }
    return ret;
}

/// Reads test cases until input ends. Each case gives the number of forbidden
/// patterns and the sequence length, followed by the patterns; the program
/// prints the number of sequences avoiding every pattern, modulo MOD.
int main()
{
    // Note: n is the number of patterns and m is the sequence length here,
    // the opposite of the letters used in the problem statement.
    int n, m;
    while (scanf("%d%d", &n, &m) == 2) // stop on EOF and on malformed input
    {
        ac.init();
        for (int i = 0; i < n; i++)
        {
            // The width keeps the read inside buf[20] (19 characters + NUL).
            if (scanf("%19s", buf) != 1)
                break;
            ac.insert(buf);
        }
        // Build the automaton, then turn the trie graph into a transition
        // matrix restricted to safe states.
        ac.build();
        Matrix a = ac.getMatrix();
        a = pow_M(a, m);
        // Row 0 of the m-th power counts the m-step safe walks that start at
        // the root; summing over all end states gives the answer.
        int ans = 0;
        for (int i = 0; i < a.n; i++)
        {
            ans = (ans + a.mat[0][i]) % MOD;
        }
        printf("%d\n", ans);
    }
    return 0;
}


0 0
原创粉丝点击