HDU 2457 AC自动机+dp

来源:互联网 发布:超市销售量的数据 编辑:程序博客网 时间:2024/06/05 06:05

题意:

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2457
给出n个只包含A、G、C、T四种字符的模式串(致病片段),以及一条同样只包含这四种字符的基因序列。要求修改序列中的若干字符,使得序列不再包含任何一个模式串作为子串,问最少需要修改多少个字符?如果无论如何都做不到,输出-1。


思路:

AC自动机+dp:把所有不允许出现的子串建成一个AC自动机,然后在自动机上做dp。dp[x][y]表示序列的前x个字符已经处理完、当前位于自动机结点y时,把剩下的字符(第x个及其之后)处理成合法序列所需的最少修改次数。对每个位置枚举要放的字符(A、C、G、T),如果转移到的结点是危险结点(end>0)就跳过;否则,若该字符与原字符不同则代价加1,然后转移即可。这里采用了记忆化搜索,直接用for循环递推也可以。
重点要注意:build中每个结点的end必须累加其fail指针所指结点的end(end[now] += end[fail[now]])。这样,只要某个结点对应的字符串以某个模式串为后缀,该结点也会被标记为危险结点,而不仅仅是模式串的末尾结点。


代码:

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <queue>

// Sentinel meaning "no valid way to finish the sequence from this state".
const int INF = 0x3f3f3f3f;
// Capacity shared by the automaton nodes and the text buffer.
// NOTE(review): presumably sized for the judge's input limits -- confirm
// against the problem statement; newnode() performs no bounds check.
const int MAXN = 1005;
// Alphabet size: 'A', 'C', 'G', 'T'.
const int SIGMA = 4;

// Aho-Corasick automaton over the alphabet {A, C, G, T}.
//
// After build(), next[][] is a complete transition table (missing trie edges
// are redirected through the failure links) and end[v] > 0 exactly when the
// string spelled by node v has some inserted pattern as a suffix.
struct ACauto {
    int next[MAXN][SIGMA], fail[MAXN], end[MAXN];
    int root, sz;

    // Allocates a fresh node with no outgoing edges and returns its index.
    int newnode() {
        for (int i = 0; i < SIGMA; i++)
            next[sz][i] = -1;
        end[sz++] = 0;
        return sz - 1;
    }

    // Resets the automaton to a single root node.
    void init() {
        sz = 0;
        root = newnode();
    }

    // Maps a nucleotide to its column in next[][]; anything that is not
    // 'A', 'C' or 'G' is treated as 'T'.
    int idx(char c) {
        if (c == 'A') return 0;
        if (c == 'C') return 1;
        if (c == 'G') return 2;
        return 3;
    }

    // Inserts the NUL-terminated pattern `buf` into the trie.
    // `id` is accepted for call-site compatibility but is not used.
    void insert(const char *buf, int id) {
        (void)id;
        int now = root;
        for (const char *p = buf; *p != '\0'; ++p) {
            const int c = idx(*p);
            if (next[now][c] == -1)
                next[now][c] = newnode();
            now = next[now][c];
        }
        end[now]++;
    }

    // Computes failure links breadth-first and completes the transition
    // table so that next[v][c] is defined for every node and character.
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < SIGMA; i++) {
            if (next[root][i] == -1)
                next[root][i] = root;
            else {
                fail[next[root][i]] = root;
                Q.push(next[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            // Key step: a node is forbidden if its failure target is
            // forbidden, i.e. a pattern ends at a proper suffix of this node.
            // fail[now] was dequeued earlier (it is shallower), so its end[]
            // is already final.
            end[now] += end[fail[now]];
            for (int i = 0; i < SIGMA; i++) {
                if (next[now][i] == -1)
                    next[now][i] = next[fail[now]][i];
                else {
                    fail[next[now][i]] = next[fail[now]][i];
                    Q.push(next[now][i]);
                }
            }
        }
    }
} ac;

int len;             // length of the text currently stored in str
int dp[MAXN][MAXN];  // memo for dfs(); -1 means "not computed yet"
char str[MAXN];      // input buffer: patterns first, then the text
const char d[] = {'A', 'C', 'G', 'T'};  // d[i] is the character with idx() == i

// Returns the minimum number of characters of str[x..len-1] that must be
// changed so that, starting from automaton node y, no forbidden node is ever
// entered. Returns INF when that is impossible.
int dfs(int x, int y) {
    if (x == len) return 0;
    if (dp[x][y] != -1) return dp[x][y];
    int res = INF;
    for (int i = 0; i < SIGMA; i++) {
        const int ny = ac.next[y][i];
        if (ac.end[ny] > 0) continue;  // placing d[i] here completes a pattern
        const int cost = (d[i] == str[x]) ? 0 : 1;
        // INF + 1 cannot overflow int, and any value >= INF is reported as -1.
        res = std::min(res, dfs(x + 1, ny) + cost);
    }
    dp[x][y] = res;
    return res;
}

// Reads test cases until a pattern count of 0 (or end of input): n patterns
// followed by the text. Prints "Case k: answer", with -1 when no valid
// modification exists.
int main() {
    // For local debugging: freopen("in.txt", "r", stdin);
    int n = 0, cs = 0;
    // Check the scanf result so that EOF or malformed input ends the loop
    // instead of testing an unassigned n.
    while (std::scanf("%d", &n) == 1 && n) {
        ac.init();
        for (int i = 1; i <= n; i++) {
            // Field width keeps an over-long token from overflowing str.
            if (std::scanf("%1004s", str) != 1) return 0;
            ac.insert(str, i);
        }
        ac.build();
        if (std::scanf("%1004s", str) != 1) return 0;
        len = static_cast<int>(std::strlen(str));
        std::memset(dp, -1, sizeof(dp));
        const int ans = dfs(0, ac.root);
        if (ans >= INF) std::printf("Case %d: -1\n", ++cs);
        else std::printf("Case %d: %d\n", ++cs, ans);
    }
    return 0;
}
原创粉丝点击