AC自动机水题

来源:互联网 发布:知乎三国和战国 编辑:程序博客网 时间:2024/06/03 16:55

近日复习下AC机,顺便把以前的指针写法改成数组写法,删了以前的AC机相关博文(代码库2333),原本的指针写法实在太长了,而且内存下不来,随随便便就MLE。
教程可以看 别人的博客

  • 插入到trie树
  • 构造失败指针
  • 查询
  • 水题列表
    • HDU 2222 Keywords Search 真裸题
    • HDU 3065 病毒侵袭持续中
    • HDU 2896 病毒侵袭

具体写法各异,以下说明针对下面代码的写法,这种写法对数组的破坏性比较大。

插入到trie树

next[cur][k] : cur号点下的k号子节点的节点编号,插入字符串就很easy

// Inserts the NUL-terminated pattern s1 into the trie, creating missing nodes
// on the way down from the root.
// Each character is mapped to a child slot by subtracting beginCh, so every
// character is expected to lie inside the automaton's alphabet.
void insert(char *s1) {
    int cur = root;
    for (int i = 0; s1[i] != '\0'; ++i) {
        int id = s1[i] - beginCh;
        if (next[cur][id] == -1) {
            next[cur][id] = newNode();
        }
        cur = next[cur][id];
    }
    // Count patterns ending here instead of storing a flag: query() adds
    // leaf[] to its answer, so identical patterns inserted more than once
    // must each contribute.
    leaf[cur]++;
}

构造失败指针

对于每个父节点u,其k号子节点的fail值 = fail[u] 对应节点的k号子节点,即 next[fail[u]][k]。若u的k号子节点不存在,则把 next[u][k] 直接改成 next[fail[u]][k](注意这里改的是next值,而不是fail值),详见代码。
root点特殊处理:它的子节点,若不存在则修改next为root,若存在则修改失败指针为root,且入队列。

void build() {    queue<int>Q;    fail[root] = root;    for (int i = 0; i < maxch; ++i) {        if (next[root][i] == -1) next[root][i] = root;          else  fail[next[root][i]] = root, Q.push(next[root][i]); //存在则指定fail到root    }    while (!Q.empty()) {        int cur = Q.front(); Q.pop();        for (int i = 0; i < maxch; ++i) {            if (next[cur][i] == -1) next[cur][i] = next[fail[cur]][i];            else {                fail[next[cur][i]] = next[fail[cur]][i];                Q.push(next[cur][i]);            }        }    }}

查询

查询变化多,根据具体情况改,这里以在文本串里查找模式串集合字符串出现个数为例。

int query(char *s1) {    int len = strlen(s1);    int cur = root;    int ans = 0;    for (int i = 0; i < len; ++i) {        int k = s1[i] - beginCh;        int son = next[cur][k];        cur = son;        while (son != root && leaf[son]) {             ans += leaf[son];            leaf[son] = 0;//防止重复访问计数            son = fail[son];        }    }    return ans;}

注意:上面那种写法只在当前结点是叶子(模式串结尾)结点时才沿fail跳转,速度快也能过题,但有一种情况匹配不出来:在 abcdf 里查找 [ bc , abcde ] 会找不到 bc(走到 abc 时该结点不是结尾,循环直接结束,不会跳到它的fail结点 bc),貌似OJ题目都没这种数据,建议用下面这种写法

while (son != root ) {      ans += leaf[son];//leaf[]初始为0时才能这样,不然就需要if判断    leaf[son] = 0;    son = fail[son];}

水题列表

HDU 2222 Keywords Search 真裸题

给一堆串,再给一个长串,求有多少串在长串里出现

#include <cstdio>
#include <cmath>
#include <cstring>
#include <string>
#include <iostream>
#include <algorithm>
#include <queue>
using namespace std;
#define ll long long
#define clr( a , x ) memset ( a , x , sizeof (a) );
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);
//http://acm.split.hdu.edu.cn/showproblem.php?pid=2222

// Worst case for the trie: 10000 keywords of 50 characters that share no
// prefix, plus the root. Anything smaller can overflow the node arrays.
const int maxn = 10000 * 50 + 5;
const int maxch = 26;
const char beginCh = 'a';

// Aho-Corasick automaton over the lowercase letters 'a'..'z'.
// Usage: init(), insert() every keyword, build(), then query() once.
struct tree {
    int next[maxn][maxch];  // transitions; -1 = missing until build() fills them
    int leaf[maxn];         // keywords ending at the node; -1 once query() visited it
    int fail[maxn];         // longest proper suffix that is also a trie node
    int cnt, root;

    // Allocates a fresh node with no children and no keyword ending on it.
    int newNode() {
        memset(next[cnt], -1, sizeof(next[cnt]));
        leaf[cnt] = 0;  // per-node reset instead of clearing the whole array per test
        return cnt++;
    }

    // Resets the automaton to a single root node.
    void init() {
        cnt = 0;
        root = newNode();
    }

    // Adds keyword s1 (characters must be in 'a'..'z') to the trie.
    void insert(char *s1) {
        int cur = root;
        for (int i = 0; s1[i] != '\0'; ++i) {
            int id = s1[i] - beginCh;
            if (next[cur][id] == -1) {
                next[cur][id] = newNode();
            }
            cur = next[cur][id];
        }
        leaf[cur]++;  // identical keywords are counted separately
    }

    // Computes fail links breadth-first and replaces every missing transition
    // by the transition of the fail target.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < maxch; ++i) {
            if (next[root][i] == -1) {
                next[root][i] = root;  // missing child of the root loops back to the root
            } else {
                fail[next[root][i]] = root;  // depth-1 nodes fail to the root
                Q.push(next[root][i]);
            }
        }
        while (!Q.empty()) {
            int cur = Q.front();
            Q.pop();
            for (int i = 0; i < maxch; ++i) {
                if (next[cur][i] == -1) {
                    next[cur][i] = next[fail[cur]][i];
                } else {
                    fail[next[cur][i]] = next[fail[cur]][i];
                    Q.push(next[cur][i]);
                }
            }
        }
    }

    // Returns how many inserted keywords occur in text s1 (characters must be
    // in 'a'..'z'). Destructive: visited nodes are marked with leaf = -1.
    int query(char *s1) {
        int cur = root;
        int ans = 0;
        for (int i = 0; s1[i] != '\0'; ++i) {
            cur = next[cur][s1[i] - beginCh];
            // Walk the whole fail chain, not just while nodes are terminal:
            // a keyword may be a suffix of a non-terminal node (e.g. "bc"
            // under "abc" of "abcde"). Once a node is marked -1 its entire
            // chain has been counted, so the walk stops there and every node
            // is visited at most once overall.
            for (int t = cur; t != root && leaf[t] != -1; t = fail[t]) {
                ans += leaf[t];
                leaf[t] = -1;
            }
        }
        return ans;
    }
} ac;

// Shared input buffer (keywords and the text); kept out of main's stack frame
// because 1 MB of locals can exceed small default stack limits.
static char s1[1000002];

int main() {
    // RE
    int t, n;
    if (scanf("%d", &t) != 1) return 0;
    while (t--) {
        if (scanf("%d", &n) != 1) break;
        ac.init();
        for (int i = 0; i < n; ++i) {
            if (scanf("%1000000s", s1) != 1) s1[0] = '\0';
            ac.insert(s1);
        }
        ac.build();
        if (scanf("%1000000s", s1) != 1) s1[0] = '\0';
        printf("%d\n", ac.query(s1));
    }
    return 0;
}

HDU 3065 病毒侵袭持续中

开个数组记录每个病毒的出现次数,最后输出即可。可见字符从空格(32)到 ~(126)一共95个,所以字符集大小开100就够了。

#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <queue>
using namespace std;
#define ll long long
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);

const int maxn = 1000  * 52 + 5;
const int maxch = 100;      // printable ASCII 32..126 needs fewer than 100 slots
const char beginCh = ' ';   // first printable character
char word[1002][52];        // word[i] = pattern with id i (1-based)
int times[1002];            // times[i] = occurrences of pattern i in the text

// Aho-Corasick automaton over printable ASCII.
// Usage: init(), insert() every pattern, build(), then query().
struct tree {
    int next[maxn][maxch];  // transitions; -1 = missing until build() fills them
    int leaf[maxn];         // id of the pattern ending at the node, 0 if none
    int cnt, root;
    int fail[maxn];         // longest proper suffix that is also a trie node
    int last[maxn];         // nearest node on the fail chain that ends a pattern, else root

    // Allocates a fresh node with no children and no pattern ending on it.
    int newNode() {
        memset(next[cnt], -1, sizeof(next[cnt]));
        leaf[cnt] = 0;
        return cnt++;
    }

    // Resets the automaton to a single root node.
    void init() {
        cnt = 0;
        root = newNode();
    }

    // Adds pattern s1 under the (non-zero) identifier id.
    // A pattern containing a character outside the table is dropped, since it
    // cannot be represented (and would otherwise index out of bounds).
    void insert(char *s1, int id) {
        int cur = root;
        for (int i = 0; s1[i] != '\0'; ++i) {
            int ch = s1[i] - beginCh;
            if (ch < 0 || ch >= maxch) return;
            if (next[cur][ch] == -1) {
                next[cur][ch] = newNode();
            }
            cur = next[cur][ch];
        }
        leaf[cur] = id;
    }

    // Computes fail links and output links breadth-first and replaces every
    // missing transition by the transition of the fail target.
    void build() {
        queue<int> Q;
        fail[root] = root;
        last[root] = root;
        for (int i = 0; i < maxch; ++i) {
            int &child = next[root][i];
            if (child == -1) {
                child = root;
            } else {
                fail[child] = root;
                last[child] = root;
                Q.push(child);
            }
        }
        while (!Q.empty()) {
            int cur = Q.front();
            Q.pop();
            for (int i = 0; i < maxch; ++i) {
                int viaFail = next[fail[cur]][i];
                int &child = next[cur][i];
                if (child == -1) {
                    child = viaFail;
                } else {
                    fail[child] = viaFail;
                    // viaFail is no deeper than cur, so its own output link
                    // was already set when its parent was processed.
                    last[child] = leaf[viaFail] ? viaFail : last[viaFail];
                    Q.push(child);
                }
            }
        }
    }

    // Adds to times[] one count per occurrence of every pattern in text s1.
    void query(char *s1) {
        int cur = root;
        for (int i = 0; s1[i] != '\0'; ++i) {
            int k = s1[i] - beginCh;
            if (k < 0 || k >= maxch) {
                // No pattern can span a character outside the table.
                cur = root;
                continue;
            }
            cur = next[cur][k];
            // Follow output links so patterns that are suffixes of a
            // non-terminal node are reported too, touching only terminal nodes.
            for (int t = leaf[cur] ? cur : last[cur]; t != root; t = last[t]) {
                times[leaf[t]]++;
            }
        }
    }
} ac;

char s1[2000005];

// Discards the remainder of the current input line, including its newline.
static void skipLine() {
    int c;
    while ((c = getchar()) != EOF && c != '\n') {}
}

// Bounded replacement for gets(): reads one line into buf (at most size-1
// characters), strips the trailing CR/LF and discards any excess of an
// over-long line. Returns false, with buf emptied, at end of input.
static bool readLine(char *buf, int size) {
    if (!fgets(buf, size, stdin)) {
        buf[0] = '\0';
        return false;
    }
    size_t len = strlen(buf);
    bool complete = len > 0 && buf[len - 1] == '\n';
    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) {
        buf[--len] = '\0';
    }
    if (!complete) skipLine();
    return true;
}

int main() {
    // RE
    int n;
    // "== 1" rather than "!= EOF": malformed input ends the loop instead of spinning.
    while (scanf("%d", &n) == 1) {
        skipLine();
        ac.init();
        memset(times, 0, sizeof(times));
        for (int i = 1; i <= n; ++i) {
            readLine(word[i], sizeof(word[i]));
            ac.insert(word[i], i);
        }
        ac.build();
        readLine(s1, sizeof(s1));
        ac.query(s1);
        for (int i = 1; i <= n; ++i) {
            if (times[i]) {
                printf("%s: %d\n", word[i], times[i]);
            }
        }
    }
    return 0;
}

HDU 2896 病毒侵袭

结果要排序,所以不能在查询中直接输出,如下例子

3
ccc
bbb
aaa
2
aaabbbcccccc
bbaacc

输出
web 1: 1 2 3
total: 1

#include <cstdio>
#include <cmath>
#include <cstring>
#include <string>
#include <iostream>
#include <algorithm>
#include <queue>
using namespace std;
#define ll long long
#define clr( a , x ) memset ( a , x , sizeof (a) );
#define RE freopen("in.txt","r",stdin);
#define WE freopen("out.txt","w",stdout);

const int maxn = 500  * 200 + 5;
const int maxch = 100;      // printable ASCII 32..126 needs fewer than 100 slots
const char beginCh = ' ';   // first printable character
bool vis[502];              // vis[id] = pattern id was found in the current text

// Aho-Corasick automaton over printable ASCII.
// Usage: init(), insert() every pattern, build(), then clrVis() + query() per text.
struct tree {
    int next[maxn][maxch];  // transitions; -1 = missing until build() fills them
    int leaf[maxn];         // id of the pattern ending at the node, 0 if none
    int cnt, root;
    int fail[maxn];         // longest proper suffix that is also a trie node
    int last[maxn];         // nearest node on the fail chain that ends a pattern, else root

    // Allocates a fresh node with no children and no pattern ending on it.
    int newNode() {
        memset(next[cnt], -1, sizeof(next[cnt]));
        leaf[cnt] = 0;
        return cnt++;
    }

    // Resets the automaton to a single root node.
    void init() {
        cnt = 0;
        root = newNode();
    }

    // Forgets which patterns were found by the previous query().
    void clrVis() {
        memset(vis, 0, sizeof(vis));
    }

    // Adds pattern s1 under the (non-zero) identifier id.
    // A pattern containing a character outside the table is dropped, since it
    // cannot be represented (and would otherwise index out of bounds).
    void insert(char *s1, int id) {
        int cur = root;
        for (int i = 0; s1[i] != '\0'; ++i) {
            int ch = s1[i] - beginCh;
            if (ch < 0 || ch >= maxch) return;
            if (next[cur][ch] == -1) {
                next[cur][ch] = newNode();
            }
            cur = next[cur][ch];
        }
        leaf[cur] = id;
    }

    // Computes fail links and output links breadth-first and replaces every
    // missing transition by the transition of the fail target.
    void build() {
        queue<int> Q;
        fail[root] = root;
        last[root] = root;
        for (int i = 0; i < maxch; ++i) {
            int &child = next[root][i];
            if (child == -1) {
                child = root;
            } else {
                fail[child] = root;
                last[child] = root;
                Q.push(child);
            }
        }
        while (!Q.empty()) {
            int cur = Q.front();
            Q.pop();
            for (int i = 0; i < maxch; ++i) {
                int viaFail = next[fail[cur]][i];
                int &child = next[cur][i];
                if (child == -1) {
                    child = viaFail;
                } else {
                    fail[child] = viaFail;
                    // viaFail is no deeper than cur, so its own output link
                    // was already set when its parent was processed.
                    last[child] = leaf[viaFail] ? viaFail : last[viaFail];
                    Q.push(child);
                }
            }
        }
    }

    // Marks in vis[] every pattern occurring in text s1 and returns 1 if at
    // least one was found, 0 otherwise. The id argument is not used; it is
    // kept so existing callers keep compiling.
    int query(char *s1, int id) {
        (void)id;
        int cur = root;
        int has = 0;
        for (int i = 0; s1[i] != '\0'; ++i) {
            int k = s1[i] - beginCh;
            if (k < 0 || k >= maxch) {
                // No pattern can span a character outside the table.
                cur = root;
                continue;
            }
            cur = next[cur][k];
            // Follow output links so patterns that are suffixes of a
            // non-terminal node are reported too, touching only terminal nodes.
            for (int t = leaf[cur] ? cur : last[cur]; t != root; t = last[t]) {
                has = 1;
                vis[leaf[t]] = true;
            }
        }
        return has;
    }
} ac;

// Discards the remainder of the current input line, including its newline.
static void skipLine() {
    int c;
    while ((c = getchar()) != EOF && c != '\n') {}
}

// Bounded replacement for gets(): reads one line into buf (at most size-1
// characters), strips the trailing CR/LF and discards any excess of an
// over-long line. Returns false, with buf emptied, at end of input.
static bool readLine(char *buf, int size) {
    if (!fgets(buf, size, stdin)) {
        buf[0] = '\0';
        return false;
    }
    size_t len = strlen(buf);
    bool complete = len > 0 && buf[len - 1] == '\n';
    while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) {
        buf[--len] = '\0';
    }
    if (!complete) skipLine();
    return true;
}

int main() {
    // RE
    int idMax, n;
    static char s1[10005];
    ac.init();
    if (scanf("%d", &idMax) != 1) idMax = 0;
    skipLine();
    for (int i = 1; i <= idMax; ++i) {
        readLine(s1, sizeof(s1));
        ac.insert(s1, i);
    }
    ac.build();
    if (scanf("%d", &n) != 1) n = 0;
    skipLine();
    int cnt = 0;
    for (int i = 1; i <= n; ++i) {
        readLine(s1, sizeof(s1));
        ac.clrVis();
        cnt += ac.query(s1, i);
        // Ids must be printed in ascending order, so scan vis[] after the
        // query instead of printing matches as they are found.
        bool first = true;
        for (int id = 1; id <= idMax; ++id) {
            if (vis[id]) {
                if (first) {
                    printf("web %d: %d", i, id);
                    first = false;
                } else {
                    printf(" %d", id);
                }
            }
        }
        if (!first) printf("\n");
    }
    printf("total: %d\n", cnt);
    return 0;
}
0 0
原创粉丝点击