【小结】AC自动机

来源:互联网 发布:金利合软件 编辑:程序博客网 时间:2024/06/15 21:35
  • 参考资料:http://blog.csdn.net/niushuai666/article/details/7002823
  • 搞了两天,突然明白,这玩意它原来就是个DFA鸭!窝来分析分析

DFA 与 AC自动机

  • 考虑以下单词: {she, he, her}
  • 我们先画出它Trie树的模样

  • 留个板子

/* **********************************************
 * File Name: ac_automata.cpp
 * Author: zhengdongjian@tju.edu.cn
 * Created Time: Fri 14 Aug 2015 08:41:23
 *********************************************** */
#include <cstdio>
#include <cstring>
#include <queue>

const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: lowercase letters 'a'..'z'

/*
 * Aho-Corasick automaton stored in flat arrays.
 *
 * nxt & end are the plain trie: nxt[v][c] is the child of node v on letter c
 * (-1 while the edge does not exist), end[v] counts the patterns ending at v.
 * fail is the failure link added by build().
 *
 * Expected call order: clear() -> insert()* -> build() -> query().
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node; L = number of nodes handed out so far

    /* Maps a character to its column in nxt, or -1 if it is not in 'a'..'z'. */
    static int code(char c) {
        const int d = c - 'a';
        return (d >= 0 && d < MAXD) ? d : -1;
    }

    /* Takes the next node from the pool, resets it and returns its index. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Drops every node and recreates an empty root. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Adds one pattern to the trie.
     * A pattern containing a character outside 'a'..'z' cannot be stored in
     * this alphabet; it is skipped as a whole so no partial path is created
     * and no out-of-range column of nxt is touched.
     */
    void insert(const char* buf) {
        for (const char* p = buf; *p != '\0'; ++p) {
            if (code(*p) < 0) {
                return;
            }
        }
        int now = root;
        for (const char* p = buf; *p != '\0'; ++p) {
            const int c = code(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Computes the failure links with a queue-driven breadth-first pass and
     * fills every missing edge with the target reached through the failure
     * link, so that after build() nxt holds no -1 entries.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Runs the text through the automaton and returns how many inserted
     * patterns occur in it, each inserted pattern counted once.
     * buf: the text; len: its length, or -1 to use strlen(buf).
     * Must be called after build(). The call consumes the counters in end[],
     * so a second query on the same automaton does not count them again.
     */
    int query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // No pattern contains this character, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;  // count each pattern once; skip this reset to count repeats
            }
        }
        return res;
    }

    /* Prints one line per node: index, fail link, end counter, transitions. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            std::printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                std::printf(" %d", nxt[i][j]);
            }
            std::puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// The scanf field widths in main() are literals; keep them one below these sizes.
static_assert(MAXL == 64, "update the %63s field width in main()");
static_assert((MAX << 1) == 1000014, "update the %1000013s field width in main()");

/*
 * Reads T test cases. Each case is a pattern count n, then n patterns, then
 * one text; prints the result of ac.query() for that text.
 * Stops quietly when the input ends early.
 */
int main() {
    int T;
    if (std::scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T--) {
        int n;
        if (std::scanf(" %d", &n) != 1) {
            return 0;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (std::scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (std::scanf(" %1000013s", buf) != 1) {
            return 0;
        }
        std::printf("%d\n", ac.query(buf));
    }
    return 0;
}

几个简单的小题目

  • hdu2222,First attempt
/* **********************************************
 * File Name: ac_automata.cpp => hdu2222
 * Author: zhengdongjian@tju.edu.cn
 * Created Time: Fri 14 Aug 2015 08:41:23
 *********************************************** */
#include <cstdio>
#include <cstring>
#include <queue>

const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: lowercase letters 'a'..'z'

/*
 * Aho-Corasick automaton over 'a'..'z', stored in flat arrays.
 *
 * nxt[v][c]: child of node v on letter c (-1 while absent, never -1 after build()).
 * end[v]:    number of inserted patterns that end at node v.
 * fail[v]:   failure link of node v, filled in by build().
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node; L = number of nodes handed out so far

    /* Column of nxt for character c, or -1 when c is outside 'a'..'z'. */
    static int code(char c) {
        const int d = c - 'a';
        return (d >= 0 && d < MAXD) ? d : -1;
    }

    /* Resets the next unused node and returns its index. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Empties the automaton, leaving only a fresh root. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Inserts one pattern. Patterns with a character outside 'a'..'z' are
     * skipped entirely: they cannot be represented, and indexing nxt with
     * such a character would be out of range.
     */
    void insert(const char* buf) {
        for (const char* p = buf; *p != '\0'; ++p) {
            if (code(*p) < 0) {
                return;
            }
        }
        int now = root;
        for (const char* p = buf; *p != '\0'; ++p) {
            const int c = code(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Breadth-first construction of the failure links. Missing edges are
     * replaced by the edge of the failure target, which turns nxt into a
     * complete transition table.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the number of inserted patterns that occur in the text, each
     * inserted pattern counted once.
     * buf: the text; len: its length, or -1 to use strlen(buf).
     * Requires build(). Zeroes the end[] counters it has counted, so the
     * automaton has to be rebuilt before it can answer another text.
     */
    int query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // A character outside the alphabet breaks every partial match.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;  // count each pattern once; skip this reset to count repeats
            }
        }
        return res;
    }

    /* Dumps index, fail link, end counter and transition row of every node. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            std::printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                std::printf(" %d", nxt[i][j]);
            }
            std::puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// The scanf field widths in main() are literals; keep them one below these sizes.
static_assert(MAXL == 64, "update the %63s field width in main()");
static_assert((MAX << 1) == 1000014, "update the %1000013s field width in main()");

/*
 * For each of T test cases: reads n keywords and one text, prints how many
 * of the keywords occur in the text. Returns early if the input is truncated.
 */
int main() {
    int T;
    if (std::scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T--) {
        int n;
        if (std::scanf(" %d", &n) != 1) {
            return 0;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (std::scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (std::scanf(" %1000013s", buf) != 1) {
            return 0;
        }
        std::printf("%d\n", ac.query(buf));
    }
    return 0;
}
  • hdu2896
  • 病毒保证不同,简单统计,随便搞
/* **********************************************
 * File Name: 2896.cpp
 * Author: zhengdongjian@tju.edu.cn
 * Created Time: Fri 14 Aug 2015 11:25:51
 *********************************************** */
#include <cstdio>
#include <cstring>
#include <queue>
#include <set>

const int MAX = 100007;  // capacity of the node pool (also the input buffer size)
const int MAXD = 128;    // alphabet size: character codes 0..127

/*
 * Aho-Corasick automaton over character codes 0..127.
 *
 * nxt[v][c]: child of node v on character c (-1 while absent, never -1 after build()).
 * end[v]:    id of the pattern ending at node v, or -1 if none ends there.
 * fail[v]:   failure link of node v, filled in by build().
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node; L = number of nodes handed out so far

    /*
     * Column of nxt for character c, or -1 when its code is >= MAXD.
     * Going through unsigned char keeps bytes >= 0x80 from becoming a
     * negative index on platforms where char is signed.
     */
    static int code(char c) {
        const int u = static_cast<unsigned char>(c);
        return u < MAXD ? u : -1;
    }

    /* Resets the next unused node and returns its index. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = -1;
        return L - 1;
    }

    /* Empties the automaton, leaving only a fresh root. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Inserts one pattern and tags its last node with _end.
     * A later pattern with the same text overwrites the tag.
     * Patterns containing a character outside the alphabet are skipped.
     */
    void insert(const char* buf, int _end) {
        for (const char* p = buf; *p != '\0'; ++p) {
            if (code(*p) < 0) {
                return;
            }
        }
        int now = root;
        for (const char* p = buf; *p != '\0'; ++p) {
            const int c = code(*p);
            if (nxt[now][c] == -1) {
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        end[now] = _end;
    }

    /*
     * Breadth-first construction of the failure links. Missing edges are
     * replaced by the edge of the failure target, which turns nxt into a
     * complete transition table.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the ids of all patterns that occur in the text, in ascending
     * order. Requires build(). Does not modify the automaton.
     */
    std::set<int> query(const char* buf) {
        std::set<int> res;
        int now = root;
        for (const char* p = buf; *p != '\0'; ++p) {
            const int c = code(*p);
            if (c < 0) {
                // A character outside the alphabet breaks every partial match.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] != -1) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }
} ac;

char buf[MAX];

// The scanf field width in main() is a literal; keep it one below the buffer size.
static_assert(MAX == 100007, "update the %100006s field width in main()");

/*
 * For each data set: reads n patterns (ids 1..n) and m texts; for every text
 * that contains at least one pattern prints "web i:" followed by the matching
 * ids, then prints the number of such texts. Stops at end of input or on
 * malformed input.
 */
int main() {
    int n, m;
    while (std::scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (std::scanf(" %100006s", buf) != 1) {
                return 0;
            }
            ac.insert(buf, i);
        }
        ac.build();
        if (std::scanf(" %d", &m) != 1) {
            return 0;
        }
        int sum = 0;
        for (int i = 1; i <= m; ++i) {
            if (std::scanf(" %100006s", buf) != 1) {
                return 0;
            }
            const std::set<int> hits = ac.query(buf);
            if (!hits.empty()) {
                ++sum;
                std::printf("web %d:", i);
                for (int id : hits) {
                    std::printf(" %d", id);
                }
                std::puts("");
            }
        }
        std::printf("total: %d\n", sum);
    }
    return 0;
}
  • hdu3065
  • 要打印匹配串:打标记,Trie上节点打前驱和字符标记。P.S.空间上还可以优化
/* **********************************************
 * File Name: 3065.cpp
 * Author: zhengdongjian@tju.edu.cn
 * Created Time: Fri 14 Aug 2015 11:46:10
 *********************************************** */
#include <cstdio>
#include <cstring>
#include <map>
#include <queue>

const int MAX = 50007;  // capacity of the node pool
const int MAXD = 128;   // alphabet size: character codes 0..127

/*
 * Aho-Corasick automaton over character codes 0..127 that can also spell a
 * pattern back from its last node.
 *
 * nxt & end are the plain trie: nxt[v][c] is the child of v on character c
 * (-1 while absent), end[v] counts the patterns ending at v.
 * fail is the failure link added by build().
 * pre[v] is the trie parent of v (-1 for the root) and dad[v] the character
 * on the edge pre[v] -> v; together they allow walking a node back to the root.
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX], pre[MAX];
    char dad[MAX];
    int root, L;  // root node; L = number of nodes handed out so far

    /*
     * Column of nxt for character c, or -1 when its code is >= MAXD.
     * Going through unsigned char keeps bytes >= 0x80 from becoming a
     * negative index on platforms where char is signed.
     */
    static int code(char c) {
        const int u = static_cast<unsigned char>(c);
        return u < MAXD ? u : -1;
    }

    /* Resets the next unused node and returns its index. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        pre[L] = -1;
        end[L++] = 0;
        return L - 1;
    }

    /* Empties the automaton, leaving only a fresh root. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Inserts one pattern, recording parent and edge character for every
     * node it creates. Patterns containing a character outside the alphabet
     * are skipped.
     */
    void insert(const char* buf) {
        for (const char* p = buf; *p != '\0'; ++p) {
            if (code(*p) < 0) {
                return;
            }
        }
        int now = root;
        for (const char* p = buf; *p != '\0'; ++p) {
            const int c = code(*p);
            if (nxt[now][c] == -1) {
                const int child = newnode();
                nxt[now][c] = child;
                pre[child] = now;
                dad[child] = *p;
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    /*
     * Breadth-first construction of the failure links. Missing edges are
     * replaced by the edge of the failure target, which turns nxt into a
     * complete transition table.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Counts every occurrence of every pattern in the text.
     * buf: the text; len: its length, or -1 to use strlen(buf).
     * Returns a map from the pattern's last trie node to its number of
     * occurrences; patterns that never occur are absent.
     * Requires build(). Does not modify the automaton.
     */
    std::map<int, int> query(const char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(std::strlen(buf));
        }
        std::map<int, int> res;
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // A character outside the alphabet breaks every partial match.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] > 0) {
                    ++res[tmp];
                }
            }
        }
        return res;
    }

    /* Dumps index, fail link, end counter and transition row of every node. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            std::printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                std::printf(" %d", nxt[i][j]);
            }
            std::puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buffer[MAX * 40];

// The scanf field widths in main() are literals; keep them one below these sizes.
static_assert(MAXL == 64, "update the %63s field width in main()");
static_assert(MAX * 40 == 2000280, "update the %2000279s field width in main()");

/*
 * For each data set: reads n patterns and one text, then prints
 * "<pattern>: <count>" for every pattern that occurs, in ascending order of
 * the pattern's last trie node. Stops at end of input or on malformed input.
 */
int main() {
    int n;
    while (std::scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            // At most 63 characters are read, so every trie path fits in str
            // when it is spelled back below.
            if (std::scanf(" %63s", str) != 1) {
                return 0;
            }
            ac.insert(str);
        }
        ac.build();
        if (std::scanf(" %2000279s", buffer) != 1) {
            return 0;
        }
        const std::map<int, int> mp = ac.query(buffer);
        for (const auto& hit : mp) {
            // Spell the pattern backwards into the tail of str by following
            // the parent links from its last node up to the root.
            int idx = MAXL - 1;
            str[idx--] = '\0';
            for (int now = hit.first; now != ac.root; now = ac.pre[now]) {
                str[idx--] = ac.dad[now];
            }
            ++idx;
            std::printf("%s: %d\n", str + idx, hit.second);
        }
    }
    return 0;
}
  • zoj3430
  • 解码一下即可。debug了好久,最后发现Base64直接解码出来的字节不一定落在ASCII(0~127)范围内:例如 1111 1111 存进有符号 char 后是 -1,恰好等于 EOF,真让人难堪。。。所以解码结果要按 0~255 的整数保存,字符集开到256就可以过了…一部分调试中间修改导致整个代码看起来丑陋了一些,不愿改了。。
/* **********************************************
 * File Name: 3430.cpp
 * Author: zhengdongjian@tju.edu.cn
 * Created Time: Fri 14 Aug 2015 13:28:38
 *********************************************** */
#include <cctype>
#include <cstdio>
#include <cstring>
#include <queue>
#include <set>

const int MAX = 50007;  // capacity of the node pool (also the input buffer size)
const int MAXD = 256;   // alphabet size: every byte value 0..255

/*
 * Base64 Decode
 */

/*
 * Value (0..63) of one Base64 character: 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51,
 * '0'-'9' -> 52..61, '+' -> 62, anything else -> 63.
 */
inline int dic64(char c) {
    // <cctype> functions require a value representable as unsigned char.
    const unsigned char u = static_cast<unsigned char>(c);
    if (std::isupper(u)) {
        return c - 'A';
    } else if (std::islower(u)) {
        return c - 'a' + 26;
    } else if (std::isdigit(u)) {
        return c - '0' + 52;
    } else {
        return c == '+' ? 62 : 63;
    }
}

/*
 * Decodes the NUL-terminated Base64 string source into dest.
 * Each decoded byte is stored as an int in 0..255 and the sequence is
 * terminated with -1, so byte value 255 stays distinguishable from the
 * terminator. Trailing '=' padding is ignored; an empty source yields just
 * the terminator. dest must have room for 3 * strlen(source) / 4 + 1 ints.
 */
void decode64(const char source[], int dest[]) {
    int len = static_cast<int>(std::strlen(source));
    while (len > 0 && source[len - 1] == '=') {
        --len;
    }
    int acc = 0;    // bits collected so far, newest in the low end
    int nbits = 0;  // number of not-yet-emitted bits held in acc
    int p = 0;
    for (int i = 0; i < len; ++i) {
        // At most 6 bits are pending here, so 12 bits always suffice.
        acc = ((acc << 6) | dic64(source[i])) & 0xFFF;
        nbits += 6;
        if (nbits >= 8) {
            nbits -= 8;
            dest[p++] = (acc >> nbits) & 0xFF;
        }
    }
    dest[p] = -1;
}

/**********************************************/

/*
 * Aho-Corasick automaton over byte values 0..255. Patterns and texts are
 * int sequences terminated by -1, as produced by decode64().
 *
 * nxt & end are the plain trie: nxt[v][c] is the child of v on byte c
 * (-1 while absent), end[v] is the id of the pattern ending at v or 0 if
 * none ends there (so ids must be non-zero).
 * fail is the failure link added by build().
 *
 * Usage: clear(), insert() every pattern, build(), then query().
 */
struct Trie {
    int nxt[MAX][MAXD], fail[MAX], end[MAX];
    int root, L;  // root node; L = number of nodes handed out so far

    /* Resets the next unused node and returns its index. */
    int newnode() {
        std::memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    /* Empties the automaton, leaving only a fresh root. */
    void clear() {
        L = 0;
        root = newnode();
    }

    /*
     * Inserts the -1 terminated byte sequence buf and tags its last node
     * with _id. A later pattern with the same bytes overwrites the tag.
     * Every element before the terminator must lie in 0..MAXD-1.
     */
    void insert(const int* buf, int _id) {
        int now = root;
        for (const int* p = buf; *p != -1; ++p) {
            if (nxt[now][*p] == -1) {
                nxt[now][*p] = newnode();
            }
            now = nxt[now][*p];
        }
        end[now] = _id;
    }

    /*
     * Breadth-first construction of the failure links. Missing edges are
     * replaced by the edge of the failure target, which turns nxt into a
     * complete transition table.
     */
    void build() {
        std::queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    /*
     * Returns the ids of all patterns occurring in the -1 terminated byte
     * sequence buf. Requires build(). Does not modify the automaton.
     */
    std::set<int> query(const int* buf) {
        std::set<int> res;
        int now = root;
        for (const int* p = buf; *p != -1; ++p) {
            now = nxt[now][*p];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp]) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }

    /* Dumps index, fail link, end value and transition row of every node. */
    void debug() {
        for (int i = 0; i < L; ++i) {
            std::printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                std::printf(" %d", nxt[i][j]);
            }
            std::puts("]");
        }
    }
} ac;

const int MAXL = 128;
char str[MAXL];
char buf[MAX];
int jj[MAX];  // decoded bytes of the current pattern or text, -1 terminated

// The scanf field widths in main() are literals; keep them one below these sizes.
static_assert(MAXL == 128, "update the %127s field width in main()");
static_assert(MAX == 50007, "update the %50006s field width in main()");

/*
 * For each data set: reads n Base64 patterns (ids 1..n) and m Base64 texts,
 * decodes them, and prints for every text how many distinct pattern ids
 * occur in it, followed by an empty line per data set. Stops at end of input
 * or on malformed input.
 */
int main() {
    int n;
    while (std::scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (std::scanf(" %127s", str) != 1) {
                return 0;
            }
            decode64(str, jj);
            ac.insert(jj, i);
        }
        ac.build();
        int m;
        if (std::scanf(" %d", &m) != 1) {
            return 0;
        }
        while (m--) {
            if (std::scanf(" %50006s", buf) != 1) {
                return 0;
            }
            decode64(buf, jj);
            std::printf("%d\n", static_cast<int>(ac.query(jj).size()));
        }
        std::puts("");
    }
    return 0;
}
0 0
原创粉丝点击