【小结】AC自动机
来源:互联网 发布:金利合软件 编辑:程序博客网 时间:2024/06/15 21:35
- 参考资料:http://blog.csdn.net/niushuai666/article/details/7002823
- 搞了两天,突然明白,这玩意原来就是个 DFA(确定性有限状态自动机)呀!我来分析分析
从DFA 到AC 自动机
- 考虑以下单词:
{she, he, her} 我们先画出它Trie树的模样
留个板子
/* **********************************************
    File Name: ac_automata.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: Fri 14 Aug 2015 08:41:23
*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: the 26 lowercase letters

/*
 * Aho-Corasick automaton over the lowercase letters 'a'..'z', stored in
 * flat arrays.
 *
 * Usage order: clear() -> insert() for every pattern -> build() -> query().
 * build() overwrites the "absent" (-1) transitions, so insert() must not be
 * called again until clear() has reset the automaton.
 */
struct Trie {
    int nxt[MAX][MAXD];  // nxt[u][c]: transition of node u on letter c (-1 = absent before build())
    int fail[MAX];       // failure link of every node, filled in by build()
    int end[MAX];        // number of inserted patterns that end at the node
    int root, L;         // root node index; L = number of nodes allocated so far

    // Column of `c` in nxt, or -1 when `c` is not a lowercase letter.
    static int code(char c) {
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

    // Allocates node L with no children and no pattern ending there;
    // returns its index. The caller must make sure L < MAX.
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    // Discards all nodes and recreates the root.
    void clear() {
        L = 0;
        root = newnode();
    }

    // Adds the NUL-terminated pattern `buf`. A pattern containing a character
    // outside 'a'..'z' is ignored (it cannot be indexed), and insertion stops
    // early if the node pool is exhausted.
    void insert(char* buf) {
        const int len = static_cast<int>(strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                if (L >= MAX) {
                    return;  // pool full: refuse to write past the arrays
                }
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    // Computes the failure links breadth-first and replaces every absent
    // transition with the transition of the failure state, so that query()
    // can follow nxt directly without walking fail links per character.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    // Scans the first `len` characters of `buf` (the whole string when len is
    // -1) and returns how many inserted patterns occur in it. Counters are
    // zeroed as they are collected, so every pattern is counted at most once
    // and the automaton must be rebuilt before it is queried again.
    int query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // No pattern contains this character, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;  // count once; to count repeated occurrences, do not reset here
            }
        }
        return res;
    }

    // Dumps every node as "index, fail, end, [transitions...]".
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// Reads one whitespace-delimited token from stdin into dst (capacity `cap`,
// including the terminating NUL). Characters beyond cap - 1 are consumed and
// dropped instead of overflowing dst. Returns false when EOF is reached
// before any token character.
static bool readToken(char* dst, int cap) {
    int c = getchar();
    while (c != EOF && isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return false;
    }
    int n = 0;
    while (c != EOF && !isspace(c)) {
        if (n < cap - 1) {
            dst[n++] = static_cast<char>(c);
        }
        c = getchar();
    }
    if (c != EOF) {
        ungetc(c, stdin);  // leave the delimiter in the stream, as scanf("%s") does
    }
    dst[n] = '\0';
    return true;
}

// For each test case: read n patterns, build the automaton, read one text and
// print how many of the patterns occur in it.
int main() {
    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (!readToken(str, MAXL)) {
                break;
            }
            ac.insert(str);
        }
        ac.build();
        if (!readToken(buf, static_cast<int>(sizeof(buf)))) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}
几个简单的小题目
- hdu2222,First attempt
/* **********************************************
    File Name: ac_automata.cpp => hdu2222
    Author: zhengdongjian@tju.edu.cn
    Created Time: Fri 14 Aug 2015 08:41:23
*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 500007;  // capacity of the node pool
const int MAXD = 26;     // alphabet size: the 26 lowercase letters

/*
 * Aho-Corasick automaton over the lowercase letters 'a'..'z', stored in
 * flat arrays.
 *
 * Usage order: clear() -> insert() for every pattern -> build() -> query().
 * build() overwrites the "absent" (-1) transitions, so insert() must not be
 * called again until clear() has reset the automaton.
 */
struct Trie {
    int nxt[MAX][MAXD];  // nxt[u][c]: transition of node u on letter c (-1 = absent before build())
    int fail[MAX];       // failure link of every node, filled in by build()
    int end[MAX];        // number of inserted patterns that end at the node
    int root, L;         // root node index; L = number of nodes allocated so far

    // Column of `c` in nxt, or -1 when `c` is not a lowercase letter.
    static int code(char c) {
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

    // Allocates node L with no children and no pattern ending there;
    // returns its index. The caller must make sure L < MAX.
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    // Discards all nodes and recreates the root.
    void clear() {
        L = 0;
        root = newnode();
    }

    // Adds the NUL-terminated pattern `buf`. A pattern containing a character
    // outside 'a'..'z' is ignored (it cannot be indexed), and insertion stops
    // early if the node pool is exhausted.
    void insert(char* buf) {
        const int len = static_cast<int>(strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                if (L >= MAX) {
                    return;  // pool full: refuse to write past the arrays
                }
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    // Computes the failure links breadth-first and replaces every absent
    // transition with the transition of the failure state, so that query()
    // can follow nxt directly without walking fail links per character.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    // Scans the first `len` characters of `buf` (the whole string when len is
    // -1) and returns how many inserted patterns occur in it. Counters are
    // zeroed as they are collected, so every pattern is counted at most once
    // and the automaton must be rebuilt before it is queried again.
    int query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        int res = 0;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // No pattern contains this character, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                res += end[tmp];
                end[tmp] = 0;  // count once; to count repeated occurrences, do not reset here
            }
        }
        return res;
    }

    // Dumps every node as "index, fail, end, [transitions...]".
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buf[MAX << 1];

// Reads one whitespace-delimited token from stdin into dst (capacity `cap`,
// including the terminating NUL). Characters beyond cap - 1 are consumed and
// dropped instead of overflowing dst. Returns false when EOF is reached
// before any token character.
static bool readToken(char* dst, int cap) {
    int c = getchar();
    while (c != EOF && isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return false;
    }
    int n = 0;
    while (c != EOF && !isspace(c)) {
        if (n < cap - 1) {
            dst[n++] = static_cast<char>(c);
        }
        c = getchar();
    }
    if (c != EOF) {
        ungetc(c, stdin);  // leave the delimiter in the stream, as scanf("%s") does
    }
    dst[n] = '\0';
    return true;
}

// For each test case: read n keywords, build the automaton, read one text and
// print how many of the keywords occur in it.
int main() {
    int T;
    if (scanf(" %d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf(" %d", &n) != 1) {
            break;
        }
        ac.clear();
        for (int i = 0; i < n; ++i) {
            if (!readToken(str, MAXL)) {
                break;
            }
            ac.insert(str);
        }
        ac.build();
        if (!readToken(buf, static_cast<int>(sizeof(buf)))) {
            break;
        }
        printf("%d\n", ac.query(buf));
    }
    return 0;
}
- hdu2896
- 病毒保证不同,简单统计,随便搞
/* **********************************************
    File Name: 2896.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: Fri 14 Aug 2015 11:25:51
*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 100007;  // capacity of the node pool (also the text buffer size)
const int MAXD = 128;    // alphabet size: byte values 0..127

/*
 * Aho-Corasick automaton over 7-bit characters that remembers, for every
 * pattern, the id it was inserted with.
 *
 * Usage order: clear() -> insert() for every pattern -> build() -> query().
 */
struct Trie {
    int nxt[MAX][MAXD];  // nxt[u][c]: transition of node u on byte c (-1 = absent before build())
    int fail[MAX];       // failure link of every node, filled in by build()
    int end[MAX];        // id of the pattern ending at the node, or -1 when none does
    int root, L;         // root node index; L = number of nodes allocated so far

    // Column of `c` in nxt, or -1 when the byte value is outside 0..MAXD-1.
    // Going through unsigned char keeps bytes >= 0x80 from becoming negative
    // indices on platforms where char is signed.
    static int code(char c) {
        const int v = static_cast<unsigned char>(c);
        return v < MAXD ? v : -1;
    }

    // Allocates node L with no children and no pattern id; returns its index.
    // The caller must make sure L < MAX.
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = -1;
        return L - 1;
    }

    // Discards all nodes and recreates the root.
    void clear() {
        L = 0;
        root = newnode();
    }

    // Adds the NUL-terminated pattern `buf` and tags its last node with
    // `_end`. A pattern containing a byte outside the alphabet is ignored,
    // and insertion stops early if the node pool is exhausted.
    void insert(char* buf, int _end) {
        const int len = static_cast<int>(strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                if (L >= MAX) {
                    return;  // pool full: refuse to write past the arrays
                }
                nxt[now][c] = newnode();
            }
            now = nxt[now][c];
        }
        end[now] = _end;
    }

    // Computes the failure links breadth-first and replaces every absent
    // transition with the transition of the failure state.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    // Returns the ids of all patterns that occur in the NUL-terminated text
    // `buf`, in ascending order. The automaton is not modified.
    set<int> query(char* buf) {
        const int len = static_cast<int>(strlen(buf));
        int now = root;
        set<int> res;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // No pattern contains this byte, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] != -1) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }
} ac;

char buf[MAX];

// Reads one whitespace-delimited token from stdin into dst (capacity `cap`,
// including the terminating NUL). Characters beyond cap - 1 are consumed and
// dropped instead of overflowing dst. Returns false when EOF is reached
// before any token character.
static bool readToken(char* dst, int cap) {
    int c = getchar();
    while (c != EOF && isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return false;
    }
    int n = 0;
    while (c != EOF && !isspace(c)) {
        if (n < cap - 1) {
            dst[n++] = static_cast<char>(c);
        }
        c = getchar();
    }
    if (c != EOF) {
        ungetc(c, stdin);  // leave the delimiter in the stream, as scanf("%s") does
    }
    dst[n] = '\0';
    return true;
}

// For each data set: read n patterns (ids 1..n), then m texts; for every text
// containing at least one pattern print "web i: ids...", and finally the
// number of such texts.
int main() {
    int n;
    // Compare against 1: `~scanf(...)` would loop forever on a matching failure (return 0).
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (!readToken(buf, MAX)) {
                break;
            }
            ac.insert(buf, i);
        }
        ac.build();

        int m;
        if (scanf(" %d", &m) != 1) {
            break;
        }
        int sum = 0;
        for (int i = 1; i <= m; ++i) {
            if (!readToken(buf, MAX)) {
                break;
            }
            const set<int> found = ac.query(buf);
            if (found.empty()) {
                continue;
            }
            ++sum;
            printf("web %d:", i);
            for (set<int>::const_iterator it = found.begin(); it != found.end(); ++it) {
                printf(" %d", *it);
            }
            puts("");
        }
        printf("total: %d\n", sum);
    }
    return 0;
}
- hdu3065
- 要打印匹配串:打标记,Trie上节点打前驱和字符标记。P.S.空间上还可以优化
/* **********************************************
    File Name: 3065.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: Fri 14 Aug 2015 11:46:10
*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 50007;  // capacity of the node pool
const int MAXD = 128;   // alphabet size: byte values 0..127

/*
 * Aho-Corasick automaton over 7-bit characters. Every node also records its
 * parent and the character on the edge from the parent, so the pattern that
 * ends at a node can be reconstructed by walking back to the root.
 *
 * Usage order: clear() -> insert() for every pattern -> build() -> query().
 */
struct Trie {
    int nxt[MAX][MAXD];  // nxt[u][c]: transition of node u on byte c (-1 = absent before build())
    int fail[MAX];       // failure link of every node, filled in by build()
    int end[MAX];        // number of inserted patterns that end at the node
    int pre[MAX];        // parent node in the trie (-1 for the root)
    char dad[MAX];       // character on the edge parent -> node
    int root, L;         // root node index; L = number of nodes allocated so far

    // Column of `c` in nxt, or -1 when the byte value is outside 0..MAXD-1.
    // Going through unsigned char keeps bytes >= 0x80 from becoming negative
    // indices on platforms where char is signed.
    static int code(char c) {
        const int v = static_cast<unsigned char>(c);
        return v < MAXD ? v : -1;
    }

    // Allocates node L with no children, no parent and no pattern ending
    // there; returns its index. The caller must make sure L < MAX.
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        pre[L] = -1;
        end[L++] = 0;
        return L - 1;
    }

    // Discards all nodes and recreates the root.
    void clear() {
        L = 0;
        root = newnode();
    }

    // Adds the NUL-terminated pattern `buf`. A pattern containing a byte
    // outside the alphabet is ignored, and insertion stops early if the node
    // pool is exhausted.
    void insert(char* buf) {
        const int len = static_cast<int>(strlen(buf));
        for (int i = 0; i < len; ++i) {
            if (code(buf[i]) < 0) {
                return;
            }
        }
        int now = root;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (nxt[now][c] == -1) {
                if (L >= MAX) {
                    return;  // pool full: refuse to write past the arrays
                }
                const int child = newnode();
                nxt[now][c] = child;
                pre[child] = now;
                dad[child] = buf[i];
            }
            now = nxt[now][c];
        }
        ++end[now];
    }

    // Computes the failure links breadth-first and replaces every absent
    // transition with the transition of the failure state.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    // Scans the first `len` characters of `buf` (the whole string when len is
    // -1) and returns, for every pattern end node that was reached, how many
    // times it was reached (node index -> occurrence count, overlapping
    // occurrences included). The automaton is not modified.
    map<int, int> query(char* buf, int len = -1) {
        if (len == -1) {
            len = static_cast<int>(strlen(buf));
        }
        int now = root;
        map<int, int> res;
        for (int i = 0; i < len; ++i) {
            const int c = code(buf[i]);
            if (c < 0) {
                // No pattern contains this byte, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][c];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp] > 0) {
                    ++res[tmp];
                }
            }
        }
        return res;
    }

    // Dumps every node as "index, fail, end, [transitions...]".
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 64;
char str[MAXL];
char buffer[MAX * 40];

// Reads one whitespace-delimited token from stdin into dst (capacity `cap`,
// including the terminating NUL). Characters beyond cap - 1 are consumed and
// dropped instead of overflowing dst. Returns false when EOF is reached
// before any token character.
static bool readToken(char* dst, int cap) {
    int c = getchar();
    while (c != EOF && isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return false;
    }
    int n = 0;
    while (c != EOF && !isspace(c)) {
        if (n < cap - 1) {
            dst[n++] = static_cast<char>(c);
        }
        c = getchar();
    }
    if (c != EOF) {
        ungetc(c, stdin);  // leave the delimiter in the stream, as scanf("%s") does
    }
    dst[n] = '\0';
    return true;
}

// For each data set: read n patterns and one text, then print every pattern
// that occurs in the text together with its occurrence count, ordered by the
// index of the pattern's end node.
int main() {
    int n;
    // Compare against 1: `~scanf(...)` would loop forever on a matching failure (return 0).
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (!readToken(str, MAXL)) {
                break;
            }
            ac.insert(str);
        }
        ac.build();
        if (!readToken(buffer, static_cast<int>(sizeof(buffer)))) {
            break;
        }

        const map<int, int> mp = ac.query(buffer);
        for (map<int, int>::const_iterator it = mp.begin(); it != mp.end(); ++it) {
            // Rebuild the pattern right-to-left at the tail of `str` by
            // following the parent links; `idx > 0` keeps the write in bounds.
            int idx = MAXL - 1;
            str[idx] = '\0';
            for (int node = it->first; node != ac.root && idx > 0; node = ac.pre[node]) {
                str[--idx] = ac.dad[node];
            }
            printf("%s: %d\n", str + idx, it->second);
        }
    }
    return 0;
}
- zoj3430
- 解码一下即可。debug了好久,最后发现Base64直接解码出来的字节可能不在 ASCII 范围(0~127)内,
例如 1111 1111(0xFF)存进有符号 char 后是 -1,恰好与 EOF 的值相同… 真让人难堪。。。所以解码结果用 int 保存、字符集开到256就可以过了…一部分调试中间修改导致整个代码看起来丑陋了一些,不愿改了。。
/* **********************************************
    File Name: 3430.cpp
    Author: zhengdongjian@tju.edu.cn
    Created Time: Fri 14 Aug 2015 13:28:38
*********************************************** */
#include <bits/stdc++.h>
using namespace std;

typedef pair<int, int> P;
const double EPS = 1e-8;
const double PI = acos(-1.0);
const int MAX = 50007;  // capacity of the node pool and of the input buffers
const int MAXD = 256;   // alphabet size: every possible byte value

/*
 * Base64 decoding
 */
// The Base64 alphabet; the position of a character is its 6-bit value.
static const char cb64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Returns the 6-bit value of the Base64 character `c`. Any character that is
// not a letter, a digit or '+' is treated as '/' (63). Plain range checks are
// used because the <cctype> classifiers are undefined for negative char values.
inline int dic64(char& c) {
    if (c >= 'A' && c <= 'Z') {
        return c - 'A';
    }
    if (c >= 'a' && c <= 'z') {
        return c - 'a' + 26;
    }
    if (c >= '0' && c <= '9') {
        return c - '0' + 52;
    }
    return c == '+' ? 62 : 63;
}

// Decodes the NUL-terminated Base64 string `source` into `dest`, one byte
// value (0..255) per int, followed by a -1 terminator. Trailing '=' padding
// is stripped and each '=' removes one byte from the output. Only complete
// bytes are emitted, so `dest` needs room for strlen(source) * 6 / 8 + 1 ints.
void decode64(char source[], int dest[]) {
    int len = static_cast<int>(strlen(source));
    int bits = len * 6;
    while (len > 0 && source[len - 1] == '=') {
        --len;
        bits -= 8;
    }
    const int total = bits > 0 ? bits / 8 : 0;  // number of whole bytes to produce

    int produced = 0;
    unsigned int acc = 0;  // bit accumulator: holds the `pending` not-yet-emitted low bits
    int pending = 0;
    for (int i = 0; i < len && produced < total; ++i) {
        acc = (acc << 6) | static_cast<unsigned int>(dic64(source[i]));
        pending += 6;
        if (pending >= 8) {
            pending -= 8;
            dest[produced++] = static_cast<int>((acc >> pending) & 0xFFu);
            acc &= (1u << pending) - 1u;  // keep only the bits that are still pending
        }
    }
    dest[produced] = -1;
}

/**********************************************/

/*
 * Aho-Corasick automaton over byte values 0..255. Sequences are int arrays
 * terminated by -1, as produced by decode64().
 *
 * Usage order: clear() -> insert() for every pattern -> build() -> query().
 */
struct Trie {
    int nxt[MAX][MAXD];  // nxt[u][c]: transition of node u on byte c (-1 = absent before build())
    int fail[MAX];       // failure link of every node, filled in by build()
    int end[MAX];        // id of the pattern ending at the node, or 0 when none does
    int root, L;         // root node index; L = number of nodes allocated so far

    // True when `v` can be used as a column of nxt.
    static bool valid(int v) {
        return v >= 0 && v < MAXD;
    }

    // Allocates node L with no children and no pattern id; returns its index.
    // The caller must make sure L < MAX.
    int newnode() {
        memset(nxt[L], -1, sizeof(int) * MAXD);
        end[L++] = 0;
        return L - 1;
    }

    // Discards all nodes and recreates the root.
    void clear() {
        L = 0;
        root = newnode();
    }

    // Adds the -1-terminated sequence `buf` and tags its last node with
    // `_id` (ids must be non-zero, since 0 means "no pattern"). If the same
    // sequence is inserted twice, the later id replaces the earlier one.
    // A sequence containing a value outside 0..MAXD-1 is ignored, and
    // insertion stops early if the node pool is exhausted.
    void insert(int* buf, int _id) {
        for (const int* p = buf; *p != -1; ++p) {
            if (!valid(*p)) {
                return;
            }
        }
        int now = root;
        for (const int* p = buf; *p != -1; ++p) {
            if (nxt[now][*p] == -1) {
                if (L >= MAX) {
                    return;  // pool full: refuse to write past the arrays
                }
                nxt[now][*p] = newnode();
            }
            now = nxt[now][*p];
        }
        end[now] = _id;
    }

    // Computes the failure links breadth-first and replaces every absent
    // transition with the transition of the failure state.
    void build() {
        queue<int> Q;
        fail[root] = root;
        for (int i = 0; i < MAXD; ++i) {
            if (nxt[root][i] == -1) {
                nxt[root][i] = root;
            } else {
                fail[nxt[root][i]] = root;
                Q.push(nxt[root][i]);
            }
        }
        while (!Q.empty()) {
            const int now = Q.front();
            Q.pop();
            for (int i = 0; i < MAXD; ++i) {
                if (nxt[now][i] == -1) {
                    nxt[now][i] = nxt[fail[now]][i];
                } else {
                    fail[nxt[now][i]] = nxt[fail[now]][i];
                    Q.push(nxt[now][i]);
                }
            }
        }
    }

    // Returns the ids of all patterns that occur in the -1-terminated
    // sequence `buf`. The automaton is not modified.
    set<int> query(int* buf) {
        int now = root;
        set<int> res;
        for (const int* p = buf; *p != -1; ++p) {
            if (!valid(*p)) {
                // No pattern contains this value, so no match can span it.
                now = root;
                continue;
            }
            now = nxt[now][*p];
            for (int tmp = now; tmp != root; tmp = fail[tmp]) {
                if (end[tmp]) {
                    res.insert(end[tmp]);
                }
            }
        }
        return res;
    }

    // Dumps every node as "index, fail, end, [transitions...]".
    void debug() {
        for (int i = 0; i < L; ++i) {
            printf("%d, %d, %d, [%d", i, fail[i], end[i], nxt[i][0]);
            for (int j = 1; j < MAXD; ++j) {
                printf(" %d", nxt[i][j]);
            }
            puts("]");
        }
    }
} ac;

const int MAXL = 128;
char str[MAXL];  // one Base64-encoded pattern
char buf[MAX];   // one Base64-encoded text
int jj[MAX];     // decoded bytes of the current pattern/text, -1 terminated

// Reads one whitespace-delimited token from stdin into dst (capacity `cap`,
// including the terminating NUL). Characters beyond cap - 1 are consumed and
// dropped instead of overflowing dst. Returns false when EOF is reached
// before any token character.
static bool readToken(char* dst, int cap) {
    int c = getchar();
    while (c != EOF && isspace(c)) {
        c = getchar();
    }
    if (c == EOF) {
        return false;
    }
    int n = 0;
    while (c != EOF && !isspace(c)) {
        if (n < cap - 1) {
            dst[n++] = static_cast<char>(c);
        }
        c = getchar();
    }
    if (c != EOF) {
        ungetc(c, stdin);  // leave the delimiter in the stream, as scanf("%s") does
    }
    dst[n] = '\0';
    return true;
}

// For each data set: read n Base64-encoded patterns (ids 1..n), then m
// Base64-encoded texts; for every text print how many distinct pattern ids
// occur in its decoded bytes, followed by one blank line per data set.
int main() {
    int n;
    // Compare against 1: `~scanf(...)` would loop forever on a matching failure (return 0).
    while (scanf(" %d", &n) == 1) {
        ac.clear();
        for (int i = 1; i <= n; ++i) {
            if (!readToken(str, MAXL)) {
                break;
            }
            decode64(str, jj);
            ac.insert(jj, i);
        }
        ac.build();

        int m;
        if (scanf(" %d", &m) != 1) {
            break;
        }
        for (int i = 0; i < m; ++i) {
            if (!readToken(buf, MAX)) {
                break;
            }
            decode64(buf, jj);
            printf("%d\n", static_cast<int>(ac.query(jj).size()));
        }
        puts("");
    }
    return 0;
}
0 0
- AC自动机专题小结
- AC自动机 + DP小结
- AC自动机算法小结
- 【小结】AC自动机
- 【小结】AC自动机
- AC自动机小结
- AC自动机专题小结
- Trie、KMP、AC自动机小结
- AC自动机+DP小结 (一)
- hdu2222 Keywords Search & AC自动机学习小结
- AC自动机模板及基础例题小结
- 【转载】AC自动机小结--跟kuangbin巨巨学习AC自动机
- AC自动机...
- AC自动机
- AC 自动机
- AC自动机
- AC自动机
- ac自动机
- 为何Windows下的动态库总伴随一个静态库?
- HttpURLConnection和HttpClient两种连接方式的区别
- 如何能够在Android运行Java的main方法
- AMD驱动安装
- SQL当前的进程信息,当前的执行语句
- 【小结】AC自动机
- MDK 生成BIN文件 最简单方式
- 基于Swift语言开发微信、QQ和微博的SSO授权登录代码分析
- IOS 编程初体验 第一篇:自学和培训的选择
- android linearlayout代替listview实现
- TOMCAT修改jvm内存的方法
- 比较优秀的软件记录
- c++线程的创建
- CString类型的字符串写入文件,所遇到的问题