AC自动机
来源:互联网 发布:redis c语言接口 编辑:程序博客网 时间:2024/06/07 09:21
要学会AC自动机,我们必须知道什么是Trie,也就是字典树。最好对KMP算法也有些了解。Trie树和KMP算法我之前博客都有写过,感兴趣的可以看看。
简单叙述下问题,现在给出
"hsay";
"ah";
"sahe";
"he";
"say";
"herhb";
"aher";
"erhs"
共8个关键词,要问字符串"yasaherhsay"中这8个关键词有几个出现过。
答案是7。
这就是一个多模式匹配问题。
AC自动机算法分为3步:构造一棵Trie树,构造失败指针和模式匹配过程。
失败指针和KMP算法中的next函数或称shift函数的功能类似。
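失败指针的作用是:当在Trie树的某个结点上无法继续匹配下一个字符时,不必回到根结点从头开始,而是跳转到失败指针所指向的结点(它对应当前已匹配串在Trie树中存在的最长真后缀),然后从那里继续匹配。(原文此处的示意图未能保留。)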
// AC_automachine.cpp : console demo of multi-pattern matching with an
// Aho-Corasick automaton built on a lowercase-letter trie.
//
// The code below is standard C++ and no longer depends on "stdafx.h".
// If the project is built with precompiled headers (/Yu), add
// #include "stdafx.h" back as the very first line of this file.

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <set>
#include <string>
#include <vector>

#ifdef _MSC_VER
#include <tchar.h>  // _tmain / _TCHAR used by the console entry point
#endif

#define MAXSIZE 26  // alphabet size: the letters 'a'..'z'

// One trie node.
struct TrieNode
{
    TrieNode* next[MAXSIZE];      // child per letter, NULL when absent
    TrieNode* parent;             // NULL for the root
    std::vector<TrieNode*> fail;  // nodes spelling the proper suffixes of this
                                  // node's string that exist in the trie,
                                  // ordered shortest to longest
    char p;                       // letter on the edge from parent ('\0' for the root)
    int Num;                      // number of inserted words passing through this node
    bool isword;                  // true when an inserted word ends here
};

// Distinct keywords found by the most recent AC_automachine() call.
std::set<std::string> re;

// Maps 'a'..'z' to 0..25; returns -1 for every other character so callers
// never index TrieNode::next out of range.
static int letter_index(char c)
{
    return (c >= 'a' && c <= 'z') ? (c - 'a') : -1;
}

// Allocates a node with no children and the given parent, letter and counter.
static TrieNode* make_node(TrieNode* parent, char letter, int num)
{
    TrieNode* node = new TrieNode;
    for (int i = 0; i < MAXSIZE; i++)
        node->next[i] = NULL;
    node->parent = parent;
    node->p = letter;
    node->Num = num;
    node->isword = false;
    return node;
}

// Follows str from root; returns the node reached, or NULL when the path does
// not exist, a character is outside 'a'..'z', or an argument is NULL.
static TrieNode* walk_path(TrieNode* root, const char* str)
{
    if (root == NULL || str == NULL)
        return NULL;
    TrieNode* node = root;
    for (; *str != '\0'; ++str)
    {
        const int k = letter_index(*str);
        if (k < 0 || node->next[k] == NULL)
            return NULL;
        node = node->next[k];
    }
    return node;
}

// Rebuilds the string spelled by the path root -> node.
static std::string spell_word(const TrieNode* root, const TrieNode* node)
{
    std::string word;
    for (; node != NULL && node != root; node = node->parent)
        word += node->p;
    std::reverse(word.begin(), word.end());
    return word;
}

// Records node's word in `re` when node ends a keyword.
// Returns 1 if the keyword was not in `re` yet, otherwise 0.
static int record_match(const TrieNode* root, const TrieNode* node)
{
    if (!node->isword)
        return 0;
    return re.insert(spell_word(root, node)).second ? 1 : 0;
}

// Creates an empty trie and returns its root.
// Release it with destroy_Trie().
TrieNode* initiate_Trie()
{
    return make_node(NULL, '\0', 0);
}

// Frees root and every node below it. Accepts NULL.
void destroy_Trie(TrieNode* root)
{
    if (root == NULL)
        return;
    for (int i = 0; i < MAXSIZE; i++)
        destroy_Trie(root->next[i]);
    delete root;
}

// Returns true when str was inserted as a complete word.
// Strings containing characters outside 'a'..'z' are never found.
bool search(TrieNode* root, const char* str)
{
    const TrieNode* end = walk_path(root, str);
    return end != NULL && end->isword;
}

// Inserts str into the trie and returns root.
// Words already present, NULL arguments, and words containing characters
// outside 'a'..'z' leave the trie unchanged.
// Call initiate_fail_pointer(root, root) again after inserting new words.
TrieNode* build_Trie_singleword(TrieNode* root, const char* str)
{
    if (root == NULL || str == NULL)
        return root;
    // Validate the whole word first so a bad character cannot leave a
    // half-inserted path with inflated counters.
    for (const char* c = str; *c != '\0'; ++c)
        if (letter_index(*c) < 0)
            return root;
    if (search(root, str))
        return root;

    root->Num = root->Num + 1;
    TrieNode* tn = root;
    for (; *str != '\0'; ++str)
    {
        const int k = letter_index(*str);
        if (tn->next[k] == NULL)
            tn->next[k] = make_node(tn, *str, 1);
        else
            tn->next[k]->Num = tn->next[k]->Num + 1;
        tn = tn->next[k];
    }
    tn->isword = true;
    return root;
}

// Fills TrieNode::fail for node and every node below it (pass node == root to
// process the whole trie). For each node, every proper suffix of its string
// is looked up from root; the suffixes that exist are stored shortest first,
// so fail.back() is the longest one, i.e. the classic failure link.
// The list is rebuilt from scratch, so the call may be repeated after the
// trie has grown.
void initiate_fail_pointer(TrieNode* root, TrieNode* node)
{
    if (root == NULL || node == NULL)
        return;
    if (node != root)
    {
        const std::string word = spell_word(root, node);
        node->fail.clear();
        for (std::size_t len = 1; len < word.size(); ++len)
        {
            TrieNode* suffix = walk_path(root, word.c_str() + (word.size() - len));
            if (suffix != NULL)
                node->fail.push_back(suffix);
        }
    }
    for (int i = 0; i < MAXSIZE; i++)
        if (node->next[i] != NULL)
            initiate_fail_pointer(root, node->next[i]);
}

// Scans str once and returns how many distinct keywords of the trie occur in
// it; the keywords themselves are left in the global `re`, which is reset at
// the start of every call.
// Requires initiate_fail_pointer(root, root) to have been called after the
// last insertion. Characters outside 'a'..'z' cannot be part of a keyword and
// simply restart matching at the root.
int AC_automachine(TrieNode* root, const char* str)
{
    re.clear();
    if (root == NULL || str == NULL)
        return 0;

    int count = 0;
    TrieNode* state = root;
    for (; *str != '\0'; ++str)
    {
        const int k = letter_index(*str);
        if (k < 0)
        {
            state = root;
            continue;
        }
        // Fall back to the longest suffix that can be extended by this letter.
        while (state != root && state->next[k] == NULL)
            state = state->fail.empty() ? root : state->fail.back();
        if (state->next[k] != NULL)
            state = state->next[k];

        // Every keyword ending at this position is either the current state
        // or one of its suffix nodes.
        count += record_match(root, state);
        for (std::size_t i = 0; i < state->fail.size(); ++i)
            count += record_match(root, state->fail[i]);
    }
    return count;
}

// Builds the article's eight-keyword trie, scans "yasaherhsay" and returns
// the number of keywords found (expected: 7).
int run_demo()
{
    static const char* const keywords[] = {
        "hsay", "ah", "sahe", "he", "say", "herhb", "aher", "erhs"
    };
    TrieNode* root = initiate_Trie();
    for (std::size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i)
        root = build_Trie_singleword(root, keywords[i]);
    initiate_fail_pointer(root, root);
    const int found = AC_automachine(root, "yasaherhsay");
    destroy_Trie(root);
    return found;
}

#ifdef _MSC_VER
// MSVC console entry point, as in the original project. Other toolchains can
// call run_demo() from their own main().
int _tmain(int argc, _TCHAR* argv[])
{
    (void)argc;
    (void)argv;
    std::cout << run_demo() << std::endl;
    system("pause");
    return 0;
}
#endif
0 0
- AC自动机...
- AC自动机
- AC 自动机
- AC自动机
- AC自动机
- ac自动机
- ac自动机
- AC自动机
- AC自动机
- AC自动机
- AC自动机
- AC自动机
- AC 自动机
- ac自动机
- AC自动机
- AC自动机
- AC自动机
- AC自动机
- jQuery效果之滑动
- 2015-08-09
- 动态链接库的应用
- 2015 Multi-University Training Contest-5 MZL's chemistry
- OCUI界面设计:弹出框
- AC自动机
- iOS 正则表达式判断邮箱、手机号、身份证、昵称、密码、手机型号等
- 2015 Multi-University Training Contest-6 Key Set
- Git Shell命令大全
- POJ2774 字符串HASH
- Regionals 2009 Asia - Hsinchu uvalive 4526 Inventory - dp
- HDU - 1824 Let's go home(2-SAT)
- zookeeper原理
- hdu1702(ACboy needs your help again!) 在杭电又遇坑了