字符串匹配算法KMP

来源:互联网 发布:网络禁歌120首试听 编辑:程序博客网 时间:2024/05/23 22:22

KMP算法的原理,这里我不写,建议参考:

http://www.ruanyifeng.com/blog/2013/05/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm.html

http://blog.csdn.net/v_JULY_v/article/details/6545192

http://blog.csdn.net/joylnwang/article/details/6778316

这几篇博客写得非常好,以本人的能力只能写出更烂的解释……所以就不再多此一举了。下面仅给出两段代码,分别用 C++ 和 Python 编写(均已运行通过),

希望能对大家有所帮助:


C++代码:

#include <cstring>
#include <iostream>
#include <vector>

/**
 * Builds the KMP failure table for `pattern`.
 *
 * On success, pi[i] holds the index of the last character of the longest
 * proper prefix of pattern[0..i] that is also a suffix of pattern[0..i],
 * or -1 when no such prefix exists.
 *
 * @param pattern NUL-terminated pattern string.
 * @param pi      Output buffer; the caller must provide room for at least
 *                strlen(pattern) ints.
 * @return false when either pointer is null or the pattern is empty (in
 *         which case `pi` is not written to); true otherwise.
 */
bool piFun(const char *pattern, int *pi)
{
    if (pattern == NULL || pi == NULL)
        return false;

    const int len = static_cast<int>(std::strlen(pattern));
    // An empty pattern has no table entries; writing pi[0] here would touch
    // memory past a buffer sized for strlen(pattern) elements.
    if (len == 0)
        return false;

    pi[0] = -1;
    int k = -1;
    for (int i = 1; i < len; ++i) {
        // Fall back to shorter and shorter prefixes that are also suffixes of
        // pattern[0..i-1] until one can be extended by pattern[i].
        while (k >= 0 && pattern[k + 1] != pattern[i])
            k = pi[k];
        // The matching prefix/suffix grows by one character.
        if (pattern[k + 1] == pattern[i])
            ++k;
        // Position to resume from after a mismatch following index i.
        pi[i] = k;
    }
    return true;
}

/**
 * Finds the first occurrence of `pattern` in `text` using KMP.
 *
 * @param text    NUL-terminated text to search in.
 * @param pattern NUL-terminated pattern to search for.
 * @return Zero-based index of the first match, or -1 when there is no match,
 *         when either pointer is null, or when the pattern is empty.
 */
int matchStr(const char *text, const char *pattern)
{
    if (text == NULL || pattern == NULL)
        return -1;

    const int lenT = static_cast<int>(std::strlen(text));
    const int lenP = static_cast<int>(std::strlen(pattern));
    // Empty patterns are rejected, and a pattern longer than the text can
    // never match, so neither case needs a failure table.
    if (lenP == 0 || lenP > lenT)
        return -1;

    // The vector owns the table, so every return path releases it.
    std::vector<int> pi(lenP);
    if (!piFun(pattern, &pi[0]))
        return -1;

    int k = -1; // index of the last pattern character matched so far
    for (int i = 0; i < lenT; ++i) {
        // The next pattern character does not match: fall back via the table.
        while (k >= 0 && pattern[k + 1] != text[i])
            k = pi[k];
        // The next pattern character matches: advance.
        if (pattern[k + 1] == text[i])
            ++k;
        // The whole pattern has been matched; report where it started.
        if (k == lenP - 1)
            return i - lenP + 1;
    }
    return -1;
}

// Define KMP_NO_DEMO_MAIN to reuse piFun/matchStr from a test harness or
// another program without pulling in this demo entry point.
#ifndef KMP_NO_DEMO_MAIN
int main()
{
    const char text[] = "ababbacdefgacd";
    std::cout << matchStr(text, "aba") << '\n';
    std::cout << matchStr(text, "ba") << '\n';
    std::cout << matchStr(text, "efg") << '\n';
    std::cout << matchStr(text, "addf") << '\n';
    return 0;
}
#endif


Python代码:

#!/usr/bin/python
#Filename KMP.py


def piFun(pattern, pi):
    """Fill ``pi`` in place with the KMP failure table of ``pattern``.

    After a successful call, ``pi[i]`` is the index of the last character of
    the longest proper prefix of ``pattern[:i + 1]`` that is also its suffix,
    or -1 when there is none.

    Returns False (leaving ``pi`` untouched) for an empty pattern, True
    otherwise.
    """
    size = len(pattern)
    if size == 0:
        return False

    # Reset the caller's list to ``size`` entries, all -1.
    pi[:] = [-1] * size

    matched = -1
    for pos in range(1, size):
        current = pattern[pos]
        # Shrink the candidate prefix until it can be extended by ``current``.
        while matched >= 0 and pattern[matched + 1] != current:
            matched = pi[matched]
        if pattern[matched + 1] == current:
            matched += 1
        pi[pos] = matched
    return True


def matchStr(text, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``text``.

    Returns -1 when there is no occurrence, or when either argument is empty.
    """
    if not (len(text) and len(pattern)):
        return -1

    table = []
    if not piFun(pattern, table):
        return -1

    last = len(pattern) - 1
    state = -1  # index of the last pattern character matched so far
    for pos, current in enumerate(text):
        # Mismatch on the next pattern character: fall back via the table.
        while state >= 0 and pattern[state + 1] != current:
            state = table[state]
        if pattern[state + 1] == current:
            state += 1
        if state == last:
            # Whole pattern matched; it began ``last`` characters earlier.
            return pos - last
    return -1


print(matchStr('abcefabdefcnfghiorpnpp', 'ce'))
print(matchStr('abcefabdefcnfghiorpnpp', 'ab'))
print(matchStr('abcefabdefcnfghiorpnpp', 'ef'))
print(matchStr('abcefabdefcnfghiorpnpp', 'npp'))
print(matchStr('abcefabdefcnfghiorpnpp', 'ok'))