字符串匹配算法

来源:互联网 发布:洛丽塔 知乎 编辑:程序博客网 时间:2024/06/04 00:21

朴素方法

一种最朴素的思想:第 $i$ 轮匹配(记作 $\mathrm{epoch}_i$)从主字符串 $S$ 中的 $S[i]$ 开始,逐字符与模式串比较。如果匹配失败,则在下一轮 $\mathrm{epoch}_{i+1}$ 中,以主字符串的下一位 $S[i+1]$ 作为起点,重新从模式串的开头开始匹配。

/*
    Naive substring search.

    input:
        s: the text to search in
        p: the pattern to look for
    output:
        the index in s where p first occurs, or -1 when p does not occur.
        An empty pattern is reported at index 0.
*/
int strStr(string s, string p){
    const int textLen = static_cast<int>(s.length());
    const int patLen = static_cast<int>(p.length());

    int start = 0;    // where the current alignment of p begins inside s
    int matched = 0;  // how many leading characters of p agree at this alignment

    while (start + matched < textLen && matched < patLen) {
        if (s[start + matched] != p[matched]) {
            // Mismatch: slide the pattern one position to the right and
            // restart the comparison from the first pattern character.
            ++start;
            matched = 0;
            continue;
        }
        ++matched;
    }

    // The whole pattern was consumed only if every character agreed.
    return matched == patLen ? start : -1;
}

最坏的时间复杂度为 O(n*m),其中n和m分别是主串和模式串的长度。

KMP

高端一点的方法是KMP算法,我最近发现考研书中讲解的很详细,大家可以直接去找那本书,ISBN提供在本文最后了。

vector<int> get_next(string p){    /*        通过模式串构造next数组        kmp算法调整时参考的next数组取决于模式串而不是主串    */    vector<int> next(p.length());  // next的长度等于模式串长度    int i = 0;    next[0] = -1;    int j = -1;    while (i < p.length()){        if(j == -1 || p[i] == p[j]){                ++i;                ++j;                next[i] = j;           } else j = next[j];    } return next;}int KMP(string s, string p, int pos=0){    /*        input:            s: string            p: pattern            pos: find pattern start with s[pos]        output:            -1 for not found pattern p in s            otherwise, the index of p first appear in s started in pos    */    if(!(0 <= pos && pos < s.length())){        cout << "pos must be in the [0, s.length)" << endl;        return -1;    }    vector<int> next = get_next(p);    //cout << "next: "; for (int i = 0; i < next.size(); ++i) cout << next[i]+1 << ' '; cout << endl;    int i = pos, j = 0;    int step = 0;    while((i < s.length()) && (j < int(p.length()))){        if(j == -1 || (s[i] == p[j])) {            cout << "step=" << step++ << ' ';            cout << "match. s[" << i << "]=p[" << j << "]" << endl;            ++i;            ++j;        }        else{          cout << "step=" << step++ << ' ';          cout << "not match. current j=" << j << ",";          cout << "j=next[j]=next[" << j << "]=" << next[j] << endl;          j = next[j];        }    }    cout << "step=" << step++ << '\n';    if(j == p.length()) return i-j;    return -1;}int main(){    //cout << strStr("ababcabcacbab", "abcac") << endl;    //cout << KMP("ababcabcacbab", "abcac") << endl;    cout << "result of finding pattern: " << KMP("abcabaaabaabcac", "abaabcac") << endl;    return 0;}

参考书目

2017年-数据结构联考复习指导
ISBN:9787121283468

原创粉丝点击