【字符串】题型总结：min window, word ladder II, valid number

来源：互联网发布：京津冀大数据联盟编辑：程序博客网时间：2024/06/05 03:56

Title
Add Date
Acceptance
Difficulty
Reverse Words in a String2014-03-0514.2%MediumWord Break II2013-10-0516.7%HardWord Break2013-10-0421.4%MediumWord Ladder II2013-02-1011.6%HardWord Ladder2013-02-1018.4%MediumWord Search2012-04-1819.6%MediumLength of Last Word2012-03-2728.9%EasySubstring with Concatenation of All Words2012-02-2318.1%Hard

1 min window

本题和数据结构联系紧密

维护一个map<char, int> t,保持每个字母的数量。

维护一个queue，保持当前的有效的字母的下标。

维护一个map<char, int> cnt,保持当前队列queue中的字母和数量。

每次从S遍历一个char，如果是有效的字母：

修改cnt[char],加1。
当计数值刚好等于t[char]的时候，说明有一个新的字母就位了， m++。当m==n的时候，说明所有的字母都就位了。
然后从队首取得一个char，查看该char是否是多余的（cnt[char] 超过了t[char],如果是，丢出队列并更新cnt[char]）
如果m==n，即所有的字母都就位，则用当前的下表和队首的下标构成的区间去更新结果（最短长度和下标志）。

  string minWindow(string S, string T) {    // int cnt[26];        map<char, int> cnt;        map<char, int> t;        for(int i=0; i<T.length(); i++)            t[T[i]]++;        int n = t.size();        if(!n) return "";                queue<int> que;        int m = 0, mm = -1, mi = -1;        for(int i=0; i<S.length(); i++){            char c = S[i];            if(t.count(c)){                cnt[c]=cnt[c]+1;                if(cnt[c] == t[c])  m++;                que.push(i);                                while(!que.empty()){                    int j = que.front();                    char d = S[j];                    if(cnt[d] > t[d]) {                        cnt[d] --;                        que.pop();                    }                    else break;                }                            if(m==n){                    assert(!que.empty());                    int j = que.front();                    if(mm<0 || mm>i-j+1){                        mi = j, mm = i-j+1;                    }                }            }        }        if(mm<0) return "";        return S.substr(mi, mm);    }

2 Word Break 1

DP: res[i] = E(res[0<=j<i] && isW[j][i-1] )

class Solution {public:    bool wordBreak(string s, unordered_set<string> &dict) {        int n = s.length();        if(!n) return dict.empty();        vector<vector<bool> > w(n, vector<bool>(n, false));        for(int i =0; i<n; i++){            string t = "";            for(int j = i; j<n; j++){                t+= s[j];                w[i][j] = dict.count(t);            }        }                vector<bool> t(n+1, false);        t[0] = true;        for(int i =1; i<=n; i++){            for(int j =0; j<i; j++){                if(t[j] && w[j][i-1]) {                    t[i] = true;                    break;                }            }        }        return t[n];    }};

3 Word Break II

cut - tree............................

这道题关键是要剪枝，剪枝方法为

res[i]保存i~n-1区间是否有效（存在被正确地划分的方案），
每次dp到i-1的时候，首先判断i~n-1是否有效，如果无效则剪枝。

class Solution {public:    vector<string> wordBreak(string s, unordered_set<string> &dict) {        int n = s.length();        if(!n)  return vector<string>();                vector<vector<bool> > isW(n, vector<bool>(n, false));        for(int i = 0; i<n; i++){            string tmp = "";            for(int j = i; j<n; j++){                tmp+= s[j];                if(dict.count(tmp)) isW[i][j] = true;            }        }                vector<bool> res(n+1, false);        res[n] = true;        for(int i = n-1; i>=0; i--){            for(int j = i+1; j<=n; j++){                if(res[j] && isW[i][j-1]){                    res[i] = true;                    break;                }            }        }        vector<vector<string> > t(n+1);        t[0].push_back("");        for(int i =1; i<=n; i++){            if(!res[i]) continue; //cut ...            for(int j = 0; j<i; j++){//0~j-1, j~i-1                if(t[j].size() && isW[j][i-1]){                    for(int k = 0; k<t[j].size(); k++){                        string tmp = t[j][k];                         if(j>0) //[j, i-1] is not the first word                            tmp+=" "+s.substr(j, i-j);                        else // [j, i-1] is the first word                            tmp+=s.substr(j, i-j);                        t[i].push_back(tmp);                    }                }            }        }        return t[n];    }};

或者

class Solution {public:    vector<string> wordBreak(string s, unordered_set<string> &dict) {        int n = s.length();        if(!n)  return vector<string>();                vector<vector<bool> > isW(n, vector<bool>(n, false));        for(int i = 0; i<n; i++){            string tmp = "";            for(int j = i; j<n; j++){                tmp+= s[j];                if(dict.count(tmp)) isW[i][j] = true;            }        }                vector<bool> res(n+1, false);        res[n] = true;        for(int i = n-1; i>=0; i--){            for(int j = i+1; j<=n; j++){                if(res[j] && isW[i][j-1]){                    res[i] = true;                    break;                }            }        }        vector<vector<vector<int> > > t(n+1);        t[0].push_back(vector<int>(1, 0));        for(int i =1; i<=n; i++){            if(!res[i]) continue; //cut ...            for(int j = 0; j<i; j++){//0~j-1, j~i-1                if(t[j].size() && isW[j][i-1]){                    for(int k = 0; k<t[j].size(); k++){                        vector<int> tmp = t[j][k];                         tmp.push_back(i-j);                        t[i].push_back(tmp);                    }                }            }        }                vector<string> ss;        for(int i = 0; i<t[n].size(); i++){            string tmp="";            int l = 0, k = 0;            for(int j=0; j<t[n][i].size(); j++){                k = t[n][i][j];                if(!k) continue;                tmp+=s.substr(l, k);                if(j<t[n][i].size()-1) tmp+=" ";                l+= k;            }            ss.push_back(tmp);        }        return ss;    }};

3 Word Ladder II

这道题要求找最短的s-》t的变换，如果把每个字符串看成一个节点，就相当于找s-》t节点的最短路径。

一般做法是采用“最短路径约束”

最短路径约束

首先求出图中到所有点的单源(s)最短路径d[node]
然后再dfs搜索路径，搜索过程中只遍历满足d[j] == d[i]+1的边<i, j> ，得到的s->t的路径就是最短的（最短路径约束）

对图中所有点做单源最短路径，很容易想到对源点s做bfs，然后记录每个顶点到s的距离d[i]。

那么在做单源最短路径之前，是不是需要建立起图来？

如果先建图，那么最容易想到任取两点，查看二者是否可达。

错误做法 n*n+bfs + graph simplify + dfs

这个题先试了一下用图表示每个字符串，建立起5000规模的图。遗憾的是超时了。

代码如下：

class Solution {public:    inline bool isNb(string &a, string &b){//判断相邻，时间代价O（n^2)太长了~~        int cnt = 0;        for(int i = 0; i<a.length(); i++){            if(a[i]!=b[i]) cnt++;            if(cnt>1) return false;        }        return cnt == 1;    }    vector<vector<string>> findLadders(string start, string end, unordered_set<string> &dict) {        map<string, int> num; vector<string> strs;        strs.push_back(start), num[start] = strs.size() - 1;                typedef unordered_set<string>::iterator IT;        for(IT it = dict.begin(); it!=dict.end(); it++){            if(!num.count(*it)){                strs.push_back(*it), num[*it] = strs.size() - 1;            }        }        if(!num.count(end)){            strs.push_back(end), num[end] = strs.size() - 1;        }                int n = strs.size();        vector<vector<int> > graph(n);        int s = 0, t = num[end];        for(int i = 0; i<strs.size(); i++){            for(int j = i; j<strs.size(); j++){                string &a = strs[i], &b = strs[j];                if(isNb(a, b)) graph[i].push_back(j), graph[j].push_back(i);            }        }                // BFS to find d[i]        vector<int> d(n, -1);        queue<int> que;        d[0] = 1, que.push(0);        while(!que.empty()){            int i = que.front(); que.pop();            if(i == t) break;            for(int j =0; j<graph[i].size(); j++){                int k = graph[i][j];                if(d[k]<0) {                    que.push(k);                    d[k] = d[i]+1;                }            }        }                // simplify the graph using d[i]        vector<vector<string> > res;        vector<string> tmp;        for(int i =0; i<n; i++){            vector<int> tmp;            for(int j =0; j<graph[i].size(); j++){                int k = graph[i][j];                if(d[k] == d[i]+1) tmp.push_back(k);            }            graph[i] = tmp;        }                tmp.push_back(start);        dfs(strs, d, graph, res, tmp, 0, t);        return res;    }    void dfs(const vector<string> &strs, const vector<int> &d, const vector<vector<int> > &graph, vector<vector<string> > &res, vector<string> &cur, int idx, int t){        if(cur.size() == d[t]){            if(idx == t){                res.push_back(cur);            }            return;        }                for(int i =0; i<graph[idx].size(); i++){            int j = graph[idx][i];            cur.push_back(strs[j]);            dfs(strs, d, graph, res, cur, j, t);            cur.pop_back();        }    }};

上述代码会超时，原因是建图调用isNb（str, str) 了N^2次。而且graph[i][j]中包括太多实际上不会在单源（s）最短路径中出现的点。

后来考虑变换str，查找邻居节点。这样建图是O（n）的

但是建完图，bfs的时候还要记录d[i]，然后得到的图里有些边<i, j>是不满足d[j] = d[i] +1的，如果留下，会对后续dfs搜索路径造成复杂度。所以还需要bfs的时候化简图。而这个思路也超时。

正确的做法

先不建图，bfs的时候再搜索每个节点i的邻居j，判断<i,j>是否满足最短路径约束，满足则把<i, j>加入到图中。
然后对这个最简的图做dfs

    vector<vector<string>> findLadders(string start, string end, unordered_set<string> &dict) {        map<string, int> num; vector<string> strs;        strs.push_back(start), num[start] = strs.size() - 1;        typedef unordered_set<string>::iterator IT;        for(IT it = dict.begin(); it!=dict.end(); it++){            if(!num.count(*it)){                strs.push_back(*it), num[*it] = strs.size() - 1;            }        }        if(!num.count(end)){            strs.push_back(end), num[end] = strs.size() - 1;        }                int n = strs.size();        int s = num[start], t = num[end];        vector<vector<int> > graph(n);        vector<int> d(n, -1);        queue<int> que;        d[s] = 1, que.push(s);        while(!que.empty()){            int i = que.front();            que.pop(); //@error 3: forget to pop after peek ...........            if(d[i] == d[t]) break; //@error 2: we should break when the t's layer reached, not when t reached            string &ss = strs[i];            for(int j = 0; j<ss.length(); j++){                char c = ss[j];                for(char k = 'a'; k <= 'z'; k++){                    if(k == c) continue;                    ss[j] = k; //@error 4: forget this...............                    if(num.count(ss)){                        int l = num[ss];                        if(d[l] < 0) {                            d[l] = d[i] + 1;                            //graph[i].push_back(l); //@errror 1:here, not only when d[l]<0 l is i's child, but also when d[l] == d[i]+1                            que.push(l);                            //if(l == t) goto END_BFS; @error 2:                         }                        if(d[l] == d[i]+1)                             graph[i].push_back(l);//@error 1:                    }                }                ss[j] = c;            }        }              vector<vector<string> > res;        vector<int> tmp;        tmp.push_back(s);        dfs(strs, graph, d[t], t,  res, tmp);        return res;    }        void dfs(const vector<string> &strs, const vector<vector<int> > &graph, const int md, const int t,              vector<vector<string> > &res, vector<int> &cur){        int idx = cur[cur.size()-1];        if(cur.size() == md){            if(idx == t){                vector<string> tmp;                for(int i = 0; i < cur.size(); i++){                    tmp.push_back(strs[cur[i]]);                }                res.push_back(tmp);            }            return;        }                for(int i =0; i<graph[idx].size(); i++){            int j = graph[idx][i];            cur.push_back(j);            dfs(strs, graph, md, t, res, cur);            cur.pop_back();        }    }};

短短一个bfs，掉进无数坑。最近记忆力衰退，什么都忘记。。。。。

4 valid number

最好写一个子函数，用来读取和验证一个number_pattern( 各种数字和正负号（只读取一次）)；并传一个参数告诉他是否可以有点符号。

number_pattern = [+,-][0~9]*.[0~9]*

number = number_pattern [e number_pattern]

class Solution {public:    int getNumber(const char *s, bool hasDot){ //读取number_pattern = [+,-][0~9]*.[0~9]*,并要求至少出现一个数字([0~9]).对于无效输入返回-1         int i = 0;         if(s[i] == '+' || s[i] == '-') i++;         int j = i;         while(s[i]<='9'&&s[i]>='0') i++;         int k1 = i - j;         if(s[i]=='.') {            if(!hasDot) { return -1;}            i++;            int j = i;            while(s[i]<='9'&&s[i]>='0') i++;            int k2 = i - j;            if(!k1 && !k2) { return -1;}         }         else if(!k1) { return -1;} //没有读取到数字（中间卡在哪个无效字符上了）         return i;      }    bool isNumber(const char *s) {// number = number_pattern [e number_pattern]        int i =0;        while(s[i]==' ') i++;        bool t ;        int k1 = getNumber(s+i, true);        if(k1<0) return false;//无效number_pattern        i += k1;        if(s[i]=='e') //读取到e        {            i++;            int k2 = getNumber(s+i, false);//e右边的number_pattern不许有dot            if(k2<0) return false;         //无效number_pattern            i += k2;            if(!k1 || !k2) return false;        }        while(s[i]==' ') i++;        return s[i] == 0;    }};

5 Substring with all words

实际上也是一个queue<int>保存匹配到的单词，

当匹配到新的单词t之后，cnt[t]++,如果个数cnt[t]超了，则需要从que弹出一段，直到弹到第一个该单词t。
- 如果队列大小为n=L.size()，说明找到一个匹配
当未匹配到时，que清空，cnt[]置为0.

str unique + KMP strs matches + queue , cnt

class Solution {public:    void kmp(string &s, string &word, vector<int> &res){        vector<int> nxt(word.length()+2);        nxt[0] = -1, nxt[1] = 0;        for(int i=1; word[i]; i++){            int t = nxt[i];            while(t>=0 && word[t] != word[i])                t = nxt[t];            nxt[i+1] = t+1;        }            for(int i =0, j = 0; i<s.length(); i++){            while(j>=0 && s[i]!=word[j]) j = nxt[j];            j++;            if(j==word.length()) res.push_back(i-j+1);        }    }    vector<int> findSubstring(string S, vector<string> &L) {        vector<int> res;        if(L.size()==0) return res;                // unique L to ul and wc        sort(L.begin(), L.end());        vector<string> ul; //unique L        vector<int> wc;         int ln = 1;        for(int i=1; i<=L.size(); i++){            if(i== L.size() || L[i]!=L[i-1]){                ul.push_back(L[i-1]);                wc.push_back(ln);                ln = 1;            }else ln ++;        }                vector<int> matches(S.length(), -1);        for(int i=0; i<ul.size(); i++){            string & word = ul[i];            vector<int> res;            kmp(S, word, res);            for(int j =0; j<res.size(); j++){                matches[res[j]] = i;            }        }                 int n = L.size(), l = L[0].length();        queue<int> que;        vector<int> cnt(n, 0);        for(int i =0; i<l; i++){            que = queue<int>(), cnt = vector<int>(n, 0);            for(int j = i; j<S.length(); j+=l){                if(matches[j]>=0){                    int t = matches[j];                    que.push(t);                    cnt[t]++;                    if(cnt[t]>wc[t]){                        //while(que.front()!=t) que.pop(); //@error 1: should update cnt when que changes                        //que.pop();                        while(!que.empty()){                            int s = que.front(); que.pop();                            cnt[s] --;                            if(s==t) break;                        }                    }                    if(que.size()==n) res.push_back(j-(n-1)*l); //@error 3: j-n*l+1                }                //@error 2: lost: when some word no match, then clear que                else que = queue<int>(), cnt = vector<int>(n, 0);            }        }        return res;    }};

或者不用kmp, 用hash查找

Hash+ que/cnt

class Solution {public:    vector<int> findSubstring(string S, vector<string> &L) {    vector<int> res;    if(L.size()==0) return res;    int N=L[0].length();        #define rep(i,n) for(int i=0;i<n;i++)    map <string,int > words;    map <int,int > cnt;    rep(i,L.size()) {     if(words.find(L[i])==words.end()) words[L[i]]=i,cnt[i]=1;    else {     int j=words[L[i]];    cnt[j]++;    }    }    vector<int> tag(S.length(),-1);    rep(i,S.length()){    string tmp=S.substr(i,N);    if(words.find(tmp)!=words.end()){    tag[i]=words[tmp];    }    }        rep(i,N){    int j=i;    deque<int> cur;    int begin=j;//begin go ahead n step when cur[] pop n elements    while(j<S.length()){        //a new word to append    if(tag[j]>=0){    deque<int>::iterator it;    if(count(cur.begin(),cur.end(),tag[j])==cnt[tag[j]]){//overflow of word j    it=find(cur.begin(),cur.end(),tag[j]);    begin+=(it-cur.begin()+1)*N;    cur.erase(cur.begin(),it+1);    }    cur.push_back(tag[j]);        //if a new concatness is found after push_back    if(cur.size()==L.size()) {    res.push_back(begin);        begin+=N;    cur.pop_front();    }    }        //a nonword: divide the string    else{//-1             begin=j+N;    cur.clear();    }    j+=N;    }    }    return res;    }};

0 0