[LeetCode]Substring with Concatenation of All Words（渣算法）_TBC

来源：互联网发布：怎么求逆矩阵编辑：程序博客网时间：2024/06/05 07:59

You are given a string, s, and a list of words, words, that are all of
the same length. Find all starting indices of substring(s) in s that
is a concatenation of each word in words exactly once and without any
intervening characters.

For example, given: s: “barfoothefoobarman” words: [“foo”, “bar”]

You should return the indices: [0,9]. (order does not matter).
由于words中可以存放重复的string，所以用multiset。思路延续strStr()那道题，只不过现在每次从set中找到一个string就要把它erase掉；由于是multiset，必须按迭代器erase（按值erase会把所有相同的元素一起删掉）。
988ms AC o(╯□╰)o

class Solution {
public:
    /**
     * Finds every index of `s` at which a concatenation of all strings in
     * `words` begins. Each word is used exactly once, in any order, with no
     * characters in between.
     *
     * All words are assumed to have the same length as `words[0]` (this is
     * the problem's precondition; it is not verified here).
     *
     * @param s     Text to search.
     * @param words Words to concatenate; duplicates are allowed and must each
     *              be matched as many times as they occur.
     * @return Matching start indices in ascending order. Empty when `words`
     *         is empty or `s` is shorter than the full concatenation.
     */
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> starts;
        // Guard first: words[0] below is undefined behaviour on an empty list.
        if (words.empty()) {
            return starts;
        }

        // A multiset keeps one entry per occurrence, so duplicate words are
        // required the correct number of times.
        const std::multiset<std::string> required(words.begin(), words.end());
        const std::string::size_type word_len = words[0].size();
        const std::string::size_type total_len = word_len * words.size();
        if (s.size() < total_len) {
            return starts;
        }

        // Every candidate start leaves room for the whole concatenation, so
        // no substr() call below can run past the end of `s`.
        const std::string::size_type last_start = s.size() - total_len;
        for (std::string::size_type start = 0; start <= last_start; ++start) {
            // Cheap reject: do not copy the multiset when the first word
            // cannot match anyway.
            if (required.find(s.substr(start, word_len)) == required.end()) {
                continue;
            }

            std::multiset<std::string> remaining(required);
            std::string::size_type pos = start;
            while (!remaining.empty()) {
                const std::multiset<std::string>::iterator hit =
                    remaining.find(s.substr(pos, word_len));
                if (hit == remaining.end()) {
                    break;  // unknown word, or a word used too many times
                }
                // Erase through the iterator: erasing by value would remove
                // every equal element at once.
                remaining.erase(hit);
                pos += word_len;
            }

            if (remaining.empty()) {
                starts.push_back(static_cast<int>(start));
            }
        }
        return starts;
    }
};

还有一种窗口的方法，思路仍然是维护一个窗口，如果当前单词在字典中，则继续移动窗口右端，否则窗口左端可以跳到字符串下一个单词了。假设源字符串的长度为n，字典中单词的长度为l。因为不是一个字符，所以我们需要对源字符串所有长度为l的子串进行判断。做法是i从0到l-1个字符开始，得到开始index分别为i, i+l, i+2*l, …的长度为l的单词。这样就可以保证判断到所有的满足条件的串。因为每次扫描的时间复杂度是O(2*n/l)(每个单词不会被访问多于两次，一次是窗口右端，一次是窗口左端)，总共扫描l次（i=0, …, l-1)，所以总复杂度是O(2*n/l*l)=O(n)，是一个线性算法。空间复杂度是字典的大小，即O(m*l)，其中m是字典的单词数量。

class Solution {
public:
    /**
     * Sliding-window search for all start indices in `S` where the words of
     * `L` appear back to back, each exactly as often as it occurs in `L`.
     *
     * The text is scanned once per alignment (0 .. word length - 1). Within an
     * alignment the window grows one word at a time on the right and shrinks
     * from the left whenever a word becomes over-represented.
     *
     * @param S Text to search.
     * @param L Words to concatenate; the length of L[0] is used as the word
     *          length for every word.
     * @return Start indices, grouped by alignment and ascending within each
     *         alignment. Empty when `S` or `L` is empty.
     */
    std::vector<int> findSubstring(std::string S, std::vector<std::string> &L) {
        std::vector<int> hits;
        const int textLen = S.size();
        const int wordTotal = L.size();
        if (textLen <= 0 || wordTotal <= 0) {
            return hits;
        }

        // How many times each word has to appear in a match.
        std::unordered_map<std::string, int> need;
        for (const std::string &w : L) {
            need[w] += 1;
        }

        const int step = L[0].size();
        for (int offset = 0; offset < step; ++offset) {
            int windowStart = offset;  // index of the first word in the window
            int matched = 0;           // words in the window that count toward a match
            std::unordered_map<std::string, int> have;

            for (int pos = offset; pos <= textLen - step; pos += step) {
                const std::string word = S.substr(pos, step);
                const std::unordered_map<std::string, int>::const_iterator quotaIt = need.find(word);

                // Unknown word: nothing spanning it can match, restart after it.
                if (quotaIt == need.end()) {
                    have.clear();
                    matched = 0;
                    windowStart = pos + step;
                    continue;
                }

                const int quota = quotaIt->second;
                int &seen = have[word];
                seen += 1;
                if (seen <= quota) {
                    matched += 1;
                } else {
                    // One `word` too many: drop words from the left until the
                    // surplus copy has left the window.
                    while (seen > quota) {
                        const std::string dropped = S.substr(windowStart, step);
                        have[dropped] -= 1;
                        // The surplus copy of `word` was never counted in
                        // `matched`, so dropping a `word` only removes the
                        // surplus; any other word was counted and must be
                        // subtracted.
                        if (dropped != word) {
                            matched -= 1;
                        }
                        windowStart += step;
                    }
                }

                if (matched == wordTotal) {
                    hits.push_back(windowStart);
                    // Slide the window forward by one word to look for the
                    // next match.
                    have[S.substr(windowStart, step)] -= 1;
                    matched -= 1;
                    windowStart += step;
                }
            }
        }
        return hits;
    }
};

0 0