Substring with Concatenation of All Words -- leetcode

来源：互联网发布：修改windows登录界面编辑：程序博客网时间：2024/04/30 05:58

You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that is a concatenation of each word in L exactly once and without any intervening characters.

For example, given:
S: "barfoothefoobarman"
L: ["foo", "bar"]

You should return the indices: [0,9].
(order does not matter).

第三版：双重Rolling Hash

1. 使用Rolling hash，给每组字符计算hash值。

2. 使用Rolling hash, 给整组计算hash值。

3. 以整组为单位的hash值作比较，以确定整组是否匹配。

时间复杂度为O(n)。此算法在leetcode上实际执行时间为54ms。

class Solution {public:    vector<int> findSubstring(string S, vector<string> &L) {        vector<int> result;        if (L.empty() || L[0].empty() ) return result;        if (S.size() < L.size() * L[0].size()) return result;        const int lsize = L.size();        const int wsize = L[0].size();        const int gsize = lsize * wsize;        const int r = 31;        const int q = 997;        const int m = 3355439;        int lhash = 0;        for (int i = 0; i<lsize; i++) {                int whash = 0;                for (int j=0; j<wsize; j++) {                        whash = (whash * r + L[i][j]) % q;                }                lhash = (lhash + whash) % m;        }        vector<int > ghashes(wsize);        vector<int> window(gsize);        int whead = 0;        int whash = 0;        int i = 0;        int weight = 1;        for (; i<wsize-1; i++) {                whash = (whash * r + S[i]) % q;                weight = (weight * r) % q;        }        unordered_multiset<string> Lset(L.begin(), L.end());        for (; i<S.size(); i++) {                int &ghash = ghashes[whead % wsize];                ghash = (ghash - window[whead] + m) % m;                window[whead] = whash = (whash * r + S[i]) % q;                ghash = (ghash + whash) % m;                whead = ++whead % gsize;                whash = (whash - (S[i-wsize+1] * weight) % q + q) % q;                if (ghash == lhash && match(S, i-gsize+1, Lset))                        result.push_back(i - gsize + 1);        }        return result;    }        bool match(const string &S, int pos, unordered_multiset<string> L) {        if (pos < 0) return false;        const int lsize = L.size();        const int wsize = L.begin()->size();        for (int i=0; i<lsize; i++) {                auto iter = L.find(S.substr(pos, wsize));                if (iter == L.end()) return false;                L.erase(iter);                pos += wsize;        }        return true;    }};

第二版： Hash查找，多窗口同时移动

此算法利用C++11的unordered_map，并进行hash函数和比较函数的自定义，以省去取子串的复制操作。

窗口移动过程中，只需要考虑子串查不到，和查到但次数超过预期的两种情况。

做成多窗口同时移动，以希望能更高的缓存命中率。在S足够长时，有优势。省去多次遍历。

与单窗口多次遍历相比，付出了更多的额外空间。

此算法在leetcode上的实际势行时间为124ms。

class Solution {public:struct EqualHash {        EqualHash(int size): size_(size) {}        bool operator() (const char *x, const char *y) const        { return !strncmp(x, y, size_); }        size_t operator() (const char *s) const noexcept        { return std::_Hash_impl::hash(s, size_); }        size_t size_;};    vector<int> findSubstring(string S, vector<string> &L) {        vector<int> result;        if (L.empty()) return result;        if (L[0].empty()) return result;        if (S.size() < L[0].size()) return result;        const int itemSize = L[0].size();        vector<int> total(itemSize);        vector<int> left(itemSize);        vector<unordered_map<const char *, int, EqualHash, EqualHash> > actuals;        unordered_map<const char *, int, EqualHash, EqualHash> expected(L.size(), EqualHash(itemSize), EqualHash(itemSize));        actuals.reserve(itemSize);        for (int i=0; i<itemSize; i++) {                actuals.push_back(expected);                left[i] = i;        }        for (const string &item: L)                expected[item.data()]++;        const char *const s = S.data();        const char *h = s + itemSize - 2;        int group = -1;        while (*++h) {                group = (group + 1) % itemSize;                auto &actual = actuals[group];                const char *item = h - itemSize + 1;                auto iter1 = expected.find(item);                if (iter1 == expected.end()) {                        total[group] = 0;                        left[group] = h - s + 1;                        actual.clear();                        continue;                }                auto &value = ++actual[item];                ++total[group];                while (value > iter1->second) {                        --actual[s + left[group]];                        left[group] = left[group] + itemSize;                        --total[group];                }                if (total[group] == L.size()) {                        result.push_back(left[group]);                        --actual[s + left[group]];                        left[group] = left[group] + itemSize;                        --total[group];                }        }        return result;    }};

第一版：利用滚动Hash，接合窗口移动，完成下面这个算法。

思想是，先计算出Hash值，用Hash值完成在map中的查找，最后用字符串精确匹配最验证。

并尽可能推迟并省掉字符串精确匹配。

这个算法虽然被leetcode AC了。但是实际运行时间并预想的慢多了，高达1272ms。

class Solution {public:    vector<int> findSubstring(string S, vector<string> &L) {        vector<int> result;        if (L.empty()) return result;        const int itemSize = L[0].size();        const int wndSize = itemSize * L.size();        const int q = 3355439;        const int r = 256;        map<int, vector<int> > map1;        for (int i=0; i<L.size(); i++) {                int nHash = 0;                for (int j=0; j<L[i].size(); j++) {                        nHash = ((nHash * r) % q + L[i][j]) % q;                }                map1[nHash].push_back(i);        }        vector<int> total(itemSize);        vector<int> revMap(wndSize);        vector<pair<int,bool> > map2(wndSize);        int head2 = 0;        int weight = 1;        for (int i=1; i<itemSize; i++)                weight = (weight * r) % q;        int nHash = 0;        for (int i=0; i<itemSize-1; i++)                nHash = ((nHash * r) % q + S[i]) % q;        for (int i=itemSize-1; i<S.size(); i++) {                const int group = i % itemSize;                const int offset = group * L.size();                if (map2[head2].first) {                        revMap[offset + map2[head2].first - 1] = 0;                        map2[head2].first = 0;                        map2[head2].second = false;                        --total[group];                }                nHash = ((nHash * r) % q + S[i]) % q;                map<int, vector<int> >::iterator iter = map1.find(nHash);                if (iter != map1.end()) {                        int pos = -1;                        bool compare = true;                        const vector<int> &val = (*iter).second;                        if (val.size() == 1) {                                const int entry = revMap[offset + val[0]];                                if (!entry ||                                        !S.compare(i-itemSize+1, itemSize, L[val[0]])) {                                        pos = 0;                                        compare = !!entry;                                }                        }                        else {                                compare = true;                                int min = i+1;                                for (int j=0; j<val.size(); j++) {                                        if (!S.compare(i-itemSize+1, itemSize, L[val[j]])) {                                                const int entry = revMap[offset + val[j]];                                                if (!entry) {                                                        pos = j;                                                        break;                                                }                                                else if (entry < min) {                                                        min = entry;                                                        pos = j;                                                }                                        }                                }                        }                                                if (pos != -1) {                                const int entry = revMap[offset + val[pos]];                                revMap[offset + val[pos]] = i + 1;                                map2[head2].first = val[pos] + 1;                                map2[head2].second= compare;                                total[group]++;                                if (entry) {                                        const int entry2 = (head2 - i + entry - 1 + wndSize) % wndSize;                                        map2[entry2].first = 0;                                        map2[entry2].second= false;                                        total[group]--;                                }                                if (total[group] == L.size()) {                                        for (int j=0; j<L.size(); j++) {                                                const int entry2 = (head2 - i + revMap[offset+j] - 1 + wndSize) % wndSize;                                                if (!map2[entry2].second) {                                                        if (!S.compare(revMap[offset+j]-itemSize, itemSize, L[j])) {                                                                map2[entry2].second = true;                                                        }                                                        else {                                                                revMap[offset+j] = 0;                                                                map2[entry2].first = 0;                                                                map2[entry2].second= false;                                                                total[group]--;                                                                break;                                                        }                                                }                                        }                                        if (total[group] == L.size())                                                result.push_back(i-wndSize+1);                                }                        }                }                                nHash = ((nHash - (S[i-itemSize+1] * weight) % q) % q + q) % q;                head2 = (head2 + 1) % wndSize;        }        return result;    }};

0 0