POJ 2774 解题报告

来源：互联网发布：淘宝店全场包邮编辑：程序博客网时间：2024/04/29 04:21

这道题可能是近期写过的最纠结的一道题了。之前一直没有看过suffix array，这次必须看了。

geeksforgeeks上面有通俗易懂的O(n log² n)的实现：http://www.geeksforgeeks.org/suffix-array-set-2-a-nlognlogn-algorithm/。但我不清楚这个复杂度在本题中是否会超时。

最好的资料还是discuss中大家都提到的罗穗骞大神的实现：https://github.com/oeddyo/algorithm/blob/master/resources/%E7%89%9B%E4%BA%BA%E8%B0%88ACM%E7%BB%8F%E9%AA%8C(%E5%8C%85%E6%8B%AC%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F%E8%AE%BA%E6%96%87)/%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F%E8%AE%BA%E6%96%87/%E5%9B%BD%E5%AE%B6%E9%9B%86%E8%AE%AD%E9%98%9F2009%E8%AE%BA%E6%96%87%E9%9B%86/11.%E7%BD%97%E7%A9%97%E9%AA%9E%E3%80%8A%E5%90%8E%E7%BC%80%E6%95%B0%E7%BB%84%E2%80%94%E2%80%94%E5%A4%84%E7%90%86%E5%AD%97%E7%AC%A6%E4%B8%B2%E7%9A%84%E6%9C%89%E5%8A%9B%E5%B7%A5%E5%85%B7%E3%80%8B/%E5%90%8E%E7%BC%80%E6%95%B0%E7%BB%84%E2%80%94%E2%80%94%E5%A4%84%E7%90%86%E5%AD%97%E7%AC%A6%E4%B8%B2%E7%9A%84%E6%9C%89%E5%8A%9B%E5%B7%A5%E5%85%B7.pdf

后缀数组本不容易，所以需要费些功夫理解，虽然罗穗骞大神的解释已经非常通俗易懂了。

最终我只是大致了解了大神的程序，这里照搬了源程序（俗称“模板”）。

解题思路是将两个string合成一个，然后看后缀的最长共同前缀(longest common prefix, LCP)。这是后缀数组的一个常见应用。

需要注意的是，必须保证比较的两个后缀分别来自不同的string。最简单的办法是在第一个string后面加一个在两个string中都不会出现的分隔字符，比如'$'，这样公共前缀就不可能跨过第一个string的结尾。这里有很好的解释：http://poj.org/showmessage?message_id=85977。

由于模板程序需要在字符串后面加个`0`（sa中将排在第一位），所以n、n - 1、0、1之类的下标要区分清楚。

贡献了很多WA和RE，不过也是个理解加深的过程。

thestoryofsnow | 2774 | Accepted | 5640K | 344MS | C++ | 5079B

/*
ID: thestor1
LANG: C++
TASK: poj2774
*/
#include <iostream>
#include <fstream>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <limits>
#include <string>
#include <vector>
#include <list>
#include <set>
#include <map>
#include <queue>
#include <stack>
#include <algorithm>
#include <cassert>

// Capacity reserved for one input string, including its terminating '\0'
// (so at most MAXS - 1 characters are read per string).
const int MAXS = 100000 + 1;

// Capacity of the combined sequence: two strings of up to MAXS each,
// plus one separator symbol between them and one trailing 0 sentinel.
const int MAXN = 2 * MAXS + 2;

// Scratch buffers for the prefix-doubling construction in da():
//   wa / wb : the two rank arrays that are swapped every round
//   wv      : first-part rank of each suffix, in second-part order
//   wc      : counting-sort buckets
int wa[MAXN], wb[MAXN], wv[MAXN], wc[MAXN];

// Suffix array of the combined sequence: sa[i] is the start index of the
// i-th smallest suffix.
int sa[MAXN];

// Returns non-zero when the suffixes starting at a and b have the same
// rank pair (r[x], r[x + l]), i.e. they agree on their first 2 * l symbols.
//
// r holds the ranks computed for prefixes of length l.
// The second comparison is only evaluated when r[a] == r[b]. Given the unique
// 0 sentinel at the end of the sequence, two distinct suffixes can only share
// a first-part rank if both have at least l symbols before the sentinel, so
// a + l and b + l stay inside the sequence.
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds the suffix array of r by prefix doubling with counting sort.
//
// r  : input sequence; r[n - 1] must be 0 and r[i] > 0 for 0 <= i < n - 1
// sa : output, sa[i] = start index of the i-th smallest suffix
// n  : length of r (including the trailing 0)
// m  : exclusive upper bound of the symbols, i.e. 0 <= r[i] < m
//
// Uses the global scratch arrays wa, wb, wv and wc, so n and m must not
// exceed MAXN.
void da(int *r, int *sa, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *t;

    // Round 0: counting sort of the suffixes by their first symbol only.
    for (i = 0; i < m; i++)
    {
        wc[i] = 0;
    }
    for (i = 0; i < n; i++)
    {
        wc[x[i] = r[i]]++;
    }
    for (i = 1; i < m; i++)
    {
        wc[i] += wc[i - 1];
    }
    // Walk backwards so that equal keys keep their relative order.
    for (i = n - 1; i >= 0; i--)
    {
        sa[--wc[x[i]]] = i;
    }

    // Each round doubles the compared prefix length from j to 2 * j.
    // p counts the distinct ranks found in the round; once p == n every
    // suffix has its own rank and the order is final. The next round only
    // needs p buckets, hence m = p.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // Step 1: order the suffixes by their second part (symbols j..2j-1).
        // Suffixes starting in [n - j, n - 1] have no second part at all,
        // so they are the smallest and go first.
        for (p = 0, i = n - j; i < n; i++)
        {
            y[p++] = i;
        }
        // The remaining order can be read directly off the current sa:
        // suffix sa[i] is the second part of suffix sa[i] - j.
        for (i = 0; i < n; i++)
        {
            if (sa[i] >= j)
            {
                y[p++] = sa[i] - j;
            }
        }

        // Step 2: stable counting sort by the first part (rank x), taking
        // the suffixes in second-part order. The result is sorted by the
        // pair (first part, second part), i.e. by the first 2 * j symbols.
        for (i = 0; i < n; i++)
        {
            wv[i] = x[y[i]];
        }
        for (i = 0; i < m; i++)
        {
            wc[i] = 0;
        }
        for (i = 0; i < n; i++)
        {
            wc[wv[i]]++;
        }
        for (i = 1; i < m; i++)
        {
            wc[i] += wc[i - 1];
        }
        for (i = n - 1; i >= 0; i--)
        {
            sa[--wc[wv[i]]] = y[i];
        }

        // Step 3: recompute the ranks. After the swap y holds the ranks for
        // length j and x is overwritten with the ranks for length 2 * j.
        t = x, x = y, y = t;
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
        {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
    }

    // sa[0] is always n - 1 here: r[n - 1] == 0 is the unique smallest
    // symbol, so the suffix consisting of the sentinel alone sorts first.
    return;
}

// rank[i]   : position of the suffix starting at i inside sa (rank[sa[i]] == i)
// height[i] : length of the longest common prefix of suffixes sa[i - 1] and
//             sa[i]; height[0] is not computed
int rank[MAXN], height[MAXN];

// Fills rank[] and height[] for the sequence r with suffix array sa.
//
// r  : the same sequence that was passed to da() (unique 0 at r[n - 1])
// sa : suffix array of r
// n  : length of r (including the trailing 0)
//
// With h[i] = height[rank[i]] the inequality h[i] >= h[i - 1] - 1 holds, so
// walking the suffixes in text order lets each comparison resume from the
// previous match length minus one instead of from zero.
void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;

    // Start at 0 so rank[sa[0]] is written explicitly instead of relying on
    // the zero-initialisation of the global array.
    for (i = 0; i < n; i++)
    {
        rank[sa[i]] = i;
    }

    // i == n - 1 is skipped: that suffix is the sentinel, which has rank 0
    // and therefore no predecessor in sa.
    for (i = 0; i < n - 1; i++)
    {
        if (k > 0)
        {
            k--;
        }
        j = sa[rank[i] - 1];
        // Terminates without a bounds check because the unique sentinel
        // forces a mismatch before either suffix runs off the end.
        while (r[i + k] == r[j + k])
        {
            k++;
        }
        height[rank[i]] = k;
    }
    return;
}

// Reads two whitespace-delimited strings from stdin and prints the length of
// their longest common substring.
//
// The strings are joined as  first + separator + second + 0  and the answer
// is the largest height[i] whose two neighbouring suffixes start in
// different strings.
int main()
{
    // Static storage: together these buffers take about 1 MB, which is too
    // much to place on the stack safely.
    static char str[MAXN];
    static int r[MAXN];

    // Every byte value b is encoded as b + 1 (1..256), which keeps 0 free
    // for the sentinel. The separator gets a code above all of them so that
    // it can never match a symbol of either string.
    const int SEPARATOR = 257;
    const int ALPHABET = SEPARATOR + 1;

    // Build "%<MAXS - 1>s" so that scanf cannot write past the space
    // reserved for one string.
    char fmt[32];
    sprintf(fmt, "%%%ds", MAXS - 1);

    // A missing string has no common substring with anything.
    if (scanf(fmt, str) != 1)
    {
        printf("0\n");
        return 0;
    }
    const int N1 = static_cast<int>(strlen(str));

    // The second string is stored right behind the first one; index N1 is
    // the slot that becomes the separator in r.
    if (scanf(fmt, str + N1 + 1) != 1)
    {
        printf("0\n");
        return 0;
    }
    const int N2 = static_cast<int>(strlen(str + N1 + 1));

    // Total length: first string, separator, second string, 0 sentinel.
    const int N = N1 + 1 + N2 + 1;

    for (int i = 0; i < N1; ++i)
    {
        r[i] = static_cast<unsigned char>(str[i]) + 1;
    }
    r[N1] = SEPARATOR;
    for (int i = N1 + 1; i < N - 1; ++i)
    {
        r[i] = static_cast<unsigned char>(str[i]) + 1;
    }
    r[N - 1] = 0;

    da(r, sa, N, ALPHABET);
    calheight(r, sa, N);

    int ans = 0;
    for (int i = 1; i < N; ++i)
    {
        // Only pairs with one suffix starting before the separator and the
        // other starting after it count. The separator occurs only once, so
        // such a common prefix cannot extend past the end of the first string.
        const bool differentStrings =
            (sa[i] < N1 && sa[i - 1] > N1) || (sa[i] > N1 && sa[i - 1] < N1);
        if (differentStrings && height[i] > ans)
        {
            ans = height[i];
        }
    }

    printf("%d\n", ans);
    return 0;
}

0 0