后缀数组 - hdu5008 Boring String Problem
来源:互联网 发布:网络女主播喝酒洗澡 编辑:程序博客网 时间:2024/04/27 18:43
题目:
http://acm.hdu.edu.cn/showproblem.php?pid=5008
题意:
将一个字符串的所有不同子串按字典序排序,多次询问第k小的子串所在位置,多个位置时输出最前的位置
思路:
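先考虑如果只询问第k小的子串是什么,可以用后缀数组轻易解决:对于sa序列中排名为i的后缀(起始位置为sa[i]),它新贡献的不同子串数量为 len - sa[i] - height[i]。因此用b数组存储前缀和,b[i]表示排名在[0,i]内的后缀一共产生了多少个不同子串;在b上二分搜索第一个满足 b[pos] >= k 的位置pos,第k小的子串就是后缀sa[pos]的一个前缀,其长度为 sublen = height[pos] + k - b[pos-1](pos为0时 sublen = k),这样即可得到第k个子串是什么;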
然后写到这里我就被卡住了= =因为要输出最靠前的位置,直观想法就是从pos开始沿着sa序列向下遍历,一直找到与后缀sa[pos]的LCP小于sublen的位置为止,并在遍历过程中取sa的最小值。这样的算法在比较坑爹的数据前,比如10^5个a,单次询问就要O(n),当询问数与n同阶时总时间复杂度是O(n^2),虽然实际数据非常弱居然让这种算法过了...
正确解法应为,利用height数组特性,RMQ + 二分枚举找到下限区间r,满足在[pos,r]这段区间内的任意两个后缀的LCP大于等于sublen,再用RMQ处理sa数组,找到sa序列中[pos,r]这段区间的最小值,这种解法即使在极端数据时的时间复杂度依然只有O(nlgn)
这个算法依次用了二分求pos位置,二分+RMQ求下限r,RMQ求sa区间最小值,非常优美的思路,值得好好理解一下
代码:
#include <iostream>
#include <stdio.h>
#include <algorithm>
#include <string.h>
using namespace std;

// HDU 5008 "Boring String Problem".
// For each query, find the k-th smallest distinct substring of the input
// string and print the 1-based [l, r] of its leftmost occurrence, or "0 0"
// when there is no such substring. Queries are chained: k = (l ^ r ^ v) + 1,
// where l and r are the previous answer.

const int MAXSIZE = 1e5 + 100;

// The working string, sa, rk and height are all indexed over [0, len).
#define rep(i,n) for(int i = 0; i < n; i++)

int rk[MAXSIZE], sa[MAXSIZE], height[MAXSIZE], wa[MAXSIZE], res[MAXSIZE];
char w[MAXSIZE]; // working copy of the string being processed
int len;

// Builds the suffix array `sa` of w[0, len) by prefix doubling with counting
// sorts. `up` is an exclusive upper bound on the symbol values stored in w.
// rk, res, height and wa are used as scratch space and hold no meaningful
// rank/height data when this returns; getHeight() recomputes rk and height.
void getSa(int up) {
    int *k = rk, *id = height, *r = res, *cnt = wa;

    // Counting sort of all suffixes by their first symbol.
    rep(i, up) cnt[i] = 0;
    rep(i, len) cnt[k[i] = w[i]]++;
    rep(i, up) cnt[i + 1] += cnt[i];
    for (int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }

    int d = 1, p = 0;
    while (p < len) {
        // Order by second key: suffixes shorter than d have an empty second
        // half and come first, the rest follow in current sa order.
        for (int i = len - d; i < len; i++) id[p++] = i;
        rep(i, len) if (sa[i] >= d) id[p++] = sa[i] - d;

        // Stable counting sort by first key on top of that order.
        rep(i, len) r[i] = k[id[i]];
        rep(i, up) cnt[i] = 0;
        rep(i, len) cnt[r[i]]++;
        rep(i, up) cnt[i + 1] += cnt[i];
        for (int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }

        // After the swap, r holds the previous keys and k receives new ones.
        swap(k, r);
        p = 0;
        k[sa[0]] = p++;
        rep(i, len - 1) {
            if (sa[i] + d < len && sa[i + 1] + d < len &&
                r[sa[i]] == r[sa[i + 1]] &&
                r[sa[i] + d] == r[sa[i + 1] + d])
                k[sa[i + 1]] = p - 1;
            else
                k[sa[i + 1]] = p++;
        }
        if (p >= len) return; // every suffix has a distinct key: done
        d <<= 1, up = p, p = 0;
    }
}

// Computes rk (inverse of sa) and height, where height[i] is the length of
// the longest common prefix of the suffixes ranked i-1 and i (height[0] = 0).
// Relies on w[len] == 0 being different from every symbol in w[0, len).
void getHeight() {
    int h = 0;
    rep(i, len) rk[sa[i]] = i;
    rep(i, len) {
        if (rk[i] == 0) {
            h = 0;
        } else {
            const int prev = sa[rk[i] - 1];
            if (h) h--;
            while (w[i + h] == w[prev + h]) h++;
        }
        height[rk[i]] = h;
    }
}

// Remaps the NUL-terminated string in w from 'a'.. to 1.. in place, writes a
// 0 terminator at w[len], then builds sa, rk and height.
// NOTE(review): assumes every input character is >= 'a'; a smaller character
// would produce a non-positive symbol and index the bucket array out of range.
void getSuffix() {
    len = static_cast<int>(strlen(w));
    int up = 0;
    rep(i, len) {
        w[i] = w[i] - 'a' + 1;
        up = max(up, static_cast<int>(w[i]));
    }
    w[len] = 0;
    getSa(up + 1);
    getHeight();
}

// Number of levels in each sparse table. Level j exists only while
// (1 << j) <= n, and n < MAXSIZE < (1 << 17), so j never exceeds 16.
const int LOGN = 17;

// Fills `table` so that table[i][j] is the minimum of A[i, i + 2^j).
static void sparseBuild(int table[][LOGN], int A[], int n) {
    for (int i = 0; i < n; ++i) {
        table[i][0] = A[i];
    }
    for (int j = 1; (1 << j) <= n; ++j) {
        for (int i = 0; i + (1 << j) - 1 < n; ++i) {
            table[i][j] = min(table[i][j - 1], table[i + (1 << (j - 1))][j - 1]);
        }
    }
}

// Minimum over the inclusive index range [L, R] of a table filled by
// sparseBuild. Requires L <= R.
static int sparseMin(int table[][LOGN], int L, int R) {
    int k = 0;
    while (1 << (k + 1) <= R - L + 1) k++;
    return min(table[L][k], table[R - (1 << k) + 1][k]);
}

// Range-minimum structure; main() builds it over sa.
int ddmin[MAXSIZE][LOGN];

// Preprocesses A[0, len) for range-minimum queries (len = array length).
void RMQ_init(int A[], int len) {
    sparseBuild(ddmin, A, len);
}

// Minimum of the array given to RMQ_init over the inclusive range [L, R].
int RMQ_min(int L, int R) {
    return sparseMin(ddmin, L, R);
}

// Range-minimum structure; main() builds it over height.
int dmin[MAXSIZE][LOGN];

// Preprocesses A[0, len) for LCP queries (len = array length).
void LCP_init(int A[], int len) {
    sparseBuild(dmin, A, len);
}

// Longest common prefix of the suffixes at ranks L and R in sa order.
// For L == R this is the full length of that suffix; otherwise it is the
// minimum of the array given to LCP_init over (min(L,R), max(L,R)].
int LCP(int L, int R) {
    if (L == R) return len - sa[L];
    if (L > R) swap(L, R);
    L++;
    return sparseMin(dmin, L, R);
}

// Previous answer, used to decode the next query.
long long l, r;

// Resets the previous answer at the start of each test case.
void init() {
    l = 0;
    r = 0;
}

// b[i] = number of distinct substrings contributed by the suffixes ranked
// 0..i, so b[len - 1] is the total number of distinct substrings.
long long b[MAXSIZE];

void calb() {
    b[0] = len - sa[0];
    for (int i = 1; i < len; ++i) {
        b[i] = b[i - 1] + len - sa[i] - height[i];
    }
}

int main() {
    // scanf("%s") only succeeds on a non-empty token, so len >= 1 below.
    while (scanf("%s", w) == 1) {
        init();
        getSuffix();
        calb();
        LCP_init(height, len);
        RMQ_init(sa, len);

        int q;
        if (scanf("%d", &q) != 1) break;

        const long long total = b[len - 1];
        for (int i = 0; i < q; ++i) {
            long long v;
            if (scanf("%lld", &v) != 1) break;

            // The query asks for k = (l ^ r ^ v) + 1. Test the range on the
            // XOR value itself: adding 1 first would overflow when it equals
            // LLONG_MAX and let an invalid k slip past the bound check.
            const long long x = l ^ r ^ v;
            if (x < 0 || x >= total) {
                l = 0;
                r = 0;
                printf("0 0\n");
                continue;
            }
            const long long k = x + 1;

            // First rank whose cumulative substring count reaches k.
            int lo = 0, hi = len - 1;
            int pos = 0;
            while (lo <= hi) {
                const int mid = (lo + hi) >> 1;
                if (b[mid] >= k) {
                    pos = mid;
                    hi = mid - 1;
                } else {
                    lo = mid + 1;
                }
            }

            // Length of the k-th substring, a prefix of the suffix sa[pos].
            const int sublen = static_cast<int>(
                pos == 0 ? k : height[pos] + k - b[pos - 1]);

            // Last rank rr >= pos whose suffix still shares that prefix.
            lo = pos;
            hi = len - 1;
            int rr = pos;
            while (lo <= hi) {
                const int mid = (lo + hi) >> 1;
                if (LCP(pos, mid) >= sublen) {
                    rr = mid;
                    lo = mid + 1;
                } else {
                    hi = mid - 1;
                }
            }

            // Leftmost occurrence = smallest start among ranks [pos, rr].
            l = RMQ_min(pos, rr) + 1;
            r = l + sublen - 1;
            printf("%lld %lld\n", l, r);
        }
    }
    return 0;
}
0 0
- hdu5008 Boring String Problem 后缀数组+二分
- hdu5008-Boring String Problem(后缀数组专题)
- hdu5008 Boring String Problem(后缀数组)
- 后缀数组 - hdu5008 Boring String Problem
- hdu5008 Boring String Problem 后缀数组+二分+RMQ
- hdu5008 Boring String Problem,2014西安网络赛B题,后缀数组,RMQ
- HDU5008--Boring String Problem(SA+二分)
- 【后缀数组】 HDOJ 5008 Boring String Problem
- hdu 5008 Boring String Problem(后缀数组)
- HDU 5008 Boring String Problem 后缀数组
- hdu 5008 Boring String Problem(后缀数组)
- hdu 5008 Boring String Problem 【后缀数组】
- hdu 5008 Boring String Problem 后缀数组
- HDU 5008 Boring String Problem 后缀数组
- HDU 5008 Boring String Problem 后缀数组 RMQ
- [后缀数组+二分+rmq] hdu 5008 Boring String Problem
- HDU - 5008 Boring String Problem (后缀数组+二分+RMQ)
- HDU 5008 Boring String Problem 二分 + 后缀数组
- 链表的增删改查
- Java入门-核心机制
- 三、NoteEditor.java文件学习笔记
- call和apply
- 隐藏实施过程
- 后缀数组 - hdu5008 Boring String Problem
- HDU 5113 Black And White (dfs)
- QNX驱动开发——SD卡SD模式开发实录
- 内存分析
- hdu2604Queuing 矩阵快速幂
- ibatis分组查询结果的返回参数绑定问题解决方案
- HDU 4919(Exclusive or-java的HashMap类)
- 大悲咒
- 代码实现截图(全屏,局部)