hdu 5008(2014 ACM/ICPC Asia Regional Xi'an Online ) Boring String Problem(后缀数组&二分)

来源:互联网 发布:深入浅出php 编辑:程序博客网 时间:2024/04/18 15:42

Boring String Problem

Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 219    Accepted Submission(s): 45


Problem Description
In this problem, you are given a string s and q queries.

For each query, you should answer that when all distinct substrings of string s were sorted lexicographically, which one is the k-th smallest.

A substring si...j of the string s = a1a2 ...an(1 ≤ i ≤ j ≤ n) is the string aiai+1 ...aj. Two substrings sx...y and sz...w are cosidered to be distinct if sx...y ≠ Sz...w
 

Input
The input consists of multiple test cases.Please process till EOF.

Each test case begins with a line containing a string s(|s| ≤ 105) with only lowercase letters.

Next line contains a postive integer q(1 ≤ q ≤ 105), the number of questions.

q queries are given in the next q lines. Every line contains an integer v. You should calculate the k by k = (l⊕r⊕v)+1(l, r is the output of previous question, at the beginning of each case l = r = 0, 0 < k < 263, “⊕” denotes exclusive or)
 

Output
For each test case, output consists of q lines, the i-th line contains two integers l, r which is the answer to the i-th query. (The answer l,r satisfies that sl...r is the k-th smallest and if there are several l,r available, ouput l,r which with the smallest l. If there is no l,r satisfied, output “0 0”. Note that s1...n is the whole string)
 

Sample Input
aaa40235
 

Sample Output
1 11 31 20 0
 

Source
2014 ACM/ICPC Asia Regional Xi'an Online
 

Recommend
hujie   |   We have carefully selected several similar problems for you:  5017 5016 5015 5014 5013 
 题意:
给你一个长度不超过1e5的字符串。把它的所有子串去重后排序。然后要你输出第k大的字符串的位置l,r。如果有多个位置输出l最小的。
思路:
比赛时看到这题感觉和SPOJ-7258 Lexicographical Substring Search很像。
不过那是学后缀自动机时看到了。由于后缀自动机是在是不是很好懂。。。所以最后还是放弃了。但是网上有这题的题解。但是很那题有点差别的是这题要求输出字符串的位置。于是就不知道怎么搞了。于是就用相对熟悉的后缀数组想了下。毕竟这题感觉n*log(n)是可过的。然后就开干了。我们先算出来每个后缀sa[i]比sa[i-1]多出多少个不同的子串。明显为len-sa[i]-height[i]存到val[i]。而sa[i]就对应len-sa[i]-height[i]个子串了。然后用val[i]构造线段树。然后找排名第k的字符串怎么找呢。就类似二分的思想了。如果线段树左子树字符大于等于k个就到左子树。不行就到右子树找。这样就可以找到第k大串对应的sa[i]和第k串的长度len。写完这里就卡了下。因为怎么处理l最小的问题呢。不可能在i附近找和sa[i]lcp>=len且sa[i]最小的吧。想了下最发杂的就是全a的情况。这样就退化成O(n^2)了。那sa就白写了。
冷静了下。一想。就出来了。我们可以二分求出最左边和sa[i]lcp>=len的位置left。在二分出sa[i]最右边lcp>=len的位置right。然后答案就是left->right中sa最小的。这个可以用rmq维护。然后接1A了。不过有人就是按我先前的前后直接找的。居然过了。可见数据还是蛮水的。
详细见代码:
#include<algorithm>#include<iostream>#include<string.h>#include<stdio.h>using namespace std;const int INF=0x3f3f3f3f;const int maxn=100010;typedef long long ll;#define lson L,mid,ls#define rson mid+1,R,rschar txt[maxn];int sa[maxn],T1[maxn],T2[maxn],ct[maxn],he[maxn],rk[maxn],n,m,le,ri;int rmq[25][maxn],lg[maxn],id[25][maxn],pos,len;ll num[maxn<<2];void build(int L,int R,int rt){    if(L==R)    {        num[rt]=n-sa[L]-he[L];        return;    }    int ls=rt<<1,rs=ls|1,mid=(L+R)>>1;    build(lson);    build(rson);    num[rt]=num[ls]+num[rs];}void getsa(char *st){    int i,k,p,*x=T1,*y=T2;    for(i=0; i<m; i++) ct[i]=0;    for(i=0; i<n; i++) ct[x[i]=st[i]]++;    for(i=1; i<m; i++) ct[i]+=ct[i-1];    for(i=n-1; i>=0; i--)        sa[--ct[x[i]]]=i;    for(k=1,p=1; p<n; k<<=1,m=p)    {        for(p=0,i=n-k; i<n; i++) y[p++]=i;        for(i=0; i<n; i++) if(sa[i]>=k) y[p++]=sa[i]-k;        for(i=0; i<m; i++) ct[i]=0;        for(i=0; i<n; i++) ct[x[y[i]]]++;        for(i=1; i<m; i++) ct[i]+=ct[i-1];        for(i=n-1; i>=0; i--) sa[--ct[x[y[i]]]]=y[i];        for(swap(x,y),p=1,x[sa[0]]=0,i=1; i<n; i++)            x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;    }}void gethe(char *st){    int i,j,k=0;    for(i=0;i<n;i++) rk[sa[i]]=i;    for(i=0;i<n-1;i++)    {        if(k) k--;        j=sa[rk[i]-1];        while(st[i+k]==st[j+k]) k++;        he[rk[i]]=k;    }}void rmq_init(){    int i,j;    for(i=0;i<n;i++)    {        rmq[0][i]=he[i];        id[0][i]=sa[i];    }    for(i=1;i<=lg[n];i++)        for(j=0;j+(1<<i)-1<n;j++)        {            rmq[i][j]=min(rmq[i-1][j],rmq[i-1][j+(1<<(i-1))]);            id[i][j]=min(id[i-1][j],id[i-1][j+(1<<(i-1))]);        }}int rmq_min(int l,int r){    if(l>r)        return 0;    int tmp=lg[r-l+1];    return min(rmq[tmp][l],rmq[tmp][r-(1<<tmp)+1]);}int rmq_id(int l,int r){    int tmp=lg[r-l+1];    return min(id[tmp][l],id[tmp][r-(1<<tmp)+1]);}void prermq(){    int  i;    lg[0]=-1;    for(i=1;i<maxn;i++)        lg[i]=lg[i>>1]+1;}void qu(int L,int R,int rt,ll k){    if(k>num[rt])    {        pos=-1;        le=ri=0;        return;    }    if(L==R)    {        pos=L;        len=he[L]+k;        return;    }    int ls=rt<<1,rs=ls|1,mid=(L+R)>>1;    if(num[ls]>=k)        qu(lson,k);    else        qu(rson,k-num[ls]);}int binl(int x){    int low=1,hi=x-1,mid,ans=x;    while(low<=hi)    {        mid=(low+hi)>>1;        if(rmq_min(mid+1,x)>=len)            ans=mid,hi=mid-1;        else            low=mid+1;    }    return ans;}int binr(int x){    int low=x+1,hi=n,mid,ans=x;    while(low<=hi)    {        mid=(low+hi)>>1;        if(rmq_min(x+1,mid)>=len)            ans=mid,low=mid+1;        else            hi=mid-1;    }    return ans;}inline ll ReadInt(){    char ch = getchar();    if (ch==EOF) return -1;    ll data = 0;    while (ch < '0' || ch > '9')    {        ch = getchar();        if (ch==EOF) return -1;    }    do    {        data = data*10 + ch-'0';        ch = getchar();    } while (ch >= '0' && ch <= '9');    return data;}inline void putit(int x){    if (x/10>0) putit(x/10);    putchar(x%10+'0');}int main(){    int q,lll,rrr;    ll kth;    prermq();    while(~scanf("%s",txt))    {        m=150,n=strlen(txt);        n++;        getsa(txt);        gethe(txt);        rmq_init();        n--;        build(1,n,1);        scanf("%d",&q);        le=ri=0;        while(q--)        {            kth=ReadInt();            kth=(le^ri^kth)+1;            qu(1,n,1,kth);            if(pos==-1)                putit(le),putchar(' '),putit(ri),putchar('\n');            else            {                lll=binl(pos);                rrr=binr(pos);                le=rmq_id(lll,rrr)+1;                ri=le+len-1;                putit(le),putchar(' '),putit(ri),putchar('\n');            }        }    }    return 0;}


2 0