poj3693 Maximum repetition substring 后缀数组+RMQ

来源：互联网发布：id for mac中文破解版编辑：程序博客网时间：2024/04/30 05:03

Maximum repetition substring

Time Limit: 1000MS | Memory Limit: 65536K | Total Submissions: 7496 | Accepted: 2244

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

找出重复次数最多的子串，如果有多个输出字典序最小的那个。

这题首先枚举重复串的长度L，再枚举子串开始的位置i，重点是i不是从开始到最后一个一个枚举，而是枚举0,L,2L...

预处理后缀数组和RMQ，这样可以做到在O(1)的时间内得到任何两个后缀的LCP。输入串下标为i开始的后缀和下标为j开始的后缀的LCP，就是排名介于它们俩之间的所有后缀的LCP。因为height[i]是排名第i的后缀和排名第i-1的后缀的最长公共前缀长度，所以d[i][j]表示height[i..i+2^j-1]的最小值，也就是从排名i-1开始、往后共2^j+1个后缀的LCP。因此查询下标为i的后缀和下标为j的后缀的LCP，相当于求height在区间[min(rank[i],rank[j])+1, max(rank[i],rank[j])]上的最小值，即RMQ(min(rank[i],rank[j])+1, max(rank[i],rank[j]))。

用RMQ得到枚举的i和i+L的LCP，设为n，那么长度L的重复次数是n/L+1，这是以i为起始的串的情况。i-L处理完了接着处理i，那么以i-L+1...i-1这些位置为起始的串会不会把L重复更多次？在n%L不等于0的情况下是有可能的，而且最多也就多重复一次。因为如果n%L为0，那么要使重复次数更多，至少也要从i-L开始，而i-L这个位置之前已经枚举过。如果n%L!=0，那么对于i-L+1...i-(L-n%L)这些位置，设为j，j和j+L的LCP设为m，那么m/L+1可能会大于n/L+1。其实只要再求i-(L-n%L)和i+L-(L-n%L)的LCP就够了，不用再求更靠前的位置：因为若i-(L-n%L)使重复次数加1了，之前的位置也不可能使重复次数再加1；若i-(L-n%L)不能使重复次数增加，之前的位置也不能。

还有个问题是字典序，我做的时候一直WA，就是这个字典序的问题搞错了。我刚开始是在枚举的时候同时更新答案，一直没发现错，后来终于找到一组样例，mbkbkb，我得到的是kbkb，因为在整个过程中根本不会处理到bkbk，长度L为2的时候kbkb这里已经整除了。枚举过程中只能得到重复长度和重复次数，但不能保证字典序最小。所以只要最后再对整个串扫一遍，枚举每个位置为起始位置，看对于那个重复长度能不能得到那个重复次数，如果能的话说明这个串是满足，那就看它的rank是不是比原来的小，因为rank小字典序肯定小，如果小就更新。

这个题还是比较复杂的，自己画一画才能弄清楚。。

#include<iostream>
#include<queue>
#include<cstring>
#include<cstdio>
#include<cmath>
#include<set>
#include<map>
#include<vector>
#include<stack>
#include<algorithm>
using namespace std;

typedef long long LL;
typedef pair<LL,LL> pii;

// Maximum text length including the appended sentinel.
const int MAXN=100010;
// Number of sparse-table levels. RMQ_init only fills level j while
// (1<<j) <= sa.n <= MAXN < 2^17, so levels 0..16 are used; 18 leaves a spare.
const int LOGMAXN=18;
const int INF=0x3f3f3f3f;

// Result of the last solve() call:
//   times  - maximal repetition count found
//   sublen - period length of the chosen substring
//   pos    - start index of the chosen substring in str
//   ans    - suffix-array rank of the suffix starting at pos
// N is not used by the code in this block.
int N,sublen,times,pos,ans;
// d[i][j] = min(height[i .. i+2^j-1]); filled by RMQ_init().
int d[MAXN][LOGMAXN];
// floorLog2[x] = floor(log2(x)) for 1 <= x <= sa.n; filled by RMQ_init().
int floorLog2[MAXN+1];
char str[MAXN];

// Suffix array built by prefix doubling with counting sort, plus the
// height (LCP of rank-adjacent suffixes) array.
//
// Usage: clear(), append symbols to s[] via n, finish with a 0 sentinel that is
// strictly smaller than every other symbol, then build_sa(m) with every symbol
// in [0, m), then build_height().
struct SuffixArray{
    int s[MAXN];          // input symbols, s[n-1] is the 0 sentinel
    int sa[MAXN];         // sa[r] = start index of the suffix with rank r
    int height[MAXN];     // height[r] = LCP(suffix sa[r-1], suffix sa[r]); height[0] = 0
    int rank[MAXN];       // inverse of sa
    int c[MAXN];          // counting-sort buckets
    int t[MAXN],t2[MAXN]; // key buffers for the doubling rounds
    int n;                // number of symbols stored in s, sentinel included

    // Resets the symbol count and zeroes sa.
    void clear(){
        n=0;
        memset(sa,0,sizeof(sa));
    }

    // Sorts all suffixes of s[0..n). m is an exclusive upper bound on the symbols.
    void build_sa(int m){
        int i,*x=t,*y=t2;
        // Round 0: counting sort by the first symbol.
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[i]=s[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        for(int k=1;k<=n;k<<=1){
            // y lists suffixes ordered by their second key (the rank of i+k).
            // Suffixes shorter than k have an empty second key and come first.
            int p=0;
            for(i=n-k;i<n;i++) y[p++]=i;
            for(i=0;i<n;i++) if(sa[i]>=k) y[p++]=sa[i]-k;
            // Stable counting sort of that list by the first key.
            for(i=0;i<m;i++) c[i]=0;
            for(i=0;i<n;i++) c[x[y[i]]]++;
            // The prefix sum must start at 1: starting at 0 reads c[-1].
            for(i=1;i<m;i++) c[i]+=c[i-1];
            for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            // Re-rank using the (first key, second key) pairs.
            swap(x,y);
            p=1;
            x[sa[0]]=0;
            // The y[..+k] reads stay inside the text because && short-circuits:
            // with a unique sentinel, two suffixes can only tie on the first key
            // when both are longer than k.
            for(i=1;i<n;i++){
                const bool same=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k];
                x[sa[i]]=same?p-1:p++;
            }
            if(p>=n) break; // every suffix already has a distinct rank
            m=p;
        }
    }

    // Fills rank[] and height[]; the running value k drops by at most one per
    // step. Relies on the sentinel having rank 0, so rank[i]-1 >= 0 for every
    // i < n-1 and the extension loop stops at the sentinel.
    void build_height(){
        int k=0;
        for(int i=0;i<n;i++) rank[sa[i]]=i;
        height[0]=0;
        for(int i=0;i<n-1;i++){
            if(k) k--;
            int j=sa[rank[i]-1];
            while(s[i+k]==s[j+k]) k++;
            height[rank[i]]=k;
        }
    }
}sa;

// Builds the sparse table over sa.height and the floor(log2) lookup table.
// Must be called after sa.build_height() and before any RMQ() call.
void RMQ_init(){
    floorLog2[0]=0;
    floorLog2[1]=0;
    for(int i=2;i<=sa.n;i++) floorLog2[i]=floorLog2[i>>1]+1;
    for(int i=0;i<sa.n;i++) d[i][0]=sa.height[i];
    for(int j=1;(1<<j)<=sa.n;j++)
        for(int i=0;i+(1<<j)-1<sa.n;i++)
            d[i][j]=min(d[i][j-1],d[i+(1<<(j-1))][j-1]);
}

// Returns the LCP length of the two suffixes whose RANKS are L and R
// (in either order), i.e. min(height[min+1 .. max]).
// For L == R it returns the length of that suffix without the sentinel.
int RMQ(int L,int R){
    if(L==R) return sa.n-1-sa.sa[L];
    if(L>R) swap(L,R);
    L++;
    const int k=floorLog2[R-L+1];
    return min(d[L][k],d[R-(1<<k)+1][k]);
}

// Prints the lexicographically smallest substring of str[0..len) that has the
// maximal repetition count, followed by a newline, and stores the result in
// the globals times / sublen / pos / ans.
//
// Requires sa to be built over str[0..len) plus a sentinel, and RMQ_init()
// to have been called.
void solve(int len){
    times=0;
    ans=INF;
    sublen=0;
    pos=0;

    // Pass 1: find the maximal repetition count and EVERY period length that
    // reaches it. For a period L only starts 0, L, 2L, ... are probed; a
    // repetition starting between two probes is caught by the shifted probe.
    vector<int> periods; // ascending, no duplicates
    for(int L=1;L<=len;L++){
        for(int i=0;i+L<=len;i+=L){
            const int lcp=RMQ(sa.rank[i],sa.rank[i+L]);
            int reps=lcp/L+1;
            const int rem=lcp%L;
            if(rem!=0&&i!=0){
                // Moving the start left by L-rem is the only shift that can
                // complete one more period; i >= L here, so shifted >= 1.
                const int shifted=i-(L-rem);
                const int shiftedLcp=RMQ(sa.rank[shifted],sa.rank[shifted+L]);
                reps=max(reps,shiftedLcp/L+1);
            }
            if(reps>times){
                times=reps;
                periods.clear();
                periods.push_back(L);
            }else if(reps==times&&(periods.empty()||periods.back()!=L)){
                periods.push_back(L);
            }
        }
    }

    // Pass 2: pass 1 does not visit every start position, and several periods
    // can tie on the repetition count, so the smallest answer is selected here.
    // Walk the suffixes in lexicographic order (rank 0 is the sentinel) and
    // take the first one that starts a `times`-fold repetition. Trying the
    // periods in ascending order yields the shortest, hence smallest, match at
    // that start. Worst case this is O(sa.n * periods.size()) queries.
    bool found=false;
    for(int r=1;r<sa.n&&!found;r++){
        const int start=sa.sa[r];
        for(size_t k=0;k<periods.size();k++){
            const int L=periods[k];
            // Every stored period satisfies times*L <= len, so no overflow.
            if(start+times*L>len) break; // longer periods cannot fit either
            // str[start .. start+times*L) has period L exactly when the
            // suffixes at start and start+L agree on (times-1)*L characters.
            if(RMQ(r,sa.rank[start+L])>=(times-1)*L){
                pos=start;
                sublen=L;
                ans=r;
                found=true;
                break;
            }
        }
    }

    // The chosen substring is periodic, so printing it directly equals
    // printing the period `times` times.
    printf("%.*s\n",times*sublen,str+pos);
}

// Reads one string per test case until a line starting with '#', and prints
// "Case k: " followed by the answer for each.
int main(){
#ifdef LOCAL
    // Local debugging only. Redirecting unconditionally makes the program
    // ignore real standard input when in.txt does not exist.
    freopen("in.txt","r",stdin);
#endif
    int cas=0;
    // Width-limited to MAXN-1 characters so the read cannot overflow str.
    while(scanf("%100009s",str)==1&&str[0]!='#'){
        sa.clear();
        int len=strlen(str);
        // Map 'a'..'z' to 1..26 and keep 0 for the sentinel. The input is
        // specified as lowercase only; other bytes would fall outside the
        // alphabet bound passed to build_sa.
        for(int i=0;i<len;i++) sa.s[sa.n++]=str[i]-'a'+1;
        sa.s[sa.n++]=0;
        sa.build_sa(30);
        sa.build_height();
        RMQ_init();
        times=0;
        ans=INF;
        printf("Case %d: ",++cas);
        solve(len);
    }
    return 0;
}

0 0