Maximum repetition substring + POJ + 后缀数组之求重复次数最多的连续重复子串

来源:互联网 发布:移动互联网数据报告 编辑:程序博客网 时间:2024/04/29 19:54
Maximum repetition substring
Time Limit: 1000MS  Memory Limit: 65536K  Total Submissions: 6904  Accepted: 2065

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa
解决方案:此题为论文例题,
1)可先枚举子串长度l,对每个为l的倍数的位置i,求后缀i与后缀i+l的最长公共前缀k,那么从i开始的重复次数为ans=k/l+1。但重复子串的开头未必恰好在i:若k%l!=0,后面多出k%l这一截,可把起点往前移l-k%l到位置t,再求后缀t与后缀t+l的最长公共前缀;若它不小于l-k%l(即能一直匹配到i),则重复次数加1。记录重复次数的最大值。
2)求两后缀的最长公共前缀可归为RMQ问题。
3)最后就是字典序的问题了。这时可按后缀数组的顺序从前往后推,找到第一个符合条件的后缀即可跳出循环。
code:
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>

// POJ 3693 "Maximum repetition substring".
//
// For every test string, print the substring with the largest repetition
// number (ties broken by lexicographic order).  The solution builds a suffix
// array with the prefix-doubling algorithm, derives the height (LCP) array,
// answers arbitrary suffix-pair LCP queries with a sparse table, and then
// enumerates candidate period lengths.
//
// NOTE: `using namespace std;` is deliberately not used.  The global array
// `rank` would otherwise be ambiguous with std::rank from <type_traits>.

// Longest input is 100,000 characters; one extra slot holds the sentinel.
const int nMax = 100000 + 5;
// Sparse-table depth: 2^17 > nMax, so 17 levels are enough.
const int logMax = 17;

int sa[nMax], rank[nMax], height[nMax];
int wa[nMax], wb[nMax], wv[nMax], wd[nMax];

// Returns non-zero when positions a and b carry the same pair of keys
// (r[x], r[x+l]).  The second key is only read when the first keys match.
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r : input sequence of length n; r[n-1] must be a unique smallest value
//       (the caller appends 0 as a sentinel)
//   n : length of r including the sentinel
//   m : exclusive upper bound of the values in r
// On return sa[0..n-1] lists suffix start positions in sorted order.
void da(int *r, int n, int m)
{
    int i, j, p;
    int *x = wa, *y = wb;

    // Counting sort of the suffixes by their first character.
    for (i = 0; i < m; i++) wd[i] = 0;
    for (i = 0; i < n; i++) wd[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wd[i] += wd[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wd[x[i]]] = i;

    // Each round sorts by the first 2*j characters using the ranks of the
    // first j characters; stops once all ranks are distinct (p == n).
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y = suffixes ordered by their second key.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wd[i] = 0;
        for (i = 0; i < n; i++) wd[wv[i]]++;
        for (i = 1; i < m; i++) wd[i] += wd[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wd[wv[i]]] = y[i];

        // Recompute ranks into x; y now holds the previous ranks.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

// Builds rank[] and height[] for a text of length n (sentinel excluded).
// height[i] is the LCP of the suffixes at sa[i-1] and sa[i], for 1 <= i <= n.
// Requires r[n] to be the sentinel so the character comparison terminates.
void calHeight(int *r, int n)
{
    int i, j, k = 0;
    // sa[0] is the sentinel suffix; set its rank explicitly so no value from
    // a previous test case survives in rank[n].
    rank[sa[0]] = 0;
    for (i = 1; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++)
    {
        if (k > 0) k--;               // the LCP shrinks by at most one
        j = sa[rank[i] - 1];
        while (r[i + k] == r[j + k]) k++;
        height[rank[i]] = k;
    }
}

int dp[nMax][logMax];
int Log[nMax];

// Builds the range-minimum sparse table over height[1..len] and the
// floor(log2) lookup table Log[1..len].
void initRMQ_log(int len)
{
    for (int i = 1; i <= len; i++) dp[i][0] = height[i];
    for (int j = 1; (1 << j) <= len; j++)
        for (int i = 1; i + (1 << j) - 1 <= len; i++)
            dp[i][j] = std::min(dp[i][j - 1], dp[i + (1 << (j - 1))][j - 1]);

    Log[0] = -1;
    for (int i = 1; i <= len; i++)
        Log[i] = (i & (i - 1)) ? Log[i - 1] : Log[i - 1] + 1;   // +1 at powers of two
}

// Longest common prefix of the suffixes starting at text positions a and b.
// Precondition: a != b and both are valid positions of the current text.
int lcp(int a, int b)
{
    a = rank[a];
    b = rank[b];
    if (a > b) std::swap(a, b);
    a++;                               // height[a+1 .. b] covers the pair
    int k = Log[b - a + 1];
    return std::min(dp[a][k], dp[b - (1 << k) + 1][k]);
}

char text[nMax];
int num[nMax];
int save[nMax];   // distinct period lengths that reach the best repetition count

int main()
{
    int caseNo = 0;
    // The field width keeps the read inside text[] (nMax > 100000 + 1).
    while (scanf("%100000s", text) == 1)
    {
        if (text[0] == '#') break;

        int len = (int)strlen(text);
        for (int i = 0; i < len; i++) num[i] = text[i] - 'a' + 1;
        num[len] = 0;                  // sentinel, smaller than every letter

        da(num, len + 1, 30);
        calHeight(num, len);
        initRMQ_log(len);

        // Every non-empty string has repetition number at least 1.
        int best = 1;
        int cnt = 0;
        for (int l = 1; l < len; l++)  // enumerate the period length
        {
            for (int j = 0; j + l < len; j += l)
            {
                int k = lcp(j, j + l);
                int reps = k / l + 1;

                // The repetition may start before j.  Shift left so that the
                // trailing k % l characters complete a full period.  Taking
                // the count from the real LCP at the shifted start can never
                // overestimate, which `lcp >= k` could when k < l.
                int start = j - (l - k % l);
                if (k % l != 0 && start >= 0)
                    reps = std::max(reps, lcp(start, start + l) / l + 1);

                if (reps > best)
                {
                    best = reps;
                    cnt = 0;
                    save[cnt++] = l;
                }
                else if (reps == best && (cnt == 0 || save[cnt - 1] != l))
                {
                    save[cnt++] = l;   // record each period length once
                }
            }
        }

        int st = sa[1];                // lexicographically smallest suffix
        int lo = 1;
        if (best > 1)
        {
            // Scan suffixes in lexicographic order; the first one that starts
            // a `best`-fold repetition gives the smallest answer.  Period
            // lengths are stored in increasing order, so the shortest match
            // at a given start (a prefix of the longer ones) wins.
            bool found = false;
            for (int i = 1; i <= len && !found; i++)
            {
                for (int j = 0; j < cnt; j++)
                {
                    int r = save[j];
                    if (sa[i] + r >= len) continue;   // no room for a second copy
                    if (lcp(sa[i], sa[i] + r) >= (best - 1) * r)
                    {
                        st = sa[i];
                        lo = best * r;
                        found = true;
                        break;
                    }
                }
            }
        }
        // With best == 1 the answer is the single smallest character, which is
        // the first character of the smallest suffix (also covers len == 1).

        printf("Case %d: %.*s\n", ++caseNo, lo, text + st);
    }
    return 0;
}

0 0
原创粉丝点击