POJ 1743 Musical Theme 不可重叠最长重复子串(后缀数组)

Musical Theme
Time Limit: 1000MS  Memory Limit: 30000K  Total Submissions: 16969  Accepted: 5817


A musical melody is represented as a sequence of N (1<=N<=20000) notes that are integers in the range 1..88, each representing a key on the piano. It is unfortunate but true that this representation of melodies ignores the notion of musical timing; but, this programming task is about notes and not timings. 
Many composers structure their music around a repeating "theme", which, being a subsequence of an entire melody, is a sequence of integers in our representation. A subsequence of a melody is a theme if it: 
  • is at least five notes long 
  • appears (potentially transposed -- see below) again somewhere else in the piece of music 
  • is disjoint from (i.e., non-overlapping with) at least one of its other appearance(s)

Transposed means that a constant positive or negative value is added to every note value in the theme subsequence. 
Given a melody, compute the length (number of notes) of the longest theme. 
One second time limit for this problem's solutions! 


The input contains several test cases. The first line of each test case contains the integer N. The following N integers represent the sequence of notes. 
The last test case is followed by one zero. 


For each test case, the output file should contain a single line with a single integer that represents the length of the longest theme. If there are no themes, output 0.

Sample Input

30
25 27 30 34 39 45 52 60 69 79 69 60 52 45 39 34 30 26 22 18
82 78 74 70 66 67 64 60 65 80
0

Sample Output

5

Use scanf instead of cin to reduce the read time.



因为题目给的是音符,而真正要求的是相邻两个音符的差值,例如有一段音符是2 4 6 8 10 22 24 26 28 30,此情况就满足题意,它们的波动是一样的,都是2.
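先看允许重叠的情形:做法比较简单,只需要求 height 数组里的最大值即可。首先求最长重复子串,等价于求两个后缀的最长公共前缀的最大值。因为任意两个后缀的最长公共前缀都是 height 数组里某一段的最小值,那么这个值一定不大于 height 数组里的最大值。所以最长重复子串的长度就是 height 数组里的最大值。这个做法的时间复杂度为 O(n)。但本题要求两次出现不可重叠,因此不能直接取 height 的最大值,而要二分答案 k:把排好序的后缀按 height 值不小于 k 划分成若干组,判断是否存在某一组,其中两个后缀的起始位置相距足够远(保证两段音符互不重叠)。由于是在相邻音符的差值数组上匹配,最终的音符个数等于最大的 k 加 1,不足 5 时输出 0。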
以上思路选自罗穗骞的 IOI2009 国家集训队论文《后缀数组——处理字符串的有力工具》。
// POJ 1743 "Musical Theme": length of the longest theme that occurs twice
// (possibly transposed) without the two occurrences sharing a note.
// Approach: build a suffix array over the adjacent-note differences, then
// binary-search the number of matching differences.
// Judge figures reported for the original submission: 1056K, 204MS.
#include<stdio.h>
#include<string.h>
#include<algorithm>

#define M 20007  // array capacity; the problem statement bounds N by 20000

// sa[i]     : start index of the i-th smallest suffix (sa[0] is the sentinel suffix)
// rank[p]   : position of the suffix starting at p inside sa
// height[i] : longest common prefix of the suffixes sa[i-1] and sa[i]
// No `using namespace std;` on purpose: the global `rank` would otherwise be
// ambiguous with std::rank on C++11 and later.
int sa[M], rank[M], height[M];
int wa[M], wb[M], wv[M], ws[M];  // scratch buffers for the radix sorts in get_sa
int num[M], s[M];                // num: shifted differences plus sentinel, s: raw notes

// Returns non-zero when the two key pairs (r[a], r[a+l]) and (r[b], r[b+l]) are equal.
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds the suffix array `sa` of r[0..n-1] by prefix doubling with radix sort.
//   r : input symbols, each in [0, m); r[n-1] must be a unique smallest
//       sentinel (0), which keeps the r[a+l] reads in cmp() inside the input.
//   n : number of symbols including the sentinel.
//   m : exclusive upper bound of the symbol values.
void get_sa(int *r, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *t;

    // Counting sort of the suffixes by their first symbol.
    for (i = 0; i < m; i++) ws[i] = 0;
    for (i = 0; i < n; i++) ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) ws[i] += ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--ws[x[i]]] = i;

    // Each round sorts by the first 2*j symbols; p counts the distinct keys
    // produced and becomes the alphabet size m of the next round.
    for (j = 1, p = 1; p < n; j *= 2, m = p)
    {
        // y lists the suffixes ordered by their second key (symbols j..2j-1).
        // Suffixes shorter than j have an empty second key and come first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) ws[i] = 0;
        for (i = 0; i < n; i++) ws[wv[i]]++;
        for (i = 1; i < m; i++) ws[i] += ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];

        // Swap the buffers: y now holds the previous keys, x receives the new ones.
        t = x; x = y; y = t;
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
}

// Fills `rank` and `height` for the n real suffixes of r. The sentinel at
// r[n] is excluded from n and terminates every character comparison.
void get_height(int *r, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++)
    {
        // The common prefix shrinks by at most one when moving from suffix
        // i-1 to suffix i, so matching resumes from k-1 instead of 0.
        if (k) k--;
        j = sa[rank[i] - 1];
        while (r[i + k] == r[j + k]) k++;
        height[rank[i]] = k;
    }
}

// Binary-searches the largest k such that two suffixes share at least k
// leading differences while starting more than k positions apart.
//   n : number of differences (sentinel excluded); sa[1..n] and
//       height[2..n] must already be filled.
// Returns the theme length in notes (k + 1), or 0 if it would be below 5.
int solve(int n)
{
    int minn = 0, maxx = n / 2;
    while (minn <= maxx)
    {
        int mid = (minn + maxx) / 2;
        // low/high track the smallest and largest start position inside the
        // current run of sorted suffixes whose adjacent LCPs are all >= mid.
        int low = sa[1], high = sa[1];
        bool found = false;
        // The range is inclusive of n: sa[n] is the last real suffix.
        for (int i = 2; i <= n; i++)
        {
            if (height[i] < mid)
            {
                low = high = sa[i];  // the run is broken, start a new one
                continue;
            }
            low = std::min(low, sa[i]);
            high = std::max(high, sa[i]);
            // mid equal differences span mid + 1 notes, so the two starts
            // must be strictly more than mid apart to share no note.
            if (high - low > mid)
            {
                found = true;
                break;
            }
        }
        if (found) minn = mid + 1;
        else maxx = mid - 1;
    }
    return maxx >= 4 ? maxx + 1 : 0;
}

// Reads test cases until N == 0 (or input ends) and prints one answer per case.
int main()
{
    int n;
    // Checking scanf's result ends the loop at EOF instead of spinning on a stale n.
    while (scanf("%d", &n) == 1 && n)
    {
        for (int i = 0; i < n; i++)
            scanf("%d", &s[i]);

        // Two disjoint themes of at least five notes need at least ten notes.
        if (n < 10)
        {
            printf("0\n");
            continue;
        }

        // Transposition keeps adjacent differences unchanged. Differences lie
        // in [-87, 87]; adding 90 maps them to [3, 177], above the sentinel 0.
        for (int i = 0; i < n - 1; i++)
            num[i] = (s[i + 1] - s[i]) + 90;
        // Written explicitly every case: a previous, longer test case may
        // have left a non-zero difference in this slot.
        num[n - 1] = 0;

        int m = 180;
        get_sa(num, n, m);
        get_height(num, n - 1);
        printf("%d\n", solve(n - 1));
    }
    return 0;
}

