POJ3261之后缀数组

来源:互联网 发布:分光计实验的数据记录 编辑:程序博客网 时间:2024/06/06 16:33

Milk Patterns
Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 13891 Accepted: 6143
Case Time Limit: 2000MS
Description

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can’t predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns – 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2..N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.
Output

Line 1: One integer, the length of the longest pattern which occurs at least K times
Sample Input

8 2
1
2
3
2
3
2
3
1
Sample Output

4

做了一天的后缀数组Q^Q。
感觉被掏空,还是要继续加油。

题意就是给我们一串数字和一个K,要求我们找出出现不少于K次的最长子串(子串是可以重叠的),样例就是很好的解释~

首先要求出的肯定是sa数组和height数组啦,这里套个模板就好~

然后我们来分析~

排名第i的后缀和排名第j的后缀(i<j)的最长公共前缀等于height[i+1],height[i+2],…,height[j]中的最小值。拥有同一个前缀的后缀在sa中一定是连续的,所以只需要把sa中每连续K个后缀看成一组:组内相邻后缀之间的K-1个height值的最小值m,就是这K个后缀的最长公共前缀长度,这个长度为m的前缀是组内每个后缀共有的,也就是说它至少重复出现了K次(任意一个子串都是某个后缀的前缀)。对所有这样的组取m的最大值,就是答案。

这里先附上一种hash做法(是我看到的):
http://www.cnblogs.com/Norlan/p/4745289.html

然后是我自己的做法~
我用一个multiset来储存当前这一组(窗口)内的K-1个height值,这样*begin()一定是窗口内的最小值~

代码:

#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <set>

// POJ 3261 "Milk Patterns": given N integers and a count K, print the length
// of the longest contiguous pattern that occurs at least K times (occurrences
// may overlap). Solved with a suffix array plus a sliding-window minimum over
// the height (LCP) array.

// Capacity of every work array. It bounds both the sequence length and the
// alphabet size, because cnt[] is indexed by symbol value on the first pass.
const int N = 1000000 + 100;

// sa[r]     : start index of the suffix with rank r.
// Rank[i]   : rank of the suffix starting at index i.
// rank2     : scratch buffer that alternates with Rank during doubling.
// height[r] : LCP of the suffixes ranked r-1 and r (height[0] is 0).
// cnt       : counting-sort buckets.
// x, y      : x points at the current keys, y at the order to sort in.
int sa[N], Rank[N], rank2[N], height[N], cnt[N], *x, *y;
int a[N];

// One stable counting-sort pass: writes into sa[] the indices listed in
// y[0..n-1], ordered by key x[index]. Every key must lie in [0, sz) and
// sz must not exceed N.
void radix_sort(int n, int sz)
{
    // Only the first sz buckets are ever touched, so only those are cleared.
    std::fill(cnt, cnt + sz, 0);
    for (int i = 0; i < n; i++)
        cnt[x[y[i]]]++;
    for (int i = 1; i < sz; i++)
        cnt[i] += cnt[i - 1];
    // Walking backwards keeps equal keys in their incoming (y) order.
    for (int i = n - 1; i >= 0; i--)
        sa[--cnt[x[y[i]]]] = y[i];
}

// Builds sa[] and Rank[] for text[0..n-1] by prefix doubling.
//   text : symbols, each in [0, sz)
//   n    : number of symbols (n <= N); n <= 0 is a no-op
//   sz   : alphabet size (sz <= N)
void get_sa(int text[], int n, int sz = 128)
{
    if (n <= 0)
        return;

    x = Rank, y = rank2;
    for (int i = 0; i < n; i++)
        x[i] = text[i], y[i] = i;
    radix_sort(n, sz);

    for (int len = 1; len < n; len <<= 1)
    {
        // Order by the second half first: suffixes too short to have one
        // come first, then the rest in current sa order shifted left by len.
        int yid = 0;
        for (int i = n - len; i < n; i++)
            y[yid++] = i;
        for (int i = 0; i < n; i++)
            if (sa[i] >= len)
                y[yid++] = sa[i] - len;

        // The stable sort by first-half rank completes the pair ordering.
        radix_sort(n, sz);

        // y now holds the old ranks; fresh ranks are written into x.
        std::swap(x, y);
        x[sa[0]] = yid = 0;
        for (int i = 1; i < n; i++)
        {
            const int p = sa[i - 1];
            const int q = sa[i];
            const bool same = y[p] == y[q] && p + len < n && q + len < n &&
                              y[p + len] == y[q + len];
            x[q] = same ? yid : ++yid;
        }

        sz = yid + 1;
        if (sz >= n)
            break; // every suffix already has its own rank
    }

    // Rank is the inverse permutation of sa. Writing it from sa also covers
    // n == 1, where the doubling loop never runs and x still holds raw text.
    for (int i = 0; i < n; i++)
        Rank[sa[i]] = i;
}

// Fills height[1..n-1] from sa[] and Rank[], visiting suffixes in text order
// and reusing the previous match length minus one as the starting point.
void get_height(int text[], int n)
{
    if (n > 0)
        height[0] = 0; // the first-ranked suffix has no predecessor

    int k = 0;
    for (int i = 0; i < n; i++)
    {
        if (Rank[i] == 0)
            continue;
        k = std::max(0, k - 1);
        const int j = sa[Rank[i] - 1];
        while (i + k < n && j + k < n && text[i + k] == text[j + k])
            k++;
        height[Rank[i]] = k;
    }
}

// Returns the longest length shared by some k consecutive suffixes in sa[],
// i.e. the maximum over all windows of k-1 adjacent height[] values of the
// window minimum. Requires get_sa and get_height to have run for length n.
static int longest_pattern(int n, int k)
{
    if (n < 1 || k > n)
        return 0; // no k occurrences are possible
    if (k <= 1)
        return n; // the whole sequence occurs once

    const int window = k - 1; // k suffixes are linked by k-1 heights
    std::multiset<int> in_window;
    int best = 0;
    for (int r = 1; r < n; r++)
    {
        in_window.insert(height[r]);
        // Drop exactly one copy of the value that just left the window.
        if (r > window)
            in_window.erase(in_window.find(height[r - window]));
        // begin() is the smallest height, the prefix all k suffixes share.
        if (r >= window)
            best = std::max(best, *in_window.begin());
    }
    return best;
}

// Reads "N K" followed by N samples from stdin and prints the answer.
// Returns 1 without printing when the input is malformed or out of range.
int main(void)
{
    int num = 0;
    int k = 0;
    if (scanf("%d %d", &num, &k) != 2)
        return 1;
    if (num < 1 || num > N || k < 1 || k > num)
        return 1;

    for (int i = 0; i < num; i++)
    {
        // Samples index cnt[] directly, so they must fit the alphabet.
        if (scanf("%d", &a[i]) != 1 || a[i] < 0 || a[i] >= N)
            return 1;
    }

    get_sa(a, num, N);
    get_height(a, num);

    printf("%d\n", longest_pattern(num, k));
    return 0;
}
0 0