Repeats (spoj 687)

来源:互联网 发布:撒贝宁老婆二婚 知乎 编辑:程序博客网 时间:2024/05/21 10:19

题意:给你一串字符串,求重复次数最多的连续重复子串。例如u=babbabaabaabaabab, 最多的为aba, 在u的子串abaabaabaaba中重复了4次。

题解:参考论文上的解法,先枚举长度l,然后求长度为l的子串至少连续出现2次的情况。如果在原字符串中连续出现2次,那么这个由长为l的子串重复两次所形成的串肯定包括了字符s[0], s[l], s[l*2], s[l*3], …中的某相邻的两个。所以只须看字符s[l*i]和s[l*(i+1)]往前和往后各能匹配到多远(因为连续的子串最初的起点很可能不是在s[0], s[l], s[l*2], s[l*3], …上,所以考虑往前,当然,往前最多需要考虑一个l的长度是否匹配),记总长度为k,那么在这里长为l的串就连续出现了k/l+1次。显然,往后匹配的情况就是lcp问题,在有height数组后,相当于求一次RMQ。往前的情况也很简单。

#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <stdlib.h>
#include <iostream>
#define  LL long long
using namespace std;

// Capacity of every per-position array: longest supported string plus slack.
const int Max = 50000 + 100;

/*
 * Stable counting sort.
 *
 * Writes the n indices stored in a[] into b[], ordered by the key
 * str[a[i]]. Every key must lie in [0, m], and m must be smaller than Max
 * because the bucket array has Max entries.
 */
void radix(int *str, int *a, int *b, int n, int m) {
    static int count[Max];
    for (int i = 0; i <= m; i++) {
        count[i] = 0;
    }
    for (int i = 0; i < n; i++) {
        ++count[str[a[i]]];
    }
    for (int i = 1; i <= m; i++) {
        count[i] += count[i - 1];
    }
    // Walk backwards so that equal keys keep their relative order.
    for (int i = n - 1; i >= 0; i--) {
        b[--count[str[a[i]]]] = a[i];
    }
}

/*
 * Build the suffix array of str[0..n-1] into sa[0..n-1] by prefix doubling.
 *
 * str holds symbols in [0, m]; n must be smaller than Max.
 * Does nothing when n <= 0.
 */
void suffix_array(int *str, int *sa, int n, int m) {
    static int rk[Max], first_key[Max], second_key[Max];
    if (n <= 0) {
        return;
    }

    // Round 0: order suffixes by their first symbol only.
    for (int i = 0; i < n; i++) {
        rk[i] = i;
    }
    radix(str, rk, sa, n, m);
    rk[sa[0]] = 0;
    for (int i = 1; i < n; i++) {
        rk[sa[i]] = rk[sa[i - 1]] + (str[sa[i]] != str[sa[i - 1]]);
    }

    // Each round doubles the compared prefix length (1 << i becomes 2 << i).
    for (int i = 0; (1 << i) < n; i++) {
        const int step = 1 << i;
        for (int j = 0; j < n; j++) {
            first_key[j] = rk[j] + 1;
            // Key 0 stands for "the second half runs past the end".
            second_key[j] = (j + step >= n) ? 0 : rk[j + step] + 1;
            sa[j] = j;
        }
        // Two stable passes: secondary key first, then primary key.
        // rk doubles as the scratch buffer between the passes.
        radix(second_key, sa, rk, n, n);
        radix(first_key, rk, sa, n, n);
        rk[sa[0]] = 0;
        for (int j = 1; j < n; j++) {
            const int prev = sa[j - 1];
            const int cur = sa[j];
            rk[cur] = rk[prev] + (first_key[prev] != first_key[cur] ||
                                  second_key[prev] != second_key[cur]);
        }
    }
}

/*
 * Compute the height array: h[r] is the length of the longest common prefix
 * of the suffixes at sa[r-1] and sa[r]; h[0] is 0.
 *
 * str[n] must hold a sentinel that differs from every symbol in
 * str[0..n-1]; the comparison loop relies on it to stop at the end of the
 * string.
 */
void calc_height(int *str, int *sa, int *h, int n) {
    static int rk[Max];
    int k = 0;
    h[0] = 0;
    for (int i = 0; i < n; i++) {
        rk[sa[i]] = i;
    }
    for (int i = 0; i < n; i++) {
        // The match length shrinks by at most one from position i-1 to i.
        if (k > 0) {
            --k;
        }
        if (rk[i] != 0) {
            const int prev = sa[rk[i] - 1];
            while (str[i + k] == str[prev + k]) {
                ++k;
            }
        }
        h[rk[i]] = k;
    }
}

int str[Max], sa[Max], h[Max], ranks[Max];
char data[Max];
int mm[Max], RMQ[Max];
int best[20][Max];

/*
 * Prepare range-minimum queries over RMQ[1..n] (groundwork for lcp()).
 *
 * mm[i] becomes floor(log2(i)); best[p][j] becomes the index of a minimum
 * of RMQ[j .. j + 2^p - 1].
 */
void initRMQ(int n) {
    mm[0] = -1;
    for (int i = 1; i <= n; i++) {
        const bool power_of_two = (i & (i - 1)) == 0;
        mm[i] = power_of_two ? mm[i - 1] + 1 : mm[i - 1];
    }
    for (int i = 1; i <= n; i++) {
        best[0][i] = i;
    }
    for (int p = 1; p <= mm[n]; p++) {
        const int half = 1 << (p - 1);
        for (int j = 1; j + (1 << p) - 1 <= n; j++) {
            const int left = best[p - 1][j];
            const int right = best[p - 1][j + half];
            best[p][j] = (RMQ[left] < RMQ[right]) ? left : right;
        }
    }
}

/*
 * Return the index of a minimum of RMQ[a..b]. Requires 1 <= a <= b and a
 * prior initRMQ(n) call with n >= b.
 */
int askRMQ(int a, int b) {
    const int t = mm[b - a + 1];
    // Cover [a, b] with two possibly overlapping windows of length 2^t.
    const int left = best[t][a];
    const int right = best[t][b - (1 << t) + 1];
    return RMQ[left] < RMQ[right] ? left : right;
}

/*
 * Longest common prefix of the suffixes starting at positions a and b.
 * The two positions must be different.
 */
int lcp(int a, int b) {
    a = ranks[a];
    b = ranks[b];
    if (a > b) {
        swap(a, b);
    }
    return h[askRMQ(a + 1, b)];
}

/*
 * Read the next byte from stdin that is not a space, tab, CR or LF.
 * At end of input the value of (char)EOF is returned, so callers that must
 * distinguish real data from EOF should check feof(stdin).
 */
char get_char() {
    for (;;) {
        const char ans = getchar();
        if (ans != ' ' && ans != '\n' && ans != '\r' && ans != '\t') {
            return ans;
        }
    }
}

/*
 * Solve one test case for the string already stored in str[0..n-1]
 * (symbols in [0, 255]); returns the largest number of times some substring
 * is repeated consecutively. Overwrites sa, h, ranks, RMQ, mm, best and
 * str[n].
 */
static int max_repeats(int n) {
    if (n <= 0) {
        return 0;
    }

    int m = 0;
    for (int i = 0; i < n; i++) {
        m = max(m, str[i]);
    }
    // -1 can never equal a symbol, not even a NUL byte in the input.
    str[n] = -1;

    suffix_array(str, sa, n, m);
    calc_height(str, sa, h, n);
    for (int i = 1; i < n; i++) {
        RMQ[i] = h[i];
    }
    // Slot n is only padding for initRMQ; no query ever reaches it.
    RMQ[n] = 0;
    for (int i = 0; i < n; i++) {
        ranks[sa[i]] = i;
    }
    initRMQ(n);

    // Any non-empty string contains a substring repeated exactly once.
    int ans = 1;
    for (int l = 1; l < n; l++) {
        for (int i = 0; i + l < n; i += l) {
            const int k = lcp(i, i + l);
            int r = k / l + 1;
            // When the forward match ends partway through a period, check
            // whether starting l - k % l characters earlier completes one
            // more full period.
            const int t = i - (l - k % l);
            if (t >= 0 && k % l != 0 && lcp(t, t + l) >= k) {
                r++;
            }
            if (r > ans) {
                ans = r;
            }
        }
    }
    return ans;
}

/*
 * Input: the number of test cases, then for each case the length n followed
 * by n non-whitespace characters (whitespace between them is skipped).
 * Output: one line per case with the maximum repetition count.
 */
int main() {
    int T;
    if (scanf("%d", &T) != 1) {
        return 0;
    }
    while (T-- > 0) {
        int n;
        if (scanf("%d", &n) != 1 || n < 0 || n >= Max) {
            fprintf(stderr, "invalid string length\n");
            return 1;
        }
        for (int a = 0; a < n; a++) {
            // Map through unsigned char so that every symbol is a valid,
            // non-negative bucket index regardless of char signedness.
            str[a] = (unsigned char)get_char();
        }
        printf("%d\n", max_repeats(n));
    }
    return 0;
}