Maximum repetition substring+POJ+后缀数组之求重复次数最多的连续重复子串
来源:互联网 发布:移动互联网数据报告 编辑:程序博客网 时间:2024/04/29 19:54
Description
The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.
Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.
Input
The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.
The last test case is followed by a line containing a '#'.
Output
For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.
Sample Input
ccabababc
daabbccaa
#
Sample Output
Case 1: ababab
Case 2: aa
解决方案:此题为论文例题,
1)可先枚举子串长度l,对位置 i = 0, l, 2l, … 求后缀 i 与后缀 i+l 的最长公共前缀 k,那么从 i 开始的重复次数为 ans=k/l+1。但重复子串的开头未必恰好在 i:若 k%l≠0,末尾多出的 k%l 这一截还差 l-k%l 个字符才能凑成完整的一段,所以可把起点往前移 l-k%l,在该位置再求一次最长公共前缀;若它能覆盖补上的这 l-k%l 个字符(此时它必然不小于 k),则重复次数加1。记录重复次数的最大值。
2)求两后缀的最长公共前缀可归为RMQ问题。
3)最后是字典序的问题:按后缀数组的顺序(即后缀的字典序)从前往后枚举后缀,找到第一个符合条件的即可跳出循环。
// code: POJ 3693 "Maximum repetition substring".
//
// Approach: build a suffix array with the prefix-doubling algorithm, derive
// the height (adjacent-LCP) array, answer LCP queries with a sparse-table
// RMQ, enumerate every period length to find the maximum repetition count,
// then walk the suffix array in order to pick the lexicographically
// smallest substring that reaches that count.
//
// Input : one lowercase string per line, terminated by a line starting
//         with '#'.
// Output: "Case <n>: <substring>" per test case.
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <iostream>

// Longest string allowed by the problem statement. The scanf field width in
// main() must be kept equal to this value.
const int kMaxLen = 100000;
// Room for the string, the appended sentinel and a little slack.
const int nMax = kMaxLen + 16;
// Sparse-table levels: 2^17 > nMax, so levels 0..16 are the only ones used.
const int kLogLevels = 18;

// sa[i]     : start of the i-th smallest suffix (sa[0] is the sentinel).
// rnk[p]    : position of suffix p inside sa. Named rnk rather than rank so
//             it can never clash with std::rank.
// height[i] : LCP of suffixes sa[i-1] and sa[i], valid for i >= 1.
int sa[nMax], rnk[nMax], height[nMax];
// Scratch buffers for the doubling sort.
int wa[nMax], wb[nMax], wv[nMax], wd[nMax];

// Returns non-zero when positions a and b carry the same (first, second) key
// pair in r, where the second key lives l positions further on.
int cmp(int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Prefix-doubling suffix array construction.
//   r : symbols to sort; r[n-1] must be a unique smallest sentinel
//   n : number of symbols including the sentinel
//   m : exclusive upper bound on the symbol values
// Fills sa[0..n-1].
void da(int *r, int n, int m)
{
    int i, j, p, *x = wa, *y = wb, *t;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) wd[i] = 0;
    for (i = 0; i < n; i++) wd[x[i] = r[i]]++;
    for (i = 1; i < m; i++) wd[i] += wd[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--wd[x[i]]] = i;

    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // Order by second key: suffixes too short to have one come first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // Stable counting sort by first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) wd[i] = 0;
        for (i = 0; i < n; i++) wd[wv[i]]++;
        for (i = 1; i < m; i++) wd[i] += wd[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--wd[wv[i]]] = y[i];

        // Re-rank; p ends up as the number of distinct ranks.
        for (t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
    }
}

// Computes rnk[] and height[] from sa[].
//   r : the same symbol array given to da()
//   n : string length WITHOUT the sentinel
void calHeight(int *r, int n)
{
    int i, j, k = 0;

    // Start at 0 so the sentinel suffix gets rank 0 too; otherwise rnk[n]
    // would keep a value left behind by an earlier, longer test case.
    for (i = 0; i <= n; i++) rnk[sa[i]] = i;

    for (i = 0; i < n; height[rnk[i++]] = k) {
        // The LCP shrinks by at most one when moving from suffix i-1 to i.
        if (k) k--;
        j = sa[rnk[i] - 1];
        while (r[i + k] == r[j + k]) k++;
    }
}

// dp[i][j] = min(height[i .. i + 2^j - 1]); Log[v] = floor(log2(v)).
int dp[nMax][kLogLevels];
int Log[nMax];

// Builds the sparse table over height[1..len] and the log table for 0..len.
void initRMQ_log(int len)
{
    for (int i = 1; i <= len; i++) dp[i][0] = height[i];
    for (int j = 1; (1 << j) <= len; j++) {
        for (int i = 1; i + (1 << j) - 1 <= len; i++) {
            dp[i][j] = std::min(dp[i][j - 1], dp[i + (1 << (j - 1))][j - 1]);
        }
    }
    Log[0] = -1;
    for (int i = 1; i <= len; i++) {
        // Log increases by one exactly at powers of two.
        Log[i] = (i & (i - 1)) ? Log[i - 1] : Log[i - 1] + 1;
    }
}

// Longest common prefix of the suffixes starting at a and b.
// Precondition: a != b and both lie in [0, len].
int lcp(int a, int b)
{
    a = rnk[a];
    b = rnk[b];
    if (a > b) std::swap(a, b);
    ++a;  // height[a+1 .. b] covers the pairs between the two ranks
    const int k = Log[b - a + 1];
    return std::min(dp[a][k], dp[b - (1 << k) + 1][k]);
}

char text[nMax];
int num[nMax];
int save[nMax];  // period lengths that reach the best repetition count

int main()
{
    int caseNo = 0;

    // Field width equals kMaxLen so scanf cannot write past text[].
    while (scanf("%100000s", text) == 1) {
        if (text[0] == '#') break;

        const int len = static_cast<int>(strlen(text));
        for (int i = 0; i < len; i++) num[i] = text[i] - 'a' + 1;
        num[len] = 0;  // unique smallest sentinel

        da(num, len + 1, 30);
        calHeight(num, len);
        initRMQ_log(len);

        // Every non-empty string has repetition number >= 1, so only periods
        // that give two or more repetitions are recorded.
        int best = 1;
        int count = 0;
        // Two repetitions of period l need 2*l characters.
        for (int l = 1; 2 * l <= len; l++) {
            int repsForL = 1;
            for (int j = 0; j + l < len; j += l) {
                const int k = lcp(j, j + l);
                int reps = k / l + 1;
                // The run may begin before j. Shifting the start left by
                // `missing` turns the k % l leftover characters into one
                // more full period, but only if the match really extends
                // over those `missing` characters.
                const int missing = l - k % l;
                const int t = j - missing;
                if (k % l != 0 && t >= 0 && lcp(t, t + l) >= missing) reps++;
                if (reps > repsForL) repsForL = reps;
            }
            if (repsForL < 2) continue;
            if (repsForL > best) {
                best = repsForL;
                count = 0;
            }
            if (repsForL == best) save[count++] = l;  // one entry per period
        }

        // With best == 1 the answer is the smallest single character, which
        // is the first character of the smallest real suffix.
        int st = sa[1];
        int lo = 1;
        if (best >= 2) {
            bool found = false;
            // Suffixes are visited in lexicographic order, and save[] is
            // ascending, so the first hit is the smallest answer.
            for (int i = 1; i <= len && !found; i++) {
                const int start = sa[i];
                for (int c = 0; c < count; c++) {
                    const int r = save[c];
                    // Larger periods cannot fit either.
                    if (start + best * r > len) break;
                    if (lcp(start, start + r) >= (best - 1) * r) {
                        st = start;
                        lo = best * r;
                        found = true;
                        break;
                    }
                }
            }
        }

        printf("Case %d: ", ++caseNo);
        printf("%.*s\n", lo, text + st);
    }
    return 0;
}
- Maximum repetition substring+POj+后缀数组之求重复次数最多的连续重复子串
- POJ - 3693 Maximum repetition substring(后缀数组求重复次数最多的连续重复子串)
- POJ 3693 Maximum repetition substring(后缀数组[重复次数最多的连续重复子串])
- POJ 3693 Maximum repetition substring (后缀数组+RMQ 求重复最多的连续子串)
- POJ 3693 Maximum repetition substring (后缀数组+RMQ 求重复最多的连续子串)
- poj 3693/hdu 2459 Maximum repetition substring spoj 687. Repeats ( 后缀数组 重复次数最多的连续重复子串)
- poj 1743Maximum repetition substring(后缀数组+RMQ+重复次数最多的连续重复子串))
- POJ 3693 Maximum repetition substring 后缀数组求重复次数最多子串
- POJ 3693 Maximum repetition substring (求重复次数最多的连续子串,4级)
- Poj 3693 & Hdu 2459 Maximum repetition substring (08合肥Online 后缀数组+RMQ 重复次数最多的连续重复子串)
- POJ 3693 Maximum repetition substring(重复次数最多的连续子串 字典序最小)
- HDU 2459 Maximum repetition substring(后缀数组+RMQ-重复次数最多的子串)
- POJ 题目 3693 Maximum repetition substring(后缀数组+RMQ+枚举求最小字典序的重复次数最多的子串)
- poj3693Maximum repetition substring【后缀数组+RMQ求重复最多连续子串】
- POJ 3693 Maximum repetition substring(后缀数组求最长重复子串)
- 【POJ】3693 Maximum repetition substring 【后缀数组——求最长连续重复字串】
- 后缀数组(重复次数最多的连续重复子串)
- 【后缀数组求重复次数最多的连续重复子串】SPOJ687 POJ3693
- 通过二分的方法查找小于n的素数的个数
- 好东西,大家一起分享呀,送福利了,喜欢编程的小伙伴
- 网站长尾词优化排名,你知道怎么做最有效吗?
- CentOS上编译安装OpenCV-2.3.1与ffmpeg-2.1.2
- [LeetCode] Minimum Window Substring
- Maximum repetition substring+POj+后缀数组之求重复次数最多的连续重复子串
- Oracle 左外连接的一些测试
- Ejabberd作为推送服务的优化手段
- OpenCV基础篇之读取显示图片
- css背景图片拉伸填充避免重复显示
- ZoneGridVIew 自定义放大GridView
- Neutron L3 auto Reschedule VRouter feature
- 一些优秀的移动开发网址
- opecvdll 的测试 代码