后缀数组学习笔记
来源:互联网 发布:淘宝活动大全2017 编辑:程序博客网 时间:2024/05/18 03:25
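要用好后缀数组要先理解里面几个数组的概念:
- sa[i]: 所有后缀按字典序排序后, 排名第 i 的后缀在原串中的起始下标.
- rank[i]: 起始下标为 i 的后缀的排名, 与 sa 互为逆运算, 即 rank[sa[i]] = i.
- height[i]: 排名相邻的两个后缀 sa[i-1] 和 sa[i] 的最长公共前缀的长度.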
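一个性质: $height[rank[i]] \ge height[rank[i-1]] - 1$, 所以按照原串下标 i 从小到大的顺序计算 height 时, 匹配长度 k 每次最多减 1, 总复杂度是 O(n).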
倍增法求出这些数组, 记得在原串的末尾增加一个0:
// Capacity of the work arrays. It must be larger than both (text length + 1)
// and the alphabet bound m passed to da(). A caller may #define maxn first.
#ifndef maxn
#define maxn 200005
#endif

// Scratch buffers shared by every call to da(): two rank arrays (t1, t2) and
// the counting-sort buckets (c).
int t1[maxn], t2[maxn], c[maxn];

// Returns true when positions a and b carry the same pair of ranks
// (r[pos], r[pos + l]), i.e. the two suffixes agree on their first 2*l symbols.
bool cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/**
 * Builds the suffix array, rank array and height (LCP) array of str[0..n-1]
 * with the prefix-doubling method.
 *
 * Preconditions (the caller must guarantee them):
 *   - str[n] == 0 is a sentinel, and every str[i] for i < n lies in [1, m-1];
 *   - n + 1 <= maxn and m <= maxn.
 *
 * Results:
 *   - sa[0..n]     : sa[i] is the start index of the i-th smallest suffix;
 *                    sa[0] == n is the sentinel suffix.
 *   - rank[0..n]   : inverse of sa, rank[sa[i]] == i.
 *   - height[1..n] : length of the longest common prefix of suffixes sa[i-1]
 *                    and sa[i]; height[0] is set to 0.
 */
void da(int str[], int sa[], int rank[], int height[], int n, int m) {
    n++;  // from here on the sentinel counts as part of the text
    int i, j, p;
    int *x = t1;  // x[i]: rank of suffix i by its first j symbols
    int *y = t2;  // y[]:  suffix indices ordered by the second key

    // Round 0: counting sort on single symbols.
    // If the alphabet is very large this could be replaced by a comparison sort.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = str[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        // Order by the second key using the previous sa directly.
        p = 0;
        // The last j suffixes have an empty second key, which is the smallest.
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // Stable counting sort on the first key, visiting suffixes in y order.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Derive the new ranks from sa and the old ranks (the old ranks move to y).
        int *old_rank = x;
        x = y;
        y = old_rank;
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
        if (p >= n) break;  // all ranks are distinct: sorting is finished
        m = p;              // bucket count for the next round
    }

    // Height array: walking the text in index order lets the matched length k
    // drop by at most one between consecutive positions.
    int k = 0;
    n--;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    height[0] = 0;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i] - 1];
        // The sentinel at str[n] guarantees a mismatch before running off the text.
        while (str[i + k] == str[j + k]) k++;
        height[rank[i]] = k;
    }
}
POJ 1743 最长不重叠相同子串
题意: 给定一个音符序列, 求最长“主题”的长度. 主题至少包含 5 个音符, 在序列中至少出现两次且这两次出现互不重叠, 其中一次出现可以由另一次整体加上同一个常数(转调)得到, 也就是两个子串相邻元素的差值序列相同. 不存在这样的主题时输出 0.
对于相邻的两个数直接做差得到一个新串, 直接对新串求最长不重叠子串. 非常经典的做法, 二分长度, 然后对height数组分组, 满足这个长度的分成一组, 然后判断组中下标最大最小之差.
数据很水~
// POJ 1743 (Musical Theme): longest theme of at least 5 notes that occurs
// twice, transposed by a constant, without the two occurrences overlapping.
#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>

#define maxn 20005

// Scratch buffers for da(): two rank arrays and the counting-sort buckets.
int t1[maxn], t2[maxn], c[maxn];

// True when positions a and b carry the same rank pair (r[pos], r[pos + l]).
bool cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/**
 * Prefix-doubling suffix array of str[0..n-1].
 * Requires str[n] == 0 as sentinel and 1 <= str[i] < m for i < n.
 * Fills sa[0..n], rank[0..n] (inverse of sa) and height[1..n], where
 * height[i] is the LCP of suffixes sa[i-1] and sa[i].
 */
void da(int str[], int sa[], int rank[], int height[], int n, int m) {
    n++;  // include the sentinel
    int i, j, p;
    int *x = t1, *y = t2;

    // Round 0: counting sort on single symbols.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = str[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        // Second key: the last j suffixes have an empty (smallest) second key.
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // First key: stable counting sort in y order.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // New ranks from sa and the old ranks (now in y).
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
        if (p >= n) break;
        m = p;  // bucket count for the next round
    }

    int k = 0;
    n--;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i] - 1];
        while (str[i + k] == str[j + k]) k++;  // stops at the sentinel
        height[rank[i]] = k;
    }
}

// Named rnk rather than rank so it can never clash with std::rank.
int rnk[maxn], height[maxn];
int str[maxn];
int sa[maxn];
int n;  // number of differences between adjacent notes

// Is there a run of x differences that occurs at two start positions more
// than x apart? "More than" is required because x differences span x + 1
// notes, and the two note ranges must not share a note.
bool ok(int x) {
    int lo = sa[1], hi = sa[1];
    for (int i = 2; i <= n; i++) {
        if (height[i] >= x) {
            lo = std::min(lo, sa[i]);
            hi = std::max(hi, sa[i]);
            if (hi - lo > x) return true;
        } else {
            // LCP dropped below x: a new group starts at this suffix.
            lo = hi = sa[i];
        }
    }
    return false;
}

// Binary search for the largest x accepted by ok(); 0 if none is.
int solve() {
    int l = 0, r = n / 2;
    while (r - l > 1) {
        int mid = (r + l) >> 1;
        if (ok(mid)) l = mid;
        else r = mid;
    }
    return ok(r) ? r : l;
}

int main() {
    while (scanf("%d", &n) == 1 && n) {
        for (int i = 0; i < n; i++) {
            scanf("%d", &str[i]);
        }
        // Two disjoint themes of 5 notes need at least 10 notes.
        if (n <= 9) {
            printf("0\n");
            continue;
        }
        // Replace the notes by adjacent differences, shifted by 100 so that
        // every symbol is positive and below the alphabet bound 188.
        n--;
        for (int i = 0; i < n; i++) {
            str[i] = 100 + str[i + 1] - str[i];
        }
        str[n] = 0;
        da(str, sa, rnk, height, n, 188);
        int ans = solve() + 1;  // x differences correspond to x + 1 notes
        printf("%d\n", (ans >= 5 ? ans : 0));
    }
    return 0;
}
POJ 3261 求重复k次的最长子串
还是二分结果, 按照height分组, 判断是否存在后缀个数不少于k的组.
数据还是很水, 不加离散化都能过~
// POJ 3261: length of the longest substring that occurs at least k times
// (occurrences may overlap).
#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>

#define maxn 200005

// Scratch buffers for da(): two rank arrays and the counting-sort buckets.
int t1[maxn], t2[maxn], c[maxn];

// True when positions a and b carry the same rank pair (r[pos], r[pos + l]).
bool cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/**
 * Prefix-doubling suffix array of str[0..n-1].
 * Requires str[n] == 0 as sentinel and 1 <= str[i] < m for i < n.
 * Fills sa[0..n], rank[0..n] (inverse of sa) and height[1..n], where
 * height[i] is the LCP of suffixes sa[i-1] and sa[i].
 */
void da(int str[], int sa[], int rank[], int height[], int n, int m) {
    n++;  // include the sentinel
    int i, j, p;
    int *x = t1, *y = t2;

    // Round 0: counting sort on single symbols.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = str[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        // Second key: the last j suffixes have an empty (smallest) second key.
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // First key: stable counting sort in y order.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // New ranks from sa and the old ranks (now in y).
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
        if (p >= n) break;
        m = p;  // bucket count for the next round
    }

    int k = 0;
    n--;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i] - 1];
        while (str[i + k] == str[j + k]) k++;  // stops at the sentinel
        height[rank[i]] = k;
    }
}

// Named rnk rather than rank so it can never clash with std::rank.
int rnk[maxn], height[maxn];
int str[maxn];
int sa[maxn];
int n, k;

// Does some substring of length x occur at least k times? A run of g - 1
// consecutive height values >= x means g suffixes share a prefix of length x.
bool ok(int x) {
    if (k <= 1) return true;  // every substring occurs at least once
    int group = 1;
    for (int i = 2; i <= n; i++) {
        if (height[i] >= x) {
            group++;
            if (group >= k) return true;
        } else {
            group = 1;
        }
    }
    return false;
}

// Binary search for the largest x accepted by ok(); 0 if none is.
int solve() {
    int l = 0, r = n;
    while (r - l > 1) {
        int mid = (l + r) >> 1;
        if (ok(mid)) l = mid;
        else r = mid;
    }
    return ok(r) ? r : l;
}

int cnt, num[maxn];

// Coordinate compression: replaces every str[i] by its 1-based position among
// the distinct input values, so symbols are in [1, cnt] and 0 stays free for
// the sentinel. Returns cnt + 1, an exclusive upper bound on the new symbols.
int lisanhua() {
    for (int i = 0; i < n; i++) num[i] = str[i];
    std::sort(num, num + n);
    cnt = static_cast<int>(std::unique(num, num + n) - num);
    for (int i = 0; i < n; i++) {
        str[i] = static_cast<int>(std::lower_bound(num, num + cnt, str[i]) - num) + 1;
    }
    return cnt + 1;
}

int main() {
    while (std::cin >> n >> k) {
        for (int i = 0; i < n; i++) {
            std::cin >> str[i];
        }
        str[n] = 0;
        int m = lisanhua();
        da(str, sa, rnk, height, n, m + 2);
        int ans = solve();
        std::cout << ans << '\n';
    }
    return 0;
}
/*
Small sample input:
2 2
1
1
*/
SPOJ 694 不重复子串个数
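根据height数组计数: 排名第 i 的后缀 sa[i] 一共有 n-sa[i] 个前缀, 其中最短的 height[i] 个已经作为排名第 i-1 的后缀的前缀出现过, 所以不同子串的个数为 $\sum_{i=1}^{n} (n - sa[i] - height[i])$.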
// SPOJ 694 (Distinct Substrings): number of distinct substrings of each input string.
#include <iostream>
#include <iomanip>
#include <cstdio>
#include <cstring>
#include <queue>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>

#define maxn 200005

// Scratch buffers for da(): two rank arrays and the counting-sort buckets.
int t1[maxn], t2[maxn], c[maxn];

// True when positions a and b carry the same rank pair (r[pos], r[pos + l]).
bool cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/**
 * Prefix-doubling suffix array of str[0..n-1].
 * Requires str[n] == 0 as sentinel and 1 <= str[i] < m for i < n.
 * Fills sa[0..n], rank[0..n] (inverse of sa) and height[1..n], where
 * height[i] is the LCP of suffixes sa[i-1] and sa[i].
 */
void da(int str[], int sa[], int rank[], int height[], int n, int m) {
    n++;  // include the sentinel
    int i, j, p;
    int *x = t1, *y = t2;

    // Round 0: counting sort on single symbols.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = str[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        // Second key: the last j suffixes have an empty (smallest) second key.
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // First key: stable counting sort in y order.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // New ranks from sa and the old ranks (now in y).
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
        if (p >= n) break;
        m = p;  // bucket count for the next round
    }

    int k = 0;
    n--;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i] - 1];
        while (str[i + k] == str[j + k]) k++;  // stops at the sentinel
        height[rank[i]] = k;
    }
}

// Named rnk rather than rank so it can never clash with std::rank.
int rnk[maxn], height[maxn];
int str[maxn];
char s[maxn];
int sa[maxn];
int n;

int main() {
    std::ios::sync_with_stdio(false);
    int t;
    std::cin >> t;
    while (t--) {
        // setw caps the token at maxn - 2 characters so that the text plus
        // its sentinel always fits into the maxn-sized work arrays.
        std::cin >> std::setw(maxn - 1) >> s;
        n = static_cast<int>(strlen(s));
        for (int i = 0; i < n; i++) {
            // Go through unsigned char so the symbol is a valid bucket index
            // in [1, 255] whatever the signedness of plain char.
            str[i] = static_cast<unsigned char>(s[i]);
        }
        str[n] = 0;
        da(str, sa, rnk, height, n, 256);

        // Suffix sa[i] has n - sa[i] prefixes; height[i] of them already
        // appeared as prefixes of the previous suffix in sorted order.
        long long ans = 0;
        for (int i = 1; i <= n; i++) {
            ans += n - sa[i] - height[i];
        }
        std::cout << ans << '\n';
    }
    return 0;
}
POJ 2774 最长公共子串
把第二个串放到第一个串的后面, 中间用一个失配符隔开, 然后遍历height数组维护最大子串长度. 要避免出现在同一串中的公共子串.
// POJ 2774 (Long Long Message): length of the longest common substring of two strings.
#include <iostream>
#include <iomanip>
#include <cstdio>
#include <cstring>
#include <queue>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>

#define maxn 200005

// Scratch buffers for da(): two rank arrays and the counting-sort buckets.
int t1[maxn], t2[maxn], c[maxn];

// True when positions a and b carry the same rank pair (r[pos], r[pos + l]).
bool cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

/**
 * Prefix-doubling suffix array of str[0..n-1].
 * Requires str[n] == 0 as sentinel and 1 <= str[i] < m for i < n.
 * Fills sa[0..n], rank[0..n] (inverse of sa) and height[1..n], where
 * height[i] is the LCP of suffixes sa[i-1] and sa[i].
 */
void da(int str[], int sa[], int rank[], int height[], int n, int m) {
    n++;  // include the sentinel
    int i, j, p;
    int *x = t1, *y = t2;

    // Round 0: counting sort on single symbols.
    for (i = 0; i < m; i++) c[i] = 0;
    for (i = 0; i < n; i++) c[x[i] = str[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (j = 1; j <= n; j <<= 1) {
        // Second key: the last j suffixes have an empty (smallest) second key.
        p = 0;
        for (i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // First key: stable counting sort in y order.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        for (i = 1; i < m; i++) c[i] += c[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // New ranks from sa and the old ranks (now in y).
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
        if (p >= n) break;
        m = p;  // bucket count for the next round
    }

    int k = 0;
    n--;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i] - 1];
        while (str[i + k] == str[j + k]) k++;  // stops at the sentinel
        height[rank[i]] = k;
    }
}

// Named rnk rather than rank so it can never clash with std::rank.
int rnk[maxn], height[maxn];
int str[maxn];
// Each input string may use at most half of the work-array capacity, so the
// concatenation "s1, separator, s2, sentinel" always fits into maxn entries.
char s1[maxn / 2], s2[maxn / 2];
int sa[maxn];
int n, m, len;  // |s1|, |s2|, and the length of "s1, separator, s2"

// True when one start index lies in s1 (before the separator at index n) and
// the other lies in s2 (after it).
bool legal(int i, int j) {
    if (i > j) std::swap(i, j);
    return i < n && j > n;
}

// Prints the largest height value between two adjacent suffixes that start in
// different strings.
void solve() {
    int best = 0;
    for (int i = 2; i <= len; i++) {
        if (height[i] > best && legal(sa[i], sa[i - 1])) best = height[i];
    }
    std::cout << best << '\n';
}

int main() {
    std::ios::sync_with_stdio(false);
    // setw bounds each token to the size of its buffer.
    while (std::cin >> std::setw(maxn / 2) >> s1 >> std::setw(maxn / 2) >> s2) {
        n = static_cast<int>(strlen(s1));
        m = static_cast<int>(strlen(s2));
        // Go through unsigned char so every symbol is a valid bucket index.
        for (int i = 0; i < n; i++) str[i] = static_cast<unsigned char>(s1[i]);
        // Separator 1: assumed not to occur in either input string, so no
        // common prefix can run across the boundary between the two strings.
        str[n] = 1;
        for (int i = n + 1; i <= n + m; i++) {
            str[i] = static_cast<unsigned char>(s2[i - n - 1]);
        }
        len = n + m + 1;
        str[len] = 0;
        da(str, sa, rnk, height, len, 256);
        solve();
    }
    return 0;
}
0 0
- 后缀数组 学习笔记
- 后缀数组学习笔记
- 学习笔记----后缀数组
- 后缀数组学习笔记
- 后缀数组学习笔记
- 后缀数组学习笔记
- 后缀数组学习笔记
- 后缀数组学习笔记
- 后缀数组学习笔记
- 后缀数组 学习笔记
- 后缀数组学习笔记
- 后缀数组(SuffixArray) 学习笔记
- 【字符串数据结构后缀系列Part1】后缀数组学习笔记
- 后缀数组--学习笔记(倍增算法)
- 后缀数组学习笔记【详解|图】
- 《黑书》后缀数组学习笔记.
- |算法讨论|后缀数组 学习笔记
- suffix_array(后缀数组) 学习笔记
- Hibernate (开放源代码的对象关系映射框架)
- AtomicInteger的用法
- Struts学习总结
- Recently Viewed Products 加图片
- android studio 计算器textview text右下方显示
- 后缀数组学习笔记
- 学习记录3:树的直径(树的最长链)(poj 1985 Cow Marathon)
- 优就业Java:简单介绍Java的内存泄漏
- 【34】第一个只出现一次的字符
- VB程序学习代码记录20160731
- [leetcode] 260. Single Number III
- Python——7列表生成式
- 实验二:CDP命令操作
- 深入理解ReentrantLock