后缀数组模板

来源:互联网 发布:知乎电脑版官方下载 编辑:程序博客网 时间:2024/05/22 06:22

----by kuangbin

#include <stdio.h>
#include <string.h>
#include <iostream>
#include <algorithm>
#include <math.h>
#include <set>
using namespace std;

// Rename the global `rank` array; presumably this avoids a clash with
// std::rank, which `using namespace std` can make visible.
#define rank Rank

/*
 * Suffix array built with the DC3 (skew) algorithm, O(n).
 *
 * Every array involved must be three times as long as the sequence to sort.
 * The sequence has length n and lives in indices 0..n-1; a terminating 0 is
 * stored at index n, and every other element must be greater than 0.
 *
 * Call it as:  da(str, sa, rank, height, n, m);
 * (da() itself forwards n+1 to dc3(), so pass the real length n.)
 *
 * Example with n = 8, the sequence stored in num[0..n-1]:
 *   num[]    = { 1, 1, 2, 1, 1, 1, 1, 2, $ };  last slot is 0, the rest > 0
 *   rank[]   = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };  rank[0..n-1] valid, rank[n] is always 0
 *   sa[]     = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };  sa[1..n] valid, sa[0] is always n
 *   height[] = { 0, 0, 3, 2, 3, 1, 2, 0, 1 };  height[2..n] valid
 */
const int MAXN = 301000;
int rank[MAXN], height[MAXN];

// Both macros rely on a local variable `tb` being in scope (see dc3()).
// F: position x in the current string (x % 3 != 0) -> index in the reduced string.
#define F(x) ((x)/3+((x)%3==1?0:tb))
// G: index in the reduced string -> position in the current string (inverse of F).
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)

// Scratch buffers shared by sort() and dc3().
int wa[MAXN*3], wb[MAXN*3], wv[MAXN*3], wss[MAXN*3];

// Returns non-zero when the triples r[a..a+2] and r[b..b+2] are equal.
int c0(int *r, int a, int b)
{
    return r[a] == r[b] && r[a+1] == r[b+1] && r[a+2] == r[b+2];
}

// Merge comparator: returns non-zero when the suffix at `a` sorts before the
// suffix at `b`. `k` is b % 3; ties on the leading characters are broken with
// the ranks that dc3() stored in wv[].
int c12(int k, int *r, int a, int b)
{
    if (k == 2)
        return r[a] < r[b] || (r[a] == r[b] && c12(1, r, a+1, b+1));
    else
        return r[a] < r[b] || (r[a] == r[b] && wv[a+1] < wv[b+1]);
}

// Stable counting sort: writes into b[0..n-1] the entries of a[0..n-1] ordered
// by key r[a[i]]. Every key must lie in [0, m).
void sort(int *r, int *a, int *b, int n, int m)
{
    int i;
    for (i = 0; i < n; i++) wv[i] = r[a[i]];
    for (i = 0; i < m; i++) wss[i] = 0;
    for (i = 0; i < n; i++) wss[wv[i]]++;
    for (i = 1; i < m; i++) wss[i] += wss[i-1];
    // Walk backwards so equal keys keep their relative order.
    for (i = n-1; i >= 0; i--) b[--wss[wv[i]]] = a[i];
}

// Recursive DC3 step: fills sa[0..n-1] with the suffix order of r[0..n-1].
// Keys of r must lie in [0, m). The memory right after r[n-1] and sa[n-1] is
// used as workspace for the reduced problem, hence the 3x size requirement.
void dc3(int *r, int *sa, int n, int m)
{
    int i, j, *rn = r + n;
    int *san = sa + n, ta = 0, tb = (n+1)/3, tbc = 0, p;
    r[n] = r[n+1] = 0;

    // Collect the positions with i % 3 != 0 and radix-sort them by their triples.
    for (i = 0; i < n; i++)
        if (i % 3 != 0) wa[tbc++] = i;
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);

    // Name the triples; equal triples share a name.
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i-1], wb[i]) ? p - 1 : p++;

    // Names not unique yet -> recurse on the reduced string; otherwise the
    // names already are the ranks.
    if (p < tbc)
        dc3(rn, san, tbc, p);
    else
        for (i = 0; i < tbc; i++) san[rn[i]] = i;

    // Sort the i % 3 == 0 positions using the order of their successors.
    for (i = 0; i < tbc; i++)
        if (san[i] < tb) wb[ta++] = san[i] * 3;
    if (n % 3 == 1) wb[ta++] = n - 1;
    sort(r, wb, wa, ta, m);

    // Map reduced indices back to positions and record their ranks in wv[].
    for (i = 0; i < tbc; i++) wv[wb[i] = G(san[i])] = i;

    // Merge the two sorted groups.
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++) sa[p] = wa[i++];
    for (; j < tbc; p++) sa[p] = wb[j++];
}

// Builds sa[], rank[] and height[] for str[0..n-1].
//   str    : input values, all > 0 and < m; must have room for 3*n entries
//            (entries n..3n-1 are overwritten with 0)
//   sa     : output suffix array, sa[0..n]; must also be three times as long
//   rank   : output, rank[sa[i]] == i for i in 0..n
//   height : output, height[rank[i]] for i in 0..n-1 (LCP with the previous suffix)
//   n      : length of the sequence (without the terminating 0)
//   m      : exclusive upper bound of the values in str
void da(int str[], int sa[], int rank[], int height[], int n, int m)
{
    // Empty input: only the terminator exists. Handled here so dc3() never
    // looks at a stale wb[0].
    if (n <= 0) {
        sa[0] = 0;
        rank[0] = 0;
        return;
    }

    for (int i = n; i < n*3; i++) str[i] = 0;
    dc3(str, sa, n+1, m);

    int i, j, k = 0;
    for (i = 0; i <= n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (k) k--;
        j = sa[rank[i]-1];
        // Terminates because the 0 at str[n] occurs nowhere else.
        while (str[i+k] == str[j+k]) k++;
        height[rank[i]] = k;
    }
}

char str[MAXN];
int r[MAXN];   // copy the sequence to sort into this array before calling da()
int sa[MAXN];





白书模板:
#include <stdio.h>
#include <string.h>
#include <set>
#include <iostream>
#include <utility>
using namespace std;

#define MAXN 100000
// Rename the global `rank` array; presumably this avoids a clash with
// std::rank, which `using namespace std` can make visible.
#define rank Rank

char s[MAXN];  // the text, stored in s[0..n-1]
int sa[MAXN], t[MAXN], t2[MAXN], c[MAXN], n;

// Builds the suffix array of s[0..n-1] into sa[] by prefix doubling.
// Every character of s, read as unsigned char, must lie in [0, m-1].
void build_sa(int m)
{
    int i, *x = t, *y = t2;

    // Counting sort on the first character.
    for (i = 0; i < m; i++) c[i] = 0;
    // Read characters as unsigned so bytes >= 0x80 cannot index c[] negatively.
    for (i = 0; i < n; i++) c[x[i] = (unsigned char)s[i]]++;
    for (i = 1; i < m; i++) c[i] += c[i-1];
    for (i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1) {
        int p = 0;
        // Order by the second key directly from the current sa[]: suffixes
        // shorter than k have an empty second half and therefore come first.
        for (i = n-k; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i]-k;

        // Counting sort on the first key.
        for (i = 0; i < m; i++) c[i] = 0;
        for (i = 0; i < n; i++) c[x[y[i]]]++;
        // Prefix sums start at 1; starting at 0 would read c[-1].
        for (i = 1; i < m; i++) c[i] += c[i-1];
        for (i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Derive the new ranks x[] from sa[] and the old ranks y[].
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (i = 1; i < n; i++) {
            const int prev = sa[i-1], cur = sa[i];
            // A second half that starts past the end of the text ranks below
            // every real rank; never read y[] at or beyond index n.
            const int prevSecond = prev + k < n ? y[prev + k] : -1;
            const int curSecond  = cur  + k < n ? y[cur  + k] : -1;
            x[cur] = (y[prev] == y[cur] && prevSecond == curSecond) ? p-1 : p++;
        }
        if (p >= n) break;  // all ranks distinct: done
        m = p;              // next round only needs p buckets
    }
}

int m;  // length of the pattern currently being searched (set by find())

// Compares the pattern with the first m characters of suffix sa[p];
// returns 0 when the pattern is a prefix of that suffix.
int cmp_suffix(char *pattern, int p)
{
    return strncmp(pattern, s + sa[p], m);
}

// Binary-searches sa[] for a suffix that starts with P.
// Returns an index into sa[] (not a text position), or -1 when P does not occur.
int find(char *P)
{
    m = (int)strlen(P);
    if (n <= 0) return -1;  // empty text: sa[n-1] would be out of range
    if (cmp_suffix(P, 0) < 0) return -1;
    if (cmp_suffix(P, n-1) > 0) return -1;
    int L = 0, R = n-1;
    while (L <= R) {
        // Parenthesised on purpose: `L + (R-L)>>1` parses as `(L + R - L) >> 1`.
        int mid = L + ((R-L) >> 1);
        int res = cmp_suffix(P, mid);
        if (!res) return mid;
        if (res < 0) R = mid-1; else L = mid+1;
    }
    return -1;
}

int rank[MAXN], height[MAXN];  // height[i]: LCP of suffixes sa[i-1] and sa[i]

// Fills rank[] and height[] from sa[]; height[0] is set to 0.
void getHeight()
{
    int i, k = 0;
    for (i = 0; i < n; i++) rank[sa[i]] = i;
    for (i = 0; i < n; i++) {
        if (rank[i] == 0) {
            // The smallest suffix has no predecessor; sa[-1] must not be read.
            height[0] = 0;
            k = 0;
            continue;
        }
        if (k) k--;
        int j = sa[rank[i]-1];
        // Bounds checks keep the scan inside s[0..n-1].
        while (i+k < n && j+k < n && s[i+k] == s[j+k]) k++;
        height[rank[i]] = k;
    }
}

int main()
{
    // Bound the read to the buffer size; the width is derived from MAXN.
    char fmt[32];
    snprintf(fmt, sizeof(fmt), "%%%ds", MAXN - 1);

    // Stop on EOF / read failure as well as on the "#" terminator line.
    while (scanf(fmt, s) == 1 && s[0] != '#') {
        n = (int)strlen(s);
        memset(height, -1, sizeof(height));
        memset(sa, -1, sizeof(sa));
        build_sa(256);  // characters are read as unsigned char, so 256 buckets cover them all
        getHeight();
    }
    return 0;
}

0 1
原创粉丝点击