UVa 11107 Life Forms(后缀数组 + 二分)

来源:互联网 发布:手机安装ubuntu arm版 编辑:程序博客网 时间:2024/06/01 10:46

题意:给你 n 个 DNA 序列,求一个最长的字符串,使得它在超过一半的 DNA 序列中出现;若有多解,按字典序从小到大输出所有解;若不存在这样的字符串,则输出 "?"。相邻两组数据的输出之间用一个空行隔开。


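分析:白书例题。把 n 个字符串用不在原串中出现的分隔符连接成一个长串(分隔符最好互不相同,以免"公共子串"跨越分隔符),求出它的后缀数组和 height 数组;然后二分答案长度 k,遍历一遍后缀数组来验证:把 height ≥ k 的相邻后缀划为一组,若某一组中的后缀来自超过一半的原串,则长度 k 可行。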


// UVa 11107 "Life Forms": for each data set, print every longest string that
// occurs as a substring in more than half of the given strings, in
// lexicographic order, or "?" when no such string exists.
//
// Approach: join all strings into one symbol sequence, build its suffix array
// and LCP ("height") array, then binary-search the answer length and verify a
// candidate length with one linear scan over the suffix array.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <utility>
#include <vector>

namespace {

// Capacity of every work array (value kept from the original program).
const int kMaxN = 110005;

// Input bytes are stored as (byte value + 1), i.e. 1..256, so that 0 is free
// for the unique end sentinel. Separators start right above that range and
// each string gets its own, so no common prefix of two different suffixes can
// ever contain a separator.
const int kFirstSeparator = 257;

int text[kMaxN];     // joined symbol sequence; text[text_len] is the 0 sentinel
int owner[kMaxN];    // owner[p] = 1-based index of the string that position p belongs to
int sep_pos[kMaxN];  // sep_pos[id] = position of the separator that ends string id
int sa[kMaxN];       // sa[0] is the sentinel suffix, sa[1..text_len] the real suffixes
int rank_of[kMaxN];  // inverse of sa for positions 0..text_len-1
int height[kMaxN];   // height[i] = LCP of suffixes sa[i-1] and sa[i], for i >= 1
int key_a[kMaxN];    // rank buffers used alternately by the doubling rounds
int key_b[kMaxN];
int bucket[kMaxN];   // counting-sort buckets
int seen_in[kMaxN];  // seen_in[id] = start index of the last group that contained string id

int text_len = 0;      // number of symbols, sentinel excluded
int string_count = 0;  // number of strings in the current data set

// Fills rank_of[] and height[1..n] for a text of n symbols followed by the
// sentinel. The inner loop stops at the sentinel at the latest, because the
// sentinel value occurs exactly once.
void buildHeight(int n) {
    for (int i = 1; i <= n; ++i) rank_of[sa[i]] = i;
    int lcp = 0;
    for (int pos = 0; pos < n; ++pos) {
        if (lcp > 0) --lcp;  // the LCP can drop by at most one from pos-1 to pos
        const int prev = sa[rank_of[pos] - 1];
        while (text[pos + lcp] == text[prev + lcp]) ++lcp;
        height[rank_of[pos]] = lcp;
    }
}

// True when suffixes a and b have equal (first-half, second-half) rank pairs.
// The short-circuit matters: r[a + l] is only read when the first halves tie,
// which (thanks to the unique sentinel) keeps a + l inside the text.
bool sameKey(const int* r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds sa[0..n-1] for text[0..n-1] by prefix doubling with counting sort,
// then the height array. text[n-1] must be the unique smallest symbol 0 and
// every symbol must be smaller than `alphabet`.
void buildSuffixArray(int n, int alphabet) {
    int* x = key_a;
    int* y = key_b;

    // Round 0: sort suffixes by their first symbol.
    std::fill(bucket, bucket + alphabet, 0);
    for (int i = 0; i < n; ++i) ++bucket[x[i] = text[i]];
    for (int i = 1; i < alphabet; ++i) bucket[i] += bucket[i - 1];
    for (int i = n - 1; i >= 0; --i) sa[--bucket[x[i]]] = i;

    for (int l = 1, classes = 1; classes < n; alphabet = classes, l *= 2) {
        // y = suffixes ordered by their second half (the rank at offset l).
        // Suffixes too short to have a second half come first.
        int filled = 0;
        for (int i = n - l; i < n; ++i) y[filled++] = i;
        for (int i = 0; i < n; ++i) {
            if (sa[i] >= l) y[filled++] = sa[i] - l;
        }

        // Stable counting sort of y by first-half rank.
        std::fill(bucket, bucket + alphabet, 0);
        for (int i = 0; i < n; ++i) ++bucket[x[y[i]]];
        for (int i = 1; i < alphabet; ++i) bucket[i] += bucket[i - 1];
        for (int i = n - 1; i >= 0; --i) sa[--bucket[x[y[i]]]] = y[i];

        // Re-rank: suffixes with equal key pairs share a rank.
        std::swap(x, y);
        x[sa[0]] = 0;
        classes = 1;
        for (int i = 1; i < n; ++i) {
            x[sa[i]] = sameKey(y, sa[i - 1], sa[i], l) ? classes - 1 : classes++;
        }
    }
    buildHeight(n - 1);
}

// Walks the suffix array in maximal groups of adjacent suffixes whose common
// prefix is at least k symbols (k >= 1) and looks for groups whose suffixes
// come from more than half of the strings.
//
// With print == false, returns true as soon as one such group is found.
// With print == true, writes the shared k-symbol prefix of every such group
// once, one per line, and returns whether anything was written. Groups are
// met in suffix-array order, so the lines come out lexicographically sorted.
bool scanGroups(int k, bool print) {
    std::fill(seen_in, seen_in + string_count + 1, 0);
    bool found = false;
    bool reported = false;
    int group_start = 0;
    int distinct = 0;
    for (int i = 1; i <= text_len; ++i) {
        if (i == 1 || height[i] < k) {
            // The first member of a group is recorded like any other, so the
            // string it belongs to is never counted twice.
            group_start = i;
            distinct = 0;
            reported = false;
        }
        if (reported) continue;

        const int start = sa[i];
        const int id = owner[start];
        if (seen_in[id] != group_start) {
            seen_in[id] = group_start;
            ++distinct;
        }
        if (2 * distinct <= string_count) continue;
        // Only matters for one-member groups (a single input string): the
        // suffix must hold k real symbols before its separator.
        if (sep_pos[id] - start < k) continue;

        if (!print) return true;
        for (int j = 0; j < k; ++j) {
            std::cout.put(static_cast<char>(text[start + j] - 1));
        }
        std::cout.put('\n');
        reported = true;
        found = true;
    }
    return found;
}

}  // namespace

int main() {
    std::ios_base::sync_with_stdio(false);

    bool first_case = true;
    std::string word;
    // Stop on the terminating 0, on a non-positive count, or on EOF / bad input.
    while (std::cin >> string_count && string_count > 0) {
        if (string_count > kMaxN - kFirstSeparator) {
            std::cerr << "too many strings for the work arrays\n";
            return 1;
        }

        text_len = 0;
        int longest = 0;
        for (int id = 1; id <= string_count; ++id) {
            if (!(std::cin >> word)) return 0;  // truncated input: nothing more to do
            // Need room for the word, its separator and the final sentinel.
            if (word.size() + 2 > static_cast<std::size_t>(kMaxN - text_len)) {
                std::cerr << "input too long for the work arrays\n";
                return 1;
            }
            const int len = static_cast<int>(word.size());
            longest = std::max(longest, len);
            for (int j = 0; j < len; ++j) {
                text[text_len] = static_cast<unsigned char>(word[j]) + 1;
                owner[text_len] = id;
                ++text_len;
            }
            sep_pos[id] = text_len;
            text[text_len] = kFirstSeparator + id - 1;
            owner[text_len] = id;
            ++text_len;
        }
        text[text_len] = 0;  // unique sentinel, smaller than every other symbol

        // Consecutive data sets are separated by one blank line.
        if (first_case) {
            first_case = false;
        } else {
            std::cout.put('\n');
        }

        buildSuffixArray(text_len + 1, kFirstSeparator + string_count);

        // Feasibility is monotone in the length, so binary-search the largest
        // feasible length; 0 means no answer.
        int lo = 0;
        int hi = longest;
        while (lo < hi) {
            const int mid = lo + (hi - lo + 1) / 2;
            if (scanGroups(mid, false)) {
                lo = mid;
            } else {
                hi = mid - 1;
            }
        }

        if (lo == 0) {
            std::cout << "?\n";
        } else {
            scanGroups(lo, true);
        }
    }
    return 0;
}


0 0
原创粉丝点击