poj_3294 Life Forms(后缀数组+二分)

来源:互联网 发布:艾弗森体测数据弹跳 编辑:程序博客网 时间:2024/06/11 01:37
Life Forms
Time Limit: 5000MS | Memory Limit: 65536K | Total Submissions: 15288 | Accepted: 4493

Description

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Input

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

Output

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Sample Output

bcdefg
cdefgh

?
后缀数组,
将若干字符串用不同的间隔符连成一条长字符串,比如abcdefgAbcdefghBcdefghiC
然后求出这个字符串的后缀数组sa和height,然后二分答案,若公共子串长度为p,
那么我们就去判断此时长字符串中是否有超过n/2个原字符串有长度为p的公共子串。
方法是遍历height数组,将height数组分组,若当前height[i]小于p,则另开一组,
然后记录每组中出现了多少个原字符串。
----------------------------------
n最大为100,加上我们的间隔符,长字符串中可能会出现200个不同的字符,这超过了char的范围,
得用int保存。记得给s数组后面添加一个0,不然在算height时,方法getHeight中的j和i可能都等于0,
这样代码中的循环就跳不出了。
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <stack>
#include <bitset>
#include <queue>
#include <set>
#include <map>
#include <string>
#include <algorithm>
#define FOP freopen("data.txt","r",stdin)
#define FOP2 freopen("data1.txt","w",stdout)
#define inf 0x3f3f3f3f
#define maxn 200010
#define mod 1000000007
#define PI acos(-1.0)
#define LL long long
using namespace std;

// Concatenation of all input strings. Letters are stored as 1..26, the i-th
// string is followed by the unique separator value i+100, and the whole text
// ends with a single 0. Stored as int because the values do not fit a
// lowercase-letter alphabet.
int s[maxn];
// sa[i] is the start index of the i-th smallest suffix of s.
// t, t2 and c are scratch buffers for the radix sort in build_sa.
int sa[maxn], t[maxn], t2[maxn], c[maxn];

// Builds the suffix array of s[0..n-1] into sa by prefix doubling with a
// radix sort per round.
//   m: every value in s must lie in 0..m-1.
//   n: length of s, including the trailing 0 that the caller must append.
void build_sa(int m, int n)
{
    int i, *x = t, *y = t2;
    // Counting sort of the suffixes by their first character.
    for(i = 0; i < m; i++) c[i] = 0;
    for(i = 0; i < n; i++) c[x[i] = s[i]]++;
    for(i = 1; i < m; i++) c[i] += c[i-1];
    for(i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
    for(int k = 1; k <= n; k <<= 1)
    {
        int p = 0;
        // Order by the second key, taken directly from the current sa.
        // Suffixes that have no second half come first.
        for(i = n-k; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i]-k;
        // Stable counting sort by the first key.
        for(i = 0; i < m; i++) c[i] = 0;
        for(i = 0; i < n; i++) c[x[y[i]]]++;
        for(i = 1; i < m; i++) c[i] += c[i-1];
        for(i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        // Derive the new keys x from sa and the previous keys (now in y).
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for(i = 1; i < n; i++)
            x[sa[i]] = y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k] ? p-1 : p++;
        if(p >= n) break;   // all keys distinct: the order is final
        m = p;
    }
}

// rnk[i] is the position of suffix i inside sa (inverse of sa).
// Named rnk rather than rank: with `using namespace std;` an unqualified
// `rank` is ambiguous with std::rank from <type_traits>.
// height[r] is the longest common prefix of suffixes sa[r-1] and sa[r];
// height[0] is 0.
int rnk[maxn], height[maxn];

// Fills rnk and height for s[0..n-1]; build_sa must have been called first.
void getHeight(int n)
{
    int i, j, k = 0;
    for(i = 0; i < n; i++) rnk[sa[i]] = i;
    for(i = 0; i < n; i++)
    {
        if(rnk[i] == 0)
        {
            // The smallest suffix has no predecessor in sa; do not read sa[-1].
            height[0] = 0;
            k = 0;
            continue;
        }
        if(k) k--;
        j = sa[rnk[i]-1];
        // The trailing 0 appended to s occurs only once, so this comparison
        // stops before running off the end of the text.
        while(s[i+k] == s[j+k]) k++;
        height[rnk[i]] = k;
    }
}

int n, len;             // number of strings in the current case; length of s
char str[1010];         // input buffer for one string
// pos[i]: 1-based index of the string that s[i] belongs to, 0 for separators.
// flag: scratch "string already counted" markers used by judge.
int pos[maxn], flag[110];

// Returns true when the suffixes sa[l..r-1] start inside more than n/2
// distinct input strings. Suffixes starting at a separator are ignored.
bool judge(int l, int r)
{
    if(r-l <= n/2) return false;    // too few suffixes to cover enough strings
    memset(flag, 0, sizeof(flag));
    int cnt = 0;
    for(int i = l; i < r; i++)
    {
        int x = pos[sa[i]];
        if(!flag[x] && x != 0)
        {
            cnt++;
            flag[x] = 1;
        }
    }
    return cnt > n/2;
}

// Splits sa into maximal runs whose adjacent height values are all >= p and
// tests each run with judge.
//   print == false: returns true as soon as one run qualifies.
//   print == true : prints the length-p prefix of every qualifying run, one
//                   per line in sa order, and returns whether any was printed.
bool F(int p, bool print)
{
    bool found = false;
    int l = 0;
    // r runs one past the end so that the final run is closed like any other
    // and includes the last suffix.
    for(int r = 1; r <= len; r++)
    {
        if(r < len && height[r] >= p) continue;
        if(judge(l, r))
        {
            if(!print) return true;
            found = true;
            for(int i = sa[l]; i < sa[l]+p; i++) putchar(s[i]+'a'-1);
            putchar('\n');
        }
        l = r;
    }
    return found;
}

int main()
{
    //FOP2;
    // Compare against 1 so that both EOF and a non-numeric token end the loop.
    while(scanf("%d", &n) == 1 && n > 0)
    {
        len = 0;
        int mal = 0;    // length of the longest input string
        for(int i = 0; i < n; i++)
        {
            if(scanf("%1009s", str) != 1) return 0;     // truncated input
            int l = strlen(str);
            mal = max(mal, l);
            for(int j = 0; j < l; j++)
            {
                pos[len] = i+1;
                s[len++] = str[j]-'a'+1;
            }
            // Unique separator after each string.
            pos[len] = 0;
            s[len++] = i+100;
        }
        // Terminating 0 required by build_sa and getHeight.
        pos[len] = 0;
        s[len++] = 0;

        // With a single string the answer is that string itself.
        if(n == 1) { printf("%s\n\n", str); continue; }

        build_sa(250, len);
        getHeight(len);

        // Binary search for the largest length that some substring shared by
        // more than half of the strings can have; 0 means no such substring.
        int lo = 1, hi = mal, ans = 0;
        while(lo <= hi)
        {
            int mid = lo + (hi-lo)/2;
            if(F(mid, false)) { ans = mid; lo = mid+1; }
            else hi = mid-1;
        }

        if(ans == 0) printf("?\n\n");
        else
        {
            F(ans, true);
            printf("\n");
        }
    }
    return 0;
}


0 0
原创粉丝点击