找出第二个文本抄袭第一个文本的所有位置和长度 后缀数组 UVA 10526 - Intellectual Property

来源:互联网 发布:21天阿里云推荐系统 编辑:程序博客网 时间:2024/06/05 14:59

题目链接

题意:给定两个问题,要求找出第二个文本抄袭第一个文本的所有位置和长度,输出前k个,按长度从大到小先排,长度一样的按位置从小到大

思路:后缀数组,把两个文本拼接起来,记录下拼接位置为tdp,这样如果sa[i] < tdp就是前面的文本开头,如果sa[i] >= tdp就是后面的文本开头,拼接起来的求出height数组,利用该数组的性质,从前往后扫一遍,从后往前扫一遍,把所有位置的最大值保存下来,最后在扫描一遍位置,把答案记录下来

代码:

#include <cstdio>  #include <cstring>  #include <algorithm>    using namespace std;    const int MAXLEN = 200005;  const int INF = 0x3f3f3f3f;    char str[55555];  int k, tdp, an, v[MAXLEN];    struct Ans {      int len, pos;      Ans() {}      Ans(int len, int pos) {          this->len = len;          this->pos = pos;      }  } ans[MAXLEN];    bool cmp(Ans a, Ans b) {      if (a.len == b.len) return a.pos < b.pos;      return a.len > b.len;  }    struct Suffix {        int s[MAXLEN];      int sa[MAXLEN], t[MAXLEN], t2[MAXLEN], c[MAXLEN], n;      int rank[MAXLEN], height[MAXLEN];        void build_sa(int m) {          n++;          int i, *x = t, *y = t2;          for (i = 0; i < m; i++) c[i] = 0;          for (i = 0; i < n; i++) c[x[i] = s[i]]++;          for (i = 1; i < m; i++) c[i] += c[i - 1];          for (i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;          for (int k = 1; k <= n; k <<= 1) {              int p = 0;              for (i = n - k; i < n; i++) y[p++] = i;              for (i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;              for (i = 0; i < m; i++) c[i] = 0;              for (i = 0; i < n; i++) c[x[y[i]]]++;              for (i = 0; i < m; i++) c[i] += c[i - 1];              for (i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];              swap(x, y);              p = 1; x[sa[0]] = 0;              for (i = 1; i < n; i++)                  x[sa[i]] = (y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k]) ? p - 1 : p++;              if (p >= n) break;              m = p;          }          n--;      }        void getHeight() {          int i, j, k = 0;          for (i = 1; i <= n; i++) rank[sa[i]] = i;          for (i = 0; i < n; i++) {              if (k) k--;              int j = sa[rank[i] - 1];              while (s[i + k] == s[j + k]) k++;              height[rank[i]] = k;          }      }        void init() {          tdp = 0; n = 0; an = 0;          gets(str);          while (gets(str)) {              if (strcmp(str, "END TDP CODEBASE") == 0) break;              int len = strlen(str);              str[len] = '\n';              for (int i = 0; i <= len; i++)                  s[n++] = str[i];          }          tdp = n;          s[n++] = 260;          gets(str);          while (gets(str)) {              if (strcmp(str, "END JCN CODEBASE") == 0) break;              int len = strlen(str);              str[len] = '\n';              for (int i = 0; i <= len; i++)                  s[n++] = str[i];          }          s[n] = 0;      }        void solve() {          init();          build_sa(261);          getHeight();          memset(v, 0, sizeof(v));          int Min = -1;          for (int i = 1; i <= n; i++) {              if (sa[i] < tdp) Min = INF;              else if (sa[i] > tdp) {                  if (Min == -1) continue;                  Min = min(height[i], Min);                  v[sa[i] - tdp - 1] = max(Min, v[sa[i] - tdp - 1]);              }          }          Min = -1;          for (int i = n; i >= 1; i--) {              if (sa[i] < tdp) Min = INF;              else if (sa[i] > tdp) {                  if (Min == -1) continue;                  Min = min(height[i + 1], Min);                  v[sa[i] - tdp - 1] = max(Min, v[sa[i] - tdp - 1]);              }          }          int r = -1;          for (int i = 0; i < n - tdp; i++) {              if (i + v[i] <= r) continue;              if (v[i] == 0) continue;              ans[an++] = Ans(v[i], i);              r = i + v[i];          }          sort(ans, ans + an, cmp);          for (int i = 0; i < min(an, k); i++) {              printf("INFRINGING SEGMENT %d LENGTH %d POSITION %d\n", i + 1, ans[i].len, ans[i].pos);              for (int j = ans[i].pos + tdp + 1; j < ans[i].pos + tdp + 1 + ans[i].len; j++)                  printf("%c", s[j]);              printf("\n");          }      }    } gao;    int main() {      int bo = 0;      int cas = 0;      while (~scanf("%d%*c", &k) && k) {          if (bo) printf("\n");          else bo = 1;          printf("CASE %d\n", ++cas);          gao.solve();      }      return 0;  }  


0 0
原创粉丝点击