Number Sequence(HDU1711)

来源：互联网发布：智能运动手表推荐知乎编辑：程序博客网时间：2024/06/05 17:02

[kuangbin带你飞]专题十六 KMP & 扩展KMP & Manacher

题目：

A - Number Sequence

Time Limit:5000MS
Memory Limit:32768KB

HDU 1711

Description
Given two sequences of numbers : a[1], a[2], …… , a[N], and b[1], b[2], …… , b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which make a[K] = b[1], a[K + 1] = b[2], …… , a[K + M - 1] = b[M]. If there are more than one K exist, output the smallest one.

Input
The first line of input is a number T which indicate the number of cases. Each case contains three lines. The first line is two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which indicate a[1], a[2], …… , a[N]. The third line contains M integers which indicate b[1], b[2], …… , b[M]. All integers are in the range of [-1000000, 1000000].

Output
For each test case, you should output one line which only contain K described above. If no such K exists, output -1 instead.

Sample Input

2
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 1 3
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 2 1

Sample Output

6
-1

题目大意：

给定两个数组a、b，以a为主串、b为模板串，求b在a中第一次完整出现的起始位置（下标从1开始）；若b没有在a中出现，则输出-1。

分析：

法一 a作为主串，b作为模板串，用kmp匹配
（我学了好久kmp才明白这个算法）我是看了俞勇的《ACM国际大学生程序设计竞赛知识与入门》p161懂了匹配的方式
看了http://www.cnblogs.com/c-cloud/p/3224788.html才明白next数组怎么构造
尤其是这两张图特别容易让人明白next数组构造方式

kmp模板：

// KMP template over int sequences.
//   T[]    - text (main sequence)
//   p[]    - pattern (template sequence)
//   next[] - failure table of p, filled by makenext()
// The arrays are declared here so the snippet compiles on its own; their
// capacities mirror the limits quoted in the problem statement
// (N <= 1000000, M <= 10000). Change the element type / capacities as needed.
// NOTE: a global called `next` becomes ambiguous with std::next once
// `using namespace std;` is in effect and <iterator> is (transitively)
// included, so keep that directive out of a file that uses this template, or
// rename the array.
const int MAXT = 1000001;
const int MAXP = 10001;
int T[MAXT];
int p[MAXP];
int next[MAXP];

// Builds the failure table of the pattern p[0..n-1]:
// next[i] = length of the longest proper prefix of p[0..i] that is also a
// suffix of p[0..i].
// n: pattern length. Nothing is written when n <= 0.
void makenext(int n)
{
    if (n <= 0)
        return;

    next[0] = 0;
    for (int i = 1, j = 0; i < n; i++)
    {
        // j is the length of the border being extended; on a mismatch fall
        // back to the next shorter border until one can be extended.
        while (j > 0 && p[j] != p[i])
            j = next[j - 1];
        if (p[i] == p[j])
            j++;
        next[i] = j;
    }
}

// Searches the text T[0..m-1] for the pattern p[0..n-1].
// m: text length, n: pattern length.
// Precondition: makenext(n) has been called for the current pattern.
// Returns the 0-based start index of the first occurrence, or -1 when the
// text does not contain the pattern. A pattern length n <= 0 is reported as
// "not found" instead of comparing against p[0].
int kmp(int m, int n)
{
    if (n <= 0)
        return -1;

    for (int i = 0, j = 0; i < m; i++)
    {
        // j = number of pattern elements currently matched against the text
        // ending at position i - 1.
        while (j > 0 && T[i] != p[j])
            j = next[j - 1];
        if (T[i] == p[j])
            j++;
        if (j == n)
            return i - n + 1;
    }
    return -1; // the text does not contain the pattern
}

法二用hash字符串处理，匹配主串和模板串
看的是大白书（《挑战程序设计竞赛》）p374

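设基数为 $b$、模数为 $h$，长度为 $m$ 的串 $C=c_1c_2\cdots c_m$ 的哈希值定义为 $H(C)=(c_1b^{m-1}+c_2b^{m-2}+\cdots+c_m)\bmod h$。于是主串 $S$ 中从位置 $k+1$ 开始、长度为 $m$ 的子串的哈希值，可以由从位置 $k$ 开始的子串的哈希值递推得到：$H(S[k+1..k+m])=(H(S[k..k+m-1])\cdot b-s_k\,b^{m}+s_{k+m})\bmod h$。只要不断这样计算开始位置右移一位后的字符串子串的哈希值，就可以在O(n)时间内得到所有位置对应的hash值，从而在O(n+m)时间内完成字符串匹配。在实现时，可以用64位无符号整数计算hash值，并取h为2^64，通过自然溢出省去求模运算。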

hash模板：

// Rolling-hash template over int sequences.
//   a[] - text (main sequence), b[] - pattern (template sequence)
// The arrays are declared here so the snippet compiles on its own; their
// capacities mirror the limits quoted in the problem statement
// (N <= 1000000, M <= 10000).
typedef unsigned long long ull;
const ull B = 100000007; // hash base
const int MAXN = 1000001;
const int MAXM = 10001;
int a[MAXN];
int b[MAXM];

// Returns true when a[start .. start+len-1] equals b[0 .. len-1].
static bool window_equals_pattern(int start, int len)
{
    for (int k = 0; k < len; k++)
    {
        if (a[start + k] != b[k])
            return false;
    }
    return true;
}

// Finds the first occurrence of b[0..bl-1] inside a[0..al-1].
// al: text length, bl: pattern length.
// Returns the 0-based start index of the first occurrence, or -1 when there
// is none (including bl > al).
// Hashes are polynomial in base B, computed in 64-bit unsigned arithmetic so
// that wrap-around plays the role of "mod 2^64". Equal hashes do not prove
// equal sequences, so every hash hit is confirmed element by element before
// it is reported.
int contain(int al, int bl)
{
    if (bl > al)
        return -1;

    // t = B^bl, the weight of the element that leaves the window.
    ull t = 1;
    for (int i = 0; i < bl; i++)
        t *= B;

    // Hashes of the length-bl prefixes of a and b. Negative ints convert to
    // ull modulo 2^64, which is consistent with the wrap-around arithmetic.
    ull ah = 0, bh = 0;
    for (int i = 0; i < bl; i++)
    {
        ah = ah * B + static_cast<ull>(a[i]);
        bh = bh * B + static_cast<ull>(b[i]);
    }

    // Slide the window over every start position that still fits in a.
    for (int i = 0; i + bl <= al; i++)
    {
        if (ah == bh && window_equals_pattern(i, bl))
            return i;
        // Only roll forward while the next window still lies inside a.
        if (i + bl < al)
            ah = ah * B - static_cast<ull>(a[i]) * t + static_cast<ull>(a[i + bl]);
    }
    return -1;
}

用kmpAC代码：

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <string.h>
#include <cstdio>

// Capacities follow the limits in the problem statement:
// N <= 1000000 (text), M <= 10000 (pattern).
const int maxnA = 1000001;
const int maxnB = 10001;

int a[maxnA];     // text (main sequence)
int b[maxnB];     // pattern (template sequence)
int nextb[maxnB]; // failure table of b, filled by makenext()

// Builds the failure table of b[0..bl-1]:
// nextb[i] = length of the longest proper prefix of b[0..i] that is also a
// suffix of b[0..i]. Nothing is written when bl <= 0.
void makenext(int bl)
{
    if (bl <= 0)
        return;

    nextb[0] = 0;
    for (int i = 1, j = 0; i < bl; i++)
    {
        // On a mismatch fall back to the next shorter border.
        while (j > 0 && b[i] != b[j])
            j = nextb[j - 1];
        if (b[i] == b[j])
            j++;
        nextb[i] = j;
    }
}

// Searches a[0..al-1] for b[0..bl-1] using KMP; builds the failure table
// itself. Returns the 0-based start index of the first occurrence, or -1 when
// there is none (also for bl <= 0).
int kmp(int al, int bl)
{
    if (bl <= 0)
        return -1;

    makenext(bl);
    for (int i = 0, j = 0; i < al; i++)
    {
        // j = number of pattern elements matched so far.
        while (j > 0 && b[j] != a[i])
            j = nextb[j - 1];
        if (b[j] == a[i])
            j++;
        if (j == bl)
            return i - bl + 1;
    }
    return -1;
}

// Reads the number of test cases, then for each case the two lengths and the
// two sequences, and prints the 1-based position of the first occurrence of
// b in a, or -1 when b does not occur. Exits with status 1 on malformed or
// out-of-range input.
int main()
{
    int cas;
    if (std::scanf("%d", &cas) != 1)
        return 1;

    while (cas-- > 0)
    {
        int al, bl;
        if (std::scanf("%d%d", &al, &bl) != 2)
            return 1;
        if (al < 0 || al > maxnA || bl < 0 || bl > maxnB)
            return 1;

        // No per-case clearing is needed: only a[0..al-1], b[0..bl-1] and
        // nextb[0..bl-1] are ever read, and all of them are rewritten below
        // (nextb inside kmp) before being used.
        for (int i = 0; i < al; i++)
        {
            if (std::scanf("%d", &a[i]) != 1)
                return 1;
        }
        for (int i = 0; i < bl; i++)
        {
            if (std::scanf("%d", &b[i]) != 1)
                return 1;
        }

        const int pos = kmp(al, bl);
        // The expected answer is 1-based; "not found" stays -1.
        std::printf("%d\n", pos < 0 ? -1 : pos + 1);
    }
    return 0;
}

这里写图片描述

用hash字符串AC代码：

#include <iostream>
#include <string>
#include <string.h>
#include <algorithm>
#include <cstdio>

typedef unsigned long long ull;
const ull B = 100000007;   // hash base
const int MAXN = 1000001;  // capacity of the text array (N <= 1000000)
const int MAXM = 10001;    // capacity of the pattern array (M <= 10000)

int a[MAXN]; // text (main sequence)
int b[MAXM]; // pattern (template sequence)

// Returns true when a[start .. start+len-1] equals b[0 .. len-1].
static bool window_equals_pattern(int start, int len)
{
    for (int k = 0; k < len; k++)
    {
        if (a[start + k] != b[k])
            return false;
    }
    return true;
}

// Finds the first occurrence of b[0..bl-1] inside a[0..al-1] with a rolling
// polynomial hash in base B (64-bit unsigned wrap-around acts as mod 2^64).
// Returns the 0-based start index of the first occurrence, or -1 when there
// is none (including bl > al).
int contain(int al, int bl)
{
    if (bl > al)
        return -1;

    // t = B^bl, the weight of the element that leaves the window.
    ull t = 1;
    for (int i = 0; i < bl; i++)
        t *= B;

    // Hashes of the length-bl prefixes of a and b. Negative ints convert to
    // ull modulo 2^64, which is consistent with the wrap-around arithmetic.
    ull ah = 0, bh = 0;
    for (int i = 0; i < bl; i++)
    {
        ah = ah * B + static_cast<ull>(a[i]);
        bh = bh * B + static_cast<ull>(b[i]);
    }

    // Only start positions with i + bl <= al are real windows. Sliding any
    // further would read a[] beyond the al values that were input (and, for
    // large inputs, beyond the array), and could "match" stale or zero cells.
    for (int i = 0; i + bl <= al; i++)
    {
        // Equal hashes do not prove equal sequences: confirm before reporting.
        if (ah == bh && window_equals_pattern(i, bl))
            return i;
        if (i + bl < al)
            ah = ah * B - static_cast<ull>(a[i]) * t + static_cast<ull>(a[i + bl]);
    }
    return -1;
}

// Reads the number of test cases, then for each case the two lengths and the
// two sequences, and prints the 1-based position of the first occurrence of
// b in a, or -1 when b does not occur. Exits with status 1 on malformed or
// out-of-range input.
int main()
{
    int cas;
    if (std::scanf("%d", &cas) != 1)
        return 1;

    while (cas-- > 0)
    {
        int al, bl;
        if (std::scanf("%d%d", &al, &bl) != 2)
            return 1;
        if (al < 0 || al > MAXN || bl < 0 || bl > MAXM)
            return 1;

        // No per-case clearing is needed: contain() only reads a[0..al-1]
        // and b[0..bl-1], which are fully rewritten here.
        for (int i = 0; i < al; i++)
        {
            if (std::scanf("%d", &a[i]) != 1)
                return 1;
        }
        for (int i = 0; i < bl; i++)
        {
            if (std::scanf("%d", &b[i]) != 1)
                return 1;
        }

        const int pos = contain(al, bl);
        // The expected answer is 1-based; "not found" stays -1.
        std::printf("%d\n", pos < 0 ? -1 : pos + 1);
    }
    return 0;
}

这里写图片描述

小结：

个人觉得大多数kmp匹配问题似乎都可以用hash处理，而且hash似乎好写一些（较易理解），况且hash比kmp省了300ms。

0 0