带通配符*和?的kmp

来源:互联网 发布:node 获取git版本号 编辑:程序博客网 时间:2024/04/28 21:46

Wildcard

TimeLimit: 2000/1000 MS (Java/Others) Memory Limit: 125536/65536 K(Java/Others)
Total Submission(s): 563 Accepted Submission(s):135


Problem Description
When specifying file names (or paths) in DOS, Microsoft Windows and Unix-like operating systems, the asterisk character ("*") substitutes for any zero or more characters, and the question mark ("?") substitutes for any one character.
Now give you a text and a pattern, you should judge whether the pattern matches the text or not.

Input
There are several cases. Each case consists of exactly two lines. The first line contains a text, which contains only lowercase letters. The second line contains a pattern, which consists of lowercase letters and the two wildcards ("*", "?").
The text is a non-empty string whose length is less than 100,000, and so is the pattern.

We ensure that the number of "?" and the number of "*" in the pattern are both no more than 10.

Output
Output "YES" if the pattern matches the text, otherwise "NO".

Sample Input
abcdef
a*c*f

Sample Output
YES

Source
2011 Multi-University Training Contest 7 - Host by ECNU

HDU 3901 Wildcard

题目大意:


给两个长度不超过100000的字符串, 一个是带有通配符?和*的模式串, 问能否匹配. 通配符不超过10个.
 
 

这题弄了差不多一天= =···不过还算是有收获吧!

方法是我自己YY出来的,代码老长==···希望能有更好的方法,望各位神牛指教!!
 
我先把模式串处理成s1-s2-s3这样的形式,即两头没有'?'和'*'(两头的'*'直接忽略,只统计两头'?'的数目,然后把文本串的头尾相应地截去这么多个字符),以方便下面的处理。
然后分别对子串进行kmp,找出所有匹配。
再然后对'?'两边的子串进行合并,使匹配串都变成s1*s2*s3这样(只由'*'分隔),然后就可以进行贪心了。
不过具体的实现我觉得有挺多细节的···具体代码有详细注释:

#include <cstdio>
#include <cstring>

/*
 * Wildcard matching of a lowercase text against a pattern containing
 * '*' (any run of characters, possibly empty) and '?' (exactly one character).
 *
 * Outline:
 *   1. Strip the wildcards at both ends of the pattern; every stripped '?'
 *      removes one character from the corresponding end of the text.
 *   2. Split the remaining pattern into literal pieces at the wildcards and
 *      collect every occurrence of each piece in the text with KMP.
 *   3. Merge pieces that are joined only by '?' (fixed distance), so that the
 *      remaining pieces are separated by '*' only.
 *   4. Greedily place the merged pieces from left to right.
 *
 * NOTE(review): the program prints diagnostic information (occurrence tables,
 * offsets, a separator line) in addition to the plain YES/NO that the problem
 * statement asks for. The strings are kept exactly as they were; confirm
 * whether this is intentional before submitting to a judge.
 *
 * NOTE(review): the greedy pass accepts the first occurrence of each piece
 * that starts after the previous one; nothing here checks that the first piece
 * starts at index 0 or that the last piece ends at the end of the text when
 * the pattern has no leading/trailing '*'. Confirm whether substring-style
 * matching is the intended behaviour.
 */

static const int maxn = 100100;

// Element 0 of every row holds the number of entries stored in that row.
// start/end: first and last text index covered by each occurrence of a piece
//            (already widened by the '?' attached to the piece).
// s/e:       the same information for the pieces that remain after merging.
// temp1/temp2: scratch rows used while merging two pieces.
static int start[25][maxn], end[25][maxn], s[25][maxn], e[25][maxn], temp1[maxn], temp2[maxn];

// Number of '?' directly in front of / directly behind the piece that the next
// kmp() call searches for; kmp() folds them into start/end and resets them.
int front, back;

// KMP failure table: fail[i] is the last index of the longest proper border of
// pat[0..i], or -1 when there is none.
int fail[maxn];

/*
 * Finds every occurrence of pat in str and records it in row num of
 * start/end. The recorded start is moved left by `front` and the recorded end
 * is moved right by `back`. Both counters are reset to 0 before returning.
 */
void kmp(char* str, char* pat, int num)
{
    int i, j, k;

    memset(fail, -1, sizeof(fail));
    start[num][0] = 0;
    end[num][0] = 0;

    // Build the failure table.
    for (i = 1; pat[i]; ++i) {
        for (k = fail[i - 1]; k >= 0 && pat[i] - pat[k + 1]; k = fail[k])
            ;
        if (pat[k + 1] == pat[i])
            fail[i] = k + 1;
    }

    // Scan the text; overlapping occurrences are reported as well.
    i = j = 0;
    while (str[i] && pat[j]) {
        if (str[i] == pat[j]) {
            ++i, ++j;
            if (pat[j] == '\0') {
                start[num][++start[num][0]] = i - j - front;
                end[num][++end[num][0]] = i - 1 + back;
                j = fail[j - 1] + 1;
            }
        } else if (j == 0) {
            ++i;
        } else {
            j = fail[j - 1] + 1;
        }
    }

    front = back = 0;
}

static char str[maxn], p[maxn], t[maxn];

// st[k] describes the gap after piece k: 1 means the pieces are joined by '?'
// only (fixed distance), 0 means a '*' is involved.
static bool st[maxn];

int main()
{
    int i, j, k, ll, rr, mid;
    int len, cnt1, cnt2;
    bool ok;

    // Require both tokens; a partial read would leave a stale pattern in p.
    while (scanf("%s %s", str, p) == 2) {
        ok = 1;
        front = back = 0;

        // Strip the wildcards at both ends of the pattern so that it has the
        // shape s1-s2-...-sn with a literal at each end. cnt1/cnt2 count the
        // '?' stripped from the front/back.
        len = strlen(p);
        for (i = cnt1 = 0; i < len && (p[i] == '*' || p[i] == '?'); ++i)
            if (p[i] == '?')
                ++cnt1;
        for (j = len - 1, cnt2 = 0; j >= 0 && (p[j] == '*' || p[j] == '?'); --j)
            if (p[j] == '?')
                ++cnt2;

        const int text_len = (int)strlen(str);

        // The pattern consists of wildcards only.
        if (i > j) {
            if (cnt1 <= text_len)
                printf("YES 0 0\n");
            else
                printf("NO\n");
            continue;
        }

        // The text is too short to supply the characters required by the
        // stripped '?'. Without this guard the index text_len - cnt2 below
        // could become negative and write outside of str.
        if (cnt1 + cnt2 > text_len) {
            printf("NO\n");
            continue;
        }

        p[j + 1] = '\0';
        str[text_len - cnt2] = '\0';
        // Source and destination overlap, so strcpy must not be used here.
        memmove(p, p + i, (size_t)(j + 1 - i) + 1);
        memmove(str, str + cnt1, (size_t)(text_len - cnt2 - cnt1) + 1);

        // Split the pattern into literal pieces and search for each of them.
        len = strlen(p);
        for (i = j = k = 0; i < len; ++i) {
            if (p[i] == '*') {
                if (!j)
                    continue;
                t[j] = '\0';
                st[k] = 0;
                kmp(str, t, k++);
                j = 0;
            } else if (p[i] == '?') {
                if (!j) {
                    // Example: aa*??b
                    // The "??" belongs to "b", i.e. to the following piece.
                    front = 1;
                    while (p[i + 1] == '?') {
                        ++front;
                        ++i;
                    }
                    continue;
                }
                back = 1;
                while (p[i + 1] == '?') {
                    // Example: aa???
                    ++back;
                    ++i;
                }
                // Example: aa???*b versus aa???b
                st[k] = p[i + 1] == '*' ? 0 : 1;
                t[j] = '\0';
                kmp(str, t, k++);
                j = 0;
            } else {
                t[j++] = p[i];
            }
        }
        t[j] = '\0';
        kmp(str, t, k);

        // Every piece has to occur at least once.
        for (i = 0; ok && i <= k; ++i)
            if (!start[i][0])
                ok = 0;

        // Merge the pieces that are joined by '?' (state 1) so that the
        // pattern ends up separated by '*' only; this enables the greedy pass.
        if (ok) {
            len = 0;
            for (i = 0; ok && i < k; ++i) {
                // Scratch rows for the merged start and end positions.
                temp1[0] = temp2[0] = 0;
                if (st[i]) {
                    // For each occurrence of the left piece, binary-search the
                    // occurrence of the right piece that starts right after it.
                    for (j = 1; j <= end[i][0]; ++j) {
                        ll = 1, rr = start[i + 1][0] + 1;
                        while (rr - ll > 1) {
                            mid = (ll + rr) / 2;
                            if (start[i + 1][mid] <= end[i][j] + 1)
                                ll = mid;
                            else
                                rr = mid;
                        }
                        if (start[i + 1][ll] == end[i][j] + 1) {
                            temp1[++temp1[0]] = start[i][j];
                            temp2[++temp2[0]] = end[i + 1][ll];
                        }
                    }
                    if (!temp1[0]) {
                        // No adjacent pair exists: the match fails.
                        ok = 0;
                    } else {
                        // The merged piece takes the place of piece i + 1.
                        memcpy(start[i + 1], temp1, sizeof(temp1));
                        memcpy(end[i + 1], temp2, sizeof(temp2));
                    }
                } else {
                    // Piece i is complete; store it in s/e.
                    for (s[len][0] = e[len][0] = 0, j = 1; j <= end[i][0]; ++j) {
                        s[len][++s[len][0]] = start[i][j];
                        e[len][++e[len][0]] = end[i][j];
                    }
                    ++len;
                }
            }
            // Do not forget the last piece.
            for (s[len][0] = e[len][0] = 0, j = 1; j <= end[k][0]; ++j) {
                s[len][++s[len][0]] = start[k][j];
                e[len][++e[len][0]] = end[k][j];
            }
            ++len;
        }

        // Greedy check: k is the last text index used so far.
        if (ok) {
            k = -1;
            for (i = 0; ok && i < len; ++i) {
                for (j = 1; j <= s[i][0]; ++j)
                    if (s[i][j] > k) {
                        // Take the first candidate, i.e. the one that consumes
                        // the shortest stretch of the text.
                        k = e[i][j];
                        break;
                    }
                if (j > s[i][0])
                    ok = 0;
            }
        }

        if (ok) {
            printf("(suffix = include ? : not *) \nYES\nfront = %d\nback = k + cnt1 + cnt2= %d\ns=\n", s[0][1], k + cnt1 + cnt2);
            for (int row = 0; row < len; row++) {
                printf("%3d: ", s[row][0]);
                for (int col = 1; col <= s[row][0]; col++)
                    printf("%3d ", s[row][col]);
                printf("\n");
            }
            printf("YES k = %d cnt1 = %d cnt2 = %d\ne=\n", k, cnt1, cnt2);
            for (int row = 0; row < len; row++) {
                printf("%3d: ", e[row][0]);
                for (int col = 1; col <= e[row][0]; col++)
                    printf("%3d ", e[row][col]);
                printf("\n");
            }
        } else {
            printf("NO k = %d\n", k);
        }
        printf("///////////////////////////////////////////////////////////\n");
    }
    return 0;
}


再给出几组我debug中比较有价值的数据:

abababcdababcdecdabefcdabef
*ab??cd??ef*

 

abcdebcdde
*abcd?e*

 

babbbabab
ab?b?bab

 

abcdef
a*b*c*d*e*f

 

isdjkasd
i*s*d*j*k*a*s*d

 

hellokugou
hello*??gou

 

dfjijijiugnmlok
??*f?ij*ug?ml?k

 

dfjijijiugnmlok
?*f?ij*ug?ml?k

 

abcdefghijklmnopqrstuvwxyz
ab*?*ef?h?jk*qr??*u??x?z

 

sodfmkkoasa
s?df?k?o?*a

 

sodfmkkoas
s?df?k?o?*

求更简单的方法···


http://blog.sina.com.cn/s/blog_7da04dd30100vlcp.html


0 0
原创粉丝点击