linux kernel data struct: KMP算法实现

来源:互联网 发布:淘宝网集市商家热线 编辑:程序博客网 时间:2024/05/17 04:17

   KMP算法由 Knuth、Morris 和 Pratt 三人提出,它利用模式串(即“欲搜索”的字符串)自身的前后缀特征,在失配时避免主串指针的回溯,从而把字符串模式匹配的时间复杂度从O(m*n)降低到O(m+n)。

    这个算法的核心在于理解和计算next数组。具体实现如下:

 

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Compare the first `count` characters of s1 and s2, tracing to stdout.
 *
 * Returns the 1-based position of the first character that differs, or 0
 * when all `count` characters are equal (also when count <= 0).
 * Both buffers must have at least one readable character, because the
 * trace line prints s1[0] and s2[0] before any comparison is made.
 */
int is_2str_diff(char *s1, char *s2, int count)
{
    int pos = 0;

    printf("is 2str diff: %c & %c count: %d\n", s1[0], s2[0], count);

    /* Advance over the common prefix, stopping at the first mismatch. */
    while (pos < count && s1[pos] == s2[pos]) {
        pos++;
    }

    if (pos < count) {
        /* Mismatch found: report it 1-based so that 0 can mean "equal". */
        printf("return :%d\n", pos + 1);
        return pos + 1;
    }

    printf("return 0\n");
    return 0;
}

/**
 * Build the KMP failure table for the pattern `s`.
 *
 * On success next[0] is -1 and, for every i in 1..len-1, next[i] is the
 * length of the longest proper prefix of s[0..i-1] that is also a suffix of
 * it (overlapping prefixes/suffixes included).
 *
 * s:    NUL-terminated pattern.
 * next: output table with room for at least strlen(s) entries. Nothing is
 *       written when the pattern is empty or too long.
 *
 * Returns 0 on success, or -1 when the pattern is longer than
 * SCHAR_MAX + 1 characters, because the largest table value
 * (strlen(s) - 1) would not fit portably in a `char` entry.
 */
int calc_next(char *s, char *next)
{
    size_t len = strlen(s);
    size_t i;
    int border = 0; /* border length of s[0..i-2] at the top of each pass */

    if (len == 0) {
        return 0; /* no entries to fill */
    }
    if (len - 1 > (size_t)SCHAR_MAX) {
        return -1;
    }

    next[0] = -1;
    if (len > 1) {
        next[1] = 0; /* a single character has no proper border */
    }

    for (i = 2; i < len; i++) {
        /*
         * Extend the previous border with s[i-1]; on mismatch fall back to
         * the next shorter border. Only next[k] with k >= 1 is read here,
         * so the -1 sentinel in next[0] is never interpreted as a length.
         */
        while (border > 0 && s[i - 1] != s[border]) {
            border = next[border];
        }
        if (s[i - 1] == s[border]) {
            border++;
        }
        next[i] = (char)border;
    }

    return 0;
}

 

/*
 * Search the text s1 for the first occurrence of the pattern s2 using a
 * precomputed KMP failure table.
 *
 * s1, s1_len: text and the number of its characters to scan.
 * s2, s2_len: pattern and its length.
 * next:       failure table with s2_len entries; for j >= 1, next[j] must be
 *             the length of the longest proper prefix of s2[0..j-1] that is
 *             also its suffix (so next[j] < j). next[0] is never read, so
 *             the search does not depend on how -1 is stored in a `char`.
 *
 * Returns the 0-based index in s1 where the first match starts, 0 for an
 * empty pattern, or -1 when s1 does not contain s2.
 * One trace line with the current i and j is printed per step.
 */
int kmp_compare(char *s1, char *s2, int s1_len, int s2_len, char *next)
{
    int i = 0; /* position in the text; never moves backwards */
    int j = 0; /* number of pattern characters currently matched */

    if (s2_len == 0) {
        return 0; /* the empty pattern matches at the very start */
    }
    if (s2_len < 0 || s1_len < s2_len) {
        return -1;
    }

    while (i < s1_len) {
        printf("i = %d , j = %d\n", i, j);

        if (s1[i] == s2[j]) {
            i++;
            j++;
            if (j == s2_len) {
                return i - j;
            }
        } else if (j == 0) {
            /* Nothing matched yet: this text character cannot start a match. */
            i++;
        } else {
            /*
             * Keep the text position and retry against the longest border
             * of the matched prefix; the partial match must not be dropped.
             */
            j = next[j];
        }
    }

    return -1;
}

/*
 * Demo driver: builds the failure table for a fixed pattern, prints it, then
 * searches a fixed text for the pattern and prints the match position.
 *
 * Returns 0 on success, 1 if the failure table could not be built.
 */
int main(void)
{
    char s1[] = "abcdefabcabcabcdefeam";
    char s2[] = "abcabc";

    int str1_len = (int)strlen(s1);
    int str2_len = (int)strlen(s2);

    /* One entry per pattern character; sized at compile time (no VLA). */
    char next[sizeof s2];

    int i;
    int res;

    if (calc_next(s2, next) != 0) {
        fprintf(stderr, "calc_next failed\n");
        return 1;
    }

    printf("%s\n%s\n calc_next get\n", s1, s2);
    for (i = 0; i < str2_len; i++) {
        printf(" %d", next[i]);
    }
    printf("\n");

    res = kmp_compare(s1, s2, str1_len, str2_len, next);
    printf("result is: %d\n", res);

    /* The Windows-only system("pause") was dropped: it is not portable. */
    return 0;
}