几种字符串匹配算法性能简单实验对比
来源:互联网 发布:gta5淘宝刷钱 编辑:程序博客网 时间:2024/06/04 23:36
做了一个很粗糙的实验,比较了几种字符串匹配算法的性能。程序用-O3进行编译优化。以下为待查找的文本长度为434018字节,模式串长度为4时的典型实验结果。可以看到,horspool算法最快,表现最差的为KMP系的shift_and算法(实验结果与《柔性字符串匹配》一书中的结果一致)。以下为horspool,shift_and和BNDM算法的实验源码:
strstr(C库函数) time:743 微秒
horspool: time:642 微秒
shift_and: time:1465 微秒
BNDM: time:721 微秒
/**
 * Horspool algorithm: count the occurrences of pat in txt.
 *
 * Occurrences may overlap ("aa" occurs three times in "aaaa").
 *
 * @param txt NUL-terminated text to search.
 * @param pat NUL-terminated pattern.
 * @return number of positions in txt at which pat occurs; 0 when pat is
 *         empty or longer than txt.
 */
int horspool(const char *txt, const char *pat)
{
    const size_t n = strlen(txt);
    const size_t m = strlen(pat);

    /* An empty pattern would produce a zero shift (infinite loop), and a
     * pattern longer than the text has no valid window at all. */
    if (m == 0 || m > n) {
        return 0;
    }

    /* Preprocessing: shift[c] is the distance from the last occurrence of c
     * in pat[0..m-2] to the end of the pattern, or m if c does not occur. */
    size_t shift[256];
    for (size_t c = 0; c < 256; c++) {
        shift[c] = m;
    }
    for (size_t i = 0; i + 1 < m; i++) {
        shift[(unsigned char)pat[i]] = m - i - 1;
    }

    /* Searching: compare each window right-to-left, then slide it according
     * to the text character aligned with the last pattern position. */
    const size_t last_start = n - m;
    size_t pos = 0;
    int cnt = 0;
    while (pos <= last_start) {
        const char *window = txt + pos;
        size_t remaining = m; /* pattern characters still to be verified */
        while (remaining > 0 && pat[remaining - 1] == window[remaining - 1]) {
            remaining--;
        }
        if (remaining == 0) {
            cnt++;
        }
        pos += shift[(unsigned char)window[m - 1]];
    }
    return cnt;
}
/**
 * Shift-And algorithm: count the occurrences of pat in txt.
 *
 * Occurrences may overlap. The bit-parallel automaton keeps one state bit
 * per pattern character in an unsigned long; patterns that do not fit in
 * one word are counted with a plain strstr() scan instead.
 *
 * @param txt NUL-terminated text to search.
 * @param pat NUL-terminated pattern.
 * @return number of positions in txt at which pat occurs; 0 when pat is
 *         empty.
 */
int shift_and(const char *txt, const char *pat)
{
    /* CHAR_BIT is at least 8, so this never overestimates the word width. */
    const size_t word_bits = sizeof(unsigned long) * 8;
    const size_t m = strlen(pat);

    if (m == 0) {
        return 0; /* the accept mask 1 << (m-1) is undefined for m == 0 */
    }
    if (m > word_bits) {
        /* Pattern too long for a single-word automaton. */
        int hits = 0;
        for (const char *hit = strstr(txt, pat); hit != NULL;
             hit = strstr(hit + 1, pat)) {
            hits++;
        }
        return hits;
    }

    /* b[c] has bit i set iff pat[i] == c. Unsigned arithmetic keeps every
     * shift well-defined up to the full word width. */
    unsigned long b[256] = {0};
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << i;
    }

    /* Bit m-1 of the state is set exactly when a whole match ends here. */
    const unsigned long accept = 1UL << (m - 1);
    unsigned long state = 0;
    int cnt = 0;
    for (const char *s = txt; *s != '\0'; s++) {
        state = ((state << 1) | 1UL) & b[(unsigned char)*s];
        if (state & accept) {
            cnt++;
        }
    }
    return cnt;
}
/**
 * BNDM (Backward Nondeterministic DAWG Matching): count the occurrences of
 * pat in txt.
 *
 * Occurrences may overlap. Each text window is read right-to-left while a
 * bit-parallel automaton tracks which pattern factors are still possible;
 * the longest pattern prefix seen as a window suffix determines the shift.
 * Patterns that do not fit in one unsigned long are counted with a plain
 * strstr() scan instead.
 *
 * @param txt NUL-terminated text to search.
 * @param pat NUL-terminated pattern.
 * @return number of positions in txt at which pat occurs; 0 when pat is
 *         empty or longer than txt.
 */
int BNDM(const char *txt, const char *pat)
{
    /* CHAR_BIT is at least 8, so this never overestimates the word width. */
    const size_t word_bits = sizeof(unsigned long) * 8;
    const size_t n = strlen(txt);
    const size_t m = strlen(pat);

    if (m == 0 || m > n) {
        return 0;
    }
    if (m > word_bits) {
        /* Pattern too long for a single-word automaton. */
        int hits = 0;
        for (const char *hit = strstr(txt, pat); hit != NULL;
             hit = strstr(hit + 1, pat)) {
            hits++;
        }
        return hits;
    }

    /* b[c] has bit (m-1-i) set iff pat[i] == c (pattern stored reversed). */
    unsigned long b[256] = {0};
    for (size_t i = 0; i < m; i++) {
        b[(unsigned char)pat[i]] |= 1UL << (m - i - 1);
    }

    const unsigned long top = 1UL << (m - 1);
    /* Low m bits set; built without shifting by the full word width. */
    const unsigned long window_mask = top | (top - 1);

    const size_t last_start = n - m;
    size_t pos = 0;
    int cnt = 0;
    while (pos <= last_start) {
        size_t unread = m; /* window characters not yet examined */
        size_t shift = m;
        unsigned long d = window_mask;
        while (d != 0) {
            d &= b[(unsigned char)txt[pos + unread - 1]];
            unread--;
            if (d & top) {
                if (unread > 0) {
                    shift = unread; /* a pattern prefix ends the window */
                } else {
                    cnt++;          /* the whole window matched */
                }
            }
            /* Keep d within m bits: otherwise the bit shifted out after a
             * full match keeps the loop running and the next iteration
             * reads the character before the window. */
            d = (d << 1) & window_mask;
        }
        pos += shift;
    }
    return cnt;
}
- 几种字符串匹配算法性能简单实验对比
- 几种常见的字符串匹配算法
- 字符串简单匹配算法
- 简单的字符串匹配算法
- 简单的字符串匹配算法
- 几种字符串匹配算法,我基本上没有听说过
- 字符串匹配的几种算法总结(KMP、等)
- BF,KMP,BM三种字符串匹配算法性能比较
- [算法]两种字符串匹配算法(索引法,KMP算法)对比,C语言实现
- 判断字符串为空方法性能对比实验
- 字符串匹配算法 BF算法或者简单匹配
- 几种文字匹配算法
- 几种流行Webservice框架性能对比
- 几种流行Webservice框架性能对比
- 几种流行Webservice框架性能对比
- 几种流行Webservice框架性能对比 .
- 几种流行Webservice框架性能对比
- 几种流行Webservice框架性能对比
- Mac OSX Leopard 中 终端java乱码的解决方法
- 删子串
- 普普通通的一天过去了
- 谈谈数独(Sudoku)
- device_register()和platform_device_register()的区别(转载)
- 几种字符串匹配算法性能简单实验对比
- 系统分析师--结构化分析与设计
- 数据库系统基础知识--存档
- 程序语言基础知识--存档
- 道真正的智力题吧,据说是世界上目前最好的智力题目
- NGINX总结
- 每天学多一点flash(37) 制作走马灯一
- = =该死的圣战日也不得安宁啊...QUSI!!!!
- Using Oracle's Parallel Execution Features