glibc 系列之strlen()函数
来源:互联网 发布:域名转国外注册商流程 编辑:程序博客网 时间:2024/05/18 14:25
glibc系列之strlen()函数学习。
首先奉上的是strlen的源代码:
/* Return the length of the null-terminated string STR, i.e. the number
   of characters that precede the first '\0'.

   The return type is size_t and the argument is a character array
   (passed as a const char *).

   The string is scanned one byte at a time until the pointer is aligned
   on a longword boundary, and one longword at a time after that.  */
size_t
STRLEN (const char *str)
{
  const char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, himagic, lomagic;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.
     (This loop is what performs the alignment.)  */
  for (char_ptr = str;
       ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
       ++char_ptr)
    if (*char_ptr == '\0')
      return char_ptr - str;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to 8-byte longwords.  */

  /* Keep the const qualifier: the string is only ever read.  */
  longword_ptr = (const unsigned long int *) char_ptr;

  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
     the "holes."  Note that there is a hole just to the left of
     each byte, with an extra at the end:

     bits:  01111110 11111110 11111110 11111111
     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD

     The 1-bits make sure that carries propagate to the next 0-bit.
     The 0-bits provide holes for carries to fall into.

     NOTE(review): the pattern drawn above is 0x7efefeff, which is
     neither of the two constants assigned below; the test that is
     actually performed is described inside the loop.  */

  /* These two magic numbers are the heart of the algorithm:
     HIMAGIC has the top bit of every byte set, LOMAGIC has the bottom
     bit of every byte set.  */
  himagic = 0x80808080L;
  lomagic = 0x01010101L;
  if (sizeof (longword) > 4)
    {
      /* 64-bit version of the magic.  */
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
      himagic = ((himagic << 16) << 16) | himagic;
      lomagic = ((lomagic << 16) << 16) | lomagic;
    }
  if (sizeof (longword) > 8)
    abort ();

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any of the four* bytes in the longword in question are zero.  */
  for (;;)
    {
      longword = *longword_ptr++;

      /* Check whether any byte of this longword may be 0x00.
         Subtracting LOMAGIC turns the top bit of a zero byte into 1,
         "& ~longword" keeps only bytes whose top bit was 0 before the
         subtraction, and "& himagic" isolates those top bits.  A
         non-zero result sends us into the byte-by-byte check below.  */
      if (((longword - lomagic) & ~longword & himagic) != 0)
        {
          /* Which of the bytes was the zero?  If none of them were, it was
             a misfire; continue the search.  */

          /* LONGWORD_PTR was already advanced, so step back one word.  */
          const char *cp = (const char *) (longword_ptr - 1);

          if (cp[0] == 0)
            return cp - str;
          if (cp[1] == 0)
            return cp - str + 1;
          if (cp[2] == 0)
            return cp - str + 2;
          if (cp[3] == 0)
            return cp - str + 3;
          if (sizeof (longword) > 4)
            {
              if (cp[4] == 0)
                return cp - str + 4;
              if (cp[5] == 0)
                return cp - str + 5;
              if (cp[6] == 0)
                return cp - str + 6;
              if (cp[7] == 0)
                return cp - str + 7;
            }
        }
    }
}
下面是代码解析
首先给出俩例子来形象的说明这个问题。
1
himagic = 0x80808080L; 1000-0000 1000-0000 1000-0000 1000-0000
lomagic = 0x01010101L; 0000-0001 0000-0001 0000-0001 0000-0001
((longword - lomagic) & ~longword & himagic) != 0
举个例子来说明这个问题:
longword = 000000ff
a =(longword - lomagic) =fefefffe
b = ~longword = ffffff00
a&b = fefeff00
himagic = 80808080
a&b&himagic = 80808000
2
另一个例子: longword =00000000
a =(longword - lomagic) =fefefeff
b = ~longword =ffffffff
a&b = fefefeff
himagic = 80808080
a&b&himagic = 80808080
其实最主要的是if (((longword - lomagic) & ~longword & himagic) != 0)
怎么理解的。
首先考虑到的是ASCII码的最高位是0,
~longword & himagic
运算的结果是标记出哪些字节的最高位是0:某个字节的最高位为0时,该字节对应的结果是0x80,否则是0x00。如果四个字节的最高位都是0,则运算结果是0x80808080。
longword - lomagic 的作用是:一旦longword 有一个字节为0,该字节减1时要向高位借位,结果变成0xff(如果低位字节已经向它借过位,则是0xfe),最高位变成了1。再与 ~longword & himagic 相与,结果就不为0,也就是找到了‘\0’,进入了if体里。
下面是我自己写的strlen()函数。
#include <cstdint>
#include <iostream>

// Shorthand for the machine word that is read on every loop iteration.
#define ulint unsigned long int

/*
 * Return the number of characters in str before the terminating '\0'.
 *
 * The string is walked one byte at a time until the pointer is aligned on
 * a ulint boundary, then one ulint at a time, using the same zero-byte
 * test as the glibc strlen.
 *
 * Like the glibc version, the word loop reads whole aligned words and may
 * therefore touch bytes that lie after the terminator inside the same word.
 */
unsigned int mglStrLen(const char *str)
{
    // Align: advance byte by byte until tempStr is a multiple of sizeof(ulint).
    // The mask must be parenthesised, because != binds tighter than &;
    // without the parentheses only bit 0 of the address is tested.
    const char *tempStr;
    for (tempStr = str;
         ((std::uintptr_t)tempStr & (sizeof(ulint) - 1)) != 0;
         tempStr++)
    {
        if (*tempStr == '\0')
            return (unsigned int)(tempStr - str);
    }

    // Calculate the length.
    // himagic has the top bit of every byte set, lomagic the bottom bit.
    ulint himagic = 0x80808080UL;
    ulint lomagic = 0x01010101UL;
    if (sizeof(ulint) > 4)
    {
        // 8-byte ulint: replicate the pattern into the upper half. The shift
        // is done in two steps so it stays valid when ulint has 32 bits.
        himagic = ((himagic << 16) << 16) | himagic;
        lomagic = ((lomagic << 16) << 16) | lomagic;
    }

    // Converting a const char * needs (const ulint *): the const qualifier
    // has to be carried over, the data is only ever read.
    const ulint *longwordPtr = (const ulint *)tempStr;

    while (1)
    {
        // Test the current word BEFORE advancing, so that the first aligned
        // word is examined too.
        ulint longword = *longwordPtr;

        // Same test as glibc: non-zero when some byte of the word may be 0.
        if (((longword - lomagic) & ~longword & himagic) != 0)
        {
            // longwordPtr points at a whole word; to compare single bytes it
            // has to be viewed as const char *, the same type as str, which
            // also makes the pointer subtraction below well-formed.
            const char *bytePtr = (const char *)longwordPtr;
            for (unsigned int i = 0; i < sizeof(ulint); i++)
            {
                if (bytePtr[i] == '\0')
                    return (unsigned int)(bytePtr - str + i);
            }
            // No byte was actually zero: false alarm, keep searching.
        }
        longwordPtr++;
    }
}

int main()
{
    char str[] = "mglhahaahhahah"; // 14 characters, plus the terminating '\0'
    int len = mglStrLen(str);
    std::cout << "mgl " << len << std::endl;
    return 0;
}
程序运行结果:
mgl 14请按任意键继续. . .
一个属于mgl的完美unsigned int strlen(const char * str){}函数。
哈哈!!
- glibc 系列之strlen()函数
- glibc源码分析之stat系列函数
- glibc源码分析之stat64系列函数
- glibc源码分析之chmod系列函数
- glibc源码分析之truncate系列函数
- glibc源码分析之chown系列函数
- glibc源码分析之utime系列函数
- glibc源码分析之statfs系列函数
- strlen glibc 源码分析
- glibc strlen 实现
- GLIBC strlen源代码分析
- glibc -- strlen源码分析
- glibc笔记——strlen
- C语言之strlen函数
- C语言之strlen函数
- strlen()函数
- strlen()函数
- glibc源码分析之access函数
- 使用CSS美化Chrome下的滚动条样式
- 【分析】Ceph通用模块--线程池
- JAVA 攻城狮 第十二天
- maven实战(二)maven的依赖
- vim中常用基本命令
- glibc 系列之strlen()函数
- js基础
- 工厂方法模式
- 神经机器翻译(Neural Machine Translation)系列教程
- web测试要点
- java-web知识点
- JavaScript初识和简单的方法调用 7.10
- 安装Linux后我做的几件事
- 【C语言】字符类型