Linux中自带正则表达式应用举例

来源:互联网 发布:java方法格式 编辑:程序博客网 时间:2024/05/23 16:29

 环境:Fedora12, C程序:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the text of one match.
 *
 * s      - the string that was passed to regexec()
 * pmatch - one entry of the regmatch_t array filled in by regexec()
 *
 * Returns a pointer to a static, NUL-terminated buffer holding the bytes
 * s[rm_so .. rm_eo).  The buffer is overwritten by every call, so the
 * result must be consumed (or copied) before calling getsubstr() again.
 * Matches longer than the buffer are truncated to sizeof(buf) - 1 bytes;
 * an unset match (rm_so == -1) or NULL argument yields an empty string.
 */
char *getsubstr(char *s, regmatch_t *pmatch)
{
    static char buf[100];
    size_t len;

    memset(buf, 0, sizeof(buf));

    if (s == NULL || pmatch == NULL)
        return buf;
    if (pmatch->rm_so < 0 || pmatch->rm_eo <= pmatch->rm_so)
        return buf;

    len = (size_t)(pmatch->rm_eo - pmatch->rm_so);
    if (len > sizeof(buf) - 1)
        len = sizeof(buf) - 1;      /* never write past the static buffer */
    memcpy(buf, s + pmatch->rm_so, len);

    return buf;
}

/*
 * Compile one extended regular expression, run it once against a fixed
 * string and print the whole match followed by every captured group.
 *
 * Returns 0 on success (match or no match), 1 if the pattern cannot be
 * compiled or regexec() reports an error other than REG_NOMATCH.
 */
int main(int argc, char **argv)
{
    /* Entry 0 is the whole match, entries 1..4 are the four groups. */
    enum { NMATCH = 5 };

    const char *pattern = "([A-Z]+)([a-z]+)ID[0-9]+@([a-z]+)\\.([a-z]+)"; /* regular expression */
    char buf[] = "COMEdavID2012@gmail.com";                               /* string to search */
    regmatch_t pmatch[NMATCH];
    regex_t reg;
    char errbuf[128];
    int status;
    int i;

    (void)argc;
    (void)argv;

    status = regcomp(&reg, pattern, REG_EXTENDED);
    if (status != 0) {
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regcomp failed: %s\n", errbuf);
        return 1;
    }

    status = regexec(&reg, buf, NMATCH, pmatch, 0);
    if (status == REG_NOMATCH) {
        printf("No Match\n");
    } else if (status != 0) {
        /* Anything other than 0 / REG_NOMATCH is a real error, not a match. */
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regexec failed: %s\n", errbuf);
        regfree(&reg);
        return 1;
    } else {
        printf("Match:\n");
        for (i = 0; i < NMATCH; i++) {
            if (pmatch[i].rm_so == -1)
                continue;           /* this group did not participate */
            /* regoff_t is not guaranteed to be int, so cast for %d. */
            printf("[%d, %d): %s\n",
                   (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo,
                   getsubstr(buf, &pmatch[i]));
        }
    }

    regfree(&reg);
    return 0;
}


编译运行:

[zcm@t #52]$make
gcc    -c -o a.o a.c
gcc  -o a a.o
[zcm@t #53]$./a
Match:
[0, 23): COMEdavID2012@gmail.com
[0, 4): COME
[4, 7): dav
[14, 19): gmail
[20, 23): com
[zcm@t #54]$


注意

pmatch[0]保存整个正则表达式匹配结果的起止位置(rm_so为起始偏移,rm_eo为结束偏移的下一个位置)

pmatch[1]保存子模式1(第1个括号)匹配结果的起止位置

pmatch[2]保存子模式2(第2个括号)匹配结果的起止位置

......

 

思考

regexec每次调用只返回第一个匹配的结果。所以如果想从待搜索的字符串中搜索出所有匹配的结果(假设大于1个),怎么办呢?——循环调用regexec,每次把搜索的起始位置后移到上一次匹配结束的位置(pmatch[0].rm_eo)。注意:每次返回的偏移量都是相对于本次搜索起始位置的,而不是相对于原字符串开头的。代码如下:

/*
 * POSIX regular expressions: call regexec() in a loop to collect every
 * match in a string instead of only the first one.
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>

/*
 * Extract the text of one match.
 *
 * s      - the string that was passed to regexec()
 * pmatch - one entry of the regmatch_t array filled in by regexec()
 *
 * Returns a pointer to a static, NUL-terminated buffer holding the bytes
 * s[rm_so .. rm_eo).  The buffer is overwritten by every call, so the
 * result must be consumed (or copied) before calling getsubstr() again.
 * Matches longer than the buffer are truncated to sizeof(buf) - 1 bytes;
 * an unset match (rm_so == -1) or NULL argument yields an empty string.
 */
char *getsubstr(char *s, regmatch_t *pmatch)
{
    static char buf[100];
    size_t len;

    memset(buf, 0, sizeof(buf));

    if (s == NULL || pmatch == NULL)
        return buf;
    if (pmatch->rm_so < 0 || pmatch->rm_eo <= pmatch->rm_so)
        return buf;

    len = (size_t)(pmatch->rm_eo - pmatch->rm_so);
    if (len > sizeof(buf) - 1)
        len = sizeof(buf) - 1;      /* never write past the static buffer */
    memcpy(buf, s + pmatch->rm_so, len);

    return buf;
}

/*
 * Search a fixed string repeatedly, restarting each search where the
 * previous match ended, and print every match with its captured groups.
 * The printed offsets are relative to the start of the current search
 * (the "pSrc = ..." line), not to the start of the original string.
 *
 * Returns 0 on success, 1 if the pattern cannot be compiled or regexec()
 * reports an error other than REG_NOMATCH.
 */
int main(int argc, char **argv)
{
    enum { NMATCH = 10 };

    /* Alternative pattern that also captures the e-mail parts:
     *   "([A-Z]+)([a-z]+)(ID|DB)[0-9]+@([a-z]+)\\.([a-z]+)"
     */
    const char *pattern = "[[:upper:]]+([[:lower:]]+)";                           /* regular expression */
    char buf[] = "c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com";           /* string to search */
    const size_t len = strlen(buf);
    regmatch_t pmatch[NMATCH];
    regex_t reg;
    char errbuf[128];
    size_t pos = 0;         /* offset in buf where the next search starts */
    int mCount = 1;         /* ordinal of the current search */
    int status;
    int i;

    (void)argc;
    (void)argv;

    status = regcomp(&reg, pattern, REG_EXTENDED);
    if (status != 0) {
        regerror(status, &reg, errbuf, sizeof(errbuf));
        fprintf(stderr, "regcomp failed: %s\n", errbuf);
        return 1;
    }

    do {
        char *pSrc = buf + pos;
        /* After the first search pSrc is no longer the beginning of the
         * line, so tell regexec() that '^' must not match there. */
        int eflags = (pos == 0) ? 0 : REG_NOTBOL;

        printf("pSrc = %s\n", pSrc);
        status = regexec(&reg, pSrc, NMATCH, pmatch, eflags);
        if (status == REG_NOMATCH) {
            printf("No Match%d\n", mCount);
            break;
        }
        if (status != 0) {
            /* Anything other than 0 / REG_NOMATCH is a real error. */
            regerror(status, &reg, errbuf, sizeof(errbuf));
            fprintf(stderr, "regexec failed: %s\n", errbuf);
            regfree(&reg);
            return 1;
        }

        printf("Match%d:\n", mCount);
        for (i = 0; i < NMATCH; i++) {
            if (pmatch[i].rm_so == -1)
                continue;           /* this group did not participate */
            /* regoff_t is not guaranteed to be int, so cast for %d. */
            printf("pmatch[%d] = [%d, %d): %s\n", i,
                   (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo,
                   getsubstr(pSrc, &pmatch[i]));
        }
        putchar('\n');

        /* Resume right after this match.  Always advance by at least one
         * character so a zero-length match cannot loop forever. */
        pos += (pmatch[0].rm_eo > 0) ? (size_t)pmatch[0].rm_eo : 1;
        mCount++;
    } while (pos + 1 < len);    /* stop once at most one character remains */

    regfree(&reg);
    return 0;
}
编译运行:

[zcm@t #157]$make
gcc    -c -o a2.o a2.c
gcc  -o a2 a2.o
[zcm@t #158]$./a2
pSrc = c COMEdavDB2012@gmail.com ZHOUcimingID2030@sohu.com
Match1:
pmatch[0] = [2, 9): COMEdav
pmatch[1] = [6, 9): dav

pSrc = DB2012@gmail.com ZHOUcimingID2030@sohu.com
Match2:
pmatch[0] = [17, 27): ZHOUciming
pmatch[1] = [21, 27): ciming

pSrc = ID2030@sohu.com
No Match3
[zcm@t #159]$



原创粉丝点击