C语言POSIX正则表达式示例

来源:互联网 发布:喜马拉雅fm mac版 编辑:程序博客网 时间:2024/04/29 10:34


C语言的正则表达式具有效率高,编程容易的特点,其精华在于写出一个好的正则表达式规则。

示例如下。规则可以看我的其他博文。

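调试下面的示例时可以发现一个问题:如果把字符串结束符写到 msg[end] 处,就会出现内存错误。原因是 end 是相对于原始文本的偏移量,而缓冲区只分配了 len 个字节,写 msg[end] 属于越界写入;正确做法是分配 len+1 个字节,并在 msg[len] 处写入 '\0'。这并不是C内存操作的"奥秘",而是典型的下标越界。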

#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Demo: scan a buffer of concatenated AIS "!AIVDM" sentences with a POSIX
 * extended regular expression and print every match together with the two
 * characters that follow it (the checksum digits after '*').
 *
 * Returns 0 on success and 1 when the pattern does not compile or memory
 * runs out.
 *
 * Scratch patterns from earlier experiments:
 *   "!AIVDM,\.{20,80}\,.[*]", "^(hisencyber)(.com|.com.cn)",
 *   "!AIVDM,\..*\,0*", "!AIVDM,\.*\,0*"  (REG_NOSUB was also considered).
 */
int main(int argc, char **argv)
{
    /* Number of characters copied after the end of each regex match. */
    enum { TRAILER_LEN = 2 };

    (void)argc;
    (void)argv;

    /*
     * The original literals contained "\.", "\[" and "\!", which are not
     * valid C escape sequences; common compilers warn and drop the
     * backslash. The literals below spell out that resulting text directly.
     *
     * NOTE(review): '.' also matches '*', and POSIX matching prefers the
     * longest match, so one match may span two short sentences. Consider
     * "[^*]{1,72}" if exactly one sentence per match is wanted.
     */
    const char *pattern = "!AIVDM,.{1,72}[*]";
    const char *text =
        "!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F"
        "!AIVDM,2,2,3,A,@h0000000000000,2*0F"
        "!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D"
        "!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51"
        "!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A"
        "!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46"
        "!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36"
        "!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88"
        "!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65"
        "!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75"
        "!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";

    regex_t re;
    regmatch_t match;
    int status = 0;

    int rc = regcomp(&re, pattern, REG_EXTENDED);
    if (rc != 0) {
        /* Report the compile error; a failed regcomp must not be regfree'd. */
        char errbuf[1024] = {0};
        regerror(rc, &re, errbuf, sizeof errbuf);
        printf("ErrMsg: %s\n", errbuf);
        return 1;
    }

    const char *cursor = text;
    while (regexec(&re, cursor, 1, &match, 0) == 0) {
        size_t remaining = strlen(cursor);
        size_t start = (size_t)match.rm_so;
        size_t end = (size_t)match.rm_eo + TRAILER_LEN;

        /* Never read beyond the terminating NUL of the input. */
        if (end > remaining) {
            end = remaining;
        }
        size_t len = end - start;

        /* One extra byte for the terminator, written at index len (the
         * offset inside the copy), not at the offset inside the source. */
        char *msg = malloc(len + 1);
        if (msg == NULL) {
            printf("ErrMsg: out of memory\n");
            status = 1;
            break;
        }
        memcpy(msg, cursor + start, len);
        msg[len] = '\0';
        printf("%s \n", msg);
        free(msg);

        /* Resume right after the copied text; skipping one more character
         * would swallow the '!' that starts the next sentence. */
        cursor += end;
    }

    regfree(&re);
    return status;
}

将匹配结果封装到一个链表数据结构中,降低程序各部分之间的耦合,增强模块化。

数据结构的示例可以看博文:http://blog.csdn.net/luopeiyuan1990/article/details/8255632

/*
 * Find every match of the POSIX extended regex Regex_rule in pText_ and
 * append a heap-allocated, NUL-terminated copy of each match (plus the two
 * characters that follow it) to the end of the linked list pret_regex.
 * The list is printed with PrintList() once the scan is finished.
 *
 * pret_regex  list head; must not be NULL (nothing is done otherwise).
 * Regex_rule  pattern text, compiled with REG_EXTENDED.
 * pText_      NUL-terminated input; it is only read, never modified.
 *
 * If the pattern does not compile, the regerror() text is printed as
 * "ErrMsg: ..." and the list is left untouched. The Data buffers of the
 * appended nodes are allocated with malloc() and are not freed here.
 */
void Regex_MSG(pRet_Regex pret_regex, char *Regex_rule, char *pText_)
{
    /* Number of characters copied after the end of each regex match. */
    enum { TRAILER_LEN = 2 };

    regex_t oRegex;
    regmatch_t pmatch;
    const char *pText = pText_;

    if (pret_regex == NULL || Regex_rule == NULL || pText_ == NULL) {
        return;
    }

    int nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0) {
        /* A regex_t that failed to compile must not be passed to regfree. */
        char szErrMsg[1024] = {0};
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return;
    }

    while (regexec(&oRegex, pText, 1, &pmatch, 0) == 0) {
        size_t remaining = strlen(pText);
        size_t start = (size_t)pmatch.rm_so;
        size_t end = (size_t)pmatch.rm_eo + TRAILER_LEN;

        /* Clamp so the copy never reads past the terminating NUL. */
        if (end > remaining) {
            end = remaining;
        }
        /* An empty match at the very end of the text cannot advance. */
        if (end == 0) {
            break;
        }
        size_t len = end - start;

        /* len + 1 bytes: the terminator goes at index len of the copy. */
        char *copy = (char *)malloc(len + 1);
        if (copy == NULL) {
            break;
        }
        memcpy(copy, pText + start, len);
        copy[len] = '\0';

        /* Append a fresh node after the current tail and hand it the copy. */
        pRet_Regex pNode = Before_Null_Node(pret_regex);
        AddNode(pNode, pNode->next);
        if (pNode->next == NULL) {
            free(copy);
            break;
        }
        pNode->next->Data = copy;

        /* Continue right after the copied text; skipping one more character
         * would drop the first character of an adjacent match. */
        pText += end;
    }

    regfree(&oRegex);
    PrintList(pret_regex);
}


最后分拆为两个文件:

#include <sys/types.h>#include "Eregex.hpp"int main(int argc, char ** argv){// !AIVDM,\.{20,80}\,.[*]    ^(hisencyber)(.com|.com.cn)// \{m,n\}   "!AIVDM,\..*\,0*"  "!AIVDM,\.*\,0*"   REG_NOSUB  child char check    //char * pRegexStr = (char *)"!AIVDM,\.{1,72}\[*]";    char * pText_ =(char*)"!AIVDM,2,1,3,A,577K2:02;M1AI9HkP01DpUHE9<4j0lfo;33P000`40=664000422isC@,0*2F\!AIVDM,2,2,3,A,@h0000000000000,2*0F\!AIVDM,1,1,,B,9002=mQq1oIJvt6;2eUn>Sh0040<,0*5D\!AIVDM,1,1,,A,>5?Per18=HB1U:1@E=B0m<L,2*51\!AIVDM,1,1,,B,C5N3SRP0IFJKmE4:v6pBGw`@62PaLELTBJ:V00000000S0D:R220,0*3A\!AIVDM,2,1,5,B,E1mg=5J1T4W0h97aRh6ba84<h2d;W:Te=eLvH50```q,0*46\!AIVDM,2,2,5,B,:D44QDlp0C1DU00,2*36\!AIVDM,1,1,,B,15Mq4J0P01EREODRv4@74gv00HRq,0*88\!AIVDM,1,1,,A,;4eG>3iuaFP2:r3OiBH7;8i00000,0*65\!AIVDM,1,1,,A,B5N3SRP0FFJFC`4:CQDFKwiP200>,0*75\!AIVDM,1,1,,B,H5NHcTP<51@4TrM>10584@U<D000,2*77 ";pRet_Regex pret_regex;pret_regex = (pRet_Regex)malloc(sizeof(Ret_Regex)); pret_regex->next =NULL;//!AIVDM,\.{1,72}\[*]Regex_MSG(NULL,(char*)"!AIVDM,\.*\[*]",pText_);}

另外一个是 hpp 文件。由于其中直接包含了函数定义,如果被多个源文件同时包含,链接时会出现重复定义,所以实际使用时最好把这些实现写到 .c 文件中,头文件里只保留声明。

#ifndef EREGEX_HPP
#define EREGEX_HPP

#include <stdio.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#ifndef __cplusplus
#include <stdbool.h>
#endif

/*
 * Singly linked list node that stores one regex match.
 * The struct tag is used for the self-reference so the definition is valid
 * in C as well as in C++.
 */
typedef struct Ret_Regex {
    char *Data;             /* NUL-terminated heap copy of a match, or NULL */
    unsigned char length;   /* set to 0 by CreateNode(); otherwise unused here */
    struct Ret_Regex *next; /* next node, NULL at the tail */
} Ret_Regex, *pRet_Regex;

#define Linkpointer pRet_Regex
#define LinkEntity Ret_Regex

/* Hook for filling a freshly created node; intentionally does nothing. */
void Entity_entity(Linkpointer Node)
{
    (void)Node;
}

/* Print the text of one node followed by " \n"; nodes without text
 * (for example a head created by CreateNode()) are skipped. */
void Entity_print(Linkpointer Node)
{
    if (Node == NULL || Node->Data == NULL) {
        return;
    }
    printf("%s \n", Node->Data);
}

/* Print every node of the list, starting at phead itself. */
void PrintList(Linkpointer phead)
{
    for (Linkpointer p = phead; p != NULL; p = p->next) {
        Entity_print(p);
    }
}

/* Count the nodes including the head: a list holding only a head has
 * length 1, a NULL list has length 0. */
int GetLength(Linkpointer phead)
{
    int Length = 0;
    for (Linkpointer p = phead; p != NULL; p = p->next) {
        Length++;
    }
    return Length;
}

/* Return the last node of the list (the one whose next is NULL), or NULL
 * when phead is NULL. */
Linkpointer Before_Null_Node(Linkpointer phead)
{
    Linkpointer p = phead;
    if (p == NULL) {
        return NULL;
    }
    while (p->next != NULL) {
        p = p->next;
    }
    return p;
}

/* Allocate a node with every field cleared. Prints a message and returns
 * NULL when the allocation fails. */
Linkpointer CreateNode(void)
{
    Linkpointer pNode = (Linkpointer)malloc(sizeof(LinkEntity));
    if (pNode == NULL) {
        printf("Node malloc error \n");
        return NULL;
    }
    pNode->Data = NULL;
    pNode->length = 0;
    pNode->next = NULL;
    Entity_entity(pNode);
    return pNode;
}

/* Insert a new node between pNode and pNode2. The list is left unchanged
 * when pNode is NULL or the node cannot be allocated. */
void AddNode(Linkpointer pNode, Linkpointer pNode2)
{
    if (pNode == NULL) {
        return;
    }
    Linkpointer pNew = CreateNode();
    if (pNew == NULL) {
        return;
    }
    pNew->next = pNode2;
    pNode->next = pNew;
}

/* Unlink and free the node after pNode. Returns true when a node was
 * removed, false when there was none. The removed node's Data buffer is
 * not freed here; release it beforehand if it is owned by the list. */
bool DeleteNode(Linkpointer pNode)
{
    if (pNode == NULL || pNode->next == NULL) {
        printf(" No Node to Delete ");
        return false;
    }
    Linkpointer pDel = pNode->next;
    pNode->next = pDel->next;
    pDel->next = NULL;
    free(pDel);
    return true;
}

/* Copy the match described by *match, plus the two characters after it,
 * out of text into a new NUL-terminated buffer. *advance receives the
 * offset in text at which the next search should start. Returns NULL when
 * memory runs out or when no progress is possible (empty match at the very
 * end of text). */
static char *Regex_CopyMatch(const char *text, const regmatch_t *match,
                             size_t *advance)
{
    enum { TRAILER_LEN = 2 };
    size_t remaining = strlen(text);
    size_t start = (size_t)match->rm_so;
    size_t end = (size_t)match->rm_eo + TRAILER_LEN;

    /* Clamp so the copy never reads past the terminating NUL. */
    if (end > remaining) {
        end = remaining;
    }
    if (end == 0) {
        return NULL;
    }

    size_t len = end - start;
    /* len + 1 bytes: the terminator goes at index len of the copy, not at
     * the match's offset inside the source text. */
    char *copy = (char *)malloc(len + 1);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, text + start, len);
    copy[len] = '\0';
    *advance = end;
    return copy;
}

/**************************************************************
 * Search pText_ with the POSIX extended regex Regex_rule.
 *
 * Two modes, selected by pret_regex_:
 *  1. pret_regex_ != NULL: every match (plus the two characters that
 *     follow it) is appended to the list as a new node, the list is
 *     printed with PrintList(), and NULL is returned.
 *     The list head must be initialised by the caller.
 *  2. pret_regex_ == NULL: the first match (plus the two characters that
 *     follow it) is printed and returned as a malloc()ed string that the
 *     caller must free(); NULL is returned when nothing matches.
 *
 * pText_ is only read: it is neither modified nor freed.
 * When the pattern does not compile, the regerror() text is printed as
 * "ErrMsg: ..." and NULL is returned.
 ***************************************************************/
char *Regex_MSG(pRet_Regex pret_regex_, char *Regex_rule, char *pText_)
{
    regex_t oRegex;
    regmatch_t pmatch;
    const char *pText = pText_;
    char *OutChar = NULL;
    size_t advance = 0;

    if (Regex_rule == NULL || pText_ == NULL) {
        return NULL;
    }

    int nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0) {
        /* A regex_t that failed to compile must not be passed to regfree. */
        char szErrMsg[1024] = {0};
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return NULL;
    }

    if (pret_regex_ == NULL) {
        /* Mode 2: first match only. */
        if (regexec(&oRegex, pText, 1, &pmatch, 0) == 0) {
            OutChar = Regex_CopyMatch(pText, &pmatch, &advance);
            if (OutChar != NULL) {
                printf("Out char is %s \n", OutChar);
            }
        }
        regfree(&oRegex);
        return OutChar;
    }

    /* Mode 1: append every match; the tail is tracked so the list is not
     * re-walked for each match. */
    Linkpointer pTail = Before_Null_Node(pret_regex_);
    while (regexec(&oRegex, pText, 1, &pmatch, 0) == 0) {
        char *copy = Regex_CopyMatch(pText, &pmatch, &advance);
        if (copy == NULL) {
            break;
        }
        Linkpointer pNew = CreateNode();
        if (pNew == NULL) {
            free(copy);
            break;
        }
        pNew->Data = copy;
        pTail->next = pNew;
        pTail = pNew;

        /* Continue right after the copied text; skipping one more character
         * would drop the first character of an adjacent match. */
        pText += advance;
    }

    regfree(&oRegex);
    PrintList(pret_regex_);
    return NULL;
}

#endif /* EREGEX_HPP */


bug修改如下:


/* Copy the span [rm_so + FrontShifting, rm_eo + BackShifting) of text into
 * a new NUL-terminated buffer, clamped to the bounds of text. *resume
 * receives the offset in text at which the next search should start.
 * Returns NULL when memory runs out or when the shifted end would not move
 * the scan forward. */
static char *Regex_CopyShiftedMatch(const char *text, const regmatch_t *match,
                                    int FrontShifting, int BackShifting,
                                    size_t *resume)
{
    long limit = (long)strlen(text);
    long start = (long)match->rm_so + FrontShifting;
    long end = (long)match->rm_eo + BackShifting;

    /* Keep both ends inside the text so the copy cannot over-read. */
    if (start < 0) {
        start = 0;
    }
    if (end > limit) {
        end = limit;
    }
    if (end <= 0) {
        return NULL;
    }
    if (start > end) {
        start = end;
    }

    size_t len = (size_t)(end - start);
    /* len + 1 bytes so the copy can be printed with %s. */
    char *copy = (char *)malloc(len + 1);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, text + start, len);
    copy[len] = '\0';
    *resume = (size_t)end;
    return copy;
}

/*
 * Search pText_ with the POSIX extended regex Regex_rule.
 *
 * For each match the span from (match start + FrontShifting) to
 * (match end + BackShifting) is copied, and the scan resumes at
 * (match end + BackShifting).
 * NOTE(review): the original used match end + FrontShifting as the resume
 * offset in the non-list branch and a length that ignored FrontShifting;
 * both looked like copy/paste slips and are unified here. Confirm against
 * the callers.
 *
 * Two modes, selected by pret_regex_:
 *  - pret_regex_ != NULL: every copy is appended to the list as a new
 *    node, the list is printed with Ret_Regex_PrintList(), and NULL is
 *    returned.
 *  - pret_regex_ == NULL: every copy is printed; the copy of the LAST
 *    match is returned (malloc()ed, caller frees) or NULL if nothing
 *    matched. If pAisRawData_ is not NULL, its Data is replaced by a
 *    strdup() of the text left after the last match.
 *
 * pText_ is only read. When the pattern does not compile, the regerror()
 * text is printed as "ErrMsg: ..." and NULL is returned.
 */
char *Regex_MSG(pRet_Regex pret_regex_, char *Regex_rule, char *pText_,
                pTAisRawData pAisRawData_, int FrontShifting, int BackShifting)
{
    regex_t oRegex;
    regmatch_t pmatch;
    const char *pText = pText_;
    char *OutChar = NULL;
    size_t resume = 0;

    if (Regex_rule == NULL || pText_ == NULL) {
        return NULL;
    }

    int nErrCode = regcomp(&oRegex, Regex_rule, REG_EXTENDED);
    if (nErrCode != 0) {
        /* A regex_t that failed to compile must not be passed to regfree. */
        char szErrMsg[1024] = {0};
        regerror(nErrCode, &oRegex, szErrMsg, sizeof(szErrMsg));
        printf("ErrMsg: %s\n", szErrMsg);
        return NULL;
    }

    if (pret_regex_ != NULL) {
        while (regexec(&oRegex, pText, 1, &pmatch, 0) == 0) {
            char *copy = Regex_CopyShiftedMatch(pText, &pmatch, FrontShifting,
                                                BackShifting, &resume);
            if (copy == NULL) {
                break;
            }
            pRet_Regex pNode = Ret_Regex_Before_Null_Node(pret_regex_);
            Ret_Regex_AddNode(pNode, pNode->next);
            if (pNode->next == NULL) {
                free(copy);
                break;
            }
            pNode->next->Data = copy;
            pText += resume;
        }
        Ret_Regex_PrintList(pret_regex_);
    } else {
        while (regexec(&oRegex, pText, 1, &pmatch, 0) == 0) {
            char *copy = Regex_CopyShiftedMatch(pText, &pmatch, FrontShifting,
                                                BackShifting, &resume);
            if (copy == NULL) {
                break;
            }
            /* Only the last match is returned; release the previous one. */
            free(OutChar);
            OutChar = copy;
            printf("Out char is %s \n", OutChar);
            pText += resume;
        }
        if (pAisRawData_ != NULL) {
            /* Duplicate before freeing: the remaining text may live inside
             * the buffer that is about to be released (e.g. when the caller
             * passed pAisRawData_->Data as pText_). */
            char *rest = strdup(pText);
            if (rest != NULL) {
                free(pAisRawData_->Data);
                pAisRawData_->Data = rest;
            }
        }
    }

    regfree(&oRegex);
    return OutChar;
}