标准C++以及MFC6.0字符串的tokenize和split函数

来源:互联网 发布:mac怎么把图标放到桌面 编辑:程序博客网 时间:2024/06/06 01:15

标准C++字符串string以及MFC6.0字符串CString的tokenize和split函数。1、标准串的:/********************************************  the tokenize function for std::string*********************************************/#include <string>#include <vector>#include <iostream>using namespace std;typedef basic_string<char>::size_type S_T;static const S_T npos = -1;////trim指示是否保留空串,默认为保留。vector<string> tokenize(const string& src, string tok,  bool trim=false, string null_subst=""){ if( src.empty() || tok.empty() ) throw "tokenize: empty string/0";  vector<string> v; S_T pre_index = 0, index = 0, len = 0; while( (index = src.find_first_of(tok, pre_index)) != npos ) {  if( (len = index-pre_index)!=0 )   v.push_back(src.substr(pre_index, len));  else if(trim==false)   v.push_back(null_subst);  pre_index = index+1; } string endstr = src.substr(pre_index); if( trim==false ) v.push_back( endstr.empty()? null_subst:endstr ); else if( !endstr.empty() ) v.push_back(endstr); return v;}////使用一个完整的串delimit(而不是其中的某个字符)来分割src串,没有trim选项,即严格分割。vector<string> split(const string& src, string delimit, string null_subst=""){ if( src.empty() || delimit.empty() ) throw "split: empty string/0"; vector<string> v; S_T deli_len = delimit.size(); long index = npos, last_search_position = 0; while( (index=src.find(delimit, last_search_position))!=npos ) {  if(index==last_search_position)   v.push_back(null_subst);  else   v.push_back( src.substr(last_search_position, index-last_search_position) );  last_search_position = index + deli_len; } string last_one = src.substr(last_search_position); v.push_back( last_one.empty()? null_subst:last_one ); return v;}// testint main(void){ string src = ",ab,cde;,,fg,," ; string tok = ",;" ; vector<string> v1 = tokenize(src, tok ,true); vector<string> v2 = tokenize(src, tok ,false, "<null>"); cout<<"-------------v1:"<<endl; for(int i=0; i<v1.size();i++) {  cout<<v1[i].c_str()<<endl; }  cout<<"-------------v2:"<<endl; for(int j=0; j<v2.size();j++) {  cout<<v2[j].c_str()<<endl; } try{   string s = "######123#4###56########789###";  string del = "";//"###";  vector<string> v3 = split(s, del, "<null>");  cout<<"-------------v3:"<<endl;  for(int k=0; k<v3.size();k++)  {   cout<<v3[k].c_str()<<endl;  } } catch (char *s) {  cout<<s<<endl; } return 0;}2、CString版的:#include <stdio.h>#include <afx.h>/* * 该函数用delimits里的字符拆分s,传出一个CStringList指针pList, * 若trim为真,则不保留分割后的空串(注意不是空白字符)。比如: * Tokenize( "a,bc;,d,", ",;", &out_list, TRUE) * 会返回3个串:a、bc、d。 * 若trim为FALSE,则用nullSubst用来替代分割后的空串,比如: *  Tokenize( "a,bc;,d;", ",;", &out_list, FALSE,"[null]" ) * 会返回5个串:a、bc、[null]、d、[null]。 * trim默认为FALSE,nullSubst默认为空串。 */void Tokenize(CString s, CString delimits, CStringList* pList, BOOL trim=FALSE, CString nullSubst=""){ ASSERT( !s.IsEmpty() && !delimits.IsEmpty() ); s += delimits[0]; for( long index=-1; (index=s.FindOneOf((LPCTSTR)delimits))!=-1; ) {  if(index != 0) pList->AddTail( s.Left(index) );  else if(!trim) pList->AddTail(nullSubst);  s = s.Right(s.GetLength()-index-1); }}/*  * 类似java字符串的split()方法。 * 使用一个完整的串delimit(而不是其中的某个字符)来分割src串,没有trim选项, * 即严格分割。num用来确定最多分割为多少个串,如果是0(默认),则按照delimit * 分割,若为1,则返回源串。 */void Split(const CString& src, CString delimit, CStringList* pOutList, int num=0, CString nullSubst=""){ ASSERT( !src.IsEmpty() && !delimit.IsEmpty() ); if(num==1)  {  pOutList->AddTail(src);  return; } int deliLen = delimit.GetLength(); long index = -1, lastSearchPosition = 0, cnt = 0; while( (index=src.Find(delimit, lastSearchPosition))!=-1 ) {  if(index==lastSearchPosition)   pOutList->AddTail(nullSubst);  else   pOutList->AddTail(src.Mid(lastSearchPosition, index-lastSearchPosition));  lastSearchPosition = index + deliLen;  if(num)  {   ++cnt;   if(cnt+1==num) break;  } } CString lastOne = src.Mid(lastSearchPosition); pOutList->AddTail( lastOne.IsEmpty()? nullSubst:lastOne);}// testint main(void){ CString s = ",ab;cde,f,,;gh,,"; CString sub = ",;"; CStringList list1,list2;  Tokenize(s,sub,&list1,TRUE,"no use"); // <----- printf("-------[Tokenize_trim]-------/n"); POSITION pos1 = list1.GetHeadPosition(); while( pos1!= NULL ) {  printf( list1.GetNext(pos1) );  printf("/n"); } Tokenize(s,sub,&list2,FALSE,"[null]"); // <----- printf("------[Tokenize_no_trim]-----/n"); POSITION pos2 = list2.GetHeadPosition(); while( pos2!= NULL ) {  printf( list2.GetNext(pos2) );  printf("/n"); }  CStringList list3; s = "###0123###567######89###1000###"; sub = "###"; Split(s,sub,&list3, 3, "<null>"); // <----- printf("------[Split]-----/n"); POSITION pos3 = list3.GetHeadPosition(); while( pos3!= NULL ) {  printf( list3.GetNext(pos3) );  printf("/n"); } return 0; }