regex
来源:互联网 发布:阿里云金融云vpc专线 编辑:程序博客网 时间:2024/05/18 01:57
The regex match and search interface
// main.cpp
// Contrasts regex_match(), which has to match the whole character sequence,
// with regex_search(), which succeeds when any part of the sequence matches.
#include <regex>
#include <iostream>
using namespace std;

// Prints "found" when b is true and "not found" otherwise, then a newline.
void out(bool b)
{
    cout << (b ? "found" : "not found") << endl;
}

int main()
{
    // find XML/HTML-tagged value (using default syntax):
    regex reg1("<.*>.*</.*>");
    bool found = regex_match("<tag>value</tag>",    // data
                             reg1);                 // regular expression
    out(found);

    // find XML/HTML-tagged value (tags before and after the value must match):
    regex reg2("<(.*)>.*</\\1>");
    found = regex_match("<tag>value</tag>",         // data
                        reg2);                      // regular expression
    out(found);

    // find XML/HTML-tagged value (using grep syntax):
    regex reg3("<\\(.*\\)>.*</\\1>", regex_constants::grep);
    found = regex_match("<tag>value</tag>",         // data
                        reg3);                      // regular expression
    out(found);

    // use C-string as regular expression (needs explicit cast to regex):
    found = regex_match("<tag>value</tag>",         // data
                        regex("<(.*)>.*</\\1>"));   // regular expression
    out(found);
    cout << endl;

    // regex_match() versus regex_search():
    found = regex_match("XML tag: <tag>value</tag>",
                        regex("<(.*)>.*</\\1>"));       // fails to match
    out(found);
    found = regex_match("XML tag: <tag>value</tag>",
                        regex(".*<(.*)>.*</\\1>.*"));   // matches
    out(found);
    found = regex_search("XML tag: <tag>value</tag>",
                         regex("<(.*)>.*</\\1>"));      // matches
    out(found);
    found = regex_search("XML tag: <tag>value</tag>",
                         regex(".*<(.*)>.*</\\1>.*"));  // matches
    out(found);
}
dealing with subexpressions
// Shows the details a match_results object (smatch) exposes after a
// successful regex_search(): the full match, its prefix and suffix, and
// every captured sub-expression.
#include <string>
#include <regex>
#include <iostream>
#include <iomanip>
using namespace std;

int main()
{
    string data = "XML tag: <tag-name>the value</tag-name>.";
    cout << "data: " << data << "\n\n";

    smatch m;   // for returned details of the match
    bool found = regex_search(data,
                              m,
                              regex("<(.*)>(.*)</(\\1)>"));

    // print match details:
    cout << "m.empty(): " << boolalpha << m.empty() << endl;
    cout << "m.size(): " << m.size() << endl;
    if (found) {
        cout << "m.str(): " << m.str() << endl;
        cout << "m.length(): " << m.length() << endl;
        cout << "m.position(): " << m.position() << endl;
        cout << "m.prefix().str(): " << m.prefix().str() << endl;
        cout << "m.suffix().str(): " << m.suffix().str() << endl;
        cout << endl;

        // iterating over all matches (using the match index):
        // the index uses smatch::size_type so it is compared with m.size()
        // without mixing signed and unsigned integers
        for (smatch::size_type i = 0; i < m.size(); ++i) {
            cout << "m[" << i << "].str(): " << m[i].str() << endl;
            cout << "m.str(" << i << "): " << m.str(i) << endl;
            cout << "m.position(" << i << "): " << m.position(i)
                 << endl;
        }
        cout << endl;

        // iterating over all matches (using iterators):
        cout << "matches:" << endl;
        for (auto pos = m.begin(); pos != m.end(); ++pos) {
            cout << " " << *pos << " ";
            cout << "(length: " << pos->length() << ")" << endl;
        }
    }
}
data: XML tag: <tag-name>the value</tag-name>.
m.empty(): false
m.size(): 4
m.str(): <tag-name>the value</tag-name>
m.length(): 30
m.position(): 9
m.prefix().str(): XML tag:
m.suffix().str(): .
m[0].str(): <tag-name>the value</tag-name>
m.str(0): <tag-name>the value</tag-name>
m.position(0): 9
m[1].str(): tag-name
m.str(1): tag-name
m.position(1): 10
m[2].str(): the value
m.str(2): the value
m.position(2): 19
m[3].str(): tag-name
m.str(3): tag-name
m.position(3): 30
matches:
<tag-name>the value</tag-name> (length: 30)
tag-name (length: 8)
the value (length: 9)
tag-name (length: 8)
请按任意键继续. . .
find all matches of a regular expression
// Finds every match of a regular expression by calling regex_search()
// repeatedly, restarting each time right after the previous match.
#include <string>
#include <regex>
#include <iostream>
using namespace std;

int main()
{
    string data = "<person>\n"
                  " <first>Nico</first>\n"
                  " <last>Josuttis</last>\n"
                  "</person>\n";

    regex reg("<(.*)>(.*)</(\\1)>");

    // iterate over all matches
    auto pos = data.cbegin();
    auto end = data.cend();
    smatch m;
    // m.suffix().first is the position just behind the current match, so
    // the next search continues with the not-yet-examined rest of data
    for (; regex_search(pos, end, m, reg); pos = m.suffix().first) {
        cout << "match: " << m.str() << endl;
        cout << " tag: " << m.str(1) << endl;
        cout << " value: " << m.str(2) << endl;
    }
}
match: <first>Nico</first>
tag: first
value: Nico
match: <last>Josuttis</last>
tag: last
value: Josuttis
请按任意键继续. . .
Regex iterators
// Walks over all matches of a regular expression with sregex_iterator,
// first in a hand-written loop and then through the for_each() algorithm.
#include <string>
#include <regex>
#include <iostream>
#include <algorithm>
using namespace std;

int main()
{
    string data = "<person>\n"
                  " <first>Nico</first>\n"
                  " <last>Josuttis</last>\n"
                  "</person>\n";

    regex reg("<(.*)>(.*)</(\\1)>");

    // iterate over all matches (using a regex_iterator):
    sregex_iterator pos(data.cbegin(), data.cend(), reg);
    sregex_iterator end;    // default-constructed iterator marks the end
    for (; pos != end; ++pos) {
        cout << "match: " << pos->str() << endl;
        cout << " tag: " << pos->str(1) << endl;
        cout << " value: " << pos->str(2) << endl;
    }

    // use a regex_iterator to process each matched substring as element in an algorithm:
    sregex_iterator beg(data.cbegin(), data.cend(), reg);
    for_each(beg, end, [](const smatch& m) {
        cout << "match: " << m.str() << endl;
        cout << " tag: " << m.str(1) << endl;
        cout << " value: " << m.str(2) << endl;
    });
}
match: <first>Nico</first>
tag: first
value: Nico
match: <last>Josuttis</last>
tag: last
value: Josuttis
match: <first>Nico</first>
tag: first
value: Nico
match: <last>Josuttis</last>
tag: last
value: Josuttis
请按任意键继续. . .
regex Token iterators
// Demonstrates sregex_token_iterator in two roles: selecting specific
// sub-matches of every match, and splitting a string at separators.
#include <string>
#include <regex>
#include <iostream>
#include <algorithm>
using namespace std;

int main()
{
    string data = "<person>\n"
                  " <first>Nico</first>\n"
                  " <last>Josuttis</last>\n"
                  "</person>\n";

    regex reg("<(.*)>(.*)</(\\1)>");

    // iterate over all matches (using a regex_token_iterator):
    sregex_token_iterator pos(data.cbegin(), data.cend(),   // sequence
                              reg,                          // token separator
                              { 0,2 });     // 0: full match, 2: second substring
    sregex_token_iterator end;
    for (; pos != end; ++pos) {
        cout << "match: " << pos->str() << endl;
    }
    cout << endl;

    string names = "nico, jim, helmut, paul, tim, john paul, rita";
    regex sep("[ \t\n]*[,;.][ \t\n]*");     // separated by , ; or . and spaces
    sregex_token_iterator p(names.cbegin(), names.cend(),   // sequence
                            sep,                            // separator
                            -1);            // -1: values between separators
    sregex_token_iterator e;
    for (; p != e; ++p) {
        cout << "name: " << *p << endl;
    }
}
match: <first>Nico</first>
match: Nico
match: <last>Josuttis</last>
match: Josuttis
name: nico
name: jim
name: helmut
name: paul
name: tim
name: john paul
name: rita
请按任意键继续. . .
replacing regular expressions
// Rewrites matched text with regex_replace(), using the default replacement
// syntax, the sed syntax, and the iterator interface with format flags.
#include <string>
#include <regex>
#include <iostream>
#include <iterator>
using namespace std;

int main()
{
    string data = "<person>\n"
                  " <first>Nico</first>\n"
                  " <last>Josuttis</last>\n"
                  "</person>\n";

    regex reg("<(.*)>(.*)</(\\1)>");

    // print data with replacement for matched patterns
    cout << regex_replace(data,                     // data
                          reg,                      // regular expression
                          "<$1 value=\"$2\"/>")     // replacement
         << endl;

    // same using sed syntax
    cout << regex_replace(data,                         // data
                          reg,                          // regular expression
                          "<\\1 value=\"\\2\"/>",       // replacement
                          regex_constants::format_sed)  // format flag
         << endl;

    // use iterator interface, and
    // - format_no_copy: don't copy characters that don't match
    // - format_first_only: replace only the first match found
    string res2;
    regex_replace(back_inserter(res2),              // destination
                  data.begin(), data.end(),         // source range
                  reg,                              // regular expression
                  "<$1 value=\"$2\"/>",             // replacement
                  regex_constants::format_no_copy   // format flags
                  | regex_constants::format_first_only);
    cout << res2 << endl;
}
<person>
<first value="Nico"/>
<last value="Josuttis"/>
</person>
<person>
<first value="Nico"/>
<last value="Josuttis"/>
</person>
<first value="Nico"/>
请按任意键继续. . .
Regex Flags
// Reads all of standard input and prints the LaTeX index entries found in
// it, matching case-insensitively with two alternative egrep patterns.
#include <string>
#include <regex>
#include <iostream>
#include <iterator>     // istreambuf_iterator
using namespace std;

int main()
{
    // case-insensitive find LaTeX index entries
    string pat1 = R"(\\.*index\{([^}]*)\})";        // first capture group
    string pat2 = R"(\\.*index\{(.*)\}\{(.*)\})";   // 2nd and 3rd capture group
    // Using the egrep grammar, we can put a newline character between these
    // two regular expressions.
    regex pat(pat1 + "\n" + pat2,
              regex_constants::egrep | regex_constants::icase);

    // initialize string with characters from standard input:
    string data((istreambuf_iterator<char>(cin)),
                istreambuf_iterator<char>());

    // search and print matching index entries:
    smatch m;
    auto pos = data.cbegin();
    auto end = data.cend();
    for (; regex_search(pos, end, m, pat); pos = m.suffix().first) {
        cout << "match: " << m.str() << endl;
        // groups 1 and 2 are concatenated to form the printed value
        cout << " val: " << m.str(1) + m.str(2) << endl;
        cout << " see: " << m.str(3) << endl;
    }
}
\chapter{The Standard Template Library}
\index{STL}%
\MAININDEX{standard template library}%
\SEEINDEX{standard template library}{STL}%
This is the basic chapter about the STL.
\section{STL Components}
\hauptindex{STL, introduction}%
The \stl{} is based on the cooperation of
^Z
match: \index{STL}
val: STL
see:
match: \MAININDEX{standard template library}
val: standard template library
see:
match: \SEEINDEX{standard template library}{STL}
val: standard template library
see: STL
match: \hauptindex{STL, introduction}
val: STL, introduction
see:
请按任意键继续. . .
Regex exceptions
// regexexception.hpp
#ifndef REGEXEXCEPTION_HPP
#define REGEXEXCEPTION_HPP

#include <regex>
#include <string>

// Translates a regex error code into a readable description.
//
// code: a value that is compared against the std::regex_constants::error_*
//       constants, such as the result of std::regex_error::code().
// Returns "<constant name>: <explanation>" for each code handled below, and
// "unknown/non-standard regex error code" for any other value.
template <typename T>
std::string regexCode(T code)
{
    switch (code) {
    case std::regex_constants::error_collate:
        return "error_collate: "
               "regex has invalid collating element name";
    case std::regex_constants::error_ctype:
        return "error_ctype: "
               "regex has invalid character class name";
    case std::regex_constants::error_escape:
        return "error_escape: "
               "regex has invalid escaped char. or trailing escape";
    case std::regex_constants::error_backref:
        return "error_backref: "
               "regex has invalid back reference";
    case std::regex_constants::error_brack:
        return "error_brack: "
               "regex has mismatched '[' and ']'";
    case std::regex_constants::error_paren:
        return "error_paren: "
               "regex has mismatched '(' and ')'";
    case std::regex_constants::error_brace:
        return "error_brace: "
               "regex has mismatched '{' and '}'";
    case std::regex_constants::error_badbrace:
        return "error_badbrace: "
               "regex has invalid range in {} expression";
    case std::regex_constants::error_range:
        return "error_range: "
               "regex has invalid character range, such as '[b-a]'";
    case std::regex_constants::error_space:
        return "error_space: "
               "insufficient memory to convert regex into finite state";
    case std::regex_constants::error_badrepeat:
        return "error_badrepeat: "
               "one of *?+{ not preceded by valid regex";
    case std::regex_constants::error_complexity:
        return "error_complexity: "
               "complexity of match against regex over pre-set level";
    case std::regex_constants::error_stack:
        return "error_stack: "
               "insufficient memory to determine regex match";
    }
    // reached only when no case label above matched
    return "unknown/non-standard regex error code";
}

#endif // REGEXEXCEPTION_HPP
//Source.cpp#include <regex>#include <iostream>#include "regexexception.hpp"using namespace std;int main(){try {// initialize regular expression with invalid syntax:regex pat("\\\\.*index\\{([^}]*)\\}",regex_constants::grep | regex_constants::icase);//...}catch (const regex_error& e) {cerr << "regex_error: \n"<< " what(): " << e.what() << "\n"<< " code(): " << regexCode(e.code()) << endl;}}
regex_error:
what(): regex_error(error_badbrace): The expression contained an invalid range
in a { expression }.
code(): error_badbrace: regex has invalid range in {} expression
请按任意键继续. . .
- Regex
- Regex
- Regex
- Regex
- regex
- Regex
- regex
- Regex
- regex
- Regex
- regex
- Regex
- regex
- Regex
- Regex
- Regex
- regex
- Regex
- 三大简单排序算法(java)
- ##c提高篇## 第二课 体验函数【项目1-7】
- Mac下Nexue7 2012版刷机Android5.1
- HDU 3746 Cyclic Nacklace(kmp求循环节)
- jQuery16(内容过滤器,子元素过滤器)
- regex
- 深度解析Linux SSH
- 10分钟掌握XML、JSON及其解析(上)
- 10分钟掌握XML、JSON及其解析(中)
- 与字符串有关的一些典型问题的C++解法
- 程序员简历优化之道
- poj2229Sumsets【dp】
- 10分钟掌握XML、JSON及其解析(下)
- hdoj1254推箱子【dfs+bfs】