tokenizer
来源:互联网 发布:windows怎么看显卡 编辑:程序博客网 时间:2024/04/29 22:53
例一:
// simple_example_1.cpp
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "This is, a test";
tokenizer<> tok(s);
for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
输出
This
is
a
test
tokenizer默认将单词以空格和标点为边界分开.
例二:
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
tokenizer<escaped_list_separator<char> > tok(s);
for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
输出
Field 1
putting quotes around fields, allows commas
Field 3
双引号之间可以有标点.
例三:
// simple_example_3.cpp
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "12252001";
int offsets[] = {2,2,4};
offset_separator f(offsets, offsets+3);
tokenizer<offset_separator> tok(s,f);
for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
把12252001分解为
12
25
2001
例4:
// char_sep_example_1.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = ";!!;Hello|world||-foo--bar;yow;baz|";
typedef boost::tokenizer<boost::char_separator<char> >
tokenizer;
boost::char_separator<char> sep("-;|");
tokenizer tokens(str, sep);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
输出
<!!> <Hello> <world> <foo> <bar> <yow> <baz>
自定义分隔的标点
例5:
// char_sep_example_2.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = ";;Hello|world||-foo--bar;yow;baz|";
typedef boost::tokenizer<boost::char_separator<char> >
tokenizer;
boost::char_separator<char> sep("-;", "|", boost::keep_empty_tokens);
tokenizer tokens(str, sep);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
The output is:
<> <> <Hello> <|> <world> <|> <> <|> <> <foo> <> <bar> <yow> <baz> <|> <>
去除-; , 保留|但将它看作是分隔符,当两个分隔符相邻的时候会自动加空格
例6:
// char_sep_example_3.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = "This is, a test";
typedef boost::tokenizer<boost::char_separator<char> > Tok;
boost::char_separator<char> sep; // default constructed
Tok tok(str, sep);
for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
The output is:
<This> <is> <,> <a> <test>
保留标点但将它看作分隔符
// simple_example_1.cpp
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "This is, a test";
tokenizer<> tok(s);
for(tokenizer<>::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
输出
This
is
a
test
tokenizer默认将单词以空格和标点为边界分开.
例二:
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
tokenizer<escaped_list_separator<char> > tok(s);
for(tokenizer<escaped_list_separator<char> >::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
输出
Field 1
putting quotes around fields, allows commas
Field 3
双引号之间可以有标点.
例三:
// simple_example_3.cpp
#include<iostream>
#include<boost/tokenizer.hpp>
#include<string>
int main(){
using namespace std;
using namespace boost;
string s = "12252001";
int offsets[] = {2,2,4};
offset_separator f(offsets, offsets+3);
tokenizer<offset_separator> tok(s,f);
for(tokenizer<offset_separator>::iterator beg=tok.begin(); beg!=tok.end();++beg){
cout << *beg << "\n";
}
}
把12252001分解为
12
25
2001
例4:
// char_sep_example_1.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = ";!!;Hello|world||-foo--bar;yow;baz|";
typedef boost::tokenizer<boost::char_separator<char> >
tokenizer;
boost::char_separator<char> sep("-;|");
tokenizer tokens(str, sep);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
输出
<!!> <Hello> <world> <foo> <bar> <yow> <baz>
自定义分隔的标点
例5:
// char_sep_example_2.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = ";;Hello|world||-foo--bar;yow;baz|";
typedef boost::tokenizer<boost::char_separator<char> >
tokenizer;
boost::char_separator<char> sep("-;", "|", boost::keep_empty_tokens);
tokenizer tokens(str, sep);
for (tokenizer::iterator tok_iter = tokens.begin();
tok_iter != tokens.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
The output is:
<> <> <Hello> <|> <world> <|> <> <|> <> <foo> <> <bar> <yow> <baz> <|> <>
去除-; , 保留|但将它看作是分隔符,当两个分隔符相邻的时候会自动加空格
例6:
// char_sep_example_3.cpp
#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
int main()
{
std::string str = "This is, a test";
typedef boost::tokenizer<boost::char_separator<char> > Tok;
boost::char_separator<char> sep; // default constructed
Tok tok(str, sep);
for(Tok::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
std::cout << "<" << *tok_iter << "> ";
std::cout << "\n";
return EXIT_SUCCESS;
}
The output is:
<This> <is> <,> <a> <test>
保留标点但将它看作分隔符
- tokenizer
- boost tokenizer
- boost tokenizer
- 19、Tokenizer
- boost::tokenizer
- boost tokenizer
- tokenizer.h
- Boost tokenizer examples
- [转载]boost tokenizer
- Boost学习笔记-tokenizer
- Tokenizer and TokenFilter 区别
- Boost Tokenizer 使用介绍
- boost之Tokenizer
- 【Boost】boost::tokenizer详解
- boost::tokenizer详解
- Boost学习笔记-tokenizer
- Tokenizer for Python source
- boost::tokenizer分词器
- 【嵌入式开发视频教程整套26个视频】Linux嵌入式开发
- VS2010使用扩展方法对List<T>进行随机排序
- windows平台下vlc编译之十:vlc-1.0.0的编译
- Eclipse自动提示功能
- ubuntu 10.10 安装配置samba
- tokenizer
- 使用Eclipse,Kemulator模拟器配置J2ME工程
- Flex中的弹出窗口(mx.managers.PopUpManager)
- android Service
- WCF,Net remoting,Web service
- struts2改变后缀的方式
- 如何启用SharePoint 2010的代码块
- 如何获取JBOSS源码
- 智能指针