工具类库系列(一)-StringTool
来源:互联网 发布:让wow.js兼容ie8 编辑:程序博客网 时间:2024/06/08 17:20
好久没写了,最近闲了下来,把这段时间的代码整理整理,将常用的代码按功能整理成一些静态库,便于以后复用
今天是第一个库:libtool,其实就是一些工具类的集合
第一个工具类:StringTool
std::string已经很强大了,但是在实际项目中,还是会遇到一些需求,需要用string提供的接口组装一些字符串相关的功能,整理如下
(PS,本系列所有代码,均不同程度的需要boost库支持,本人使用的是boost1.55.0)
1:将一个字符串的首字母大写
这个功能很简单,就是如果第一个字符是 'a' - 'z',则换成对应的 'A' - 'Z'
2:去除字符串中的所有空白字符:空格,'\t','\r','\n'
这里就是遍历字符串,将相应的字符过滤掉
主要用于读文件之后的初步处理
3:将字符串中所有的字符串A替换成字符串B
这里用到string的find,查找字符串A的位置
仅遍历一遍,已处理过的部分不再处理,即如果字符串“1222”,需要将 “12”替换成“1”,则结果是“122”,不是“1”
该函数目前大部分情况还是用在文件路径中的windows下的"\\"替换成windows/linux兼容的“/”
补充增加 正则表达式替换:
这里用到了一个boost库提供的功能:boost::regex正则表达式regex_replace
4:将字符串按指定分隔符进行分割,分隔符可以是一个字符串
同上,分割后的结果存成一个std::vector返回
5:判断字符串是否是一个合法的数字:int,uint,float
这里用到了一个boost库提供的功能:boost::regex正则表达式regex_match
6:判断字符串是否是合法的Utf8编码
也是遍历,按Utf8的编码规则校验
7:Unicode和Utf8互转
这里仅处理了UCS-2,也是按Utf8的编码规则去压缩/解压缩
8:单宽字节互转
为了windows和linux的兼容,这里用的是mbstowcs/wcstombs
9:Gbk和Utf8互转
有了7和8,这个就是上两项功能的组合 Gbk <-> Unicode <-> Utf8
2017/1/9 修正Utf8ToGbk/GbkToUtf8分别在windows/linux下不同的locale设置
最后上代码
StringTool.h
#ifndef __StringTool_h__
#define __StringTool_h__

#include <string>
#include <vector>

namespace common{
	namespace tool{

		// Collection of static string helpers: case/whitespace utilities,
		// literal and regex replacement, splitting, numeric validation and
		// GBK / Unicode / UTF-8 conversions.
		class StringTool
		{
		public:
			// Returns a copy of str whose first character is upper-cased
			// when it is an ASCII lowercase letter ('a' - 'z').
			static std::string UpcaseFirstChar(const std::string& str);

			// Returns a copy of str with every ' ', '\t', '\r' and '\n' removed.
			static std::string TrimAll(const std::string& str);

			// Returns a copy of str in which the occurrences of the literal
			// string src are replaced by des. The input is scanned once from
			// left to right; replaced text is not scanned again.
			static std::string ReplaceAll(const std::string& str, const std::string& src, const std::string& des);

			// Returns a copy of str in which every match of the regular
			// expression src (boost::regex) is replaced by des.
			static std::string ReplaceReg(const std::string& str, const std::string& src, const std::string& des);

			// Splits str on the separator string `split` and appends the pieces
			// to `values`. The numeric overloads validate each piece with
			// IsUInt / IsInt / IsFloat and return false at the first piece that
			// fails validation (pieces converted before that stay in `values`).
			static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<std::string>& values);
			static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<unsigned int>& values);
			static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<int>& values);
			static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<float>& values);

			// Report whether str matches the numeric pattern for an unsigned
			// integer, a signed integer or a floating point number.
			static bool IsUInt(const std::string& str);
			static bool IsInt(const std::string& str);
			static bool IsFloat(const std::string& str);

			// Reports whether str has the byte structure of UTF-8 encoded text.
			// Input consisting only of ASCII bytes yields false.
			static bool IsUtf8(const std::string& str);

			// Conversions between GBK and UTF-8; the converted string is
			// returned. Implemented as GBK <-> Unicode <-> UTF-8.
			static std::string GbkToUtf8(const std::string& gbk);
			static std::string Utf8ToGbk(const std::string& utf8);

			// Conversions between Unicode (wide string) and UTF-8; the result is
			// written to the first (output) parameter.
			static void UnicodeToUtf8(std::string& utf8, const std::wstring& unicode);
			static void Utf8ToUnicode(std::wstring& unicode, const std::string& utf8);

			// Conversions between multi-byte and wide-character strings.
			// `language` is the locale name used for the conversion; its
			// character type follows the UNICODE build setting.
#ifdef UNICODE
			static std::wstring MbStrToWcStr(const std::string& mbs, const wchar_t* language);
			static std::string WcStrToMbStr(const std::wstring& wcs, const wchar_t* language);
#else
			static std::wstring MbStrToWcStr(const std::string& mbs, const char* language);
			static std::string WcStrToMbStr(const std::wstring& wcs, const char* language);
#endif

		private:
			// Helpers for the Unicode / UTF-8 conversions: compute the number of
			// output units the converted text needs.
			static size_t UnicodeToUtf8Length(const std::wstring& unicode);
			static size_t Utf8ToUnicodeLength(const std::string& utf8);
		};

	}
}

#endif
StringTool.cpp
#include "StringTool.h"

#include <clocale>
#include <cstdlib>
#include <cwchar>
#include <string>
#include <vector>

#include <boost/regex.hpp>

namespace common{
namespace tool{

namespace
{
    // Character type of the locale name taken by MbStrToWcStr / WcStrToMbStr.
    // It mirrors the UNICODE switch used by the declarations in StringTool.h,
    // so a single definition below matches whichever overload is declared.
#ifdef UNICODE
    typedef wchar_t LocaleChar;
#define STRINGTOOL_LOCALE_TEXT(text) L##text
#else
    typedef char LocaleChar;
#define STRINGTOOL_LOCALE_TEXT(text) text
#endif

    // Locale name that selects the GBK code page on the current platform.
    // Both WIN32 and _WIN32 are tested because the compiler itself only
    // predefines _WIN32.
#if defined(WIN32) || defined(_WIN32)
    const LocaleChar* const kGbkLocaleName = STRINGTOOL_LOCALE_TEXT("chs");
#else
    const LocaleChar* const kGbkLocaleName = STRINGTOOL_LOCALE_TEXT("zh_CN.GB18030");
#endif

    // Thin wrapper over setlocale / _wsetlocale for LC_ALL.
    // Passing NULL only queries the current locale.
    inline const LocaleChar* SwitchLocale(const LocaleChar* name)
    {
#ifdef UNICODE
        return _wsetlocale(LC_ALL, name);
#else
        return std::setlocale(LC_ALL, name);
#endif
    }

    // Switches the process-wide C locale (LC_ALL) for the lifetime of the
    // object and restores the previous one on destruction.
    class ScopedLocale
    {
    public:
        explicit ScopedLocale(const LocaleChar* language)
            : m_hasPrevious(false)
        {
            // The query may return NULL; building a string from NULL is
            // undefined behaviour, so only remember a real value.
            const LocaleChar* previous = SwitchLocale(NULL);
            if (NULL != previous)
            {
                m_previous = previous;
                m_hasPrevious = true;
            }
            SwitchLocale(language);
        }

        ~ScopedLocale()
        {
            if (m_hasPrevious)
            {
                SwitchLocale(m_previous.c_str());
            }
        }

    private:
        ScopedLocale(const ScopedLocale&);
        ScopedLocale& operator=(const ScopedLocale&);

        std::basic_string<LocaleChar> m_previous;
        bool m_hasPrevious;
    };

    // Splits str on the separator `split` and appends every piece (including
    // empty ones) to `tokens`. An empty separator cannot advance the search
    // position, so the whole input is appended as a single piece.
    void SplitTokens(const std::string& str, const std::string& split, std::vector<std::string>& tokens)
    {
        if (split.empty())
        {
            tokens.push_back(str);
            return;
        }

        size_t lastPos = 0;
        size_t currPos = str.find(split);
        while (currPos != std::string::npos)
        {
            tokens.push_back(str.substr(lastPos, currPos - lastPos));
            lastPos = currPos + split.length();
            currPos = str.find(split, lastPos);
        }
        tokens.push_back(str.substr(lastPos));
    }

    // Converters used by the numeric SplitStr2List overloads. The token has
    // already been validated by the matching IsXxx predicate.
    unsigned int ParseUInt(const std::string& token)
    {
        // strtoul instead of atoi: values above INT_MAX overflow atoi.
        return static_cast<unsigned int>(std::strtoul(token.c_str(), NULL, 10));
    }

    int ParseInt(const std::string& token)
    {
        return std::atoi(token.c_str());
    }

    float ParseFloat(const std::string& token)
    {
        return static_cast<float>(std::atof(token.c_str()));
    }

    // Shared body of the numeric SplitStr2List overloads: split, strip
    // whitespace from each piece, validate it and append the converted value.
    // Stops and returns false at the first piece that fails validation;
    // values converted before that remain in `values`.
    template <typename T>
    bool SplitAndParse(const std::string& str, const std::string& split, std::vector<T>& values,
        bool (*isValid)(const std::string&), T (*parse)(const std::string&))
    {
        std::vector<std::string> tokens;
        SplitTokens(str, split, tokens);

        for (size_t i = 0; i < tokens.size(); ++i)
        {
            // Trim per piece (after splitting) so that separators which
            // themselves contain whitespace keep working.
            const std::string token = StringTool::TrimAll(tokens[i]);
            if (!isValid(token))
            {
                return false;
            }
            values.push_back(parse(token));
        }
        return true;
    }
}

// Returns a copy of str with its first character upper-cased when that
// character is an ASCII lowercase letter. Empty input is returned as is.
std::string StringTool::UpcaseFirstChar(const std::string& str)
{
    std::string result = str;
    if (!result.empty() && 'a' <= result[0] && result[0] <= 'z')
    {
        result[0] = static_cast<char>(result[0] - ('a' - 'A'));
    }
    return result;
}

// Returns a copy of str with every ' ', '\t', '\r' and '\n' removed.
std::string StringTool::TrimAll(const std::string& str)
{
    std::string result;
    result.reserve(str.size());
    for (size_t i = 0; i < str.length(); ++i)
    {
        const char ch = str[i];
        if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n')
        {
            result += ch;
        }
    }
    return result;
}

// Returns a copy of str in which every occurrence of src is replaced by des.
// The input is scanned once; replaced text is never scanned again, so
// replacing "12" with "1" in "1222" yields "122". An empty src matches
// nothing and str is returned unchanged.
std::string StringTool::ReplaceAll(const std::string& str, const std::string& src, const std::string& des)
{
    if (src.empty())
    {
        return str;
    }

    std::string result;
    result.reserve(str.size());

    size_t lastPos = 0;
    size_t pos = str.find(src);
    while (pos != std::string::npos)
    {
        result.append(str, lastPos, pos - lastPos);
        result += des;
        lastPos = pos + src.length();
        pos = str.find(src, lastPos);
    }
    result.append(str, lastPos, std::string::npos);
    return result;
}

// Returns a copy of str in which every match of the regular expression src
// is replaced by des (boost::regex_replace).
// NOTE(review): constructing boost::regex from an invalid pattern presumably
// throws; the exception is not handled here - confirm callers expect that.
std::string StringTool::ReplaceReg(const std::string& str, const std::string& src, const std::string& des)
{
    boost::regex reg(src);
    return boost::regex_replace(str, reg, des);
}

// Splits str on `split` and appends every piece to `values`.
// Always returns true. An empty separator yields str as the only piece.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<std::string>& values)
{
    SplitTokens(str, split, values);
    return true;
}

// Splits str on `split`; each piece has its whitespace stripped, must pass
// IsUInt and is appended to `values`. Returns false at the first invalid piece.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<unsigned int>& values)
{
    return SplitAndParse(str, split, values, &StringTool::IsUInt, &ParseUInt);
}

// Same as the unsigned overload, validating with IsInt.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<int>& values)
{
    return SplitAndParse(str, split, values, &StringTool::IsInt, &ParseInt);
}

// Same as the unsigned overload, validating with IsFloat.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<float>& values)
{
    return SplitAndParse(str, split, values, &StringTool::IsFloat, &ParseFloat);
}

// Reports whether str consists of decimal digits only.
// NOTE(review): every part of the pattern is optional, so the empty string
// also matches. Kept as is because callers may rely on it.
bool StringTool::IsUInt(const std::string& str)
{
    boost::regex reg("[1-9]?[0-9]*");
    return boost::regex_match(str, reg);
}

// Reports whether str is an optional '-' followed by decimal digits.
// NOTE(review): "" and "-" also match; kept for backward compatibility.
bool StringTool::IsInt(const std::string& str)
{
    boost::regex reg("[-]?[1-9]?[0-9]*");
    return boost::regex_match(str, reg);
}

// Reports whether str is an optional '-', digits, an optional '.' and digits.
// NOTE(review): "", "-" and "." also match; kept for backward compatibility.
bool StringTool::IsFloat(const std::string& str)
{
    boost::regex reg("[-]?[1-9]?[0-9]*[\\.]?[0-9]*");
    return boost::regex_match(str, reg);
}

// Reports whether str has the byte structure of UTF-8 text: every lead byte
// is followed by the right number of 10xxxxxx continuation bytes (sequences
// of up to six bytes are accepted). Input made only of ASCII bytes returns
// false, because it gives no evidence of being UTF-8.
bool StringTool::IsUtf8(const std::string& str)
{
    unsigned int pending = 0; // continuation bytes still expected
    bool allAscii = true;

    for (size_t i = 0; i < str.length(); ++i)
    {
        const unsigned char ch = static_cast<unsigned char>(str[i]);
        if ((ch & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pending == 0)
        {
            if (ch >= 0x80)
            {
                if (ch >= 0xFE)
                {
                    return false; // 0xFE / 0xFF never start a sequence
                }
                else if (ch >= 0xFC)
                {
                    pending = 5;
                }
                else if (ch >= 0xF8)
                {
                    pending = 4;
                }
                else if (ch >= 0xF0)
                {
                    pending = 3;
                }
                else if (ch >= 0xE0)
                {
                    pending = 2;
                }
                else if (ch >= 0xC0)
                {
                    pending = 1;
                }
                else
                {
                    return false; // continuation byte where a lead byte is required
                }
            }
        }
        else
        {
            // Non-leading bytes of a multi-byte sequence must be 10xxxxxx.
            if ((ch & 0xC0) != 0x80)
            {
                return false;
            }
            --pending;
        }
    }

    if (pending > 0)
    {
        return false; // input ends inside a multi-byte sequence
    }
    return !allAscii;
}

// Converts GBK text to UTF-8 via GBK -> Unicode -> UTF-8.
std::string StringTool::GbkToUtf8(const std::string& gbk)
{
    const std::wstring unicode = MbStrToWcStr(gbk, kGbkLocaleName);
    std::string utf8;
    UnicodeToUtf8(utf8, unicode);
    return utf8;
}

// Converts UTF-8 text to GBK via UTF-8 -> Unicode -> GBK.
std::string StringTool::Utf8ToGbk(const std::string& utf8)
{
    std::wstring unicode;
    Utf8ToUnicode(unicode, utf8);
    return WcStrToMbStr(unicode, kGbkLocaleName);
}

// Encodes `unicode` as UTF-8 into `utf8` (previous content is discarded).
// Each wide character is narrowed to 16 bits, so only values up to 0xFFFF
// are handled. The result holds exactly the encoded bytes, with no embedded
// terminator.
void StringTool::UnicodeToUtf8(std::string& utf8, const std::wstring& unicode)
{
    utf8.clear();
    utf8.reserve(UnicodeToUtf8Length(unicode));

    for (size_t i = 0; i < unicode.length(); ++i)
    {
        const unsigned short ch = static_cast<unsigned short>(unicode[i]);
        if (0x0800 <= ch)
        {
            // 0x0800 - 0xffff => 1110 XXXX, 10XX XXXX, 10XX XXXX
            utf8 += static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
            utf8 += static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
            utf8 += static_cast<char>((ch & 0x3F) | 0x80);
        }
        else if (0x0080 <= ch)
        {
            // 0x0080 - 0x07ff => 110X XXXX, 10XX XXXX
            utf8 += static_cast<char>(((ch >> 6) & 0x1F) | 0xC0);
            utf8 += static_cast<char>((ch & 0x3F) | 0x80);
        }
        else
        {
            // 0x0000 - 0x007f => 0XXX XXXX
            utf8 += static_cast<char>(ch & 0x7F);
        }
    }
}

// Decodes UTF-8 `utf8` into `unicode` (previous content is discarded).
// Only 1- to 3-byte sequences are handled. Input that contains a longer
// sequence, a stray continuation byte or a truncated sequence produces an
// empty result. Continuation bytes are not validated; use IsUtf8 for that.
void StringTool::Utf8ToUnicode(std::wstring& unicode, const std::string& utf8)
{
    unicode.clear();

    const size_t wcLen = Utf8ToUnicodeLength(utf8);
    if (0 == wcLen)
    {
        return;
    }
    unicode.reserve(wcLen);

    const size_t utf8Len = utf8.length();
    size_t pos = 0;
    while (pos < utf8Len)
    {
        const unsigned int b0 = static_cast<unsigned char>(utf8[pos]);
        if (0xF0 <= b0)
        {
            break; // needs UCS-4, not handled
        }
        else if (0xE0 <= b0)
        {
            // 1110 XXXX, 10XX XXXX, 10XX XXXX
            if (utf8Len - pos < 3)
            {
                break; // truncated sequence: never read past the end
            }
            const unsigned int b1 = static_cast<unsigned char>(utf8[pos + 1]);
            const unsigned int b2 = static_cast<unsigned char>(utf8[pos + 2]);
            unicode += static_cast<wchar_t>(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
            pos += 3;
        }
        else if (0xC0 <= b0)
        {
            // 110X XXXX, 10XX XXXX
            if (utf8Len - pos < 2)
            {
                break;
            }
            const unsigned int b1 = static_cast<unsigned char>(utf8[pos + 1]);
            unicode += static_cast<wchar_t>(((b0 & 0x1F) << 6) | (b1 & 0x3F));
            pos += 2;
        }
        else if (0x80 <= b0)
        {
            break; // 10XX XXXX is never a valid lead byte
        }
        else
        {
            // 0XXX XXXX
            unicode += static_cast<wchar_t>(b0);
            pos += 1;
        }
    }
}

// Returns the number of UTF-8 bytes needed to encode `unicode`
// (values are narrowed to 16 bits, matching UnicodeToUtf8).
size_t StringTool::UnicodeToUtf8Length(const std::wstring& unicode)
{
    size_t bytes = 0;
    for (size_t i = 0; i < unicode.length(); ++i)
    {
        const unsigned short ch = static_cast<unsigned short>(unicode[i]);
        if (0x0800 <= ch)
        {
            bytes += 3;
        }
        else if (0x0080 <= ch)
        {
            bytes += 2;
        }
        else
        {
            bytes += 1;
        }
    }
    return bytes;
}

// Returns the number of wide characters encoded in `utf8`, or 0 when the
// input contains something Utf8ToUnicode cannot decode: a sequence of four
// or more bytes, a stray continuation byte, or a truncated sequence.
size_t StringTool::Utf8ToUnicodeLength(const std::string& utf8)
{
    size_t wchars = 0;
    const size_t utf8Len = utf8.length();
    size_t pos = 0;
    while (pos < utf8Len)
    {
        const unsigned char ch = static_cast<unsigned char>(utf8[pos]);
        size_t seqLen = 0;
        if (0xF0 <= ch)
        {
            return 0; // needs UCS-4, not handled
        }
        else if (0xE0 <= ch)
        {
            seqLen = 3;
        }
        else if (0xC0 <= ch)
        {
            seqLen = 2;
        }
        else if (0x80 <= ch)
        {
            return 0; // 10XX XXXX is never a valid lead byte
        }
        else
        {
            seqLen = 1;
        }

        if (utf8Len - pos < seqLen)
        {
            return 0; // sequence runs past the end of the input
        }
        pos += seqLen;
        wchars += 1;
    }
    return wchars;
}

// Converts the multi-byte string `mbs` to a wide string using the locale
// named by `language`. The C locale is switched for the duration of the
// call and restored afterwards. Returns an empty string when the input is
// empty or cannot be converted in that locale.
std::wstring StringTool::MbStrToWcStr(const std::string& mbs, const LocaleChar* language)
{
    std::wstring wcs;
    const ScopedLocale localeGuard(language);

    const size_t failed = static_cast<size_t>(-1);
    const size_t wcLen = std::mbstowcs(NULL, mbs.c_str(), 0);
    if (failed != wcLen && 0 < wcLen)
    {
        std::vector<wchar_t> buffer(wcLen + 1, L'\0');
        const size_t written = std::mbstowcs(&buffer[0], mbs.c_str(), wcLen + 1);
        if (failed != written)
        {
            wcs.assign(&buffer[0], written);
        }
    }
    return wcs;
}

// Converts the wide string `wcs` to a multi-byte string using the locale
// named by `language`. The C locale is switched for the duration of the
// call and restored afterwards. Returns an empty string when the input is
// empty or cannot be converted in that locale.
std::string StringTool::WcStrToMbStr(const std::wstring& wcs, const LocaleChar* language)
{
    std::string mbs;
    const ScopedLocale localeGuard(language);

    const size_t failed = static_cast<size_t>(-1);
    const size_t mbLen = std::wcstombs(NULL, wcs.c_str(), 0);
    if (failed != mbLen && 0 < mbLen)
    {
        std::vector<char> buffer(mbLen + 1, '\0');
        const size_t written = std::wcstombs(&buffer[0], wcs.c_str(), mbLen + 1);
        if (failed != written)
        {
            mbs.assign(&buffer[0], written);
        }
    }
    return mbs;
}

}
}
- 工具类库系列(一)-StringTool
- java工具类系列 (一.StringUtils)
- 工具系列(一)
- SQL 工具系列一
- 令仔学多线程系列(一)----同步工具类CountDownLatch
- 懒人的工具系列一:Digester
- codereivew 系列一:Git系工具
- Android基础工具类重构系列一Toast
- 工具类库系列(二)-ExePath
- 工具类库系列(三)-IniReader
- 工具类库系列(四)-CsvReader
- 工具类库系列(五)-Timer
- 工具类库系列(六)-TimeTool
- 工具类库系列(七)-Logger
- 工具类库系列(八)-WinService
- 工具类库系列(九)-ReflectEnum
- 工具类库系列(十)-Object
- 工具类库系列(十一)-ObjectMap
- MediaPlayer+TextureView实现小视频居中(不拉伸)播放
- dispatchkeyevent的调用机制
- protobuf
- 一个字符串在另一个字符串中出现的次数的最简方法
- DOM chapter05
- 工具类库系列(一)-StringTool
- SPOJ Time Limit Exceeded
- HM平台之xCompressCU(TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth, PartSize eParent)函数
- bzoj 3339 Rmq problem 离线+线段树
- 基于cookie-redis实现单点登录的原理浅谈
- 进程通信之无名管道
- I/O复用
- Android中访问sdcard路径的几种方式
- tcp Send_Q Recv-Q 字段含义 timer 字段含义