工具类库系列(一)-StringTool

来源:互联网 发布:让wow.js兼容ie8 编辑:程序博客网 时间:2024/06/08 17:20

好久没写了,最近闲了下来,把这段时间的代码整理整理,将常用的代码按功能整理成一些静态库,便于以后复用


今天是第一个库:libtool,其实就是一些工具类的集合


第一个工具类:StringTool


std::string已经很强大了,但是在实际项目中,还是会遇到一些需求,需要用string提供的接口组装一些字符串相关的功能,整理如下


(PS,本系列所有代码,均不同程度的需要boost库支持,本人使用的是boost1.55.0)


1:将一个字符串的首字母大写

这个功能很简单:如果第一个字符在 'a' - 'z' 之间,则换成对应的 'A' - 'Z'


2:去除字符串中的所有空白字符:空格,'\t','\r','\n'

这里就是遍历字符串,将相应的字符过滤掉

主要用于读文件之后的初步处理


3:将字符串中所有的字符串A替换成字符串B

这里用到string的find,查找字符串A的位置

仅遍历一遍,已处理过的部分不再处理,即如果字符串“1222”,需要将 “12”替换成“1”,则结果是“122”,不是“1”

该函数目前大部分情况还是用在文件路径中的windows下的"\\"替换成windows/linux兼容的“/”


补充增加 正则表达式替换:

这里用到了一个boost库提供的功能:boost::regex正则表达式regex_replace


4:将字符串按指定分隔符进行分割,分隔符可以是一个字符串

同上,分割后的结果存成一个std::vector返回


5:判断字符串是否是一个合法的数字:int,uint,float

这里用到了一个boost库提供的功能:boost::regex正则表达式regex_match


6:判断字符串是否是合法的Utf8编码

也是遍历,按Utf8的编码规则校验


7:Unicode和Utf8互转

这里仅处理了UCS-2,也是按Utf8的编码规则去压缩/解压缩


8:多字节字符串与宽字符串互转

为了windows和linux的兼容,这里用的是mbstowcs/wcstombs


9:Gbk和Utf8互转

有了7和8,这个就是上两项功能的组合 Gbk <-> Unicode <-> Utf8


2017/1/9 修正Utf8ToGbk/GbkToUtf8分别在windows/linux下不同的locale设置


最后上代码

StringTool.h

#ifndef COMMON_TOOL_STRINGTOOL_H
#define COMMON_TOOL_STRINGTOOL_H

// Note: the include guard deliberately avoids a leading double underscore,
// because such identifiers are reserved for the implementation.

#include <cstddef>
#include <string>
#include <vector>

namespace common
{
namespace tool
{

/// Collection of stateless string helpers; every member is static.
class StringTool
{
public:
    /// Returns a copy of str whose first character is upper-cased when it is
    /// an ASCII lowercase letter ('a'..'z'); otherwise an unchanged copy.
    static std::string UpcaseFirstChar(const std::string& str);

    /// Returns a copy of str with every ' ', '\t', '\r' and '\n' removed.
    static std::string TrimAll(const std::string& str);

    /// Returns a copy of str in which every occurrence of src is replaced by
    /// des. The input is scanned once; replaced text is never re-scanned.
    static std::string ReplaceAll(const std::string& str, const std::string& src, const std::string& des);

    /// Returns a copy of str in which every match of the regular expression
    /// src is replaced by des (implemented with boost::regex_replace).
    static std::string ReplaceReg(const std::string& str, const std::string& src, const std::string& des);

    /// Splits str on the separator string split and appends the pieces to
    /// values. The numeric overloads validate every piece and return false
    /// as soon as a piece is not a valid number of the requested type.
    static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<std::string>& values);
    static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<unsigned int>& values);
    static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<int>& values);
    static bool SplitStr2List(const std::string& str, const std::string& split, std::vector<float>& values);

    /// Return whether str is textually a valid unsigned integer / signed
    /// integer / floating point number.
    static bool IsUInt(const std::string& str);
    static bool IsInt(const std::string& str);
    static bool IsFloat(const std::string& str);

    /// Returns whether str is a structurally valid UTF-8 byte sequence.
    /// Pure-ASCII input is reported as "not UTF-8" (returns false).
    static bool IsUtf8(const std::string& str);

    /// GBK <-> UTF-8 conversion; returns the converted string.
    /// Implemented as GBK <-> wide string <-> UTF-8.
    static std::string GbkToUtf8(const std::string& gbk);
    static std::string Utf8ToGbk(const std::string& utf8);

    /// Wide string <-> UTF-8 conversion; the result is written to the first
    /// (output) parameter. Only the UCS-2 range is handled.
    static void UnicodeToUtf8(std::string& utf8, const std::wstring& unicode);
    static void Utf8ToUnicode(std::wstring& unicode, const std::string& utf8);

    /// Multibyte <-> wide string conversion using mbstowcs / wcstombs under
    /// the C locale named by language. The locale name is a wide string when
    /// UNICODE is defined and a narrow string otherwise.
#ifdef UNICODE
    static std::wstring MbStrToWcStr(const std::string& mbs, const wchar_t* language);
    static std::string WcStrToMbStr(const std::wstring& wcs, const wchar_t* language);
#else
    static std::wstring MbStrToWcStr(const std::string& mbs, const char* language);
    static std::string WcStrToMbStr(const std::wstring& wcs, const char* language);
#endif

private:
    /// Number of output units (UTF-8 bytes / wide characters) the matching
    /// conversion above produces for the given input.
    static std::size_t UnicodeToUtf8Length(const std::wstring& unicode);
    static std::size_t Utf8ToUnicodeLength(const std::string& utf8);
};

}
}

#endif // COMMON_TOOL_STRINGTOOL_H


StringTool.cpp

#include "StringTool.h"

#include <clocale>
#include <cstddef>
#include <cstdlib>
#include <cwchar>
#include <vector>

#include <boost/regex.hpp>

namespace common
{
namespace tool
{

namespace
{

// Character type of the locale name taken by MbStrToWcStr / WcStrToMbStr.
// It mirrors the UNICODE switch used by the declarations in StringTool.h.
#ifdef UNICODE
typedef wchar_t LocaleChar;
typedef std::wstring LocaleString;

// Thin wrapper so the rest of the file does not need UNICODE conditionals.
inline const LocaleChar* ChangeCLocale(const LocaleChar* name)
{
    return _wsetlocale(LC_ALL, name);
}
#else
typedef char LocaleChar;
typedef std::string LocaleString;

// Thin wrapper so the rest of the file does not need UNICODE conditionals.
inline const LocaleChar* ChangeCLocale(const LocaleChar* name)
{
    return std::setlocale(LC_ALL, name);
}
#endif

// Locale name used for GBK text; it differs between Windows and other systems.
#if defined(WIN32) || defined(_WIN32)
#ifdef UNICODE
const LocaleChar* const kGbkLocale = L"chs";
#else
const LocaleChar* const kGbkLocale = "chs";
#endif
#else
#ifdef UNICODE
const LocaleChar* const kGbkLocale = L"zh_CN.GB18030";
#else
const LocaleChar* const kGbkLocale = "zh_CN.GB18030";
#endif
#endif

// Switches LC_ALL to the requested locale for the lifetime of the object and
// restores the previous locale on destruction, including when an exception
// propagates. setlocale changes process-wide state, so this is not safe to
// use concurrently from several threads.
class ScopedLocale
{
public:
    explicit ScopedLocale(const LocaleChar* name)
        : hasSaved_(false)
    {
        const LocaleChar* current = ChangeCLocale(NULL);
        if (current != NULL)
        {
            // Copy first: the returned buffer may be overwritten by the next call.
            saved_ = current;
            hasSaved_ = true;
        }
        // If the requested locale is unavailable the call fails and the
        // current locale simply stays active (same as the historic behaviour).
        ChangeCLocale(name);
    }

    ~ScopedLocale()
    {
        if (hasSaved_)
        {
            ChangeCLocale(saved_.c_str());
        }
    }

private:
    ScopedLocale(const ScopedLocale&);            // non-copyable
    ScopedLocale& operator=(const ScopedLocale&); // non-copyable

    LocaleString saved_;
    bool hasSaved_;
};

// Splits str on every occurrence of split and appends the pieces (including
// empty ones) to tokens. Returns false without touching tokens when split is
// empty, because an empty separator can never advance the scan.
bool SplitTokens(const std::string& str, const std::string& split, std::vector<std::string>& tokens)
{
    if (split.empty())
    {
        return false;
    }

    std::string::size_type lastPos = 0;
    std::string::size_type currPos = str.find(split);
    while (currPos != std::string::npos)
    {
        tokens.push_back(str.substr(lastPos, currPos - lastPos));
        lastPos = currPos + split.length();
        currPos = str.find(split, lastPos);
    }
    tokens.push_back(str.substr(lastPos));
    return true;
}

} // namespace

/// Returns a copy of str with its first character upper-cased when that
/// character is an ASCII lowercase letter; an empty string is returned as is.
std::string StringTool::UpcaseFirstChar(const std::string& str)
{
    std::string result = str;
    if (!result.empty() && 'a' <= result[0] && result[0] <= 'z')
    {
        result[0] = static_cast<char>(result[0] - ('a' - 'A'));
    }
    return result;
}

/// Returns a copy of str with every ' ', '\t', '\r' and '\n' removed.
/// The argument itself is not modified.
std::string StringTool::TrimAll(const std::string& str)
{
    std::string result;
    result.reserve(str.size());
    for (std::string::size_type i = 0; i < str.length(); ++i)
    {
        const char ch = str[i];
        if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n')
        {
            result += ch;
        }
    }
    return result;
}

/// Replaces every occurrence of src in str by des and returns the result.
/// The input is scanned once, so text produced by a replacement is never
/// matched again ("1222" with "12" -> "1" yields "122").
/// An empty src matches nothing, so str is returned unchanged.
std::string StringTool::ReplaceAll(const std::string& str, const std::string& src, const std::string& des)
{
    if (src.empty())
    {
        return str;
    }

    std::string result;
    result.reserve(str.size());

    std::string::size_type lastPos = 0;
    std::string::size_type pos = str.find(src);
    while (pos != std::string::npos)
    {
        result.append(str, lastPos, pos - lastPos);
        result += des;
        lastPos = pos + src.length();
        pos = str.find(src, lastPos);
    }
    // Copy whatever follows the last match (possibly nothing).
    result.append(str, lastPos, std::string::npos);
    return result;
}

/// Replaces every match of the regular expression src in str by des.
/// The boost::regex constructor reports an invalid pattern by throwing;
/// that exception is not caught here.
std::string StringTool::ReplaceReg(const std::string& str, const std::string& src, const std::string& des)
{
    const boost::regex reg(src);
    return boost::regex_replace(str, reg, des);
}

/// Splits str on split and appends every piece (empty pieces included) to
/// values. Returns false, leaving values untouched, when split is empty.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<std::string>& values)
{
    return SplitTokens(str, split, values);
}

/// Splits str on split, strips whitespace from each piece, and appends the
/// pieces to values as unsigned integers. Returns false on an empty
/// separator or at the first piece that is not a valid unsigned integer;
/// pieces converted before the failure remain in values.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<unsigned int>& values)
{
    std::vector<std::string> tokens;
    if (!SplitTokens(str, split, tokens))
    {
        return false;
    }

    for (std::vector<std::string>::size_type i = 0; i < tokens.size(); ++i)
    {
        // Trim per token so that separators containing whitespace still match.
        const std::string token = TrimAll(tokens[i]);
        if (!IsUInt(token))
        {
            return false;
        }
        // strtoul instead of atoi: atoi cannot represent values above INT_MAX.
        values.push_back(static_cast<unsigned int>(std::strtoul(token.c_str(), NULL, 10)));
    }
    return true;
}

/// Same as the unsigned overload, but for signed integers.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<int>& values)
{
    std::vector<std::string> tokens;
    if (!SplitTokens(str, split, tokens))
    {
        return false;
    }

    for (std::vector<std::string>::size_type i = 0; i < tokens.size(); ++i)
    {
        const std::string token = TrimAll(tokens[i]);
        if (!IsInt(token))
        {
            return false;
        }
        values.push_back(std::atoi(token.c_str()));
    }
    return true;
}

/// Same as the unsigned overload, but for floating point numbers.
bool StringTool::SplitStr2List(const std::string& str, const std::string& split, std::vector<float>& values)
{
    std::vector<std::string> tokens;
    if (!SplitTokens(str, split, tokens))
    {
        return false;
    }

    for (std::vector<std::string>::size_type i = 0; i < tokens.size(); ++i)
    {
        const std::string token = TrimAll(tokens[i]);
        if (!IsFloat(token))
        {
            return false;
        }
        values.push_back(static_cast<float>(std::atof(token.c_str())));
    }
    return true;
}

/// Returns true when str consists of one or more decimal digits.
bool StringTool::IsUInt(const std::string& str)
{
    // '+' rather than '*': the empty string is not a number.
    const boost::regex reg("[0-9]+");
    return boost::regex_match(str, reg);
}

/// Returns true when str is an optional '-' followed by one or more digits.
bool StringTool::IsInt(const std::string& str)
{
    const boost::regex reg("-?[0-9]+");
    return boost::regex_match(str, reg);
}

/// Returns true when str is an optional '-' followed by digits with an
/// optional decimal point. "1.", ".5" and "1.5" are accepted; "", "-" and
/// "." are rejected because they contain no digit at all.
bool StringTool::IsFloat(const std::string& str)
{
    const boost::regex reg("-?(?:[0-9]+\\.?[0-9]*|\\.[0-9]+)");
    return boost::regex_match(str, reg);
}

/// Returns true when str is a structurally valid UTF-8 byte sequence that
/// contains at least one non-ASCII byte. Pure-ASCII input (including the
/// empty string) deliberately yields false. The legacy 5- and 6-byte forms
/// are still accepted; lead bytes 0xFE and 0xFF are rejected.
bool StringTool::IsUtf8(const std::string& str)
{
    unsigned int pending = 0; // continuation bytes still expected
    bool allAscii = true;

    for (std::string::size_type i = 0; i < str.length(); ++i)
    {
        const unsigned char ch = static_cast<unsigned char>(str[i]);
        if ((ch & 0x80) != 0)
        {
            allAscii = false;
        }

        if (pending == 0)
        {
            if (ch < 0x80)
            {
                continue; // plain ASCII byte
            }
            if (ch >= 0xFE)
            {
                return false; // 0xFE / 0xFF never appear in UTF-8
            }
            else if (ch >= 0xFC)
            {
                pending = 5;
            }
            else if (ch >= 0xF8)
            {
                pending = 4;
            }
            else if (ch >= 0xF0)
            {
                pending = 3;
            }
            else if (ch >= 0xE0)
            {
                pending = 2;
            }
            else if (ch >= 0xC0)
            {
                pending = 1;
            }
            else
            {
                return false; // continuation byte without a lead byte
            }
        }
        else
        {
            // Every non-leading byte of a sequence must look like 10xxxxxx.
            if ((ch & 0xC0) != 0x80)
            {
                return false;
            }
            --pending;
        }
    }

    // A sequence cut off at the end of the input is invalid as well.
    return pending == 0 && !allAscii;
}

/// Converts GBK text to UTF-8 via an intermediate wide string.
std::string StringTool::GbkToUtf8(const std::string& gbk)
{
    const std::wstring unicode = MbStrToWcStr(gbk, kGbkLocale);
    std::string utf8;
    UnicodeToUtf8(utf8, unicode);
    return utf8;
}

/// Converts UTF-8 text to GBK via an intermediate wide string.
std::string StringTool::Utf8ToGbk(const std::string& utf8)
{
    std::wstring unicode;
    Utf8ToUnicode(unicode, utf8);
    return WcStrToMbStr(unicode, kGbkLocale);
}

/// Encodes unicode as UTF-8 into utf8, replacing its previous content.
/// Only UCS-2 is handled: every wide character is truncated to 16 bits and
/// encoded as one, two or three bytes. No terminating NUL is stored in utf8.
void StringTool::UnicodeToUtf8(std::string& utf8, const std::wstring& unicode)
{
    utf8.clear();
    utf8.reserve(UnicodeToUtf8Length(unicode));

    for (std::wstring::size_type i = 0; i < unicode.length(); ++i)
    {
        const unsigned short ch = static_cast<unsigned short>(unicode[i]);
        if (ch >= 0x0800)
        {
            // 0x0800 - 0xffff => 1110 XXXX, 10XX XXXX, 10XX XXXX
            utf8 += static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
            utf8 += static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
            utf8 += static_cast<char>((ch & 0x3F) | 0x80);
        }
        else if (ch >= 0x0080)
        {
            // 0x0080 - 0x07ff => 110X XXXX, 10XX XXXX (0x07ff included)
            utf8 += static_cast<char>(((ch >> 6) & 0x1F) | 0xC0);
            utf8 += static_cast<char>((ch & 0x3F) | 0x80);
        }
        else
        {
            // 0x0000 - 0x007f => 0XXX XXXX
            utf8 += static_cast<char>(ch & 0x7F);
        }
    }
}

/// Decodes UTF-8 text into unicode, replacing its previous content.
/// Only one-, two- and three-byte sequences (UCS-2) are supported. When the
/// input contains a longer sequence, a stray continuation byte, or a
/// sequence truncated at the end, unicode is left empty. No terminating NUL
/// is stored in unicode.
void StringTool::Utf8ToUnicode(std::wstring& unicode, const std::string& utf8)
{
    unicode.clear();

    const std::size_t wcLen = Utf8ToUnicodeLength(utf8);
    if (wcLen == 0)
    {
        return;
    }
    unicode.reserve(wcLen);

    const std::string::size_type utf8Len = utf8.length();
    std::string::size_type pos = 0;
    while (pos < utf8Len)
    {
        const unsigned int c0 = static_cast<unsigned char>(utf8[pos]);
        const std::string::size_type remaining = utf8Len - pos;

        if (c0 < 0x80)
        {
            // 0XXX XXXX
            unicode += static_cast<wchar_t>(c0);
            pos += 1;
        }
        else if (0xC0 <= c0 && c0 < 0xE0 && remaining >= 2)
        {
            // 110X XXXX, 10XX XXXX
            const unsigned int c1 = static_cast<unsigned char>(utf8[pos + 1]);
            unicode += static_cast<wchar_t>(((c0 & 0x1F) << 6) | (c1 & 0x3F));
            pos += 2;
        }
        else if (0xE0 <= c0 && c0 < 0xF0 && remaining >= 3)
        {
            // 1110 XXXX, 10XX XXXX, 10XX XXXX
            const unsigned int c1 = static_cast<unsigned char>(utf8[pos + 1]);
            const unsigned int c2 = static_cast<unsigned char>(utf8[pos + 2]);
            unicode += static_cast<wchar_t>(((c0 & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F));
            pos += 3;
        }
        else
        {
            // Defensive only: Utf8ToUnicodeLength already rejected such input.
            break;
        }
    }
}

/// Returns the number of UTF-8 bytes UnicodeToUtf8 produces for unicode.
std::size_t StringTool::UnicodeToUtf8Length(const std::wstring& unicode)
{
    std::size_t bytes = 0;
    for (std::wstring::size_type i = 0; i < unicode.length(); ++i)
    {
        const unsigned short ch = static_cast<unsigned short>(unicode[i]);
        if (ch >= 0x0800)
        {
            bytes += 3;
        }
        else if (ch >= 0x0080)
        {
            bytes += 2;
        }
        else
        {
            bytes += 1;
        }
    }
    return bytes;
}

/// Returns the number of wide characters Utf8ToUnicode produces for utf8,
/// or 0 when the input is empty, needs more than UCS-2 (four or more bytes
/// per character), starts a character with a continuation byte, or ends in
/// the middle of a sequence.
std::size_t StringTool::Utf8ToUnicodeLength(const std::string& utf8)
{
    const std::string::size_type utf8Len = utf8.length();
    std::string::size_type pos = 0;
    std::size_t wchars = 0;

    while (pos < utf8Len)
    {
        const unsigned char lead = static_cast<unsigned char>(utf8[pos]);
        std::string::size_type seqLen = 0;

        if (lead >= 0xF0)
        {
            return 0; // would need UCS-4, not handled
        }
        else if (lead >= 0xE0)
        {
            seqLen = 3;
        }
        else if (lead >= 0xC0)
        {
            seqLen = 2;
        }
        else if (lead >= 0x80)
        {
            return 0; // 10XX XXXX cannot start a character
        }
        else
        {
            seqLen = 1;
        }

        if (utf8Len - pos < seqLen)
        {
            return 0; // sequence truncated at the end of the input
        }
        pos += seqLen;
        wchars += 1;
    }
    return wchars;
}

/// Converts the multibyte string mbs to a wide string with mbstowcs while
/// the C locale named by language is active; the previous locale is restored
/// afterwards. Returns an empty string when mbs is empty or cannot be
/// converted. LocaleChar is wchar_t when UNICODE is defined, char otherwise,
/// which matches the declaration in StringTool.h.
std::wstring StringTool::MbStrToWcStr(const std::string& mbs, const LocaleChar* language)
{
    std::wstring wcs;
    const ScopedLocale localeGuard(language);

    // mbstowcs reports an invalid sequence as (size_t)-1.
    const std::size_t needed = std::mbstowcs(NULL, mbs.c_str(), 0);
    if (needed != static_cast<std::size_t>(-1) && needed > 0)
    {
        std::vector<wchar_t> buffer(needed + 1, L'\0');
        const std::size_t written = std::mbstowcs(&buffer[0], mbs.c_str(), buffer.size());
        if (written != static_cast<std::size_t>(-1))
        {
            wcs.assign(&buffer[0], written);
        }
    }
    return wcs;
}

/// Converts the wide string wcs to a multibyte string with wcstombs while
/// the C locale named by language is active; the previous locale is restored
/// afterwards. Returns an empty string when wcs is empty or cannot be
/// converted. LocaleChar is wchar_t when UNICODE is defined, char otherwise,
/// which matches the declaration in StringTool.h.
std::string StringTool::WcStrToMbStr(const std::wstring& wcs, const LocaleChar* language)
{
    std::string mbs;
    const ScopedLocale localeGuard(language);

    // wcstombs reports an unconvertible character as (size_t)-1.
    const std::size_t needed = std::wcstombs(NULL, wcs.c_str(), 0);
    if (needed != static_cast<std::size_t>(-1) && needed > 0)
    {
        std::vector<char> buffer(needed + 1, '\0');
        const std::size_t written = std::wcstombs(&buffer[0], wcs.c_str(), buffer.size());
        if (written != static_cast<std::size_t>(-1))
        {
            mbs.assign(&buffer[0], written);
        }
    }
    return mbs;
}

}
}


1 0
原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 lg背光灯坏了怎么办 苹果6s屏幕变黄怎么办 苹果8背光坏了怎么办 苹果六背光坏了怎么办 苹果5s蓝屏死机怎么办 苹果6s突然黑屏怎么办? 苹果6s经常死机怎么办 苹果6s没有背光怎么办 苹果7白屏死机怎么办 苹果6p屏膜失灵怎么办 苹果7p屏幕卡死怎么办 苹果7p手机死机怎么办 苹果6s死机了怎么办啊 6s死机了关不了怎么办 爱疯7死机了怎么办 苹果11.4系统老死机怎么办 苹果6震动坏了怎么办 苹果6手机死机了怎么办 苹果x耗电太快怎么办 苹果6升级11.3卡怎么办 苹果6splus耗电快怎么办 苹果手机系统升级后反应慢怎么办 苹果8听筒声音小怎么办 苹果6s通话声音小怎么办 苹果手机通讯录丢失了怎么办 苹果换id通讯录没有了怎么办 华为p7 更新重启怎么办 打电话的图标没了怎么办 苹果手机wifi信号弱怎么办 没电脑想装wifi怎么办 阿巴町手表二维码丢了怎么办 儿童手表二维码丢了怎么办 电话手表二维码丢了怎么办 360儿童手表二维码丢了怎么办 喜书郎电话手表二维码丢了怎么办 微信充话费充错号码怎么办 话费冲到了副卡怎么办 冲话费冲错了怎么办 微信支付未到账怎么办 速卖通修补配件到国外怎么办 速卖通流量低怎么办