Windows API实现中文字符串在GBK、Unicode、UTF-8三种编码之间互转

来源:互联网 发布:sdk2000数据无效 编辑:程序博客网 时间:2024/05/20 11:51
#include <iostream>#include <string>#include <Windows.h>using namespace std;//gbk转UTF-8string GbkToUtf8(const std::string& strGbk)//传入的strGbk是GBK编码{//gbk转unicodeint len = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0);wchar_t *strUnicode = new wchar_t[len];wmemset(strUnicode, 0, len);MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, strUnicode, len);//unicode转UTF-8len = WideCharToMultiByte(CP_UTF8, 0, strUnicode, -1, NULL, 0, NULL, NULL);char * strUtf8 = new char[len];WideCharToMultiByte(CP_UTF8, 0, strUnicode, -1, strUtf8, len, NULL, NULL);std::string strTemp(strUtf8);//此时的strTemp是UTF-8编码delete[]strUnicode;delete[]strUtf8;strUnicode = NULL;strUtf8 = NULL;return strTemp;}//UTF-8转gbkstring Utf8ToGbk(const std::string& strUtf8)//传入的strUtf8是UTF-8编码{//UTF-8转unicodeint len = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);wchar_t * strUnicode = new wchar_t[len];//len = 2wmemset(strUnicode, 0, len);MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, strUnicode, len);//unicode转gbklen = WideCharToMultiByte(CP_ACP, 0, strUnicode, -1, NULL, 0, NULL, NULL);char *strGbk = new char[len];//len=3 本来为2,但是char*后面自动加上了\0memset(strGbk, 0, len);WideCharToMultiByte(CP_ACP,0, strUnicode, -1, strGbk, len, NULL, NULL);std::string strTemp(strGbk);//此时的strTemp是GBK编码delete[]strUnicode;delete[]strGbk;strUnicode = NULL;strGbk = NULL;return strTemp;}//gbk转unicode (下面的例子没用到)wstring GbkToUnicode(const std::string& strGbk)//返回值是wstring{int len = MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, NULL, 0);wchar_t *strUnicode = new wchar_t[len];wmemset(strUnicode, 0, len);MultiByteToWideChar(CP_ACP, 0, strGbk.c_str(), -1, strUnicode, len);std::wstring strTemp(strUnicode);//此时的strTemp是Unicode编码delete[]strUnicode;strUnicode = NULL;return strTemp;}//Unicode转gbkstring UnicodeToGbk (const std::wstring& strUnicode)//参数是wstring{int len = WideCharToMultiByte(CP_ACP, 0, strUnicode.c_str(), -1, NULL, 0, NULL, NULL);char *strGbk = new char[len];//len=3 
本来为2,但是char*后面自动加上了\0memset(strGbk, 0, len);WideCharToMultiByte(CP_ACP,0,strUnicode.c_str(), -1, strGbk, len, NULL, NULL);std::string strTemp(strGbk);//此时的strTemp是GBK编码delete[]strGbk;strGbk = NULL;return strTemp;}int main(){//1、ANSI/GBK编码string strGbk = "我";int num = strGbk.size();//获取两个字符数,也是我字所占的字节数unsigned char* p = (unsigned char*)strGbk.c_str();    for (int i = 0; i < num; i++)    {    printf("%0x", *p);    p++;    }  //输出ced2 所以我的GBK编码是0xced2printf("\n");   char gbk[] = {0xce, 0xd2, 0x00}; //加上0x00字符串结束符,不会输出乱码cout<<gbk<<endl;//输出汉字我//2、unicodde编码//方法一//wchar_t str = 0x6211;  //wcout.imbue(locale("chs")); //wcout << str << endl;//输出汉字我//wchar_t c=L'我';//cout << hex << (short)c << endl<<endl;//输出unicodde编码 6211//方法二:wstring strUnicode = L"我";//转成unicode编码num = strUnicode.size()*2;//乘以2,才是我所占的字节数p = (unsigned char*)strUnicode.c_str();    for (int i = 0; i < num; i++)    {    printf("%0x", *p);    p++;    }  //输出1162 因为默认是小端模式,所以我的unicode编码是0x6211printf("\n");   wchar_t s[2] = {0x6211, 0x00}; //加上0x00字符串结束符,不会输出乱码wstring str =(wchar_t*)s;cout<<UnicodeToGbk(str)<<endl;//需要先将unicode字符串转成gbk之后才能用cout输出//3、UTF-8编码string strUtf8  = GbkToUtf8("我");//转成utf8编码num = strUtf8.size();//num=3p = (unsigned char*)strUtf8.c_str();    for (int i = 0; i < num; i++)    {    printf("%0x", *p);    p++;    }  //输出e68891printf("\n");   char utf8[] = {0xe6, 0x88, 0x91,0x00}; //加上0x00字符串结束符,不会输出乱码cout<<Utf8ToGbk(utf8)<<endl;//需要先将utf8字符串转成gbk之后才能用cout输出return 0;}

0 0
原创粉丝点击