C/C++ 字符编码的转换(UTF-8、GB2312)
来源:互联网 发布:王刚吐槽大会知乎 编辑:程序博客网 时间:2024/06/06 20:13
//这是个类strCoding (strCoding.h文件)
#pragma once
#include<iostream>
#include<string>
#include<windows.h>
// Bring the standard library names (string, cout, ...) used below into scope.
using namespace std;
/// Character-set helper: converts text between UTF-8 and GB2312 and
/// URL-encodes / URL-decodes strings in either encoding.
class strCoding
{
public:
    strCoding();
    ~strCoding();

    /// Converts pLen bytes of UTF-8 text at pText to GB2312; result goes to pOut.
    void UTF_8ToGB2312(string& pOut, char* pText, int pLen);

    /// Converts pLen bytes of GB2312 text at pText to UTF-8; result goes to pOut.
    void GB2312ToUTF_8(string& pOut, char* pText, int pLen);

    /// URL-encodes the GB2312 C string str.
    string UrlGB2312(char* str);

    /// Converts the GB2312 C string str to UTF-8 and URL-encodes the result.
    string UrlUTF8(char* str);

    /// Decodes a URL-encoded UTF-8 string and converts it to GB2312.
    string UrlUTF8Decode(string str);

    /// Decodes a URL-encoded GB2312 string.
    string UrlGB2312Decode(string str);

private:
    /// Converts the double-byte character at gbBuffer to one WCHAR.
    void Gb2312ToUnicode(WCHAR* pOut, char* gbBuffer);

    /// Decodes the three-byte UTF-8 sequence at pText to one WCHAR.
    void UTF_8ToUnicode(WCHAR* pOut, char* pText);

    /// Encodes the WCHAR at pText as UTF-8 bytes in pOut.
    void UnicodeToUTF_8(char* pOut, WCHAR* pText);

    /// Converts one WCHAR to its multi-byte form in pOut.
    void UnicodeToGB2312(char* pOut, WCHAR uData);

    /// Returns the numeric value of a hex digit character, or -1.
    char CharToInt(char ch);

    /// Combines the two hex digit characters at str into one byte.
    char StrToBin(char* str);
};
#pragma once
#include<iostream>
#include<string>
#include<windows.h>
// Bring the standard library names (string, cout, ...) used below into scope.
using namespace std;
/// Character-set helper: converts text between UTF-8 and GB2312 and
/// URL-encodes / URL-decodes strings in either encoding.
class strCoding
{
public:
    strCoding();
    ~strCoding();

    /// Converts pLen bytes of UTF-8 text at pText to GB2312; result goes to pOut.
    void UTF_8ToGB2312(string& pOut, char* pText, int pLen);

    /// Converts pLen bytes of GB2312 text at pText to UTF-8; result goes to pOut.
    void GB2312ToUTF_8(string& pOut, char* pText, int pLen);

    /// URL-encodes the GB2312 C string str.
    string UrlGB2312(char* str);

    /// Converts the GB2312 C string str to UTF-8 and URL-encodes the result.
    string UrlUTF8(char* str);

    /// Decodes a URL-encoded UTF-8 string and converts it to GB2312.
    string UrlUTF8Decode(string str);

    /// Decodes a URL-encoded GB2312 string.
    string UrlGB2312Decode(string str);

private:
    /// Converts the double-byte character at gbBuffer to one WCHAR.
    void Gb2312ToUnicode(WCHAR* pOut, char* gbBuffer);

    /// Decodes the three-byte UTF-8 sequence at pText to one WCHAR.
    void UTF_8ToUnicode(WCHAR* pOut, char* pText);

    /// Encodes the WCHAR at pText as UTF-8 bytes in pOut.
    void UnicodeToUTF_8(char* pOut, WCHAR* pText);

    /// Converts one WCHAR to its multi-byte form in pOut.
    void UnicodeToGB2312(char* pOut, WCHAR uData);

    /// Returns the numeric value of a hex digit character, or -1.
    char CharToInt(char ch);

    /// Combines the two hex digit characters at str into one byte.
    char StrToBin(char* str);
};
//这是个类strCoding (strCoding.cpp文件)
#include"StdAfx.h"
#include".\strcoding.h"
// Constructor: the body is empty, nothing is initialised here.
strCoding::strCoding() {}
// Destructor: the body is empty, nothing is released here.
strCoding::~strCoding() {}
/**
 * Converts one double-byte GB2312 character to a single UTF-16 code unit.
 *
 * @param pOut      Receives the converted character. Set to U+FFFD
 *                  (REPLACEMENT CHARACTER) when the conversion fails, so the
 *                  caller never reads an uninitialised value.
 * @param gbBuffer  Points to the two bytes of the GB2312 character.
 */
void strCoding::Gb2312ToUnicode(WCHAR* pOut, char* gbBuffer)
{
    // 936 is the Windows code page for Simplified Chinese (GBK, a superset of
    // GB2312). CP_ACP only behaves as GB2312 on machines whose ANSI code page
    // happens to be 936, so name the code page explicitly.
    const UINT kGb2312CodePage = 936;

    // MultiByteToWideChar returns 0 on failure and leaves the output undefined.
    if (::MultiByteToWideChar(kGb2312CodePage, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1) == 0)
    {
        *pOut = static_cast<WCHAR>(0xFFFD);
    }
}
/**
 * Decodes a three-byte UTF-8 sequence into one WCHAR.
 *
 * Reads pText[0..2] and writes the result byte-wise: the low 8 bits go to
 * byte 0 of *pOut and the high 8 bits to byte 1.
 *
 * @param pOut   Receives the decoded character.
 * @param pText  Points to the three bytes of the UTF-8 sequence.
 */
void strCoding::UTF_8ToUnicode(WCHAR* pOut, char* pText)
{
    const unsigned char lead  = static_cast<unsigned char>(pText[0]);
    const unsigned char mid   = static_cast<unsigned char>(pText[1]);
    const unsigned char trail = static_cast<unsigned char>(pText[2]);

    char* bytes = reinterpret_cast<char*>(pOut);
    // High byte: 4 payload bits of the lead byte + top 4 payload bits of the middle byte.
    bytes[1] = static_cast<char>(((lead & 0x0F) << 4) | ((mid >> 2) & 0x0F));
    // Low byte: bottom 2 payload bits of the middle byte + 6 payload bits of the last byte.
    bytes[0] = static_cast<char>(((mid & 0x03) << 6) | (trail & 0x3F));
}
/**
 * Encodes one UTF-16 code unit as UTF-8.
 *
 * The shortest valid form is produced: 1 byte below U+0080, 2 bytes below
 * U+0800, otherwise 3 bytes. (Always emitting 3 bytes would yield an overlong,
 * invalid encoding for characters such as Greek or Cyrillic letters.)
 *
 * @param pOut   Output buffer of at least 3 bytes. For the 1- and 2-byte forms
 *               a terminating NUL is written right after the sequence; for the
 *               3-byte form exactly pOut[0..2] are written and no terminator
 *               is added.
 * @param pText  Points to the code unit to encode.
 */
void strCoding::UnicodeToUTF_8(char* pOut, WCHAR* pText)
{
    // Work on the numeric value instead of poking at the bytes of the WCHAR,
    // so the result does not depend on byte order.
    const unsigned int codeUnit = static_cast<unsigned int>(*pText) & 0xFFFFu;

    if (codeUnit < 0x80u)
    {
        pOut[0] = static_cast<char>(codeUnit);
        pOut[1] = '\0';
    }
    else if (codeUnit < 0x800u)
    {
        pOut[0] = static_cast<char>(0xC0u | (codeUnit >> 6));
        pOut[1] = static_cast<char>(0x80u | (codeUnit & 0x3Fu));
        pOut[2] = '\0';
    }
    else
    {
        pOut[0] = static_cast<char>(0xE0u | (codeUnit >> 12));
        pOut[1] = static_cast<char>(0x80u | ((codeUnit >> 6) & 0x3Fu));
        pOut[2] = static_cast<char>(0x80u | (codeUnit & 0x3Fu));
    }
}
void strCoding::UnicodeToGB2312(char* pOut,WCHAR uData)
{
WideCharToMultiByte(CP_ACP,NULL,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
return;
}
//做为解Url使用
/**
 * Maps a hexadecimal digit character to its numeric value (used by URL decoding).
 *
 * @param ch  A character in '0'-'9', 'a'-'f' or 'A'-'F'.
 * @return    The value 0-15, or -1 when ch is not a hex digit.
 */
char strCoding::CharToInt(char ch)
{
    if ('0' <= ch && ch <= '9')
    {
        return static_cast<char>(ch - '0');
    }
    if ('a' <= ch && ch <= 'f')
    {
        return static_cast<char>(ch - 'a' + 10);
    }
    if ('A' <= ch && ch <= 'F')
    {
        return static_cast<char>(ch - 'A' + 10);
    }
    return -1;
}
/**
 * Combines two hexadecimal digit characters into one byte.
 *
 * Example: "B0" -> high nibble 0x0B, low nibble 0x00 -> 0xB0.
 *
 * @param str  Points to two hex digit characters (str[0] is the high nibble).
 * @return     The byte (CharToInt(str[0]) << 4) | CharToInt(str[1]).
 */
char strCoding::StrToBin(char* str)
{
    const char highNibble = CharToInt(str[0]);
    const char lowNibble  = CharToInt(str[1]);
    return static_cast<char>((highNibble << 4) | lowNibble);
}
//UTF_8 转gb2312
/**
 * Converts UTF-8 text to GB2312.
 *
 * ASCII bytes are copied unchanged. Two- and three-byte UTF-8 sequences are
 * decoded to a UTF-16 code unit and converted with UnicodeToGB2312. Input that
 * cannot be represented is replaced by '?': malformed or truncated sequences
 * (one '?' per offending byte) and four-byte sequences (one '?' per sequence).
 * A NUL byte inside the input ends the conversion.
 *
 * @param pOut   Receives the converted text (replaced, not appended to). It is
 *               only written after all input has been read, so it may refer to
 *               the string that owns pText.
 * @param pText  UTF-8 input bytes; may be NULL, which yields an empty result.
 * @param pLen   Number of bytes at pText; values <= 0 yield an empty result.
 */
void strCoding::UTF_8ToGB2312(string& pOut, char* pText, int pLen)
{
    string converted;

    if (pText != NULL && pLen > 0)
    {
        converted.reserve(static_cast<size_t>(pLen));

        int i = 0;
        while (i < pLen)
        {
            const unsigned char lead = static_cast<unsigned char>(pText[i]);

            if (lead == 0)
            {
                break;  // the result has always been a C string: stop at NUL
            }
            if (lead < 0x80)
            {
                converted += pText[i];
                ++i;
                continue;
            }

            // Sequence length is encoded in the high bits of the lead byte.
            int seqLen = 0;
            if ((lead & 0xE0) == 0xC0)
            {
                seqLen = 2;
            }
            else if ((lead & 0xF0) == 0xE0)
            {
                seqLen = 3;
            }
            else if ((lead & 0xF8) == 0xF0)
            {
                seqLen = 4;
            }

            // The whole sequence must lie inside the buffer and every
            // following byte must be a continuation byte (10xxxxxx).
            bool wellFormed = (seqLen != 0) && (seqLen <= pLen - i);
            for (int k = 1; wellFormed && k < seqLen; ++k)
            {
                wellFormed = (static_cast<unsigned char>(pText[i + k]) & 0xC0) == 0x80;
            }
            if (!wellFormed)
            {
                converted += '?';
                ++i;
                continue;
            }

            if (seqLen == 4)
            {
                // Outside the Basic Multilingual Plane: no GB2312 equivalent.
                converted += '?';
                i += 4;
                continue;
            }

            WCHAR wide = 0;
            if (seqLen == 3)
            {
                UTF_8ToUnicode(&wide, pText + i);
            }
            else
            {
                const unsigned char second = static_cast<unsigned char>(pText[i + 1]);
                wide = static_cast<WCHAR>(((lead & 0x1F) << 6) | (second & 0x3F));
            }

            // Zero the scratch bytes so a one-byte result cannot be followed
            // by a stale byte from the previous character.
            char gb[2] = { 0, 0 };
            UnicodeToGB2312(gb, wide);
            if (gb[0] == '\0')
            {
                converted += '?';
            }
            else
            {
                converted += gb[0];
                if (gb[1] != '\0')
                {
                    converted += gb[1];
                }
            }
            i += seqLen;
        }
    }

    pOut.swap(converted);
}
//GB2312 转为 UTF-8
/**
 * Converts GB2312 text to UTF-8.
 *
 * ASCII bytes are copied unchanged (NUL bytes are skipped). Every other byte
 * is treated as the first byte of a double-byte character, converted with
 * Gb2312ToUnicode and encoded with UnicodeToUTF_8. A lone lead byte at the end
 * of the input, or a pair that cannot be converted, yields U+FFFD.
 *
 * @param pOut   Receives the converted text (replaced, not appended to). It is
 *               only written after all input has been read, so it may refer to
 *               the string that owns pText.
 * @param pText  GB2312 input bytes; may be NULL, which yields an empty result.
 * @param pLen   Number of bytes at pText; values <= 0 yield an empty result.
 */
void strCoding::GB2312ToUTF_8(string& pOut, char* pText, int pLen)
{
    string converted;

    if (pText != NULL && pLen > 0)
    {
        int i = 0;
        while (i < pLen)
        {
            const unsigned char lead = static_cast<unsigned char>(pText[i]);

            // Plain ASCII: copy as is.
            if (lead < 0x80)
            {
                if (lead != 0)
                {
                    converted += pText[i];
                }
                ++i;
                continue;
            }

            if (pLen - i < 2)
            {
                // Truncated double-byte character: do not read past the buffer.
                converted.append("\xEF\xBF\xBD");  // U+FFFD in UTF-8
                break;
            }

            // Pre-set to U+FFFD so a failed conversion still yields a defined value.
            WCHAR wide = static_cast<WCHAR>(0xFFFD);
            Gb2312ToUnicode(&wide, pText + i);

            // Zero-filled, so the bytes written by UnicodeToUTF_8 are always
            // followed by a terminator.
            char utf8[4] = { 0, 0, 0, 0 };
            UnicodeToUTF_8(utf8, &wide);
            converted.append(utf8);

            i += 2;
        }
    }

    pOut.swap(converted);
}
//把str编码为网页中的 GB2312 url encode ,英文不变,汉字双字节 如%B4%F3
/**
 * URL-encodes a GB2312 C string.
 *
 * Alphanumeric bytes are copied unchanged, a space becomes '+', and every
 * other byte becomes "%XX" with upper-case hex digits. Other whitespace
 * (tab, newline, ...) is percent-encoded rather than turned into '+', so it
 * survives a decode round trip.
 *
 * @param str  NUL-terminated input; NULL yields an empty string.
 * @return     The URL-encoded text.
 */
string strCoding::UrlGB2312(char* str)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    string encoded;
    if (str == NULL)
    {
        return encoded;
    }

    for (const char* p = str; *p != '\0'; ++p)
    {
        const unsigned char byte = static_cast<unsigned char>(*p);
        if (isalnum(byte))
        {
            encoded += *p;
        }
        else if (byte == ' ')
        {
            encoded += '+';
        }
        else
        {
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
//把str编码为网页中的 UTF-8 url encode ,英文不变,汉字三字节 如%E5%A4%A7
/**
 * Converts a GB2312 C string to UTF-8 and URL-encodes the result.
 *
 * Alphanumeric bytes are copied unchanged, a space becomes '+', and every
 * other byte becomes "%XX" with upper-case hex digits. Other whitespace
 * (tab, newline, ...) is percent-encoded rather than turned into '+', so it
 * survives a decode round trip.
 *
 * @param str  NUL-terminated GB2312 input; NULL yields an empty string.
 * @return     The URL-encoded UTF-8 text.
 */
string strCoding::UrlUTF8(char* str)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    string encoded;
    if (str == NULL)
    {
        return encoded;
    }

    string utf8;
    GB2312ToUTF_8(utf8, str, (int)strlen(str));

    for (size_t i = 0; i < utf8.length(); ++i)
    {
        const unsigned char byte = static_cast<unsigned char>(utf8[i]);
        if (isalnum(byte))
        {
            encoded += utf8[i];
        }
        else if (byte == ' ')
        {
            encoded += '+';
        }
        else
        {
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
//把url GB2312解码
/**
 * Decodes a URL-encoded string byte by byte.
 *
 * "%XX" (two hex digits) becomes the byte 0xXX, '+' becomes a space, and any
 * other character is copied unchanged. A '%' that is not followed by two hex
 * digits is copied literally instead of reading past the end of the string or
 * decoding garbage.
 *
 * @param str  The URL-encoded text.
 * @return     The decoded bytes (GB2312 if that is what was encoded).
 */
string strCoding::UrlGB2312Decode(string str)
{
    string output;
    const size_t len = str.length();
    const char kNotHex = static_cast<char>(-1);  // CharToInt's "invalid" value

    size_t i = 0;
    while (i < len)
    {
        const char ch = str[i];
        if (ch == '%' && len - i >= 3
            && CharToInt(str[i + 1]) != kNotHex
            && CharToInt(str[i + 2]) != kNotHex)
        {
            char hexPair[2];
            hexPair[0] = str[i + 1];
            hexPair[1] = str[i + 2];
            output += StrToBin(hexPair);
            i += 3;
        }
        else if (ch == '+')
        {
            output += ' ';
            ++i;
        }
        else
        {
            output += ch;
            ++i;
        }
    }
    return output;
}
//把url utf8解码
/**
 * Decodes a URL-encoded UTF-8 string and converts the result to GB2312.
 *
 * The percent/plus decoding is byte-oriented, so UrlGB2312Decode is reused
 * for it; the decoded bytes are then passed through UTF_8ToGB2312.
 *
 * @param str  The URL-encoded UTF-8 text.
 * @return     The decoded text in GB2312.
 */
string strCoding::UrlUTF8Decode(string str)
{
    string utf8Bytes = UrlGB2312Decode(str);
    string gbText = "";
    char* rawUtf8 = (char*)utf8Bytes.data();
    UTF_8ToGB2312(gbText, rawUtf8, strlen(utf8Bytes.data()));
    return gbText;
}
#include"StdAfx.h"
#include".\strcoding.h"
// Constructor: the body is empty, nothing is initialised here.
strCoding::strCoding() {}
// Destructor: the body is empty, nothing is released here.
strCoding::~strCoding() {}
/**
 * Converts one double-byte GB2312 character to a single UTF-16 code unit.
 *
 * @param pOut      Receives the converted character. Set to U+FFFD
 *                  (REPLACEMENT CHARACTER) when the conversion fails, so the
 *                  caller never reads an uninitialised value.
 * @param gbBuffer  Points to the two bytes of the GB2312 character.
 */
void strCoding::Gb2312ToUnicode(WCHAR* pOut, char* gbBuffer)
{
    // 936 is the Windows code page for Simplified Chinese (GBK, a superset of
    // GB2312). CP_ACP only behaves as GB2312 on machines whose ANSI code page
    // happens to be 936, so name the code page explicitly.
    const UINT kGb2312CodePage = 936;

    // MultiByteToWideChar returns 0 on failure and leaves the output undefined.
    if (::MultiByteToWideChar(kGb2312CodePage, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1) == 0)
    {
        *pOut = static_cast<WCHAR>(0xFFFD);
    }
}
/**
 * Decodes a three-byte UTF-8 sequence into one WCHAR.
 *
 * Reads pText[0..2] and writes the result byte-wise: the low 8 bits go to
 * byte 0 of *pOut and the high 8 bits to byte 1.
 *
 * @param pOut   Receives the decoded character.
 * @param pText  Points to the three bytes of the UTF-8 sequence.
 */
void strCoding::UTF_8ToUnicode(WCHAR* pOut, char* pText)
{
    const unsigned char lead  = static_cast<unsigned char>(pText[0]);
    const unsigned char mid   = static_cast<unsigned char>(pText[1]);
    const unsigned char trail = static_cast<unsigned char>(pText[2]);

    char* bytes = reinterpret_cast<char*>(pOut);
    // High byte: 4 payload bits of the lead byte + top 4 payload bits of the middle byte.
    bytes[1] = static_cast<char>(((lead & 0x0F) << 4) | ((mid >> 2) & 0x0F));
    // Low byte: bottom 2 payload bits of the middle byte + 6 payload bits of the last byte.
    bytes[0] = static_cast<char>(((mid & 0x03) << 6) | (trail & 0x3F));
}
/**
 * Encodes one UTF-16 code unit as UTF-8.
 *
 * The shortest valid form is produced: 1 byte below U+0080, 2 bytes below
 * U+0800, otherwise 3 bytes. (Always emitting 3 bytes would yield an overlong,
 * invalid encoding for characters such as Greek or Cyrillic letters.)
 *
 * @param pOut   Output buffer of at least 3 bytes. For the 1- and 2-byte forms
 *               a terminating NUL is written right after the sequence; for the
 *               3-byte form exactly pOut[0..2] are written and no terminator
 *               is added.
 * @param pText  Points to the code unit to encode.
 */
void strCoding::UnicodeToUTF_8(char* pOut, WCHAR* pText)
{
    // Work on the numeric value instead of poking at the bytes of the WCHAR,
    // so the result does not depend on byte order.
    const unsigned int codeUnit = static_cast<unsigned int>(*pText) & 0xFFFFu;

    if (codeUnit < 0x80u)
    {
        pOut[0] = static_cast<char>(codeUnit);
        pOut[1] = '\0';
    }
    else if (codeUnit < 0x800u)
    {
        pOut[0] = static_cast<char>(0xC0u | (codeUnit >> 6));
        pOut[1] = static_cast<char>(0x80u | (codeUnit & 0x3Fu));
        pOut[2] = '\0';
    }
    else
    {
        pOut[0] = static_cast<char>(0xE0u | (codeUnit >> 12));
        pOut[1] = static_cast<char>(0x80u | ((codeUnit >> 6) & 0x3Fu));
        pOut[2] = static_cast<char>(0x80u | (codeUnit & 0x3Fu));
    }
}
void strCoding::UnicodeToGB2312(char* pOut,WCHAR uData)
{
WideCharToMultiByte(CP_ACP,NULL,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
return;
}
//做为解Url使用
/**
 * Maps a hexadecimal digit character to its numeric value (used by URL decoding).
 *
 * @param ch  A character in '0'-'9', 'a'-'f' or 'A'-'F'.
 * @return    The value 0-15, or -1 when ch is not a hex digit.
 */
char strCoding::CharToInt(char ch)
{
    if ('0' <= ch && ch <= '9')
    {
        return static_cast<char>(ch - '0');
    }
    if ('a' <= ch && ch <= 'f')
    {
        return static_cast<char>(ch - 'a' + 10);
    }
    if ('A' <= ch && ch <= 'F')
    {
        return static_cast<char>(ch - 'A' + 10);
    }
    return -1;
}
/**
 * Combines two hexadecimal digit characters into one byte.
 *
 * Example: "B0" -> high nibble 0x0B, low nibble 0x00 -> 0xB0.
 *
 * @param str  Points to two hex digit characters (str[0] is the high nibble).
 * @return     The byte (CharToInt(str[0]) << 4) | CharToInt(str[1]).
 */
char strCoding::StrToBin(char* str)
{
    const char highNibble = CharToInt(str[0]);
    const char lowNibble  = CharToInt(str[1]);
    return static_cast<char>((highNibble << 4) | lowNibble);
}
//UTF_8 转gb2312
/**
 * Converts UTF-8 text to GB2312.
 *
 * ASCII bytes are copied unchanged. Two- and three-byte UTF-8 sequences are
 * decoded to a UTF-16 code unit and converted with UnicodeToGB2312. Input that
 * cannot be represented is replaced by '?': malformed or truncated sequences
 * (one '?' per offending byte) and four-byte sequences (one '?' per sequence).
 * A NUL byte inside the input ends the conversion.
 *
 * @param pOut   Receives the converted text (replaced, not appended to). It is
 *               only written after all input has been read, so it may refer to
 *               the string that owns pText.
 * @param pText  UTF-8 input bytes; may be NULL, which yields an empty result.
 * @param pLen   Number of bytes at pText; values <= 0 yield an empty result.
 */
void strCoding::UTF_8ToGB2312(string& pOut, char* pText, int pLen)
{
    string converted;

    if (pText != NULL && pLen > 0)
    {
        converted.reserve(static_cast<size_t>(pLen));

        int i = 0;
        while (i < pLen)
        {
            const unsigned char lead = static_cast<unsigned char>(pText[i]);

            if (lead == 0)
            {
                break;  // the result has always been a C string: stop at NUL
            }
            if (lead < 0x80)
            {
                converted += pText[i];
                ++i;
                continue;
            }

            // Sequence length is encoded in the high bits of the lead byte.
            int seqLen = 0;
            if ((lead & 0xE0) == 0xC0)
            {
                seqLen = 2;
            }
            else if ((lead & 0xF0) == 0xE0)
            {
                seqLen = 3;
            }
            else if ((lead & 0xF8) == 0xF0)
            {
                seqLen = 4;
            }

            // The whole sequence must lie inside the buffer and every
            // following byte must be a continuation byte (10xxxxxx).
            bool wellFormed = (seqLen != 0) && (seqLen <= pLen - i);
            for (int k = 1; wellFormed && k < seqLen; ++k)
            {
                wellFormed = (static_cast<unsigned char>(pText[i + k]) & 0xC0) == 0x80;
            }
            if (!wellFormed)
            {
                converted += '?';
                ++i;
                continue;
            }

            if (seqLen == 4)
            {
                // Outside the Basic Multilingual Plane: no GB2312 equivalent.
                converted += '?';
                i += 4;
                continue;
            }

            WCHAR wide = 0;
            if (seqLen == 3)
            {
                UTF_8ToUnicode(&wide, pText + i);
            }
            else
            {
                const unsigned char second = static_cast<unsigned char>(pText[i + 1]);
                wide = static_cast<WCHAR>(((lead & 0x1F) << 6) | (second & 0x3F));
            }

            // Zero the scratch bytes so a one-byte result cannot be followed
            // by a stale byte from the previous character.
            char gb[2] = { 0, 0 };
            UnicodeToGB2312(gb, wide);
            if (gb[0] == '\0')
            {
                converted += '?';
            }
            else
            {
                converted += gb[0];
                if (gb[1] != '\0')
                {
                    converted += gb[1];
                }
            }
            i += seqLen;
        }
    }

    pOut.swap(converted);
}
//GB2312 转为 UTF-8
/**
 * Converts GB2312 text to UTF-8.
 *
 * ASCII bytes are copied unchanged (NUL bytes are skipped). Every other byte
 * is treated as the first byte of a double-byte character, converted with
 * Gb2312ToUnicode and encoded with UnicodeToUTF_8. A lone lead byte at the end
 * of the input, or a pair that cannot be converted, yields U+FFFD.
 *
 * @param pOut   Receives the converted text (replaced, not appended to). It is
 *               only written after all input has been read, so it may refer to
 *               the string that owns pText.
 * @param pText  GB2312 input bytes; may be NULL, which yields an empty result.
 * @param pLen   Number of bytes at pText; values <= 0 yield an empty result.
 */
void strCoding::GB2312ToUTF_8(string& pOut, char* pText, int pLen)
{
    string converted;

    if (pText != NULL && pLen > 0)
    {
        int i = 0;
        while (i < pLen)
        {
            const unsigned char lead = static_cast<unsigned char>(pText[i]);

            // Plain ASCII: copy as is.
            if (lead < 0x80)
            {
                if (lead != 0)
                {
                    converted += pText[i];
                }
                ++i;
                continue;
            }

            if (pLen - i < 2)
            {
                // Truncated double-byte character: do not read past the buffer.
                converted.append("\xEF\xBF\xBD");  // U+FFFD in UTF-8
                break;
            }

            // Pre-set to U+FFFD so a failed conversion still yields a defined value.
            WCHAR wide = static_cast<WCHAR>(0xFFFD);
            Gb2312ToUnicode(&wide, pText + i);

            // Zero-filled, so the bytes written by UnicodeToUTF_8 are always
            // followed by a terminator.
            char utf8[4] = { 0, 0, 0, 0 };
            UnicodeToUTF_8(utf8, &wide);
            converted.append(utf8);

            i += 2;
        }
    }

    pOut.swap(converted);
}
//把str编码为网页中的 GB2312 url encode ,英文不变,汉字双字节 如%B4%F3
/**
 * URL-encodes a GB2312 C string.
 *
 * Alphanumeric bytes are copied unchanged, a space becomes '+', and every
 * other byte becomes "%XX" with upper-case hex digits. Other whitespace
 * (tab, newline, ...) is percent-encoded rather than turned into '+', so it
 * survives a decode round trip.
 *
 * @param str  NUL-terminated input; NULL yields an empty string.
 * @return     The URL-encoded text.
 */
string strCoding::UrlGB2312(char* str)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    string encoded;
    if (str == NULL)
    {
        return encoded;
    }

    for (const char* p = str; *p != '\0'; ++p)
    {
        const unsigned char byte = static_cast<unsigned char>(*p);
        if (isalnum(byte))
        {
            encoded += *p;
        }
        else if (byte == ' ')
        {
            encoded += '+';
        }
        else
        {
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
//把str编码为网页中的 UTF-8 url encode ,英文不变,汉字三字节 如%E5%A4%A7
/**
 * Converts a GB2312 C string to UTF-8 and URL-encodes the result.
 *
 * Alphanumeric bytes are copied unchanged, a space becomes '+', and every
 * other byte becomes "%XX" with upper-case hex digits. Other whitespace
 * (tab, newline, ...) is percent-encoded rather than turned into '+', so it
 * survives a decode round trip.
 *
 * @param str  NUL-terminated GB2312 input; NULL yields an empty string.
 * @return     The URL-encoded UTF-8 text.
 */
string strCoding::UrlUTF8(char* str)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    string encoded;
    if (str == NULL)
    {
        return encoded;
    }

    string utf8;
    GB2312ToUTF_8(utf8, str, (int)strlen(str));

    for (size_t i = 0; i < utf8.length(); ++i)
    {
        const unsigned char byte = static_cast<unsigned char>(utf8[i]);
        if (isalnum(byte))
        {
            encoded += utf8[i];
        }
        else if (byte == ' ')
        {
            encoded += '+';
        }
        else
        {
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
//把url GB2312解码
/**
 * Decodes a URL-encoded string byte by byte.
 *
 * "%XX" (two hex digits) becomes the byte 0xXX, '+' becomes a space, and any
 * other character is copied unchanged. A '%' that is not followed by two hex
 * digits is copied literally instead of reading past the end of the string or
 * decoding garbage.
 *
 * @param str  The URL-encoded text.
 * @return     The decoded bytes (GB2312 if that is what was encoded).
 */
string strCoding::UrlGB2312Decode(string str)
{
    string output;
    const size_t len = str.length();
    const char kNotHex = static_cast<char>(-1);  // CharToInt's "invalid" value

    size_t i = 0;
    while (i < len)
    {
        const char ch = str[i];
        if (ch == '%' && len - i >= 3
            && CharToInt(str[i + 1]) != kNotHex
            && CharToInt(str[i + 2]) != kNotHex)
        {
            char hexPair[2];
            hexPair[0] = str[i + 1];
            hexPair[1] = str[i + 2];
            output += StrToBin(hexPair);
            i += 3;
        }
        else if (ch == '+')
        {
            output += ' ';
            ++i;
        }
        else
        {
            output += ch;
            ++i;
        }
    }
    return output;
}
//把url utf8解码
/**
 * Decodes a URL-encoded UTF-8 string and converts the result to GB2312.
 *
 * The percent/plus decoding is byte-oriented, so UrlGB2312Decode is reused
 * for it; the decoded bytes are then passed through UTF_8ToGB2312.
 *
 * @param str  The URL-encoded UTF-8 text.
 * @return     The decoded text in GB2312.
 */
string strCoding::UrlUTF8Decode(string str)
{
    string utf8Bytes = UrlGB2312Decode(str);
    string gbText = "";
    char* rawUtf8 = (char*)utf8Bytes.data();
    UTF_8ToGB2312(gbText, rawUtf8, strlen(utf8Bytes.data()));
    return gbText;
}
//test
#include"stdafx.h"
#include"strCoding.h"
// Bring the standard library names (string, cout, ...) used below into scope.
using namespace std;
int main()
{
strCoding cfm;
string keyword="大家好,欢迎你";
string Temp="";
string Output="";
//把关键字做url的utf8编码
Temp= cfm.UrlUTF8((char*)keyword.data());
cout<<Temp<<endl;
//把url的utf8编码的结果解码
Temp=cfm.UrlUTF8Decode(Temp);
cout<<Temp<<endl;
//把关键字做url的gb2312编码
Temp=cfm.UrlGB2312((char*)keyword.data());
cout<<Temp<<endl;
//把url的gb2312编码的结果解码
Temp=cfm.UrlGB2312Decode(Temp);
cout<<Temp<<endl;
//把关键字GB2312转UTF_8
cfm.GB2312ToUTF_8(Output,(char*)keyword.data(),strlen(keyword.data()));
cout<<Output<<endl;
//把GB2312转UTF_8转为中文
cfm.UTF_8ToGB2312(Temp,(char*)Output.data(),strlen(Output.data()));
cout<<Temp<<endl;
//system("pasue");
getchar();
return 0;
//
}
#include"stdafx.h"
#include"strCoding.h"
// Bring the standard library names (string, cout, ...) used below into scope.
using namespace std;
int main()
{
strCoding cfm;
string keyword="大家好,欢迎你";
string Temp="";
string Output="";
//把关键字做url的utf8编码
Temp= cfm.UrlUTF8((char*)keyword.data());
cout<<Temp<<endl;
//把url的utf8编码的结果解码
Temp=cfm.UrlUTF8Decode(Temp);
cout<<Temp<<endl;
//把关键字做url的gb2312编码
Temp=cfm.UrlGB2312((char*)keyword.data());
cout<<Temp<<endl;
//把url的gb2312编码的结果解码
Temp=cfm.UrlGB2312Decode(Temp);
cout<<Temp<<endl;
//把关键字GB2312转UTF_8
cfm.GB2312ToUTF_8(Output,(char*)keyword.data(),strlen(keyword.data()));
cout<<Output<<endl;
//把GB2312转UTF_8转为中文
cfm.UTF_8ToGB2312(Temp,(char*)Output.data(),strlen(Output.data()));
cout<<Temp<<endl;
//system("pasue");
getchar();
return 0;
//
}
- [收藏]C/C++ 字符编码的转换(ut8、gb2312)
- C/C++ 字符编码的转换(ut8、gb2312)
- C/C++ 字符编码的转换(ut8、gb2312)
- C/C++ 字符编码的转换(ut8、gb2312)
- C/C++ 字符编码的转换(ut8、gb2312)
- C+++Utf8字符转换Gb2312编码,解决TinyXml中文乱码
- 字符编码格式串转换之C语言版(UTF-8, Unicode, GB2312)
- C/C++ 字符编码的转换
- 【C++】UTF-8字符和GB2312字符相互转换函数
- C/C++,字符串的UTF-8与GBK(或GB2312)编码转换
- C/C++,字符串的UTF-8与GBK(或GB2312)编码转换
- Objective C - 字符编码转码 成 GB2312:
- GB2312中汉字字符的编码在C语言中的输出测试
- c#实现GB2312和UTF8字符编码方式的转换!
- Android的字符编码转换问题,Unicode,GB2312,UTF8等
- Android的字符编码转换问题,Unicode,GB2312,UTF8等
- C/C++ GB2312,UTF编码互相准确转换
- C/C++ GB2312,UTF编码互相准确转换
- 15个你可能不知道的开源云平台
- Flex布局基础
- Flex点滴(更新中)
- CUnit使用
- 批处理获取当前路径
- C/C++ 字符编码的转换(ut8、gb2312)
- MyEclipse快捷键整理
- 记录trim
- Interview: Ingo Molnar
- Web设计师必备的10款最佳排版工具
- 【春节献礼】Ubuntu 12.10 安全写入NTFS,远离天杀windows chkdisk
- 数据挖掘常用算法及实现(http://bbs.chinakdd.com/forum.php?mod=viewthread&tid=4055&extra=page%3D1)
- SSH与工作流Activiti的集成开发
- 转-Linux 操作指导专题 华为3Com技术有限公司