UTF-8与GB2312之间的互换

来源:互联网 发布:php模板消息接口 demo 编辑:程序博客网 时间:2024/06/03 11:08

相信一定有不少的程序开发人员时常会遇到字符编码的问题,而这个问题也是非常让人头痛的。因为这些都是潜在的错误,要找出这些错误也得要有这方面的开发经验才行。特别是在处理xml文档时,该问题的出现就更加频繁了。有一次用java写服务器端程序,用vc写客户端与之交互,交互的协议都是用xml写的,结果在通讯时老是发现数据接收不正确。纳闷!于是用网络抓包工具抓取数据,后来才发现原来java上xml的头是这样的:<?xml version="1.0" encoding="UTF-8"?>,而vc上默认的是GB2312,所以一遇到汉字数据就不正确了。去网上找资料,这方面的文章好像特别少。针对像这样的问题,下面我介绍一下我自己写的一个转换程序。当然,程序很简单。如果有画蛇添足的地方,还望各位高手一笑了之。

如果您对UTF-8、Unicode、GB2312等还是很陌生的话,请查看http://www.linuxforum.net/books/UTF-8-Unicode.html,我这里就不浪费口舌了。下面介绍一下WinAPI的两个函数:WideCharToMultiByte、MultiByteToWideChar。

函数原型:

01.int WideCharToMultiByte(
02.UINT CodePage, // code page
03.DWORD dwFlags, // performance and mapping flags
04.LPCWSTR lpWideCharStr, // wide-character string
05.int cchWideChar, // number of chars in string
06.LPSTR lpMultiByteStr, // buffer for new string
07.int cbMultiByte, // size of buffer
08.LPCSTR lpDefaultChar, // default for unmappable chars
09.LPBOOL lpUsedDefaultChar // set when default char used
10.); // converts a wide-character string into a multibyte (narrow-character) string
11. 
12.int MultiByteToWideChar(
13.UINT CodePage, // code page
14.DWORD dwFlags, // character-type options
15.LPCSTR lpMultiByteStr, // string to map
16.int cbMultiByte, // number of bytes in string
17.LPWSTR lpWideCharStr, // wide-character buffer
18.int cchWideChar // size of buffer
19.);// converts a multibyte (narrow-character) string into a wide-character string

需要用到的一些函数:

01.CString CXmlProcess::HexToBin(CString string)//将16进制数转换成2进制
02.{
03.if( string == "0"return "0000";
04.if( string == "1"return "0001";
05.if( string == "2"return "0010";
06.if( string == "3"return "0011";
07.if( string == "4"return "0100";
08.if( string == "5"return "0101";
09.if( string == "6"return "0110";
10.if( string == "7"return "0111";
11.if( string == "8"return "1000";
12.if( string == "9"return "1001";
13.if( string == "a"return "1010";
14.if( string == "b"return "1011";
15.if( string == "c"return "1100";
16.if( string == "d"return "1101";
17.if( string == "e"return "1110";
18.if( string == "f"return "1111";
19.return "";
20.}
21. 
22. 
23.CString CXmlProcess::BinToHex(CString BinString)//将2进制数转换成16进制
24.{
25.if( BinString == "0000"return "0";
26.if( BinString == "0001"return "1";
27.if( BinString == "0010"return "2";
28.if( BinString == "0011"return "3";
29.if( BinString == "0100"return "4";
30.if( BinString == "0101"return "5";
31.if( BinString == "0110"return "6";
32.if( BinString == "0111"return "7";
33.if( BinString == "1000"return "8";
34.if( BinString == "1001"return "9";
35.if( BinString == "1010"return "a";
36.if( BinString == "1011"return "b";
37.if( BinString == "1100"return "c";
38.if( BinString == "1101"return "d";
39.if( BinString == "1110"return "e";
40.if( BinString == "1111"return "f";
41.return "";
42.}
43. 
44.int CXmlProcess::BinToInt(CString string)//2进制字符数据转换成10进制整型
45.{
46.int len =0;
47.int tempInt = 0;
48.int strInt = 0;
49.for(int i =0 ;i < string.GetLength() ;i ++)
50.{
51.tempInt = 1;
52.strInt = (int)string.GetAt(i)-48;
53.for(int k =0 ;k < 7-i ; k++)
54.{
55.tempInt = 2*tempInt;
56.}
57.len += tempInt*strInt;
58.}
59.return len;
60.}

UTF-8转换成GB2312:先把UTF-8转换成Unicode,然后再通过函数WideCharToMultiByte把Unicode转换成GB2312。

01.WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart)  //把UTF-8转换成Unicode
02.{
03.char char_one;
04.char char_two;
05.char char_three;
06.int Hchar;
07.int Lchar;
08.char uchar[2];
09.WCHAR *unicode;
10.CString string_one;
11.CString string_two;
12.CString string_three;
13.CString combiString;
14.char_one = *ustart;
15.char_two = *(ustart+1);
16.char_three = *(ustart+2);
17.string_one.Format("%x",char_one);
18.string_two.Format("%x",char_two);
19.string_three.Format("%x",char_three);
20.string_three = string_three.Right(2);
21.string_two = string_two.Right(2);
22.string_one = string_one.Right(2);
23.string_three = HexToBin(string_three.Left(1))+HexToBin(string_three.Right(1));
24.string_two = HexToBin(string_two.Left(1))+HexToBin(string_two.Right(1));
25.string_one = HexToBin(string_one.Left(1))+HexToBin(string_one.Right(1));
26.combiString = string_one +string_two +string_three;
27.combiString = combiString.Right(20);
28.combiString.Delete(4,2);
29.combiString.Delete(10,2);
30.Hchar = BinToInt(combiString.Left(8));
31.Lchar = BinToInt(combiString.Right(8));
32.uchar[1] = (char)Hchar;
33.uchar[0] = (char)Lchar;
34.unicode = (WCHAR *)uchar;
35.return unicode;
36.}
37. 
38.char * CXmlProcess::UnicodeToGB2312(unsigned short uData)  //把Unicode 转换成 GB2312
39.{
40.char *buffer ;
41.buffer = new char[sizeof(WCHAR)];
42.WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);
43.return buffer;
44.}

GB2312转换成UTF-8:先通过函数MultiByteToWideChar把GB2312转换成Unicode,然后再把Unicode的各个二进制位拆开,重新拼装成UTF-8。

01.WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)  //GB2312 转换成 Unicode
02.{
03.WCHAR *uniChar;
04.uniChar = new WCHAR[1];
05.::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,uniChar,1);
06.return uniChar;
07.}
08.char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar) // Unicode 转换成UTF-8
09.{
10.char *buffer;
11.CString strOne;
12.CString strTwo;
13.CString strThree;
14.CString strFour;
15.CString strAnd;
16.buffer = new char[3];
17.int hInt,lInt;
18.hInt = (int)((*UniChar)/256);
19.lInt = (*UniChar)%256;
20.CString string ;
21.string.Format("%x",hInt);
22.strTwo = HexToBin(string.Right(1));
23.string = string.Left(string.GetLength() - 1);
24.strOne = HexToBin(string.Right(1));
25.string.Format("%x",lInt);
26.strFour = HexToBin(string.Right(1));
27.string = string.Left(string.GetLength() -1);
28.strThree = HexToBin(string.Right(1));
29.strAnd = strOne +strTwo + strThree + strFour;
30.strAnd.Insert(0,"1110");
31.strAnd.Insert(8,"10");
32.strAnd.Insert(16,"10");
33.strOne = strAnd.Left(8);
34.strAnd = strAnd.Right(16);
35.strTwo = strAnd.Left(8);
36.strThree = strAnd.Right(8);
37.*buffer = (char)BinToInt(strOne);
38.buffer[1] = (char)BinToInt(strTwo);
39.buffer[2] = (char)BinToInt(strThree);
40.return buffer;
41.}

例子:将GB2312转换成UTF-8的调用:

view source
print?
01.char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
02.{
03.int newCharLen =0 ;
04.int oldCharLen = 0;
05.int revCharLen = len;
06.char* newCharBuffer;
07.char* finalCharBuffer;
08.char *buffer ;
09.CString string;
10.buffer  = new char[sizeof(WCHAR)];
11.newCharBuffer = new char[int(1.5*revCharLen)];//设置最大的一个缓冲区
12.while(oldCharLen < revCharLen)
13.{
14.if( *(xmlStream + oldCharLen) >= 0)
15.{
16.*(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
17.newCharLen ++;
18.oldCharLen ++;
19.}//如果是英文直接复制就可以
20.else
21.{
22.WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
23.buffer = this->UnicodeToUTF_8(pbuffer);
24.*(newCharBuffer+newCharLen) = *buffer;
25.*(newCharBuffer +newCharLen +1) = *(buffer + 1);
26.*(newCharBuffer +newCharLen +2) = *(buffer + 2);
27.newCharLen += 3;
28.oldCharLen += 2;
29.}
30.}
31.newCharBuffer[newCharLen] = ''\0'';
32.CString string1 ;
33.string1.Format("%s",newCharBuffer);
34.finalCharBuffer = new char[newCharLen+1];
35.memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
36.return finalCharBuffer;
37.}

程序都非常的简单,由于实在太穷。已经吃了两天的方便面。所以现在头昏,程序的详细说明就不写了。程序员到了像我这样的地步也真是少见。工资低没有办法。哎!!!!