UTF-8 转换为 GB2312 或 GBK

来源:互联网 发布:时时彩 彩票软件 源码 编辑:程序博客网 时间:2024/05/22 03:08

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////
// CodeConvert.h: interface for the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_)
#define AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

//实现不同字符编码之间的转换
class CCodeConvert 
{
public:
 void ConvertGBKToUtf8(CString& strGBK);
 static void ConvertUtf8ToGBK(CString& strUtf8);
 static enum {
  UTF8_TO_GB2312,
  GB2312_TO_UTF8,
 };
 static void Convert(LPCTSTR srcfile, LPCTSTR destfile, DWORD dwFlag=UTF8_TO_GB2312);
 static void UTF_8ToGB2312(string& pOut,char *pText, int pLen);
 static void GB2312ToUTF_8(string& pOut,char *pText, int pLen);
 // Unicode 转换成UTF-8
 static void UnicodeToUTF_8(char* pOut,WCHAR* pText);
 // GB2312 转换成 Unicode
 static void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer);
 // 把Unicode 转换成 GB2312
 static void UnicodeToGB2312(char* pOut,unsigned short uData);
 // 把UTF-8转换成Unicode
 static void UTF_8ToUnicode(WCHAR* pOut,char* pText);
 
 CCodeConvert();
 virtual ~CCodeConvert();

};

#endif // !defined(AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_)

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////
// CodeConvert.cpp: implementation of the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "popsvr.h"
#include "CodeConvert.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Default constructor. The class declares no data members, so there is
// nothing to initialise.
CCodeConvert::CCodeConvert()
{
}

// Destructor. No resources are owned by the class, so there is nothing to
// release.
CCodeConvert::~CCodeConvert()
{
}


// Decodes the 3-byte UTF-8 sequence starting at pText into a 16-bit value.
//   pOut  - destination; only its first two bytes are written, low-order
//           byte first, high-order byte second.
//   pText - must point at three readable bytes; they are treated as a
//           3-byte sequence without any validation.
void CCodeConvert::UTF_8ToUnicode(WCHAR* pOut,char *pText)
{
 const char lead = pText[0];
 const char middle = pText[1];
 const char tail = pText[2];

 // High-order byte: the 4 payload bits of the lead byte followed by the
 // upper 4 payload bits of the second byte.
 const char highByte = ((lead & 0x0F) << 4) | ((middle >> 2) & 0x0F);
 // Low-order byte: the lower 2 payload bits of the second byte followed by
 // the 6 payload bits of the third byte.
 const char lowByte = ((middle & 0x03) << 6) | (tail & 0x3F);

 char* raw = (char *)pOut;
 raw[1] = highByte;
 raw[0] = lowByte;
}

void CCodeConvert::UnicodeToGB2312(char* pOut,unsigned short uData)
{
 //WideCharToMultiByte(CP_ACP,NULL,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
 WideCharToMultiByte(CP_ACP,WC_COMPOSITECHECK,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
 return;
}

// Converts the two bytes at gbBuffer from the system ANSI code page (CP_ACP)
// into a single wide character stored at pOut.
//   pOut     - receives at most one WCHAR.
//   gbBuffer - must point at two readable bytes.
void CCodeConvert::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer)
{
 const int sourceBytes = 2;  // exactly two input bytes are consumed
 const int outputChars = 1;  // room for a single wide character
 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, sourceBytes, pOut, outputChars);
}

// Encodes one 16-bit Unicode code unit as a 3-byte UTF-8 sequence.
//   pOut  - receives exactly three bytes; no terminator is appended.
//   pText - points at the code unit to encode.
// The 3-byte form (1110xxxx 10yyyyyy 10zzzzzz) is always produced,
// whatever the value.
void CCodeConvert::UnicodeToUTF_8(char* pOut,WCHAR* pText)
{
 // Work on the numeric value instead of the raw bytes so the result does
 // not depend on the byte order of WCHAR in memory.
 const unsigned int code = static_cast<unsigned int>(*pText) & 0xFFFF;

 pOut[0] = static_cast<char>(0xE0 | ((code >> 12) & 0x0F)); // bits 15..12
 // Bits 11..6. The previous code took the two low bits of this group from
 // the high-order byte instead of the low-order byte, so bits 7..6 of the
 // code unit were encoded wrongly.
 pOut[1] = static_cast<char>(0x80 | ((code >> 6) & 0x3F));
 pOut[2] = static_cast<char>(0x80 | (code & 0x3F));         // bits 5..0
}
//  GB2312 =>UTF-8
void CCodeConvert::GB2312ToUTF_8(string& pOut,char *pText, int pLen)
{
 char buf[4];
 char* rst = new char[pLen + (pLen >> 2) + 2];
 
 memset(buf,0,4);
 memset(rst,0,pLen + (pLen >> 2) + 2);
 
 int i = 0;
 int j = 0; 
 while(i < pLen)
 {
  //如果是英文直接复制就可以
  if( *(pText + i) >= 0)
  {
   rst[j++] = pText[i++];
  }
  else
  {
   WCHAR pbuffer;
   Gb2312ToUnicode(&pbuffer,pText+i);
   
   UnicodeToUTF_8(buf,&pbuffer);
   
   unsigned short int tmp = 0;
   tmp = rst[j] = buf[0];
   tmp = rst[j+1] = buf[1];
   tmp = rst[j+2] = buf[2];
   
   
   j += 3;
   i += 2;
  }
 }
 rst[j] = '/0';

 //返回结果
 pOut = rst;  
 delete []rst; 
 
 return;
}
//UTF-8 => GB2312
void CCodeConvert::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
{
 TRACE("/r/nCCodeConvert::UTF_8ToGB2312");
 char * newBuf = new char[pLen+1];
  newBuf[pLen]=0x00;

  char Ctemp[4];
  memset(Ctemp,0,4);

  int i =0;
  int j = 0;
  while(i < pLen)
  {
   if(pText[i] > 0)
  {
   newBuf[j++] = pText[i++];   
  }
  else                
  {
   WCHAR Wtemp;
   UTF_8ToUnicode(&Wtemp,pText + i);
   UnicodeToGB2312(Ctemp,Wtemp);
   newBuf[j] = Ctemp[0];
   newBuf[j + 1] = Ctemp[1];

   i += 3;   
   j += 2;  
  }
  }//end while
  newBuf[j] = '/0';
  pOut = newBuf;
  delete []newBuf;
  return;
  //////////////////////////////////////////////////////////////////////////
}
// UTF-8 => GBK
// Converts strUtf8 in place from UTF-8 to the system ANSI code page (CP_ACP)
// by going through a temporary wide-character buffer. The string is left
// untouched if either size query reports failure.
// NOTE(review): the result is GBK only when CP_ACP is the GBK code page.
void CCodeConvert::ConvertUtf8ToGBK(CString& strUtf8)
{
 TRACE("\r\nCCodeConvert::UTF_8ToGBK");

    // Step 1: UTF-8 -> wide characters. The first call only measures; the
    // reported size includes the terminator because the length is -1.
    const int cchWide = MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)strUtf8, -1, NULL, 0);
    if (cchWide <= 0)
    {
        return;
    }
    // WCHAR (not 'unsigned short') is the element type the API expects.
    WCHAR* wszGBK = new WCHAR[cchWide + 1];
    memset(wszGBK, 0, (cchWide + 1) * sizeof(WCHAR));
    MultiByteToWideChar(CP_UTF8, 0, (LPCTSTR)strUtf8, -1, wszGBK, cchWide);

    // Step 2: wide characters -> ANSI code page.
    const int cbGBK = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);
    if (cbGBK > 0)
    {
        char* szGBK = new char[cbGBK + 1];
        memset(szGBK, 0, cbGBK + 1);
        WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, szGBK, cbGBK, NULL, NULL);

        strUtf8 = szGBK;
        delete[] szGBK;
    }
    delete[] wszGBK;
}

// Reads the whole of srcfile into memory, converts it and writes the result
// to destfile (created or truncated).
//   dwFlag == UTF8_TO_GB2312 : UTF-8 => GB2312 via UTF_8ToGB2312().
//   dwFlag == GB2312_TO_UTF8 : UTF-8 => GBK via ConvertUtf8ToGBK().
//   any other value          : the source is read but nothing is written.
// NOTE(review): the second selector's name says GB2312 => UTF-8 but the
// conversion that has always been run for it is UTF-8 => GBK; that behaviour
// is kept here - confirm which one is intended.
// Exceptions thrown by the file operations propagate to the caller; the
// temporary buffer is released first.
void CCodeConvert::Convert(LPCTSTR srcfile, LPCTSTR destfile, DWORD dwFlag)
{
 TRACE("\r\nCCodeConvert::Convert");

 // The source is only read, so do not ask for write access (which fails on
 // read-only files).
 CFile file(srcfile,CFile::modeRead);
 DWORD nlen = file.GetLength();
 char *s = new char[nlen+1];

 try
 {
  // Trust the number of bytes actually read, not the reported length.
  nlen = file.ReadHuge(s,nlen);
  s[nlen]=0x00;
  file.Close();

  switch(dwFlag)
  {
  case UTF8_TO_GB2312: //UTF-8 => GB2312
   {
    CFile newfile(destfile,CFile::modeCreate | CFile::modeWrite);
    string str;
    UTF_8ToGB2312(str,s,nlen);
    newfile.WriteHuge(str.c_str(),str.length());
    newfile.Close();
   }
   break;
  case GB2312_TO_UTF8: //UTF-8 => GBK (see NOTE above)
   {
    CFile newfile(destfile,CFile::modeCreate | CFile::modeWrite);
    // Copies the buffer up to its first NUL byte.
    CString strBuf(s);
    ConvertUtf8ToGBK(strBuf);
    newfile.WriteHuge(strBuf.GetBuffer(0),strBuf.GetLength());
    newfile.Close();
   }
   break;
  }
 }
 catch(...)
 {
  // Do not leak the read buffer when a file operation throws.
  delete[] s;
  throw;
 }

 delete[] s;
}


// GBK => UTF-8
// Converts strGBK in place from the system ANSI code page (CP_ACP) to UTF-8
// by going through a temporary wide-character buffer. The string is left
// untouched if either size query reports failure.
// NOTE(review): the input is interpreted as GBK only when CP_ACP is the GBK
// code page.
void CCodeConvert::ConvertGBKToUtf8(CString &strGBK)
{
    // Step 1: ANSI code page -> wide characters. The first call only
    // measures; the reported size includes the terminator because the
    // length is -1.
    const int cchWide = MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, NULL, 0);
    if (cchWide <= 0)
    {
        return;
    }
    // WCHAR (not 'unsigned short') is the element type the API expects.
    WCHAR* wszUtf8 = new WCHAR[cchWide + 1];
    memset(wszUtf8, 0, (cchWide + 1) * sizeof(WCHAR));
    MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK, -1, wszUtf8, cchWide);

    // Step 2: wide characters -> UTF-8.
    const int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, NULL, 0, NULL, NULL);
    if (cbUtf8 > 0)
    {
        char* szUtf8 = new char[cbUtf8 + 1];
        memset(szUtf8, 0, cbUtf8 + 1);
        WideCharToMultiByte(CP_UTF8, 0, wszUtf8, -1, szUtf8, cbUtf8, NULL, NULL);

        strGBK = szUtf8;
        delete[] szUtf8;
    }
    delete[] wszUtf8;
}