ifstream打印汉字乱码问题 UTF-8转GB2312

来源：互联网 | 发布：内存条js | 编辑：程序博客网 | 时间：2024/05/22 14:18
// ConsoleApplication2.cpp : 定义控制台应用程序的入口点。//#include "stdafx.h"#include <windows.h>#include <stdio.h>#include <tchar.h>#include <iostream>#include <fstream>using namespace std;WCHAR * UTF_8ToUnicode(char *pText, WCHAR &unicode){/*UTF-8是一种多字节编码的字符集，表示一个Unicode字符时，它可以是1个至多个字节，在表示上有规律：1字节：0xxxxxxx2字节：110xxxxx 10xxxxxx3字节：1110xxxx 10xxxxxx 10xxxxxx4字节：11110xxx 10xxxxxx 10xxxxxx 10xxxxxx*/char *uchar = (char *)&unicode;uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);return &unicode;}char * UnicodeToGB2312(WCHAR uData, char buffer[2]){WideCharToMultiByte(CP_ACP, NULL, &uData, 1, buffer, sizeof(WCHAR), NULL, NULL);return buffer;}char * TranslateUTF8ToGB(char *str, size_t len){char * newCharBuffer = new char[len];int index = 0;int nCBIndex = 0;WCHAR wTemp = 0;char cTemp[2] = " ";while (index < len){if (str[index] == 0)break;else if (str[index] > 0)  // 如果是GB2312的字符  {newCharBuffer[nCBIndex] = str[index];    //直接复制  index += 1;    //源字符串偏移量1  nCBIndex += 1;   //目标字符串偏移量1  }else      //如果是UTF-8的字符  {UTF_8ToUnicode(str + index, wTemp);   //先把UTF-8转成Unicode  UnicodeToGB2312(wTemp, &newCharBuffer[nCBIndex]); //再把Unicode 转成 GB2312  index += 3;    //源字符串偏移量3  nCBIndex += 2;   //目标字符串偏移量2  因为一个中文UTF-8占3个字节，GB2312占两个字节  }}newCharBuffer[nCBIndex] = '\0'; //结束符  strcpy(str, newCharBuffer);delete newCharBuffer;  //避免内存泄漏，这是对源代码的稍许修改  newCharBuffer = NULL;return str;}int main(){ifstream in("ReadMe.txt", ios::in);if (!in.is_open()){cout << "open fail..." << endl;in.close();return 0;}char readBuff[1024];while (!in.eof()){in.getline(readBuff, 1024);TranslateUTF8ToGB(readBuff, 1024);cout << readBuff << endl;}in.close();    return 0;}
阅读全文
0 0