UTF-8 转 Unicode(wchar_t 宽字符串)

来源:互联网 发布:matlab最优化算法例题 编辑:程序博客网 时间:2024/05/22 08:25

// Returns the length in bytes of the UTF-8 sequence introduced by firstCh.
//
//   0xxxxxxx -> 1 (ASCII, including control characters such as '\t')
//   110xxxxx -> 2
//   1110xxxx -> 3
//   11110xxx -> 4
//
// Returns 0 when firstCh cannot start a sequence (a continuation byte
// 10xxxxxx, or 0xF8..0xFF).
int GetUtf8ByteNumForWord(char firstCh)
{
    // Plain char may be signed; classify on the unsigned bit pattern.
    const unsigned char lead = static_cast<unsigned char>(firstCh);

    if (lead < 0x80)
        return 1;
    if ((lead & 0xE0) == 0xC0)
        return 2;
    if ((lead & 0xF0) == 0xE0)
        return 3;
    if ((lead & 0xF8) == 0xF0)
        return 4;
    return 0;
}

// Decodes the first len bytes of the UTF-8 buffer utf8 into the wide string
// unicode and always null-terminates the result.
//
// Parameters:
//   utf8    - input bytes; does not need to be null-terminated.
//   len     - number of bytes of utf8 to decode.
//   unicode - output buffer; the caller must provide room for at least
//             len + 1 wchar_t elements (no sequence yields more output
//             elements than it has input bytes).
//
// Code points above U+FFFF are written as a UTF-16 surrogate pair when
// wchar_t is 16 bits wide and as a single element otherwise.
//
// Decoding stops at the first invalid lead byte, bad continuation byte,
// sequence truncated by len, or value above U+10FFFF; everything decoded up
// to that point is kept. Overlong encodings are not rejected.
void Utf8ToUnicode(const char* utf8, int len, wchar_t* unicode)
{
    if (!unicode)
        return;

    int in = 0;
    int out = 0;

    while (utf8 && in < len)
    {
        const unsigned char lead = static_cast<unsigned char>(utf8[in]);
        const int byteCount = GetUtf8ByteNumForWord(utf8[in]);

        // Stop on an invalid lead byte or a sequence that would run past len.
        if (byteCount == 0 || byteCount > len - in)
            break;

        // A multi-byte lead keeps only its low (7 - byteCount) payload bits:
        // 0x1F for 2 bytes, 0x0F for 3 bytes, 0x07 for 4 bytes.
        unsigned long codePoint =
            (byteCount == 1) ? lead : (lead & (0xFFu >> (byteCount + 1)));

        // Every continuation byte has the form 10xxxxxx and adds 6 bits.
        bool wellFormed = true;
        for (int k = 1; k < byteCount; ++k)
        {
            const unsigned char trail = static_cast<unsigned char>(utf8[in + k]);
            if ((trail & 0xC0) != 0x80)
            {
                wellFormed = false;
                break;
            }
            codePoint = (codePoint << 6) | (trail & 0x3Fu);
        }
        if (!wellFormed || codePoint > 0x10FFFFul)
            break;

        if (codePoint > 0xFFFFul && sizeof(wchar_t) == 2)
        {
            // 16-bit wchar_t: split into a high/low surrogate pair.
            const unsigned long offset = codePoint - 0x10000ul;
            unicode[out++] = static_cast<wchar_t>(0xD800ul + (offset >> 10));
            unicode[out++] = static_cast<wchar_t>(0xDC00ul + (offset & 0x3FFul));
        }
        else
        {
            unicode[out++] = static_cast<wchar_t>(codePoint);
        }

        in += byteCount;
    }

    unicode[out] = L'\0';
}

测试代码如下:

std::ifstream fin("debug\\Test.txt");const unsigned int L_MAX_LINE=1024;char utf8[L_MAX_LINE];wchar_t unicode[L_MAX_LINE];while(fin.getline(utf8,L_MAX_LINE)){    Utf8ToUnicode(utf8,strlen(utf8),unicode);    MessageBoxW(0,unicode,0,0);}fin.close();

原创粉丝点击