汉字 UTF-8 转 Unicode

来源:互联网 发布:单片机课后答案霍孟友 编辑:程序博客网 时间:2024/05/20 18:40
/*
 * Convert UTF-8 encoded characters (for example Chinese text) read from
 * standard input into their 16-bit Unicode code points and print one
 * "<character>=<decimal code point>" line per character.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define u8 unsigned char
#define u16 unsigned short

enum {
    LINE_BUF_SIZE = 256,   /* bytes read from stdin per fgets() call */
    MAX_PRINT_SEQ_LEN = 4  /* longest sequence ever copied for printing */
};

/*
 * Return the number of leading 1 bits in firstCh, or 1 when the top bit is
 * clear.  For a well-formed UTF-8 lead byte this is the length of the whole
 * sequence in bytes.  A continuation byte (10xxxxxx) also yields 1, and the
 * bytes 0xF8..0xFF yield 5..8; callers must treat those as invalid.
 */
static int getUtf8ByteNumForWord(u8 firstCh)
{
    int num = 0;
    int mask = 0x80;

    while (firstCh & mask) {
        num++;
        mask >>= 1;
    }
    return (num == 0) ? 1 : num;
}

/* Return non-zero when byte has the 10xxxxxx form of a UTF-8 continuation byte. */
static int isUtf8Continuation(u8 byte)
{
    return (byte & 0xC0) == 0x80;
}

/*
 * Decode the UTF-8 sequence starting at utf8 into a 16-bit code point.
 *
 * utf8       - points at the first byte of a sequence inside a
 *              NUL-terminated buffer.  Continuation bytes are checked in
 *              order, so a truncated sequence stops at the terminator
 *              instead of reading past it.
 * unicodeKey - receives the code point; set to 0 on failure.
 *
 * Returns 0 on success.  Returns -1 when either pointer is NULL, when the
 * sequence is malformed (stray or missing continuation byte), or when it is
 * a 4-byte or longer form whose value does not fit in 16 bits.
 *
 * Overlong encodings and surrogate code points are not rejected.
 */
int utf8ToUnicode(const u8 *utf8, u16 *unicodeKey)
{
    unsigned int codePoint = 0;
    int status = 0;

    if (utf8 == NULL || unicodeKey == NULL) {
        return -1;
    }

    switch (getUtf8ByteNumForWord(utf8[0])) {
    case 1:
        /* Length 1 is reported for ASCII and for stray continuation bytes. */
        if (utf8[0] & 0x80) {
            status = -1;
        } else {
            codePoint = utf8[0];
        }
        break;
    case 2:
        /* 110xxxxx 10yyyyyy -> xxxxxyyyyyy */
        if (!isUtf8Continuation(utf8[1])) {
            status = -1;
        } else {
            codePoint = ((utf8[0] & 0x1Fu) << 6) | (utf8[1] & 0x3Fu);
        }
        break;
    case 3:
        /* 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz */
        if (!isUtf8Continuation(utf8[1]) || !isUtf8Continuation(utf8[2])) {
            status = -1;
        } else {
            codePoint = ((utf8[0] & 0x0Fu) << 12)
                      | ((utf8[1] & 0x3Fu) << 6)
                      | (utf8[2] & 0x3Fu);
        }
        break;
    default:
        /* 4-byte and longer forms cannot be represented in a u16. */
        status = -1;
        break;
    }

    /* Store by value so the result does not depend on host byte order. */
    *unicodeKey = (u16)codePoint;
    return status;
}

/*
 * Define UTF8_TO_UNICODE_NO_MAIN to compile only the conversion routines,
 * e.g. when linking them into a unit test that supplies its own main().
 */
#ifndef UTF8_TO_UNICODE_NO_MAIN
/*
 * Read stdin until EOF and print "<character>=<code point>" for every
 * character that decodes to a 16-bit code point.  Undecodable bytes are
 * reported on stderr and skipped.
 */
int main(void)
{
    char line[LINE_BUF_SIZE];
    size_t carried = 0; /* bytes of a sequence split across two reads */

    while (fgets(line + carried, (int)(sizeof line - carried), stdin) != NULL) {
        const size_t len = carried + strlen(line + carried);
        const int chunkComplete = (len > 0 && line[len - 1] == '\n');
        size_t pos = 0;

        carried = 0;
        while (pos < len && line[pos] != '\n') {
            const u8 *seq = (const u8 *)line + pos;
            const size_t seqLen = (size_t)getUtf8ByteNumForWord(seq[0]);
            char printable[MAX_PRINT_SEQ_LEN + 1];
            u16 unicode = 0;

            if (pos + seqLen > len && !chunkComplete) {
                /* The buffer ended mid-sequence: keep the tail and let the
                 * next fgets() append the remaining bytes behind it. */
                carried = len - pos;
                memmove(line, line + pos, carried);
                break;
            }

            if (utf8ToUnicode(seq, &unicode) != 0) {
                /* Resynchronise: drop the lead byte and the continuation
                 * bytes that belong to it. */
                size_t skip = 1;

                while (skip < seqLen && pos + skip < len
                       && isUtf8Continuation(seq[skip])) {
                    skip++;
                }
                fprintf(stderr,
                        "skipped %zu byte(s): malformed or outside the 16-bit range\n",
                        skip);
                pos += skip;
                continue;
            }

            /* A successful decode consumed 1 to 3 bytes. */
            memcpy(printable, seq, seqLen);
            printable[seqLen] = '\0';
            printf("%s=%d\n", printable, unicode);
            pos += seqLen;
        }
    }

    if (carried > 0) {
        fprintf(stderr, "skipped %zu byte(s): input ended inside a sequence\n",
                carried);
    }
    return ferror(stdin) ? EXIT_FAILURE : EXIT_SUCCESS;
}
#endif /* UTF8_TO_UNICODE_NO_MAIN */

0 0