纯c 实现utf8与ucs2 相互转换
来源:互联网 发布:淘宝 司法拍卖 房产 编辑:程序博客网 时间:2024/06/05 07:21
网上很多代码,好多都是单个字符转码,自己整理了一下分享一下,望指正
UCS-2:每个字符固定占 2 字节的 Unicode 编码(只能表示 U+0000–U+FFFF)
UTF-8:变长多字节编码(本文代码只处理 1~3 字节的序列,对应 UCS-2 的取值范围)
/**
 * Convert a buffer of UCS-2 code units to UTF-8.
 *
 * Every input character occupies two bytes and is interpreted in the host's
 * byte order (assumes a 2-byte unsigned short). Code units below 0x80 produce
 * one output byte, those below 0x800 produce two, and all others produce
 * three. The output is not NUL-terminated.
 *
 * @param in    UCS-2 input bytes.
 * @param ilen  Length of @p in in bytes; a trailing odd byte is ignored.
 * @param out   Destination buffer for the UTF-8 bytes.
 * @param olen  Capacity of @p out in bytes.
 * @return Number of bytes written to @p out; 0 when @p in or @p out is NULL;
 *         -1 when @p out is too small (bytes converted before the overflow
 *         was detected remain in @p out).
 */
int ucs2_to_utf8(const unsigned char *in, int ilen, unsigned char *out, int olen)
{
    int length = 0;
    int i;

    if (!in || !out || ilen < 2)
        return 0;

    for (i = 0; i <= ilen - 2; i += 2) {
        unsigned short ucs2_code = 0;
        unsigned char *raw = (unsigned char *)&ucs2_code;

        /* Copy byte-wise: the input may be unaligned, and reading it through
         * an unsigned short pointer would violate the aliasing rules. */
        raw[0] = in[i];
        raw[1] = in[i + 1];

        if (ucs2_code < 0x0080) {
            /* 1-byte UTF-8 character. */
            if (olen - length < 1)
                return -1;
            /* Use the decoded value, not the first byte in memory, so the
             * result does not depend on host endianness. */
            out[length++] = (unsigned char)ucs2_code;
        } else if (ucs2_code < 0x0800) {
            /* 2-byte UTF-8 character: 110xxxxx 10xxxxxx. */
            if (olen - length < 2)
                return -1;
            out[length++] = (unsigned char)(0xC0 | (ucs2_code >> 6));
            out[length++] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
        } else {
            /* 3-byte UTF-8 character: 1110xxxx 10xxxxxx 10xxxxxx. */
            if (olen - length < 3)
                return -1;
            out[length++] = (unsigned char)(0xE0 | (ucs2_code >> 12));
            out[length++] = (unsigned char)(0x80 | ((ucs2_code >> 6) & 0x003F));
            out[length++] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
        }
    }

    return length;
}
/**
 * Convert a UTF-8 buffer to UCS-2 code units.
 *
 * Handles 1-, 2- and 3-byte UTF-8 sequences; each decoded character is
 * stored as two bytes in the host's byte order (assumes a 2-byte unsigned
 * short). Conversion stops at the first byte that does not start a
 * recognised sequence, at a sequence that is cut off by the end of the
 * input, or when fewer than two bytes of output space remain.
 *
 * @param in    UTF-8 input bytes.
 * @param ilen  Length of @p in in bytes.
 * @param out   Destination buffer for the UCS-2 code units.
 * @param olen  Capacity of @p out in bytes.
 * @return Number of bytes written to @p out (always even); 0 when @p in or
 *         @p out is NULL.
 */
int utf8_to_ucs2(const unsigned char *in, int ilen, unsigned char *out,
        int olen) {
    const unsigned char *cur;
    const unsigned char *end;
    int ret = 0;

    if (!in || !out || ilen <= 0)
        return 0;

    cur = in;
    end = in + ilen;

    while (cur < end && olen - ret >= 2) {
        unsigned short ucs2_code;

        if ((cur[0] & 0x80) == 0x00) {
            /* 1-byte UTF-8 character. */
            ucs2_code = cur[0];
            cur += 1;
        } else if ((cur[0] & 0xE0) == 0xC0 && end - cur >= 2
                && (cur[1] & 0xC0) == 0x80) {
            /* 2-byte UTF-8 character; the length check keeps the read of
             * the continuation byte inside the input buffer. */
            ucs2_code = (unsigned short)(((cur[0] & 0x1F) << 6)
                    | (cur[1] & 0x3F));
            cur += 2;
        } else if ((cur[0] & 0xF0) == 0xE0 && end - cur >= 3
                && (cur[1] & 0xC0) == 0x80 && (cur[2] & 0xC0) == 0x80) {
            /* 3-byte UTF-8 character. */
            ucs2_code = (unsigned short)(((cur[0] & 0x0F) << 12)
                    | ((cur[1] & 0x3F) << 6)
                    | (cur[2] & 0x3F));
            cur += 3;
        } else {
            /* Invalid, truncated, or longer-than-3-byte sequence. */
            break;
        }

        memcpy(out + ret, &ucs2_code, 2);
        ret += 2;
    }

    return ret;
}
0 0
- 纯c 实现utf8与ucs2 相互转换
- utf8与ucs2互转
- GB2312与UTF8相互转换
- UNICODE 与 UTF8 的相互转换 (c++)版
- GBK与UTF8编码相互转换
- 纯C实现unicode-utf8互转
- 纯C实现unicode-utf8互转
- 纯C实现unicode-utf8互转
- 【miscellaneous】【C/C++语言】UTF8与GBK字符编码之间的相互转换
- 实现UTF8与汉字相互转换的JavaScript代码(收藏)
- pb字符串实现GB2312与Unicode、UTF8之间的相互转换
- UTF8、GB2312相互转换
- [C/C++]_[utf8和unicode的相互转换]
- Unicode,UTF8,GB2312,UCS2,GBK之间的转换
- 不能在 UTF8 和 UCS2 之间转换: failUTF8Conv
- Linux下GB2313与UTF8的相互转换
- ANSI, UNICODE与UTF8相互转换模板类
- UTF8与GBK字符编码之间的相互转换
- 数据结构 读书笔记
- 获取Windows操作系统版本和位数
- UIKit & UIWindow
- poj3678 Katu Puzzle 【解法一】
- Error 30 error C2664: 'HANDLE FindFirstFileW(LPCWSTR,LPWIN32_FIND_DATAW)' : 不能将参数 1 从“char [260]”转换为
- 纯c 实现utf8与ucs2 相互转换
- 常见排序算法小结
- HDU 2647 Reward
- Anroid搭建一个局域网Web服务器
- 基于变更的测试 - 提高测试效率和软件质量的最佳方法
- windows 命令符python3实现tab自动补全功能
- dubbo+zookeeper+dubbo管理控制台实践demo
- 对称加密和非对称加密总结
- HDU4009 Transfer water(最小树形图)