Jsoncpp 中文汉字unicode乱码解决及标点问题解决

来源:互联网 发布:手机淘宝不能搜索 编辑:程序博客网 时间:2024/06/04 01:00

由于项目中用到了JSON数据结构,客户端采用VC编写,服务端数据接口采用PHP编写,遇到了JSON中文Unicode编码后,客户端出现了乱码的情形。

网上有一个方法比较好用,就是修改json_reader.cpp中的codePointToUTF8函数。

修改为以下:

// Converts a single BMP code point to the multibyte encoding of the "chs"
// locale by temporarily switching the C locale.
//
// Returns an empty string when the current locale cannot be queried, the "chs"
// locale cannot be selected, or the character cannot be converted, so that the
// caller can fall back to a regular UTF-8 sequence.
//
// Needs <locale.h>, <stdlib.h> and <string>.
//
// NOTE(review): setlocale() modifies process-wide state and is called for
// every converted character; confirm that no other thread depends on the C
// locale while JSON is being parsed.
static inline JSONCPP_STRING codePointToChsMultibyte(unsigned int cp) {
  JSONCPP_STRING converted;

  // Copy the current locale name right away: the buffer returned by
  // setlocale() may be overwritten by the next setlocale() call.
  const char* current = setlocale(LC_ALL, NULL);
  if (current == NULL)
    return converted;
  const std::string savedLocale(current);

  if (setlocale(LC_ALL, "chs") == NULL)
    return converted; // locale unavailable; nothing was changed

  const wchar_t src[2] = { static_cast<wchar_t>(cp), L'\0' };
  char dest[5] = { 0 };
#if defined(_MSC_VER)
  if (wcstombs_s(NULL, dest, sizeof(dest), src, sizeof(dest) - 1) == 0)
    converted = dest;
#else
  if (wcstombs(dest, src, sizeof(dest) - 1) != static_cast<size_t>(-1))
    converted = dest;
#endif

  // Always restore the caller's locale, whether or not the conversion worked.
  setlocale(LC_ALL, savedLocale.c_str());
  return converted;
}

// Encodes the Unicode code point `cp` as a byte string.
//
// Code points are encoded as UTF-8 (1 to 4 bytes), except for the ranges
// 0x4E00-0x9FA5 and 0xF900-0xFA2D, which are converted to the multibyte
// encoding of the "chs" locale instead. If that conversion is not possible,
// the character is emitted as ordinary 3-byte UTF-8 rather than being dropped.
//
// Returns an empty string for values above 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    const bool isHanzi = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                         (cp >= 0xF900 && cp <= 0xFA2D);
    if (isHanzi)
      result = codePointToChsMultibyte(cp);

    // Either not a Chinese character or the locale conversion failed:
    // emit the standard 3-byte UTF-8 sequence.
    if (result.empty()) {
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}

经过以上修改,中文确实能显示了,但是中文的标点符号仍为乱码,于是查找了中文标点的Unicode编码,只要在上面处理中文汉字的地方,加入标点编码的处理即可。

句号    0x3002  。   
问号    0xFF1F  ?   
叹号       0xFF01  !   
逗号    0xFF0C  ,
顿号    0x3001  、   
分号    0xFF1B  ;   
冒号    0xFF1A  :   
引号    0x300C  「   
      0x300D  」   
引号    0x300E  『   
      0x300F  』   
引号    0x2018  ‘   
      0x2019  ’   
引号    0x201C  “   
      0x201D  ”   
括号    0xFF08  (   
      0xFF09  )   
括号    0x3014  〔   
      0x3015  〕   
括号    0x3010  【   
      0x3011  】   
破折号      0x2014  —   
省略号      0x2026  …   
连接号      0x2013  –   
间隔号      0xFF0E  .   
书名号      0x300A  《   
       0x300B  》   
书名号      0x3008  〈   
       0x3009  〉


最终修改后的函数为:


// Returns true when `cp` is one of the Chinese punctuation marks that must be
// converted together with the Chinese characters.
static inline bool isChinesePunctuation(unsigned int cp) {
  switch (cp) {
  case 0x3002: // ideographic full stop
  case 0xFF1F: // fullwidth question mark
  case 0xFF01: // fullwidth exclamation mark
  case 0xFF0C: // fullwidth comma
  case 0x3001: // ideographic comma
  case 0xFF1B: // fullwidth semicolon
  case 0xFF1A: // fullwidth colon
  case 0x300C: // corner brackets
  case 0x300D:
  case 0x300E: // white corner brackets
  case 0x300F:
  case 0x2018: // single quotation marks
  case 0x2019:
  case 0x201C: // double quotation marks
  case 0x201D:
  case 0xFF08: // fullwidth parentheses
  case 0xFF09:
  case 0x3014: // tortoise shell brackets
  case 0x3015:
  case 0x3010: // black lenticular brackets
  case 0x3011:
  case 0x2014: // em dash
  case 0x2026: // horizontal ellipsis
  case 0x2013: // en dash
  case 0xFF0E: // fullwidth full stop
  case 0x300A: // double angle brackets
  case 0x300B:
  case 0x3008: // angle brackets
  case 0x3009:
    return true;
  default:
    return false;
  }
}

// Converts a single BMP code point to the multibyte encoding of the "chs"
// locale by temporarily switching the C locale.
//
// Returns an empty string when the current locale cannot be queried, the "chs"
// locale cannot be selected, or the character cannot be converted, so that the
// caller can fall back to a regular UTF-8 sequence.
//
// Needs <locale.h>, <stdlib.h> and <string>.
//
// NOTE(review): setlocale() modifies process-wide state and is called for
// every converted character; confirm that no other thread depends on the C
// locale while JSON is being parsed.
static inline JSONCPP_STRING codePointToChsMultibyte(unsigned int cp) {
  JSONCPP_STRING converted;

  // Copy the current locale name right away: the buffer returned by
  // setlocale() may be overwritten by the next setlocale() call.
  const char* current = setlocale(LC_ALL, NULL);
  if (current == NULL)
    return converted;
  const std::string savedLocale(current);

  if (setlocale(LC_ALL, "chs") == NULL)
    return converted; // locale unavailable; nothing was changed

  const wchar_t src[2] = { static_cast<wchar_t>(cp), L'\0' };
  char dest[5] = { 0 };
#if defined(_MSC_VER)
  if (wcstombs_s(NULL, dest, sizeof(dest), src, sizeof(dest) - 1) == 0)
    converted = dest;
#else
  if (wcstombs(dest, src, sizeof(dest) - 1) != static_cast<size_t>(-1))
    converted = dest;
#endif

  // Always restore the caller's locale, whether or not the conversion worked.
  setlocale(LC_ALL, savedLocale.c_str());
  return converted;
}

// Encodes the Unicode code point `cp` as a byte string.
//
// Code points are encoded as UTF-8 (1 to 4 bytes), except for the ranges
// 0x4E00-0x9FA5 and 0xF900-0xFA2D and the punctuation marks accepted by
// isChinesePunctuation(), which are converted to the multibyte encoding of the
// "chs" locale instead. If that conversion is not possible, the character is
// emitted as ordinary 3-byte UTF-8 rather than being dropped.
//
// Returns an empty string for values above 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8
  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    const bool isHanzi = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                         (cp >= 0xF900 && cp <= 0xFA2D);
    if (isHanzi || isChinesePunctuation(cp))
      result = codePointToChsMultibyte(cp);

    // Either not Chinese text or the locale conversion failed:
    // emit the standard 3-byte UTF-8 sequence.
    if (result.empty()) {
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }
  return result;
}


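即可完美解决中文乱码问题。需要注意:修改后,该函数对汉字和上述标点返回的是 chs 区域设置下的本地多字节编码(即 GBK,代码页 936),而不再是 UTF-8,因此只适用于按 ANSI 编码显示字符串的 VC 客户端;另外 setlocale 会修改整个进程的区域设置,多线程程序中使用时需要谨慎。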