(ZT)string is utf-8

来源:互联网 发布:苹果手机备份软件 编辑:程序博客网 时间:2024/05/22 00:42

一般工程中, 文本不是utf-8就是gbk.
那先判断文本是否为utf-8, 如果不是就按照gbk走.
同事在网上找了一段代码, 我整理一下,也备一份.
准备了一段utf-8的字符串缓冲区做测试, 判断得还挺准的.

实验

// prj.cpp : 定义控制台应用程序的入口点。//#include "stdafx.h"#include <stdlib.h>#include <stdio.h>#include "class_is_utf8.h"int _tmain(int argc, _TCHAR* argv[]){    // plain text is "abc中文测试"    // save as utf-8,don't input utf-8 header(0xef,0xbb,0xbf), because the input isn't file content    unsigned char szMsg[] = {0x61,0x62,0x63,0xe4,0xb8,0xad,0xe6,0x96,0x87,0xe6,0xb5,0x8b,0xe8,0xaf,0x95,0x00,0x00};    // save as ansi, detect is not utf-8//     {//         0x61,0x62,0x63,0xd6,0xd0,0xce,0xc4,0xb2,0xe2,0xca,0xd4,0x00,0x00//     };    // {'a', 'b', 'c', '\0', '\0'}; // not utf-8    bool b_rc = class_is_utf8::is_utf8((const char*)&szMsg[0]);    printf("class_is_utf8::is_utf8 = %s\n", (b_rc ? "true" : "false"));    /** run result    class_is_utf8::is_utf8 = true    */    system("pause");    return 0;}
// @file class_is_utf8.h
#ifndef CLASS_IS_UTF8_H
#define CLASS_IS_UTF8_H

/**
 * Holder for a UTF-8 detection routine.
 *
 * The only functionality is the static is_utf8(); the constructor and the
 * virtual destructor are declared here and defined in class_is_utf8.cpp.
 */
class class_is_utf8
{
public:
    class_is_utf8(void);
    virtual ~class_is_utf8(void);

    /**
     * Tells whether the given string is UTF-8.
     *
     * @param str string to inspect, passed as a const char*
     * @return true when the string is judged to be UTF-8, false otherwise
     */
    static bool is_utf8(const char* str);
};

#endif // #ifndef CLASS_IS_UTF8_H
// @file class_is_utf8.cpp#include "StdAfx.h"#include <stdlib.h>#include <stdio.h>#include <string.h>#include "class_is_utf8.h"class_is_utf8::class_is_utf8(void){}class_is_utf8::~class_is_utf8(void){}// http://www.fileformat.info/info/unicode/utf8.htmbool class_is_utf8::is_utf8(const char* str){    int i = 0;         int size = strlen(str);           while(i < size)         {                 int step = 0;                 if((str[i] & 0x80) == 0x00)                {                         step = 1;                 }        else if((str[i] & 0xe0) == 0xc0)                 {                         if(i + 1 >= size)             {                return false;              }            if((str[i + 1] & 0xc0) != 0x80)             {                return false;             }            step = 2;                 }        else if((str[i] & 0xf0) == 0xe0)                 {                         if(i + 2 >= size)             {                return false;               }            if((str[i + 1] & 0xc0) != 0x80)             {                return false;               }            if((str[i + 2] & 0xc0) != 0x80)             {                return false;             }            step = 3;                }        else          {                         return false;                 }         i += step;        }     if(i == size)     {        return true;      }    return false; }
原创粉丝点击