修改tinyxml让其支持解析特殊字符

来源:互联网 发布:马基雅维利主义知乎 编辑:程序博客网 时间:2024/04/28 08:59

修改tinyxmlparser.cpp文件中的部分代码如下:

// Lookup table consulted by TiXmlBase::GetEntity.
// Each row is { entity text as written in the XML, strlen() of that text,
// replacement text }. GetEntity asserts that the stored length equals
// strlen() of the entity text, so the two must be kept in sync.
//
// The first five rows are the predefined XML entities; the last three map
// decimal character references to Greek letters. The bytes stored for the
// Greek letters depend on the character set this file is compiled with, and
// each replacement (plus its terminator) has to fit in Entity::chr.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
{
    { "&amp;",  5, "&" },
    { "&lt;",   4, "<" },
    { "&gt;",   4, ">" },
    { "&quot;", 6, "\"" },
    { "&apos;", 6, "\'" },
    { "&#955;", 6, "λ" },
    { "&#934;", 6, "Φ" },
    { "&#951;", 6, "η" }
};

const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ){// Presume an entity, and pull it out.    TIXML_STRING ent;int i;*length = 0;    // Now try to match it.for( i=0; i<NUM_ENTITY; ++i ){if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ){assert( strlen( entity[i].str ) == entity[i].strLength );strncat(value, entity[i].chr, strlen(entity[i].chr));*length = strlen(entity[i].chr);return ( p + entity[i].strLength );}}if ( *(p+1) && *(p+1) == '#' && *(p+2) ){unsigned long ucs = 0;ptrdiff_t delta = 0;unsigned mult = 1;if ( *(p+2) == 'x' ){// Hexadecimal.if ( !*(p+3) ) return 0;const char* q = p+3;q = strchr( q, ';' );if ( !q || !*q ) return 0;delta = q-p;--q;while ( *q != 'x' ){if ( *q >= '0' && *q <= '9' )ucs += mult * (*q - '0');else if ( *q >= 'a' && *q <= 'f' )ucs += mult * (*q - 'a' + 10);else if ( *q >= 'A' && *q <= 'F' )ucs += mult * (*q - 'A' + 10 );else return 0;mult *= 16;--q;}}else{// Decimal.if ( !*(p+2) ) return 0;const char* q = p+2;q = strchr( q, ';' );if ( !q || !*q ) return 0;delta = q-p;--q;while ( *q != '#' ){if ( *q >= '0' && *q <= '9' )ucs += mult * (*q - '0');else return 0;mult *= 10;--q;}}if ( encoding == TIXML_ENCODING_UTF8 ){// convert the UCS to UTF-8ConvertUTF32ToUTF8( ucs, value, length );}else{*value = (char)ucs;*length = 1;}return p + delta + 1;}// So it wasn't an entity, its unrecognized, or something like that.*value = *p;// Don't put back the last one, since we return it!//*length = 1;// Leave unrecognized entities - this doesn't really work.// Just writes strange XML.return p+1;}



修改tinyxml.h文件中的部分代码如下:

// One row of the entity lookup table used when decoding "&...;" sequences.
struct Entity
{
    const char*     str;        // Entity text as it appears in the XML, e.g. "&lt;".
    unsigned int    strLength;  // Length of str in bytes, excluding the terminator.
    char            chr[3];     // Replacement text. Special characters are generally at
                                // most two bytes long, hence a 3-byte string buffer.
};
enum
{
    NUM_ENTITY = 8,             // Number of rows in the entity table; may be changed.
    MAX_ENTITY_LENGTH = 6
};




原创粉丝点击