tinyxml2源码分析-3

来源:互联网 发布:网络造谣案例 编辑:程序博客网 时间:2024/04/28 09:02

在开始编写读取文件时,先简单介绍下解析策略,对于xml文件:

<?This is a Declaration!?><!--This is a Document Comment!--><School>    <!--This is a School Comment!-->    <teachers>        <Wang/>        <Li/>        <Zhao/>    </teachers>    <students>        <LiMing sex="male" height="174" weight="80.400000000000006" Is_good_at_math="false">Li Ming is a good Student!</LiMing>        <CuiHua>this is a Text!</CuiHua>        <Hanmeimei><![CDATA[this is a CData Text:if (a < b && a < 0)]]></Hanmeimei>    </students>    <!this is a Unknow!></School>

我们要建立的DOM树如下图所示,



不同节点的头是不同的,如下:

XMLDeclaration:<?
XMLComment: <!-- 
XMLText(CData):<![CDATA[
XMLUnknown:<!
XMLElement:<

XMLText:剩下的

在解析时,对于<xxx> ...</xxx>这种类型的Element,第一个<xxx>作为父节点,最后的</xxx>将作为<xxx>的_lastChild,最后再删除</xxx>。

对于父节点,我们使用如下策略:

1)我们从头开始遍历文件,如果碰到 <?,创建XMLDeclaration,继续搜索?>,使用InsertEndChild插入;

2)如果碰到 <!-- ,创建XMLComment,继续搜索-->,使用InsertEndChild插入;

3)如果碰到<![CDATA[,创建XMLText,继续搜索]]>,使用InsertEndChild插入;

4)如果碰到 <!,创建XMLUnknown,继续搜索>,使用InsertEndChild插入;

5)如果碰到 <,创建XMLElement,接着解析:


if(/xxx>类型){

则标记状态为CLOSING,意味着某个Element结束了。使用InsertEndChild插入;并且和父节点XMLElement的名称做对比,相等则删除该XMLElement。

}

else{

使用InsertEndChild插入;解析Attributes,如果以/>结尾,该节点解析完毕。否则,将当前节点设为父节点,跳到1)开始解析它的子节点。

}

6)XMLText,搜索<,返回<之前的指针即可。

从上面的逻辑可以看出,需要递归解析,而每个节点类型的解析方式也不同,所以需要一个多态的解析接口,在XMLNode中增加解析接口ParseDeep(源代码的子类重写(override)该函数的时候并没有加virtual,不是个好习惯),其实XMLNode的搜索,就是父节点的搜索策略:

 // Polymorphic parsing entry point. XMLNode supplies the container strategy
 // (identify and parse child nodes in a loop); XMLText, XMLComment,
 // XMLDeclaration, XMLUnknown and XMLElement each provide their own version.
 // First argument: current position in the mutable text buffer.
 // Second argument: optional StrPair that XMLNode's version fills with the
 // name of a closing tag it encounters; the leaf node types ignore it.
 // Returns a position to continue parsing from, or 0 when there is nothing
 // further to continue with (including parse failures).
 virtual char* ParseDeep( char*, StrPair* );

先定义搜索节点头函数:

char* XMLDocument::Identify( char* p, XMLNode** node ){    TIXMLASSERT( node );    TIXMLASSERT( p );    char* const start = p;    p = XMLUtil::SkipWhiteSpace( p );    if( !*p ) {        *node = 0;        TIXMLASSERT( p );        return p;    }    // These strings define the matching patterns:    static const char* xmlHeader= { "<?" };    static const char* commentHeader= { "<!--" };    static const char* cdataHeader= { "<![CDATA[" };    static const char* dtdHeader= { "<!" };    static const char* elementHeader= { "<" };// and a header for everything else; check last.    static const int xmlHeaderLen= 2;    static const int commentHeaderLen= 4;    static const int cdataHeaderLen= 9;    static const int dtdHeaderLen= 2;    static const int elementHeaderLen= 1;    TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLUnknown ) );// use same memory pool    TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLDeclaration ) );// use same memory pool    XMLNode* returnNode = 0;    if ( XMLUtil::StringEqual( p, xmlHeader, xmlHeaderLen ) ) {        TIXMLASSERT( sizeof( XMLDeclaration ) == _commentPool.ItemSize() );        returnNode = new (_commentPool.Alloc()) XMLDeclaration( this );        returnNode->_memPool = &_commentPool;        p += xmlHeaderLen;    }    else if ( XMLUtil::StringEqual( p, commentHeader, commentHeaderLen ) ) {        TIXMLASSERT( sizeof( XMLComment ) == _commentPool.ItemSize() );        returnNode = new (_commentPool.Alloc()) XMLComment( this );        returnNode->_memPool = &_commentPool;        p += commentHeaderLen;    }    else if ( XMLUtil::StringEqual( p, cdataHeader, cdataHeaderLen ) ) {        TIXMLASSERT( sizeof( XMLText ) == _textPool.ItemSize() );        XMLText* text = new (_textPool.Alloc()) XMLText( this );        returnNode = text;        returnNode->_memPool = &_textPool;        p += cdataHeaderLen;        text->SetCData( true );    }    else if ( XMLUtil::StringEqual( p, dtdHeader, dtdHeaderLen ) ) {        TIXMLASSERT( sizeof( XMLUnknown ) == 
_commentPool.ItemSize() );        returnNode = new (_commentPool.Alloc()) XMLUnknown( this );        returnNode->_memPool = &_commentPool;        p += dtdHeaderLen;    }    else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {        TIXMLASSERT( sizeof( XMLElement ) == _elementPool.ItemSize() );        returnNode = new (_elementPool.Alloc()) XMLElement( this );        returnNode->_memPool = &_elementPool;        p += elementHeaderLen;    }    else {        TIXMLASSERT( sizeof( XMLText ) == _textPool.ItemSize() );        returnNode = new (_textPool.Alloc()) XMLText( this );        returnNode->_memPool = &_textPool;        p = start;// Back it up, all the text counts.    }    TIXMLASSERT( returnNode );    TIXMLASSERT( p );    *node = returnNode;    return p;}

下面看XMLNode的搜索,

char* XMLNode::ParseDeep( char* p, StrPair* parentEnd ){    while( p && *p ) {        XMLNode* node = 0;        p = _document->Identify( p, &node );//搜索节点头,根据节点头创建对应的子节点        if ( node == 0 ) {            break;        }        StrPair endTag;        p = node->ParseDeep( p, &endTag );//由子节点根据自己的策略完成搜索        if ( !p ) {            DeleteNode( node );            if ( !_document->Error() ) {                _document->SetError( XML_ERROR_PARSING, 0, 0 );            }            break;        }        XMLDeclaration* decl = node->ToDeclaration();        if ( decl ) {//如果有声明,必须是文档的第一个孩子                // A declaration can only be the first child of a document.                // Set error, if document already has children.                if ( !_document->NoChildren() ) {                        _document->SetError( XML_ERROR_PARSING_DECLARATION, decl->Value(), 0);                        DeleteNode( node );                        break;                }        }        XMLElement* ele = node->ToElement();        if ( ele ) {            // 解析到</xxx>了,直接返回,当前Element的解析结束            if ( ele->ClosingType() == XMLElement::CLOSING ) {                if ( parentEnd ) {                    ele->_value.TransferTo( parentEnd );                }                node->_memPool->SetTracked();   // created and then immediately deleted.                DeleteNode( node );                return p;            }            // Handle an end tag returned to this level.            // And handle a bunch of annoying errors.            
bool mismatch = false;            if ( endTag.Empty() ) {                if ( ele->ClosingType() == XMLElement::OPEN ) {                    mismatch = true;                }            }            else {                if ( ele->ClosingType() != XMLElement::OPEN ) {                    mismatch = true;                }//下面比较Element是否对应                else if ( !XMLUtil::StringEqual( endTag.GetStr(), ele->Name() ) ) {                    mismatch = true;                }            }            if ( mismatch ) {                _document->SetError( XML_ERROR_MISMATCHED_ELEMENT, ele->Name(), 0 );                DeleteNode( node );                break;            }        }        InsertEndChild( node );//插入子节点    }    return 0;}

I

XMLText的搜索策略

// --------- XMLText ---------- //char* XMLText::ParseDeep( char* p, StrPair* ){    const char* start = p;    if ( this->CData() ) {        p = _value.ParseText( p, "]]>", StrPair::NEEDS_NEWLINE_NORMALIZATION );        if ( !p ) {            _document->SetError( XML_ERROR_PARSING_CDATA, start, 0 );        }        return p;    }    else {        int flags = _document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;        if ( _document->WhitespaceMode() == COLLAPSE_WHITESPACE ) {            flags |= StrPair::NEEDS_WHITESPACE_COLLAPSING;        }        p = _value.ParseText( p, "<", flags );        if ( p && *p ) {            return p-1;        }        if ( !p ) {            _document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );        }    }    return 0;}
XMLComment:
char* XMLComment::ParseDeep( char* p, StrPair* ){    // Comment parses as text.    const char* start = p;    p = _value.ParseText( p, "-->", StrPair::COMMENT );    if ( p == 0 ) {        _document->SetError( XML_ERROR_PARSING_COMMENT, start, 0 );    }    return p;}
XMLDeclaration:

char* XMLDeclaration::ParseDeep( char* p, StrPair* ){    // Declaration parses as text.    const char* start = p;    p = _value.ParseText( p, "?>", StrPair::NEEDS_NEWLINE_NORMALIZATION );    if ( p == 0 ) {        _document->SetError( XML_ERROR_PARSING_DECLARATION, start, 0 );    }    return p;}

XMLUnknown:

char* XMLUnknown::ParseDeep( char* p, StrPair* ){    // Unknown parses as text.    const char* start = p;    p = _value.ParseText( p, ">", StrPair::NEEDS_NEWLINE_NORMALIZATION );    if ( !p ) {        _document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0 );    }    return p;}

XMLElement:

char* XMLElement::ParseDeep( char* p, StrPair* strPair ){    // Read the element name.    p = XMLUtil::SkipWhiteSpace( p );    // The closing element is the </element> form. It is    // parsed just like a regular element then deleted from    // the DOM.    if ( *p == '/' ) {        _closingType = CLOSING;        ++p;    }    p = _value.ParseName( p );    if ( _value.Empty() ) {        return 0;    }    p = ParseAttributes( p );    if ( !p || !*p || _closingType ) {        return p;    }<span style="white-space:pre"></span>//调用XMLNode中的解析方法,就是将当前节点设为父节点,在这里实现递归调用    p = XMLNode::ParseDeep( p, strPair );    return p;}
XMLDocument就使用XMLNode的策略,因为他是一个父节点。

可以开始编写LoadFile的代码了,XMLError是个枚举类型,一般而言,先定义一个枚举,同时定义XML_SUCCESS=0,因为成功不需要理由,失败者总是需要借口。等到编码到后期再添加错误码。

// Opens `filename` in binary mode, loads and parses it, then closes the file.
//
// Returns the document's error ID after the attempt. A null `filename` or a
// file that cannot be opened is reported as XML_ERROR_FILE_NOT_FOUND.
XMLError XMLDocument::LoadFile( const char* filename )
{
    Clear();

    // Never hand a null path to the open helper; report it as a missing file.
    if ( !filename ) {
        SetError( XML_ERROR_FILE_NOT_FOUND, 0, 0 );
        return _errorID;
    }

    FILE* fp = callfopen( filename, "rb" );
    if ( !fp ) {
        SetError( XML_ERROR_FILE_NOT_FOUND, filename, 0 );
        return _errorID;
    }

    // The FILE* overload records its outcome in _errorID.
    LoadFile( fp );
    fclose( fp );
    return _errorID;
}

如果打开文件失败,返回XML_ERROR_FILE_NOT_FOUND错误。这时候可以在enum XMLError中添加错误码,以后为了不影响主要源码的阅读,不再细说。接着调用LoadFile

// Reads the whole of an already opened stream into _charBuffer, appends a
// null terminator and parses it.
//
// Returns the document's error ID:
//   XML_ERROR_FILE_READ_ERROR - the first-byte probe hit a stream error, the
//                               length could not be determined or does not
//                               fit the buffer, or fewer bytes were read than
//                               expected.
//   XML_ERROR_EMPTY_DOCUMENT  - the stream has length 0.
//   otherwise whatever Parse() leaves in _errorID.
XMLError XMLDocument::LoadFile( FILE* fp )
{
    Clear();

    // Probe the first byte: EOF together with a stream error means the
    // stream cannot be read at all.
    fseek( fp, 0, SEEK_SET );
    const int firstByte = fgetc( fp );
    if ( firstByte == EOF && ferror( fp ) != 0 ) {
        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
        return _errorID;
    }

    // Measure the stream by seeking to its end, then rewind for reading.
    fseek( fp, 0, SEEK_END );
    const long length = ftell( fp );
    fseek( fp, 0, SEEK_SET );
    if ( length == -1L ) {
        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
        return _errorID;
    }
    TIXMLASSERT( length >= 0 );

    if ( !LongFitsIntoSizeTMinusOne<>::Fits( length ) ) {
        // Cannot handle files which won't fit in buffer together with null terminator
        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
        return _errorID;
    }

    if ( length == 0 ) {
        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 );
        return _errorID;
    }

    // The length is known now: allocate room for the content plus terminator.
    const size_t byteCount = static_cast<size_t>( length );
    TIXMLASSERT( _charBuffer == 0 );
    _charBuffer = new char[byteCount+1];

    const size_t bytesRead = fread( _charBuffer, 1, byteCount, fp );
    if ( bytesRead != byteCount ) {
        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
        return _errorID;
    }
    _charBuffer[byteCount] = 0;

    Parse();
    return _errorID;
}
开始解析文档,utf8 bom 自行搜索:

void XMLDocument::Parse(){    TIXMLASSERT( NoChildren() ); // Clear() must have been called previously    TIXMLASSERT( _charBuffer );    char* p = _charBuffer;    p = XMLUtil::SkipWhiteSpace( p );    p = const_cast<char*>( XMLUtil::ReadBOM( p, &_writeBOM ) );    if ( !*p ) {        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 );        return;    }    ParseDeep(p, 0 );}




0 0