XMLNode类

来源：互联网 | 发布：如何用爬虫爬数据 | 编辑：程序博客网 | 时间：2024/05/22 01:08

//
// XML.h
//
// Copyright (c) Shareaza Development Team, 2002-2005.
// This file is part of SHAREAZA (www.shareaza.com)
//
// Shareaza is free software; you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Shareaza is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Shareaza; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

#pragma once

class CXMLNode;
class CXMLElement;
class CXMLAttribute;

// Common base class for XML elements and attributes: a named node carrying a
// text value and a pointer to the element that owns it.
class CXMLNode
{

// Construction

public:

// pParent is stored as the owning element; pszName, when non-NULL, becomes
// the node name.
CXMLNode(CXMLElement* pParent = NULL,
LPCTSTR pszName = NULL);

virtual ~CXMLNode();

// Attributes

protected:

// Node kind tag; the constructors assign xmlNode, xmlElement or xmlAttribute.
int    m_nNode;
// Owning element as passed to the constructor (may be NULL).
CXMLElement* m_pParent;
// Node name and text value.
CString   m_sName;
CString   m_sValue;

// Values stored in m_nNode.
enum { xmlNode, xmlElement, xmlAttribute };

// Operations

// NOTE(review): the inline members below have no body in this file;
// presumably they are defined in XML.inl, which the header includes last.

public:

inline int    GetType() const;
inline CXMLNode*  AsNode() const;
inline CXMLElement*  AsElement() const;
inline CXMLAttribute* AsAttribute() const;

public:

inline CXMLElement* GetParent() const;
inline void Delete();

public:

inline CString  GetName() const;
inline void   SetName(LPCTSTR pszValue);
inline BOOL   IsNamed(LPCTSTR pszName) const;
inline CString  GetValue() const;
inline void   SetValue(LPCTSTR pszValue);

protected:

// Checks whether the input continues with pszToken; on success pszXML is
// advanced past the matched text.
static BOOL ParseMatch(LPCTSTR& pszXML,
LPCTSTR pszToken);

// Reads a run of letters, digits, ':' and '_' into strIdentifier and
// advances pszXML past it; FALSE when no such run is present.
static BOOL ParseIdentifier(LPCTSTR& pszXML,
CString& strIdentifier);

#ifdef _AFX
// MFC archive support: stores or loads m_sName and m_sValue.
void Serialize(CArchive& ar);
#endif

public:

// Decodes nLength characters of XML text starting at pszXML into a plain
// string (entity references resolved, whitespace runs collapsed) and
// advances pszXML.
static CString StringToValue(LPCTSTR& pszXML,
int nLength);

// Appends pszValue to strXML, substituting replacement text for the
// characters & < > " ' and writing characters above 127 as &#N;.
static void ValueToString(LPCTSTR pszValue,
CString& strXML);

// Normalises str in place: trims it, collapses runs of characters <= 32 and
// removes most punctuation.
static void UniformString(CString& str);

// CXMLElement reads the protected members of its attributes and children.
friend class CXMLElement;

// NOTE(review): CQuerySearch and CXMLCOM are not defined in this file.
friend class CQuerySearch;

friend class CXMLCOM;

};

// An XML element: a node that additionally owns a list of child elements and
// a map of attributes. Both collections are emptied (and their contents
// deleted) by the destructor.
class CXMLElement : public CXMLNode
{

// Construction

public:
CXMLElement(CXMLElement* pParent = NULL,
LPCTSTR pszName = NULL);

virtual ~CXMLElement();

// Attributes

protected:

// m_pElements  : child elements, in document order (appended with AddTail).
// m_pAttributes: attributes keyed by their lower-cased name.
// The MFC build uses the untyped MFC collections, otherwise the typed ATL ones.
#ifdef _AFX
CPtrList m_pElements;
CMapStringToPtr m_pAttributes;
#else
CAtlList<CXMLElement*> m_pElements;
CAtlMap<CString, CXMLAttribute*, CStringElementTraits<CString> > m_pAttributes;
#endif

// Operations

public:

// Returns a newly allocated deep copy (attributes, children, value) whose
// parent pointer is pParent.
CXMLElement* Clone(CXMLElement* pParent = NULL);

// NOTE(review): the inline members below have no body in this file;
// presumably they are defined in XML.inl.
inline CXMLElement* Detach();

public:

// Child element access.
inline CXMLElement*  AddElement(LPCTSTR pszName);
inline CXMLElement*  AddElement(CXMLElement* pElement);
inline int    GetElementCount() const;
inline CXMLElement*  GetFirstElement() const;
inline POSITION   GetElementIterator() const;
inline CXMLElement*  GetNextElement(POSITION& pos) const;
inline CXMLElement*  GetElementByName(LPCTSTR pszName) const;
inline CXMLElement*  GetElementByName(LPCTSTR pszName, BOOL bCreate);
inline void    RemoveElement(CXMLElement* pElement);
// Deletes every child element and empties the list.
void     DeleteAllElements();

public:
// Attribute access.
inline CXMLAttribute* AddAttribute(LPCTSTR pszName,
LPCTSTR pszValue = NULL);

inline CXMLAttribute* AddAttribute(CXMLAttribute* pAttribute);
inline int    GetAttributeCount() const;
inline POSITION   GetAttributeIterator() const;
inline CXMLAttribute* GetNextAttribute(POSITION& pos) const;
inline CXMLAttribute* GetAttribute(LPCTSTR pszName) const;
inline CString   GetAttributeValue(LPCTSTR pszName,
  LPCTSTR pszDefault = NULL) const;

inline void    RemoveAttribute(CXMLAttribute* pAttribute);
inline void    DeleteAttribute(LPCTSTR pszName);
// Deletes every attribute and empties the map.
void     DeleteAllAttributes();

public:

// Serialises the element to a new string; bHeader prepends the XML
// declaration, bNewline adds line breaks.
CString   ToString(BOOL bHeader = FALSE, BOOL bNewline = FALSE);
// Appends the serialised element to strXML.
void   ToString(CString& strXML, BOOL bNewline = FALSE);
// Parses one element (recursively) from strXML, advancing the pointer.
BOOL   ParseString(LPCTSTR& strXML);
// Deep comparison of name, value, attributes and children.
BOOL   Equals(CXMLElement* pXML) const;
// Collects attribute values and element values of the whole subtree into
// one space-separated string.
CString   GetRecursiveWords();
void   AddRecursiveWords(CString& strWords);

#ifdef _AFX
// MFC archive support for the whole subtree.
void Serialize(CArchive& ar);
#endif

// Factory functions: each returns a newly allocated element tree, or NULL
// when the input cannot be parsed. With bHeader set, an XML declaration is
// required.
static CXMLElement* FromString(LPCTSTR pszXML,
BOOL bHeader = FALSE);

static CXMLElement* FromBytes(BYTE* pByte,
DWORD nByte, BOOL bHeader = FALSE);

static CXMLElement* FromFile(LPCTSTR pszPath,
BOOL bHeader = FALSE);

static CXMLElement* FromFile(HANDLE hFile,
BOOL bHeader = FALSE);

};

// An XML attribute: a name/value node attached to an element.
class CXMLAttribute : public CXMLNode
{

// Construction

public:

CXMLAttribute(CXMLElement* pParent, LPCTSTR pszName = NULL);

virtual ~CXMLAttribute();

// Attributes

public:

// Constant strings for the XML Schema namespaces and the schema-location
// attribute name.
static LPCTSTR xmlnsSchema;
static LPCTSTR xmlnsInstance;
static LPCTSTR schemaName;

// Operations

public:

// Returns a newly allocated copy (same name and value) parented to pParent.
CXMLAttribute* Clone(CXMLElement* pParent = NULL);

// Appends name="value" to strXML.
void ToString(CString& strXML);

// Parses name="value" or name='value' from strXML, advancing the pointer.
BOOL ParseString(LPCTSTR& strXML);
// TRUE when both name and value are equal.
BOOL Equals(CXMLAttribute* pXML) const;

#ifdef _AFX
// MFC archive support: stores or loads name and value.
void Serialize(CArchive& ar);
#endif

};

// XMLVOID adapts a typed pointer variable for use as a map out-parameter.
// In the MFC build the attribute map is a CMapStringToPtr, whose Lookup and
// GetNextAssoc take a void*&, so the variable is reinterpreted as one; in the
// ATL build the map is typed and the argument is passed through unchanged.
#ifdef _AFX
#define XMLVOID(x) (void*&)(x)
#else
#define XMLVOID(x) (x)
#endif

#include "XML.inl"

//
// XML.cpp
//
// Copyright (c) Shareaza Development Team, 2002-2005.
// This file is part of SHAREAZA (www.shareaza.com)
//
// Shareaza is free software; you can redistribute it
// and/or modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Shareaza is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Shareaza; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

#include "StdAfx.h"
#include "Shareaza.h"
#include "XML.h"

#ifdef DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// CXMLNode construction

// Creates a generic node owned by pParent. The name is copied only when one
// is supplied; a NULL pszName leaves m_sName empty.
CXMLNode::CXMLNode(CXMLElement* pParent, LPCTSTR pszName)
	: m_nNode( xmlNode )
	, m_pParent( pParent )
{
	if ( pszName != NULL ) m_sName = pszName;
}

// The base node owns no resources beyond its CString members, so there is
// nothing to release explicitly.
CXMLNode::~CXMLNode()
{
}

//////////////////////////////////////////////////////////////////////
// CXMLNode parsing

// Tests whether the input, after any leading whitespace, starts with pszToken.
//
// pszBase  - in/out cursor into the XML text. On success it is advanced past
//            the skipped whitespace and the token; on failure it is untouched.
// pszToken - the exact text to match (case-sensitive).
//
// Returns TRUE only when the complete token was matched. Input that ends
// part-way through the token is reported as a mismatch (the previous code
// treated a truncated input as a successful match).
BOOL CXMLNode::ParseMatch(LPCTSTR& pszBase, LPCTSTR pszToken)
{
	LPCTSTR pszXML = pszBase;

	// Skip leading blanks: space, tab, carriage return, line feed.
	while ( *pszXML == ' ' || *pszXML == '\t' || *pszXML == '\r' || *pszXML == '\n' )
	{
		pszXML++;
	}

	// Nothing but whitespace remains.
	if ( ! *pszXML )
	{
		return FALSE;
	}

	// Compare character by character until either string ends.
	for ( ; *pszXML && *pszToken ; pszXML++, pszToken++ )
	{
		if ( *pszXML != *pszToken )
		{
			return FALSE;
		}
	}

	// The input ran out before the whole token was seen.
	if ( *pszToken )
	{
		return FALSE;
	}

	pszBase = pszXML;

	return TRUE;
}

// Extracts an identifier (element or attribute name) from the input.
//
// pszBase       - in/out cursor into the XML text. On success it is advanced
//                 past the leading whitespace and the identifier; on failure
//                 it is left untouched.
// strIdentifier - receives the identifier text on success.
//
// An identifier is a non-empty run of alphanumeric characters, ':' and '_';
// scanning stops at the first other character (for example '=', '>' or a blank).
// Returns FALSE when the input is exhausted or no identifier character follows.
BOOL CXMLNode::ParseIdentifier(LPCTSTR& pszBase,
	CString& strIdentifier)
{
	LPCTSTR pszStart = pszBase;

	// Skip leading blanks: space, tab, carriage return, line feed.
	while ( *pszStart == ' ' || *pszStart == '\t' || *pszStart == '\r' || *pszStart == '\n' )
	{
		pszStart++;
	}

	if ( ! *pszStart )
	{
		return FALSE;
	}

	// Measure the identifier.
	LPCTSTR pszStop = pszStart;

	while ( *pszStop && ( _istalnum( *pszStop ) || *pszStop == ':' || *pszStop == '_' ) )
	{
		pszStop++;
	}

	const int nIdentifier = (int)( pszStop - pszStart );

	if ( ! nIdentifier )
	{
		return FALSE;
	}

	// Copy the name out and move the caller's cursor past it.
	strIdentifier = CString( pszStart, nIdentifier );

	pszBase = pszStop;

	return TRUE;
}

//////////////////////////////////////////////////////////////////////
// CXMLNode string to value

// Decodes a span of raw XML text into a plain string value.
//
// pszXML  - in/out cursor; decoding starts here and the cursor is advanced as
//           characters are consumed. After a fully successful decode it
//           equals the original pszXML + nLength.
// nLength - number of input characters that make up the span.
//
// Decoding rules:
//   * every run of whitespace becomes a single space, and whitespace at the
//     very start of the value is dropped;
//   * &amp; &lt; &gt; &quot; &apos; &nbsp; (matched case-insensitively) become
//     & < > " ' and space;
//   * &#N; (decimal) becomes the character with code N, truncated to TCHAR;
//   * an '&' that starts none of the above is copied through literally.
//
// Returns the decoded value (empty when nLength is 0 or the input is empty).
//
// NOTE: the function temporarily writes a terminator at pszXML[nLength] and
// restores it before returning, so the input must live in writable memory
// even though it is passed as LPCTSTR.
CString CXMLNode::StringToValue(LPCTSTR& pszXML, int nLength)
{
	CString strValue;

	if ( ! nLength || ! *pszXML )
	{
		return strValue;
	}

	// Decoding never grows the text, so nLength (plus slack) is enough.
	LPTSTR pszValue = strValue.GetBuffer( nLength + 4 );
	LPTSTR pszOut = pszValue;

	// Temporarily terminate the span so the string helpers stop at its end.
	LPTSTR pszNull = (LPTSTR)pszXML + nLength;
	TCHAR cNull = *pszNull;
	*pszNull = 0;

	while ( *pszXML && pszXML < pszNull )
	{
		if ( _istspace( *pszXML ) )
		{
			// Emit one space for the whole run, unless nothing has been
			// written yet.
			if ( pszValue != pszOut )
			{
				*pszOut++ = ' ';
			}

			pszXML++;

			while ( *pszXML && _istspace( *pszXML ) )
			{
				pszXML++;
			}

			if ( ! *pszXML || pszXML >= pszNull )
			{
				break;
			}
		}

		if ( *pszXML == '&' )
		{
			pszXML++;

			if ( ! *pszXML || pszXML >= pszNull )
			{
				break;
			}

			if ( _tcsnicmp( pszXML, _T("amp;"), 4 ) == 0 )
			{
				*pszOut++ = '&';
				pszXML += 4;
			}
			else if ( _tcsnicmp( pszXML, _T("lt;"), 3 ) == 0 )
			{
				*pszOut++ = '<';
				pszXML += 3;
			}
			else if ( _tcsnicmp( pszXML, _T("gt;"), 3 ) == 0 )
			{
				*pszOut++ = '>';
				pszXML += 3;
			}
			else if ( _tcsnicmp( pszXML, _T("quot;"), 5 ) == 0 )
			{
				*pszOut++ = '\"';
				pszXML += 5;
			}
			else if ( _tcsnicmp( pszXML, _T("apos;"), 5 ) == 0 )
			{
				*pszOut++ = '\'';
				pszXML += 5;
			}
			else if ( _tcsnicmp( pszXML, _T("nbsp;"), 5 ) == 0 )
			{
				*pszOut++ = ' ';
				pszXML += 5;
			}
			else if ( *pszXML == '#' )
			{
				// Decimal character reference. %lu requires an unsigned long.
				unsigned long nChar = 0;

				pszXML++;

				if ( ! *pszXML || pszXML >= pszNull || ! _istdigit( *pszXML ) )
				{
					break;
				}

				if ( _stscanf( pszXML, _T("%lu;"), &nChar ) == 1 )
				{
					*pszOut++ = (TCHAR)nChar;

					// Skip the digits and the closing ';'.
					while ( *pszXML && *pszXML != ';' )
					{
						pszXML++;
					}

					if ( ! *pszXML || pszXML >= pszNull )
					{
						break;
					}

					pszXML++;
				}
			}
			else
			{
				// Unrecognised reference: keep the ampersand as-is.
				*pszOut++ = '&';
			}
		}
		else
		{
			*pszOut++ = *pszXML++;
		}
	}

	// A clean decode consumes exactly the requested span.
	ASSERT( pszNull == pszXML );

	// Restore the character that was overwritten by the terminator.
	*pszNull = cNull;

	ASSERT( pszOut - pszValue <= nLength );

	strValue.ReleaseBuffer( (int)( pszOut - pszValue ) );

	return strValue;
}

//////////////////////////////////////////////////////////////////////
// CXMLNode value to string

// V2S_APPEND(x,y): helper for ValueToString. Appends the string y, which is
// x characters long, at pszOut. It relies on the caller's locals:
//   strXML - destination string,
//   nLen   - number of characters written so far,
//   nOut   - free characters left in the current buffer,
//   pszOut - write cursor.
// When fewer than x characters are free, the buffer is grown by x + 16.
#define V2S_APPEND(x,y) \
	if ( (x) > nOut ) \
	{ \
		strXML.ReleaseBuffer( nLen + nOut ); \
		nOut += (x) + 16; \
		pszOut = strXML.GetBuffer( nLen + nOut ) + nLen; \
	} \
	{ for ( LPCTSTR pszIn = (y) ; *pszIn ; nOut--, nLen++ ) *pszOut++ = *pszIn++; }

// Appends pszValue to strXML in XML-escaped form.
//
// pszValue - plain text to encode (must not be NULL).
// strXML   - destination; existing content is kept and the encoded text is
//            appended to it.
//
// & < > " ' are written as &amp; &lt; &gt; &quot; &apos;, and every character
// with a code above 127 is written as a decimal reference &#N;. All other
// characters are copied unchanged.
void CXMLNode::ValueToString(LPCTSTR pszValue, CString& strXML)
{
	int nLen = strXML.GetLength();

	// Start with room for the unescaped length; escapes grow the buffer on
	// demand.
	int nOut = (int)_tcslen( pszValue );

	LPTSTR pszOut = strXML.GetBuffer( nLen + nOut ) + nLen;

	for ( ; *pszValue ; pszValue++ )
	{
		// Widen through the unsigned type so high characters do not become
		// negative.
#ifdef UNICODE
		int nChar = (int)(unsigned short)*pszValue;
#else
		int nChar = (int)(unsigned char)*pszValue;
#endif

		switch ( nChar )
		{
		case '&':
			V2S_APPEND( 5, _T("&amp;") );
			break;

		case '<':
			V2S_APPEND( 4, _T("&lt;") );
			break;

		case '>':
			V2S_APPEND( 4, _T("&gt;") );
			break;

		case '\"':
			V2S_APPEND( 6, _T("&quot;") );
			break;

		case '\'':
			V2S_APPEND( 6, _T("&apos;") );
			break;

		default:
			if ( nChar > 127 )
			{
				// Non-ASCII: emit a decimal character reference.
				CString strItem;

				strItem.Format( _T("&#%lu;"), (unsigned long)nChar );

				V2S_APPEND( strItem.GetLength(), strItem );
			}
			else
			{
				if ( nOut <= 0 )
				{
					// Out of room for a plain character: grow by 16.
					strXML.ReleaseBuffer( nLen + nOut );

					nOut += 16;

					pszOut = strXML.GetBuffer( nLen + nOut ) + nLen;
				}

				*pszOut++ = (TCHAR)nChar;

				nOut--;

				nLen++;
			}
			break;
		}
	}

	// Trim the string to what was actually written.
	strXML.ReleaseBuffer( nLen );
}

//////////////////////////////////////////////////////////////////////
// CXMLNode serialize

#ifdef _AFX

// MFC archive support for the base node. Only the name and the value are
// persisted, in that order; the direction follows the archive's mode.
void CXMLNode::Serialize(CArchive& ar)
{
	if ( ! ar.IsStoring() )
	{
		// Loading
		ar >> m_sName;
		ar >> m_sValue;
		return;
	}

	// Storing
	ar << m_sName;
	ar << m_sValue;
}

#endif

//////////////////////////////////////////////////////////////////////
// CXMLNode string helper

// Normalises a string in place.
//
// After trimming both ends the string is scanned once:
//   * a character with code <= 32 (space or control) is kept as a single
//     space when the previously kept character was not a blank, and removed
//     otherwise, so blank runs collapse to one space;
//   * a character below 0xC0 that is neither alphanumeric nor one of
//     '-&/,;#() is removed (without affecting the blank-run state);
//   * everything else is kept unchanged.
void CXMLNode::UniformString(CString& str)
{
	// Punctuation that survives normalisation.
	static LPCTSTR pszOK = _T("'-&/,;#()");

	str.TrimLeft();
	str.TrimRight();

	// TRUE while the last kept character was a blank (or nothing is kept yet).
	BOOL bAfterBlank = TRUE;

	int nIndex = 0;

	while ( nIndex < str.GetLength() )
	{
#ifdef UNICODE
		const int nCode = (int)(unsigned short)str.GetAt( nIndex );
#else
		const int nCode = (int)(unsigned char)str.GetAt( nIndex );
#endif

		if ( nCode <= 32 )
		{
			if ( bAfterBlank )
			{
				// Redundant blank: drop it and re-examine the same index.
				str = str.Left( nIndex ) + str.Mid( nIndex + 1 );
				continue;
			}

			// First blank of a run: make sure it is a real space.
			if ( nCode != 32 )
			{
				str.SetAt( nIndex, 32 );
			}

			bAfterBlank = TRUE;
		}
		else if ( ! _istalnum( nCode ) && nCode < 0xC0
			&& _tcschr( pszOK, nCode ) == NULL )
		{
			// Unwanted punctuation: drop it and re-examine the same index.
			str = str.Left( nIndex ) + str.Mid( nIndex + 1 );
			continue;
		}
		else
		{
			bAfterBlank = FALSE;
		}

		nIndex++;
	}
}

//////////////////////////////////////////////////////////////////////
// CXMLElement construction

// Creates an empty element; the base constructor stores the parent and name,
// after which the node type tag is switched to xmlElement.
CXMLElement::CXMLElement(CXMLElement* pParent, LPCTSTR pszName) : CXMLNode( pParent, pszName )
{
m_nNode = xmlElement;
}

// Destroys the element together with every child element and attribute it
// holds.

CXMLElement::~CXMLElement()
{
DeleteAllElements();

DeleteAllAttributes();
}

//////////////////////////////////////////////////////////////////////
// CXMLElement clone

// Produces a deep copy of this element.
//
// pParent - value stored as the copy's parent pointer; the copy is NOT
//           inserted into pParent's child list.
//
// Attributes and child elements are cloned recursively and the text value is
// copied. The caller owns the returned object.
CXMLElement* CXMLElement::Clone(CXMLElement* pParent)
{
	CXMLElement* pCopy = new CXMLElement( pParent, m_sName );

	// Duplicate the attributes; the map key is the lower-cased name.
	POSITION posAttribute = GetAttributeIterator();

	while ( posAttribute )
	{
		CXMLAttribute* pSource = GetNextAttribute( posAttribute );
		CXMLAttribute* pDuplicate = pSource->Clone( pCopy );

		CString strKey( pDuplicate->m_sName );
		CharLower( strKey.GetBuffer() );
		strKey.ReleaseBuffer();

		pCopy->m_pAttributes.SetAt( strKey, pDuplicate );
	}

	// Duplicate the children, preserving their order.
	POSITION posChild = GetElementIterator();

	while ( posChild )
	{
		CXMLElement* pChild = GetNextElement( posChild );

		pCopy->m_pElements.AddTail( pChild->Clone( pCopy ) );
	}

	pCopy->m_sValue = m_sValue;

	return pCopy;
}

//////////////////////////////////////////////////////////////////////
// CXMLElement delete

//删除m_pElements中所有的CXMLElement对象

void CXMLElement::DeleteAllElements()
{
for ( POSITION pos = m_pElements.GetHeadPosition() ; pos ; )
{
delete (CXMLElement*)m_pElements.GetNext( pos );
}

m_pElements.RemoveAll();
}

//删除m_pAttributes中所有的CXMLAttribute对象

void CXMLElement::DeleteAllAttributes()
{
for ( POSITION pos = m_pAttributes.GetStartPosition() ; pos ; )
{
CXMLAttribute* pAttribute = NULL;

CString strName;

m_pAttributes.GetNextAssoc( pos, strName, XMLVOID(pAttribute) );

delete pAttribute;
}

m_pAttributes.RemoveAll();
}

//////////////////////////////////////////////////////////////////////
// CXMLElement to string

// Serialises this element (and its subtree) to a new string.
//
// bHeader  - when TRUE the output starts with the XML declaration
//            <?xml version="1.0"?>.
// bNewline - when TRUE a CR LF is written before the element and after each
//            tag that ends a line.
//
// Returns the XML text.
CString CXMLElement::ToString(BOOL bHeader, BOOL bNewline)
{
	CString strXML;

	if ( bHeader )
	{
		strXML = _T("<?xml version=\"1.0\"?>");
	}

	if ( bNewline )
	{
		strXML += _T("\r\n");
	}

	ToString( strXML, bNewline );

	// The buffer handling in ValueToString must leave a consistent length.
	ASSERT( strXML.GetLength() == (int)_tcslen( strXML ) );

	return strXML;
}

// Appends this element, serialised as XML text, to strXML.
//
// strXML   - destination string; output is appended.
// bNewline - when TRUE, CR LF is written after an empty-element tag, after an
//            opening tag that is followed by child elements, and after the
//            closing tag.
//
// Output shape:
//   element without children and without a value:
//       <name attr="value" .../>
//   otherwise:
//       <name attr="value" ...>child elements, then the escaped value</name>
//
// Example of a complete document built from such elements:
//       <?xml version="1.0"?>
//       <!DOCTYPE jxta:PA>
//       <jxta:PA xmlns:jxta="http://www.jxta.org">
//       <PID>
//       urn:jxta:uuid-591623232124645685465465
//       </PID>
//       </jxta:PA>
void CXMLElement::ToString(CString& strXML, BOOL bNewline)
{
	// Opening bracket and element name.
	strXML += '<' + m_sName;

	// Attributes, each preceded by one space.
	POSITION pos = GetAttributeIterator();

	for ( ; pos ; )
	{
		strXML += ' ';

		CXMLAttribute* pAttribute = GetNextAttribute( pos );

		pAttribute->ToString( strXML );
	}

	pos = GetElementIterator();

	if ( pos == NULL && m_sValue.IsEmpty() )
	{
		// No children and no value: emit the self-closing form and stop.
		strXML += _T("/>");

		if ( bNewline )
		{
			strXML += _T("\r\n");
		}

		return;
	}

	// Close the opening tag; break the line only when children follow.
	strXML += '>';

	if ( bNewline && pos )
	{
		strXML += _T("\r\n");
	}

	// Child elements, recursively.
	while ( pos )
	{
		CXMLElement* pElement = GetNextElement( pos );

		pElement->ToString( strXML, bNewline );
	}

	// The element's own text value, escaped.
	ValueToString( m_sValue, strXML );

	// Closing tag.
	strXML += _T("</") + m_sName + '>';

	if ( bNewline )
	{
		strXML += _T("\r\n");
	}
}

//////////////////////////////////////////////////////////////////////
// CXMLElement from string

// Parses an XML document held in a string and returns its root element.
//
// pszXML  - the document text.
// bHeader - when TRUE the document must begin with an XML declaration
//           (<?xml version="...?>); when FALSE the declaration is optional.
//
// The prolog is consumed in this order: optional XML declaration, any number
// of comments, an optional <!DOCTYPE ...> (skipped up to the first '>'), any
// number of comments. The root element is then parsed recursively.
//
// Returns a newly allocated element tree owned by the caller, or NULL when
// the text is malformed. In MFC builds a CException raised while parsing is
// caught and also results in NULL.
CXMLElement* CXMLElement::FromString(LPCTSTR pszXML, BOOL bHeader)
{
	CXMLElement* pElement = NULL;

	LPCTSTR pszElement = NULL;

#ifdef _AFX
	try
	{
#endif
		if ( ParseMatch( pszXML, _T("<?xml version=\"") ) )
		{
			// Skip the rest of the declaration, up to and including "?>".
			pszElement = _tcsstr( pszXML, _T("?>") );

			if ( ! pszElement )
			{
				return NULL;
			}

			pszXML = pszElement + 2;
		}
		else if ( bHeader )
		{
			// A declaration was required but is missing.
			return NULL;
		}

		// Comments before the document type declaration.
		while ( ParseMatch( pszXML, _T("<!--") ) )
		{
			pszElement = _tcsstr( pszXML, _T("-->") );

			if ( ! pszElement || *pszElement != '-' )
			{
				return NULL;
			}

			pszXML = pszElement + 3;
		}

		// Optional document type declaration.
		if ( ParseMatch( pszXML, _T("<!DOCTYPE") ) )
		{
			pszElement = _tcsstr( pszXML, _T(">") );

			if ( ! pszElement )
			{
				return NULL;
			}

			pszXML = pszElement + 1;
		}

		// Comments between the document type declaration and the root.
		while ( ParseMatch( pszXML, _T("<!--") ) )
		{
			pszElement = _tcsstr( pszXML, _T("-->") );

			if ( ! pszElement || *pszElement != '-' )
			{
				return NULL;
			}

			pszXML = pszElement + 3;
		}

		// The root element itself.
		pElement = new CXMLElement();

		if ( ! pElement->ParseString( pszXML ) )
		{
			delete pElement;

			pElement = NULL;
		}

#ifdef _AFX
	}
	catch ( CException* pException )
	{
		pException->Delete();

		delete pElement;

		pElement = NULL;
	}
#endif

	return pElement;
}

// Parses one element, including its attributes, text and child elements,
// from the input and stores the result in this object.
//
// strXML - in/out cursor into the XML text; advanced as input is consumed.
//
// Grammar handled:
//   <name attr="v" ... />                       empty element
//   <name attr="v" ...> text <child/> ... </name>
// Comments (<!-- ... -->) inside the content are skipped. Text fragments
// found between children are decoded with StringToValue and joined with a
// single space into m_sValue. When an attribute name occurs twice
// (compared case-insensitively) the later one replaces the earlier.
//
// Returns TRUE on success, FALSE when the text is malformed or ends early.
// On failure the object may be left partially filled.
BOOL CXMLElement::ParseString(LPCTSTR& strXML)
{
	if ( ! ParseMatch( strXML, _T("<") ) )
	{
		return FALSE;
	}

	// Element name.
	if ( ! ParseIdentifier( strXML, m_sName ) )
	{
		return FALSE;
	}

	// End of the whole input, used as a safety bound below.
	LPCTSTR pszEnd = strXML + _tcslen( strXML );

	// Attributes: everything up to the '>' that closes the opening tag.
	while ( ! ParseMatch( strXML, _T(">") ) )
	{
		// "/>" ends an empty element: nothing more to parse.
		if ( ParseMatch( strXML, _T("/") ) )
		{
			return ParseMatch( strXML, _T(">") );
		}

		if ( ! *strXML || strXML >= pszEnd )
		{
			return FALSE;
		}

		CXMLAttribute* pAttribute = new CXMLAttribute( this );

		if ( ! pAttribute->ParseString( strXML ) )
		{
			// Malformed attribute.
			delete pAttribute;

			return FALSE;
		}

		// The map key is the lower-cased attribute name.
		CString strName( pAttribute->m_sName );

		CharLower( strName.GetBuffer() );

		strName.ReleaseBuffer();

		// Replace an earlier attribute of the same name.
		CXMLAttribute* pExisting = NULL;

		if ( m_pAttributes.Lookup( strName, XMLVOID(pExisting) ) )
		{
			delete pExisting;
		}

		m_pAttributes.SetAt( strName, pAttribute );
	}

	// Content: text, comments and child elements up to the closing tag.
	CString strClose = _T("</");

	strClose += m_sName + '>';

	while ( TRUE )
	{
		if ( ! *strXML || strXML >= pszEnd )
		{
			return FALSE;
		}

		// Everything before the next '<' is text belonging to this element.
		LPCTSTR pszElement = _tcschr( strXML, '<' );

		if ( ! pszElement || *pszElement != '<' )
		{
			return FALSE;
		}

		if ( pszElement > strXML )
		{
			// Separate this fragment from earlier text with one space.
			if ( m_sValue.GetLength() && m_sValue.Right( 1 ) != ' ' )
			{
				m_sValue += ' ';
			}

			m_sValue += StringToValue( strXML, (int)( pszElement - strXML ) );

			// StringToValue must stop exactly at the '<'.
			ASSERT( strXML == pszElement );

			if ( strXML != pszElement )
			{
				return FALSE;
			}
		}

		if ( ParseMatch( strXML, strClose ) )
		{
			// Closing tag of this element: done.
			break;
		}
		else if ( ParseMatch( strXML, _T("<!--") ) )
		{
			// Comment: skip to just past the terminating "-->".
			pszElement = _tcsstr( strXML, _T("-->") );

			if ( ! pszElement || *pszElement != '-' )
			{
				return FALSE;
			}

			strXML = pszElement + 3;
		}
		else
		{
			// Child element: parse recursively and append it.
			CXMLElement* pElement = new CXMLElement( this );

			if ( pElement->ParseString( strXML ) )
			{
				m_pElements.AddTail( pElement );
			}
			else
			{
				delete pElement;

				return FALSE;
			}
		}
	}

	return TRUE;
}

//////////////////////////////////////////////////////////////////////
// CXMLElement from bytes

// Parses an XML document from a raw byte buffer, detecting its encoding from
// the byte-order mark (BOM).
//
// pByte   - the document bytes. NOTE: for big-endian UTF-16 input the bytes
//           after the BOM are swapped in place, so the buffer is modified.
// nByte   - number of bytes in pByte.
// bHeader - passed on to FromString (TRUE = XML declaration required).
//
// Background on the BOM (reference: http://www.bluefeel.com/show-592-1.html):
// UCS defines the character ZERO WIDTH NO-BREAK SPACE with code FEFF, while
// FFFE is not a character and should never occur in real data. UCS therefore
// recommends sending ZERO WIDTH NO-BREAK SPACE before the byte stream: a
// receiver that sees FE FF knows the stream is big-endian, one that sees
// FF FE knows it is little-endian. This is why the character is also called
// the BOM. UTF-8 has no byte-order issue but may use the BOM to announce the
// encoding; encoded in UTF-8 it is EF BB BF, so a stream starting with
// EF BB BF is UTF-8. Summary:
//     Unicode (little-endian): FF FE
//     Unicode big-endian:      FE FF
//     UTF-8:                   EF BB BF
// (GB2312 is high-byte-first, i.e. big-endian.)
//
// Input without a UTF-16 BOM is treated as UTF-8.
//
// NOTE(review): both paths write wide characters straight into the CString
// buffer, so this presumably relies on a UNICODE build - confirm.
//
// Returns the parsed root element (caller owns it) or NULL on failure.
CXMLElement* CXMLElement::FromBytes(BYTE* pByte, DWORD nByte, BOOL bHeader)
{
	CString strXML;

	if ( nByte >= 2
		&& ( ( pByte[0] == 0xFE && pByte[1] == 0xFF )
		  || ( pByte[0] == 0xFF && pByte[1] == 0xFE ) ) )
	{
		// UTF-16: from here on nByte counts characters, excluding the BOM.
		nByte = nByte / 2 - 1;

		if ( pByte[0] == 0xFE && pByte[1] == 0xFF )
		{
			// Big-endian: swap each byte pair into little-endian order.
			pByte += 2;

			for ( DWORD nSwap = 0 ; nSwap < nByte ; nSwap ++ )
			{
				BYTE nTemp = pByte[ ( nSwap << 1 ) + 0 ];

				pByte[ ( nSwap << 1 ) + 0 ] = pByte[ ( nSwap << 1 ) + 1 ];

				pByte[ ( nSwap << 1 ) + 1 ] = nTemp;
			}
		}
		else
		{
			// Little-endian: just step over the BOM.
			pByte += 2;
		}

		// Copy the characters into the string buffer.
		CopyMemory( strXML.GetBuffer( nByte ), pByte, nByte * sizeof(TCHAR) );

		strXML.ReleaseBuffer( nByte );
	}
	else
	{
		// UTF-8, with or without its BOM.
		if ( nByte >= 3 && pByte[0] == 0xEF && pByte[1] == 0xBB && pByte[2] == 0xBF )
		{
			pByte += 3;

			nByte -= 3;
		}

		// First call measures, second call converts.
		DWORD nWide = MultiByteToWideChar( CP_UTF8, 0, (LPCSTR)pByte, nByte, NULL, 0 );

		MultiByteToWideChar( CP_UTF8, 0, (LPCSTR)pByte, nByte, strXML.GetBuffer( nWide ), nWide );

		strXML.ReleaseBuffer( nWide );
	}

	return FromString( strXML, bHeader );
}

//////////////////////////////////////////////////////////////////////
// CXMLElement from file

// Loads an XML document from the file at pszPath.
//
// The file is opened for shared reading and handed to the HANDLE overload,
// then closed again. Returns the parsed root element (caller owns it), or
// NULL when the file cannot be opened or parsing fails.
CXMLElement* CXMLElement::FromFile(LPCTSTR pszPath, BOOL bHeader)
{
	const HANDLE hSource = CreateFile( pszPath, GENERIC_READ, FILE_SHARE_READ, NULL,
		OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL );

	if ( hSource != INVALID_HANDLE_VALUE )
	{
		CXMLElement* pRoot = FromFile( hSource, bHeader );

		CloseHandle( hSource );

		return pRoot;
	}

	return NULL;
}

// Loads an XML document from an already opened file.
//
// hFile   - file handle opened for reading; it is not closed here.
// bHeader - passed on to FromBytes (TRUE = XML declaration required).
//
// The whole file is read into memory and parsed with FromBytes. Files larger
// than 4 MB are rejected; because GetFileSize reports failure as 0xFFFFFFFF,
// that failure is rejected by the same check.
//
// Returns the parsed root element (caller owns it), or NULL when the size
// check fails, the read fails, or parsing fails.
CXMLElement* CXMLElement::FromFile(HANDLE hFile, BOOL bHeader)
{
	DWORD nByte = GetFileSize( hFile, NULL );

	if ( nByte > 4096 * 1024 )
	{
		return NULL;
	}

	BYTE* pByte = new BYTE[ nByte ];

	CXMLElement* pXML = NULL;

	// Parse only what was actually read, and only if the read succeeded.
	DWORD nRead = 0;

	if ( ReadFile( hFile, pByte, nByte, &nRead, NULL ) )
	{
		pXML = FromBytes( pByte, nRead, bHeader );
	}

	delete [] pByte;

	return pXML;
}

//////////////////////////////////////////////////////////////////////
// CXMLElement equality

// Deep equality test between this element and pXML.
//
// Two elements are equal when their names, values, attribute counts and
// child counts match, every attribute of this element has an equal
// counterpart of the same name in pXML, and the children are pairwise equal
// in order. A NULL on either side compares unequal; an element always equals
// itself.
BOOL CXMLElement::Equals(CXMLElement* pXML) const
{
	if ( this == NULL || pXML == NULL ) return FALSE;
	if ( pXML == this ) return TRUE;

	// Cheap scalar checks first.
	if ( m_sName != pXML->m_sName || m_sValue != pXML->m_sValue )
	{
		return FALSE;
	}

	if ( GetAttributeCount() != pXML->GetAttributeCount()
		|| GetElementCount() != pXML->GetElementCount() )
	{
		return FALSE;
	}

	// Attributes are matched by name.
	POSITION posAttribute = GetAttributeIterator();

	while ( posAttribute )
	{
		CXMLAttribute* pMine = GetNextAttribute( posAttribute );
		CXMLAttribute* pTheirs = pXML->GetAttribute( pMine->m_sName );

		if ( pTheirs == NULL || ! pMine->Equals( pTheirs ) )
		{
			return FALSE;
		}
	}

	// Children are matched by position.
	POSITION posMine = GetElementIterator();
	POSITION posTheirs = pXML->GetElementIterator();

	while ( posMine && posTheirs )
	{
		CXMLElement* pLeft = GetNextElement( posMine );
		CXMLElement* pRight = pXML->GetNextElement( posTheirs );

		if ( pLeft == NULL || pRight == NULL )
		{
			return FALSE;
		}

		if ( ! pLeft->Equals( pRight ) )
		{
			return FALSE;
		}
	}

	// Both lists must have been exhausted together.
	return ( posMine == NULL && posTheirs == NULL ) ? TRUE : FALSE;
}

//////////////////////////////////////////////////////////////////////
// CXMLElement recursive word accumulation

// Returns the words contained in this element's subtree as one string.
//
// The collection itself is done by AddRecursiveWords; this wrapper only
// removes leading and trailing whitespace from the result.
CString CXMLElement::GetRecursiveWords()
{
	CString strCollected;

	AddRecursiveWords( strCollected );

	strCollected.TrimRight();
	strCollected.TrimLeft();

	return strCollected;
}

// Appends the words of this element's subtree to strWords, separated by
// single spaces.
//
// Order of the appended text: the values of this element's attributes, then
// the words of each child element (recursively), then this element's own
// value. Attributes whose name contains ':' and the attribute named SHA1
// (any case) are skipped.
void CXMLElement::AddRecursiveWords(CString& strWords)
{
	// Attribute values.
	POSITION posAttribute = GetAttributeIterator();

	while ( posAttribute )
	{
		CXMLAttribute* pItem = GetNextAttribute( posAttribute );

		CString strName = pItem->GetName();

		// Namespaced attributes and SHA1 (Shareaza specific) carry no words.
		const BOOL bSkip = strName.Find( ':' ) >= 0
			|| strName.CompareNoCase( _T("SHA1") ) == 0;

		if ( bSkip ) continue;

		if ( ! strWords.IsEmpty() )
		{
			strWords += ' ';
		}

		strWords += pItem->GetValue();
	}

	// Child elements, recursively.
	POSITION posChild = GetElementIterator();

	while ( posChild )
	{
		CXMLElement* pChild = GetNextElement( posChild );

		pChild->AddRecursiveWords( strWords );
	}

	// This element's own value comes last.
	if ( ! m_sValue.IsEmpty() )
	{
		if ( ! strWords.IsEmpty() )
		{
			strWords += ' ';
		}

		strWords += m_sValue;
	}
}

//////////////////////////////////////////////////////////////////////
// CXMLElement serialize

#ifdef _AFX

// MFC archive support for an element and its whole subtree.
//
// Layout in the archive: name and value (via CXMLNode::Serialize), attribute
// count followed by each attribute, child count followed by each child
// element (recursively). Loading creates the attributes and children and
// adds them to this element; attributes are keyed by lower-cased name.
void CXMLElement::Serialize(CArchive& ar)
{
	CXMLNode::Serialize( ar );

	if ( ! ar.IsStoring() )
	{
		// Loading: attributes first.
		int nAttributes = (int)ar.ReadCount();

		for ( ; nAttributes > 0 ; nAttributes-- )
		{
			CXMLAttribute* pLoaded = new CXMLAttribute( this );

			pLoaded->Serialize( ar );

			CString strKey( pLoaded->m_sName );
			CharLower( strKey.GetBuffer() );
			strKey.ReleaseBuffer();

			m_pAttributes.SetAt( strKey, pLoaded );
		}

		// Then the child elements, appended in stored order.
		int nChildren = (int)ar.ReadCount();

		for ( ; nChildren > 0 ; nChildren-- )
		{
			CXMLElement* pLoaded = new CXMLElement( this );

			pLoaded->Serialize( ar );

			m_pElements.AddTail( pLoaded );
		}

		return;
	}

	// Storing: attribute count and attributes.
	ar.WriteCount( GetAttributeCount() );

	POSITION posAttribute = GetAttributeIterator();

	while ( posAttribute )
	{
		GetNextAttribute( posAttribute )->Serialize( ar );
	}

	// Child count and children.
	ar.WriteCount( GetElementCount() );

	POSITION posChild = GetElementIterator();

	while ( posChild )
	{
		GetNextElement( posChild )->Serialize( ar );
	}
}

#endif

//////////////////////////////////////////////////////////////////////
// CXMLAttribute construction

// Definitions of the static string constants declared in CXMLAttribute: the
// W3C XML Schema namespace URI, the XML Schema instance namespace URI, and
// the name of the attribute that points at a schema without a namespace.
LPCTSTR CXMLAttribute::xmlnsSchema = _T("http://www.w3.org/2001/XMLSchema");
LPCTSTR CXMLAttribute::xmlnsInstance = _T("http://www.w3.org/2001/XMLSchema-instance");
LPCTSTR CXMLAttribute::schemaName = _T("xsi:noNamespaceSchemaLocation");

// Creates an attribute with an empty value; the base constructor stores the
// parent and name, after which the node type tag is switched to xmlAttribute.
CXMLAttribute::CXMLAttribute(CXMLElement* pParent, LPCTSTR pszName) : CXMLNode( pParent, pszName )
{
m_nNode = xmlAttribute;
}

// An attribute adds no members of its own in this file, so there is nothing
// to release here.
CXMLAttribute::~CXMLAttribute()
{
}

//////////////////////////////////////////////////////////////////////
// CXMLAttribute clone

// Returns a newly allocated attribute with the same name and value as this
// one and with pParent stored as its parent. The copy is not added to
// pParent's attribute map; the caller owns it.
CXMLAttribute* CXMLAttribute::Clone(CXMLElement* pParent)
{
	CXMLAttribute* pCopy = new CXMLAttribute( pParent, m_sName );

	pCopy->m_sValue = m_sValue;

	return pCopy;
}

//////////////////////////////////////////////////////////////////////
// CXMLAttribute to string

// Appends this attribute to strXML in the form  name="value" .
//
// The value is escaped by ValueToString and is always enclosed in double
// quotes.
void CXMLAttribute::ToString(CString& strXML)
{
	strXML += m_sName + _T("=\"");

	ValueToString( m_sValue, strXML );

	strXML += '\"';
}

//////////////////////////////////////////////////////////////////////
// CXMLAttribute from string

// Parses one attribute of the form  name="value"  or  name='value'  (for
// example xmlns:jxta="http://www.jxta.org") and stores the name and the
// decoded value in this object.
//
// strXML - in/out cursor into the XML text; advanced as input is consumed.
//
// Returns TRUE when a complete attribute was read, FALSE when the name, the
// '=' sign, the opening quote or the matching closing quote is missing.
BOOL CXMLAttribute::ParseString(LPCTSTR& strXML)
{
	// Attribute name.
	if ( ! ParseIdentifier( strXML, m_sName ) )
	{
		return FALSE;
	}

	// The name must be followed by '='.
	if ( ! ParseMatch( strXML, _T("=") ) )
	{
		return FALSE;
	}

	// The value is delimited by double or single quotes; remember which.
	TCHAR szQuote[2] = { 0, 0 };

	if ( ParseMatch( strXML, _T("\"") ) )
	{
		szQuote[0] = '\"';
	}
	else if ( ParseMatch( strXML, _T("'") ) )
	{
		szQuote[0] = '\'';
	}
	else
	{
		return FALSE;
	}

	// Locate the closing quote of the same kind.
	LPCTSTR pszQuote = _tcschr( strXML, szQuote[0] );

	if ( ! pszQuote || *pszQuote != szQuote[0] )
	{
		return FALSE;
	}

	// Everything between the quotes is the raw value; decode it.
	m_sValue = StringToValue( strXML, (int)( pszQuote - strXML ) );

	// StringToValue leaves the cursor on the closing quote; consume it.
	return ParseMatch( strXML, szQuote );
}

//////////////////////////////////////////////////////////////////////
// CXMLAttribute equality

// Equality test between this attribute and pXML.
//
// A NULL on either side compares unequal; an attribute always equals itself;
// otherwise both the name and the value must match exactly.
BOOL CXMLAttribute::Equals(CXMLAttribute* pXML) const
{
	if ( this == NULL || pXML == NULL ) return FALSE;

	if ( pXML == this ) return TRUE;

	const BOOL bSame = ( m_sName == pXML->m_sName && m_sValue == pXML->m_sValue );

	return bSame ? TRUE : FALSE;
}

//////////////////////////////////////////////////////////////////////
// CXMLAttribute serialize

#ifdef _AFX

// MFC archive support for an attribute. An attribute is a CXMLNode subclass,
// so the base-class implementation is called to store or load its name and
// value.

void CXMLAttribute::Serialize(CArchive& ar)
{
CXMLNode::Serialize( ar );
}

#endif