libxml2

来源：互联网发布：网络论坛编辑：程序博客网时间：2024/05/01 17:56

XML--Libxml2使用实例

一，使用Libxml2生成xml
1，编辑生成
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

/*
 * Builds a small XML document in memory and writes it, indented and encoded
 * as UTF-8, to the file named by argv[1], or to "-" (standard output) when
 * no argument is given.
 *
 * Returns 0 on success and 1 if the document could not be built or saved.
 */
int main(int argc, char **argv)
{
    const char *out_path = (argc > 1) ? argv[1] : "-";
    xmlDocPtr doc = NULL;
    xmlNodePtr root_node = NULL;
    xmlNodePtr node = NULL;
    xmlNodePtr text = NULL;
    int rc = 1;

    /* Create the document and its root element. */
    doc = xmlNewDoc(BAD_CAST "1.0");
    root_node = xmlNewNode(NULL, BAD_CAST "root");
    if (doc == NULL || root_node == NULL) {
        /* The root is not owned by the document yet, so free it separately. */
        if (root_node != NULL) {
            xmlFreeNode(root_node);
        }
        goto build_failed;
    }
    xmlDocSetRootElement(doc, root_node);

    /* xmlNewChild() creates a node that is attached to root_node directly. */
    if (xmlNewChild(root_node, NULL, BAD_CAST "node1",
                    BAD_CAST "content of node1") == NULL) {
        goto build_failed;
    }

    /* xmlNewProp() creates an attribute attached to the given node. */
    node = xmlNewChild(root_node, NULL, BAD_CAST "node3",
                       BAD_CAST "node has attributes");
    if (node == NULL ||
        xmlNewProp(node, BAD_CAST "attribute", BAD_CAST "yes") == NULL) {
        goto build_failed;
    }

    /* Alternative: create the element and its text separately, then link. */
    node = xmlNewNode(NULL, BAD_CAST "node4");
    text = xmlNewText(BAD_CAST "other way to create content");
    if (node == NULL || text == NULL) {
        /* Neither node is part of the tree yet; release them by hand. */
        if (node != NULL) {
            xmlFreeNode(node);
        }
        if (text != NULL) {
            xmlFreeNode(text);
        }
        goto build_failed;
    }
    xmlAddChild(node, text);
    xmlAddChild(root_node, node);

    /* Dump the document to the chosen file; a negative result means failure. */
    if (xmlSaveFormatFileEnc(out_path, doc, "UTF-8", 1) < 0) {
        fprintf(stderr, "ERROR: unable to save document to %s\n", out_path);
        goto out;
    }
    rc = 0;
    goto out;

build_failed:
    fprintf(stderr, "ERROR: unable to build the XML document\n");

out:
    /* Free the document (and every node attached to it). */
    if (doc != NULL) {
        xmlFreeDoc(doc);
    }
    xmlCleanupParser();
    xmlMemoryDump(); /* debug memory for regression tests */

    return rc;
}

2，编译运行
gcc main.c -o main.out -I /usr/include/libxml2 -lxml2

3，生成的xml
<?xml version="1.0" encoding="UTF-8"?>
<root>
<node1>content of node1</node1>
<node3 attribute="yes">node has attributes</node3>
<node4>other way to create content</node4>
</root>

两个实例，说明如何使用Libxml2遍历xml文档和使用XPath获取特定结点的内容值：
程序使用的xml文档如下：
<?xml version="1.0" encoding="UTF-8"?>
<root>
<node1>content of node1</node1>
<node3 attribute="yes">node has attributes</node3>
<node4>other way to create content</node4>
</root>

二，遍历程序
1，代码
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

int main(int argc, char** argv)
{
xmlDocPtr doc=NULL;
xmlNodePtr cur=NULL;
char* name=NULL;
char* value=NULL;

xmlKeepBlanksDefault (0);

if(argc<2)
{
printf("ERROR: argc must be 2 or above.\n");
return -1;
}

//create Dom tree
doc=xmlParseFile(argv[1]);
if(doc==NULL)
{
printf("ERROR: Loading xml file failed.\n");
exit(1);
}

// get root node
cur=xmlDocGetRootElement(doc);
if(cur==NULL)
{
    printf("ERROR: empty file\n");
    xmlFreeDoc(doc);
    exit(2);
}

//walk the tree
cur=cur->xmlChildrenNode; //get sub node
while(cur !=NULL)
{
    name=(char*)(cur->name);
    value=(char*)xmlNodeGetContent(cur);
    printf("DEBUG: name is: %s, value is: %s\n", name, value);
    xmlFree(value);
    cur=cur->next;
}

// release resource of xml parser in libxml2
xmlFreeDoc(doc);
xmlCleanupParser();

return 0;
}

输出：
DEBUG: name is: node1, value is: content of node1
DEBUG: name is: node3, value is: node has attributes
DEBUG: name is: node4, value is: other way to create content

2，说明：
1）当使用dom树来解析xml文档时，由于默认的方式是把节点间的空白当作第一个子节点，所以为了能和常说的第一个子节点相符，需调用xmlKeepBlanksDefault (0)函数来忽略这种空白。
2）使用 xmlChar* xmlNodeGetContent(xmlNodePtr cur) 函数获取节点内容后，必须调用 xmlFree() 释放该函数为返回值所分配的内存。

使用Xpath获取特定结点的内容(使用的xml文档见上面)：
//#include <iostream>
#include <iconv.h>
#include <stdio.h>
#include <string.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

/*
 * Parses the XML file named by argv[1], evaluates the XPath expression
 * "/root/node3" against it and prints the text of every matching node.
 *
 * Returns 0 on success, -1 when no file argument is given or the file cannot
 * be parsed, -2 when the XPath context cannot be created, -3 when the
 * expression cannot be evaluated, and -4 when no node matches.
 */
int main(int argc, char** argv)
{
    const char *xpathExpr = "/root/node3";
    xmlDocPtr doc = NULL;
    xmlXPathContextPtr xpathCtx = NULL;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNodeSetPtr nodeset = NULL;
    int rc = 0;
    int i;

    if (argc < 2) {
        printf("ERROR: argc must be 2 or above.\n");
        return -1;
    }

    /* Load the XML document. */
    doc = xmlParseFile(argv[1]);
    if (doc == NULL) {
        printf("ERROR: unable to parse file: %s\n", argv[1]);
        rc = -1;
        goto out;
    }

    /* Create the XPath evaluation context. */
    xpathCtx = xmlXPathNewContext(doc);
    if (xpathCtx == NULL) {
        printf("ERROR: unable to create new XPath context\n");
        rc = -2;
        goto out;
    }

    /* Evaluate the XPath expression. */
    xpathObj = xmlXPathEvalExpression((const xmlChar *)xpathExpr, xpathCtx);
    if (xpathObj == NULL) {
        printf("ERROR: unable to evaluate xpath expression = %s\n", xpathExpr);
        rc = -3;
        goto out;
    }

    /* Fetch the selected nodes; an empty (or missing) set is reported. */
    nodeset = xpathObj->nodesetval;
    if (xmlXPathNodeSetIsEmpty(nodeset)) {
        printf("WARNING: No such nodes.\n");
        rc = -4;
        goto out;
    }

    /* Print the text content of each selected node. */
    for (i = 0; i < nodeset->nodeNr; i++) {
        /* The returned string is an allocated copy owned by the caller. */
        xmlChar *val = xmlNodeListGetString(
            doc, nodeset->nodeTab[i]->xmlChildrenNode, 1);

        /* Never hand a NULL pointer to %s; print an empty result instead. */
        printf("DEBUG: the results are: %s\n",
               val != NULL ? (const char *)val : "");
        xmlFree(val);
    }

out:
    /* Single cleanup path: release only what was actually acquired. */
    if (xpathObj != NULL) {
        xmlXPathFreeObject(xpathObj);
    }
    if (xpathCtx != NULL) {
        xmlXPathFreeContext(xpathCtx);
    }
    if (doc != NULL) {
        xmlFreeDoc(doc);
    }
    xmlCleanupParser();

    return rc;
}

输出：
DEBUG: the results are: node has attributes

由于libxml2内部统一使用utf-8编码，所以当xml文档中含有中文且文件不是以utf-8保存时，必须在XML声明中指明实际使用的、支持中文的编码方式(如gb2312)，否则在解析和生成时将会报错。另外在显示、输入、输出的时候，如果外部环境使用的不是utf-8，还必须进行编码转换，不然将很有可能出现乱码。
如使用 xmlNodeGetContent(xmlNodePtr cur)接口获取一个含有中文的节点内容后，为了能够正常显示，必须将返回值进行编码转换。
编码转换函数可参考如下（使用时需要加上头文件iconv.h）：
/**
 * Converts a NUL-terminated string from one character encoding to another
 * using iconv.
 *
 * Requires <iconv.h>, <stdio.h> and <string.h>.
 *
 * @param fromCode  name of the encoding @text is currently in
 * @param toCode    name of the encoding to convert to
 * @param text      NUL-terminated string to convert
 * @return pointer to a static 1024-byte buffer holding the converted string;
 *         the buffer is overwritten by the next call. Returns NULL when an
 *         argument is NULL, when the conversion descriptor cannot be opened,
 *         or when the conversion fails (including output that does not fit
 *         in the buffer).
 */
const char* encodeConvert(char* fromCode,char* toCode,const char* text)
{
    static char bufout[1024];
    char *in = NULL;
    char *out = NULL;
    /* iconv() takes size_t counters; int variables cast to size_t* are the
       wrong width wherever size_t is wider than int. */
    size_t in_left = 0;
    size_t out_left = 0;
    size_t converted = 0;
    iconv_t cd;

    if (fromCode == NULL || toCode == NULL || text == NULL) {
        return NULL;
    }

    cd = iconv_open(toCode, fromCode);
    if (cd == (iconv_t)-1) {
        fprintf(stderr, "iconv_open failed: %s ==> %s\n", fromCode, toCode);
        return NULL;
    }

    /* iconv()'s input parameter is not const-qualified, hence the cast. */
    in = (char *)text;
    /* Convert the terminating NUL too, so the output ends up terminated. */
    in_left = strlen(text) + 1;
    out = bufout;
    out_left = sizeof bufout;

    converted = iconv(cd, &in, &in_left, &out, &out_left);

    /* Close the descriptor on every path, not just on success. */
    iconv_close(cd);

    if (converted == (size_t)-1) {
        fprintf(stderr, "converting failed\n");
        return NULL;
    }
    return bufout;
}