[libexpat]_[XML处理]_[C/C++使用libexpat库以数据流(SAX模型)的方式读取(解析)大XML文件]

来源:互联网 发布:单机收银软件免费版 编辑:程序博客网 时间:2024/04/30 05:29

优点:

1.处理大型XML文件时,可以只读取所需的特定数据。

2.速度快。

3.内存占用少。


缺点:

1.只支持格式良好(well-formed)的XML文件,不支持HTML文件。

2.需要自己实现特定的存储结构。


以下是编译的Makefile文件:

# Builds test.exe against a prebuilt expat 2.0.1 Win32 release and copies the
# expat runtime DLL next to the executable.

# Root of the expat release tree (import library, DLL and include/ directory).
EXPAT_DIR = E:/software/Lib/file/xml-expat-2.0.1/win32/release/share

# Deliberately unquoted: with quotes the shell would look for a single
# program literally named "cp -u" instead of running cp with the -u option.
CP = cp -u

.PHONY: .build-post

# Post-build step: copy the expat DLL into the current directory.
.build-post: test.exe
	${CP} ${EXPAT_DIR}/libexpat-1.dll .

test.exe: test.o
	g++ -o test.exe test.o -L${EXPAT_DIR} -lexpat

test.o: test.cpp
	g++ -I${EXPAT_DIR}/include -c test.cpp -o test.o

以下是源代码:

#include <stdio.h>#include <string>#include <iostream>#include <string.h>#include "expat.h"using namespace std;#define XML_FMT_INT_MOD "l"static bool sectPrStart = false;static void StartElementHandler(void *userData,const XML_Char *name,                                const XML_Char **atts){if(!strcmp("w:sectPr",name)){sectPrStart = true;for (int i = 0; atts[i] != 0; i += 2){cout << "name: " << atts[i] << endl;cout << "value: " << atts[i+1] << endl;}}}static void EndElementHandler(void *userData,const XML_Char *name){if(!strcmp("w:sectPr",name)){sectPrStart = false;cout << "End element name: " << name << endl;}}static void CharacterDataHandler(void *userData,const XML_Char *s,                                 int len){if(sectPrStart && s){string str(s,len);cout << "inner Text: " << str << endl;}}int main(int argc, char *argv[]){cout << "Start............................." << endl;int i = 0;XML_Parser parser = XML_ParserCreate(NULL);XML_SetUserData(parser, &i);XML_SetElementHandler(parser, &StartElementHandler,&EndElementHandler);{XML_SetCharacterDataHandler(parser,&CharacterDataHandler);}int WRITEBUFFERSIZE = 5242880; // 5Mb bufferint size_buf = WRITEBUFFERSIZE;void* buf = malloc(size_buf);const char* file_path = argv[1];FILE* file = fopen(file_path,"r");int ret = 0;while(!feof(file)){ret = fread(buf,1,size_buf,file);cout << "ret: " << ret << endl;if (XML_Parse(parser, (char*) buf, ret, 0) == XML_STATUS_ERROR){fprintf(stderr, "%s at line %" XML_FMT_INT_MOD "u ->%s\n",XML_ErrorString(XML_GetErrorCode(parser)),XML_GetCurrentLineNumber(parser), file_path);break;}}//last callXML_Parse(parser, (char*) 0, 0, 1);free(buf);XML_ParserFree(parser);fclose(file);cout << "End............................." << endl;return 0;}