tinyxml2解析RSS文件 并将其转换为.dat格式

来源:互联网 发布:诸神黄昏神翼进阶数据 编辑:程序博客网 时间:2024/06/06 09:42

参考博客:
tinyXml2 – https://github.com/leethomason/tinyxml2
rss – http://www.runoob.com/rss/rss-tutorial.htm
xml格式 – http://blog.csdn.net/zhubinqiang/article/details/7185322
在c++中使用tinyxml2 解析XML格式文件例子 – http://blog.csdn.net/educast/article/details/12908455
tinyxml2 使用教程 – http://blog.csdn.net/K346K346/article/details/48750417
xmldocument 类 – http://www.cnblogs.com/kissdodog/archive/2013/02/24/2924236.html
参考接口:
// One parsed RSS <item>.
struct RssItem {
    std::string title;        // text of the item's <title>
    std::string link;         // text of the item's <link>
    std::string description;  // text of the item's <description>
    std::string content;      // text of the item's content element
};

// Reference interface for the exercise: parses an RSS file and dumps the
// collected items.
class RssReader {
public:
    RssReader();
    void parseRss();  // parse the RSS document
    void dump();      // write the parsed items out
private:
    // Element type restored: the declaration read `vector _rss;`, which does
    // not compile because std::vector needs its template argument.
    std::vector<RssItem> _rss;
};
要求:最后生成一个 pagelib.dat, 其格式:

    <doc>
      <docid>1</docid>
      <title> ... </title>
      <link> ...  </link>
      <content> ... </content>
    </doc>
    <doc>
      ...
    </doc>
    <doc>
      ...
    </doc>

rss 格式

<?xml version="1.0" encoding="utf-8" ?>  <!-- 声明该文档是xml文档 -->
<rss version="2.0">  <!-- 声明当前文件为rss格式文件 -->
<channel>
  <title></title>  <!-- 对网站和当前rss进行描述 -->
  <description>国内焦点新闻列表</description>  <!-- 对当前rss进行描述 -->
  <link></link>
  <item>  <!-- 一条信息 -->
    <title>最高检:严惩公务员利用审批等权力索贿受贿</title>
    <link></link>  <!-- 新闻链接 -->
    <description></description>  <!-- 新闻描述 -->
  </item>
</channel>
</rss>

以下是可能用到的一些函数的准备
FirstChildElement(const char* value=0): 获取第一个值为value的子节点,value默认值为空,则返回第一个子节点。
RootElement(): 获取根节点,相当于FirstChildElement的空参数版本;
const XMLAttribute* FirstAttribute() const: 获取第一个属性值;
Attribute("Password"): 获取名为 Password 的属性的值。
XMLHandle NextSiblingElement( const char* _value=0 ) :获得下一个节点。
NextSiblingElement//获取下一个相邻的节点

// Plain record holding the text fields collected for one RSS entry.
// Member names (including the misspelled `lagnguage`) are kept as-is because
// they are part of the public interface.
class information {
public:
    // Debug trace: prints a line every time a record is destroyed.
    ~information() {
        std::cout << "~information" << std::endl;
    }

public:
    std::string title;
    std::string link;
    std::string description;
    std::string content;
    std::string lagnguage;
    std::string copyright;
    std::string pubdate;
    std::string guid;
    std::string dccreator;
};

// Appends every record except the first one to "pagelib.dat" as
// <doc><title/><link/><description/></doc> blocks, one tag per line.
//
// The first element is skipped on purpose: the caller (example1) places a
// non-item record at the front of the vector.
//
// @param _vecotr  records to write; a null pointer or a vector with fewer
//                 than two elements writes nothing.
void traverse(std::vector<information>* _vecotr) {
    if (!_vecotr) {
        return;
    }

    // Opened in append mode, so repeated runs accumulate documents.
    std::ofstream _ostream("pagelib.dat", std::ios::app);
    if (!_ostream) {
        std::cerr << "cannot open pagelib.dat for appending" << '\n';
        return;
    }

    // Guard before stepping past the first element: incrementing begin() of
    // an empty vector is undefined behaviour.
    if (_vecotr->empty()) {
        return;
    }

    typedef std::vector<information>::const_iterator record_iterator;
    const record_iterator last = _vecotr->end();
    for (record_iterator record = _vecotr->begin() + 1; record != last; ++record) {
        _ostream << "<doc>" << '\n';
        _ostream << "<title>" << record->title << "</title>" << '\n';
        _ostream << "<link>" << record->link << "</link>" << '\n';
        _ostream << "<description>" << record->description << "</description>" << '\n';
        _ostream << "</doc>" << '\n';
    }
}
void example1(){    vector<information> information_vector;    XMLDocument doc;    doc.LoadFile("douban.book.review.xml");    cout<<doc.Error()<<endl;    if(0!=doc.Error()){        cout<<"load file error"<<endl;        return;    }//  doc.Print();    XMLElement*  root = doc.RootElement();    XMLElement* channel = root->FirstChildElement("channel");    if(NULL==channel){        cout<<"get first chile element error "<<endl;        return ;    }    XMLElement * title = channel->FirstChildElement("title");    cout<<title->GetText()<<endl;    cout<<title->Name()<<endl;    XMLElement * description = title->NextSiblingElement();    cout<<description->GetText()<<endl;    cout<<description->Name()<<endl;    XMLElement * language = description->NextSiblingElement();    cout<<language->GetText()<<endl;    cout<<language->Name()<<endl;    XMLElement * copyright = language->NextSiblingElement();    cout<<copyright->GetText()<<endl;    cout<<copyright->Name()<<endl;    XMLElement * pubDate = copyright->NextSiblingElement();    cout<<pubDate->GetText()<<endl;    cout<<pubDate->Name()<<endl;    XMLElement * item=pubDate->NextSiblingElement();    while(item){        information  tmp;        cout<<"sucess in item"<<endl;        XMLElement *item_child= item->FirstChildElement();        while(item_child){            cout<<item_child->GetText()<<endl;            if(strcmp(item_child->Name(),"title")==0){//              tmp.title.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.title.clear();                tmp.title.append(item_child->GetText());                cout<<"title-------------------------------------------"<<endl;            }else if(strcmp(item_child->Name(),"link")==0){//              tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.link.clear();                tmp.link.append(item_child->GetText());                cout<<"link-------------------------------------------"<<endl;            }else 
if(strcmp(item_child->Name(),"description")==0){//              tmp.link.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.description.clear();                tmp.description.append(item_child->GetText());                cout<<"description------------------------------------"<<endl;            }else if(strncmp(item_child->Name(),"content",7)==0){//              tmp.content.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.content.clear();                tmp.content.append(item_child->GetText());                cout<<"content------------------------------------"<<endl;            }else if(strcmp(item_child->Name(),"dc:creator")==0){//              tmp.dccreator.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.dccreator.clear();                tmp.dccreator.append(item_child->GetText());                cout<<"dc:creator ------------------------------------"<<endl;            }else if(strcmp(item_child->Name(),"pubdate")==0){//              tmp.pubdate.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.pubdate.clear();                tmp.pubdate.append(item_child->GetText());                cout<<"pubdate ------------------------------------"<<endl;            }else if(strcmp(item_child->Name(),"guid")==0){//              tmp.guid.replace(tmp.title.begin(),tmp.title.end(),item_child->GetText());                tmp.guid.clear();                tmp.guid.append(item_child->GetText());                cout<<"guid------------------------------------"<<endl;            }            item_child = item_child->NextSiblingElement();        }        information_vector.push_back(tmp);//      delete tmp;        item = item->NextSiblingElement();    }    traverse(&information_vector);}int main(){    example1();    return 0;}
阅读全文
0 0
原创粉丝点击