使用 dom4j 解析 RSS 的 XML 文件

来源:互联网 发布:java批量发信息 编辑:程序博客网 时间:2024/06/05 04:43

Rss-web 项目开始了,今天我的工作是对 RSS 文件进行解析。想起前几天刚刚试用过 dom4j,今天就拿它来解析 XML,用起来的确很方便!顶一个!

由于我只需要从xml中读取必要的信息,所以写的代码不具有通用性,不过对大多数基本应用,这个类应该够用了!先贴代码和运行结果:

 

package org.ibmclub.hust.Rss;

 

import java.io.File;

import java.util.ArrayList;

import java.util.List;

 

import org.dom4j.Document;

import org.dom4j.DocumentException;

import org.dom4j.Node;

import org.dom4j.Element;

import org.dom4j.io.SAXReader;

import org.ibmclub.hust.Rss.bean.Item;

 

public class RssFile {

       private Document doc;

 

       private String channelName;

 

       private String channelLink;

 

       private String channelDes;

 

       private ArrayList<Item> items = new ArrayList<Item>();

 

       public RssFile(File f) {

              parseFile(f);

       }

 

       private void parseFile(File f) {

              try {

                     SAXReader reader = new SAXReader();

                     doc = reader.read(f);

 

                     List l = doc.selectNodes("rss/channel");

 

                     if (l.size() == 0) {

                            throw new Exception("not a good rss-xml file");

                     }

                     Node channel = (Node) l.get(0);

                     channelName = ((Element) channel.selectNodes("title").get(0))

                                   .getText();

                     channelLink = ((Element) channel.selectNodes("link").get(0))

                                   .getText();

                     channelDes = ((Element) channel.selectNodes("description").get(0))

                                   .getText();

                    

                     //封装item信息

                     List itemList = channel.selectNodes("item");

                     for(Object item : itemList){

                            Item i = new Item((Node)item);

                            items.add(i);

                     }

                    

                     //查看输出

                     System.out.println("channelName:" + channelName);

                     System.out.println("channelLink:" + channelLink);

                     System.out.println("channeDes:" + channelDes);

                     for(Item i : items){

                            System.out.println(" title:"+i.getTitle());

                     //     System.out.println(" link:"+i.getLink());

                     //     System.out.println(" pubtime:"+i.getPubTime());

                     //     System.out.println(" des:"+i.getDes());

                     }

              } catch (Exception e) {

                     System.out.println("message:"+e.getMessage());

              }

       }

 

       public String getChannelName() {

              return channelName;

       }

 

       public String getChannelLink() {

              return channelLink;

       }

 

       public String getDescription() {

              return channelDes;

       }

 

       public ArrayList<Item> getItems() {

              return items;

       }

 

       public static void main(String[] args) {

              long start = System.currentTimeMillis();

              RssFile rssfile = new RssFile(new File("c://Rss.xml"));

              System.out.println(System.currentTimeMillis() - start);

       }

}

 

package org.ibmclub.hust.Rss.bean;

 

import java.sql.Timestamp;

import org.dom4j.Element;

import org.dom4j.Node;

 

public class Item {

       private String title;

 

       private String link;

 

       private String des;

 

       private String pubTime;

 

       public Item() {

       }

 

       public Item(Node itemNode) {

              title = ((Element) itemNode.selectNodes("title").get(0)).getText();

              link = ((Element) itemNode.selectNodes("link").get(0)).getText();

              des = ((Element) itemNode.selectNodes("description").get(0)).getText();

              pubTime = ((Element) itemNode.selectNodes("pubDate").get(0)).getText();

       }

 

       public String getTitle() {

              return title;

       }

 

       public void setTitle(String title) {

              this.title = title;

       }

 

       public String getLink() {

              return link;

       }

 

       public void setLink(String link) {

              this.link = link;

       }

 

       public String getPubTime() {

              return pubTime;

       }

      

       public void setPubTime(String pubTime) {

              this.pubTime = pubTime;

       }

      

       public String getDes(){

              return des ;

       }

       public void setDes(String des){

              this.des = des ;

       }

}

 

运行结果:

channelName:CQ的专栏

 

channelLink:http://blog.csdn.net/cq8587/

 

channeDes:

 

 title:今天上课了

 

 title:拓扑排序

 

 title:俄罗斯农夫算法

 

 title:影评:雨天的水彩画 《爱有天意》观感(转)

 

 title:java处理压缩文件

 

 title:第一篇文章

 

532

 

 

用的时间还真不少(532ms),不过写起代码来确实简单方便,鱼和熊掌不可兼得!

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

原创粉丝点击