Parsing XML with SAX and XmlPullParser

来源:互联网 发布:查看手机4g网络dns 编辑:程序博客网 时间:2024/05/15 23:51
package com.ivanlovetracy.rssreader;import java.io.InputStream;import java.net.URL;import javax.xml.parsers.SAXParser;import javax.xml.parsers.SAXParserFactory;import org.xml.sax.Attributes;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import android.text.Html;public class RssParser {public static RssFeed rssParse(String xmlSource) throws Exception{SAXParserFactory factory = SAXParserFactory.newInstance();SAXParser parser = factory.newSAXParser();RssHandler handler = new RssHandler();//XMLReader reader = parser.getXMLReader();//reader.setContentHandler(handler);//InputSource is = new InputSource((new URL(xmlSource)).openStream());//reader.parse(is);InputStream is = (new URL(xmlSource)).openStream();parser.parse(is, handler);is.close();return handler.getFeed();}private static class RssHandler extends DefaultHandler{final int RSS_TITLE = 1;final int RSS_LINK = 2;final int RSS_DESCRIPTION = 3;final int RSS_CATEGORY = 4;final int RSS_PUBDATE = 5;/*为解决不能识别CDATA而引入StringBuffer * 实际上charactor方法是可以解析出 CDATA中包含的数据的,准确的说是charactor * 可以解析<>外的数据,包括空格,当SAX解析到<!CDATA[[时,也会掉用charactor * 方法解析当中包含的数据,所以可以用一个StringBuffer把<title>与</title>之 * 间的所有数据连接起来,然后再endElement方法中把StringBuffer赋给,RssItem * 中的title, 也就得到了CDATA中的数据,description中同理 *  * */private StringBuffer sBuffer = new StringBuffer();private int state;/*尝试使用LexicalHandler接口中的startCDATA方法,结果没有得到调用,原因不明 * private boolean isCdata = false;* */ private RssFeed rssFeed;private RssItem rssItem;public RssFeed getFeed(){return rssFeed;}@Overridepublic void startDocument() throws SAXException {rssFeed = new RssFeed();/*必须在此新建RssItem对象,否则在characters()方法中, *当遇到不是item的情况下(比如channel中),仍然会有localName=title *的情况出现,此时会执行rssItem.setTitle()方法,将会因为找不到 *rssItem对象而抛出空指针异常 */rssItem = new RssItem();}@Overridepublic void endDocument() throws SAXException {}@Overridepublic void startElement(String uri, String localName, String qName,Attributes attributes) throws SAXException {if (localName.equals("channel")) {state = 
0;return;}if (localName.equals("item")) {rssItem = new RssItem();return;}if (localName.equals("title")) {state = RSS_TITLE;sBuffer = new StringBuffer();return;}if (localName.equals("link")) {state = RSS_LINK;return;}if (localName.equals("description")) {state = RSS_DESCRIPTION;sBuffer = new StringBuffer();return;}if (localName.equals("category")) {state = RSS_CATEGORY;return;}if (localName.equals("pubDate")) {state = RSS_PUBDATE;return;}state = 0;}@Overridepublic void endElement(String uri, String localName, String qName)throws SAXException {if (localName.equals("title")) {rssItem.setTitle(sBuffer.toString());state = 0;}if (localName.equals("description")) {rssItem.setDescription(sBuffer.toString());state = 0;}if (localName.equals("item")) {rssFeed.addItem(rssItem);}}@Overridepublic void characters(char[] ch, int start, int length)throws SAXException {String string = Html.fromHtml(new String(ch,start,length)).toString();//String string = new String(ch,start,length);switch (state) {case RSS_TITLE:sBuffer.append(string);//rssItem.setTitle(sBuffer.toString());//state = 0; break;case RSS_LINK:rssItem.setLink(string);state = 0;break;case RSS_DESCRIPTION:sBuffer.append(string);//rssItem.setDescription(sBuffer.toString());//state = 0;break;case RSS_CATEGORY:rssItem.setCategory(string);state = 0;break;case RSS_PUBDATE:rssItem.setPubdate(string);state = 0;break;default:break;}return;}}}
package com.ivanlovetracy.rssreader;import java.io.InputStream;import java.net.URL;import org.xmlpull.v1.XmlPullParser;import org.xmlpull.v1.XmlPullParserFactory;import android.text.Html;public class RssPullParser {private int state = 0;private boolean isItemTag = false;private RssFeed rssFeed;private RssItem rssItem;private static final int TITLE = 1;private static final int LINK = 2;private static final int DESCRIPTION = 3;private static final int CATEGORY = 4;private static final int PUBDATE = 5;public RssFeed rssParse(String url) throws Exception{XmlPullParserFactory factory = XmlPullParserFactory.newInstance();factory.setNamespaceAware(true);XmlPullParser parser = factory.newPullParser();InputStream is = (new URL(url)).openStream();parser.setInput(is, "UTF-8");int eventType = parser.getEventType();while (eventType != XmlPullParser.END_DOCUMENT) {String tagName = parser.getName();switch (eventType) {case XmlPullParser.START_DOCUMENT:rssFeed = new RssFeed();break;case XmlPullParser.START_TAG:if (tagName.equals("item")) {rssItem = new RssItem();isItemTag = true;}if (tagName.equals("title")) {state = TITLE;}if (tagName.equals("link")) {state = LINK;}if (tagName.equals("description")) {state = DESCRIPTION;}if (tagName.equals("category")) {state = CATEGORY;}if (tagName.equals("pubDate")) {state = PUBDATE;}break;case XmlPullParser.END_TAG:if (tagName.equals("item")) {rssFeed.addItem(rssItem);}break;case XmlPullParser.TEXT:if (isItemTag) {String tagText = Html.fromHtml(parser.getText()).toString().trim();switch (state) {case TITLE:rssItem.setTitle(tagText);state = 0;break;case LINK:rssItem.setLink(tagText);state = 0;break;case DESCRIPTION:rssItem.setDescription(tagText);state = 0;break;case CATEGORY:rssItem.setCategory(tagText);state = 0;break;case PUBDATE:rssItem.setPubdate(tagText);state = 0;break;default:break;}}break;default:break;}eventType = parser.next();}is.close();return rssFeed;}}



原创粉丝点击