XML解析,sax实现详细解说

来源:互联网 发布:淘宝弹窗怎么关闭 编辑:程序博客网 时间:2024/04/30 16:16

这次的xml解析与上次的html解析功能类似,都是对多个数据源做批量解析,只不过这次处理的是多个xml。我想大家对于sax解析也不陌生了。它是以事件驱动(类似堆栈)的方式来工作的,工作原理也比较复杂。同样,废话少说,马上进入正题。

sax实现批量xml解析。上次忘记说实现思路了:所谓的批量操作,我的实现是为每个站点写一个属性(properties)配置文件,然后由一个主配置文件统一驱动。所谓的主配置文件,就是把所有站点配置文件的名称(不含扩展名)写到主文件里,后面我再介绍。

 

代码演示:

package com.tz.xml;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import com.tz.tools.PropertiesTools;
import com.tz.tools.SaveImage;

/**
 * XML解析
 *
 * @author Sunweikun
 *
 */
public class XMLResolve extends DefaultHandler {

    //这一堆的变量统一解释下,tagValue是主要操作变量,pts是读取配置文件类 saveImg是下载文件类,下面的List保存取到的节点值

    private StringBuffer tagValue =new StringBuffer();
    private PropertiesTools pts;
    //private SaveImage saveImg;
    private List<String> imagesList=new ArrayList<String>();
    private List<String> priceList=new ArrayList<String>();
    private List<String> valueList=new ArrayList<String>();
    private List<String> titleList=new ArrayList<String>();
    private List<String> numberList=new ArrayList<String>();
    private List<String> cityList=new ArrayList<String>();
    private List<String> urlList=new ArrayList<String>();

    public XMLResolve() {

    }
 

    /**
     * 开始解析XML文件
     */
    public void startDocument() throws SAXException {
        // 可以在此初始化变量等操作
      //  System.out.println("~~~~解析文档开始~~~");

    }

    /**
     * 结束解析XML文件
     */
    public void endDocument() throws SAXException {
       // System.out.println("~~~~解析文档结束~~~");
    }

    /**
     * 在遇到结束标签时调用此方法
     */
    public void endElement(String uri, String localName, String qName)throws SAXException {
        String title = null, sold = null, city = null, url = null,
        price = null, value = null, img = null;
        for (int i = 0; i < this.pts.getXMLURL().length; i++) {
            Properties p = this.pts.getProperties(this.pts.getXMLURL()[i],"xml");
            // 获取标题
            if (p.getProperty("title").equals(qName)) {
                title = tagValue.toString().trim();
            }
            // 获取购买人数
            if (p.getProperty("quantity_sold").equals(qName)) {
                sold = tagValue.toString().trim();
            }
            // 获取城市名称
            if (p.getProperty("division_name").equals(qName)) {
                city = tagValue.toString().trim();
            }
            // 获取团购地址
            if (p.getProperty("deal_url").equals(qName)) {
                url = tagValue.toString().trim();
            }
            // 获取现在价格
            if (p.getProperty("price").equals(qName)) {
                price = tagValue.toString().trim();
            }
            // 获取原来价格
            if (p.getProperty("value").equals(qName)) {
                value = tagValue.toString().trim();
            }
            // 获取图片地址
            if (p.getProperty("image_Url").equalsIgnoreCase(qName)) {
                img = tagValue.toString().trim();
            }
        }

        if (title != null){
            titleList.add(title);
        }
        if (sold != null){
            numberList.add(sold);
        }
        if (city != null){
           cityList.add(city);
        }
        if (url != null) {
            if (!StringUtils.isNumeric(url)) {
                urlList.add(url);
            }
        }
        if (price != null){
            priceList.add(price);
        }
        if (value != null){
            valueList.add(value);
        }
        if (img != null) {
            imagesList.add(img);
        }
    }

    /**
     * 所有的XML文件中的字符会放到ch[]中
     */
    public void characters(char ch[], int start, int length)throws SAXException {
        tagValue.append(ch, start, length);

    }

    /**
     * 错误的解析通知
     */
    public void error(SAXParseException e) throws SAXException {
    }

    /**
     * 在开始是要取的属性
     */
    public void startElement(String uri, String localName, String qName,Attributes attributes) throws SAXException {
       //初始化
      //  tagValue = new StringBuffer();
        //每一次获取标签是清空缓冲
        tagValue.delete(0, tagValue.length());
        super.startElement(uri, localName, qName, attributes);
    }

    public void setPts(PropertiesTools pts) {
        this.pts = pts;
    }

    public void setSaveImg(SaveImage saveImg) {
        this.saveImg = saveImg;
    }
    public List<String> getImagesList() {
        return imagesList;
    }

    public void setImagesList(List<String> imagesList) {
        this.imagesList = imagesList;
    }

    public List<String> getPriceList() {
        return priceList;
    }

    public void setPriceList(List<String> priceList) {
        this.priceList = priceList;
    }

    public List<String> getValueList() {
        return valueList;
    }

    public void setValueList(List<String> valueList) {
        this.valueList = valueList;
    }

    public List<String> getTitleList() {
        return titleList;
    }

    public void setTitleList(List<String> titleList) {
        this.titleList = titleList;
    }

    public List<String> getNumberList() {
        return numberList;
    }

    public void setNumberList(List<String> numberList) {
        this.numberList = numberList;
    }

    public List<String> getCityList() {
        return cityList;
    }

    public void setCityList(List<String> cityList) {
        this.cityList = cityList;
    }

    public List<String> getUrlList() {
        return urlList;
    }

    public void setUrlList(List<String> urlList) {
        this.urlList = urlList;
    }


}

--------------------------------------------------------------

下面是执行方法

--------------------------------------------------------------

package com.tz.xml;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;

import com.tz.tools.PropertiesTools;
/**
 * 操作类
 * @author Sunweikun
 *
 */
public class SendRE {
    public PropertiesTools pts;
    public XMLResolve xmlResolve;
    /**
     * 完成的最后操作
     */
    public void sendRE() throws IOException, ParserConfigurationException, SAXException {
        for (int i = 0; i < this.pts.getXMLURL().length; i++) {
            URL url = new URL(this.pts.getProperties(this.pts.getXMLURL()[i],"xml").getProperty("url"));
            InputStream input = url.openStream();
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(false);
            SAXParser parser = factory.newSAXParser();
            parser.parse(input, this.xmlResolve);
        }
      
    }
    public void setPts(PropertiesTools pts) {
        this.pts = pts;
    }
    public void setXmlResolve(XMLResolve xmlResolve) {
        this.xmlResolve = xmlResolve;
    }
}

------------------------------------

配置文件 24juan.properties(文件名需与主配置文件 xmlName 中的名称 24juan 一致)

------------------------------------

#URL要抓取的API
url=http\://www.24quan.com/api/alliance.php
#该团的URL节点
deal_url=siteurl
#购买的人数节点
quantity_sold=bought
#城市节点
division_name=city
#标题节点
title=title
#图片地址节点
image_Url=image
#现在价格节点
price=price
#原来价格节点
value=value
#图片输出路径
image_path=D\:\\MyEclipse 8.5\\tz\\24juan\\

-------------------------------------------------------------

主配置文件 config.properties

-------------------------------------------------------------

#在此写入新增的XML配置文件名称(不含扩展名),多个名称以逗号分隔,例如:24juan,aibang,didatuan,ftuan,lashou,meituan,pintuan,sohu,tuanbao,tuanku,wowo
xmlName=24juan,aibang,didatuan,ftuan,lashou,meituan,pintuan,sohu,tuanku,tuanbao,wowo
#写入新加的配置对html操作
htmlName=nuomi,xinlang

----------------------------------------------------------------

配置文件读写类 PropertiesTools

----------------------------------------------------------------

package com.tz.tools;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

/**
 * Reads the properties files used by the parsers, plus two small
 * string/list helpers.
 *
 * <p>All file names are resolved relative to the current working directory.
 *
 * @author Sunweikun
 */
public class PropertiesTools {

    /** Main configuration file listing the per-site config names. */
    private static final String MAIN_CONFIG = "config.properties";

    /**
     * Loads {@code <manner>/<path>.properties}.
     *
     * @param path   config file name without the {@code .properties} extension
     * @param manner directory holding the file; callers in this project pass
     *               {@code "xml"} or {@code "html"}
     * @return the loaded properties; empty if the file cannot be read (the
     *         failure is reported on standard error)
     */
    public Properties getProperties(String path, String manner) {
        return load(manner + "/" + path + ".properties");
    }

    /**
     * Returns the XML site config names listed under {@code xmlName} in the
     * main config.
     *
     * @return the names, trimmed and without blank entries; an empty array if
     *         the key or the file is missing
     */
    public String[] getXMLURL() {
        return readNameList("xmlName");
    }

    /**
     * Returns the HTML site config names listed under {@code htmlName} in the
     * main config.
     *
     * @return the names, trimmed and without blank entries; an empty array if
     *         the key or the file is missing
     */
    public String[] getHTMLURL() {
        return readNameList("htmlName");
    }

    /**
     * Tells whether a string is null, empty, or contains nothing but
     * whitespace once trimmed.
     *
     * @param astr string to test, may be null
     * @return {@code true} if there is no non-whitespace character left
     */
    public static boolean isTrimEmpty(String astr) {
        if (astr == null) {
            return true;
        }
        String trimmed = astr.trim();
        // trim() only strips characters <= U+0020; also treat the remaining
        // Unicode whitespace (per Character.isWhitespace) as blank.
        for (int i = 0; i < trimmed.length(); i++) {
            if (!Character.isWhitespace(trimmed.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes duplicate entries from the list in place, keeping the first
     * occurrence of each value and the original order.
     *
     * @param list list to de-duplicate; a null list is left alone
     */
    public static void removeDuplicateWithOrder(List<String> list) {
        if (list == null) {
            return;
        }
        Set<String> seen = new HashSet<String>();
        List<String> unique = new ArrayList<String>(list.size());
        for (String element : list) {
            // Set.add returns false for values that were already seen.
            if (seen.add(element)) {
                unique.add(element);
            }
        }
        list.clear();
        list.addAll(unique);
    }

    /** Loads one properties file and always closes the underlying stream. */
    private static Properties load(String fileName) {
        Properties props = new Properties();
        InputStream in = null;
        try {
            in = new BufferedInputStream(new FileInputStream(fileName));
            props.load(in);
        } catch (IOException e) {
            // Best effort, as before: report and return what was loaded.
            System.err.println("PropertiesTools: could not read " + fileName);
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
        return props;
    }

    /** Reads a comma-separated list of names from the main config. */
    private static String[] readNameList(String key) {
        String raw = load(MAIN_CONFIG).getProperty(key);
        if (raw == null) {
            return new String[0];
        }
        List<String> names = new ArrayList<String>();
        for (String part : raw.split(",")) {
            String name = part.trim();
            if (name.length() > 0) {
                names.add(name);
            }
        }
        return names.toArray(new String[names.size()]);
    }

}

原创粉丝点击