XML解析

来源:互联网 发布:fresh玫瑰化妆水知乎 编辑:程序博客网 时间:2024/06/04 19:30

原XML文件含有31个省,由于数据太多,这里只放广东省作为示例,其他省的结构与之一致。原文件中所有<province>元素都包含在同一个根元素之下,此处省略了根元素。

 <!-- Excerpt: one <province> with its <city> children, each holding <area> leaves. -->
 <!-- Every element carries a name and a postcode attribute. -->
 <province name="广东省" postcode="440000">
   <city name="广州市" postcode="440100">
     <area name="荔湾区" postcode="440103"/>
     <area name="越秀区" postcode="440104"/>
     <area name="海珠区" postcode="440105"/>
     <area name="天河区" postcode="440106"/>
     <area name="白云区" postcode="440111"/>
     <area name="黄埔区" postcode="440112"/>
     <area name="番禺区" postcode="440113"/>
     <area name="花都区" postcode="440114"/>
     <area name="南沙区" postcode="440115"/>
     <area name="萝岗区" postcode="440116"/>
     <area name="增城市" postcode="440183"/>
     <area name="从化市" postcode="440184"/>
   </city>
   <city name="韶关市" postcode="440200">
     <area name="武江区" postcode="440203"/>
     <area name="浈江区" postcode="440204"/>
     <area name="曲江区" postcode="440205"/>
     <area name="始兴县" postcode="440222"/>
     <area name="仁化县" postcode="440224"/>
     <area name="翁源县" postcode="440229"/>
     <area name="乳源瑶族自治县" postcode="440232"/>
     <area name="新丰县" postcode="440233"/>
     <area name="乐昌市" postcode="440281"/>
     <area name="南雄市" postcode="440282"/>
   </city>
   <city name="深圳市" postcode="440300">
     <area name="罗湖区" postcode="440303"/>
     <area name="福田区" postcode="440304"/>
     <area name="南山区" postcode="440305"/>
     <area name="宝安区" postcode="440306"/>
     <area name="龙岗区" postcode="440307"/>
     <area name="盐田区" postcode="440308"/>
   </city>
   <city name="珠海市" postcode="440400">
     <area name="香洲区" postcode="440402"/>
     <area name="斗门区" postcode="440403"/>
     <area name="金湾区" postcode="440404"/>
   </city>
   <city name="汕头市" postcode="440500">
     <area name="龙湖区" postcode="440507"/>
     <area name="金平区" postcode="440511"/>
     <area name="濠江区" postcode="440512"/>
     <area name="潮阳区" postcode="440513"/>
     <area name="潮南区" postcode="440514"/>
     <area name="澄海区" postcode="440515"/>
     <area name="南澳县" postcode="440523"/>
   </city>
   <city name="佛山市" postcode="440600">
     <area name="禅城区" postcode="440604"/>
     <area name="南海区" postcode="440605"/>
     <area name="顺德区" postcode="440606"/>
     <area name="三水区" postcode="440607"/>
     <area name="高明区" postcode="440608"/>
   </city>
   <city name="江门市" postcode="440700">
     <area name="蓬江区" postcode="440703"/>
     <area name="江海区" postcode="440704"/>
     <area name="新会区" postcode="440705"/>
     <area name="台山市" postcode="440781"/>
     <area name="开平市" postcode="440783"/>
     <area name="鹤山市" postcode="440784"/>
     <area name="恩平市" postcode="440785"/>
   </city>
   <city name="湛江市" postcode="440800">
     <area name="赤坎区" postcode="440802"/>
     <area name="霞山区" postcode="440803"/>
     <area name="坡头区" postcode="440804"/>
     <area name="麻章区" postcode="440811"/>
     <area name="遂溪县" postcode="440823"/>
     <area name="徐闻县" postcode="440825"/>
     <area name="廉江市" postcode="440881"/>
     <area name="雷州市" postcode="440882"/>
     <area name="吴川市" postcode="440883"/>
   </city>
   <city name="茂名市" postcode="440900">
     <area name="茂南区" postcode="440902"/>
     <area name="茂港区" postcode="440903"/>
     <area name="电白县" postcode="440923"/>
     <area name="高州市" postcode="440981"/>
     <area name="化州市" postcode="440982"/>
     <area name="信宜市" postcode="440983"/>
   </city>
   <city name="肇庆市" postcode="441200">
     <area name="端州区" postcode="441202"/>
     <area name="鼎湖区" postcode="441203"/>
     <area name="广宁县" postcode="441223"/>
     <area name="怀集县" postcode="441224"/>
     <area name="封开县" postcode="441225"/>
     <area name="德庆县" postcode="441226"/>
     <area name="高要市" postcode="441283"/>
     <area name="四会市" postcode="441284"/>
   </city>
   <city name="惠州市" postcode="441300">
     <area name="惠城区" postcode="441302"/>
     <area name="惠阳区" postcode="441303"/>
     <area name="博罗县" postcode="441322"/>
     <area name="惠东县" postcode="441323"/>
     <area name="龙门县" postcode="441324"/>
   </city>
   <city name="梅州市" postcode="441400">
     <area name="梅江区" postcode="441402"/>
     <area name="梅县" postcode="441421"/>
     <area name="大埔县" postcode="441422"/>
     <area name="丰顺县" postcode="441423"/>
     <area name="五华县" postcode="441424"/>
     <area name="平远县" postcode="441426"/>
     <area name="蕉岭县" postcode="441427"/>
     <area name="兴宁市" postcode="441481"/>
   </city>
   <city name="汕尾市" postcode="441500">
     <area name="城区" postcode="441502"/>
     <area name="海丰县" postcode="441521"/>
     <area name="陆河县" postcode="441523"/>
     <area name="陆丰市" postcode="441581"/>
   </city>
   <city name="河源市" postcode="441600">
     <area name="源城区" postcode="441602"/>
     <area name="紫金县" postcode="441621"/>
     <area name="龙川县" postcode="441622"/>
     <area name="连平县" postcode="441623"/>
     <area name="和平县" postcode="441624"/>
     <area name="东源县" postcode="441625"/>
   </city>
   <city name="阳江市" postcode="441700">
     <area name="江城区" postcode="441702"/>
     <area name="阳西县" postcode="441721"/>
     <area name="阳东县" postcode="441723"/>
     <area name="阳春市" postcode="441781"/>
   </city>
   <city name="清远市" postcode="441800">
     <area name="清城区" postcode="441802"/>
     <area name="佛冈县" postcode="441821"/>
     <area name="阳山县" postcode="441823"/>
     <area name="连山壮族瑶族自治县" postcode="441825"/>
     <area name="连南瑶族自治县" postcode="441826"/>
     <area name="清新县" postcode="441827"/>
     <area name="英德市" postcode="441881"/>
     <area name="连州市" postcode="441882"/>
   </city>
   <city name="东莞市" postcode="441900">
     <area name="市辖区" postcode="441901"/>
   </city>
   <city name="中山市" postcode="442000">
     <area name="市辖区" postcode="442001"/>
   </city>
   <city name="潮州市" postcode="445100">
     <area name="湘桥区" postcode="445102"/>
     <area name="潮安县" postcode="445121"/>
     <area name="饶平县" postcode="445122"/>
   </city>
   <city name="揭阳市" postcode="445200">
     <area name="榕城区" postcode="445202"/>
     <area name="揭东县" postcode="445221"/>
     <area name="揭西县" postcode="445222"/>
     <area name="惠来县" postcode="445224"/>
     <area name="普宁市" postcode="445281"/>
   </city>
   <city name="云浮市" postcode="445300">
     <area name="云城区" postcode="445302"/>
     <area name="新兴县" postcode="445321"/>
     <area name="郁南县" postcode="445322"/>
     <area name="云安县" postcode="445323"/>
     <area name="罗定市" postcode="445381"/>
   </city>
 </province>

根据原XML文件的结构构建辅助类。
原结构的最小单元是县区,市由若干县区构成,省又由若干市构成。
因此构建的省类内含市的集合,市类内含县区的集合。

/**
 * A province read from the XML: its name, its postcode and the cities it contains.
 *
 * Can be built in one step through the three-argument constructor, or created
 * empty and filled in through the setters.
 */
public class Provinces {
    private String provinces_name;
    private String provinces_postcode;
    // Holds the province's City objects; the field name is kept from the original.
    private List<City> city_name;

    /** Creates an empty province; populate it through the setters. */
    public Provinces() {
    }

    /**
     * @param provinces_name value of the province's name attribute
     * @param postcode       value of the province's postcode attribute
     * @param city_name      the cities belonging to this province
     */
    public Provinces(String provinces_name, String postcode, List<City> city_name) {
        this.provinces_name = provinces_name;
        this.provinces_postcode = postcode;
        this.city_name = city_name;
    }

    public String getProvinces_name() {
        return provinces_name;
    }

    public void setProvinces_name(String provinces_name) {
        this.provinces_name = provinces_name;
    }

    public String getProvinces_postcode() {
        return provinces_postcode;
    }

    public void setProvinces_postcode(String provinces_postcode) {
        this.provinces_postcode = provinces_postcode;
    }

    /** @return the cities belonging to this province */
    public List<City> getCity_name() {
        return city_name;
    }

    /** @param city_name the cities belonging to this province */
    public void setCity_name(List<City> city_name) {
        this.city_name = city_name;
    }

    @Override
    public String toString() {
        return "Provinces [provinces_name=" + provinces_name
                + ", provinces_postcode=" + provinces_postcode + ", city_name="
                + city_name + "]";
    }
}

/**
 * A city read from the XML: its name, its postcode and the areas it contains.
 */
public class City {
    private String city_name;
    private String city_postcode;
    private List<Area> area;

    /** Creates an empty city; populate it through the setters. */
    public City() {
    }

    /**
     * @param name          value of the city's name attribute
     * @param city_postcode value of the city's postcode attribute
     * @param area          the areas belonging to this city
     */
    public City(String name, String city_postcode, List<Area> area) {
        this.city_name = name;
        this.city_postcode = city_postcode;
        this.area = area;
    }

    public String getCity_name() {
        return city_name;
    }

    public void setCity_name(String city_name) {
        this.city_name = city_name;
    }

    public String getCity_postcode() {
        return city_postcode;
    }

    public void setCity_postcode(String city_postcode) {
        this.city_postcode = city_postcode;
    }

    /** @return the areas belonging to this city */
    public List<Area> getArea() {
        return area;
    }

    /** @param area the areas belonging to this city */
    public void setArea(List<Area> area) {
        this.area = area;
    }

    @Override
    public String toString() {
        return "City [city_name=" + city_name + ", city_postcode="
                + city_postcode + ", area=" + area + "]";
    }
}

/**
 * An area (the smallest unit in the XML): its name and its postcode.
 */
public class Area {
    private String area_name;
    private String area_postcode;

    /** Creates an empty area; populate it through the setters. */
    public Area() {
    }

    /**
     * @param area_name     value of the area's name attribute
     * @param area_postcode value of the area's postcode attribute
     */
    public Area(String area_name, String area_postcode) {
        this.area_name = area_name;
        this.area_postcode = area_postcode;
    }

    public String getArea_name() {
        return area_name;
    }

    public void setArea_name(String area_name) {
        this.area_name = area_name;
    }

    public String getArea_postcode() {
        return area_postcode;
    }

    public void setArea_postcode(String area_postcode) {
        this.area_postcode = area_postcode;
    }

    @Override
    public String toString() {
        return "Area [area_name=" + area_name + ", area_postcode="
                + area_postcode + "]";
    }
}

以下是DOM解析片段:

public class DOMXML {    public static void main(String[] args){        List<Provinces> provinces_arrayList=null;        //实例化一个DOM解析器工厂对象        DocumentBuilderFactory dbf=DocumentBuilderFactory.newInstance();        try {            //根据工厂对象得到解析器对象            DocumentBuilder db=dbf.newDocumentBuilder();            InputStream is=new FileInputStream(new File("d://Resource/city.xml"));            //指定要解析的XML文件,并将其挂在一个文档上            Document document=db.parse(is);            //根据文档对象得到根节点对象            Element root=document.getDocumentElement();            //根据标签名获取指定节点的集合储存到NodeList对象中            NodeList provinces_nodeList=root.getElementsByTagName("province");            //获取集合中的节点个数            int len=provinces_nodeList.getLength();             //TAG PROVINCE_BEGIN            provinces_arrayList=new ArrayList<>();            for(int i=0;i<len;i++){                //根据索引位获取指定位置的Node                Node province_node=provinces_nodeList.item(i);                //为了使用Eelment的 String getAttribute(String name)根据属性名获取属性值的方法,转型                Element province_element=(Element) province_node;                String province_name=province_element.getAttribute("name");                String province_postcode=province_element.getAttribute("postcode");                NodeList child_node=province_element.getElementsByTagName("city");                //TAG CITY_BEGIN                List<City> city_arrayList=new ArrayList<>();                for(int j=0;j<child_node.getLength();j++){                    Node city_node=child_node.item(j);                    Element city_element=(Element)city_node;                    String city_name=city_element.getAttribute("name");                    String city_postcode=city_element.getAttribute("postcode");                    NodeList area_list=city_element.getElementsByTagName("area");                    //TAG AREA_START                    List<Area> area_arrayList=new ArrayList<>();                        for(int 
k=0;k<area_list.getLength();k++){                        Node area_node=area_list.item(k);                        Element area_element=(Element) area_node;                        String area_name=area_element.getAttribute("name");                        String area_postcode=area_element.getAttribute("postcode");                        Area area=new Area(area_name,area_postcode);                        area_arrayList.add(area);                    }                    //TAG AREA_END                    City city=new City(city_name,city_postcode,area_arrayList);                    city_arrayList.add(city);                }                //TAG CITY_END                Provinces provinces_=new Provinces(province_name,province_postcode,city_arrayList);                provinces_arrayList.add(provinces_);            }            //TAG PROVINCE_END        } catch (Exception e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }}

以下是PULL解析。PULL解析在遇到开始标签时先用无参构造方法实例化对象,再逐项设置属性,因此辅助类需要提供setter和getter方法。

public class PULLXML {    public static void main(String[] args){        List<Provinces> province_list=new ArrayList<>();        List<City> city_list=null;        List<Area> area_list=null;        Provinces province=null;        City city=null;        Area area=null;        try {            //获取Pull解析器工厂对象            XmlPullParserFactory  xmlPullParserFactory=XmlPullParserFactory.newInstance();            //根据工厂对象获取Pull对象            XmlPullParser xmlPullParser=xmlPullParserFactory.newPullParser();            //指定解析文件与编码表            InputStream is=new FileInputStream(new File("d://Resource/city.xml"));            xmlPullParser.setInput(is,"utf-8");            //得到时间对象值            int eventType=xmlPullParser.getEventType();            //只要没有解析到xml结尾则一直继续            while(eventType!=XmlPullParser.END_DOCUMENT){                switch(eventType){                case XmlPullParser.START_DOCUMENT:                    break;                case XmlPullParser.START_TAG:                    String st=xmlPullParser.getName();                    if("province".equals(st)){                        province=new Provinces();                        String province_name=xmlPullParser.getAttributeValue(null,"name");                        String province_postcode=xmlPullParser.getAttributeValue(null, "postcode");                        province.setProvinces_name(province_name);                        province.setProvinces_postcode(province_postcode);                        city_list=new ArrayList<>();                    }                    if("city".equals(st)){                        city=new City();                        String city_name=xmlPullParser.getAttributeValue(null, "name");                        String city_postcode=xmlPullParser.getAttributeValue(null, "postcode");                        city.setCity_name(city_name);                        city.setCity_postcode(city_postcode);                        area_list=new ArrayList<>();                    }                    
if("area".equals(st)){                        area=new Area();                        String area_name=xmlPullParser.getAttributeValue(null,"name");                        String area_postcode=xmlPullParser.getAttributeValue(null, "postcode");                        area.setArea_name(area_name);                        area.setArea_postcode(area_postcode);                    }                    break;                case XmlPullParser.TEXT:                    break;                case XmlPullParser.END_TAG:                    String str=xmlPullParser.getName();                    if("province".equals(str)){                        province.setCity_name(city_list);                        province_list.add(province);                    }                    if("city".equals(str)){                        city.setArea(area_list);                        city_list.add(city);                    }                    if("area".equals(str)){                        area_list.add(area);                    }                    break;                }                eventType=xmlPullParser.next();            }            FileOutputStream fos=new FileOutputStream(new File("d:he.txt"));            fos.write(province_list.toString().getBytes());        } catch (Exception e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }}
0 0
原创粉丝点击