浅述DOM和SAX解析XML

来源：互联网 | 发布：java微信获取临时素材 | 编辑：程序博客网 | 时间：2024/05/11 09:23

完整版见https://jadyer.github.io/2012/02/21/xml-sax-dom/

首先列出的是待解析的XML文档：candidate.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?><people><!-- This is Jadyer`s comment --><person personID="P01"><姓名 userID="U01">Tony Blair</姓名><邮箱>blair@everywhere.com</邮箱><地址>10 Downing Street, London, UK</地址></person><person personID="P02"><姓名 userID="U02">Bill Clinton</姓名><邮箱>bill@everywhere.com</邮箱><地址>White House, USA</地址></person><person personID="P03"><姓名 userID="U03">Tom Cruise</姓名><邮箱>cruise@everywhere.com</邮箱><地址>57 Jumbo Street, New York, USA</地址></person><person personID="P04"><姓名 userID="U04">Linda Goodman</姓名><邮箱>linda@everywhere.com</邮箱><地址>78 Crax Lane, London, UK</地址></person></people>

下面是以DOM方式解析XML的演示：DomParse.java

package com.jadyer.xml;

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Attr;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Demonstrates parsing an XML document with DOM (Document Object Model).
 *
 * <p>The demo reads {@code candidate.xml}, echoes the whole document back to
 * standard output via a recursive walk, and then prints selected fields of
 * every {@code person} element.
 */
public class DomParse {
    /** Separator line printed around the major sections of the output. */
    private static final String STARS =
            "*****************************************************************************************";

    /** Separator line printed between individual records. */
    private static final String RULER =
            "=========================================================================================";

    /**
     * Parses {@code candidate.xml} and prints its content to standard output.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // Step01: obtain the DOM parser factory, which creates the concrete parser.
        //         Going through DocumentBuilderFactory keeps the program independent of any
        //         particular parser implementation: newInstance() picks the implementation
        //         (see the DocumentBuilderFactory.newInstance() API documentation for the
        //         lookup rules), and because every implementation follows the JAXP interfaces
        //         the calling code stays the same whichever one is selected.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

        // Step02: obtain the concrete DOM parser.
        DocumentBuilder db = dbf.newDocumentBuilder();

        // Step03: parse the XML document into a Document object.
        //         DocumentBuilder.parse(File) is used with a relative path, so candidate.xml
        //         is resolved against the working directory (the project root when run from
        //         an IDE), not against the src folder.
        Document document = db.parse(new File("candidate.xml"));

        // The root element node of the document.
        Element root = document.getDocumentElement();

        System.out.println(STARS);
        System.out.println(STARS);
        System.out.println("Factory Class: " + dbf.getClass().getName());
        System.out.println("Builder Class: " + db.getClass().getName());
        System.out.println("Document Class: " + document.getClass().getName());
        System.out.println(STARS);
        System.out.println(STARS);

        // Recursively walk the tree and echo the document to the console.
        parseElement(root);

        System.out.println("\n" + STARS);
        System.out.println(STARS);

        // Note: whitespace text between elements is reported as child nodes as well.
        NodeList rootChildren = root.getChildNodes();
        System.out.println("该XML文档的根元素节点为: " + root.getTagName());
        System.out.println("根元素节点下的孩子数目为: " + rootChildren.getLength());
        System.out.println("根元素节点下的孩子名字为: 如下所示");
        for (int i = 0; i < rootChildren.getLength(); i++) {
            System.out.println("                     " + rootChildren.item(i).getNodeName());
        }
        System.out.println(RULER);

        // All elements whose tag name is "person".
        NodeList persons = document.getElementsByTagName("person");
        for (int i = 0; i < persons.getLength(); i++) {
            Element person = (Element) persons.item(i);
            printFirstAttribute(person);

            // Look the name element up once; it is needed for both its attribute and its text.
            Node nameNode = person.getElementsByTagName("姓名").item(0);
            printFirstAttribute(nameNode);
            System.out.println("姓名: " + firstChildValue(nameNode));
            System.out.println("邮箱: " + firstChildValue(person.getElementsByTagName("邮箱").item(0)));
            System.out.println("地址: " + firstChildValue(person.getElementsByTagName("地址").item(0)));
            System.out.println(RULER);
        }
    }

    /**
     * Prints {@code name=value} for the first attribute of the given node.
     * Prints nothing when the node is {@code null} or carries no attributes,
     * instead of failing with a {@link NullPointerException}.
     *
     * @param node the node to inspect, may be {@code null}
     */
    private static void printFirstAttribute(Node node) {
        if (node == null) {
            return;
        }
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null || attributes.getLength() == 0) {
            return;
        }
        Node first = attributes.item(0);
        System.out.println(first.getNodeName() + "=" + first.getNodeValue());
    }

    /**
     * Returns the value of the first child of the given node.
     *
     * @param node the node to read, may be {@code null}
     * @return the first child's node value, or {@code null} when the node is
     *         {@code null} or has no children
     */
    private static String firstChildValue(Node node) {
        if (node == null) {
            return null;
        }
        Node firstChild = node.getFirstChild();
        return firstChild == null ? null : firstChild.getNodeValue();
    }

    /**
     * Recursively prints the given element to standard output: its start tag
     * with all attributes, its element, text and comment children, and its
     * end tag. Other node types are skipped.
     *
     * @param element the element to print
     */
    private static void parseElement(Element element) {
        String tagName = element.getNodeName();
        NodeList children = element.getChildNodes();
        System.out.print("<" + tagName);

        // The attributes of the element; checked for null before iterating.
        NamedNodeMap map = element.getAttributes();
        if (null != map) {
            for (int i = 0; i < map.getLength(); i++) {
                Attr attr = (Attr) map.item(i);
                System.out.print(" " + attr.getName() + "=\"" + attr.getValue() + "\"");
            }
        }
        System.out.print(">");

        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            short nodeType = node.getNodeType();
            if (Node.ELEMENT_NODE == nodeType) {
                // An element: keep recursing.
                parseElement((Element) node);
            } else if (Node.TEXT_NODE == nodeType) {
                // Text: the recursion bottoms out here.
                System.out.print(node.getNodeValue());
            } else if (Node.COMMENT_NODE == nodeType) {
                System.out.print("<!--");
                Comment comment = (Comment) node;
                System.out.print(comment.getData());
                System.out.print("-->");
            }
        }
        System.out.print("</" + tagName + ">");
    }
}

下面是以SAX方式解析XML的演示：SaxParse.java

package com.jadyer.xml;

import java.io.File;
import java.util.Stack;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Demonstrates parsing an XML document with SAX (Simple API for XML).
 *
 * <p>Background notes from the original article:
 * <ul>
 *   <li>DOM loads the XML document into memory and then allows random access to the tree.</li>
 *   <li>SAX is event based and strictly sequential: once an element has been passed it
 *       cannot be visited again.</li>
 *   <li>SAX does not need the whole document in memory, so it uses less memory than DOM.</li>
 *   <li>For large XML documents SAX is usually preferred over DOM.</li>
 * </ul>
 */
public class SaxParse {
    /**
     * Parses {@code candidate.xml} twice, once with each demo handler.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        // Step01: obtain the SAX parser factory.
        SAXParserFactory factory = SAXParserFactory.newInstance();

        // Step02: obtain the SAX parser.
        SAXParser parser = factory.newSAXParser();

        // Step03: parse the document, once per handler.
        parser.parse(new File("candidate.xml"), new MyHandler11());
        parser.parse(new File("candidate.xml"), new MyHandler22());
    }
}

/**
 * Handler that only traces which callbacks are invoked.
 *
 * <p>Callbacks of {@link org.xml.sax.helpers.DefaultHandler} and the input that triggers them:
 * <pre>
 *   start of document    startDocument()
 *   &lt;people&gt;             startElement()
 *   text content         characters()
 *   &lt;/people&gt;            endElement()
 *   end of document      endDocument()
 * </pre>
 */
class MyHandler11 extends DefaultHandler {
    @Override
    public void startDocument() throws SAXException {
        System.out.println("=====================================================");
        System.out.println("Document_start");
    }

    @Override
    public void endDocument() throws SAXException {
        System.out.println("Document_end");
        System.out.println("=====================================================");
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        System.out.println("Element_Start");
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        System.out.println("Element_End");
    }
}

/**
 * Handler that prints every attribute it encounters and, at the end of each
 * {@code person} element, the name, e-mail and address collected for it.
 */
class MyHandler22 extends DefaultHandler {
    /** Names of the currently open elements; the innermost one is on top. */
    private final Stack<String> stack = new Stack<String>();

    // Text collected for the current person; null until the element is seen with text.
    private String name;
    private String email;
    private String address;

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        // qName is the tag name, e.g. "person" for <person personID="P01">.
        stack.push(qName);

        // Forget values from earlier elements so that a person with a missing child
        // does not print the previous person's data, and so that text is accumulated
        // per element occurrence.
        if ("person".equals(qName)) {
            name = null;
            email = null;
            address = null;
        } else if ("姓名".equals(qName)) {
            name = null;
        } else if ("邮箱".equals(qName)) {
            email = null;
        } else if ("地址".equals(qName)) {
            address = null;
        }

        for (int i = 0; i < attributes.getLength(); i++) {
            String attrName = attributes.getQName(i);   // e.g. personID
            String attrValue = attributes.getValue(i);  // e.g. P01
            System.out.println(attrName + "=" + attrValue);
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (stack.isEmpty()) {
            return;
        }
        // The parser may report one run of text in several chunks, so each chunk is
        // appended to what has been collected so far rather than replacing it.
        String tag = stack.peek();
        if ("姓名".equals(tag)) {
            name = append(name, ch, start, length);
        } else if ("邮箱".equals(tag)) {
            email = append(email, ch, start, length);
        } else if ("地址".equals(tag)) {
            address = append(address, ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // The element is complete, so it leaves the stack of open elements.
        stack.pop();
        if ("person".equals(qName)) {
            System.out.println("姓名: " + name);
            System.out.println("邮箱: " + email);
            System.out.println("地址: " + address);
            System.out.println("=====================================================");
        }
    }

    /**
     * Appends a chunk of character data to the text collected so far.
     *
     * @param soFar  text collected so far, or {@code null} if none
     * @param ch     buffer holding the chunk
     * @param start  offset of the chunk within {@code ch}
     * @param length number of characters in the chunk
     * @return the collected text including the new chunk
     */
    private static String append(String soFar, char[] ch, int start, int length) {
        String chunk = new String(ch, start, length);
        return soFar == null ? chunk : soFar + chunk;
    }
}