Java解析XML

来源：互联网发布：天猫好还是淘宝好编辑：程序博客网时间：2024/06/06 08:58

XML现在已经成为一种通用的数据交换格式,它的平台无关性,语言无关性,系统无关性,给数据集成与交互带来了极大的方便。对于XML本身的语法知识与技术细节,需要阅读相关的技术文献,这里面包括的内容有DOM(Document Object Model),DTD(Document Type Definition),SAX(Simple API for XML),XSD(Xml Schema Definition),XSLT(Extensible Stylesheet Language Transformations)。

XML在不同的语言里解析方式都是一样的,只不过实现的语法不同而已。基本的解析方式有两种,一种叫SAX，另一种叫DOM。SAX是基于事件流的解析,DOM是基于XML文档树结构的解析。假设我们XML的内容和结构如下:

<?xml version="1.0" encoding="UTF-8"?>     <university name="xidian">          <college name="software">              <class name="class1">                  <student name="first" sex='male' age="21">dasdasdasdsa</student>                  <student name="second" sex='female' age="20" />                <student name="third" sex='female' age="20" />              </class>              <class name="class2">                  <student name="forth" sex='male' age="19" />                <student name="fifth" sex='female' age="20" />                 <student name="sixth" sex='female' age="21" />             </class>          </college>        <!--  <college name="hardware">              <class name="class1">                  <student name="李一" sex='male' age="21" />                  <student name="王二" sex='female' age="20" />                  <student name="张三" sex='female' age="20" />              </class>              <class name="class2">                  <student name="李四" sex='male' age="19" />                  <student name="王五" sex='female' age="20" />                  <student name="赵六" sex='female' age="21" />              </class>          </college> -->    </university>

本文使用JAVA语言来实现DOM与SAX的XML文档解析。

1.DOM生成和解析XML文档

DOM 为 XML 文档的已解析版本定义了一组接口。解析器读入整个文档，然后构建一个驻留内存的树结构，然后代码就可以使用 DOM 接口来操作这个树结构。优点：整个文档树在内存中，便于操作；支持删除、修改、重新排列等多种功能；缺点：将整个文档调入内存（包括无用的节点），浪费时间和空间；使用场合：一旦解析了文档还需多次访问这些数据；硬件资源充足（内存、CPU）。

import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * DOM parsing demo: loads {@code test.xml} from the classpath and prints every
 * element together with its attributes, one element per line.
 *
 * @author liuyi6
 */
public class TestDemo1 {

    public static void main(String[] args) {
        read();
    }

    /**
     * Parses the classpath resource {@code test.xml} into a DOM tree and prints
     * it via {@link #listNodes(Node)}. Any failure is reported on standard
     * error; nothing is thrown to the caller.
     */
    public static void read() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // try-with-resources closes the stream on every path; a null resource is skipped.
        try (InputStream in = TestDemo1.class.getClassLoader().getResourceAsStream("test.xml")) {
            if (in == null) {
                // Report the real cause instead of the parser's "InputStream cannot be null".
                System.err.println("test.xml was not found on the classpath");
                return;
            }
            DocumentBuilder builder = dbf.newDocumentBuilder();
            Document doc = builder.parse(in);
            // Root element of the document.
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            listNodes(root);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the name of {@code node}, then a tab-separated name/value pair for
     * each of its attributes, then a line break, and finally recurses into every
     * child that is an element node.
     *
     * @param node the node to print; it may be any node type (for example the
     *             {@link Document} itself), not only an element
     */
    public static void listNodes(Node node) {
        System.out.print(node.getNodeName());

        // getAttributes() is null for every node type except Element.
        NamedNodeMap attributes = node.getAttributes();
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                Node attribute = attributes.item(i);
                System.out.print("\t" + attribute.getNodeName());
                System.out.print("\t" + attribute.getNodeValue());
            }
        }
        System.out.println();

        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            // A child can be text, CDATA, a comment, etc.; only elements are descended into.
            if (child != null && child.getNodeType() == Node.ELEMENT_NODE) {
                listNodes(child);
            }
        }
    }
}

2.SAX生成和解析XML文档

为解决DOM的问题，出现了SAX。SAX ，事件驱动。当解析器发现元素开始、元素结束、文本、文档的开始或结束等时，发送事件，程序员编写响应这些事件的代码，保存数据。优点：不用事先调入整个文档，占用资源少；SAX解析器代码比DOM解析器代码小，适于Applet，下载。缺点：不是持久的；事件过后，若没保存数据，那么数据就丢了；无状态性；从事件中只能得到文本，但不知该文本属于哪个元素；使用场合：Applet;只需XML文档的少量内容，很少回头访问；机器内存少。

import java.io.File;
import java.util.Stack;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX parsing demo.
 *
 * <p>SAX does not load the whole document into memory. It is an event-driven
 * API: the application registers handlers for the events it cares about. SAX
 * defines the EntityResolver, DTDHandler, ContentHandler and ErrorHandler
 * interfaces, and {@link DefaultHandler} implements all four with empty
 * methods, so a handler normally extends it and overrides only what it needs.
 */
public class TestDemo2 {

    /**
     * Parses {@code src/test.xml} with a {@link MySAXHandler}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // Step 1: obtain a SAX parser factory.
            SAXParserFactory factory = SAXParserFactory.newInstance();
            // Step 2: obtain a SAX parser.
            SAXParser parser = factory.newSAXParser();
            // Step 3: parse the document, passing the handler that receives the events.
            parser.parse(new File("src/test.xml"), new MySAXHandler());
        } catch (Exception e) {
            // Report the failure instead of swallowing it silently.
            e.printStackTrace();
        }
    }
}

/**
 * Handler that prints every attribute it encounters and, for each
 * {@code book} element, the most recently seen {@code title}, {@code author},
 * {@code year} and {@code price} values.
 */
class MySAXHandler extends DefaultHandler {

    // One text buffer per currently open element; the top belongs to the innermost one.
    private final Stack<StringBuilder> textStack = new Stack<StringBuilder>();

    // Most recently parsed field values.
    private String title;
    private String author;
    private String year;
    private double price;

    @Override
    public void startDocument() throws SAXException {
        System.out.println("start document -> parse begin");
    }

    @Override
    public void endDocument() throws SAXException {
        System.out.println("end document -> parse finished");
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        // Open a fresh buffer for this element's character data.
        textStack.push(new StringBuilder());

        // Print the attributes.
        for (int i = 0; i < attributes.getLength(); ++i) {
            String attrName = attributes.getQName(i);
            String attrValue = attributes.getValue(i);
            System.out.println("属性： " + attrName + "=" + attrValue);
        }
    }

    /**
     * Accumulates character data for the innermost open element. A parser may
     * deliver one run of text in several calls, so the value is only
     * interpreted in {@link #endElement(String, String, String)}.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (!textStack.isEmpty()) {
            textStack.peek().append(ch, start, length);
        }
    }

    /**
     * Closes the current element: stores its accumulated text if it is one of
     * the tracked fields, and prints the collected values when a {@code book}
     * element ends.
     *
     * @throws NumberFormatException if a non-empty {@code price} text is not a
     *                               valid double
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        // The element is finished, so its buffer leaves the stack.
        String text = textStack.pop().toString();

        // An element without any text leaves the previous value untouched.
        if (text.length() > 0) {
            if ("title".equals(qName)) {
                title = text;
            } else if ("author".equals(qName)) {
                author = text;
            } else if ("year".equals(qName)) {
                year = text;
            } else if ("price".equals(qName)) {
                price = Double.parseDouble(text);
            }
        }

        if ("book".equals(qName)) {
            System.out.println("Book info: -------");
            System.out.println("    title: " + title);
            System.out.println("    author: " + author);
            System.out.println("    year: " + year);
            System.out.println("    price: " + price);
            System.out.println();
        }
    }
}

3.DOM4J生成和解析XML文档

DOM4J 是一个非常非常优秀的Java XML API，具有性能优异、功能强大和极其易用的特点，同时它也是一个开放源代码的软件。如今你可以看到越来越多的 Java 软件都在使用 DOM4J 来读写 XML，特别值得一提的是连 Sun 的 JAXM 也在用 DOM4J。

import java.io.File;import java.util.Iterator;import java.util.List;import org.dom4j.Attribute;import org.dom4j.Document;import org.dom4j.Element;import org.dom4j.io.SAXReader;public class TestDemo4 {    public static void main(String[] args) {        test();    }     public static void test(){              //创建SAXReader对象              SAXReader reader = new SAXReader();              //读取文件 转换成Document              Document document;            try {                document = reader.read(new File("src/test.xml"));                 //获取根节点元素对象                  Element root = document.getRootElement();                  //遍历                  listNodes(root);             } catch (Exception e) {                // TODO Auto-generated catch block                e.printStackTrace();            }          }          //遍历当前节点下的所有节点          public static void listNodes(Element node){              System.out.print( node.getName());              //首先获取当前节点的所有属性节点              List<Attribute> list = node.attributes();              //遍历属性节点              for(Attribute attribute : list){                  System.out.print("\t"+attribute.getName() +":" + attribute.getValue());              }              //如果当前节点内容不为空，则输出              if(!(node.getTextTrim().equals(""))){                   System.out.print("  "+node.getName() + "文本内容：" + node.getText());                }              System.out.println();            //同时迭代当前节点下面的所有子节点              //使用递归              Iterator<Element> iterator = node.elementIterator();              while(iterator.hasNext()){                  Element e = iterator.next();                  listNodes(e);              }          }  }

4.JDOM生成和解析XML

为减少DOM、SAX的编码量，出现了JDOM；优点：20-80原则，极大减少了代码量。使用场合：要实现的功能简单，如解析、创建等，但在底层，JDOM还是使用SAX（最常用）、DOM、Xalan文档。

import java.io.FileNotFoundException;   import java.io.FileOutputStream;   import java.io.IOException;   import java.util.List;   import org.jdom.Document;   import org.jdom.Element;   import org.jdom.JDOMException;   import org.jdom.input.SAXBuilder;   import org.jdom.output.XMLOutputter;   /**  *   * @author hongliang.dinghl  * JDOM 生成与解析XML文档  *   */  public class JDomDemo implements XmlDocument {   public void createXml(String fileName) {   Document document;   Element  root;   root=new Element("employees");   document=new Document(root);   Element employee=new Element("employee");   root.addContent(employee);   Element name=new Element("name");   name.setText("ddvip");   employee.addContent(name);   Element sex=new Element("sex");   sex.setText("m");   employee.addContent(sex);   Element age=new Element("age");   age.setText("23");   employee.addContent(age);   XMLOutputter XMLOut = new XMLOutputter();   try {   XMLOut.output(document, new FileOutputStream(fileName));   } catch (FileNotFoundException e) {   e.printStackTrace();   } catch (IOException e) {   e.printStackTrace();   }   }       public void parserXml(String fileName) {   SAXBuilder builder=new SAXBuilder(false);    try {   Document document=builder.build(fileName);   Element employees=document.getRootElement();    List employeeList=employees.getChildren("employee");   for(int i=0;iElement employee=(Element)employeeList.get(i);   List employeeInfo=employee.getChildren();   for(int j=0;jSystem.out.println(((Element)employeeInfo.get(j)).getName()+":"+((Element)employeeInfo.get(j)).getValue());   }   }   } catch (JDOMException e) {   e.printStackTrace();   } catch (IOException e) {   e.printStackTrace();   }    }   }

阅读全文

1 0