SAX解析XML file

来源:互联网 发布:小明看看发布永久域名 编辑:程序博客网 时间:2024/04/26 07:02

SAX解析XML file

http://chris-freedream.iteye.com/blog/375772

 

1.      SAX简述:
SAX全称Simple API for XML, 用于简单并快速地解析XML文件, 是基于事件处理的模型。SUN定义了规范, 常用的主要接口包括ContentHandler, ErrorHandler, XMLReader, XMLFilter, Attributes, InputSource, Locator, 以及不常用的EntityResolver, DTDHandler, LexicalHandler, DeclHandler等, UML静态类图如下(原图在转载时丢失):
也许读者会问: 这么多接口都要去实现, 而且很多接口中的方法并不是自己需要的, 那岂不是做无用功? 放心, 其中有一个DefaultHandler已经适配了所有重要的接口, 你只需要继承DefaultHandler并重写你想处理的方法即可, 比如startElement, endElement, characters等。

其中Apache的Xerces对这些接口提供了一套完整的实现(SAX项目主页: http://sax.sourceforge.net/)。
2.SAX in practice

被解析的XML文件:

<?xml version="1.0" encoding="utf-8"?>

<books>

       <book pages="1000" price="$99">

              <name>Thinking in java</name>

              <version>3.0</version>

       </book>

       <book pages="800" price="$40">

              <name>JUnit in Action</name>

              <version>2.0</version>

       </book>

       <book pages="900" price="$70">

              <name>Lucene in Action</name>

              <version>2.0</version>

       </book>

</books>

 

解析代码如下:

package com.chris.sax.action;

 

import java.io.BufferedOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.PrintStream;

 

 

import org.xml.sax.Attributes;

import org.xml.sax.ContentHandler;

import org.xml.sax.ErrorHandler;

import org.xml.sax.InputSource;

import org.xml.sax.Locator;

import org.xml.sax.SAXException;

import org.xml.sax.SAXParseException;

import org.xml.sax.XMLReader;

import org.xml.sax.helpers.XMLReaderFactory;

 

/**
 * First, deliberately naive SAX example: echoes an XML file to standard output
 * by reacting to SAX events.
 *
 * <p>{@link MyContentHandler#characters} intentionally prints the whole
 * {@code char[]} it receives and ignores {@code start}/{@code length}. That is
 * the flaw this version exists to demonstrate; the improved version of this
 * class accumulates only the valid range instead.
 */
public class XMLParser
{
    /** Sink for normal output: auto-flushing stream over the process's stdout descriptor. */
    protected PrintStream output =
        new PrintStream( new BufferedOutputStream( new FileOutputStream( java.io.FileDescriptor.out ), 128 ), true );

    /** Sink for error output: auto-flushing stream over the process's stderr descriptor. */
    protected PrintStream error =
        new PrintStream( new BufferedOutputStream( new FileOutputStream( java.io.FileDescriptor.err ), 128 ), true );

    /**
     * Parses the given XML file, reporting content events to {@link MyContentHandler}
     * and parse problems to {@link MyErrorHandler}.
     *
     * @param fileName path of the XML file to parse
     * @throws SAXException if the parser reports an unrecoverable problem
     * @throws IOException  if the file cannot be opened or read
     */
    public void parserXMLFile( String fileName ) throws SAXException, IOException
    {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setContentHandler( new MyContentHandler() );
        reader.setErrorHandler( new MyErrorHandler() );

        FileInputStream in = new FileInputStream( new File( fileName ) );
        try
        {
            reader.parse( new InputSource( in ) );
        }
        finally
        {
            // The original never closed the stream; release the file handle
            // whether or not parsing succeeded.
            in.close();
        }
    }

    /** Writes parser errors to {@link #error} and warnings to {@link #output}. */
    class MyErrorHandler implements ErrorHandler
    {
        public void error( SAXParseException exception ) throws SAXException
        {
            error.println( exception.getMessage() );
        }

        public void fatalError( SAXParseException exception ) throws SAXException
        {
            error.println( exception.getMessage() );
        }

        public void warning( SAXParseException exception ) throws SAXException
        {
            output.println( exception.getMessage() );
        }
    }

    /** Echoes start tags, attributes, text and end tags to {@link #output}. */
    class MyContentHandler implements ContentHandler
    {
        public void characters( char[] ch, int start, int length )
                throws SAXException
        {
            // Deliberately naive: prints the entire array and ignores
            // start/length, so text outside the reported range is emitted too.
            // This is the defect the improved version fixes.
            output.print( ch );
        }

        public void endDocument() throws SAXException
        {
        }

        public void endElement( String uri, String localName, String name )
                throws SAXException
        {
            output.println( "</" + localName + ">" );
        }

        public void endPrefixMapping( String prefix ) throws SAXException
        {
        }

        public void ignorableWhitespace( char[] ch, int start, int length )
                throws SAXException
        {
        }

        public void processingInstruction( String target, String data )
                throws SAXException
        {
        }

        public void setDocumentLocator( Locator locator )
        {
        }

        public void skippedEntity( String name ) throws SAXException
        {
        }

        public void startDocument() throws SAXException
        {
            // Header line as shown in the article's sample output.
            // NOTE(review): it lacks the leading '?' of a real XML declaration.
            output.println( "<xml version=\"1.0\" encoding=\"utf-8\"?>" );
        }

        public void startElement( String uri, String localName, String name,
                                  Attributes atts ) throws SAXException
        {
            // uri is the namespace identifier; name is the qualified name (prefix:localName).
            output.print( "<" + localName );
            for ( int i = 0; i < atts.getLength(); i++ )
            {
                String attrName = atts.getLocalName( i );
                String attrValue = atts.getValue( i );
                // A leading space separates each attribute from what precedes it.
                output.print( " " + attrName + "=" + attrValue );
            }
            output.print( ">" );
        }

        public void startPrefixMapping( String prefix, String uri )
                throws SAXException
        {
        }
    }

    /**
     * Parses {@code books.xml} from the current working directory.
     *
     * @param args ignored
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main( String[] args ) throws Exception
    {
        XMLParser parser = new XMLParser();
        parser.parserXMLFile( "books.xml" );
    }
}

 

解析后的结果如下:

<xml version="1.0" encoding="utf-8"?>

<books>

<books>

       <book pages="1000" price="$99">

              <name>Thinking in java</name>

              <version>3.0</version>

       </book>

       <book pages="800" price="$40">

              <name>JUnit in Action</name>

              <version>2.0</version>

       </book>

       <book pages="900" price="$70">

              <name>Lucene in Action</name>

              <version>2.0</version>

       </book>

</books>

<books>

       <book pages="1000" price="$99">

              <name>Thinking in java</name>

              <version>3.0</version>

       </book>

       <book pages="800" price="$40">

              <name>JUnit in Action</name>

              <version>2.0</version>

       </book>

       <book pages="900" price="$70">

              <name>Lucene in Action</name>

              <version>2.0</version>

       </book>

</books>

 

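这样就把XML文件大体输出到屏幕上来了, 但是你会发现, 这并不是你真正想要的结果, 甚至出现了乱码。原因在于: 一个元素的文本可能分多次回调characters, 次数不定; 而且参数ch是解析器内部的整个缓冲区, 只有从start开始的length个字符才是本次回调的有效内容, 直接print(ch)会把缓冲区里的其他内容也一并输出。以下是个改进的版本: 按start和length把文本累积到缓冲区中, 到元素结束时再统一输出。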

package com.chris.sax.action;

 

 

import java.io.BufferedOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.PrintStream;

import java.util.ArrayList;

import java.util.List;

 

 

import org.xml.sax.Attributes;

import org.xml.sax.ContentHandler;

import org.xml.sax.ErrorHandler;

import org.xml.sax.InputSource;

import org.xml.sax.Locator;

import org.xml.sax.SAXException;

import org.xml.sax.SAXParseException;

import org.xml.sax.XMLReader;

import org.xml.sax.helpers.XMLReaderFactory;

 

/**
 * Improved SAX example: echoes an XML file to standard output on one line,
 * printing the text of leaf elements only.
 *
 * <p>Text is accumulated across {@code characters} callbacks using the reported
 * {@code start}/{@code length} range and is written when the element that
 * opened most recently is closed.
 */
public class XMLParser
{
    /** Sink for normal output: auto-flushing stream over the process's stdout descriptor. */
    protected PrintStream output =
        new PrintStream( new BufferedOutputStream( new FileOutputStream( java.io.FileDescriptor.out ), 128 ), true );

    /** Sink for error output: auto-flushing stream over the process's stderr descriptor. */
    protected PrintStream error =
        new PrintStream( new BufferedOutputStream( new FileOutputStream( java.io.FileDescriptor.err ), 128 ), true );

    /**
     * Parses the given XML file, reporting content events to {@link MyContentHandler}
     * and parse problems to {@link MyErrorHandler}.
     *
     * @param fileName path of the XML file to parse
     * @throws SAXException if the parser reports an unrecoverable problem
     * @throws IOException  if the file cannot be opened or read
     */
    public void parserXMLFile( String fileName ) throws SAXException, IOException
    {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setContentHandler( new MyContentHandler() );
        reader.setErrorHandler( new MyErrorHandler() );

        FileInputStream in = new FileInputStream( new File( fileName ) );
        try
        {
            reader.parse( new InputSource( in ) );
        }
        finally
        {
            // The original never closed the stream; release the file handle
            // whether or not parsing succeeded.
            in.close();
        }
    }

    /** Writes parser errors to {@link #error} and warnings to {@link #output}. */
    class MyErrorHandler implements ErrorHandler
    {
        @Override
        public void error( SAXParseException exception ) throws SAXException
        {
            error.println( exception.getMessage() );
        }

        @Override
        public void fatalError( SAXParseException exception ) throws SAXException
        {
            error.println( exception.getMessage() );
        }

        @Override
        public void warning( SAXParseException exception ) throws SAXException
        {
            output.println( exception.getMessage() );
        }
    }

    /** Echoes start tags, attributes, leaf-element text and end tags to {@link #output}. */
    class MyContentHandler implements ContentHandler
    {
        /** Text collected since the most recent start tag. */
        private final StringBuilder buffer = new StringBuilder();

        /**
         * Local name of the most recently opened element, or {@code null} once
         * any element has been closed since then.
         */
        private String key;

        @Override
        public void characters( char[] ch, int start, int length )
                throws SAXException
        {
            // Only [start, start + length) is valid for this callback.
            buffer.append( ch, start, length );
        }

        @Override
        public void endDocument() throws SAXException
        {
        }

        @Override
        public void endElement( String uri, String localName, String name )
                throws SAXException
        {
            // Print text only when this end tag closes the element that was
            // opened last, i.e. a leaf. The null-safe comparison avoids a
            // NullPointerException when key has been cleared.
            if ( localName.equals( key ) )
            {
                output.print( buffer.toString() );
            }
            output.print( "</" + localName + ">" );

            // Forget the collected state so a parent that happens to share the
            // child's name does not print the child's text a second time.
            key = null;
            buffer.setLength( 0 );
        }

        @Override
        public void endPrefixMapping( String prefix ) throws SAXException
        {
        }

        @Override
        public void ignorableWhitespace( char[] ch, int start, int length )
                throws SAXException
        {
        }

        @Override
        public void processingInstruction( String target, String data )
                throws SAXException
        {
        }

        @Override
        public void setDocumentLocator( Locator locator )
        {
        }

        @Override
        public void skippedEntity( String name ) throws SAXException
        {
        }

        @Override
        public void startDocument() throws SAXException
        {
            // Header line as shown in the article's sample output.
            // NOTE(review): it lacks the leading '?' of a real XML declaration.
            output.println( "<xml version=\"1.0\" encoding=\"utf-8\"?>" );
        }

        @Override
        public void startElement( String uri, String localName, String name,
                                  Attributes atts ) throws SAXException
        {
            // uri is the namespace identifier; name is the qualified name (prefix:localName).

            // Drop whatever text preceded this tag and remember which element is open.
            buffer.setLength( 0 );
            key = localName;

            output.print( "<" + localName );
            for ( int i = 0; i < atts.getLength(); i++ )
            {
                String attrName = atts.getLocalName( i );
                String attrValue = atts.getValue( i );
                // A leading space separates each attribute from what precedes it.
                output.print( " " + attrName + "=" + attrValue );
            }
            output.print( ">" );
        }

        @Override
        public void startPrefixMapping( String prefix, String uri )
                throws SAXException
        {
        }
    }

    /**
     * Parses {@code books.xml} from the current working directory.
     *
     * @param args ignored
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main( String[] args ) throws Exception
    {
        XMLParser parser = new XMLParser();
        parser.parserXMLFile( "books.xml" );
    }
}

 

输出结果如下:

<xml version="1.0" encoding="utf-8"?>  

<books><book pages=1000 price=$99><name>Thinking in java</name><version>3.0</version></book><book pages=800 price=$40><name>JUnit in Action</name><version>2.0</version></book><book pages=900 price=$70><name>Lucene in Action</name><version>2.0</version></book></books>  


原创粉丝点击