XMLWriter

来源：互联网发布：证券行业研究员知乎编辑：程序博客网时间：2024/06/16 02:33

/*
* Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
*
* This software is open source.
* See the bottom of this file for the licence.
*/

package org.dom4j.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.dom4j.Attribute;
import org.dom4j.CDATA;
import org.dom4j.Comment;
import org.dom4j.Document;
import org.dom4j.DocumentType;
import org.dom4j.Element;
import org.dom4j.Entity;
import org.dom4j.Namespace;
import org.dom4j.Node;
import org.dom4j.ProcessingInstruction;
import org.dom4j.Text;
import org.dom4j.tree.NamespaceStack;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLFilterImpl;

/**
* 
* <code>XMLWriter</code> takes a DOM4J tree and formats it to a stream as
* XML. It can also consume SAX events, so it can be used by SAX clients: this
* object implements the {@link org.xml.sax.ContentHandler} and
* {@link LexicalHandler} interfaces as well. This formatter performs typical
* document formatting. The XML declaration and processing instructions are
* always on their own lines. An {@link OutputFormat} object can be used to
* define how whitespace is handled when printing and allows various
* configuration options, such as suppression of the XML declaration or of the
* encoding declaration, or whether empty documents are collapsed.
* 
*
* 
* There are <code>write(...)</code> methods to print any of the standard
* DOM4J classes, including <code>Document</code> and <code>Element</code>,
* to either a <code>Writer</code> or an <code>OutputStream</code>.
* Warning: using your own <code>Writer</code> may cause the writer's
* preferred character encoding to be ignored. If you use encodings other than
* UTF-8, we recommend using the method that takes an
* <code>OutputStream</code> instead.
* 
*
* @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
* @author Joseph Bowbeer
* @version $Revision: 1.83.2.2 $
*/
public class XMLWriter extends XMLFilterImpl implements LexicalHandler {
/**
 * Padding text: a single space. NOTE(review): where it is emitted is not
 * visible in this part of the file - confirm against the write methods.
 */
private static final String PAD_TEXT = " ";

    /**
     * SAX property names that identify a lexical handler. NOTE(review): how
     * these names are consumed is not visible here - presumably by the
     * property setter/getter further down; confirm.
     */
    protected static final String[] LEXICAL_HANDLER_NAMES = {
            "http://xml.org/sax/properties/lexical-handler",
            "http://xml.org/sax/handlers/LexicalHandler"};

/** Format used by the constructors that are not given an explicit format */
protected static final OutputFormat DEFAULT_FORMAT = new OutputFormat();

/** Should entity references be resolved when writing? */
private boolean resolveEntityRefs = true;

    /**
     * Stores the type of the last node written so that algorithms can refer
     * to the previous node type
     */
    protected int lastOutputNodeType;

    /**
     * Stores whether the last written element node was a closing tag (true)
     * or an opening tag (false).
     */
    private boolean lastElementClosed = false;

/** Whitespace-preservation flag reflecting an xml:space value of "preserve" */
protected boolean preserve = false;

/** The Writer used to output to */
protected Writer writer;

/** The stack of namespaces written so far */
private NamespaceStack namespaceStack = new NamespaceStack();

/** The format used by this writer */
private OutputFormat format;

/** Whether text should be escaped when it is written */
private boolean escapeText = true;

    /**
     * The initial number of indentations (so you can print a whole document
     * indented, if you like)
     */
    private int indentLevel = 0;

/** Buffer used when escaping strings */
private StringBuffer buffer = new StringBuffer();

    /**
     * Whether characters from the same chunk of characters have already been
     * added
     */
    private boolean charsAdded = false;

/**
 * NOTE(review): presumably the last character written; the code visible here
 * does not establish this - confirm against the character-writing methods.
 */
private char lastChar;

/**
 * Whether a flush should occur after writing a document. Switched on by the
 * stream-based constructors and setOutputStream, switched off by setWriter.
 */
private boolean autoFlush;

/** Lexical handler we should delegate to */
private LexicalHandler lexicalHandler;

    /**
     * Whether comments should appear inside DTD declarations - defaults to
     * false
     */
    private boolean showCommentsInDTDs;

/** Is the writer currently inside a DTD definition? */
private boolean inDTD;

/** The namespaces used for the current element when consuming SAX events */
private Map namespacesMap;

/**
 * The maximum allowed character code, such as 127 in US-ASCII (7 bit) or
 * 255 in an 8-bit ISO encoding, or -1 to not escape any characters (other
 * than the special XML characters like &lt; &gt; &amp;). A value of 0 means
 * "not set yet": getMaximumAllowedCharacter() then fills it in lazily.
 */
 private int maximumAllowedCharacter;

    /**
     * Creates a writer that sends its output to the supplied
     * <code>Writer</code>, formatted according to {@link #DEFAULT_FORMAT}.
     * All initialisation is delegated to
     * {@link #XMLWriter(Writer, OutputFormat)}.
     *
     * @param writer
     *            the destination of the generated XML
     */
    public XMLWriter(Writer writer) {
        this(writer, XMLWriter.DEFAULT_FORMAT);
    }

    /**
     * Creates a writer that sends its output to the supplied
     * <code>Writer</code> using the given format. This constructor leaves
     * the auto-flush flag at its default (off).
     *
     * @param writer
     *            the destination of the generated XML
     * @param format
     *            the formatting options to apply
     */
    public XMLWriter(Writer writer, OutputFormat format) {
        // Seed the namespace stack with the "no namespace" entry.
        this.namespaceStack.push(Namespace.NO_NAMESPACE);

        this.format = format;
        this.writer = writer;
    }

    public XMLWriter() {
        this.format = DEFAULT_FORMAT;
        this.writer = new BufferedWriter(new OutputStreamWriter(System.out));
        this.autoFlush = true;
        namespaceStack.push(Namespace.NO_NAMESPACE);
    }

    /**
     * Creates a writer that sends its output to the supplied stream using
     * {@link #DEFAULT_FORMAT}. Equivalent to calling
     * {@link #XMLWriter(OutputStream, OutputFormat)} with the default
     * format: the stream is wrapped using the format's encoding and
     * auto-flush is enabled.
     *
     * @param out
     *            the stream that receives the generated XML
     *
     * @throws UnsupportedEncodingException
     *             if the encoding of the default format is not supported
     */
    public XMLWriter(OutputStream out) throws UnsupportedEncodingException {
        this(out, DEFAULT_FORMAT);
    }

    public XMLWriter(OutputStream out, OutputFormat format)
            throws UnsupportedEncodingException {
        this.format = format;
        this.writer = createWriter(out, format.getEncoding());
        this.autoFlush = true;
        namespaceStack.push(Namespace.NO_NAMESPACE);
    }

    /**
     * Creates a writer that prints to <code>System.out</code> using the
     * given format. Equivalent to calling
     * {@link #XMLWriter(OutputStream, OutputFormat)} with
     * <code>System.out</code> as the stream.
     *
     * @param format
     *            the formatting options to apply; also supplies the encoding
     *
     * @throws UnsupportedEncodingException
     *             if the encoding named by the format is not supported
     */
    public XMLWriter(OutputFormat format) throws UnsupportedEncodingException {
        this(System.out, format);
    }

    /**
     * Replaces the <code>Writer</code> that output is sent to and turns
     * auto-flush off.
     *
     * @param writer
     *            the new destination of the generated XML
     */
    public void setWriter(Writer writer) {
        this.autoFlush = false;
        this.writer = writer;
    }

    /**
     * Redirects output to the given stream. The stream is wrapped in a
     * <code>Writer</code> built by <code>createWriter</code> with the
     * encoding of the current format, and auto-flush is turned on.
     *
     * @param out
     *            the stream that receives the generated XML from now on
     *
     * @throws UnsupportedEncodingException
     *             if the encoding of the current format is not supported
     */
    public void setOutputStream(OutputStream out)
            throws UnsupportedEncodingException {
        final String encoding = this.format.getEncoding();

        this.writer = createWriter(out, encoding);
        this.autoFlush = true;
    }

    /**
     * Reports whether text that is output gets escaped. Escaping is enabled
     * by default; it may be switched off when the output format is textual,
     * as in XSLT where the result can be xml, html or text.
     *
     * @return <code>true</code> if text is escaped on output
     */
    public boolean isEscapeText() {
        return this.escapeText;
    }

    /**
     * Switches escaping of text output on or off. Escaping is enabled by
     * default; it may be switched off when the output format is textual, as
     * in XSLT where the result can be xml, html or text.
     *
     * @param escapeText
     *            <code>true</code> to escape text on output,
     *            <code>false</code> to write it unescaped
     */
    public void setEscapeText(boolean escapeText) {
        this.escapeText = escapeText;
    }

    /**
     * Sets the indentation level to start from. Useful for writing a
     * document (or, more likely, a single element) at a given indent so that
     * it is not flush against the left margin. The default is 0.
     *
     * @param indentLevel
     *            the number of indents to start with
     */
    public void setIndentLevel(int indentLevel) {
        this.indentLevel = indentLevel;
    }

    /**
     * Returns the highest character code that is written unescaped. While
     * the stored value is still 0 (nothing has been set), it is first filled
     * in from <code>defaultMaximumAllowedCharacter()</code> and that value is
     * remembered for later calls. According to the existing documentation
     * the default is 127 for US-ASCII (7 bit) or 255 for an 8-bit ISO
     * encoding.
     *
     * @return the maximum character code allowed unescaped
     */
    public int getMaximumAllowedCharacter() {
        if (maximumAllowedCharacter != 0) {
            return maximumAllowedCharacter;
        }

        // Not configured yet: resolve the default once and cache it.
        maximumAllowedCharacter = defaultMaximumAllowedCharacter();

        return maximumAllowedCharacter;
    }

/**
 * Sets the highest character code that is written unescaped, for example
 * 127 for US-ASCII (7 bit) or 255 for an 8-bit ISO encoding, or -1 to not
 * escape any characters other than the special XML characters
 * (&lt;, &gt; and &amp;). When never set explicitly, the value is
 * defaulted the first time {@link #getMaximumAllowedCharacter()} is called.
 *
 * @param maximumAllowedCharacter
 *            the maximum character code to allow unescaped
 */
public void setMaximumAllowedCharacter(int maximumAllowedCharacter) {
    this.maximumAllowedCharacter = maximumAllowedCharacter;
}

    /**
     * Flushes the underlying <code>Writer</code>.
     *
     * @throws IOException
     *             if the underlying writer fails to flush
     */
    public void flush() throws IOException {
        this.writer.flush();
    }

    /**
     * Closes the underlying <code>Writer</code>.
     *
     * @throws IOException
     *             if the underlying writer fails to close
     */
    public void close() throws IOException {
        this.writer.close();
    }

    /**
     * Writes the line separator configured on the current format to the
     * underlying <code>Writer</code>.
     *
     * @throws IOException
     *             if the underlying writer fails to write
     */
    public void println() throws IOException {
        this.writer.write(this.format.getLineSeparator());
    }

/**
 * Writes the given {@link Attribute}.
 *
 * @param attribute
 * <code>Attribute</code> to output.
 *
 * @throws IOException
 * DOCUMENT ME!
 */
 public void write(Attribute attribute) throws IOException {
 writeAttribute(attribute);