xml去掉schema中没定义的标签

来源：互联网　发布：棒球手套知乎　编辑：程序博客网　时间：2024/06/16 07:31

甲方突然提出这么个神奇的需求：schema 是定好了的，但是还想在 XML 中加入一些 schema 中没有定义的标签。所以为了通过 schema 校验，需要在测试前先把 XML 中那些 schema 没有定义的标签删掉。
可能我搜索技能不够强，没找到现成的方法，只好自己写了段很智障的代码，用了无数个递归我都看不下去了。
（所以并没有什么参考价值）
测了几个文件目前都是正确的，姑且先发一下，万一哪天手滑把源代码删了还能在这找到。

需要两个 jar：dom4j 和 jaxen。其中 jaxen 是 dom4j 执行 XPath 查询时依赖的库，根据 XPath 删除节点的时候会用到。

package meiko.schema;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Removes "custom" tags from an XML file, i.e. elements whose parent/child
 * relationship is not declared in a given XSD schema, and writes the result
 * to a new file so that it can pass schema validation.
 *
 * <p>The schema is analysed heuristically: only parent/child relationships
 * between named {@code element} / {@code complexType} declarations are
 * considered, and names are compared textually (no namespace resolution).
 *
 * <p>Usage:
 * <pre>
 * MyValidateXml m = new MyValidateXml();
 * String out = m.GetXmlWithoutCustomLabel("thisMain.xml", "thisSchema.xsd");
 * </pre>
 *
 * <p>Instances are not thread-safe: the parsing state is kept in instance
 * fields and {@link #customList} is a shared static list.
 *
 * <p>NOTE(review): {@link SAXReader} is used with its default parser
 * configuration. If the input files can come from an untrusted source,
 * DTDs / external entities should be disabled on the reader (XXE).
 *
 * @author Uebara
 */
public class MyValidateXml {

    /** Local name of an XSD element declaration. */
    private static final String XSD_ELEMENT = "element";

    /** Local name of an XSD complex type declaration. */
    private static final String XSD_COMPLEX_TYPE = "complexType";

    /** Suffix appended to the input file name (extension stripped) to build the output name. */
    private static final String OUTPUT_SUFFIX = "_删除自定义标签.xml";

    /** Depth of the schema node currently being visited (root = 0). */
    private int index = 0;

    /** Position in {@link #schemaNodeList} that the next recorded schema node will get. */
    private int globalIndex = 0;

    /** XPath of every element of the XML file, in document order (duplicates included). */
    private final List<String> xmlXpathList = new ArrayList<String>();

    /** (Possibly adjusted) depth of every recorded schema node; parallel to {@link #schemaNodeList}. */
    private final List<Integer> indexList = new ArrayList<Integer>();

    /** Every named declaration found in the schema, in document order. */
    private final List<SchemaNode> schemaNodeList = new ArrayList<SchemaNode>();

    /**
     * Human-readable descriptions of the parent/child pairs that were not found
     * in the schema. Public so callers can print it; it is static and is never
     * cleared by this class, so it accumulates across calls and instances.
     */
    public static ArrayList<String> customList = new ArrayList<String>();

    /**
     * Public entry point: parses the XML and the schema, determines which
     * elements are not declared in the schema, removes them and writes the
     * cleaned document to a new file.
     *
     * @param xmlFileName path of the XML file to clean
     * @param xsdFileName path of the XSD schema
     * @return path of the generated file, or {@code null} if anything failed
     *         (the exception stack trace is printed)
     */
    public String GetXmlWithoutCustomLabel(String xmlFileName, String xsdFileName) {
        try {
            // Start from a clean slate so the instance can be reused for several files.
            resetState();
            getXmlNodes(xmlFileName);
            getXsdNodes(xsdFileName);
            List<String> deleteList = findCustomLabelInXml();
            return deleteNodeByXpath(xmlFileName, deleteList);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Clears the per-run parsing state ({@link #customList} is intentionally left untouched). */
    private void resetState() {
        index = 0;
        globalIndex = 0;
        xmlXpathList.clear();
        indexList.clear();
        schemaNodeList.clear();
    }

    /**
     * Removes the nodes addressed by the given XPaths from the XML file and
     * writes the result, encoded as UTF-8, next to the input file.
     *
     * @param xmlFileName path of the XML file to process
     * @param deleteList  XPaths of the nodes to remove; one node is removed per entry
     * @return path of the generated file
     * @throws Exception if the file cannot be parsed or written, or if the
     *                   root element itself would have to be removed
     */
    private String deleteNodeByXpath(String xmlFileName, List<String> deleteList) throws Exception {
        SAXReader saxReader = new SAXReader();
        Document document = saxReader.read(new File(xmlFileName));
        Element root = document.getRootElement();

        for (String deleteContent : deleteList) {
            Node node = root.selectSingleNode(deleteContent);
            if (node == null) {
                // Already gone, typically because an ancestor was removed earlier.
                continue;
            }
            Element parent = node.getParent();
            if (parent == null) {
                // The root element has no parent element and cannot be detached this way.
                throw new IllegalStateException(
                        "Root element is not declared in the schema and cannot be removed: " + deleteContent);
            }
            // Resetting the parent's text avoids leaving stray whitespace where the node
            // used to be. NOTE(review): this also discards any other text directly
            // inside the parent (mixed content).
            parent.setText("");
            // Removal must go through the parent to take effect.
            parent.remove(node);
        }

        String outputFile = buildOutputFileName(xmlFileName);
        FileOutputStream stream = new FileOutputStream(outputFile);
        try {
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stream, "UTF-8"));
            writer.write(document.asXML());
            writer.flush();
        } finally {
            // Always release the file handle, even if serialisation fails.
            stream.close();
        }
        return outputFile;
    }

    /**
     * Builds the output file name by replacing the extension of the input file
     * name with {@link #OUTPUT_SUFFIX}. A name without extension simply gets
     * the suffix appended.
     */
    private static String buildOutputFileName(String xmlFileName) {
        int dot = xmlFileName.lastIndexOf('.');
        int separator = Math.max(xmlFileName.lastIndexOf('/'), xmlFileName.lastIndexOf('\\'));
        // Only a dot located in the last path segment marks an extension.
        String base = dot > separator ? xmlFileName.substring(0, dot) : xmlFileName;
        return base + OUTPUT_SUFFIX;
    }

    /**
     * Parses the XML file and stores the XPath of every element in
     * {@link #xmlXpathList}.
     *
     * @param xmlFileName path of the XML file
     * @throws Exception if the file cannot be read or parsed
     */
    private void getXmlNodes(String xmlFileName) throws Exception {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File(xmlFileName));
        collectXpaths(document.getRootElement());
    }

    /**
     * Recursively adds the XPath of {@code node} and of all its descendant
     * elements to {@link #xmlXpathList}.
     *
     * @param node element to start from
     */
    @SuppressWarnings("unchecked")
    private void collectXpaths(Element node) {
        xmlXpathList.add(node.getPath());
        List<Element> childNodes = node.elements();
        for (Element childNode : childNodes) {
            collectXpaths(childNode);
        }
    }

    /**
     * Parses the schema file and stores its named declarations in
     * {@link #schemaNodeList}.
     *
     * @param xsdFileName path of the XSD file
     * @throws Exception if the file cannot be read or parsed
     */
    private void getXsdNodes(String xsdFileName) throws Exception {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File(xsdFileName));
        extractSchemaNodes(document.getRootElement());
    }

    /**
     * Recursively walks the schema and records every {@code element} carrying a
     * {@code name} or {@code ref} attribute and every named {@code complexType}.
     * Only the parent/child relationship between recorded nodes is derived.
     *
     * @param node schema node to visit
     */
    @SuppressWarnings("unchecked")
    private void extractSchemaNodes(Element node) {
        String tagName = node.getName();
        String nameAttr = node.attributeValue("name");
        String refAttr = node.attributeValue("ref");

        // nodeType is one of "name", "ref" or "type"; null means "nothing to record".
        String nodeType = null;
        String nodeName = "";
        String typeName = "";

        if (nameAttr != null && XSD_ELEMENT.equals(tagName)) {
            nodeType = "name";
            nodeName = nameAttr;
            // Only element declarations with a name are expected to carry a type attribute.
            String typeAttr = node.attributeValue("type");
            if (typeAttr != null) {
                typeName = typeAttr;
            }
        } else if (refAttr != null && XSD_ELEMENT.equals(tagName)) {
            nodeType = "ref";
            nodeName = refAttr;
        } else if (nameAttr != null && XSD_COMPLEX_TYPE.equals(tagName)) {
            nodeType = "type";
            nodeName = nameAttr;
        }

        if (nodeType != null) {
            recordSchemaNode(nodeType, nodeName, typeName);
        }

        List<Element> childNodes = node.elements();
        for (Element childNode : childNodes) {
            // index tracks the depth: +1 when descending, -1 when coming back.
            index++;
            extractSchemaNodes(childNode);
            index--;
        }
    }

    /**
     * Appends a schema node to {@link #schemaNodeList}, deriving its parent from
     * the depths of the previously recorded nodes.
     *
     * @param nodeType "name", "ref" or "type"
     * @param nodeName value of the name/ref attribute (stored with a leading "/")
     * @param typeName value of the type attribute, or "" when absent
     */
    private void recordSchemaNode(String nodeType, String nodeName, String typeName) {
        int position = globalIndex;
        boolean hasPrevious = position > 0;
        int level = index;

        // A node exactly one level deeper than the previous recorded node (and not
        // at level 1) is assumed to be separated from it only by a wrapper such as
        // <choice>, so it is treated as a sibling of that previous node.
        if (hasPrevious && level != 1 && level - indexList.get(position - 1) == 1) {
            level = indexList.get(position - 1);
        }
        indexList.add(level);

        // Level 1 means a top-level declaration: no parent. The same default applies
        // when there is no earlier node to derive a parent from.
        String parentName = "/";
        if (hasPrevious && level != 1) {
            if (level > indexList.get(position - 1)) {
                // Deeper than the previous node: the previous node is the parent.
                parentName = schemaNodeList.get(position - 1).getNodeName();
            } else {
                // Same level or shallower: the parent is the nearest earlier node
                // with a strictly smaller level.
                for (int i = position - 1; i >= 0; i--) {
                    if (indexList.get(i) < level) {
                        parentName = schemaNodeList.get(i).getNodeName();
                        break;
                    }
                }
            }
        }

        SchemaNode schemaNode = new SchemaNode();
        schemaNode.setType(nodeType);
        schemaNode.setNodeName("/" + nodeName);
        schemaNode.setParentName(parentName);
        schemaNode.setAttributeType(typeName);
        schemaNodeList.add(schemaNode);
        globalIndex++;
    }

    /**
     * Finds the XPaths of the XML elements that are not declared in the schema.
     *
     * <p>Each XPath is split into its steps and every consecutive
     * (parent, child) pair is looked up in the schema; every missing pair is
     * reported in {@link #customList}. An XPath is scheduled for deletion when
     * its last pair is missing; missing pairs higher up are covered by the
     * ancestors' own XPath entries.
     *
     * @return XPaths of the elements to delete
     */
    private List<String> findCustomLabelInXml() {
        List<String> deleteList = new ArrayList<String>();
        for (String oneXpath : xmlXpathList) {
            String[] steps = oneXpath.split("/");
            boolean lastPairMissing = false;
            for (int i = 0; i < steps.length - 1; i++) {
                String parent = steps[i];
                String child = steps[i + 1];
                boolean found = isDirectChild(parent, child) || isChildViaType(parent, child);
                lastPairMissing = !found;
                if (!found) {
                    customList.add(parent + "没有子节点" + child);
                }
            }
            if (lastPairMissing) {
                deleteList.add(oneXpath);
            }
        }
        return deleteList;
    }

    /** Returns whether the schema records {@code child} directly under {@code parent}. */
    private boolean isDirectChild(String parent, String child) {
        return hasChild("/" + parent, "/" + child);
    }

    /**
     * Returns whether {@code child} is declared inside the named type referenced
     * by {@code parent}'s {@code type} attribute. Only the first typed
     * declaration of {@code parent} and the first node matching that type name
     * are considered.
     */
    private boolean isChildViaType(String parent, String child) {
        for (SchemaNode declaration : schemaNodeList) {
            boolean typedParent = declaration.getNodeName().equals("/" + parent)
                    && !declaration.getAttributeType().equals("");
            if (!typedParent) {
                continue;
            }
            for (SchemaNode candidate : schemaNodeList) {
                // Node names are stored with a leading "/", type names without it.
                if (candidate.getNodeName().substring(1).equals(declaration.getAttributeType())) {
                    return hasChild(candidate.getNodeName(), "/" + child);
                }
            }
            return false;
        }
        return false;
    }

    /** Returns whether a recorded schema node has the given parent name and node name. */
    private boolean hasChild(String parentNodeName, String childNodeName) {
        for (SchemaNode schemaNode : schemaNodeList) {
            if (schemaNode.getParentName().equals(parentNodeName)
                    && schemaNode.getNodeName().equals(childNodeName)) {
                return true;
            }
        }
        return false;
    }
}

用到的自定义类SchemaNode.java

package meiko.schema;

import java.util.ArrayList;

/**
 * Plain value holder describing one named declaration of an XSD schema.
 *
 * <p>All four properties are strings and default to the empty string.
 */
public class SchemaNode {

    /** Kind of declaration this node was built from. */
    private String type = "";

    /** Name of the node. */
    private String nodeName = "";

    /** Name of the node's parent. */
    private String parentName = "";

    /** Value of the declaration's type attribute, empty when there is none. */
    private String attributeType = "";

    /**
     * Creates a node whose properties are all empty strings.
     */
    public SchemaNode() {
        // Defaults are supplied by the field initialisers.
    }

    /** @return the kind of declaration */
    public String getType() {
        return this.type;
    }

    /** @param type the kind of declaration */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the node name */
    public String getNodeName() {
        return this.nodeName;
    }

    /** @param nodeName the node name */
    public void setNodeName(String nodeName) {
        this.nodeName = nodeName;
    }

    /** @return the parent's name */
    public String getParentName() {
        return this.parentName;
    }

    /** @param parentName the parent's name */
    public void setParentName(String parentName) {
        this.parentName = parentName;
    }

    /** @return the type attribute value */
    public String getAttributeType() {
        return this.attributeType;
    }

    /** @param attributeType the type attribute value */
    public void setAttributeType(String attributeType) {
        this.attributeType = attributeType;
    }
}

阅读全文

0 0