python之xml库

来源：互联网发布：中国机床知乎编辑：程序博客网时间：2024/06/08 17:01

1.用xml.dom.minidom操作xml
用于测试的xml文件名为:mytest.xml,其内容如下

<?xml version='1.0' encoding='utf-8'?>
<animals>
    <animal name = 'dog'>
        <color>white</color>
        <voice>wangwang</voice>
        <food>meat</food>
    </animal>
    <animal name = 'cat'>
        <color>black</color>
        <voice>miaomiao</voice>
        <food>fish</food>
    </animal>
</animals>

同目录下的python文件名为:minidomtest.py,其内容如下

#!/usr/bin/env python
"""Demo script: read, build, save and pretty-print XML with xml.dom.minidom.

Expects a file named mytest.xml in the current directory and writes
minidom.xml next to it.
"""
from __future__ import print_function  # same output on Python 2, also runs on Python 3

from xml.dom import minidom

# xml.dom.minidom.Document instance
mydom = minidom.parse('mytest.xml')
# DOM Element: animals
root = mydom.documentElement
# DOM Element list
mynodes = root.getElementsByTagName('animal')
print(mynodes[0].parentNode.nodeName)
for node in mynodes:
    animal_name = node.getAttribute('name')
    animal_color = node.getElementsByTagName('color')[0].childNodes[0].nodeValue
    animal_voice = node.getElementsByTagName('voice')[0].childNodes[0].nodeValue
    animal_food = node.getElementsByTagName('food')[0].childNodes[0].nodeValue
    print(animal_name)
    print(animal_color)
    print(animal_voice)
    print(animal_food)

print('********************************************')
print(mydom)
print(root)
print(root.nodeName)
print(mynodes)
print(mynodes[0].childNodes)
print(mynodes[0].childNodes[1])
print(mynodes[0].childNodes[1].childNodes)
print(mynodes[0].childNodes[1].childNodes[0])
print(mynodes[0].childNodes[1].childNodes[0].nodeValue)

print('***********create new element**********')
impl = minidom.getDOMImplementation()
# The third argument is the doctype node; pass None (not the string 'None')
# when the document has no doctype.
dom = impl.createDocument(None, 'rootElement', None)
root = dom.documentElement
# Set an attribute; the value is passed as a string so it can be serialised.
root.setAttribute('id', '1')
item = dom.createElement('item')
text = dom.createTextNode('test')
item.appendChild(text)
root.appendChild(item)
print(root.toxml())

print('**********write to file**********')
# writexml(writer, indent, addindent, newl, encoding)
# writer    - a file object
# indent    - string written before each tag, e.g. '  ' puts two spaces
#             in front of every tag
# addindent - indentation string added for each child node
# newl      - string written after each tag, e.g. '\n' puts a line break
#             after every tag
# encoding  - value of the encoding attribute in the generated XML
#             declaration; minidom does not actually encode the output, so
#             if the saved text contains non-ASCII characters you must do
#             the encoding conversion yourself
with open('minidom.xml', 'w') as myfile:
    dom.writexml(myfile, encoding='utf-8')

print('**********get pretty look**********')


def Indent(dom, node, indent=0):
    """Insert whitespace text nodes so that *node*'s subtree prints indented.

    dom    -- the Document used to create the new text nodes
    node   -- the element whose subtree is indented (modified in place)
    indent -- nesting depth of *node*; one tab is used per level
    """
    # Copy the child list first: text nodes are inserted while recursing.
    children = node.childNodes[:]
    if indent:
        # Put a line break plus indentation in front of this element.
        text = dom.createTextNode('\n' + '\t' * indent)
        node.parentNode.insertBefore(text, node)
    if children:
        if children[-1].nodeType == node.ELEMENT_NODE:
            # Move the closing tag onto its own line at this element's depth.
            text = dom.createTextNode('\n' + '\t' * indent)
            node.appendChild(text)
        for n in children:
            if n.nodeType == node.ELEMENT_NODE:
                Indent(dom, n, indent + 1)


Indent(dom, root, 0)
print(root.toxml())

输出结果为

animals
dog
white
wangwang
meat
cat
black
miaomiao
fish
*********************************************
<xml.dom.minidom.Document instance at 0xb70c764c>
<DOM Element: animals at 0xb706742c>
animals
[<DOM Element: animal at 0xb70674cc>, <DOM Element: animal at 0xb706778c>]
[<DOM Text node "u'\n        '">, <DOM Element: color at 0xb706758c>, <DOM Text node "u'\n        '">, <DOM Element: voice at 0xb706762c>, <DOM Text node "u'\n        '">, <DOM Element: food at 0xb70676ac>, <DOM Text node "u'\n    '">]
<DOM Element: color at 0xb706758c>
[<DOM Text node "u'white'">]
<DOM Text node "u'white'">
white
************create new element****************
<rootElement id="1"><item>test</item></rootElement>
**********write to file**********
**********get pretty look**********
<rootElement id="1">
    <item>test</item>
</rootElement>

2.用xml.etree.ElementTree操作xml
先说一下tag,attrib,text,tail的含义

以 <animal id='1'>dog</animal> 为例: tag 是标签名 animal, attrib 是属性字典 {'id': '1'}, text 是开始标签与结束标签之间的文本 dog, tail 是结束标签 </animal> 之后、下一个标签之前的字符。
tail就是元素末尾的字符,一般用于排版

xml文件同上,python文件名为elementtreetest.py,内容如下

#!/usr/bin/env python
"""Demo script: a tour of the xml.etree.ElementTree API.

Expects a file named mytest.xml in the current directory and writes
temp.xml next to it.
"""
from __future__ import print_function  # same output on Python 2, also runs on Python 3

import xml.etree.ElementTree as ET

# load xml file
animals = ET.parse('mytest.xml')
# obtain root element
animal_root = animals.getroot()

# Attributes and methods available on an element
# root element's name
print(animal_root.tag)
# root element's text
print(animal_root.text)
# root element's attribute, the result is dict
print(animal_root.attrib)
print(animal_root.tail)

# iterate over the direct children
for elem in animal_root:
    print(elem.tag, elem.attrib)

# search for sub-elements
print(animal_root.find('animal'))
print(animal_root.findall('animal'))
print(animal_root.find('animal/color'))
print(animal_root.findall('animal/color'))
print(animal_root.findtext('animal/color'))

# elem.iter(tag=None)
# walks the tree rooted at elem; pass a tag to visit only matching elements
for elem in animal_root.iter('color'):
    print(elem.text)

# elem.iterfind(tag)
for elem in animal_root.iterfind('animal/color'):
    print(elem.text)

# elem.itertext()
# walks the text of all descendants
for text in animal_root.itertext():
    print(text)

# elem.get(key, default=None)
# returns the attribute value; None by default when the attribute is
# missing, or the fallback value you supply
print(animal_root.get('name', 'I do not have name'))

# elem.items()
# returns the attributes as a list of (key, value) pairs
print(animal_root.find('animal').items())

# elem.keys()
# returns the list of attribute names
print(animal_root.find('animal').keys())

# elem.set(key, value)
# sets a new attribute
animal_root.set('id', '1')

# elem.clear()
# removes all sub-elements and attributes, and sets text and tail to None

# create a new element
myanimal = ET.Element('animal', {'name': 'rabbit'})
myanimal_color = ET.Element('color')
myanimal_color.text = 'gray'
myanimal_food = ET.Element('food')
myanimal_food.text = 'grass'
myanimal.append(myanimal_color)
myanimal.append(myanimal_food)
animal_root.append(myanimal)

# remove an element
animal_root.remove(myanimal)

# insert an element at a given position
animal_root.insert(0, myanimal)

# ET.ElementTree(element=None, file=None)
# if element is given, it becomes the root node of the ElementTree
# if both are given, element is ignored
mytree = ET.ElementTree()
mytree.parse('mytest.xml')
# can also be written as: mytree = ET.ElementTree(file='mytest.xml')

# write(file, encoding='us-ascii', xml_declaration=None, default_namespace=None, method='xml')
# writes the tree to a file
animals.write('temp.xml', encoding='utf-8', xml_declaration=True)


# get pretty look
def indent(elem, level=0):
    """Rewrite whitespace-only text/tail values so the tree prints indented.

    elem  -- the element to indent (modified in place)
    level -- nesting depth of *elem*; two spaces are used per level

    Returns *elem*. Text and tail values that contain non-whitespace
    characters are left untouched.
    """
    i = "\n" + level * "  "
    if len(elem):
        if not elem.text or not elem.text.strip():
            # First child starts on a new line, one level deeper.
            elem.text = i + "  "
        for e in elem:
            indent(e, level + 1)
        # After the loop e is the last child: its tail positions the
        # parent's closing tag, so it goes back to the parent's depth.
        if not e.tail or not e.tail.strip():
            e.tail = i
    if level and (not elem.tail or not elem.tail.strip()):
        elem.tail = i
    return elem


indent(animal_root, 0)

# insert elements with SubElement
myanimal = ET.SubElement(animal_root, 'animal', {'name': 'snake'})
ET.SubElement(myanimal, 'color').text = 'black'
ET.SubElement(myanimal, 'food').text = 'meat'

# create an element with fromstring
myele = ET.fromstring('<animal name="bird"><color>white</color></animal>')

# print to the screen
ET.dump(animals)
# or ET.dump(animals.getroot())
# the argument may be an element tree or a single element

3.用xml.sax操作xml
xml.sax提供了3个函数以及sax异常类

函数 | 作用 | 参数说明
xml.sax.make_parser([parser_list]) | 创建并返回一个SAX XMLReader对象 | parser_list:可选参数,解析器列表
xml.sax.parse(filename_or_stream, handler[, error_handler]) | 创建一个SAX解析器并解析xml文档 | filename_or_stream:xml文件名或文件对象; handler:必须是一个ContentHandler的对象; error_handler:如果指定该参数,error_handler必须是一个SAX ErrorHandler对象
xml.sax.parseString(string, handler[, error_handler]) | 创建一个XML解析器并解析xml字符串 | string:xml字符串,其它参数同上

xml.sax.handler.ContentHandler类的方法

方法 | 调用时机 | 参数说明
startDocument() | 文档启动的时候调用 | 无
endDocument() | 解析器到达文档结尾时调用 | 无
startElement(name, attrs) | 遇到XML开始标签时调用 | name是标签的名字,attrs是标签的属性值字典
endElement(name) | 遇到XML结束标签时调用 | name是标签的名字
characters(content) | 从行开始,遇到标签之前;或从一个标签开始,遇到下一个标签之前;或从一个标签开始,遇到行结束符之前,只要存在字符就调用 | 存在的字符保存在content

一个例子,xml文件同上,为mytest.xml,python文件名为saxtest.py,内容如下

#!/usr/bin/env python
"""Demo script: print the contents of mytest.xml using an xml.sax ContentHandler."""
from __future__ import print_function  # same output on Python 2, also runs on Python 3

from xml.sax.handler import ContentHandler
from xml.sax import parse


class saxtest(ContentHandler):
    """Print each animal's name, then the tag and text of each of its child elements."""

    # True only between the start and end tag of a leaf element, so that
    # characters() ignores the whitespace found between other tags.
    passthrough = False

    def startDocument(self):
        """Announce the start of parsing."""
        print('Start parsing mytest.xml')

    def startElement(self, tag, attributes):
        """Print the animal name, or the tag of any other non-root element."""
        if tag == 'animals':
            pass
        elif tag == 'animal':
            print()
            print('name:', attributes['name'])
        else:
            print(tag)
            self.passthrough = True

    def endElement(self, tag):
        """Stop forwarding character data once any element closes."""
        self.passthrough = False

    def characters(self, content):
        """Print character data only while inside a leaf element."""
        if self.passthrough:
            print(content)

    def endDocument(self):
        """Announce the end of parsing."""
        print()
        print('End parsing mytest.xml')


if __name__ == '__main__':
    parse('mytest.xml', saxtest())

结果为

Start parsing mytest.xml

name: dog
color
white
voice
wangwang
food
meat

name: cat
color
black
voice
miaomiao
food
fish

End parsing mytest.xml

由于characters的调用时机问题,我们必须对其输出时机进行控制(在上例中我们通过passthrough进行控制),否则会输出很多不需要的换行(例如开始元素与开始元素之间,结束元素与开始元素之间,结束元素与结束元素之间的空白符)

参考: python对XML 操作; 使用Python读写XML文件; Python：使用基于事件驱动的SAX解析XML

0 0