基于python标准库对XML文件的保存和加载

来源：互联网发布：写日志的软件知乎编辑：程序博客网时间：2024/05/16 18:14

上文对基于PyQt对XML文件进行保存和加载进行说明，本文则是通过python标准库xml.dom将数据写入到xml文件然后解析，解析XML文件则通过两种方式：DOM和SAX。两者的区别是前者将整个文档载入内存并构建成树，适合编辑文档结构；后者采用事件驱动的流式工作方式，适用于XML文档的搜索和处理。

下例通过XML DOM将不同类型的数据保存为XML格式如下：

<MOVIES VERSION="1.0"><MOVIE YEAR="1989" MINUTES="45" ACQUIRED="2017-01-15"><TITLE>God save world</TITLE><NOTES>HELLO WORLD</NOTES></MOVIE></MOVIES>

然后通过python标准库，以DOM和SAX两种方式将内容解析出来。

"""Save one movie record to XML with minidom, then read it back via DOM and SAX."""

# The wildcard import is kept from the original script; it supplies
# ContentHandler (and other xml.sax names) to this module.
from xml.sax import *  # noqa: F401,F403
from xml.dom.minidom import Document, parse  # noqa: F401
import xml.dom.minidom
import xml.sax
import xml.sax.handler
import os
import datetime


class Movie(object):
    """Plain record describing one movie.

    Attributes:
        title: Movie title, or None when unknown.
        year: Release year; defaults to UNKNOWNYEAR.
        minutes: Running time in minutes; defaults to UNKNOWNMINUTES.
        acquired: ``datetime.date`` the movie was acquired; defaults to today.
        notes: Free-form notes, or None.
    """

    UNKNOWNYEAR = 1890
    UNKNOWNMINUTES = 0

    def __init__(self, title=None, year=UNKNOWNYEAR,
                 minutes=UNKNOWNMINUTES, acquired=None, notes=None):
        self.title = title
        self.year = year
        self.minutes = minutes
        self.acquired = (acquired if acquired is not None
                         else datetime.date.today())
        self.notes = notes


def _parse_acquired(text):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Raises:
        ValueError: if the string does not have three ``-`` separated parts
            or the parts do not form a valid date.
    """
    ymd = text.split("-")
    if len(ymd) != 3:
        raise ValueError("invalid acquired date {0}".format(str(text)))
    return datetime.date(int(ymd[0]), int(ymd[1]), int(ymd[2]))


def _append_text_element(doc, parent, tag, value):
    """Append a ``tag`` element to ``parent``; add a text child unless value is None."""
    element = doc.createElement(tag)
    if value is not None:
        element.appendChild(doc.createTextNode(str(value)))
    parent.appendChild(element)
    return element


def _child_text(element, tag):
    """Return the stripped text of the first ``tag`` descendant, or None if absent."""
    matches = element.getElementsByTagName(tag)
    if not matches:
        return None
    text_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
    return "".join(node.data for node in matches[0].childNodes
                   if node.nodeType in text_types).strip()


def _movie_from_element(element):
    """Build a Movie from a MOVIE DOM element.

    Applies the same completeness rule as MovieHandler.endElement: all three
    attributes and a non-empty title are required; notes may be empty.

    Raises:
        ValueError: if an attribute is missing or malformed, or the title or
            notes element is missing.
    """
    missing = [name for name in ("YEAR", "MINUTES", "ACQUIRED")
               if not element.hasAttribute(name)]
    if missing:
        raise ValueError("missing attribute(s) {0}".format(", ".join(missing)))
    year = int(element.getAttribute("YEAR"))
    minutes = int(element.getAttribute("MINUTES"))
    acquired = _parse_acquired(element.getAttribute("ACQUIRED"))
    title = _child_text(element, "TITLE")
    notes = _child_text(element, "NOTES")
    if not title or notes is None:
        raise ValueError("missing title or notes")
    return Movie(title, year, minutes, acquired, notes)


class MovieContainer(object):
    """Exports a single Movie to XML and re-imports XML files for display.

    Args:
        fname: File name associated with the container (stored only).
        movies: The Movie instance to export (a single record, despite the
            plural name).
    """

    def __init__(self, fname, movies):
        self.__fname = fname
        self.__movies = movies

    def exportXml(self, fname):
        """Write the held movie to ``fname`` as pretty-printed UTF-8 XML.

        Failures are reported on stdout rather than raised. A title or notes
        value of None is written as an empty element instead of the literal
        text "None".
        """
        try:
            record = self.__movies
            doc = Document()
            root = doc.createElement("MOVIES")
            root.setAttribute("VERSION", "1.0")
            doc.appendChild(root)

            movie = doc.createElement("MOVIE")
            movie.setAttribute("YEAR", str(record.year))
            movie.setAttribute("MINUTES", str(record.minutes))
            movie.setAttribute("ACQUIRED", str(record.acquired))
            root.appendChild(movie)

            _append_text_element(doc, movie, "TITLE", record.title)
            _append_text_element(doc, movie, "NOTES", record.notes)

            # toprettyxml returns bytes when an encoding is given, hence 'wb'.
            with open(fname, 'wb') as f:
                f.write(doc.toprettyxml(indent='\t', newl="\n",
                                        encoding='utf-8'))
        except Exception as e:
            error = "Failed to export: {0}".format(e)
            print(error)
        else:
            print("Exported 1 movie records to {0}".format(
                    os.path.basename(fname)))

    def importDOM(self, fname):
        """Parse ``fname`` with minidom and print every valid MOVIE record.

        An invalid record is reported with "Failed to import: ..." and
        skipped; the remaining records are still processed. Errors opening or
        parsing the file are printed, not raised.
        """
        try:
            dom_tree = xml.dom.minidom.parse(fname)
            root = dom_tree.documentElement
            for element in root.getElementsByTagName("MOVIE"):
                try:
                    movie = _movie_from_element(element)
                except ValueError as e:
                    print("Failed to import: {0}".format(e))
                    continue
                print(movie.title, movie.year, movie.minutes,
                      movie.acquired, movie.notes)
        except Exception as e:
            print(e)

    def importSAX_standard(self, fname):
        """Parse ``fname`` with a SAX parser, printing each record and a count.

        Parser errors and handler ValueErrors propagate to the caller.
        """
        parser = xml.sax.make_parser()
        # Namespace processing is not needed for this document format.
        parser.setFeature(xml.sax.handler.feature_namespaces, 0)

        handler = MovieHandler(self.__movies)
        parser.setContentHandler(handler)
        parser.parse(fname)
        print("Imported {0} movie records from {1}".format(
                handler.count, os.path.basename(fname)))


class MovieHandler(ContentHandler):
    """SAX content handler that validates and prints MOVIE records.

    Attributes:
        count: Number of complete MOVIE records seen so far.

    The callbacks return True as in the original code; xml.sax ignores the
    return value.
    """

    def __init__(self, movies):
        # Must name this class (not ContentHandler) so that
        # ContentHandler.__init__ actually runs.
        super(MovieHandler, self).__init__()
        self.CurrentData = ""
        self.movies = movies
        self.text = ""
        self.error = None
        self.count = 0
        self.clear()

    def clear(self):
        """Reset the fields of the record currently being assembled."""
        self.year = None
        self.minutes = None
        self.acquired = None
        self.title = None
        self.notes = None

    def startElement(self, tag, attributes):
        """Start a new record on MOVIE; reset the text buffer on TITLE/NOTES.

        Raises:
            KeyError: if a MOVIE element lacks YEAR, MINUTES or ACQUIRED.
            ValueError: if an attribute value is malformed.
        """
        if tag == "MOVIE":
            self.clear()
            self.year = int(attributes["YEAR"])
            self.minutes = int(attributes["MINUTES"])
            self.acquired = _parse_acquired(attributes["ACQUIRED"])
        elif tag in ("TITLE", "NOTES"):
            self.text = ""
        return True

    def characters(self, text):
        """Accumulate character data; the parser may deliver it in chunks."""
        self.text += text
        return True

    def endElement(self, tag):
        """Store TITLE/NOTES text; validate and print the record on MOVIE.

        Raises:
            ValueError: if the finished MOVIE record is incomplete.
        """
        if tag == "MOVIE":
            if (self.year is None or self.minutes is None or
                    self.acquired is None or self.title is None or
                    self.notes is None or not self.title):
                raise ValueError("incomplete movie record")
            self.count += 1
            print(self.title, self.year,
                  self.minutes, self.acquired, self.notes)
        elif tag == "TITLE":
            self.title = self.text.strip()
        elif tag == "NOTES":
            self.notes = self.text.strip()
        return True


if __name__ == "__main__":
    textdata = [["God save world", 1989, 45, None, "HELLO WORLD"]]
    # NOTE: machine-specific path from the original article; adjust as needed.
    fname = "/home/yrd/work/movietest.xml"
    for data in textdata:
        movie = Movie(data[0], data[1], data[2], data[3], data[4])
        moviecontainer = MovieContainer(fname, movie)
        print("#exportDOM")
        moviecontainer.exportXml(fname)
        print("#importDOM")
        moviecontainer.importDOM(fname)
        print("#importSAX")
        moviecontainer.importSAX_standard(fname)

运行结果：

#exportDOM
Exported 1 movie records to movietest.xml
#importDOM
God save world 1989 45 2017-01-15 HELLO WORLD
#importSAX
God save world 1989 45 2017-01-15 HELLO WORLD
Imported 1 movie records from movietest.xml

0 0