读取 INRIA 标注(说明)文件,生成 VOC XML(Python 版本)

来源:互联网 发布:sql server 教材 编辑:程序博客网 时间:2024/05/19 16:20
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# File name: txt_to_xml.py
"""Convert INRIA-style person annotation ``.txt`` files to Pascal VOC XML.

Every ``*.txt`` file found in the annotation directory is parsed for its
image size, its declared object count and its per-object bounding boxes,
and a VOC-style ``00<index>.xml`` file is written next to it.

The ``print(...)`` calls take a single argument on purpose so that the
script produces the same output under both Python 2 and Python 3.
"""
from xml.dom.minidom import Document
import os
import re

# Compiled once: every parsing step only needs "runs of digits".
_NUMBER_RE = re.compile(r'\d+')
# Captures the complete object number so that object 1 is never confused
# with objects 10, 11, ... (a plain substring test would match all of them).
_BBOX_RE = re.compile(r'Bounding box for object (\d+)')


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def build_voc_xml(lines, image_index):
    """Build the VOC XML text for one annotation file.

    Args:
        lines: The lines of the annotation text file.
        image_index: Integer index of the image; it is rendered as
            ``"00" + str(image_index) + ".jpg"`` in the ``<filename>`` tag.

    Returns:
        The XML document as produced by ``Document.toprettyxml(indent="")``.

    Raises:
        IndexError: If a line containing ``size`` holds fewer than three
            numbers.
        ValueError: If a bounding-box line holds fewer than five numbers
            (object number plus four coordinates).
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    _append_text_element(
        doc, annotation, 'filename', "00" + str(image_index) + ".jpg")

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'PASperson Database')
    _append_text_element(doc, source, 'annotation', 'PASperson')

    # Header pass: read the image size and the declared number of objects.
    # Scanning stops at the "Objects" line, so a size line located after it
    # is ignored. A missing "Objects" line means "no objects" rather than
    # reusing a count left over from a previously processed file.
    object_count = 0
    for line in lines:
        if "size" in line:
            sizes = _NUMBER_RE.findall(line)
            size = doc.createElement('size')
            annotation.appendChild(size)
            _append_text_element(doc, size, 'width', sizes[0])
            _append_text_element(doc, size, 'height', sizes[1])
            _append_text_element(doc, size, 'depth', sizes[2])
            _append_text_element(doc, annotation, 'segmented', '0')
        if 'Objects' in line:
            counts = _NUMBER_RE.findall(line)
            if counts:
                object_count = int(counts[0])
            break

    # Single pass over all lines, grouping bounding boxes by their exact
    # object number.
    boxes_by_number = {}
    for line in lines:
        match = _BBOX_RE.search(line)
        if match is None:
            continue
        numbers = _NUMBER_RE.findall(line)
        boxes_by_number.setdefault(int(match.group(1)), []).append(numbers)

    # Emit objects in ascending object-number order, limited to the count
    # declared on the "Objects" line.
    for number in range(1, object_count + 1):
        for numbers in boxes_by_number.get(number, ()):
            # The digits on the line include the object's ordinal first,
            # so the coordinates start at index 1.
            xmin, ymin, xmax, ymax = numbers[1:5]

            obj = doc.createElement('object')
            annotation.appendChild(obj)
            _append_text_element(doc, obj, 'name', 'person')
            _append_text_element(doc, obj, 'pose', 'Unspecified')
            _append_text_element(doc, obj, 'truncated', '0')
            _append_text_element(doc, obj, 'difficult', '0')

            bndbox = doc.createElement('bndbox')
            obj.appendChild(bndbox)
            _append_text_element(doc, bndbox, 'xmin', xmin)
            _append_text_element(doc, bndbox, 'ymin', ymin)
            _append_text_element(doc, bndbox, 'xmax', xmax)
            _append_text_element(doc, bndbox, 'ymax', ymax)

    return doc.toprettyxml(indent="")


def main(annotation_dir="Annotations"):
    """Convert every ``.txt`` annotation in *annotation_dir* to VOC XML.

    The XML files are written into the same directory and are named
    ``"00" + <first number in the source file name> + ".xml"``.

    Args:
        annotation_dir: Directory that holds the ``.txt`` annotation files.
    """
    for oldfilename in os.listdir(annotation_dir):
        if ".txt" not in oldfilename:
            continue
        print(oldfilename)

        fileindex = _NUMBER_RE.findall(oldfilename)
        print(fileindex)
        if not fileindex:
            # Without a number in the name there is no image index to use.
            print('skipping (no numeric index in name):' + oldfilename)
            continue
        image_index = int(fileindex[0])
        print(str(image_index))
        newfilename = "00" + str(image_index) + ".xml"

        # Read all lines of the annotation file at once.
        with open(os.path.join(annotation_dir, oldfilename), "r") as src:
            print('processing:' + oldfilename)
            lines = src.readlines()

        xml_text = build_voc_xml(lines, image_index)

        with open(os.path.join(annotation_dir, newfilename), 'w') as dst:
            dst.write(xml_text)
        print(str(fileindex) + " compelete")
    print('process compelete')


if __name__ == "__main__":
    main()