把faster-rcnn检测出来的结果保存成txt,再转成xml

来源：互联网发布：乐乎lofter 编辑：程序博客网时间：2024/05/20 05:26

利用 faster-rcnn 检测图片，先把结果保存成 txt。txt 中每行对应一个检测框，依次为图片名、标签名和四个坐标，例如：000002.jpg dog 44 28 132 121

利用下面这段代码就可以做到：把它保存成 XX.py 后运行即可。代码里需要修改的地方都加了注释；我不知道怎么上传源码文件，只能直接贴在这里，大家将就着用。

[python] view plain copy
#!/usr/bin/env python  
# -*- coding: UTF-8 -*-   
# --------------------------------------------------------  
# Faster R-CNN  
# Copyright (c) 2015 Microsoft  
# Licensed under The MIT License [see LICENSE for details]  
# Written by Ross Girshick  
# --------------------------------------------------------  
  
""" 
Demo script showing detections in sample images. 
 
See README.md for installation instructions before running. 
"""  
  
import _init_paths  
from fast_rcnn.config import cfg  
from fast_rcnn.test import im_detect  
from fast_rcnn.nms_wrapper import nms  
from utils.timer import Timer  
import matplotlib.pyplot as plt  
import numpy as np  
import scipy.io as sio  
import caffe, os, sys, cv2  
import argparse  
  
# Detection categories; index 0 is the background entry.
# Replace these with your own categories.
CLASSES = (
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# Value of --net -> (model directory name, caffemodel file name).
NETS = {
    'vgg16': ('VGG16', 'VGG16_faster_rcnn_final.caffemodel'),
    'zf': ('ZF', 'ZF_faster_rcnn_final.caffemodel'),
}
  
  
# Default txt file that collects the detections; change it to your own path.
_RESULT_TXT = '/media/zc/A/Imagenet2012/img_train/n01440764/result.txt'

# Detector class name -> ImageNet synset id written instead of the class name.
# Classes without an entry are written under their own name.
_SYNSET_LABELS = {
    'bicycle': 'n02835271',  # tandem bicycle
    'bird': 'n01833805',     # hummingbird
    'boat': 'n04273569',     # speedboat
    'bottle': 'n04557648',   # water bottle
    'bus': 'n03769881',      # minibus
    'car': 'n04461696',      # tow truck
    'cat': 'n02123045',      # tabby cat
    'chair': 'n02791124',    # barber chair
    # African hunting dog.  The original script wrote 'n0211673', which is one
    # digit short of a synset id; n02116738 is the synset its comment named.
    'dog': 'n02116738',
    'horse': 'n12768682',
    'sheep': 'n02415577',    # bighorn sheep
    'train': 'n02917067',    # bullet train
}


def vis_detections(image_name, class_name, dets, thresh=0.5,
                   result_path=_RESULT_TXT):
    """Append the confident detections of one class to the result txt file.

    One line is written per detection, in the form
    ``<image_name> <label> <x1> <y1> <x2> <y2>``, where the coordinates are
    the first four columns of the detection truncated to integers.

    Args:
        image_name: Name of the image the detections belong to.
        class_name: Detector class name.  It is replaced by the synset id in
            ``_SYNSET_LABELS`` when one is defined, otherwise written as is.
        dets: 2-D array whose rows hold four box coordinates followed by the
            score in the last column.
        thresh: Minimum score a detection needs in order to be written.
        result_path: Text file the lines are appended to.

    Returns:
        None.  Nothing is written (and the file is not opened) when no
        detection reaches ``thresh``.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    label = _SYNSET_LABELS.get(class_name, class_name)
    # Open once per call; the context manager closes the file even on error.
    with open(result_path, 'a') as fw:
        for i in inds:
            bbox = dets[i, :4]
            fields = [str(image_name), label] + [str(int(v)) for v in bbox]
            fw.write(' '.join(fields) + '\n')
  
  
def demo(net, image_name,
         image_dir='/media/zc/A/Imagenet2012/img_train/n01440764'):
    """Run the detector on one image and record each class's detections.

    Args:
        net: Network object passed through to ``im_detect``.
        image_name: File name of the image inside ``image_dir``.
        image_dir: Folder holding the images; change it to your own location.

    Raises:
        IOError: If the image cannot be read by ``cv2.imread``.
    """
    im_file = os.path.join(image_dir, image_name)
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure with None instead of raising; fail here
        # with a clear message rather than deep inside im_detect.
        raise IOError('{:s} could not be read as an image.'.format(im_file))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        # Each class owns four consecutive box columns and one score column.
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(image_name, cls, dets, thresh=CONF_THRESH)
  
def parse_args():
    """Build the command-line parser for the demo and return the parsed options."""
    cli = argparse.ArgumentParser(description='Faster R-CNN demo')

    # (flag, keyword arguments) for every supported option.
    option_specs = (
        ('--gpu', dict(dest='gpu_id', help='GPU device id to use [0]',
                       default=0, type=int)),
        ('--cpu', dict(dest='cpu_mode',
                       help='Use CPU mode (overrides --gpu)',
                       action='store_true')),
        ('--net', dict(dest='demo_net', help='Network to use [vgg16]',
                       choices=NETS.keys(), default='vgg16')),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()
  
if __name__ == '__main__':
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    args = parse_args()

    prototxt = os.path.join(cfg.MODELS_DIR, NETS[args.demo_net][0],
                            'faster_rcnn_alt_opt', 'faster_rcnn_test.pt')
    caffemodel = os.path.join(cfg.DATA_DIR, 'faster_rcnn_models',
                              NETS[args.demo_net][1])

    if not os.path.isfile(caffemodel):
        raise IOError(('{:s} not found.\nDid you run ./data/script/'
                       'fetch_faster_rcnn_models.sh?').format(caffemodel))

    if args.cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpu_id)
        cfg.GPU_ID = args.gpu_id
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    print('\n\nLoaded network {:s}'.format(caffemodel))

    # Warmup on a dummy image
    im = 128 * np.ones((300, 500, 3), dtype=np.uint8)
    for _ in range(2):
        im_detect(net, im)

    # temp.txt lists the image file names, one per line.
    with open('/media/zc/A/Imagenet2012/img_train/n01440764/temp.txt', 'r') as fr:
        for im_name in fr:
            im_name = im_name.rstrip('\r\n')
            if not im_name:
                # A blank line is not an image name; skip it.
                continue
            print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
            print('Demo for data/demo/{}'.format(im_name))
            # Must run for every listed image, hence inside the loop.
            demo(net, im_name)

    plt.show()

再用一段 matlab 代码，就可以把 txt 转化成 xml，感谢小咸鱼的分享。如果你的图片是 jpg，只要修改四个变量就能用，十分方便；如果是 JPEG，下面还要修改两个地方，我已经在代码里注释了。

[plain] view plain copy
%%
% Builds VOC2007-style xml annotation files from a detection list.
% Every line of the txt file looks like:  000002.jpg dog 44 28 132 121
% i.e. image name, object class and bounding box, separated by spaces.
% An image with several objects takes one line per object, e.g.:
%   000002.jpg dog 44 28 132 121
%   000002.jpg car 50 27 140 110
% The box is given as top-left corner followed by bottom-right corner.
% Lines of the same image must be consecutive: a document is closed as soon
% as a line with a different image name is read.
% The xml name is derived with fileparts, so any image extension
% (.jpg, .JPEG, ...) works without editing the code.
% Author: 小咸鱼_
% CSDN: http://blog.csdn.net/sinat_30071459
%%
clc;
clear;
% Adjust the four variables below for your data.
imgpath='img\';              % folder holding the images
txtpath='img\output.txt';    % detection list
xmlpath_new='Annotations/';  % folder that receives the final xml files
foldername='VOC2007';        % value of the xml <folder> field


fidin=fopen(txtpath,'r');
if fidin==-1
    error('Cannot open %s',txtpath);
end
% The images are copied into JPEGImages below; make sure it is a folder.
if ~exist('JPEGImages','dir')
    mkdir('JPEGImages');
end
lastname='begin';

while ~feof(fidin)
    tline=fgetl(fidin);
    if ~ischar(tline)   % fgetl returns -1 when nothing is left to read
        break;
    end
    tline=strtrim(tline);
    if isempty(tline)   % tolerate blank lines
        continue;
    end
    parts=regexp(tline,'\s+','split');
    if numel(parts)<6
        warning('Skipping malformed line: %s',tline);
        continue;
    end
    imgname=parts{1};
    filepath=[imgpath,imgname];
    img=imread(filepath);
    [h,w,d]=size(img);

    % Show the box on the image as a quick visual check.
    xmin=str2double(parts{3});
    ymin=str2double(parts{4});
    xmax=str2double(parts{5});
    ymax=str2double(parts{6});
    imshow(img);
    rectangle('Position',[xmin,ymin,xmax-xmin,ymax-ymin],'LineWidth',4,'EdgeColor','r');
    pause(0.1);

    if ~strcmp(imgname,lastname)
        % New image: save the previous document, then start a new one.
        copyfile(filepath,'JPEGImages');
        if exist('Createnode','var')
            [~,base]=fileparts(lastname);
            xmlwrite([base '.xml'],Createnode);
        end

        Createnode=com.mathworks.xml.XMLUtils.createDocument('annotation');
        Root=Createnode.getDocumentElement;   % root node

        node=Createnode.createElement('folder');
        node.appendChild(Createnode.createTextNode(foldername));
        Root.appendChild(node);

        node=Createnode.createElement('filename');
        node.appendChild(Createnode.createTextNode(imgname));
        Root.appendChild(node);

        source_node=Createnode.createElement('source');
        Root.appendChild(source_node);
        entries={'database','My Database';'annotation','VOC2007';'image','flickr';'flickrid','NULL'};
        for k=1:size(entries,1)
            node=Createnode.createElement(entries{k,1});
            node.appendChild(Createnode.createTextNode(entries{k,2}));
            source_node.appendChild(node);
        end

        owner_node=Createnode.createElement('owner');
        Root.appendChild(owner_node);
        entries={'flickrid','NULL';'name','xiaoxianyu'};
        for k=1:size(entries,1)
            node=Createnode.createElement(entries{k,1});
            node.appendChild(Createnode.createTextNode(entries{k,2}));
            owner_node.appendChild(node);
        end

        size_node=Createnode.createElement('size');
        Root.appendChild(size_node);
        entries={'width',w;'height',h;'depth',d};
        for k=1:size(entries,1)
            node=Createnode.createElement(entries{k,1});
            node.appendChild(Createnode.createTextNode(num2str(entries{k,2})));
            size_node.appendChild(node);
        end

        node=Createnode.createElement('segmented');
        node.appendChild(Createnode.createTextNode('0'));
        Root.appendChild(node);

        lastname=imgname;
    end

    % Every line, whether it opened a new document or not, adds one object.
    object_node=Createnode.createElement('object');
    Root.appendChild(object_node);
    entries={'name',parts{2};'pose','Unspecified';'truncated','0';'difficult','0'};
    for k=1:size(entries,1)
        node=Createnode.createElement(entries{k,1});
        node.appendChild(Createnode.createTextNode(entries{k,2}));
        object_node.appendChild(node);
    end

    bndbox_node=Createnode.createElement('bndbox');
    object_node.appendChild(bndbox_node);
    entries={'xmin',parts{3};'ymin',parts{4};'xmax',parts{5};'ymax',parts{6}};
    for k=1:size(entries,1)
        node=Createnode.createElement(entries{k,1});
        node.appendChild(Createnode.createTextNode(entries{k,2}));
        bndbox_node.appendChild(node);
    end
end
fclose(fidin);

% Save the document of the last image.
if exist('Createnode','var')
    [~,base]=fileparts(lastname);
    xmlwrite([base '.xml'],Createnode);
end
  
% Post-process every xml file in the current folder: drop its first line
% (presumably the XML declaration written by xmlwrite), replace each run of
% three spaces by a tab, store the result in xmlpath_new and delete the
% intermediate file.
if ~exist(xmlpath_new,'dir')
    mkdir(xmlpath_new);   % fopen(...,'w') does not create missing folders
end

file=dir(pwd);
for i=1:length(file)
    fname=file(i).name;
    if file(i).isdir || length(fname)<4 || ~strcmp(fname(end-3:end),'.xml')
        continue;
    end

    fold=fopen(fname,'r');
    fnew=fopen([xmlpath_new fname],'w');
    if fold==-1 || fnew==-1
        % Keep the intermediate file so that nothing is lost.
        if fold~=-1
            fclose(fold);
        end
        if fnew~=-1
            fclose(fnew);
        end
        warning('Skipping %s: cannot open input or output file',fname);
        continue;
    end

    isfirst=true;
    while ~feof(fold)
        tline=fgetl(fold);
        if ~ischar(tline)   % nothing left to read
            break;
        end
        if isfirst          % the first line is not copied
            isfirst=false;
            continue;
        end
        newStr=regexprep(tline,'   ',char(9));
        fprintf(fnew,'%s\n',newStr);
    end
    fprintf('已处理%s\n',fname);
    fclose(fold);
    fclose(fnew);
    delete(fname);
end

阅读全文

0 0