YOLO tensorflow

来源:互联网 发布:陈子豪淘宝店 编辑:程序博客网 时间:2024/06/07 22:19
# coding: utf-8
#
# Single-image object detection with the YOLO_small network, written against
# the TensorFlow 1.x graph API (tf.placeholder / tf.Session).
#
# The script builds the graph, restores the weights from `weights_file`, runs
# the network on `fromfile`, decodes the output vector into boxes, and draws
# the boxes on a copy of the image.

import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
fromfile = 'test/person.jpg'
# NOTE(review): imshow / filewrite_img / filewrite_txt are not read by the
# code shown here; they are kept so existing references keep working.
imshow = True
filewrite_img = True
filewrite_txt = True
disp_console = True  # print one line per layer while the graph is built
weights_file = 'weights/YOLO_small.ckpt'
alpha = 0.1  # negative-side slope of the leaky ReLU
threshold = 0.2  # minimum class-specific confidence for a box to be kept
iou_threshold = 0.5  # overlap above which the lower-scoring box is dropped
num_class = 20
num_box = 2  # boxes predicted per grid cell
# Exponent applied to the raw width/height outputs.
# NOTE(review): presumably the network regresses the square root of the box
# size (as in the YOLO paper), hence the name - confirm.
sqrt = 2
grid_size = 7
num_cell = grid_size * grid_size
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
           "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
           "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
# Placeholder image size; overwritten with the real size once the image is read.
w_img = 640
h_img = 480


# ---------------------------------------------------------------------------
# Layer builders
# ---------------------------------------------------------------------------
def conv_layer(idx, inputs, filters, size, stride):
    """Add a convolution + bias + leaky-ReLU layer to the graph.

    Args:
        idx: Layer number, used only for op names and the console message.
        inputs: Input tensor; its 4th dimension is read as the channel count.
        filters: Number of output channels.
        size: Side length of the square kernel.
        stride: Stride applied along both spatial dimensions.

    Returns:
        The activation tensor ``max(alpha * z, z)``.
    """
    channels = inputs.get_shape()[3]
    # Variables are created in the order weight -> biases; keep this order so
    # the auto-generated variable names stay the same for Saver.restore.
    weight = tf.Variable(tf.truncated_normal([size, size, int(channels), filters], stddev=0.1))
    biases = tf.Variable(tf.constant(0.1, shape=[filters]))

    # Pad explicitly by size // 2 on each spatial side, then convolve with
    # 'VALID' padding.
    pad_size = size // 2
    pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
    inputs_pad = tf.pad(inputs, pad_mat)

    conv = tf.nn.conv2d(inputs_pad, weight, strides=[1, stride, stride, 1],
                        padding='VALID', name=str(idx) + '_conv')
    conv_biased = tf.add(conv, biases, name=str(idx) + '_conv_biased')
    if disp_console:
        print('    Layer  %d : Type = Conv, Size = %d * %d, Stride = %d, Filters = %d, Input channels = %d' % (
            idx, size, size, stride, filters, int(channels)))
    return tf.maximum(alpha * conv_biased, conv_biased, name=str(idx) + '_leaky_relu')


def pooling_layer(idx, inputs, size, stride):
    """Add a max-pooling layer ('SAME' padding) to the graph.

    Args:
        idx: Layer number, used only for the op name and the console message.
        inputs: Input tensor.
        size: Side length of the square pooling window.
        stride: Stride applied along both spatial dimensions.

    Returns:
        The pooled tensor.
    """
    # Honour disp_console like conv_layer does.
    if disp_console:
        print('    Layer  %d : Type = Pool, Size = %d * %d, Stride = %d' % (idx, size, size, stride))
    return tf.nn.max_pool(inputs, ksize=[1, size, size, 1], strides=[1, stride, stride, 1],
                          padding='SAME', name=str(idx) + '_pool')


def fc_layer(idx, inputs, hiddens, flat=False, linear=False):
    """Add a fully connected layer to the graph.

    Args:
        idx: Layer number, used only for the op name and the console message.
        inputs: Input tensor (4-D when ``flat`` is true, otherwise 2-D).
        hiddens: Number of output units.
        flat: If true, flatten the 4-D input to 2-D before the matmul.
        linear: If true, return the affine output without the leaky ReLU.

    Returns:
        The layer output tensor.
    """
    input_shape = inputs.get_shape().as_list()
    if flat:
        dim = input_shape[1] * input_shape[2] * input_shape[3]
        # Move channels in front of the spatial dims before flattening.
        # NOTE(review): presumably this matches the layout the pretrained
        # weights were exported with - confirm.
        inputs_transposed = tf.transpose(inputs, (0, 3, 1, 2))
        inputs_processed = tf.reshape(inputs_transposed, [-1, dim])
    else:
        dim = input_shape[1]
        inputs_processed = inputs

    # Creation order weight -> biases is kept for checkpoint compatibility.
    weight = tf.Variable(tf.truncated_normal([dim, hiddens], stddev=0.1))
    biases = tf.Variable(tf.constant(0.1, shape=[hiddens]))
    # Honour disp_console like conv_layer does.
    if disp_console:
        print('    Layer  %d : Type = Full, Hidden = %d, Input dimension = %d, Flat = %d, Activation = %d' % (
            idx, hiddens, int(dim), int(flat), 1 - int(linear)))

    if linear:
        return tf.add(tf.matmul(inputs_processed, weight), biases, name=str(idx) + '_fc')
    ip = tf.add(tf.matmul(inputs_processed, weight), biases)
    return tf.maximum(alpha * ip, ip, name=str(idx) + '_fc')


# ---------------------------------------------------------------------------
# Build the network
# ---------------------------------------------------------------------------
print("Building YOLO_small graph...")
x = tf.placeholder('float32', [None, 448, 448, 3])
conv_1 = conv_layer(1, x, 64, 7, 2)
pool_2 = pooling_layer(2, conv_1, 2, 2)
conv_3 = conv_layer(3, pool_2, 192, 3, 1)
pool_4 = pooling_layer(4, conv_3, 2, 2)
conv_5 = conv_layer(5, pool_4, 128, 1, 1)
conv_6 = conv_layer(6, conv_5, 256, 3, 1)
conv_7 = conv_layer(7, conv_6, 256, 1, 1)
conv_8 = conv_layer(8, conv_7, 512, 3, 1)
pool_9 = pooling_layer(9, conv_8, 2, 2)
conv_10 = conv_layer(10, pool_9, 256, 1, 1)
conv_11 = conv_layer(11, conv_10, 512, 3, 1)
conv_12 = conv_layer(12, conv_11, 256, 1, 1)
conv_13 = conv_layer(13, conv_12, 512, 3, 1)
conv_14 = conv_layer(14, conv_13, 256, 1, 1)
conv_15 = conv_layer(15, conv_14, 512, 3, 1)
conv_16 = conv_layer(16, conv_15, 256, 1, 1)
conv_17 = conv_layer(17, conv_16, 512, 3, 1)
conv_18 = conv_layer(18, conv_17, 512, 1, 1)
conv_19 = conv_layer(19, conv_18, 1024, 3, 1)
pool_20 = pooling_layer(20, conv_19, 2, 2)
conv_21 = conv_layer(21, pool_20, 512, 1, 1)
conv_22 = conv_layer(22, conv_21, 1024, 3, 1)
conv_23 = conv_layer(23, conv_22, 512, 1, 1)
conv_24 = conv_layer(24, conv_23, 1024, 3, 1)
conv_25 = conv_layer(25, conv_24, 1024, 3, 1)
conv_26 = conv_layer(26, conv_25, 1024, 3, 2)
conv_27 = conv_layer(27, conv_26, 1024, 3, 1)
conv_28 = conv_layer(28, conv_27, 1024, 3, 1)
fc_29 = fc_layer(29, conv_28, 512, flat=True, linear=False)
fc_30 = fc_layer(30, fc_29, 4096, flat=False, linear=False)
# skip dropout_31
# 1470 outputs = 7 * 7 cells * (20 class scores + 2 boxes * (1 confidence + 4 coords))
fc_32 = fc_layer(32, fc_30, 1470, flat=False, linear=True)

# ---------------------------------------------------------------------------
# Create the session and restore the weights
# ---------------------------------------------------------------------------
sess = tf.Session()
# NOTE(review): tf.initialize_all_variables is deprecated in later TF 1.x
# releases in favour of tf.global_variables_initializer - switch once the
# minimum supported TensorFlow version is confirmed.
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()
saver.restore(sess, weights_file)
print("Loading complete!" + '\n')


# ---------------------------------------------------------------------------
# Post-processing
# ---------------------------------------------------------------------------
def iou(box1, box2):
    """Return the intersection-over-union of two centre-format boxes.

    Args:
        box1: Indexable with 0 -> centre x, 1 -> centre y, 2 -> width,
            3 -> height.
        box2: Same layout as ``box1``.

    Returns:
        intersection area / union area, or 0.0 when the union has no area
        (e.g. both boxes are degenerate), instead of dividing by zero.
    """
    left = max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
    right = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2])
    top = max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
    bottom = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3])

    overlap_w = right - left
    overlap_h = bottom - top
    if overlap_w < 0 or overlap_h < 0:
        intersection = 0
    else:
        intersection = overlap_w * overlap_h

    union = box1[2] * box1[3] + box2[2] * box2[3] - intersection
    if union <= 0:
        return 0.0
    return intersection / union


def interpret_output(output):
    """Decode the flat network output into a list of detections.

    The vector is split, in this order, into per-cell class scores
    (num_cell x num_class), per-box confidences (num_cell x num_box) and
    per-box coordinates (num_cell x num_box x 4).

    Reads the module-level ``w_img`` / ``h_img`` to scale boxes to image pixels.

    Args:
        output: 1-D array holding the network output for one image.

    Returns:
        A list of ``[class_name, x, y, w, h, prob]`` sorted by descending
        ``prob``; x/y are the box centre and w/h its size, in pixels.
    """
    class_probs_size = num_cell * num_class
    conf_size = num_cell * num_box
    class_probs = np.reshape(output[:class_probs_size], (num_cell, num_class))
    conf = np.reshape(output[class_probs_size:class_probs_size + conf_size],
                      (num_cell, num_box))
    coords = np.reshape(output[class_probs_size + conf_size:],
                        (num_cell, num_box, 4))

    # Class-specific confidence of every box = class score * box confidence.
    p = np.zeros((num_cell, num_box, num_class))
    boxes = []
    for grid in range(num_cell):
        col = grid % grid_size
        row = grid // grid_size
        for b in range(num_box):
            # Use every class (num_class), not a hard-coded 20.
            p[grid, b] = class_probs[grid] * conf[grid, b]
            class_idx = np.argmax(p[grid, b], axis=0)
            prob = p[grid, b, class_idx]
            if prob > threshold:
                # Centre offsets are relative to the cell; sizes are raised to
                # the `sqrt` power; everything is then scaled to image pixels.
                boxes.append({
                    0: (coords[grid, b, 0] + col) / grid_size * w_img,
                    1: (coords[grid, b, 1] + row) / grid_size * h_img,
                    2: coords[grid, b, 2] ** sqrt * w_img,
                    3: coords[grid, b, 3] ** sqrt * h_img,
                    "prob": prob,
                    "class_idx": class_idx,
                })

    # Non-maximum suppression: walk the boxes from best to worst and zero the
    # score of any later box that overlaps a surviving one too much.
    boxes.sort(key=lambda bx: bx["prob"], reverse=True)
    for i, kept in enumerate(boxes):
        if kept["prob"] == 0:
            continue
        for other in boxes[i + 1:]:
            if iou(kept, other) > iou_threshold:
                other["prob"] = 0.0

    return [[classes[bx["class_idx"]], bx[0], bx[1], bx[2], bx[3], bx["prob"]]
            for bx in boxes if bx["prob"] > 0]


# ---------------------------------------------------------------------------
# Read the test image, feed it to the network and decode the output
# ---------------------------------------------------------------------------
img = cv2.imread(fromfile)
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise IOError('Could not read image file: ' + fromfile)
s = time.time()
h_img, w_img, _ = img.shape
img_resized = cv2.resize(img, (448, 448))
# OpenCV loads images as BGR; convert to RGB before feeding the network.
img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
img_resized_np = np.asarray(img_RGB)
# Batch of one 448 x 448 three-channel image.
inputs = np.zeros((1, 448, 448, 3), dtype='float32')
# Rescale pixel values from [0, 255] to [-1, 1].
inputs[0] = (img_resized_np / 255.0) * 2.0 - 1.0
in_dict = {x: inputs}
net_output = sess.run(fc_32, feed_dict=in_dict)
results = interpret_output(net_output[0])
if disp_console:
    print('Elapsed time : ' + str(time.time() - s) + ' secs')

# ---------------------------------------------------------------------------
# Draw the boxes and labels, then show the image
# ---------------------------------------------------------------------------
img_cp = img.copy()
for result in results:
    label = result[0]
    prob = result[5]
    # Local names differ from `x` on purpose so the input placeholder `x`
    # is not overwritten and detection can be re-run.
    cx = int(result[1])
    cy = int(result[2])
    box_w = int(result[3])
    box_h = int(result[4])
    half_w = box_w // 2
    half_h = box_h // 2
    print('    class : ' + label + ' , [x,y,w,h]=[' + str(cx) + ',' + str(cy) + ',' + str(box_w) + ',' +
          str(box_h) + '], Confidence = ' + str(prob))
    cv2.rectangle(img_cp, (cx - half_w, cy - half_h), (cx + half_w, cy + half_h), (0, 255, 0), 2)
    # Filled grey strip above the box as background for the label text.
    cv2.rectangle(img_cp, (cx - half_w, cy - half_h - 20), (cx + half_w, cy - half_h), (125, 125, 125), -1)
    cv2.putText(img_cp, label + ' : %.2f' % prob, (cx - half_w + 5, cy - half_h - 7),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

# In a Jupyter notebook, run `%matplotlib inline` first to render the figure.
# img_cp is BGR (OpenCV) while matplotlib expects RGB, so convert for display.
plt.imshow(cv2.cvtColor(img_cp, cv2.COLOR_BGR2RGB))