人脸检测——滑动窗口篇(训练和实现)
来源:互联网 发布:天音淘宝宝贝复制软件 编辑:程序博客网 时间:2024/06/05 08:44
人脸检测:cascade cnn,mtcnn,都可以通过下面代码复现。但是下面的实现是比较low的,后面更新FCN的方法。
注意mtcnn的标签加了回归框,训练时候的输出层要作修改:(回归框的作用还是很大的)
# compute bbox reg label,其中x1,x2,y1,y2为真实的人脸坐标,x_left,x_right,y_top,y_bottom,width,height为预测的人脸坐标,
# 如果是在准备人脸和非人脸样本的时候,x_left,x_right,y_top,y_bottom,width,height就是与真实人脸的IOU>0.6(阈值可根据你的定义调整)的那个滑动窗的坐标。
offset_x1 = (x1 - x_left) / float(width)
offset_y1 = (y1 - y_top) / float(height)
offset_x2 = (x2 - x_right) / float(width)
offset_y2 = (y2 - y_bottom ) / float(height)
tensorflow:12-net训练
2016年9月份的代码,比较乱>.<,仅供参考,需要的话自己阅读整理吧。
train_net_12.py : face_AFLW文件夹下包含有人脸和非人脸两个文件夹。
import tensorflow as tfimport cv2import osimport csvfrom pandas import read_csvimport randomimport numpy as npimport utilsfilename = '/Users/liupeng/Desktop/anaconda/Dlib/12'text_data = []label = 0'''for filename1 in os.listdir(filename): #print (filename1) label = label + 1 if (filename1[0] != '.'): filename1 = filename + '/' + filename1 for filename2 in os.listdir(filename1): #print (filename2) if (filename2[0] != '.' ): #print (filename2) filename2 = filename1 + '/' + filename2 image = cv2.imread(filename2) if image is None: continue text_data.append(filename2 + ' ' + str(label-2))'''nface_filename = filename + '/negative'for filename1 in os.listdir(nface_filename): #print (filename2) if (filename1[0] != '.' ): #print (filename2) filename1 = nface_filename + '/' + filename1 image = cv2.imread(filename1) if image is None: continue text_data.append(filename1 + ' ' + str(0))zface_filename = filename + '/positive'for filename1 in os.listdir(zface_filename): #print (filename2) if (filename1[0] != '.' ): #print (filename2) filename1 = zface_filename + '/' + filename1 image = cv2.imread(filename1) if image is None: continue text_data.append(filename1 + ' ' + str(1))'''part_filename = filename + '/part'for filename1 in os.listdir(part_filename): #print (filename2) if (filename1[0] != '.' 
): #print (filename2) filename1 = part_filename + '/' + filename1 image = cv2.imread(filename1) if image is None: continue text_data.append(filename1 + ' ' + str(1))'''text_data = [x.split(' ') for x in text_data]random.shuffle(text_data)train_image = []train_label = []for i in range(len(text_data)): train_image.append(text_data[i][0]) train_label.append(text_data[i][1])#print (train_image)print (train_label)batch_size = 128IMAGE_SIZE = 12def get_next_batch(pointer): batch_x = np.zeros([batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]) batch_y = np.zeros([batch_size, 2]) # images = train_image[pointer*batch_size : (pointer+1)*batch_size] # label = train_label[pointer*batch_size : (pointer+1)*batch_size] for i in range(batch_size): image = cv2.imread(train_image[i+pointer*batch_size]) image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE)) image = (image - 127.5)*0.0078125 '''m = image.mean() s = image.std() min_s = 1.0/(np.sqrt(image.shape[0]*image.shape[1]*image.shape[2])) std = max(min_s, s) image = (image-m)/std''' batch_x[i,:] = image.astype('float32') #/ 255.0 # print (batch_x[i]) if train_label[i+pointer*batch_size] == '0': batch_y[i,0] = 1 else: batch_y[i,1] = 1 # print (train_image[i+pointer*batch_size],batch_y[i]) return batch_x, batch_ydef fcn_12_detect(threshold, dropout=False, activation=tf.nn.relu): imgs = tf.placeholder(tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE, 3]) labels = tf.placeholder(tf.float32, [None, 2]) keep_prob = tf.placeholder(tf.float32, name='keep_prob') with tf.variable_scope('net_12'): conv1,_ = utils.conv2d(x=imgs, n_output=16, k_w=3, k_h=3, d_w=1, d_h=1, name="conv1") conv1 = activation(conv1) pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool1") ip1,W1 = utils.conv2d(x=pool1, n_output=16, k_w=6, k_h=6, d_w=1, d_h=1, padding="VALID", name="ip1") ip1 = activation(ip1) if dropout: ip1 = tf.nn.dropout(ip1, keep_prob) ip2,W2 = utils.conv2d(x=ip1, n_output=2, k_w=1, k_h=1, d_w=1, d_h=1, name="ip2") pred = 
tf.nn.sigmoid(utils.flatten(ip2)) target = utils.flatten(labels) regularizer = 8e-3 * (tf.nn.l2_loss(W1)+100*tf.nn.l2_loss(W2)) loss = tf.reduce_mean(tf.div(tf.add(-tf.reduce_sum(target * tf.log(pred + 1e-9),1), -tf.reduce_sum((1-target) * tf.log(1-pred + 1e-9),1)),2)) + regularizer cost = tf.reduce_mean(loss) predict = pred max_idx_p = tf.argmax(predict, 1) max_idx_l = tf.argmax(target, 1) correct_pred = tf.equal(max_idx_p, max_idx_l) acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) thresholding_12 = tf.cast(tf.greater(pred, threshold), "float") recall_12 = tf.reduce_sum(tf.cast(tf.logical_and(tf.equal(thresholding_12, tf.constant([1.0])), tf.equal(target, tf.constant([1.0]))), "float")) / tf.reduce_sum(target) ''' correct_prediction = tf.equal(tf.cast(tf.greater(pred, threshold), tf.int32), tf.cast(target, tf.int32)) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))''' return {'imgs': imgs, 'labels': labels, 'keep_prob': keep_prob, 'cost': cost, 'pred': pred, 'accuracy': acc, 'features': ip1, 'recall': recall_12, 'thresholding': thresholding_12}def train(): net_output = fcn_12_detect(0.0) global_step = tf.Variable(0, tf.int32) starter_learning_rate = 0.00001 learning_rate = tf.train.exponential_decay( learning_rate=starter_learning_rate, global_step=global_step, decay_steps=1000, decay_rate=1.0, staircase=True, name=None) train_step = tf.train.AdamOptimizer(learning_rate).minimize(net_output['cost'], global_step=global_step) sess = tf.Session() saver = tf.train.Saver(tf.trainable_variables()) # import pdb; pdb.set_trace() sess.run(tf.initialize_all_variables()) saver.restore(sess, 'model/12-net/model_net_12-176646') for j in range(2000): for i in range(7000): imgs, labels = get_next_batch(i) # labels = labels.reshape((labels.shape[0])) if i%300==0 and i!=0: saver.save(sess, 'model/12-net/model_net_12', global_step=global_step, write_meta_graph=False) if i%1==0: img, label = get_next_batch(7000+i%700) cost, accuracy, recall, lr, pre = sess.run( 
[net_output['cost'], net_output['accuracy'], net_output['recall'], learning_rate, net_output['pred']], feed_dict={net_output['imgs']: img, net_output['labels']: label}) print("Step %d, cost: %f, acc: %f, recall: %f, lr: %f"%(i, cost, accuracy, recall, lr)) print (pre[0], label[0]) print (pre[1], label[1]) print (pre[2], label[2]) print (pre[3], label[3]) print (pre[4], label[4]) # print("target: ", target) # print("pred: ", pred) # train sess.run(train_step, feed_dict={net_output['imgs']: imgs, net_output['labels']: labels}) sess.close()def test(): image = cv2.imread('images/8.jpg') image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE)) m = image.mean() s = image.std() min_s = 1.0/(np.sqrt(image.shape[0]*image.shape[1]*image.shape[2])) std = max(min_s, s) image = (image-m)/std image = image.astype('float32') #/ 255 net_12 = fcn_12_detect(0.2) saver = tf.train.Saver() sess = tf.Session() # saver.restore(sess, tf.train.latest_checkpoint('/Users/liupeng/Desktop/anaconda/i_code', 'checkpoint')) sess.run(tf.initialize_all_variables()) print ('start restore model') saver.restore(sess, 'model/model_net_12-71400') print ('ok') # saver.restore(sess, tf.train.latest_checkpoint('.')) # predict = tf.argmax(tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2) predict = sess.run(net_12['pred'], feed_dict={net_12['imgs']: [image]}) print ("predict:", predict) return predict if __name__ == '__main__': train() # test()
utils.pyimport tensorflow as tfdef conv2d(x, n_output, k_h=5, k_w=5, d_h=2, d_w=2, padding='SAME', name='conv2d', reuse=None): """Helper for creating a 2d convolution operation. Parameters ---------- x : tf.Tensor Input tensor to convolve. n_output : int Number of filters. k_h : int, optional Kernel height k_w : int, optional Kernel width d_h : int, optional Height stride d_w : int, optional Width stride padding : str, optional Padding type: "SAME" or "VALID" name : str, optional Variable scope Returns ------- op : tf.Tensor Output of convolution """ with tf.variable_scope(name or 'conv2d', reuse=reuse): W = tf.get_variable( name='W', shape=[k_h, k_w, x.get_shape()[-1], n_output], initializer=tf.contrib.layers.xavier_initializer_conv2d()) conv = tf.nn.conv2d( name='conv', input=x, filter=W, strides=[1, d_h, d_w, 1], padding=padding) b = tf.get_variable( name='b', shape=[n_output], initializer=tf.constant_initializer(0.0)) h = tf.nn.bias_add( name='h', value=conv, bias=b) return h, Wdef linear(x, n_output, name=None, activation=None, reuse=None): """Fully connected layer. Parameters ---------- x : tf.Tensor Input tensor to connect n_output : int Number of output neurons name : None, optional Scope to apply Returns ------- h, W : tf.Tensor, tf.Tensor Output of fully connected layer and the weight matrix """ if len(x.get_shape()) != 2: x = flatten(x, reuse=reuse) n_input = x.get_shape().as_list()[1] with tf.variable_scope(name or "fc", reuse=reuse): W = tf.get_variable( name='W', shape=[n_input, n_output], dtype=tf.float32, initializer=tf.tf.contrib.layers.xavier_initializer()) b = tf.get_variable( name='b', shape=[n_output], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) h = tf.nn.bias_add( name='h', value=tf.matmul(x, W), bias=b) if activation: h = activation(h) return h, Wdef flatten(x, name=None, reuse=None): """Flatten Tensor to 2-dimensions. Parameters ---------- x : tf.Tensor Input tensor to flatten. 
name : None, optional Variable scope for flatten operations Returns ------- flattened : tf.Tensor Flattened tensor. """ with tf.variable_scope('flatten'): dims = x.get_shape().as_list() if len(dims) == 4: flattened = tf.reshape( x, shape=[-1, dims[1] * dims[2] * dims[3]]) elif len(dims) == 2 or len(dims) == 1: flattened = x else: raise ValueError('Expected n dimensions of 1, 2 or 4. Found:', len(dims)) return flatteneddef lrelu(features, leak=0.2): """Leaky rectifier. Parameters ---------- features : tf.Tensor Input to apply leaky rectifier to. leak : float, optional Percentage of leak. Returns ------- op : tf.Tensor Resulting output of applying leaky rectifier activation. """ f1 = 0.5 * (1 + leak) f2 = 0.5 * (1 - leak) return f1 * features + f2 * abs(features)
train_net_24.py 参考train_net_12.py,加深一下网络,自己写吧。。。。下面是滑动窗人脸检测的流程:
(1)确定最小检测人脸,对原图img缩放,缩放比例为(滑动窗大小/最小人脸大小)。
(2)缩放后的图片,构建金字塔。
(3)对金字塔的每一层,通过滑动窗获取patch,对patch归一化处理,之后给训练好的人脸检测器识别,将识别为人脸的窗口位置和概率保存。
(4)将人脸窗口映射到原图img中的人脸位置,概率不变。
(5)NMS处理重叠窗口。
(6)级联的方式提高准确率。
(7)在原图画出人脸位置。
*****调节的参数有:
# 步长
stride = 2
# 最小人脸大小
F = 40
# 构建金字塔的比例
ff = 0.8
# 概率多大时判定为人脸?
p = 0.8
# nms
overlapThresh_12 = 0.7
overlapThresh_24 = 0.7
下面不是完整代码,需要自己添加训练好的model,稍作修改就可以。
import numpy as npimport tensorflow as tffrom model import fcn_12_detectdef py_nms(dets, thresh, mode="Union"): """ greedily select boxes with high confidence keep boxes overlap <= thresh rule out overlap > thresh :param dets: [[x1, y1, x2, y2 score]] :param thresh: retain overlap <= thresh :return: indexes to keep """ if len(dets) == 0: return [] x1 = dets[:, 0] y1 = dets[:, 1] x2 = dets[:, 2] y2 = dets[:, 3] scores = dets[:, 4] areas = (x2 - x1 + 1) * (y2 - y1 + 1) order = scores.argsort()[::-1] keep = [] while order.size > 0: i = order[0] keep.append(i) xx1 = np.maximum(x1[i], x1[order[1:]]) yy1 = np.maximum(y1[i], y1[order[1:]]) xx2 = np.minimum(x2[i], x2[order[1:]]) yy2 = np.minimum(y2[i], y2[order[1:]]) w = np.maximum(0.0, xx2 - xx1 + 1) h = np.maximum(0.0, yy2 - yy1 + 1) inter = w * h if mode == "Union": ovr = inter / (areas[i] + areas[order[1:]] - inter) elif mode == "Minimum": ovr = inter / np.minimum(areas[i], areas[order[1:]]) inds = np.where(ovr <= thresh)[0] order = order[inds + 1] return dets[keep]def image_preprocess(img): img = (img - 127.5)*0.0078125 '''m = img.mean() s = img.std() min_s = 1.0/(np.sqrt(img.shape[0]*img.shape[1]*img.shape[2])) std = max(min_s, s) img = (img-m)/std''' return imgdef slide_window(img, window_size, stride): # 对构建的金字塔图片,滑动窗口。 # img:图片, window_size:滑动窗的大小,stride:步长。 window_list = [] w = img.shape[1] h = img.shape[0] if w<=window_size+stride or h<=window_size+stride: return None if len(img.shape)!=3: return None for i in range(int((w-window_size)/stride)): for j in range(int((h-window_size)/stride)): box = [j*stride, i*stride, j*stride+window_size, i*stride+window_size] window_list.append(box)return img, np.asarray(window_list)def pyramid(image, f, window_size): # 构建图像的金字塔,以便进行多尺度滑动窗口 # image:输入图像,f:缩放的尺度, window_size:滑动窗大小。 w = image.shape[1] h = image.shape[0] img_ls = [] while( w > window_size and h > window_size): img_ls.append(image) w = int(w * f) h = int(h * f) image = cv2.resize(image, (w, h)) return img_lsdef 
min_face(img, F, window_size, stride): # img:输入图像,F:最小人脸大小, window_size:滑动窗,stride:滑动窗的步长。 h, w, _ = img.shape w_re = int(float(w)*window_size/F) h_re = int(float(h)*window_size/F) if w_re<=window_size+stride or h_re<=window_size+stride: print (None) # 调整图片大小的时候注意参数,千万不要写反了 # 根据最小人脸缩放图片 img = cv2.resize(img, (w_re, h_re)) return imgif __name__ = "__main__": image = cv2.imread('images/1.jpg') h,w,_ = image.shape ...... # 调参的参数 IMAGE_SIZE = 12 # 步长 stride = 2 # 最小人脸大小 F = 40 # 构建金字塔的比例 ff = 0.8 # 概率多大时判定为人脸? p_12 = 0.8 p_24 = 0.8 # nms overlapThresh_12 = 0.7 overlapThresh_24 = 0.3 ...... # 加载 model net_12 = fcn_12_detect() net_12_vars = [v for v in tf.trainable_variables() if v.name.startswith('net_12')] saver_net_12 = tf.train.Saver(net_12_vars) sess = tf.Session() sess.run(tf.initialize_all_variables()) saver_net_12.restore(sess, 'model/12-net/model_net_12-123200') # net_24... ...... # 需要检测的最小人脸 image_ = min_face(image, F, IMAGE_SIZE, stride) ...... # 金字塔 pyd = pyramid(np.array(image_), ff, IMAGE_SIZE) ...... 
# net-12 window_after_12 = [] for i, img in enumerate(pyd): # 滑动窗口 slide_return = slide_window(img, IMAGE_SIZE, stride) if slide_return is None: break img_12 = slide_return[0] window_net_12 = slide_return[1] w_12 = img_12.shape[1] h_12 = img_12.shape[0] patch_net_12 = [] for box in window_net_12: patch = img_12[box[0]:box[2], box[1]:box[3], :] # 做归一化处理 patch = image_preprocess(patch) patch_net_12.append(patch) patch_net_12 = np.array(patch_net_12) # 预测人脸 pred_cal_12 = sess.run(net_12['pred'], feed_dict={net_12['imgs']: patch_net_12}) window_net = window_net_12 # print (pred_cal_12) windows = [] for i, pred in enumerate(pred_cal_12): # 概率大于0.8的判定为人脸。 s = np.where(pred[1]>p_12)[0] if len(s)==0: continue #保存窗口位置和概率。 windows.append([window_net[i][0],window_net[i][1],window_net[i][2],window_net[i][3],pred[1]]) # 按照概率值 由大到小排序 windows = np.asarray(windows) windows = py_nms(windows, overlapThresh_12, 'Union') window_net = windows for box in window_net: lt_x = int(float(box[0])*w/w_12) lt_y = int(float(box[1])*h/h_12) rb_x = int(float(box[2])*w/w_12) rb_y = int(float(box[3])*h/h_12) p_box = box[4] window_after_12.append([lt_x, lt_y, rb_x, rb_y, p_box]) # 按照概率值 由大到小排序 # window_after_12 = np.asarray(window_after_12) # window_net = py_nms(window_after_12, overlapThresh_12, 'Union') window_net = window_after_12 print (window_net) # net-24 windows_24 = [] if window_net == []: print "windows is None!" 
if window_net != []: patch_net_24 = [] img_24 = image for box in window_net: patch = img_24[box[0]:box[2], box[1]:box[3], :] patch = cv2.resize(patch, (24, 24)) # 做归一化处理 patch = image_preprocess(patch) patch_net_24.append(patch) # 预测人脸 pred_net_24 = sess.run(net_24['pred'], feed_dict={net_24['imgs']: patch_net_24}) print (pred_net_24) window_net = window_net # print (pred_net_24) for i, pred in enumerate(pred_net_24): s = np.where(pred[1]>p_24)[0] if len(s)==0: continue windows_24.append([window_net[i][0],window_net[i][1],window_net[i][2],window_net[i][3],pred[1]]) # 按照概率值 由大到小排序 windows_24 = np.asarray(windows_24) #window_net = nms_max(windows_24, overlapThresh=0.7) window_net = py_nms(windows_24, overlapThresh_24, 'Union') if window_net == []: print "windows is None!" if window_net != []: print(window_net.shape) for box in window_net: #ImageDraw.Draw(image).rectangle((box[1], box[0], box[3], box[2]), outline = "red") cv2.rectangle(image, (int(box[1]),int(box[0])), (int(box[3]),int(box[2])), (0, 255, 0), 2) cv2.imwrite("images/face_img.jpg", image) cv2.imshow("face detection", image) cv2.waitKey(10000) cv2.destroyAllWindows() coord.request_stop() coord.join(threads) sess.close()检测结果:(下面的重叠窗口可以通过设置overlapThresh去除)
- 人脸检测——滑动窗口篇(训练和实现)
- 人脸检测及识别python实现系列(3)——为模型训练准备人脸数据
- FastRCNN 训练自己数据集(三)——训练和检测
- 【leetcode】滑动窗口法训练
- 【人脸检测】“人脸训练代码”项目笔记(4)——代码结构分析:训练部分结构
- 人脸检测源码解析——1、训练参数
- 【人脸检测】“人脸训练代码”项目笔记(1)——头文件
- 【人脸检测】“人脸训练代码”项目笔记(2)——代码部分
- 【人脸检测】“人脸训练代码”项目笔记(3)——代码结构分析
- 使用滑动窗口进行人脸检测 Face detection with a sliding window
- 训练自己的人脸检测分类器(级联+LBP的Matlab的实现)
- 人脸检测和对齐--MTCNN训练1--P-net
- 滑动窗口——TCP可靠传输的实现
- 通信网络实验——滑动窗口协议模拟实现
- 【Angular】——BootStrap+Swiper实现手机端滑动窗口
- 【算法——Python实现】滑动窗口解决数组问题
- 人脸检测训练样本
- 人脸检测 训练心得
- mysql索引总结----mysql 索引类型以及创建
- 【开发笔记】JS中encodeURI与encodeURIComponent
- 求三维凸包重心到表面最短距离
- angular的缓存机制
- nodeJS入门——新建一个项目及代码详解
- 人脸检测——滑动窗口篇(训练和实现)
- Codeforces 842 D Vitya and Strange Lesson 线段树 (未理解透)
- 51nod 1185 威佐夫游戏 V2 (用乘法模拟解决大数精度问题)
- 青岛往事-四
- 类加载器入了个门
- python
- ZooKeeper_9_Java操作ZK_检测节点
- ZooKeeper之以复制模式(replicated mode)运行(Windows环境)(二)
- 怎么把java代码转到jsp页面中