Faster-RCNN_TF Code Walkthrough 8: networks.py
import numpy as np
import tensorflow as tf
import roi_pooling_layer.roi_pooling_op as roi_pool_op
import roi_pooling_layer.roi_pooling_op_grad
from rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py
from rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py
from rpn_msr.proposal_target_layer_tf import proposal_target_layer as proposal_target_layer_py

DEFAULT_PADDING = 'SAME'

def layer(op):
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        # op.__name__ is the name of the wrapped method (conv, max_pool, ...);
        # get_unique_name returns something like 'conv_4', which is stored in
        # the kwargs dict under the key 'name'.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.inputs) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.inputs) == 1:
            # exactly one pending input (e.g. right after the input layer): take it directly
            layer_input = self.inputs[0]
        else:
            layer_input = list(self.inputs)
        # Perform the operation and get the output.
        # The conv/pool/... op is actually invoked here, not at the call site of the
        # decorated method. This is also why the method definitions below appear to
        # take one more argument than their callers supply: the input argument is
        # filled in here by the decorator.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT: record this layer's output under its name.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self
    return layer_decorated

class Network(object):
    def __init__(self, inputs, trainable=True):
        self.inputs = []
        self.layers = dict(inputs)
        self.trainable = trainable
        self.setup()

    def setup(self):
        raise NotImplementedError('Must be subclassed.')

    def load(self, data_path, session, saver, ignore_missing=False):
        if data_path.endswith('.ckpt'):
            saver.restore(session, data_path)
        else:
            data_dict = np.load(data_path).item()
            for key in data_dict:
                with tf.variable_scope(key, reuse=True):
                    for subkey in data_dict[key]:
                        try:
                            var = tf.get_variable(subkey)
                            session.run(var.assign(data_dict[key][subkey]))
                            print "assign pretrain model "+subkey+" to "+key
                        except ValueError:
                            print "ignore "+key
                            if not ignore_missing:
                                raise

    # *args collects the extra positional arguments into a tuple; keyword arguments
    # would require the signature feed(self, *args, **kwargs), where **kwargs is a dict.
    # self.layers is a dict, self.inputs is a list.
    def feed(self, *args):
        # feed() must be called with at least one argument
        assert len(args) != 0
        self.inputs = []
        for layer in args:
            # a string argument is a layer name
            if isinstance(layer, basestring):
                # self.layers is populated by the subclass (e.g. VGGnet_train)
                try:
                    # replace the name with the actual tensor; at this point it is
                    # still just a node in the dataflow graph, nothing has run yet
                    layer = self.layers[layer]
                    print layer
                except KeyError:
                    print self.layers.keys()
                    raise KeyError('Unknown layer name fed: %s' % layer)
            # store the resolved layer output in the inputs list
            self.inputs.append(layer)
        return self

    def get_output(self, layer):
        try:
            # self.layers is filled in by the subclass (VGGnet_train.py); it maps
            # each layer name to that layer's output
            layer = self.layers[layer]
        except KeyError:
            print self.layers.keys()
            raise KeyError('Unknown layer name fed: %s' % layer)
        return layer
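Before the individual layer methods, here is a stripped-down, self-contained sketch of how the decorator, feed() and the layers dict cooperate. ToyNetwork, ToyVGG and the tuple "tensors" below are hypothetical stand-ins, not repo code; the point is only the mechanism: each decorated method pulls its input from self.inputs, records its output in self.layers under an auto-generated name, and re-feeds that output so calls can be chained.

def layer(op):
    def layer_decorated(self, *args, **kwargs):
        # auto-name the layer, e.g. 'conv_1', 'conv_2', ...
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        layer_input = self.inputs[0] if len(self.inputs) == 1 else list(self.inputs)
        layer_output = op(self, layer_input, *args, **kwargs)  # run the real op here
        self.layers[name] = layer_output                       # record it by name
        self.feed(layer_output)                                # it becomes the next input
        return self                                            # enable chaining
    return layer_decorated

class ToyNetwork(object):
    def __init__(self):
        self.inputs = []
        self.layers = {'data': ('placeholder',)}   # tuples stand in for tensors

    def feed(self, *args):
        self.inputs = []
        for layer in args:
            if isinstance(layer, str):             # a name: look up its output
                layer = self.layers[layer]
            self.inputs.append(layer)
        return self

    def get_unique_name(self, prefix):
        return '%s_%d' % (prefix, sum(k.startswith(prefix) for k in self.layers) + 1)

class ToyVGG(ToyNetwork):
    @layer
    def conv(self, input, name):
        return ('conv', input)                     # stand-in for tf.nn.conv2d

net = ToyVGG()
net.feed('data').conv().conv()                     # conv_2 consumes conv_1's output
print(sorted(net.layers))                          # ['conv_1', 'conv_2', 'data']

In the real VGGnet_train subclass, setup() chains calls exactly like this, which is why conv() and friends are declared with an input parameter that the caller never passes explicitly. Back to networks.py, the remaining helpers and layer ops follow.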
    # Returns a unique name for a layer; prefix is the op name passed in (conv, max_pool, ...).
    # self.layers is a dict; items() makes it iterable.
    def get_unique_name(self, prefix):
        # startswith() checks whether a string begins with the given prefix and returns
        # True/False; count how many existing keys start with the prefix and add 1
        id = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        # the result is something like 'conv_4'
        return '%s_%d' % (prefix, id)

    # Create a variable in the TensorFlow graph
    def make_var(self, name, shape, initializer=None, trainable=True):
        return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)

    # Check that the padding type is one of the allowed values
    def validate_padding(self, padding):
        assert padding in ('SAME', 'VALID')

    @layer
    # Because of the decorator above, the actual conv computation is triggered in
    # layer_decorated, not at the call site of this method.
    def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True):
        # padding must be either SAME or VALID
        self.validate_padding(padding)
        # input has shape [batch, in_height, in_width, in_channels];
        # c_i is the input depth, c_o the output depth (i.e. the number of kernels)
        c_i = input.get_shape()[-1]
        assert c_i % group == 0
        assert c_o % group == 0
        # In conv2d the first and last entries of strides must be 1: the first is the
        # stride over the batch dimension, the last the stride over depth.
        # The lambda just defines a helper; i is the input, k the kernel. Internally conv2d:
        # 1. flattens the filter to a 2-D matrix of shape
        #    [filter_height*filter_width*in_channels, output_channels]
        # 2. extracts patches from the input into a tensor of shape
        #    [batch, out_height, out_width, filter_height*filter_width*in_channels]
        # 3. computes sum_{di,dj,q} input[b, strides[1]*i+di, strides[2]*j+dj, q] * filter[di,dj,q,k]
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            # truncated-normal weight initialization (mean=0, stddev=0.01)
            init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
            # biases initialized to 0
            init_biases = tf.constant_initializer(0.0)
            # make_var wraps tf.get_variable; weights shape: [height, width, depth, num_kernels]
            kernel = self.make_var('weights', [k_h, k_w, c_i/group, c_o], init_weights, trainable)
            biases = self.make_var('biases', [c_o], init_biases, trainable)
            if group == 1:
                conv = convolve(input, kernel)
            else:
                # if group > 1, split the input and the kernel along the fourth
                # (depth) dimension into `group` equal parts
                input_groups = tf.split(3, group, input)
                kernel_groups = tf.split(3, group, kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # concatenate the per-group results back together
                conv = tf.concat(3, output_groups)
            if relu:
                bias = tf.nn.bias_add(conv, biases)
                return tf.nn.relu(bias, name=scope.name)
            return tf.nn.bias_add(conv, biases, name=scope.name)

    @layer
    def relu(self, input, name):
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
        # only use the first input
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        if isinstance(input[1], tuple):
            input[1] = input[1][0]
        print input
        return roi_pool_op.roi_pool(input[0], input[1],
                                    pooled_height,
                                    pooled_width,
                                    spatial_scale,
                                    name=name)[0]

    @layer
    def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name):
        # cfg_key is 'TRAIN'
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        # returns the proposal blob [proposal index (all zeros), proposal] of shape
        # (proposals.shape[0], 5): one column for the index, four for the box
        return tf.reshape(tf.py_func(proposal_layer_py,
                                     [input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales],
                                     [tf.float32]),
                          [-1, 5], name=name)
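As a side note on the SAME/VALID padding that validate_padding() allows: the output spatial size of the conv and pooling layers above follows the usual TensorFlow rule. Here is a small NumPy sketch (out_size is a hypothetical helper for illustration, not repo code):

import numpy as np

def out_size(in_size, k, s, padding):
    # TensorFlow's output-size rule for conv2d / max_pool
    if padding == 'SAME':
        return int(np.ceil(float(in_size) / s))       # independent of kernel size
    elif padding == 'VALID':
        return int(np.ceil(float(in_size - k + 1) / s))
    raise ValueError(padding)

# e.g. a 7x7 kernel with stride 2 and 'SAME' padding on a 600x800 input
print(out_size(600, 7, 2, 'SAME'), out_size(800, 7, 2, 'SAME'))   # 300 400
# a 2x2/2 'VALID' pool halves the spatial size
print(out_size(224, 2, 2, 'VALID'))                               # 112

The walkthrough now continues with the RPN target layers.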
    @layer
    def anchor_target_layer(self, input, _feat_stride, anchor_scales, name):
        # input is a list built from 'rpn_cls_score', 'gt_boxes', 'im_info' and 'data';
        # input[0] is the output of 'rpn_cls_score'
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        with tf.variable_scope(name) as scope:
            # tf.py_func turns an arbitrary Python function into a TensorFlow op:
            # tf.py_func(func, inp, Tout, stateful=True, name=None), where func is the
            # Python function, inp the list of inputs (seen as ndarrays) and Tout the
            # list of output dtypes. Here the inputs are
            # [input[0], input[1], input[2], input[3], _feat_stride, anchor_scales]
            # and the outputs are four tf.float32 tensors.
            # rpn_labels holds the label of every anchor (-1, 0 or 1);
            # rpn_bbox_targets holds the four regression targets of every anchor
            # (all zeros for anchors labelled -1); rpn_bbox_inside_weights and
            # rpn_bbox_outside_weights are two weight arrays with different
            # initialization schemes.
            rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
                tf.py_func(anchor_target_layer_py,
                           [input[0], input[1], input[2], input[3], _feat_stride, anchor_scales],
                           [tf.float32, tf.float32, tf.float32, tf.float32])
            # wrap the py_func outputs with tf.convert_to_tensor so that the rest of
            # the graph works with proper tf.Tensor objects
            rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name='rpn_labels')
            rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name='rpn_bbox_targets')
            rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights, name='rpn_bbox_inside_weights')
            rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights, name='rpn_bbox_outside_weights')
            return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights

    @layer
    def proposal_target_layer(self, input, classes, name):
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        with tf.variable_scope(name) as scope:
            # produces the sampled rois, their labels, and three matrices of shape
            # (len(rois), 4*21): bbox_targets has (dx, dy, dw, dh) written into the four
            # columns of the ground-truth class for each foreground roi, and the two
            # weight matrices have (1, 1, 1, 1) written into the same positions
            rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
                tf.py_func(proposal_target_layer_py,
                           [input[0], input[1], classes],
                           [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])
            rois = tf.reshape(rois, [-1, 5], name='rois')
            # again convert the py_func outputs into proper tf.Tensor objects
            labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels')
            bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets')
            bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights')
            bbox_outside_weights = tf.convert_to_tensor(bbox_outside_weights, name='bbox_outside_weights')
            return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

    @layer
    def reshape_layer(self, input, d, name):
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob_reshape':
            # invert the earlier reshape: put rpn_cls_prob back into the
            # [1, n, n, 18] layout of rpn_cls_score
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]),
                                           [input_shape[0],
                                            int(d),
                                            tf.cast(tf.cast(input_shape[1], tf.float32) / tf.cast(d, tf.float32) * tf.cast(input_shape[3], tf.float32), tf.int32),
                                            input_shape[2]]),
                                [0, 2, 3, 1], name=name)
        else:
            # If rpn_cls_score has shape [1, n, n, 18], the result has shape [1, 9n, n, 2].
            # For example, with shape [1, 3, 3, 18]: after the transpose to channel-first
            # order the first 81 elements (3*3*9) are the bg scores and the last 81 the fg
            # scores; the final transpose then pairs the bg and fg score of each
            # (position, anchor) along the last axis. A softmax is still needed afterwards
            # to turn these scores into fg/bg probabilities.
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]),
                                           [input_shape[0],
                                            int(d),
                                            tf.cast(tf.cast(input_shape[1], tf.float32) * (tf.cast(input_shape[3], tf.float32) / tf.cast(d, tf.float32)), tf.int32),
                                            input_shape[2]]),
                                [0, 2, 3, 1], name=name)

    @layer
    def feature_extrapolating(self, input, scales_base, num_scale_base, num_per_octave, name):
        # note: feature_extrapolating_op is not imported at the top of this file
        return feature_extrapolating_op.feature_extrapolating(input,
                                                              scales_base,
                                                              num_scale_base,
                                                              num_per_octave,
                                                              name=name)
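The effect of the else branch of reshape_layer on rpn_cls_score is easiest to see numerically. Below is a NumPy sketch of the same transpose/reshape/transpose chain (H, W and the anchor count A are illustrative values, not taken from the repo):

import numpy as np

H, W, A = 3, 3, 9                                  # A anchors per location, 2*A = 18 channels
x = np.arange(1 * H * W * 2 * A).reshape(1, H, W, 2 * A)   # stand-in for rpn_cls_score
d = 2
y = np.transpose(
        np.reshape(np.transpose(x, (0, 3, 1, 2)),  # NHWC -> NCHW
                   (1, d, (2 * A // d) * H, W)),   # split channels into d groups
        (0, 2, 3, 1))                              # back to channels-last
print(y.shape)        # (1, 27, 3, 2)
print(y[0, 0, 0, :])  # [0 9]: channel 0 (bg score, anchor 0) pairs with channel 9 (fg score, anchor 0)

Next come the remaining layer ops (lrn, concat, fc, softmax, dropout).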
    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        return tf.concat(concat_dim=axis, values=inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True, trainable=True):
        with tf.variable_scope(name) as scope:
            # only use the first input
            if isinstance(input, tuple):
                input = input[0]
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # flatten a 4-D input into [batch, height*width*channels],
                # transposing to channel-first order before the reshape
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [-1, dim])
            else:
                feed_in, dim = (input, int(input_shape[-1]))
            # the bbox regression head uses a smaller weight stddev than the other fc layers
            if name == 'bbox_pred':
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.001)
                init_biases = tf.constant_initializer(0.0)
            else:
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
                init_biases = tf.constant_initializer(0.0)
            weights = self.make_var('weights', [dim, num_out], init_weights, trainable)
            biases = self.make_var('biases', [num_out], init_biases, trainable)
            # relu_layer fuses xw_plus_b with a ReLU; otherwise just compute xW + b
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    # multinomial logistic regression (softmax)
    def softmax(self, input, name):
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob':
            # apply softmax over the last dimension (the bg/fg pair), then restore the shape
            return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])),
                              [-1, input_shape[1], input_shape[2], input_shape[3]],
                              name=name)
        else:
            return tf.nn.softmax(input, name=name)

    @layer
    # dropout helps prevent overfitting
    def dropout(self, input, keep_prob, name):
        return tf.nn.dropout(input, keep_prob, name=name)
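One detail of fc() worth dwelling on is the transpose before the reshape when the input is 4-D: the tensor is flattened in channel-first (NCHW) order, presumably so that the flattened rows match the ordering expected by the converted pretrained fully-connected weights. A NumPy sketch of that flattening (the shapes are typical VGG pool5 sizes, used here only for illustration):

import numpy as np

x = np.arange(2 * 7 * 7 * 512).reshape(2, 7, 7, 512)       # e.g. a pooled feature map, NHWC
dim = int(np.prod(x.shape[1:]))                             # 7*7*512 = 25088
feed_in = np.transpose(x, (0, 3, 1, 2)).reshape(-1, dim)    # NHWC -> NCHW, then flatten
print(feed_in.shape)      # (2, 25088)
print(feed_in[0, :3])     # [0 512 1024]: walks the spatial positions of channel 0 first
# The layer then computes feed_in.dot(weights) + biases with weights of shape
# [dim, num_out], followed by a ReLU unless relu=False (as for 'bbox_pred').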