Faster-RCNN_TF Code Walkthrough 8: networks.py

import numpy as np
import tensorflow as tf
import roi_pooling_layer.roi_pooling_op as roi_pool_op
import roi_pooling_layer.roi_pooling_op_grad
from rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py
from rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py
from rpn_msr.proposal_target_layer_tf import proposal_target_layer as proposal_target_layer_py

DEFAULT_PADDING = 'SAME'

def layer(op):
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        # op.__name__ is the name of the wrapped op function (conv, max_pool, ...);
        # get_unique_name returns something like 'conv_4', which is stored in
        # the kwargs dict as name='conv_4'.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.inputs) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        # Exactly one pending input: take it directly.
        elif len(self.inputs) == 1:
            layer_input = self.inputs[0]
        else:
            layer_input = list(self.inputs)
        # Perform the operation and get the output.
        # The conv/pool op actually runs HERE, not at the decorated function
        # definitions below -- which is why those definitions seem to take one
        # more parameter than the call sites pass: `input` is supplied by the
        # decorator.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT: record this layer's output under its name.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self
    return layer_decorated
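To see why layer_decorated returns self, here is a minimal sketch of the same decorator pattern with the TensorFlow ops stripped out, reusing the layer decorator just defined. The MiniNet and tag names are illustrative only, not part of the repo; toy "tensors" are plain lists. Each decorated call names itself, records its output in self.layers, and re-feeds that output, so calls chain naturally:

class MiniNet(object):
    def __init__(self):
        self.inputs = []
        self.layers = {'data': ['data']}  # toy stand-in for the input tensor
        self.feed('data')

    def feed(self, *args):
        assert len(args) != 0
        self.inputs = [self.layers[a] if isinstance(a, str) else a
                       for a in args]
        return self

    def get_unique_name(self, prefix):
        id = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, id)

    @layer
    def tag(self, input, name):
        # stand-in for conv/max_pool/...: just append this layer's name
        return input + [name]

net = MiniNet().tag().tag()
print net.layers['tag_2']   # ['data', 'tag_1', 'tag_2']

The repo's subclasses build their graphs in the same chained style, along the lines of self.feed('data').conv(3, 3, 64, 1, 1, name='conv1_1').max_pool(2, 2, 2, 2, name='pool1').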
class Network(object):
    def __init__(self, inputs, trainable=True):
        self.inputs = []
        self.layers = dict(inputs)
        self.trainable = trainable
        self.setup()

    def setup(self):
        raise NotImplementedError('Must be subclassed.')

    def load(self, data_path, session, saver, ignore_missing=False):
        if data_path.endswith('.ckpt'):
            saver.restore(session, data_path)
        else:
            data_dict = np.load(data_path).item()
            for key in data_dict:
                with tf.variable_scope(key, reuse=True):
                    for subkey in data_dict[key]:
                        try:
                            var = tf.get_variable(subkey)
                            session.run(var.assign(data_dict[key][subkey]))
                            print "assign pretrain model " + subkey + " to " + key
                        except ValueError:
                            print "ignore " + key
                            if not ignore_missing:
                                raise

    # *args collects the extra unnamed arguments in a tuple; to accept keyword
    # arguments as well the signature would have to be feed(self, *args, **kwargs)
    # (**kwargs is a dict). self.layers is a dict, self.inputs a list.
    def feed(self, *args):
        # Raise if called with no arguments.
        assert len(args) != 0
        self.inputs = []
        for layer in args:
            # A string argument is looked up by name.
            if isinstance(layer, basestring):
                # self.layers is populated by the subclass (e.g. VGGnet_train).
                try:
                    # Resolve the name to the actual tensor (still just a node
                    # in the dataflow graph; nothing has been executed yet).
                    layer = self.layers[layer]
                    print layer
                except KeyError:
                    print self.layers.keys()
                    raise KeyError('Unknown layer name fed: %s' % layer)
            # Queue the resolved layer as a pending input.
            self.inputs.append(layer)
        return self

    def get_output(self, layer):
        try:
            # self.layers (filled in by the subclass, e.g. VGGnet_train.py)
            # maps each layer name to its output.
            layer = self.layers[layer]
        except KeyError:
            print self.layers.keys()
            raise KeyError('Unknown layer name fed: %s' % layer)
        return layer

    # Build a unique name; prefix is e.g. 'conv' or 'max_pool'.
    def get_unique_name(self, prefix):
        # startswith() checks whether a string begins with the given prefix;
        # count the existing keys that start with it and add 1 for the new id.
        id = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        # Yields something like 'conv_4'.
        return '%s_%d' % (prefix, id)

    # Thin wrapper that creates a variable via tf.get_variable.
    def make_var(self, name, shape, initializer=None, trainable=True):
        return tf.get_variable(name, shape, initializer=initializer, trainable=trainable)

    # padding must be one of 'SAME' and 'VALID'.
    def validate_padding(self, padding):
        assert padding in ('SAME', 'VALID')

    @layer
    # Because of the @layer decorator, the actual conv call happens inside
    # layer_decorated above, which supplies `input`.
    def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True):
        # padding must be 'SAME' or 'VALID'.
        self.validate_padding(padding)
        # input has shape [batch, in_height, in_width, in_channels];
        # c_i is the input depth, c_o the output depth (number of filters).
        c_i = input.get_shape()[-1]
        assert c_i % group == 0
        assert c_o % group == 0
        # conv2d requires strides[0] == strides[3] == 1 (the batch and depth
        # strides). The lambda just binds i (input) and k (kernel); internally
        # conv2d
        #   1. flattens the filter into a 2-D matrix of shape
        #      [filter_height*filter_width*in_channels, output_channels],
        #   2. extracts patches from the input into a tensor of shape
        #      [batch, out_height, out_width, filter_height*filter_width*in_channels],
        #   3. computes sum_{di,dj,q} input[b, strides[1]*i+di, strides[2]*j+dj, q]
        #      * filter[di, dj, q, k].
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            # Truncated-normal weight initializer, mean=0, stddev=0.01.
            init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
            # Biases initialized to zero.
            init_biases = tf.constant_initializer(0.0)
            # make_var builds the variables via tf.get_variable;
            # weights shape: [height, width, depth per group, number of filters].
            kernel = self.make_var('weights', [k_h, k_w, c_i / group, c_o], init_weights, trainable)
            biases = self.make_var('biases', [c_o], init_biases, trainable)
            if group == 1:
                conv = convolve(input, kernel)
            else:
                # group > 1: split the input along its depth axis (axis 3) and
                # the kernel along its output-channel axis into `group` parts,
                # and convolve each pair separately...
                input_groups = tf.split(3, group, input)
                kernel_groups = tf.split(3, group, kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # ...then stitch the group outputs back together along depth.
                conv = tf.concat(3, output_groups)
            if relu:
                bias = tf.nn.bias_add(conv, biases)
                return tf.nn.relu(bias, name=scope.name)
            return tf.nn.bias_add(conv, biases, name=scope.name)
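The group branch is easiest to see with concrete shapes. A NumPy-only sketch (the shapes are made up for illustration) of the split-and-concat bookkeeping:

import numpy as np

group = 2
x = np.zeros((1, 8, 8, 6))           # input, depth c_i = 6
k = np.zeros((3, 3, 6 // group, 4))  # kernel [k_h, k_w, c_i/group, c_o], c_o = 4
x_groups = np.split(x, group, axis=3)   # two slices of shape (1, 8, 8, 3)
k_groups = np.split(k, group, axis=3)   # two slices of shape (3, 3, 3, 2)
# Each (1, 8, 8, 3) input slice is convolved with a (3, 3, 3, 2) kernel
# slice, giving depth-2 outputs; concatenating on axis 3 restores c_o = 4.

This is also why the method asserts c_i % group == 0 and c_o % group == 0: both splits must come out even.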
    @layer
    def relu(self, input, name):
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def roi_pool(self, input, pooled_height, pooled_width, spatial_scale, name):
        # only use the first input
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        if isinstance(input[1], tuple):
            input[1] = input[1][0]
        print input
        return roi_pool_op.roi_pool(input[0], input[1],
                                    pooled_height,
                                    pooled_width,
                                    spatial_scale,
                                    name=name)[0]

    @layer
    def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name):
        # cfg_key is 'TRAIN' here.
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        # Returns the proposal blob of shape (proposals.shape[0], 5):
        # column 0 is the proposal index (all zeros for a single image),
        # columns 1-4 hold the proposal coordinates.
        return tf.reshape(tf.py_func(proposal_layer_py,
                                     [input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales],
                                     [tf.float32]),
                          [-1, 5], name=name)

    @layer
    def anchor_target_layer(self, input, _feat_stride, anchor_scales, name):
        # input is the list of the 'rpn_cls_score', 'gt_boxes', 'im_info' and
        # 'data' outputs; input[0] is the 'rpn_cls_score' output.
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        with tf.variable_scope(name) as scope:
            # tf.py_func turns an arbitrary python function into a TensorFlow
            # op: tf.py_func(func, inp, Tout, stateful=True, name=None), where
            # func is the python function, inp its inputs (ndarrays) and Tout
            # the declared output types -- here the inputs are
            # [input[0], input[1], input[2], input[3], _feat_stride, anchor_scales]
            # and the outputs [tf.float32, tf.float32, tf.float32, tf.float32].
            # rpn_labels holds the label of every anchor (-1, 0 or 1);
            # rpn_bbox_targets holds the 4 regression targets of every anchor
            # (all zeros for anchors labelled -1); rpn_bbox_inside_weights and
            # rpn_bbox_outside_weights are two weight blobs that differ only in
            # how they are initialized.
            rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = tf.py_func(
                anchor_target_layer_py,
                [input[0], input[1], input[2], input[3], _feat_stride, anchor_scales],
                [tf.float32, tf.float32, tf.float32, tf.float32])
            # Pass the py_func outputs through tf.convert_to_tensor to make
            # sure we are handling proper Tensors from here on.
            rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name='rpn_labels')
            rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name='rpn_bbox_targets')
            rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights, name='rpn_bbox_inside_weights')
            rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights, name='rpn_bbox_outside_weights')
            return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
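Since both this layer and proposal_layer lean on tf.py_func, a minimal standalone example of the mechanism may help (the double function here is made up for illustration):

import numpy as np
import tensorflow as tf

def double(x):
    # runs as ordinary Python/NumPy when the graph is executed
    return (x * 2).astype(np.float32)

inp = tf.constant(np.arange(4, dtype=np.float32))
out = tf.py_func(double, [inp], [tf.float32])[0]
# TF cannot infer the output shape or static type of a py_func result,
# which is why the code above follows every py_func call with
# tf.reshape / tf.cast / tf.convert_to_tensor.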
    @layer
    def proposal_target_layer(self, input, classes, name):
        if isinstance(input[0], tuple):
            input[0] = input[0][0]
        with tf.variable_scope(name) as scope:
            # Produces the sampled rois, their labels, and three matrices of
            # shape (len(rois), 4*21): bbox_targets writes (dx, dy, dw, dh)
            # into the 4 columns of the matching class for each fg roi; the
            # two weight matrices write (1, 1, 1, 1) into the same positions.
            rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = tf.py_func(
                proposal_target_layer_py,
                [input[0], input[1], classes],
                [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32])
            rois = tf.reshape(rois, [-1, 5], name='rois')
            # Pass the py_func outputs through tf.convert_to_tensor to make
            # sure we are handling proper Tensors from here on.
            labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels')
            bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets')
            bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights')
            bbox_outside_weights = tf.convert_to_tensor(bbox_outside_weights, name='bbox_outside_weights')
            return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights

    @layer
    def reshape_layer(self, input, d, name):
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob_reshape':
            # Restore the spatial layout of rpn_cls_score.
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]),
                                           [input_shape[0],
                                            int(d),
                                            tf.cast(tf.cast(input_shape[1], tf.float32) / tf.cast(d, tf.float32) * tf.cast(input_shape[3], tf.float32), tf.int32),
                                            input_shape[2]]),
                                [0, 2, 3, 1], name=name)
        else:
            # E.g. an rpn_cls_score of shape [1, n, n, 18] is reshaped to
            # [1, 9n, n, 2] (d = 2): channels 0..8 -- the bg scores of the 9
            # anchors at each position -- end up in slice [..., 0], and
            # channels 9..17 (the fg scores) in slice [..., 1]. Each pair
            # scores whether the receptive field of that feature-map position
            # is fg or bg; a further softmax over the last axis turns the pair
            # into actual fg/bg probabilities.
            return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]),
                                           [input_shape[0],
                                            int(d),
                                            tf.cast(tf.cast(input_shape[1], tf.float32) * (tf.cast(input_shape[3], tf.float32) / tf.cast(d, tf.float32)), tf.int32),
                                            input_shape[2]]),
                                [0, 2, 3, 1], name=name)
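The transpose/reshape/transpose dance is hard to read in graph code; here is the same computation in NumPy, with the toy shape [1, 3, 3, 18] and d = 2 assumed in the comment above:

import numpy as np

n, d = 3, 2
x = np.arange(n * n * 18).reshape(1, n, n, 18)  # toy rpn_cls_score
y = x.transpose(0, 3, 1, 2)                     # [1, 18, n, n]
y = y.reshape(1, d, n * 18 // d, n)             # [1, 2, 9n, n]
y = y.transpose(0, 2, 3, 1)                     # [1, 9n, n, 2]
assert y.shape == (1, 9 * n, n, 2)
# Slice [..., 0] contains only values from channels 0..8 (bg scores),
# slice [..., 1] only values from channels 9..17 (fg scores).
assert (y[..., 0] % 18 < 9).all() and (y[..., 1] % 18 >= 9).all()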
    @layer
    def feature_extrapolating(self, input, scales_base, num_scale_base, num_per_octave, name):
        # NOTE: feature_extrapolating_op is never imported at the top of this
        # file, so this layer cannot be used as written.
        return feature_extrapolating_op.feature_extrapolating(input,
                                                              scales_base,
                                                              num_scale_base,
                                                              num_per_octave,
                                                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        return tf.concat(concat_dim=axis, values=inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True, trainable=True):
        with tf.variable_scope(name) as scope:
            # only use the first input
            if isinstance(input, tuple):
                input = input[0]
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # Flatten a 4-D feature map into [batch, dim].
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [-1, dim])
            else:
                feed_in, dim = (input, int(input_shape[-1]))
            # bbox_pred gets a smaller weight stddev than the other fc layers.
            if name == 'bbox_pred':
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.001)
                init_biases = tf.constant_initializer(0.0)
            else:
                init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
                init_biases = tf.constant_initializer(0.0)
            weights = self.make_var('weights', [dim, num_out], init_weights, trainable)
            biases = self.make_var('biases', [num_out], init_biases, trainable)
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    # Multinomial logistic regression (softmax).
    def softmax(self, input, name):
        input_shape = tf.shape(input)
        if name == 'rpn_cls_prob':
            # Flatten to [positions, channels], apply softmax over the channel
            # axis, then restore the original shape.
            return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])),
                              [-1, input_shape[1], input_shape[2], input_shape[3]],
                              name=name)
        else:
            return tf.nn.softmax(input, name=name)

    @layer
    # Dropout to reduce overfitting.
    def dropout(self, input, keep_prob, name):
        return tf.nn.dropout(input, keep_prob, name=name)
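For completeness, the 'rpn_cls_prob' branch in NumPy, using the toy shape [1, 27, 3, 2] from the reshape example above: flatten every spatial position, softmax across the channel axis, and restore the shape.

import numpy as np

x = np.random.randn(1, 27, 3, 2)                    # toy rpn_cls_score_reshape
flat = x.reshape(-1, x.shape[3])                    # [81, 2]
e = np.exp(flat - flat.max(axis=1, keepdims=True))  # numerically stable softmax
probs = (e / e.sum(axis=1, keepdims=True)).reshape(x.shape)
assert np.allclose(probs.sum(axis=3), 1.0)          # bg + fg sum to 1 per anchor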