theano-xnor-net代码注释8 xnornet_layers.py

来源:互联网 发布:python 99乘法表思路 编辑:程序博客网 时间:2024/06/08 15:41
""" Class and method definition for the layers in XNOR-Net"""
import time

import lasagne
import numpy as np
import theano
import theano.tensor as T
import theano.tensor.nnet

from fxp_helper import to_fixed_point_theano


def SignTheano(x):
    """Symbolic sign function: +1 where ``x >= 0``, -1 elsewhere.

    ``T.ge(x, 0)`` yields 1 where the comparison holds and 0 otherwise, so
    ``2 * ge - 1`` maps the result onto {+1, -1}.  The result is cast to
    ``theano.config.floatX``.
    """
    return T.cast(2. * T.ge(x, 0) - 1., theano.config.floatX)


def SignNumpy(x):
    """NumPy counterpart of :func:`SignTheano`, returning float32 values.

    ``np.greater_equal(x, 0)`` is 1 where ``x >= 0`` and 0 otherwise, so the
    output is +1 for non-negative entries and -1 for negative ones.
    """
    return np.float32(2. * np.greater_equal(x, 0) - 1.)


def binarize_conv_input(conv_input, k):
    """Binarize the input of a convolution layer and compute its scaling map.

    Parameters
    ----------
    conv_input : theano tensor
        Input activations of the convolution layer.
    k : theano variable
        Averaging filter used to turn ``|conv_input|`` into the scaling
        factors.  It must be evaluable without inputs (``k.eval()`` is called
        to read its shape), e.g. a shared variable.

    Returns
    -------
    (bin_conv_out, K)
        ``bin_conv_out`` is ``sign(conv_input)`` (the matrix H in the paper)
        and ``K`` is ``|conv_input|`` convolved with ``k``.
    """
    # H = sign(X)
    bin_conv_out = SignTheano(conv_input)

    # Scaling factor for the activation.  Unlike the paper, the absolute
    # values are NOT averaged over the channel axis here; the 1/c factor is
    # folded into the averaging filter ``k`` instead, so convolving with ``k``
    # gives the same result.
    A = T.abs_(conv_input)

    # K holds the scaling matrices for each input in the batch.  It has one
    # output channel per filter contained in ``k``.
    k_shape = k.eval().shape
    # Pad by half the filter size so that, for odd filter sizes, the output
    # keeps the spatial size of the input.  Floor division is required: a
    # true division would produce floats on Python 3, which are not valid
    # padding amounts.  (Other border modes are e.g. 'valid' and 'full'.)
    pad = (k_shape[-2] // 2, k_shape[-1] // 2)
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)
    return bin_conv_out, K


def binarize_fc_input(fc_input):
    """Binarize the input of a fully connected layer.

    Returns
    -------
    (bin_out, beta)
        ``bin_out`` is ``sign(fc_input)``; ``beta`` is the per-sample mean of
        ``|fc_input|`` taken over every axis except axis 0.
    """
    bin_out = SignTheano(fc_input)

    if fc_input.ndim == 4:
        # Previous layer is conv or pooling, hence compute the mean absolute
        # value using all maps (axes 1, 2 and 3; axis 0 is the batch).
        beta = T.mean(T.abs_(fc_input), axis=[1, 2, 3])
    else:
        # Feeding layer is an FC layer: average over axis 1 only.
        beta = T.mean(T.abs_(fc_input), axis=1)

    return bin_out, beta


class Conv2DLayer(lasagne.layers.Conv2DLayer):
    """ Binary convolution layer which performs convolution using XNOR and popcount operations.
    This is followed by the scaling with input and weight scaling factors K and alpha respectively.
    """

    def __init__(self, incoming, num_filters, filter_size, format='float', data_bits=15, int_bits=0, **kwargs):
        """
        Parameters
        -----------
        incoming : layer or tuple
            Input layer to this layer. If this is fed by a data layer then this is a tuple representing input dimensions.
        num_filters: int
            Number of 3D filters present in this layer = No of feature maps generated by this layer
        filter_size: tuple
            Filter size of this layer. Leading dimension is = no of input feature maps.
        format : str
            'fixed' enables the fixed point simulation in ``convolve``; any
            other value selects the floating point path.
        data_bits, int_bits : int
            Bit widths forwarded to ``to_fixed_point_theano`` in fixed mode.
        """
        # Initialise the parent first so that the normalised ``input_shape``
        # and ``filter_size`` attributes are available.  Reading them from
        # ``self`` (instead of ``incoming.output_shape`` / the raw argument)
        # also works when ``incoming`` is a plain shape tuple.
        # NOTE(review): relies on Lasagne normalising an integer filter_size
        # into a tuple on ``self.filter_size`` -- confirm for the version used.
        super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size, **kwargs)

        # Depth of the incoming feature maps.
        no_inputs = self.input_shape[1]
        filter_h, filter_w = self.filter_size
        # (number of filters, filter depth, filter rows, filter columns)
        shape = (num_filters, no_inputs, filter_h, filter_w)

        # Average filter to compute the activation scaling factor.  Every
        # entry is 1 / (c * h * w), i.e. the paper's k = 1/(w*h) with the extra
        # 1/c channel averaging folded in; binarize_conv_input uses it as k.
        beta_filter = np.ones(shape=shape).astype(np.float32) / (no_inputs * filter_h * filter_w)
        self.beta_filter = self.add_param(beta_filter, shape, name='beta_filter', trainable=False, regularizable=False)

        # Binary weight scaling factor: one value per filter, initialised to 0.1.
        xalpha = lasagne.init.Constant(0.1)
        self.xalpha = self.add_param(xalpha, [num_filters,], name='xalpha', trainable=False, regularizable=False)

        # Params for fixed point simulation.  The bit widths are kept as
        # Theano shared variables.
        self.format = format
        self.data_bits = theano.shared(data_bits)
        self.int_bits = theano.shared(int_bits)

    def convolve(self, input, deterministic=False, **kwargs):
        """ Binary convolution. Both inputs and weights are binary (+1 or -1)
        This overrides convolve operation from Conv2DLayer implementation
        """
        use_fixed_point = (self.format == 'fixed')

        if use_fixed_point:
            # Reduce precision of input as this influences computation of beta.
            input = to_fixed_point_theano(input, self.data_bits, self.int_bits)

        # Compute the binary inputs H and the scaling matrix K.
        bin_input, K = binarize_conv_input(input, self.beta_filter)

        # self.W must be already binarized during model init. Here we just
        # call the parent's conv method.
        feat_maps = super(Conv2DLayer, self).convolve(bin_input, **kwargs)

        # Scale by K and alpha; xalpha is broadcast as (1, num_filters, 1, 1).
        # FIXME (original author): Actually we are scaling after adding bias
        # here. Need to scale first and then add bias. The super class method
        # automatically adds bias. Somehow need to overcome this.. may subtract
        # the bias, scale by alpha and beta and then add bias?
        # NOTE(review): whether the parent's convolve adds the bias depends on
        # the Lasagne version -- confirm.
        feat_maps = feat_maps * K
        feat_maps = feat_maps * self.xalpha.dimshuffle('x', 0, 'x', 'x')

        if use_fixed_point:
            # Reduce the precision of the output as well.
            feat_maps = to_fixed_point_theano(feat_maps, self.data_bits, self.int_bits)

        return feat_maps


class DenseLayer(lasagne.layers.DenseLayer):
    """Binary version of fully connected layer. XNOR and bitcount ops are used for
    this in a similar fashion as that of Conv Layer.
    """

    def __init__(self, incoming, num_units, format='float', data_bits=15, int_bits=0, **kwargs):
        """ XNOR-Net fully connected layer

        ``format``, ``data_bits`` and ``int_bits`` have the same meaning as in
        :class:`Conv2DLayer`; remaining keyword arguments go to the parent.
        """
        super(DenseLayer, self).__init__(incoming, num_units, **kwargs)

        # xnor-net dense layer requires one more new parameter for weight
        # scaling: one value per unit, initialised to zero.
        xalpha = np.zeros(shape=(num_units,), dtype=np.float32)
        self.xalpha = self.add_param(xalpha, xalpha.shape, name='xalpha', trainable=False, regularizable=False)

        # Params for fixed point simulation.
        self.format = format
        self.data_bits = theano.shared(data_bits)
        self.int_bits = theano.shared(int_bits)

    def get_output_for(self, input, deterministic=True, **kwargs):
        """ Binary dense layer dot product computation
        """
        use_fixed_point = (self.format == 'fixed')

        if use_fixed_point:
            # Reduce precision of input as this influences computation of beta.
            input = to_fixed_point_theano(input, self.data_bits, self.int_bits)

        # Binarize the input.  beta has the same range as the input because
        # it is just the mean absolute value of the input.
        bin_input, beta = binarize_fc_input(input)

        # Dot-product at full precision.
        fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)

        # Scale the output by alpha and beta.
        # FIXME (original author): Actually we are scaling after adding bias
        # here. Need to scale first and then add bias. The super class method
        # automatically adds bias. Somehow need to overcome this.. may subtract
        # the bias, scale by alpha and beta and then add bias?
        fc_out = fc_out * beta.dimshuffle(0, 'x')
        fc_out = fc_out * self.xalpha.dimshuffle('x', 0)

        if use_fixed_point:
            # Reduce the precision of the output based on the data bit widths specified.
            fc_out = to_fixed_point_theano(fc_out, self.data_bits, self.int_bits)

        return fc_out
原创粉丝点击