theano-xnor-net代码注释8 xnornet_layers.py

来源：互联网发布：python 99乘法表思路编辑：程序博客网时间：2024/06/08 15:41
""" Class and method definition for the layers in XNOR-Net"""
import theano
import theano.tensor.nnet
import numpy as np
import lasagne
import theano.tensor as T
import time
from fxp_helper import to_fixed_point_theano


def SignTheano(x):
    """Return the element-wise sign of a Theano tensor as +1 / -1.

    ``T.ge(x, 0)`` is 1 where ``x >= 0`` and 0 elsewhere, so the result is +1
    for non-negative entries (zero included) and -1 otherwise, cast to
    ``theano.config.floatX``.
    """
    return T.cast(2.*T.ge(x, 0)-1., theano.config.floatX)


def SignNumpy(x):
    """Return the element-wise sign of a NumPy array as +1 / -1 (float32).

    ``np.greater_equal(x, 0)`` is True where ``x >= 0``, so zero maps to +1.
    """
    return np.float32(2.*np.greater_equal(x, 0)-1.)


def binarize_conv_input(conv_input, k):
    """Binarize the input of a convolution layer and compute its scaling map.

    Parameters
    -----------
    conv_input : theano tensor
        Activations fed to the convolution (passed straight to ``conv2d``, so
        presumably 4D ``(batch, channels, height, width)`` -- TODO confirm).
    k : theano variable
        Averaging filter. ``k.eval()`` is called on it to read its static
        shape, so it must be evaluable without inputs (e.g. a shared variable).

    Returns
    -----------
    (bin_conv_out, K) : tuple
        ``bin_conv_out`` is ``sign(conv_input)`` (+1 / -1); ``K`` is the
        convolution of ``|conv_input|`` with ``k``.
    """
    # H = sign(X)
    bin_conv_out = SignTheano(conv_input)

    # scaling factor for the activation.
    # The absolute values are NOT averaged over the channel axis here; the
    # caller folds the 1/c factor into the averaging filter `k`, so that the
    # convolution below produces the channel-and-window mean in one step.
    A = T.abs_(conv_input)

    # K will have scaling matrices for each input in the batch.
    # NOTE(review): the original note gave K's shape as
    # (batch_size, 1, map_height, map_width); with a filter that has several
    # output channels K presumably has one map per filter -- confirm.
    k_shape = k.eval().shape

    # Explicit (rows, cols) padding of half the filter size keeps the spatial
    # size unchanged for odd-sized filters. Floor division is required: under
    # Python 3 a plain `/` would yield floats, which are not valid padding.
    pad = (k_shape[-2] // 2, k_shape[-1] // 2)
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)

    return bin_conv_out, K


def binarize_fc_input(fc_input):
    """Binarize the input of a fully connected layer and compute its scale.

    Returns
    -----------
    (bin_out, beta) : tuple
        ``bin_out`` is ``sign(fc_input)``; ``beta`` is the mean absolute value
        of ``fc_input`` per leading-axis entry (one value per sample, assuming
        axis 0 is the batch axis).
    """
    bin_out = SignTheano(fc_input)

    if fc_input.ndim == 4:
        # prev layer is conv or pooling. hence compute the l1 norm using all
        # maps: average |x| over axes 1, 2 and 3.
        beta = T.mean(T.abs_(fc_input), axis=[1, 2, 3])
    else:
        # feeding layer is FC layer: average |x| over axis 1.
        beta = T.mean(T.abs_(fc_input), axis=1)

    return bin_out, beta


class Conv2DLayer(lasagne.layers.Conv2DLayer):
    """ Binary convolution layer which performs convolution using XNOR and popcount operations.
    This is followed by the scaling with input and weight scaling factors K and alpha respectively.
    """

    def __init__(self, incoming, num_filters, filter_size, format='float', data_bits=15, int_bits=0, **kwargs):
        """
        Parameters
        -----------
        incoming : layer or tuple
            Input layer to this layer. If this is fed by a data layer then this is a tuple representing input dimensions.
        num_filters: int
            Number of 3D filters present in this layer = No of feature maps generated by this layer
        filter_size: tuple
            Filter size of this layer; indexed as (rows, cols) below.
        format : str
            'fixed' enables the fixed point simulation in ``convolve``; any
            other value selects the plain floating point path.
        data_bits, int_bits : int
            Bit widths handed to ``to_fixed_point_theano`` (stored as shared
            variables).
        """
        # Let the parent Conv2DLayer initialise its own parameters first.
        super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size, **kwargs)

        # Number of input feature maps. Read from self.input_shape rather than
        # incoming.output_shape so that a plain shape tuple (allowed by the
        # docstring above) works too.
        no_inputs = self.input_shape[1]
        shape = (num_filters, no_inputs, filter_size[0], filter_size[1])

        # average filter to compute the activation scaling factor
        # Every entry is 1 / (c * h * w): the window average k = 1/(w*h) with
        # the channel average 1/c folded in. It is passed to
        # binarize_conv_input as `k`.
        beta_filter = np.ones(shape=shape).astype(np.float32) / (no_inputs*filter_size[0]*filter_size[1])
        self.beta_filter = self.add_param(beta_filter, shape, name='beta_filter', trainable=False, regularizable=False)

        # binary weight scaling factor: one value per filter, initialised to 0.1.
        xalpha = lasagne.init.Constant(0.1)
        self.xalpha = self.add_param(xalpha, (num_filters,), name='xalpha', trainable=False, regularizable=False)

        # params for fixed point simulation
        # data_bits / int_bits are wrapped in shared variables.
        self.format = format
        self.data_bits = theano.shared(data_bits)
        self.int_bits = theano.shared(int_bits)

    def convolve(self, input, deterministic=False, **kwargs):
        """ Binary convolution. Both inputs and weights are binary (+1 or -1)
        This overrides convolve operation from Conv2DLayer implementation

        ``deterministic`` is accepted but not used, and is not forwarded to
        the parent ``convolve``. When ``self.format == 'fixed'`` both the
        input and the scaled output are passed through
        ``to_fixed_point_theano``; otherwise the same steps run unquantised.
        """
        use_fixed_point = (self.format == 'fixed')

        conv_in = input
        if use_fixed_point:
            # reduce precision of input as this influences computation of beta
            # (to_fixed_point_theano lives in fxp_helper and is not visible
            # here; per the original annotation it scales, rounds and clips.)
            conv_in = to_fixed_point_theano(conv_in, self.data_bits, self.int_bits)

        # compute the binary inputs H and the scaling matrix K
        bin_input, K = binarize_conv_input(conv_in, self.beta_filter)

        # self.W must be already binarized during model init. Here we just call the parents conv method
        feat_maps = super(Conv2DLayer, self).convolve(bin_input, **kwargs)

        # scale by K and alpha (xalpha broadcast as (1, num_filters, 1, 1))
        # FIXME: Actually we are scaling after adding bias here. Need to scale first and then add bias.
        # The super class method automatically adds bias. Somehow need to overcome this..
        # may subtract the bias, scale by alpha and beta ans then add bias ?
        # NOTE(review): in Lasagne the bias appears to be added by
        # get_output_for after convolve returns, so this FIXME may not apply
        # to the convolution layer -- confirm.
        # NOTE(review): K is computed with stride 1 and half-filter padding
        # regardless of this layer's own stride/pad settings; the element-wise
        # product presumably assumes 'same' padding and unit stride -- confirm.
        feat_maps = feat_maps * K
        feat_maps = feat_maps * self.xalpha.dimshuffle('x', 0, 'x', 'x')

        if use_fixed_point:
            feat_maps = to_fixed_point_theano(feat_maps, self.data_bits, self.int_bits)

        return feat_maps


class DenseLayer(lasagne.layers.DenseLayer):
    """Binary version of fully connected layer. XNOR and bitcount ops are used for
    this in a similar fashion as that of Conv Layer.
    """

    def __init__(self, incoming, num_units, format='float', data_bits=15, int_bits=0, **kwargs):
        """ XNOR-Net fully connected layer

        ``format``, ``data_bits`` and ``int_bits`` control the fixed point
        simulation exactly as in ``Conv2DLayer``; remaining keyword arguments
        are forwarded to the parent ``DenseLayer``.
        """
        super(DenseLayer, self).__init__(incoming, num_units,  **kwargs)

        # xnor-net dense layer requires one more new parameter for weight scaling.
        # One value per output unit, initialised to zero.
        xalpha = np.zeros(shape=(num_units,), dtype=np.float32)
        self.xalpha = self.add_param(xalpha, xalpha.shape, name='xalpha', trainable=False, regularizable=False)

        # params for fixed point simulation
        self.format = format
        self.data_bits = theano.shared(data_bits)
        self.int_bits = theano.shared(int_bits)

    def get_output_for(self, input, deterministic=True, **kwargs):
        """ Binary dense layer dot product computation

        ``deterministic`` is accepted but not used, and is not forwarded to
        the parent ``get_output_for``. When ``self.format == 'fixed'`` the
        input and the scaled output are passed through
        ``to_fixed_point_theano``.
        """
        use_fixed_point = (self.format == 'fixed')

        fc_in = input
        if use_fixed_point:
            # reduce precision of input as this influences computation of beta
            fc_in = to_fixed_point_theano(fc_in, self.data_bits, self.int_bits)

        # binarize the input
        # beta will have same range as input because it is just L1 norm of input
        bin_input, beta = binarize_fc_input(fc_in)

        # dot-product at full precision
        fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)

        # scale the output by alpha and beta
        # FIXME: Actually we are scaling after adding bias here. Need to scale first and then add bias.
        # The super class method automatically adds bias. Somehow need to overcome this..
        # may subtract the bias, scale by alpha and beta ans then add bias ?
        # NOTE(review): the parent get_output_for presumably also applies the
        # layer nonlinearity before this scaling -- verify.
        fc_out = fc_out * beta.dimshuffle(0, 'x')
        fc_out = fc_out * self.xalpha.dimshuffle('x', 0)

        if use_fixed_point:
            # reduce the precision of the output based on the data bit widths specified
            fc_out = to_fixed_point_theano(fc_out, self.data_bits, self.int_bits)

        return fc_out
阅读全文
0 0