Deep Learning for Beginners: CS231n Assignment 2 (CNN)


Part II: Convolutional Neural Networks

【Note 1】np.pad(array, pad_width, mode, **kwargs)

Pads an array. Here we zero-pad each image channel with:

x_pad[m][n]=np.pad(x[m][n], pad, 'constant', constant_values=0)

When pad is a single integer, the array is padded by pad elements of the constant value on every side (top, bottom, left, right), with the corners filled in automatically, so an (H, W) array becomes an (H + 2*pad, W + 2*pad) array.

example:

>>> a=np.ones((2,3))
>>> a
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])
>>> np.pad(a,2,'constant',constant_values=0)
array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  1.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  1.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.]])

【Note 2】np.where(condition, [x, y]) returns the indices of the array elements that satisfy a given condition

example:

>>> x=np.arange(9.).reshape(3,3)
>>> x
array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.],
       [ 6.,  7.,  8.]])
>>> np.where(x>5)
(array([2, 2, 2], dtype=int64), array([0, 1, 2], dtype=int64))

The first array gives the row index of each match and the second array gives the corresponding column index. This function is used in the max-pool backward pass to locate the position of the maximum value (see the sketch below).
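For example, a minimal sketch (with made-up values) of how np.where can recover the location of the maximum inside a single pooling window:

import numpy as np

window = np.array([[1., 5.],
                   [3., 2.]])             # one 2x2 pooling region
idx = np.where(window == np.max(window))  # indices of the max within the window
print(idx)                                # (array([0]), array([1])) -> row 0, col 1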


Network architecture: conv --> relu --> 2x2 max pool --> batch_normalization --> affine --> relu --> affine --> softmax


【layers.py】

1. Convolution (forward + backward)


def conv_forward_naive(x, w, b, conv_param):
  """
  A naive implementation of the forward pass for a convolutional layer.

  The input consists of N data points, each with C channels, height H and width
  W. We convolve each input with F different filters, where each filter spans
  all C channels and has height HH and width WW.

  Input:
  - x: Input data of shape (N, C, H, W)
  - w: Filter weights of shape (F, C, HH, WW)
  - b: Biases, of shape (F,)
  - conv_param: A dictionary with the following keys:
    - 'stride': The number of pixels between adjacent receptive fields in the
      horizontal and vertical directions.
    - 'pad': The number of pixels that will be used to zero-pad the input.

  Returns a tuple of:
  - out: Output data, of shape (N, F, H', W') where H' and W' are given by
    H' = 1 + (H + 2 * pad - HH) / stride
    W' = 1 + (W + 2 * pad - WW) / stride
  - cache: (x, x_pad, w, b, conv_param)
  """
  stride = conv_param['stride']
  pad = conv_param['pad']
  F, C, HH, WW = w.shape
  N, C, H, W = x.shape
  H1 = np.int32(1 + (H + 2 * pad - HH) / stride)
  W1 = np.int32(1 + (W + 2 * pad - WW) / stride)
  out = np.zeros((N, F, H1, W1))
  # zero-pad every channel of every sample
  x_pad = np.zeros((N, C, H + 2 * pad, W + 2 * pad))
  for m in range(N):
    for n in range(C):
      x_pad[m][n] = np.pad(x[m][n], pad, 'constant', constant_values=0)
  for k in range(N):        # N samples
    for l in range(F):      # F filters
      for i in range(H1):
        for j in range(W1):
          # dot product between filter l and the receptive field at (i, j)
          out[k, l, i, j] = np.sum(w[l] * x_pad[k, :, i * stride:i * stride + HH, j * stride:j * stride + WW]) + b[l]
  cache = (x, x_pad, w, b, conv_param)
  return out, cache
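A quick shape sanity check (a minimal sketch; the input sizes and hyperparameters below are chosen arbitrarily for illustration):

x = np.random.randn(2, 3, 8, 8)
w = np.random.randn(4, 3, 3, 3)
b = np.random.randn(4)
conv_param = {'stride': 1, 'pad': 1}
out, _ = conv_forward_naive(x, w, b, conv_param)
print(out.shape)   # (2, 4, 8, 8), since H' = 1 + (8 + 2*1 - 3) / 1 = 8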


def conv_backward_naive(dout, cache):
  """
  A naive implementation of the backward pass for a convolutional layer.
  (Equivalently, dx can be obtained by flipping each filter vertically and
  horizontally and convolving it with the zero-padded upstream gradient.)

  Inputs:
  - dout: Upstream derivatives.
  - cache: A tuple of (x, x_pad, w, b, conv_param) as in conv_forward_naive

  Returns a tuple of:
  - dx: Gradient with respect to x
  - dw: Gradient with respect to w
  - db: Gradient with respect to b
  """
  dx, dw, db = None, None, None
  #############################################################################
  # TODO: Implement the convolutional backward pass.                          #
  #############################################################################
  x, x_pad, w, b, conv_param = cache
  pad = conv_param['pad']
  stride = conv_param['stride']
  N, F, H1, W1 = dout.shape
  N, C, H, W = x.shape
  HH = w.shape[2]
  WW = w.shape[3]
  dx = np.zeros((N, C, H, W))
  dx_pad = np.zeros(x_pad.shape)
  dw = np.zeros(w.shape)
  db = np.zeros(b.shape)
  for n in range(N):
    for f in range(F):
      for i in range(H1):
        for j in range(W1):
          # forward: out[n, f, i, j] = np.sum(w[f] * x_pad[n, :, i*stride:i*stride+HH, j*stride:j*stride+WW]) + b[f]
          db[f] += dout[n, f, i, j]
          dw[f] += dout[n, f, i, j] * x_pad[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
          dx_pad[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW] += dout[n, f, i, j] * w[f]
  # strip the padding to recover the gradient w.r.t. the original input
  dx = dx_pad[:, :, pad:pad + H, pad:pad + W].copy()
  #############################################################################
  #                             END OF YOUR CODE                              #
  #############################################################################
  return dx, dw, db
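These gradients can be checked numerically, for example with the eval_numerical_gradient_array helper shipped with the assignment (a minimal sketch; the input sizes are arbitrary and only dx is checked here, dw and db follow the same pattern):

from cs231n.gradient_check import eval_numerical_gradient_array

x = np.random.randn(2, 3, 8, 8)
w = np.random.randn(4, 3, 3, 3)
b = np.random.randn(4)
conv_param = {'stride': 1, 'pad': 1}

out, cache = conv_forward_naive(x, w, b, conv_param)
dout = np.random.randn(*out.shape)
dx, dw, db = conv_backward_naive(dout, cache)

# numerical gradient of the forward pass w.r.t. x, using the same dout
dx_num = eval_numerical_gradient_array(lambda xx: conv_forward_naive(xx, w, b, conv_param)[0], x, dout)
print(np.max(np.abs(dx - dx_num)))   # should be very small (on the order of 1e-8 or less)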


2. max pooling

def max_pool_forward_naive(x, pool_param):
  """
  A naive implementation of the forward pass for a max pooling layer.

  Inputs:
  - x: Input data, of shape (N, C, H, W)
  - pool_param: dictionary with the following keys:
    - 'pool_height': The height of each pooling region
    - 'pool_width': The width of each pooling region
    - 'stride': The distance between adjacent pooling regions

  Returns a tuple of:
  - out: Output data
  - cache: (x, pool_param)
  """
  out = None
  HH = pool_param['pool_height']
  WW = pool_param['pool_width']
  stride = pool_param['stride']
  N, C, H, W = x.shape
  H1 = np.int32(1 + (H - HH) / stride)
  W1 = np.int32(1 + (W - WW) / stride)
  out = np.zeros((N, C, H1, W1))
  for n in range(N):
    for c in range(C):
      for i in range(H1):
        for j in range(W1):
          out[n, c, i, j] = np.max(x[n, c, i * stride:i * stride + HH, j * stride:j * stride + WW])
  cache = (x, pool_param)
  return out, cache
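A tiny worked example (a sketch with made-up numbers) of 2x2 pooling with stride 2:

x = np.arange(16.).reshape(1, 1, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
out, _ = max_pool_forward_naive(x, pool_param)
print(out)
# [[[[ 5.  7.]
#    [13. 15.]]]]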

def max_pool_backward_naive(dout, cache):
  """
  A naive implementation of the backward pass for a max pooling layer.

  Inputs:
  - dout: Upstream derivatives
  - cache: A tuple of (x, pool_param) as in the forward pass.

  Returns:
  - dx: Gradient with respect to x
  """
  dx = None
  x, pool_param = cache
  N, C, H, W = x.shape
  HH = pool_param['pool_height']
  WW = pool_param['pool_width']
  stride = pool_param['stride']
  H1 = np.int32(1 + (H - HH) / stride)
  W1 = np.int32(1 + (W - WW) / stride)
  dx = np.zeros(x.shape)
  for n in range(N):
    for c in range(C):
      for i in range(H1):
        for j in range(W1):
          # search only inside the current pooling window, then shift the
          # indices back to the coordinates of x before routing the gradient
          window = x[n, c, i * stride:i * stride + HH, j * stride:j * stride + WW]
          index = np.where(window == np.max(window))
          length = len(index[0])
          for m in range(length):
            dx[n, c, i * stride + index[0][m], j * stride + index[1][m]] += dout[n, c, i, j]
  return dx


3. Batch Normalization

We cannot reuse the BN from the fully-connected net here: in a CNN each channel is a 2-D spatial map, so the activations have shape (N, C, H, W) rather than (N, D). We therefore write a separate spatial batch normalization. The idea is to reshape (N, C, H, W) into (N*H*W, C) so that the existing BN function can be called; this treats each channel as one feature dimension, so the mean and variance are computed per channel.

def spatial_batchnorm_forward(x, gamma, beta, bn_param):
  """
  Computes the forward pass for spatial batch normalization.

  Inputs:
  - x: Input data of shape (N, C, H, W)
  - gamma: Scale parameter, of shape (C,)
  - beta: Shift parameter, of shape (C,)
  - bn_param: Dictionary with the following keys:
    - mode: 'train' or 'test'; required
    - eps: Constant for numeric stability
    - momentum: Constant for running mean / variance. momentum=0 means that
      old information is discarded completely at every time step, while
      momentum=1 means that new information is never incorporated. The
      default of momentum=0.9 should work well in most situations.
    - running_mean: Array of shape (C,) giving running mean of features
    - running_var: Array of shape (C,) giving running variance of features

  Returns a tuple of:
  - out: Output data, of shape (N, C, H, W)
  - cache: Values needed for the backward pass
  """
  out, cache = None, None
  N, C, H, W = x.shape
  # move the channel axis last, flatten to (N*H*W, C), and reuse vanilla BN
  out, cache = batchnorm_forward(x.transpose(0, 2, 3, 1).reshape(N * H * W, C), gamma, beta, bn_param)
  out = out.reshape(N, H, W, C).transpose(0, 3, 1, 2)
  return out, cache

def spatial_batchnorm_backward(dout, cache):
  """
  Computes the backward pass for spatial batch normalization.

  Inputs:
  - dout: Upstream derivatives, of shape (N, C, H, W)
  - cache: Values from the forward pass

  Returns a tuple of:
  - dx: Gradient with respect to inputs, of shape (N, C, H, W)
  - dgamma: Gradient with respect to scale parameter, of shape (C,)
  - dbeta: Gradient with respect to shift parameter, of shape (C,)
  """
  dx, dgamma, dbeta = None, None, None
  N, C, H, W = dout.shape
  # same reshape trick as the forward pass, applied to the upstream gradient
  dx, dgamma, dbeta = batchnorm_backward(dout.transpose(0, 2, 3, 1).reshape(N * H * W, C), cache)
  dx = dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
  return dx, dgamma, dbeta
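A quick sanity check (a sketch; it assumes the batchnorm_forward implemented in the fully-connected part of the assignment is available in layers.py): after a train-mode forward pass with gamma=1 and beta=0, every channel of the output should have mean close to 0 and standard deviation close to 1.

x = 4 + 10 * np.random.randn(5, 3, 6, 6)
gamma = np.ones(3)
beta = np.zeros(3)
out, _ = spatial_batchnorm_forward(x, gamma, beta, {'mode': 'train'})
print(out.mean(axis=(0, 2, 3)))   # approximately [0, 0, 0]
print(out.std(axis=(0, 2, 3)))    # approximately [1, 1, 1]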


【cnn.py】

This file defines the network model and implements the computation of the loss and grads.

import numpy as np

from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.layer_utils import *


class ThreeLayerConvNet(object):
  """
  A three-layer convolutional network with the following architecture:

  conv - relu - 2x2 max pool - batch_normalization - affine - relu - affine - softmax

  The network operates on minibatches of data that have shape (N, C, H, W)
  consisting of N images, each with height H and width W and with C input
  channels.
  """

  def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
               hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0,
               dtype=np.float32):
    """
    Initialize a new network.

    Inputs:
    - input_dim: Tuple (C, H, W) giving size of input data
    - num_filters: Number of filters to use in the convolutional layer
    - filter_size: Size of filters to use in the convolutional layer
    - hidden_dim: Number of units to use in the fully-connected hidden layer
    - num_classes: Number of scores to produce from the final affine layer.
    - weight_scale: Scalar giving standard deviation for random initialization
      of weights.
    - reg: Scalar giving L2 regularization strength
    - dtype: numpy datatype to use for computation.
    """
    self.params = {}
    self.reg = reg
    self.dtype = dtype
    C, H, W = input_dim
    self.params['W1'] = np.random.randn(num_filters, C, filter_size, filter_size) * weight_scale
    self.params['b1'] = np.zeros(num_filters)
    # 2x2 max pooling halves each spatial dimension, hence H * W / 4
    self.params['W2'] = np.random.randn(int(num_filters * H * W / 4), hidden_dim) * weight_scale
    self.params['b2'] = np.zeros(hidden_dim)
    self.params['W3'] = np.random.randn(hidden_dim, num_classes) * weight_scale
    self.params['b3'] = np.zeros(num_classes)
    self.params['gamma'] = np.ones(num_filters)
    self.params['beta'] = np.zeros(num_filters)

    for k, v in self.params.items():
      self.params[k] = v.astype(dtype)

  def loss(self, X, y=None):
    """
    Evaluate loss and gradient for the three-layer convolutional network.

    Input / output: Same API as TwoLayerNet in fc_net.py.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']
    gamma, beta = self.params['gamma'], self.params['beta']

    # pass conv_param to the forward pass for the convolutional layer
    filter_size = W1.shape[2]
    bn_param = {
      'mode': 'test' if y is None else 'train',
    }
    conv_param = {'stride': 1, 'pad': int((filter_size - 1) / 2)}

    # pass pool_param to the forward pass for the max-pooling layer
    pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

    scores = None
    self.cache = {}
    y1, self.cache['cache1'] = conv_bn_relu_pool_forward(X, W1, b1, gamma, beta, conv_param, bn_param, pool_param)
    y2, self.cache['cache2'] = affine_relu_forward(y1, W2, b2)
    scores, self.cache['cache3'] = affine_forward(y2, W3, b3)

    if y is None:
      return scores

    loss, grads = 0, {}
    loss, dy = softmax_loss(scores, y)
    loss += 0.5 * self.reg * (np.sum(np.square(self.params['W1'])) + np.sum(np.square(self.params['W2'])) + np.sum(np.square(self.params['W3'])))
    grad_term, grads['W3'], grads['b3'] = affine_backward(dy, self.cache['cache3'])
    grads['W3'] += self.reg * self.params['W3']
    grad_term, grads['W2'], grads['b2'] = affine_relu_backward(grad_term, self.cache['cache2'])
    grads['W2'] += self.reg * self.params['W2']
    dx, grads['W1'], grads['b1'], grads['gamma'], grads['beta'] = conv_bn_relu_pool_backward(grad_term, self.cache['cache1'])
    grads['W1'] += self.reg * self.params['W1']

    return loss, grads

【solver.py】

This is still the Solver class from the fully-connected nets part; it defines the training procedure.


【main.py】

Defines the main script.

import numpy as np
import matplotlib.pyplot as plt

from cs231n.classifiers.cnn import *
from cs231n.data_utils import load_CIFAR10
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver


def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


# load the CIFAR-10 data
data = load_CIFAR10('E:\\Python\\deep learning CS231n\\assignment2\\cs231n\\datasets')
for k, v in data.items():
  print('%s: ' % k, v.shape)
# train on a small subset of the data
num_train = 100
small_data = {
  'X_train': data['X_train'][:num_train],
  'y_train': data['y_train'][:num_train],
  'X_val': data['X_val'],
  'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)
solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 1e-3,
                },
                verbose=True, print_every=1)
solver.train()
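If the layers are implemented correctly, the training accuracy on these 100 examples should climb toward 1.0 within the 10 epochs while the validation accuracy stays low; overfitting the small subset is the expected outcome of this check. The histories recorded by the Solver can then be plotted, for example (a sketch using the Solver's loss_history / train_acc_history / val_acc_history attributes):

plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()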


