【deeplearning.ai】Neural Networks and Deep Learning——浅层神经网络

来源：互联网 ｜ 发布：淘宝管控记录是干吗的 ｜ 编辑：程序博客网 ｜ 时间：2024/06/06 12:44

吴恩达的deeplearning.ai公开课，第二周内容的学习笔记。

一、基础知识

1、浅层神经网络结构

此网络为2层。在说神经网络的层数时，不包括输入层。

2、前向传播

训练时循环每个样本：

可以设：

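将其向量化，去掉for循环（$g^{[1]}$ 为隐藏层激活函数，下文代码中取 tanh）：

$$Z^{[1]} = W^{[1]}X + b^{[1]},\quad A^{[1]} = g^{[1]}(Z^{[1]}),\quad Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]},\quad A^{[2]} = \sigma(Z^{[2]})$$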

3、激活函数

（1）tanh函数

（2）ReLU函数

（3）Leaky ReLU函数

4、反向传播

二、代码实践——平面数据分类

要进行分类的数据如下：

红点代表标签y=0，蓝点代表标签y=1。最终预测准确率达90%，源码如下：

planar_utils.py文件：载入训练数据

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model


def plot_decision_boundary(model, X, y):
    """Draw the decision regions of `model` and overlay the training points.

    Arguments:
    model -- callable that takes an array of shape (n_points, 2), one grid
             point per row, and returns one prediction per point
    X -- input data of shape (2, number of examples)
    y -- labels, one per example; may be 1-D or shaped (1, number of examples)
    """
    # Set min and max values and give it some padding
    x_min, x_max = X[0, :].min() - 1, X[0, :].max() + 1
    y_min, y_max = X[1, :].min() - 1, X[1, :].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    Z = model(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    # Flatten the labels: scatter expects one colour value per point, and a
    # (1, m) row vector (the shape load_planar_dataset returns) does not
    # qualify as a sequence of m values.
    plt.scatter(X[0, :], X[1, :], c=np.ravel(y), cmap=plt.cm.Spectral)


def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    s = 1 / (1 + np.exp(-x))
    return s


def load_planar_dataset():
    """Generate the two-class "flower" dataset with a fixed random seed.

    Returns:
    X -- data matrix of shape (2, 400), one example per column
    Y -- labels of shape (1, 400), dtype uint8 (0 for red, 1 for blue)
    """
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m / 2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum ray of the flower

    for j in range(2):
        ix = range(N * j, N * (j + 1))
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[ix] = j

    # Switch to the (features, examples) layout
    X = X.T
    Y = Y.T

    return X, Y


def load_extra_datasets():
    """Build five additional 200-sample 2-D datasets.

    Returns:
    A tuple (noisy_circles, noisy_moons, blobs, gaussian_quantiles,
    no_structure). The first four are whatever the corresponding
    sklearn.datasets.make_* call returns; no_structure is a pair of
    uniform-random (200, 2) arrays.
    """
    N = 200
    noisy_circles = sklearn.datasets.make_circles(n_samples=N, factor=.5, noise=.3)
    noisy_moons = sklearn.datasets.make_moons(n_samples=N, noise=.2)
    blobs = sklearn.datasets.make_blobs(n_samples=N, random_state=5, n_features=2, centers=6)
    gaussian_quantiles = sklearn.datasets.make_gaussian_quantiles(
        mean=None, cov=0.5, n_samples=N, n_features=2,
        n_classes=2, shuffle=True, random_state=None)
    no_structure = np.random.rand(N, 2), np.random.rand(N, 2)

    return noisy_circles, noisy_moons, blobs, gaussian_quantiles, no_structure

SNN.py文件：算法实现

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets

np.random.seed(1)  # make every run draw the same random numbers

# Load the data (2 features per example)
X, Y = load_planar_dataset()
shape_X = X.shape       # X: 2 rows, 400 columns
shape_Y = Y.shape       # Y: 1 row, 400 columns
m = X.shape[1]          # number of examples, 400


# Define the network structure
def layer_sizes(X, Y):
    """
    Arguments:
    X -- input dataset of shape (input size, number of examples)
    Y -- labels of shape (output size, number of examples)

    Returns:
    n_x -- the size of the input layer
    n_h -- the size of the hidden layer (fixed at 4 here)
    n_y -- the size of the output layer
    """
    n_x = X.shape[0]    # number of input units
    n_h = 4             # number of hidden units
    n_y = Y.shape[0]    # number of output units
    return (n_x, n_h, n_y)


# Initialise the model parameters
def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    params -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """
    np.random.seed(2)  # fixed seed so the random initialisation is reproducible

    # Small random weights, zero biases
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


# Forward propagation
def forward_propagation(X, parameters):
    """
    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of initialization function)

    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Hidden layer uses tanh, output layer uses sigmoid (probabilities)
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    assert (A2.shape == (1, X.shape[1]))

    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    return A2, cache


# Compute the cost
def compute_cost(A2, Y, parameters):
    """
    Computes the cross-entropy cost
    -1/m * sum(Y * log(A2) + (1 - Y) * log(1 - A2)).

    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    parameters -- python dictionary containing your parameters W1, b1, W2 and b2
                  (not used by the computation; kept for interface compatibility)

    Returns:
    cost -- cross-entropy cost as a Python float
    """
    m = Y.shape[1]  # number of examples

    # Compute the cross-entropy cost
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), 1 - Y)
    cost = -np.sum(logprobs) / m
    # Drop any singleton dimensions and hand back a plain float
    cost = float(np.squeeze(cost))
    return cost


# Backward propagation
def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the cost with respect to W1, b1, W2 and b2.

    Arguments:
    parameters -- python dictionary containing our parameters
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)

    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m = X.shape[1]      # number of examples

    # Only W2 is needed from "parameters"; A1 and A2 come from "cache".
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    # Backward propagation: calculate dW1, db1, dW2, db2.
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    # 1 - A1**2 is the derivative of tanh evaluated at Z1
    dZ1 = np.multiply(np.dot(W2.T, dZ2), (1 - np.power(A1, 2)))
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads


# Update the parameters
def update_parameters(parameters, grads, learning_rate=1.2):
    """
    Updates parameters using the gradient descent update rule
    theta = theta - learning_rate * d_theta.

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients
    learning_rate -- step size of the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
    """
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Retrieve each gradient from the dictionary "grads"
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    # Update rule for each parameter
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters


# Assemble the full model
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False, learning_rate=1.2):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    learning_rate -- step size passed to update_parameters (default 1.2,
                     the value update_parameters itself defaults to)

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    # The hidden size comes from the n_h argument, not from layer_sizes
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters


# Prediction
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- boolean vector of predictions of our model (red: 0 / blue: 1)
    """
    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)
    return predictions


# Train
parameters = nn_model(X, Y, n_h=4, num_iterations=10000, print_cost=True)

# Predict on the training set and report the accuracy.
# Counting element-wise matches avoids accumulating in Y's uint8 dtype (which
# would wrap above 255 matches) and avoids calling float() on a (1, 1) array.
predictions = predict(parameters, X)
correct = np.sum(predictions == Y)
print('Accuracy: %d' % (correct / float(Y.size) * 100) + '%')

阅读全文

0 0