Neural Network Parameter Initialization: Zeros, Random, and He

import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
from init_utils import sigmoid, relu, compute_loss, forward_propagation, backward_propagation
from init_utils import update_parameters, predict, load_dataset, plot_decision_boundary, predict_dec

%matplotlib inline
plt.rcParams['figure.figsize'] = (7.0, 4.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# load image dataset: blue/red dots in circles
train_X, train_Y, test_X, test_Y = load_dataset()

Import the required libraries and helper functions, and load the dataset (blue and red dots arranged in circles).

def model(X, Y, learning_rate=0.01, num_iterations=15000, print_cost=True, initialization="he"):
    """
    Implements a three-layer neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.

    Arguments:
    X -- input data, of shape (2, number of examples)
    Y -- true "label" vector (containing 0 for red dots; 1 for blue dots), of shape (1, number of examples)
    learning_rate -- learning rate for gradient descent
    num_iterations -- number of iterations to run gradient descent
    print_cost -- if True, print the cost every 1000 iterations
    initialization -- flag to choose which initialization to use ("zeros", "random" or "he")

    Returns:
    parameters -- parameters learnt by the model
    """
    grads = {}
    costs = []                      # to keep track of the loss
    m = X.shape[1]                  # number of examples
    layers_dims = [X.shape[0], 10, 5, 1]

    # Initialize parameters dictionary.
    if initialization == "zeros":
        parameters = initialize_parameters_zeros(layers_dims)
    elif initialization == "random":
        parameters = initialize_parameters_random(layers_dims)
    elif initialization == "he":
        parameters = initialize_parameters_he(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        a3, cache = forward_propagation(X, parameters)

        # Loss
        cost = compute_loss(a3, Y)

        # Backward propagation.
        grads = backward_propagation(X, Y, cache)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the loss every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            costs.append(cost)

    # plot the loss (one point was recorded every 1000 iterations)
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (per thousands)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters

The model function takes the data X and labels Y, the learning rate, the number of iterations, a print_cost flag, and the name of the initialization scheme, and returns the learned parameters. After initializing the parameters with the chosen scheme, each iteration runs forward propagation, computes the cost, runs backward propagation, and updates the parameters; finally the cost curve is plotted.
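For reference, update_parameters performs plain gradient descent; its actual source lives in init_utils and is not shown here. A minimal sketch of what such a step looks like, assuming grads stores the gradients under keys 'dW1', 'db1', ... matching the parameter keys (the function name here is hypothetical):

def gradient_descent_update(parameters, grads, learning_rate):
    # One vanilla gradient-descent step: theta := theta - learning_rate * d_theta.
    L = len(parameters) // 2            # each layer contributes one W and one b
    for l in range(1, L + 1):
        parameters['W' + str(l)] -= learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] -= learning_rate * grads['db' + str(l)]
    return parameters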
def initialize_parameters_zeros(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
                    b1 -- bias vector of shape (layers_dims[1], 1)
                    ...
                    WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
                    bL -- bias vector of shape (layers_dims[L], 1)
    """
    parameters = {}
    L = len(layers_dims)            # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.zeros((layers_dims[l], layers_dims[l-1]))
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))

    return parameters

np.zeros() is used to initialize both W and b to all zeros.
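A quick sanity check of the shapes this returns, using hypothetical layer sizes [3, 2, 1] chosen just for illustration:

params = initialize_parameters_zeros([3, 2, 1])
print(params['W1'].shape, params['b1'].shape)   # (2, 3) (2, 1)
print(params['W1'])
# [[0. 0. 0.]
#  [0. 0. 0.]]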
parameters = model(train_X, train_Y, initialization = "zeros")
print("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)
On the train set:
Accuracy: 0.5
On the test set:
Accuracy: 0.5
With W and b both initialized to zero, the model degenerates to a constant predictor: every unit in a layer computes the same activation and receives the same gradient, so the symmetry is never broken. The network predicts the same value for every example and cannot classify at all, which is why accuracy stays at 0.5.
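The symmetry problem is easy to demonstrate in isolation. A self-contained sketch with made-up inputs (not part of the assignment code):

import numpy as np

x = np.array([[1.0], [2.0]])        # one example with 2 features
W1 = np.zeros((10, 2))              # zero-initialized hidden layer
b1 = np.zeros((10, 1))
a1 = np.maximum(0, W1 @ x + b1)     # ReLU activations
print(np.unique(a1))                # [0.] -- all 10 hidden units are identical

Every unit computes the same value and receives the same gradient, so no amount of training can make them differ; the final sigmoid sees an input of 0 and outputs 0.5 for every example.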

def initialize_parameters_random(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
                    b1 -- bias vector of shape (layers_dims[1], 1)
                    ...
                    WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
                    bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)               # this seed makes sure your "random" numbers will be the same as ours
    parameters = {}
    L = len(layers_dims)            # integer representing the number of layers

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l-1]) * 10
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))

    return parameters

Here W is initialized to large random values (np.random.randn scaled by 10), while b is still initialized to zero.
parameters = model(train_X, train_Y, initialization = "random")
print("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)
On the train set:
Accuracy: 0.83
On the test set:
Accuracy: 0.86
Training accuracy is 0.83 and test accuracy 0.86, a large improvement over zero initialization: random values break the symmetry between units. The x10 scaling is too aggressive, though; large initial weights push the sigmoid output layer into its saturated region, so the early iterations learn slowly.
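The saturation effect can be seen directly from the sigmoid and its derivative (a standalone numeric check, not assignment code; printed values are rounded):

import numpy as np

z = np.array([1.0, 5.0, 15.0])      # pre-activations of growing magnitude
s = 1 / (1 + np.exp(-z))            # sigmoid
print(s)                            # ≈ [0.731 0.993 1.000] -- saturates quickly
print(s * (1 - s))                  # ≈ [2.0e-01 6.6e-03 3.1e-07] -- gradient vanishes

With weights scaled by 10, pre-activations of magnitude 10 or more are common at the start, so the output layer's gradient is nearly zero.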
def initialize_parameters_he(layers_dims):
    """
    Arguments:
    layers_dims -- python array (list) containing the size of each layer.

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
                    b1 -- bias vector of shape (layers_dims[1], 1)
                    ...
                    WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
                    bL -- bias vector of shape (layers_dims[L], 1)
    """
    np.random.seed(3)
    parameters = {}
    L = len(layers_dims) - 1        # integer representing the number of layers

    for l in range(1, L + 1):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l-1]) * np.sqrt(2.0 / layers_dims[l-1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))

    return parameters

He initialization draws W from a standard normal and scales it by np.sqrt(2/layers_dims[l-1]); compared with the random initialization above, the only difference is this factor replacing the fixed x10 scaling, and b is still zero. The factor is matched to ReLU layers: it keeps the variance of each layer's pre-activations roughly constant regardless of layer width.
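The effect of the np.sqrt(2/n) factor can be checked numerically. A small standalone sketch (random data; the printed values are approximate):

import numpy as np

np.random.seed(0)
n_in = 500
x = np.random.randn(n_in, 1000)                          # 1000 fake examples

W_he = np.random.randn(100, n_in) * np.sqrt(2.0 / n_in)  # He scaling
W_big = np.random.randn(100, n_in) * 10                  # "random * 10" scaling

print(np.std(W_he @ x))     # ≈ 1.41, i.e. sqrt(2): stable scale
print(np.std(W_big @ x))    # ≈ 224: pre-activations explode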

parameters = model(train_X, train_Y, initialization = "he")
print("On the train set:")
predictions_train = predict(train_X, train_Y, parameters)
print("On the test set:")
predictions_test = predict(test_X, test_Y, parameters)

On the train set:
Accuracy: 0.993333333333
On the test set:
Accuracy: 0.96
Training accuracy reaches 0.993 and test accuracy 0.96, a clear improvement over the other two schemes. Zero initialization fails completely because it never breaks the symmetry between units; random initialization breaks symmetry and works, but the overly large weights slow convergence; He initialization, designed for ReLU networks like this one, performs best.