2017.08.14: Introduction to Neural Networks


Language: Python

Version: 3.4

Reference: http://blog.csdn.net/dream_an/article/details/51782161

Topic: Neural Networks

# Needed modules

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib

# Generate a dataset and plot it

np.random.seed(0)
x, y = sklearn.datasets.make_moons(200, noise=0.20)
plt.scatter(x[:, 0], x[:, 1], s=40, c=y, cmap=plt.cm.Spectral)

%If you run this code in an interactive Python session, the following picture will be shown:

figure_1 (scatter plot of the two generated "moons" classes, colored by label)

%

# Training set size
num_examples = len(x)

# Input layer dimensionality
nn_input_dim = 2

# Output layer dimensionality
nn_output_dim = 2

# Gradient descent parameters (I picked these by hand)

# Learning rate for gradient descent
epsilon = 0.01

# Regularization strength
reg_lambda = 0.01
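For reference, epsilon and reg_lambda enter training through the L2-regularized gradient descent update; this is just the update that build_model performs further down, written out (the biases are not regularized):

$$ W_i \leftarrow W_i - \epsilon \left( \frac{\partial L}{\partial W_i} + \lambda W_i \right), \qquad b_i \leftarrow b_i - \epsilon \, \frac{\partial L}{\partial b_i}, \qquad i \in \{1, 2\} $$

with $\epsilon$ = epsilon and $\lambda$ = reg_lambda.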

# Helper function to plot a decision boundary.
# If you don't fully understand this function, don't worry: it just generates the contour plot below.

def plot_decision_boundary(pred_func):
    # Set min and max values and give them some padding
    x_min, x_max = x[:, 0].min() - .5, x[:, 0].max() + .5
    y_min, y_max = x[:, 1].min() - .5, x[:, 1].max() + .5
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.contourf(xx, yy, z, cmap=plt.cm.Spectral)
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.Spectral)

# Helper function to evaluate the total loss on the dataset

%Principle:

  Applying the backpropagation formula, we find the gradients written out just below (trust me on this):

%
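The formulas themselves do not appear in this copy (they were most likely an image in the source post). The expressions below are written out from the gradients that build_model computes further down, for a tanh hidden layer and a softmax output trained with cross-entropy loss; the bias gradients sum the deltas over the training examples:

$$ \delta_3 = \hat{y} - y $$
$$ \delta_2 = (1 - \tanh^2 z_1) \circ \delta_3 W_2^T $$
$$ \frac{\partial L}{\partial W_2} = a_1^T \delta_3, \qquad \frac{\partial L}{\partial b_2} = \delta_3 $$
$$ \frac{\partial L}{\partial W_1} = x^T \delta_2, \qquad \frac{\partial L}{\partial b_1} = \delta_2 $$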

def calculate_loss(model):
    w1, b1, w2, b2 = model['w1'], model['b1'], model['w2'], model['b2']
    # Forward propagation to calculate our predictions
    z1 = x.dot(w1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(w2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the loss
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to the loss (optional)
    data_loss += reg_lambda / 2 * (np.sum(np.square(w1)) + np.sum(np.square(w2)))
    return 1. / num_examples * data_loss
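Written out, calculate_loss returns the average cross-entropy over the N = num_examples training points, with the L2 penalty folded in exactly as in the code (the penalty is added before dividing by N):

$$ L = \frac{1}{N} \left[ -\sum_{n=1}^{N} \log \hat{y}_{n,\,y_n} + \frac{\lambda}{2} \left( \lVert W_1 \rVert_F^2 + \lVert W_2 \rVert_F^2 \right) \right] $$

where $\hat{y}_{n,\,y_n}$ is the predicted probability of the correct class for example $n$.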
# Helper function to predict an output (0 or 1)

def predict(model, x):
    w1, b1, w2, b2 = model['w1'], model['b1'], model['w2'], model['b2']
    # Forward propagation
    z1 = x.dot(w1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(w2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
# This function learns parameters for the neural network and returns the model.

%Principle of the neural net:

  (Figure: nn-3-layer-network.png, the 3-layer network architecture: a 2-unit input layer, one hidden layer of nn_hdim units, and a 2-unit output layer.)

%
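In equations, the forward pass implemented by predict, calculate_loss, and build_model is (each row of x is one example, tanh is applied element-wise, and softmax normalizes each row of $z_2$):

$$ z_1 = x W_1 + b_1, \qquad a_1 = \tanh(z_1) $$
$$ z_2 = a_1 W_2 + b_2, \qquad \hat{y} = \mathrm{softmax}(z_2) $$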

# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - print_loss: If True, print the loss every 1000 iterations

def build_model(nn_hdim, num_passes=20000, print_loss=False):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    w1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    w2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # This is what we return at the end
    model = {}
    # Gradient descent. For each batch...
    for i in range(0, num_passes):
        # Forward propagation
        z1 = x.dot(w1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(w2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Backpropagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dw2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(w2.T) * (1 - np.power(a1, 2))
        dw1 = np.dot(x.T, delta2)
        db1 = np.sum(delta2, axis=0)
        # Add regularization terms (b1 and b2 don't have regularization terms)
        dw2 += reg_lambda * w2
        dw1 += reg_lambda * w1
        # Gradient descent parameter update
        w1 += -epsilon * dw1
        b1 += -epsilon * db1
        w2 += -epsilon * dw2
        b2 += -epsilon * db2
        # Assign new parameters to the model
        model = {'w1': w1, 'b1': b1, 'w2': w2, 'b2': b2}
        # Optionally print the loss.
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))

    return model

# Build a model with a 3-dimensional hidden layer

model = build_model(3, print_loss=True)
# Plot the decision boundary
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")
plt.show()
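As a quick check, the trained model can also classify new points through predict. A minimal sketch, assuming the code above has already been run in the same session; the query coordinates below are arbitrary examples, not taken from the dataset:

# Classify a couple of new points with the trained model
new_points = np.array([[0.0, 0.5],
                       [1.5, -0.5]])
print(predict(model, new_points))  # prints an array of 0/1 class labels, one per row of new_points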







