CS231n+assignment1(作业一)
来源:互联网 发布:mysql group by 多列 编辑:程序博客网 时间:2024/05/23 13:56
一、第一个是KNN的代码。这里的技巧(trick)是计算距离的三种实现方式(双重循环、单重循环、无循环),核心还是 Python 和机器学习中非常实用的向量化操作,可以大大地提高计算速度。
import numpy as np


class KNearestNeighbor(object):
    """A k-nearest-neighbour classifier using the L2 (Euclidean) distance."""

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (num_train,) containing the training
          labels, where y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data
          consisting of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute
          distances between training points and testing points (0, 1 or 2).

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for
          the test data, where y[i] is the predicted label for the test point
          X[i].

        Raises:
        - ValueError: if num_loops is not 0, 1 or 2.
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a nested loop over both the training data
        and the test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where
          dists[i, j] is the Euclidean distance between the ith test point and
          the jth training point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                # L2 distance between one test row and one training row; the
                # sum over the feature dimension is vectorized.
                diff = self.X_train[j, :] - X[i, :]
                dists[i, j] = np.sqrt(np.sum(np.square(diff)))
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            # Broadcasting subtracts the ith test row from every training row.
            diff = self.X_train - X[i, :]
            dists[i, :] = np.sqrt(np.sum(np.square(diff), axis=1))
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training
        point in self.X_train using no explicit loops.

        Uses the expansion ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, i.e. one
        matrix multiplication and two broadcast sums.

        Input / Output: Same as compute_distances_two_loops
        """
        cross = np.dot(X, self.X_train.T)                       # (num_test, num_train)
        test_sq = np.sum(np.square(X), axis=1, keepdims=True)   # (num_test, 1)
        train_sq = np.sum(np.square(self.X_train), axis=1)      # (num_train,)
        sq_dists = test_sq - 2 * cross + train_sq
        # Floating-point cancellation can leave tiny negative values for
        # (near-)identical points; clamp them so sqrt never yields NaN.
        sq_dists = np.maximum(sq_dists, 0)
        return np.sqrt(sq_dists)

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance between the ith test point and the jth training
          point.
        - k: The number of nearest neighbors that vote for each label.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for
          the test data, where y[i] is the predicted label for the test point
          X[i]. Labels must be non-negative integers (np.bincount is used for
          voting).
        """
        labels = np.asarray(self.y_train)
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # Labels of the k training points closest to the ith test point.
            nearest = np.argsort(dists[i, :])[:k]
            closest_y = labels[nearest]
            # Majority vote; argmax returns the first maximum, so ties are
            # broken in favour of the smaller label.
            y_pred[i] = np.argmax(np.bincount(closest_y))
        return y_pred
测试和交叉验证代码:
# coding: utf-8
'''
Created on 2017

@author:
'''
import random
import time

import numpy as np
import matplotlib.pyplot as plt

from assignment1.data_utils import load_CIFAR10
from assignment1.classifiers.k_nearest_neighbor import KNearestNeighbor

# Default plot settings: figure size, interpolation and colour map.
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

X_train, y_train, X_test, y_test = load_CIFAR10('../datasets')

# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

# Show a few sample images from the data set, several per class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        # Column = class, row = sample number within the class.
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()

# Keep only a subset of the samples so the exercise runs faster.
num_training = 5000
X_train = X_train[:num_training]
y_train = y_train[:num_training]

num_test = 500
X_test = X_test[:num_test]
y_test = y_test[:num_test]

# Reshape the training and test images into rows.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape)
print(X_test.shape)

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)

plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below.
# With k = 1:
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples.
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# With k = 5:
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

# Check that the three distance implementations agree, then compare speed.
dists_one = classifier.compute_distances_one_loop(X_test)
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')

dists_two = classifier.compute_distances_no_loops(X_test)
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')


def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took
    to execute.
    """
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic


two_loop_time = time_function(classifier.compute_distances_two_loops, X_test)
print('Two loop version took %f seconds' % two_loop_time)

one_loop_time = time_function(classifier.compute_distances_one_loop, X_test)
print('One loop version took %f seconds' % one_loop_time)

no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print('No loop version took %f seconds' % no_loop_time)

# Cross-validation
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

# Split the training data into folds. X_train_folds and y_train_folds are lists
# of length num_folds, where y_train_folds[i] is the label vector for the
# points in X_train_folds[i].
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

# A dictionary holding the accuracies for different values of k that we find
# when running cross-validation. After running cross-validation,
# k_to_accuracies[k] is a list of length num_folds giving the different
# accuracy values that we found when using that value of k.
k_to_accuracies = {k: [] for k in k_choices}

# The distance matrix depends only on the fold, not on k, so it is computed
# once per fold and reused for every k. Accuracies are still appended in fold
# order for each k.
for i in range(num_folds):
    X_train_cv = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
    y_train_cv = np.hstack(y_train_folds[:i] + y_train_folds[i + 1:])
    X_test_cv = X_train_folds[i]
    y_test_cv = y_train_folds[i]

    classifier.train(X_train_cv, y_train_cv)
    dists_cv = classifier.compute_distances_no_loops(X_test_cv)

    for k in k_choices:
        y_test_pred = classifier.predict_labels(dists_cv, k)
        num_correct = np.sum(y_test_pred == y_test_cv)
        accuracy = float(num_correct) / y_test_cv.shape[0]
        k_to_accuracies[k].append(accuracy)

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation;
# the x values use the same sorted key order as the means and deviations.
sorted_ks = sorted(k_to_accuracies)
accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in sorted_ks])
accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in sorted_ks])
plt.errorbar(sorted_ks, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()
testKNN.py
import os
import random
import sys

import numpy as np
import matplotlib.pyplot as plt  # was missing: plt is used for the plots below

sys.path.append('cs231n.classifiers')
from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor
from six.moves import cPickle as pickle
try:
    # Unused below; scipy.misc.imread no longer exists in recent SciPy
    # releases, so do not let it abort the script.
    from scipy.misc import imread
except ImportError:
    imread = None
from cs231n.data_utils import load_CIFAR10

xtr, ytr, xte, yte = load_CIFAR10('data/cifar10')

# Use a subset of the data so the experiment runs quickly.
num_training = 5000
X_train = xtr[:num_training]
y_train = ytr[:num_training]

num_test = 500
X_test = xte[:num_test]
y_test = yte[:num_test]

# Reshape the image data into rows
X_train = X_train.reshape(X_train.shape[0], 32 * 32 * 3)
X_test = X_test.reshape(X_test.shape[0], 32 * 32 * 3)
print(X_train.shape, X_test.shape)

num_folds = 5
k_choices = [1, 5, 10, 15, 20, 50, 100]

X_train_folds = np.array_split(X_train, num_folds, axis=0)
y_train_folds = np.array_split(y_train, num_folds, axis=0)

# k_to_accuracies[k] collects one validation accuracy per fold.
k_to_accuracies = {k: [] for k in k_choices}

nn = KNearestNeighbor()
for j in range(num_folds):
    # Concatenate every fold except the jth into the training split. This
    # works for unequal fold sizes and needs no integer-division reshape.
    X_tr = np.concatenate(X_train_folds[:j] + X_train_folds[j + 1:], axis=0)
    y_tr = np.concatenate(y_train_folds[:j] + y_train_folds[j + 1:], axis=0)
    X_te = X_train_folds[j]
    y_te = y_train_folds[j]

    nn.train(X_tr, y_tr)
    # Distances do not depend on k: compute them once per fold and reuse them
    # for every k (same result as nn.predict(X_te, k, 1)).
    dists = nn.compute_distances_one_loop(X_te)
    for k in k_choices:
        yte_pred = nn.predict_labels(dists, k=k)
        num_correct = np.sum(y_te == yte_pred)
        num_test1 = np.shape(X_te)[0]
        accuracy = float(num_correct) / num_test1
        k_to_accuracies[k].append(accuracy)

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

accuracies_mean = np.array([np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
# Save before show(): once the window is closed the current figure may be
# empty, which would write a blank image.
plt.savefig("k.png")
plt.show()
plt.clf()
二、softmax
同样需要完成 naive(带循环)和 vectorized(向量化)两种实现,并比较它们的速度。
import numpy as np


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)  # same shape as W

    num_train = X.shape[0]
    num_class = W.shape[1]

    f = X.dot(W)  # N by C
    # Numeric stability: subtract the row-wise maximum before exponentiating;
    # this leaves the softmax unchanged but avoids overflow in exp.
    f_max = np.max(f, axis=1, keepdims=True)
    exp_f = np.exp(f - f_max)
    prob = exp_f / np.sum(exp_f, axis=1, keepdims=True)  # N by C

    for i in range(num_train):
        # Loss: L = -(1/N) sum_i log(p_{i, y_i}) + 0.5 * reg * sum(W^2).
        # Only the true class contributes, which also avoids 0 * log(0) = NaN
        # for the other classes.
        loss -= np.log(prob[i, y[i]])
        for j in range(num_class):
            # Gradient: dL/dW_j = (1/N) sum_i (p_ij - 1[j == y_i]) x_i + reg * W_j
            indicator = 1.0 if j == y[i] else 0.0
            dW[:, j] += (prob[i, j] - indicator) * X[i, :]

    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)  # add regularization
    dW /= num_train
    dW += reg * W

    return loss, dW


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    f = X.dot(W)  # N by C
    # Numeric stability: shift each row so its maximum is zero.
    f = f - np.max(f, axis=1, keepdims=True)
    exp_f = np.exp(f)
    prob = exp_f / np.sum(exp_f, axis=1, keepdims=True)  # N by C

    # Cross-entropy of the true classes plus L2 regularization.
    loss = -np.sum(np.log(prob[rows, y])) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # d(loss)/d(scores) = prob - one_hot(y)
    dscores = prob.copy()
    dscores[rows, y] -= 1.0
    dW = np.dot(X.T, dscores) / num_train + reg * W  # D by C

    return loss, dW
三、SVM
import numpy as np


def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                # The correct class contributes no margin term of its own.
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Each violated margin pushes the correct-class column by -x_i
                # and the offending class column by +x_i.
                dW[:, y[i]] -= X[i, :]
                dW[:, j] += X[i, :]

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train

    # Add regularization: 0.5 * reg * ||W||^2, whose gradient is reg * W.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W

    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    num_train = X.shape[0]
    rows = np.arange(num_train)

    scores = X.dot(W)  # N by C
    scores_correct = scores[rows, y].reshape(num_train, 1)  # N by 1

    # Hinge margins with delta = 1; the correct class is excluded.
    margins = np.maximum(scores - scores_correct + 1.0, 0.0)  # N by C
    margins[rows, y] = 0.0

    loss = np.sum(margins) / num_train
    loss += 0.5 * reg * np.sum(W * W)

    # Gradient w.r.t. the scores: +1 for each violated margin, and minus the
    # number of violations in the row for the correct class.
    coeff = (margins > 0).astype(scores.dtype)
    coeff[rows, y] = -np.sum(coeff, axis=1)
    dW = np.dot(X.T, coeff) / num_train + reg * W  # D by C

    return loss, dW
SVM_test.py
# -*- coding: utf-8 -*-
import random
import time

import numpy as np
import matplotlib.pyplot as plt

from cs231n.data_utils import load_CIFAR10
from cs231n.classifiers.linear_svm import svm_loss_naive
from cs231n.classifiers.linear_classifier import LinearSVM

# NOTE: the triple-quoted blocks in this script are optional steps that have
# been switched off by wrapping them in string literals; remove the quotes
# around a block to run it.

# Load the raw CIFAR-10 data.
cifar10_dir = 'data/cifar10'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

'''
# As a sanity check, we print out the size of the training and test data.
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
'''

'''
# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)
    idxs = np.random.choice(idxs, samples_per_class, replace=False)
    for i, idx in enumerate(idxs):
        plt_idx = i * num_classes + y + 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()
'''

# Split the data into train, val, and test sets. In addition we will
# create a small development set as a subset of the training data;
# we can use this for development so our code runs faster.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500

# Our validation set will be num_validation points from the original
# training set.
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]

# Our training set will be the first num_train points from the original
# training set.
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

# We will also make a development set, which is a small subset of
# the training set.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

# We use the first num_test points of the original test set as our
# test set.
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

'''
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
'''

# Preprocessing: reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

'''
# As a sanity check, print out the shapes of the data
print('Training data shape: ', X_train.shape)
print('Validation data shape: ', X_val.shape)
print('Test data shape: ', X_test.shape)
print('dev data shape: ', X_dev.shape)
'''

# Preprocessing: subtract the mean image
# first: compute the image mean based on the training data
mean_image = np.mean(X_train, axis=0)

'''
print(mean_image[:10]) # print a few of the elements
plt.figure(figsize=(4,4))
plt.imshow(mean_image.reshape((32,32,3)).astype('uint8')) # visualize the mean image
plt.show()
'''

# second: subtract the mean image from train and test data
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
#print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))  # output of the author's run: loss: 9.548658

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

'''
# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
'''

'''
# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.linear_svm import svm_loss_vectorized
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))
'''

'''
# Complete the implementation of svm_loss_vectorized, and compute the gradient
# of the loss function in a vectorized way.

# The naive implementation and the vectorized implementation should match, but
# the vectorized version should still be much faster.
tic = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss and gradient: computed in %fs' % (toc - tic))

tic = time.time()
#_, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
#difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
#print('difference: %f' % difference)
'''

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

'''
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()
'''

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred), ))

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of about 0.4 on the validation set.
learning_rates = [1e-7, 5e-5]
regularization_strengths = [2.5e4, 5e4]

# results is dictionary mapping tuples of the form
# (learning_rate, regularization_strength) to tuples of the form
# (training_accuracy, validation_accuracy). The accuracy is simply the fraction
# of data points that are correctly classified.
results = {}
best_val = -1   # The highest validation accuracy that we have seen so far.
best_svm = None  # The LinearSVM object that achieved the highest validation rate.

iters = 1000
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)

        results[(lr, rs)] = (acc_train, acc_val)

        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (
        lr, reg, train_accuracy, val_accuracy))

print('best validation accuracy achieved during cross-validation: %f' % best_val)

'''
# Visualize the cross-validation results
import math
x_scatter = [math.log10(x[0]) for x in results]
y_scatter = [math.log10(x[1]) for x in results]

# plot training accuracy
marker_size = 100
colors = [results[x][0] for x in results]
plt.subplot(2, 1, 1)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 training accuracy')

# plot validation accuracy
colors = [results[x][1] for x in results] # default size of markers is 20
plt.subplot(2, 1, 2)
plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
plt.colorbar()
plt.xlabel('log learning rate')
plt.ylabel('log regularization strength')
plt.title('CIFAR-10 validation accuracy')
plt.show()
'''

# Evaluate the best svm on test set
y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

# Visualize the learned weights for each class.
# Depending on your choice of learning rate and regularization strength, these may
# or may not be nice to look at.
w = best_svm.W[:-1, :]  # strip out the bias
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)

    # Rescale the weights to be between 0 and 255
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show()
四、linear_classifier
从编程思路上来看,上面三个是不同的策略,确切地说是线性分类器的几种方法,所以我们用一个LinearClassifier类来调用它们。
import numpy as np

from linear_svm import *
from softmax import *


class LinearClassifier(object):
    """Base class for linear classifiers trained with minibatch SGD.

    Subclasses choose the objective by overriding ``loss``.
    """

    def __init__(self):
        # Weight matrix of shape (D, C); created lazily by the first train().
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=True):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there
          are N training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels;
          y[i] = c means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing.
        - batch_size: (integer) number of training examples to use at each
          step.
        - verbose: (boolean) If true, print progress during optimization.
          Note that it defaults to True.

        Outputs:
        A list containing the value of the loss function at each training
        iteration.
        """
        num_train, dim = X.shape
        # Assume y takes values 0...K-1 where K is the number of classes.
        num_classes = np.max(y) + 1
        if self.W is None:
            # Lazily initialize W with small random values.
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run minibatch stochastic gradient descent to optimize W.
        loss_history = []
        for it in range(num_iters):
            # Draw a random minibatch. Sampling with replacement is faster
            # than sampling without replacement and, unlike replace=False,
            # does not fail when batch_size exceeds num_train.
            sample_index = np.random.choice(num_train, batch_size,
                                            replace=True)
            X_batch = X[sample_index, :]  # (batch_size, D)
            y_batch = y[sample_index]     # (batch_size,)

            # Evaluate loss and gradient on the minibatch.
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # Parameter update: step against the gradient.
            self.W += -learning_rate * grad

            if verbose and it % 100 == 0:
                # Single pre-formatted argument: valid under Python 2 and 3.
                print('Iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels
        for data points.

        Inputs:
        - X: A numpy array of shape (N, D), laid out like the training data
          passed to train(): one D-dimensional point per row.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a
          1-dimensional array of length N, and each element is an integer
          giving the predicted class.
        """
        # scores is (N, C); the prediction is the highest-scoring class of
        # each row, hence argmax over axis 1.
        scores = X.dot(self.W)
        return np.argmax(scores, axis=1)

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the
          minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W

        Raises:
        - NotImplementedError: always, in this base class.
        """
        raise NotImplementedError('Subclasses must override loss()')


class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
Softmax_test.py
#-*-coding:utf-8-*-from __future__ import print_functionimport randomimport numpy as npfrom cs231n.data_utils import load_CIFAR10import matplotlib.pyplot as plt#matplotlib inlineplt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plotsplt.rcParams['image.interpolation'] = 'nearest'plt.rcParams['image.cmap'] = 'gray'cifar10_dir = 'data/cifar10'X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)num_training=49000num_validation=1000num_test=1000num_dev=500mask = list(range(num_training,num_training+num_validation))x_val = X_train[mask]y_val = y_train[mask]mask = list(range(num_training))x_train = X_train[mask]y_train = y_train[mask]mask = list(range(num_test))x_test = X_test[mask]y_test = y_test[mask]mask = np.random.choice(num_training, num_dev, replace=False)x_dev = X_train[mask]y_dev = y_train[mask]#reshape the image date into rowsx_train = np.reshape(x_train,(x_train.shape[0],-1))x_test = np.reshape(x_test,(x_test.shape[0],-1))x_dev = np.reshape(x_dev,(x_dev.shape[0],-1))x_val = np.reshape(x_val,(x_val.shape[0],-1))# Normalize the data: subtract the mean imagemean_image = np.mean(x_train,axis=0)x_train -= mean_imagex_val -= mean_imagex_dev -= mean_imagex_test -= mean_image# add bias dimension and transform into columnsx_train = np.hstack([x_train,np.ones((x_train.shape[0],1))])x_test = np.hstack([x_test,np.ones((x_test.shape[0],1))])x_dev = np.hstack([x_dev,np.ones((x_dev.shape[0],1))])x_val = np.hstack([x_val,np.ones((x_val.shape[0],1))])from cs231n.classifiers.softmax import softmax_loss_naiveimport time# Generate a random softmax weight matrix and use it to compute the loss.w = np.random.randn(x_train.shape[1],10)*0.0001loss, grad = softmax_loss_naive(w, x_dev, y_dev, 0.0)'''# As we did for the SVM, use numeric gradient checking as a debugging tool.# The numeric gradient should be close to the analytic gradient.from cs231n.gradient_check import grad_check_sparsef = lambda w: softmax_loss_naive(w, x_dev, y_dev, 0.0)[0]grad_numerical = 
grad_check_sparse(f, w, grad, 10)# similar to SVM case, do another gradient check with regularizationloss, grad = softmax_loss_naive(w, x_dev, y_dev, 5e1)f = lambda w: softmax_loss_naive(w, x_dev, y_dev, 5e1)[0]grad_numerical = grad_check_sparse(f, w, grad, 10)''''''# Now that we have a naive implementation of the softmax loss function and its gradient,# implement a vectorized version in softmax_loss_vectorized.# The two versions should compute the same results, but the vectorized version should be# much faster.tic = time.time()loss_naive, grad_naive = softmax_loss_naive(w, x_dev, y_dev, 0.000005)toc = time.time()print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))from cs231n.classifiers.softmax import softmax_loss_vectorizedtic = time.time()loss_vectorized, grad_vectorized = softmax_loss_vectorized(w, x_dev, y_dev, 0.000005)toc = time.time()print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))# As we did for the SVM, we use the Frobenius norm to compare the two versions# of the gradient.grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))print('Gradient difference: %f' % grad_difference)'''# Use the validation set to tune hyperparameters (regularization strength and# learning rate). You should experiment with different ranges for the learning# rates and regularization strengths; if you are careful you should be able to# get a classification accuracy of over 0.35 on the validation set.from cs231n.classifiers import Softmaxresults = {}best_val = -1best_softmax = Nonelearning_rates = [1e-7, 5e-7]regularization_strengths = [2.5e4, 5e4]################################################################################# TODO: ## Use the validation set to set the learning rate and regularization strength. ## This should be identical to the validation that you did for the SVM; save ## the best trained softmax classifer in best_softmax. 
#################################################################################iters= 1000for lr in learning_rates: for rs in regularization_strengths: softmax = Softmax() softmax.train(x_train, y_train, learning_rate=lr, reg=rs, num_iters=iters) y_train_pred = softmax.predict(x_train) acc_train = np.mean(y_train == y_train_pred) y_val_pred = softmax.predict(x_val) acc_val = np.mean(y_val == y_val_pred) results[(lr, rs)] = (acc_train, acc_val) if best_val < acc_val: best_val = acc_val best_softmax = softmax################################################################################# END OF YOUR CODE ################################################################################## Print out results.for lr, reg in sorted(results): train_accuracy, val_accuracy = results[(lr, reg)] print('lr %e reg %e train accuracy: %f val accuracy: %f' % ( lr, reg, train_accuracy, val_accuracy))print('best validation accuracy achieved during cross-validation: %f' % best_val)# evaluate on test set# Evaluate the best softmax on test sety_test_pred = best_softmax.predict(x_test)test_accuracy = np.mean(y_test == y_test_pred)print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy, ))# Visualize the learned weights for each classw = best_softmax.W[:-1, :] # strip out the biasw = w.reshape(32, 32, 3, 10)w_min, w_max = np.min(w), np.max(w)classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']for i in range(10): plt.subplot(2, 5, i + 1) # Rescale the weights to be between 0 and 255 wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min) plt.imshow(wimg.astype('uint8')) plt.axis('off') plt.title(classes[i])plt.show()
五、简单的两层神经网络
这里只是一个简单的神经网络的写法,在下次作业会有一个很好很强大的神经网络等我们去构造。
BP可以看这幅图来理解:
# -*- coding:utf-8 -*-from __future__ import print_functionimport numpy as npimport matplotlib.pyplot as pltfrom past.builtins import xrangeclass TwoLayerNet(object): """ A two-layer fully-connected neural network. The net has an input dimension of N, a hidden layer dimension of H, and performs classification over C classes. We train the network with a softmax loss function and L2 regularization on the weight matrices. The network uses a ReLU nonlinearity after the first fully connected layer. In other words, the network has the following architecture: input - fully connected layer - ReLU - fully connected layer - softmax The outputs of the second fully-connected layer are the scores for each class. """ def __init__(self, input_size, hidden_size, output_size, std=1e-4): """ Initialize the model. Weights are initialized to small random values and biases are initialized to zero. Weights and biases are stored in the variable self.params, which is a dictionary with the following keys: W1: First layer weights; has shape (D, H) b1: First layer biases; has shape (H,) W2: Second layer weights; has shape (H, C) b2: Second layer biases; has shape (C,) Inputs: - input_size: The dimension D of the input data. - hidden_size: The number of neurons H in the hidden layer. - output_size: The number of classes C. """ self.params = {} self.params['W1'] = std * np.random.randn(input_size, hidden_size) self.params['b1'] = np.zeros(hidden_size) self.params['W2'] = std * np.random.randn(hidden_size, output_size) self.params['b2'] = np.zeros(output_size) def loss(self, X, y=None, reg=0.0): """ Compute the loss and gradients for a two layer fully connected neural network. Inputs: - X: Input data of shape (N, D). Each X[i] is a training sample. - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is an integer in the range 0 <= y[i] < C. 
This parameter is optional; if it is not passed then we only return scores, and if it is passed then we instead return the loss and gradients. - reg: Regularization strength. Returns: If y is None, return a matrix scores of shape (N, C) where scores[i, c] is the score for class c on input X[i]. If y is not None, instead return a tuple of: - loss: Loss (data loss and regularization loss) for this batch of training samples. - grads: Dictionary mapping parameter names to gradients of those parameters with respect to the loss function; has the same keys as self.params. """ # Unpack variables from the params dictionary W1, b1 = self.params['W1'], self.params['b1'] W2, b2 = self.params['W2'], self.params['b2'] N, D = X.shape # Compute the forward pass scores = None ############################################################################# # TODO: Perform the forward pass, computing the class scores for the input. # # Store the result in the scores variable, which should be an array of # # shape (N, C). # ############################################################################# # evaluate class scores, [N x K] hidden_layer = np.maximum(0, np.dot(X, W1) + b1) # ReLU activation scores = np.dot(hidden_layer, W2) + b2 ############################################################################# # END OF YOUR CODE # ############################################################################# # If the targets are not given then jump out, we're done if y is None: return scores # Compute the loss loss = None ############################################################################# # TODO: Finish the forward pass, and compute the loss. This should include # # both the data loss and L2 regularization for W1 and W2. Store the result # # in the variable loss, which should be a scalar. Use the Softmax # # classifier loss. 
# ############################################################################# # compute the class probabili。ties # scores -= np.max(scores, axis = 1)[:, np.newaxis] exp_scores = np.exp(scores - np.max(scores,axis=1,keepdims=True)) #exp_scores = np.exp(scores) probs = exp_scores/np.sum(exp_scores,axis=1,keepdims=True) correct_logprobs = np.log(probs[range(N), y])#find the num corrosd to y data_loss = -np.sum(correct_logprobs)/N reg_loss = 0.5*reg*(np.sum(W1*W1)+np.sum(W2*W2)) loss = data_loss + reg_loss ############################################################################# # END OF YOUR CODE # ############################################################################# # Backward pass: compute gradients grads = {} ############################################################################# # TODO: Compute the backward pass, computing the derivatives of the weights # # and biases. Store the results in the grads dictionary. For example, # # grads['W1'] should store the gradient on W1, and be a matrix of same size # ############################################################################# # compute the gradient on scores dscores = probs dscores[range(N), y] -= 1 dscores /= N # backpropate the gradient to the parameters # first backprop into parameters W2 and b2 dW2 = np.dot(hidden_layer.T, dscores) db2 = np.sum(dscores, axis=0, keepdims=False) # next backprop into hidden layer dhidden = np.dot(dscores, W2.T) #上一层的损失*该层的权重 # backprop the ReLU non-linearity dhidden[hidden_layer <= 0] = 0 # finally into W,b dW1 = np.dot(X.T, dhidden) db1 = np.sum(dhidden, axis=0, keepdims=False) # add regularization gradient contribution dW2 += reg * W2 dW1 += reg * W1 grads['W1'] = dW1 grads['W2'] = dW2 grads['b1'] = db1 grads['b2'] = db2 # print dW1.shape, dW2.shape, db1.shape, db2.shape ############################################################################# # END OF YOUR CODE # ############################################################################# return 
loss, grads def train(self, X, y, X_val, y_val, learning_rate=1e-3, learning_rate_decay=0.95, reg=5e-6, num_iters=100, batch_size=200, verbose=False): """ Train this neural network using stochastic gradient descent. Inputs: - X: A numpy array of shape (N, D) giving training data. - y: A numpy array f shape (N,) giving training labels; y[i] = c means that X[i] has label c, where 0 <= c < C. - X_val: A numpy array of shape (N_val, D) giving validation data. - y_val: A numpy array of shape (N_val,) giving validation labels. - learning_rate: Scalar giving learning rate for optimization. - learning_rate_decay: Scalar giving factor used to decay the learning rate after each epoch. - reg: Scalar giving regularization strength. - num_iters: Number of steps to take when optimizing. - batch_size: Number of training examples to use per step. - verbose: boolean; if true print progress during optimization. """ num_train = X.shape[0] iterations_per_epoch = max(num_train / batch_size, 1) # Use SGD to optimize the parameters in self.model loss_history = [] train_acc_history = [] val_acc_history = [] for it in xrange(num_iters): X_batch = None y_batch = None ######################################################################### # TODO: Create a random minibatch of training data and labels, storing # # them in X_batch and y_batch respectively. 
# ######################################################################### sample_index = np.random.choice(num_train, batch_size, replace=True) X_batch = X[sample_index, :] y_batch = y[sample_index] ######################################################################### # END OF YOUR CODE # ######################################################################### # Compute loss and gradients using the current minibatch loss, grads = self.loss(X_batch, y=y_batch, reg=reg) loss_history.append(loss) ######################################################################### # TODO: Use the gradients in the grads dictionary to update the # # parameters of the network (stored in the dictionary self.params) # # using stochastic gradient descent. You'll need to use the gradients # # stored in the grads dictionary defined above. # ######################################################################### dW1 = grads['W1'] dW2 = grads['W2'] db1 = grads['b1'] db2 = grads['b2'] self.params['W1'] -= learning_rate * dW1 self.params['W2'] -= learning_rate * dW2 self.params['b1'] -= learning_rate * db1 self.params['b2'] -= learning_rate * db2 ######################################################################### # END OF YOUR CODE # ######################################################################### if verbose and it % 100 == 0: print('iteration %d / %d: loss %f' % (it, num_iters, loss)) # Every epoch, check train and val accuracy and decay learning rate. if it % iterations_per_epoch == 0: # Check accuracy train_acc = (self.predict(X_batch) == y_batch).mean() val_acc = (self.predict(X_val) == y_val).mean() train_acc_history.append(train_acc) val_acc_history.append(val_acc) # Decay learning rate learning_rate *= learning_rate_decay return { 'loss_history': loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def predict(self, X): """ Use the trained weights of this two-layer network to predict labels for data points. 
For each data point we predict scores for each of the C classes, and assign each data point to the class with the highest score. Inputs: - X: A numpy array of shape (N, D) giving N D-dimensional data points to classify. Returns: - y_pred: A numpy array of shape (N,) giving predicted labels for each of the elements of X. For all i, y_pred[i] = c means that X[i] is predicted to have class c, where 0 <= c < C. """ y_pred = None ########################################################################### # TODO: Implement this function; it should be VERY simple! # ########################################################################### hidden_lay = np.maximum(0, np.dot(X, self.params['W1']) + self.params['b1']) y_pred = np.argmax(np.dot(hidden_lay, self.params['W2']), axis=1) ########################################################################### # END OF YOUR CODE # ########################################################################### return y_pred
TwoLayersNet_test.py
from __future__ import print_functionimport numpy as npimport matplotlib.pyplot as pltfrom cs231n.classifiers.neural_net import TwoLayerNet'''return ralatives error'''def rel_error(x,y): return np.max(np.abs(x-y)/(np.maximum(1e-8,np.abs(x)+np.abs(y))))'''#creat a small net and some toy data to check your implementationsinput_size = 4hidden_size = 10num_classes = 3num_inputs = 5def init_model(): np.random.seed(0) return TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)def init_data(): np.random.seed(1) x = 10*np.random.randn(num_inputs,input_size) #y = ([0, 1, 2, 2, 1]) y = np.array([0,1,2,2,1]) return x,yx,y = init_data()net = init_model()scores = net.loss(x)print (scores)print('correct scores:')correct_scores = np.asarray([ [-0.81233741, -1.27654624, -0.70335995], [-0.17129677, -1.18803311, -0.47310444], [-0.51590475, -1.01354314, -0.8504215 ], [-0.15419291, -0.48629638, -0.52901952], [-0.00618733, -0.12435261, -0.15226949]])print(correct_scores)print('Difference between your scores and correct scores:')print(np.sum(np.abs(scores - correct_scores)))loss, _ = net.loss(x, y, reg=0.05)correct_loss = 1.30378789133print ('loss:%f' % loss)# should be very small, we get < 1e-12print('Difference between your loss and correct loss:')print(np.sum(np.abs(loss - correct_loss)))from cs231n.gradient_check import eval_numerical_gradient# Use numeric gradient checking to check your implementation of the backward pass.# If your implementation is correct, the difference between the numeric and# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.loss, grads = net.loss(x, y, reg=0.05)# these should all be less than 1e-8 or sofor param_name in grads: f = lambda W: net.loss(x, y, reg=0.05)[0] param_grad_num = eval_numerical_gradient(f, net.params[param_name], verbose=False) print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))net = init_model()stats = net.train(x, y, x, y, learning_rate=1e-1, reg=5e-6, 
num_iters=100, batch_size=4,verbose=False)print('Final training loss: ', stats['loss_history'][-1])# plot the loss historyplt.plot(stats['loss_history'])plt.xlabel('iteration')plt.ylabel('training loss')plt.title('Training Loss history')plt.show()'''from cs231n.data_utils import load_CIFAR10def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'data/cifar10' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test# Invoke the above function to get our data.x_train, y_train, x_val, y_val, x_test, y_test = get_CIFAR10_data()print('Train data shape: ', x_train.shape)print('Train labels shape: ', y_train.shape)print('Validation data shape: ', x_val.shape)print('Validation labels shape: ', y_val.shape)print('Test data shape: ', x_test.shape)print('Test labels shape: ', y_test.shape)input_size = 32*32*3hidden_size = 50num_classes = 10'''net = TwoLayerNet(input_size, hidden_size, num_classes)stats = net.train(x_train, y_train, x_val, y_val, learning_rate=1e-4, learning_rate_decay=0.95, reg=0.25, num_iters = 10000, batch_size=200,verbose= True)# Predict on the validation 
setval_acc = (net.predict(x_val) == y_val).mean()print('Validation accuracy: ', val_acc)# Plot the loss function and train / validation accuraciesplt.subplot(2, 1, 1)plt.plot(stats['loss_history'])plt.title('Loss history')plt.xlabel('Iteration')plt.ylabel('Loss')plt.subplot(2, 1, 2)plt.plot(stats['train_acc_history'], label='train')plt.plot(stats['val_acc_history'], label='val')plt.title('Classification accuracy history')plt.xlabel('Epoch')plt.ylabel('Clasification accuracy')plt.show()from cs231n.vis_utils import visualize_grid# Visualize the weights of the networkdef show_net_weights(net): W1 = net.params['W1'] W1 = W1.reshape(32, 32, 3, -1).transpose(3, 0, 1, 2) plt.imshow(visualize_grid(W1, padding=3).astype('uint8')) plt.gca().axis('off') plt.show()show_net_weights(net)'''best_net = None # store the best model into this \best_acc = 0hidden_size_choice = [x*100+50 for x in xrange(11)]reg_choice = [0.1, 0.5, 5, 15, 50, 100, 1000]learning_rate_choice = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 1e-1, 1]batch_size_choice = [8, 40, 80, 160, 500, 1000]#hidden_size_choice = [400]#learning_rate_choice = [3e-3]#reg_choice = [0.02, 0.05, 0.1]#batch_size_choice =[500]num_iters_choice = [5000]for batch_size_curr in batch_size_choice: for reg_cur in reg_choice: for learning_rate_curr in learning_rate_choice: for hidden_size_curr in hidden_size_choice: for num_iters_curr in num_iters_choice: print print ("current training hidden_size:",hidden_size_curr) print ("current training learning_rate:",learning_rate_curr) print ("current training reg:",reg_cur) print ("current training batch_size:",batch_size_curr) net = TwoLayerNet(input_size, hidden_size_curr, num_classes) stats = net.train(x_train, y_train, x_val, y_val, learning_rate=learning_rate_curr, learning_rate_decay=0.95,reg=reg_cur, num_iters=num_iters_curr, batch_size=batch_size_curr, verbose=True) val_acc = (net.predict(x_val) == y_val).mean() print ("current val_acc:%f" % val_acc) if val_acc>best_acc: best_acc = val_acc best_net = 
net best_stats = stats best_learning_rate = learning_rate_curr best_reg = reg_cur best_batch_size = batch_size_curr print print ("best_acc:",best_acc) print ("best hidden_size:",best_net.params['W1'].shape[1]) print ("best learning_rate:",best_learning_rate) print ("best reg:",best_reg) print ("best batch_size:",best_batch_size) print
阅读全文
0 0
- CS231n+assignment1(作业一)
- cs231n课程作业assignment1(KNN)
- cs231n课程作业assignment1(SVM)
- cs231n课程作业assignment1(Softmax)
- CS231n-assignment1(作业1)-knn
- CS231n-Assignment1(作业1)-SVM
- CS231n-Assignment1(作业1)-softmax
- cs231n assignment1
- CS231n-assignment1
- [CS231n@Stanford] Assignment1-Q1
- cs231n assignment1 tips
- cs231n:assignment1:KNN解答
- CS231n Assignment1--Q1
- CS231n Assignment1--Q2
- CS231n Assignment1--Q3
- CS231n Assignment1--Q4
- CS231n Assignment1--Q5
- cs231n assignment1--svm
- zookeeper单机伪集群配置
- [leetcode]128. Longest Consecutive Sequence
- java反射应用
- UVa 11059(枚举)
- windows 7安装CartoCss
- CS231n+assignment1(作业一)
- SpringMVC上传文件的简单代码实例
- 高效程序员的特征
- 《Keyword Search over RDF Graphs》——读书笔记
- Java实现-删除元素
- 用户体验总结
- 详解C#中的反射
- Java实现-删除链表中倒数第n个节点
- failed to find Build Tools revision 23.0.0 rc2 Install Build Tools 23.0.0 rc2 and sync project