Implementing a Deep Neural Network from Scratch in Python
The complete script below generates a two-feature binary classification dataset with scikit-learn, trains a fully connected network (three ReLU hidden layers, sigmoid output) by batch gradient descent with L2 regularization, optionally verifies the analytic gradients numerically, and finally plots the training and test loss curves.

from sklearn.datasets import make_classification
from sklearn import preprocessing
import numpy as np
from matplotlib import pyplot as plt
from copy import deepcopy

def ReLu(X):
    return X*(X>0)

def dReLu(X):
    return 1.*(X>0)

def Sigmod(X):
    return 1.0/(1.0+np.exp(-X))

def dSigmod(X):
    return Sigmod(X)*(1-Sigmod(X))

# generate data and split 70/30 into train/test
X, Y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           n_clusters_per_class=1, n_samples=6000)
X_train = X[0:int(X.shape[0]*0.7),:]
Y_train = Y[0:int(X.shape[0]*0.7)]
X_test = X[int(X.shape[0]*0.7):,:]
Y_test = Y[int(X.shape[0]*0.7):]

# standardize features using statistics from the training set only
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train = X_train.T  # n*m: features * samples
Y_train = Y_train.reshape(1, Y_train.shape[0])
X_test = X_test.T    # n*m
Y_test = Y_test.reshape(1, Y_test.shape[0])
X = X.T

# build the network
# hyperparameters
m = X_train.shape[1]
n = [X_train.shape[0], 8, 4, 2, 1]      # neurons per layer, including the input layer
f = [0, ReLu, ReLu, ReLu, Sigmod]       # activation per layer; the leading 0 is a placeholder
df = [0, dReLu, dReLu, dReLu, dSigmod]  # derivative of each activation; leading 0 is a placeholder
# a logistic-regression-sized alternative:
# n = [X_train.shape[0], 1]
# f = [0, Sigmod]
# df = [0, dSigmod]
layers = len(n)-1

# parameters; index 0 is a placeholder so that W[l] holds layer l's weights
W = [0 for i in range(len(n))]
b = [0 for i in range(len(n))]
for l in range(1, len(n)):
    # scale by np.sqrt(2.0/n[l-1]) (He initialization) to mitigate vanishing and
    # exploding gradients; np.sqrt(1.0/n[l-1]) also works, but 2.0 suits ReLU better
    W[l] = np.random.randn(n[l], n[l-1])*np.sqrt(2.0/n[l-1])
    b[l] = np.random.randn(n[l], 1)

# intermediate values
Z = [0 for i in range(len(n))]
A = [0 for i in range(len(n))]
dZ = [0 for i in range(len(n))]
dA = [0 for i in range(len(n))]
dW = [0 for i in range(len(n))]
db = [0 for i in range(len(n))]

# training hyperparameters
rate = 0.01
iteration = 5000
lambd = 0.01

# for plotting
loss_train = []
loss_test = []
accuracy_train = 0
accuracy_test = 0

# if debug is True, run gradient checking each iteration
debug = False
epsilon = 0.00001

for i in range(iteration):
    # forward
    Z[0] = X_train
    A[0] = X_train
    for l in range(1, len(n)):
        Z[l] = np.dot(W[l], A[l-1]) + b[l]
        A[l] = f[l](Z[l])
    assert A[layers].shape == (n[layers], m)
    assert not (A[layers] < 0).any()  # sigmoid output must be non-negative
    # the L2 penalty only affects dW[l] in the backward pass
    l2_norm = sum([np.sum(w**2) for w in W])*lambd/(2.0*m)
    J_train = -(np.dot(np.log(A[layers]), Y_train.T)
                + np.dot(np.log(1-A[layers]), (1-Y_train).T))/m + l2_norm

    # training accuracy
    Y_pred = 1*(A[layers] > 0.5)
    accuracy_train = (Y_pred == Y_train).mean()

    # backward
    dA[layers] = -Y_train/A[layers] + (1-Y_train)/(1-A[layers])  # output layer
    for l in range(len(n)-1, 0, -1):
        dZ[l] = dA[l]*df[l](Z[l])  # for the output layer this simplifies to A - Y_train
        assert dZ[l].shape == Z[l].shape
        dW[l] = np.dot(dZ[l], A[l-1].T)/m + lambd*W[l]/m
        assert dW[l].shape == W[l].shape
        db[l] = np.sum(dZ[l], axis=1, keepdims=True)/m
        assert db[l].shape == b[l].shape
        dA[l-1] = np.dot(W[l].T, dZ[l])

    # gradient check
    if debug:
        W_big = deepcopy(W)
        b_big = deepcopy(b)
        W_small = deepcopy(W)
        b_small = deepcopy(b)
        dW_diff = deepcopy(W)
        db_diff = deepcopy(b)
        Z_big = deepcopy(Z)
        A_big = deepcopy(A)
        Z_small = deepcopy(Z)
        A_small = deepcopy(A)
        # flatten parameters and analytic gradients to vectors
        theta = np.array([])
        dtheta = np.array([])  # stores dW, db for the check
        for l in range(1, len(n)):
            theta = np.concatenate([theta, W[l].flatten()])
            theta = np.concatenate([theta, b[l].flatten()])
            dtheta = np.concatenate([dtheta, dW[l].flatten()])
            dtheta = np.concatenate([dtheta, db[l].flatten()])
        # numerically estimate the gradient for every component of theta
        dtheta_debug = np.zeros(dtheta.shape)
        for t in range(len(theta)):
            # perturb one component up and down by epsilon
            theta_big = theta.copy()
            theta_small = theta.copy()
            theta_big[t] = theta[t] + epsilon
            theta_small[t] = theta[t] - epsilon
            node_cnt = 0
            # restore the "big" and "small" versions of W and b
            for l in range(1, len(n)):
                W_big[l] = theta_big[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
                W_small[l] = theta_small[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
                node_cnt = node_cnt+n[l]*n[l-1]
                b_big[l] = theta_big[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
                b_small[l] = theta_small[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
                node_cnt = node_cnt+n[l]*1
            # forward pass with both perturbed parameter sets
            Z_big[0] = X_train
            A_big[0] = X_train
            Z_small[0] = X_train
            A_small[0] = X_train
            for l in range(1, len(n)):
                Z_big[l] = np.dot(W_big[l], A_big[l-1]) + b_big[l]
                A_big[l] = f[l](Z_big[l])
                Z_small[l] = np.dot(W_small[l], A_small[l-1]) + b_small[l]
                A_small[l] = f[l](Z_small[l])
            l2_norm_big = sum([np.sum(w**2) for w in W_big])*lambd/(2.0*m)
            J_train_big = -(np.dot(np.log(A_big[layers]), Y_train.T)
                            + np.dot(np.log(1-A_big[layers]), (1-Y_train).T))/m + l2_norm_big
            l2_norm_small = sum([np.sum(w**2) for w in W_small])*lambd/(2.0*m)
            J_train_small = -(np.dot(np.log(A_small[layers]), Y_train.T)
                              + np.dot(np.log(1-A_small[layers]), (1-Y_train).T))/m + l2_norm_small
            dtheta_debug[t] = (J_train_big-J_train_small)/(2.0*epsilon)
        d_diff = dtheta - dtheta_debug
        node_cnt = 0
        # restore the differences to dW/db shapes for inspection
        for l in range(1, len(n)):
            dW_diff[l] = d_diff[node_cnt:node_cnt+n[l]*n[l-1]].reshape((n[l], n[l-1]))
            node_cnt = node_cnt+n[l]*n[l-1]
            db_diff[l] = d_diff[node_cnt:node_cnt+n[l]*1].reshape((n[l], 1))
            node_cnt = node_cnt+n[l]*1
        grad_diff = np.sqrt(np.sum((dtheta-dtheta_debug)**2)) \
            /(np.sqrt(np.sum(dtheta**2))+np.sqrt(np.sum(dtheta_debug**2)))
        # print("dtheta diff: %f" % grad_diff)

    # gradient descent step
    for l in range(len(n)-1, 0, -1):
        W[l] -= rate*dW[l]
        b[l] -= rate*db[l]
    # print("Iteration %d Loss: %f" % (i, J_train))

    # evaluate on the test set
    A_tmp = X_test
    for l in range(1, len(n)):
        Z_tmp = np.dot(W[l], A_tmp) + b[l]
        A_tmp = f[l](Z_tmp)
    J_test = -(np.dot(np.log(A_tmp), Y_test.T)
               + np.dot(np.log(1-A_tmp), (1-Y_test).T))/X_test.shape[1]
    Y_pred = 1*(A_tmp > 0.5)
    accuracy_test = (Y_pred == Y_test).mean()

    # record losses for plotting
    loss_train.append(J_train[0][0])
    loss_test.append(J_test[0][0])

# final accuracy
print("accuracy_train: %f" % accuracy_train)
print("accuracy_test: %f" % accuracy_test)

# loss curves: train in blue, test in red
plt.figure(num=0, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(range(iteration), loss_train, c="blue")
plt.plot(range(iteration), loss_test, c="red")
plt.show()

# scatter plot of the raw dataset colored by class
plt.figure(num=None, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.scatter(X[0], X[1], marker='o', c=Y, s=5, edgecolor='k')
plt.show()
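For reference, the training loop implements the standard L2-regularized binary cross-entropy loss and its backpropagation gradients. In the code's notation (L = layers, A[l] = f[l](Z[l]), m training samples), written in LaTeX:

J = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log a^{[L](i)} + \big(1-y^{(i)}\big)\log\big(1-a^{[L](i)}\big)\Big] + \frac{\lambda}{2m}\sum_{l=1}^{L}\lVert W^{[l]}\rVert_F^2

dZ^{[L]} = A^{[L]} - Y, \qquad dW^{[l]} = \frac{1}{m}\,dZ^{[l]}A^{[l-1]\top} + \frac{\lambda}{m}W^{[l]}, \qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[l](i)}, \qquad dA^{[l-1]} = W^{[l]\top}dZ^{[l]}

The debug branch checks these analytic gradients against a centered finite difference and reports the relative error

d\theta^{\mathrm{num}}_t = \frac{J(\theta+\varepsilon e_t) - J(\theta-\varepsilon e_t)}{2\varepsilon}, \qquad \mathrm{grad\_diff} = \frac{\lVert d\theta - d\theta^{\mathrm{num}}\rVert_2}{\lVert d\theta\rVert_2 + \lVert d\theta^{\mathrm{num}}\rVert_2},

which should be on the order of epsilon when backpropagation is implemented correctly.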
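One caveat: if the sigmoid output saturates to exactly 0 or 1 in floating point, np.log(A[layers]) or np.log(1-A[layers]) produces -inf and the loss becomes NaN. A minimal guard, assuming the same (1, m) array shapes as the script (the helper name safe_cross_entropy and the clipping constant are my additions, not part of the original code):

import numpy as np

EPS = 1e-12  # assumed clipping constant; any tiny positive value works

def safe_cross_entropy(A_out, Y, m):
    # clip activations away from 0 and 1 so the logs stay finite
    A_clip = np.clip(A_out, EPS, 1.0 - EPS)
    return -(np.dot(np.log(A_clip), Y.T)
             + np.dot(np.log(1.0 - A_clip), (1 - Y).T))/m

With this in place, J_train = safe_cross_entropy(A[layers], Y_train, m) + l2_norm would replace the raw expression in the loop.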
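Finally, the test-set evaluation inside the loop is just a forward pass with the current parameters. Factoring it into a helper makes the trained network reusable on new inputs; this is a sketch under the script's own variable conventions (the name predict is mine, not the author's):

def predict(X_new, W, b, f):
    # X_new: (n_features, m_samples), already standardized with `scaler`
    # returns 0/1 class labels of shape (1, m_samples)
    A_tmp = X_new
    for l in range(1, len(W)):
        A_tmp = f[l](np.dot(W[l], A_tmp) + b[l])
    return 1*(A_tmp > 0.5)

# usage: labels = predict(scaler.transform(X_raw).T, W, b, f)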