Implementing a Deep Neural Network from Scratch in Python

from sklearn.datasets import make_classification
from sklearn import preprocessing
import numpy as np
from matplotlib import pyplot as plt
from copy import deepcopy


def ReLu(X):
    return X * (X > 0)


def dReLu(X):
    return 1. * (X > 0)


def Sigmod(X):
    return 1.0 / (1.0 + np.exp(-X))


def dSigmod(X):
    return Sigmod(X) * (1 - Sigmod(X))


# Generate a toy 2-feature binary classification set and split 70/30 into train/test
X, Y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           n_clusters_per_class=1, n_samples=6000)
X_train = X[0:int(X.shape[0] * 0.7), :]
Y_train = Y[0:int(X.shape[0] * 0.7)]
X_test = X[int(X.shape[0] * 0.7):, :]
Y_test = Y[int(X.shape[0] * 0.7):]

scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train = X_train.T  # n*m: number of features * number of samples
Y_train = Y_train.reshape(1, Y_train.shape[0])
X_test = X_test.T    # n*m
Y_test = Y_test.reshape(1, Y_test.shape[0])
X = X.T

# Build the network
# hyperparameters
m = X_train.shape[1]
n = [X_train.shape[0], 8, 4, 2, 1]      # units per layer, including the input layer
f = [0, ReLu, ReLu, ReLu, Sigmod]       # activation per layer; the leading 0 is a placeholder
df = [0, dReLu, dReLu, dReLu, dSigmod]  # derivative of each activation; the leading 0 is a placeholder
# n = [X_train.shape[0], 1]             # units per layer, including the input layer
# f = [0, Sigmod]                       # activation per layer; the leading 0 is a placeholder
# df = [0, dSigmod]                     # derivative of each activation; the leading 0 is a placeholder
layers = len(n) - 1

# parameters
W = [0 for i in range(len(n))]  # the leading 0 is padding so that W[l] holds the parameters of layer l
b = [0 for i in range(len(n))]
for l in range(1, len(n)):
    # He initialization to mitigate vanishing/exploding gradients; np.sqrt(1.0/n[l-1]) also works, but 2.0 suits ReLu better
    W[l] = np.random.randn(n[l], n[l - 1]) * np.sqrt(2.0 / n[l - 1])
    b[l] = np.random.randn(n[l], 1)

# intermediate values
Z = [0 for i in range(len(n))]
A = [0 for i in range(len(n))]
dZ = [0 for i in range(len(n))]
dA = [0 for i in range(len(n))]
dW = [0 for i in range(len(n))]
db = [0 for i in range(len(n))]

# training
# hyperparameters
rate = 0.01
iteration = 5000
lambd = 0.01

# for plotting
loss_train = []
loss_test = []
accuracy_train = 0
accuracy_test = 0

# if debug is True, run gradient checking
debug = False
epsilon = 0.00001

for i in range(iteration):
    # forward
    Z[0] = X_train
    A[0] = X_train
    for l in range(1, len(n)):
        Z[l] = np.dot(W[l], A[l - 1]) + b[l]
        A[l] = f[l](Z[l])
    assert A[layers].shape == (n[layers], m)
    assert not np.any(A[layers] < 0)
    l2_norm = sum([np.sum(w ** 2) for w in W]) * lambd / (2.0 * m)
    # cross-entropy cost plus the L2 term; the L2 term only affects dW[l]
    J_train = -(np.dot(np.log(A[layers]), Y_train.T) + np.dot(np.log(1 - A[layers]), (1 - Y_train).T)) / m + l2_norm
    # predict on the training set
    Y_pred = 1 * (A[layers] > 0.5)
    accuracy_train = (Y_pred == Y_train).mean()
#     print(J_train)
    # backward
    dA[layers] = -Y_train / A[layers] + (1 - Y_train) / (1 - A[layers])  # output layer
    for l in range(len(n) - 1, 0, -1):
        dZ[l] = dA[l] * df[l](Z[l])  # for the output layer this simplifies to dZ = A - Y_train
        assert dZ[l].shape == Z[l].shape
        dW[l] = np.dot(dZ[l], A[l - 1].T) / m + lambd * W[l] / m
        assert dW[l].shape == W[l].shape
        db[l] = np.sum(dZ[l], axis=1, keepdims=True) / m
        assert db[l].shape == b[l].shape
        dA[l - 1] = np.dot(W[l].T, dZ[l])
    # gradient checking
    if debug:
        W_big = deepcopy(W)
        b_big = deepcopy(b)
        W_small = deepcopy(W)
        b_small = deepcopy(b)
        dW_diff = deepcopy(W)
        db_diff = deepcopy(b)
        Z_big = deepcopy(Z)
        A_big = deepcopy(A)
        Z_small = deepcopy(Z)
        A_small = deepcopy(A)
        # flatten parameters and gradients to vectors
        theta = np.array([])
        dtheta = np.array([])  # stores dW and db for the check
        for l in range(1, len(n)):
            theta = np.concatenate([theta, W[l].flatten()])
            theta = np.concatenate([theta, b[l].flatten()])
            dtheta = np.concatenate([dtheta, dW[l].flatten()])
            dtheta = np.concatenate([dtheta, db[l].flatten()])
        # estimate the derivative with respect to every component of theta
        dtheta_debug = np.zeros(dtheta.shape)
        for t in range(len(theta)):
            # perturb one component up and down by epsilon
            theta_big = theta.copy()
            theta_small = theta.copy()
            theta_big[t] = theta[t] + epsilon
            theta_small[t] = theta[t] - epsilon
            node_cnt = 0
            # restore the perturbed vectors back into W and b
            for l in range(1, len(n)):
                W_big[l] = theta_big[node_cnt:node_cnt + n[l] * n[l - 1]].reshape((n[l], n[l - 1]))
                W_small[l] = theta_small[node_cnt:node_cnt + n[l] * n[l - 1]].reshape((n[l], n[l - 1]))
                node_cnt = node_cnt + n[l] * n[l - 1]
                b_big[l] = theta_big[node_cnt:node_cnt + n[l] * 1].reshape((n[l], 1))
                b_small[l] = theta_small[node_cnt:node_cnt + n[l] * 1].reshape((n[l], 1))
                node_cnt = node_cnt + n[l] * 1
            # forward passes with the perturbed parameters
            Z_big[0] = X_train
            A_big[0] = X_train
            Z_small[0] = X_train
            A_small[0] = X_train
            for l in range(1, len(n)):
                Z_big[l] = np.dot(W_big[l], A_big[l - 1]) + b_big[l]
                A_big[l] = f[l](Z_big[l])
                Z_small[l] = np.dot(W_small[l], A_small[l - 1]) + b_small[l]
                A_small[l] = f[l](Z_small[l])
            l2_norm_big = sum([np.sum(w ** 2) for w in W_big]) * lambd / (2.0 * m)
            J_train_big = -(np.dot(np.log(A_big[layers]), Y_train.T) + np.dot(np.log(1 - A_big[layers]), (1 - Y_train).T)) / m + l2_norm_big
            l2_norm_small = sum([np.sum(w ** 2) for w in W_small]) * lambd / (2.0 * m)
            J_train_small = -(np.dot(np.log(A_small[layers]), Y_train.T) + np.dot(np.log(1 - A_small[layers]), (1 - Y_train).T)) / m + l2_norm_small
            dtheta_debug[t] = (J_train_big - J_train_small).item() / (2.0 * epsilon)
        d_diff = dtheta - dtheta_debug
        node_cnt = 0
        # restore the differences back into dW and db shapes
        for l in range(1, len(n)):
            dW_diff[l] = d_diff[node_cnt:node_cnt + n[l] * n[l - 1]].reshape((n[l], n[l - 1]))
            node_cnt = node_cnt + n[l] * n[l - 1]
            db_diff[l] = d_diff[node_cnt:node_cnt + n[l] * 1].reshape((n[l], 1))
            node_cnt = node_cnt + n[l] * 1
        grad_diff = np.sqrt(np.sum((dtheta - dtheta_debug) ** 2)) / (np.sqrt(np.sum(dtheta ** 2)) + np.sqrt(np.sum(dtheta_debug ** 2)))
#         print("dtheta diff: %f" % grad_diff)
    # gradient descent update
    for l in range(len(n) - 1, 0, -1):
        W[l] -= rate * dW[l]
        b[l] -= rate * db[l]
#     print("Iteration %d Loss: %f" % (i, J_train[0][0]))
    # predict on the test set
    A_tmp = X_test
    for l in range(1, len(n)):
        Z_tmp = np.dot(W[l], A_tmp) + b[l]
        A_tmp = f[l](Z_tmp)
    J_test = -(np.dot(np.log(A_tmp), Y_test.T) + np.dot(np.log(1 - A_tmp), (1 - Y_test).T)) / X_test.shape[1]
    Y_pred = 1 * (A_tmp > 0.5)
    accuracy_test = (Y_pred == Y_test).mean()
    # record the losses
    loss_train.append(J_train[0][0])
    loss_test.append(J_test[0][0])

# final accuracy
print("accuracy_train: %f" % accuracy_train)
print("accuracy_test: %f" % accuracy_test)

# training (blue) and test (red) loss curves
plt.figure(num=0, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.plot(range(iteration), loss_train, c="blue")
plt.plot(range(iteration), loss_test, c="red")
plt.show()

# scatter plot of the raw data, colored by class
plt.figure(num=None, figsize=(6, 8), dpi=80, facecolor='w', edgecolor='k')
plt.scatter(X[0], X[1], marker='o', c=Y, s=5, edgecolor='k')
plt.show()
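A note on the backward pass: the in-code comment that the output-layer dZ simplifies to A - Y_train follows directly from pairing the sigmoid output with the cross-entropy cost used for J_train. Ignoring the L2 term (it only enters through dW) and the 1/m factor (which the code applies later when forming dW and db), the derivation is:

\frac{\partial J}{\partial A^{[L]}} \propto -\frac{Y}{A^{[L]}} + \frac{1-Y}{1-A^{[L]}}, \qquad \sigma'(Z^{[L]}) = A^{[L]} \odot \bigl(1 - A^{[L]}\bigr)

dZ^{[L]} = \left(-\frac{Y}{A^{[L]}} + \frac{1-Y}{1-A^{[L]}}\right) \odot A^{[L]} \odot \bigl(1 - A^{[L]}\bigr) = A^{[L]} - Y

For the hidden ReLu layers the same chain rule gives dZ^{[l]} = dA^{[l]} \odot \mathbf{1}\{Z^{[l]} > 0\}, which is exactly what dA[l] * df[l](Z[l]) computes.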
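The debug branch is a standard centered-difference gradient check: every W[l] and b[l] is flattened into a single vector theta, each component is perturbed by plus/minus epsilon, and the two-sided difference quotient of the cost is compared against the backpropagated gradient:

d\theta_{\text{approx}}[t] = \frac{J(\theta + \varepsilon e_t) - J(\theta - \varepsilon e_t)}{2\varepsilon}, \qquad \text{grad\_diff} = \frac{\lVert d\theta - d\theta_{\text{approx}} \rVert_2}{\lVert d\theta \rVert_2 + \lVert d\theta_{\text{approx}} \rVert_2}

As a common rule of thumb (not stated in the original script), a grad_diff on the order of 1e-7 or smaller suggests the analytic gradients are correct, while values near 1e-3 or larger usually point to a bug. Since the check performs a full forward pass per parameter, it is only practical to enable debug for a handful of iterations.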
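For reuse after training, the test-time forward pass at the end of the loop can be wrapped in a small helper. This is a minimal sketch, not part of the original script; the predict name is introduced here for illustration, and it assumes the trained W, b, f, and the fitted scaler from above are in scope:

def predict(W, b, f, X_new, scaler):
    # X_new: array of shape (samples, features) in the original, unscaled space (assumption)
    A = scaler.transform(X_new).T            # standardize, then transpose to (features, samples)
    for l in range(1, len(W)):
        A = f[l](np.dot(W[l], A) + b[l])     # same forward pass as used during training
    return 1 * (A > 0.5)                     # hard 0/1 labels, shape (1, samples)

# Hypothetical usage: X was transposed to n*m above, so X.T[:5] recovers five raw rows
# in (samples, features) layout.
# labels = predict(W, b, f, X.T[:5], scaler)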