60行代碼徒手實現深度神經網絡
01
準備數據集
采用的數據集是sklearn中的breast cancer數據集,30維特征,569個樣本。訓練前進行MinMax標準化縮放至[0,1]區間。按照75/25比例劃分成訓練集和驗證集。
# Load the breast-cancer dataset, split it 75/25 and min-max scale it.
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

breast = datasets.load_breast_cancer()
data = breast['data']      # raw (unscaled) feature matrix
target = breast['target']  # binary class labels

# Split first, with an explicit ratio and a fixed seed so that every run
# sees the same train/validation partition.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.25, random_state=0)

# Fit the scaler on the training split only. Fitting it on the full dataset
# would leak the validation set's min/max into training. Validation features
# are transformed with the training statistics and may therefore fall
# slightly outside [0, 1].
scaler = preprocessing.MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
02
模型結構圖

03
正反傳播公式

04
NN實現代碼
import numpy as np
import pandas as pd


# Activation functions and their derivatives.
#
# Every function is element-wise: it takes the pre-activation value(s) ``z``
# (scalar or ndarray) and returns values of the same shape. Each derivative
# ``d_<Name>`` is expressed in terms of ``z`` as well, not of the activation
# output.

def ReLu(z):
    """Rectified linear unit: ``max(0, z)``."""
    return np.maximum(0.0, z)


def d_ReLu(z):
    """Derivative of ReLu: 0 where ``z < 0``, 1 elsewhere (including 0)."""
    return np.where(z < 0, 0, 1)


def LeakyReLu(z):
    """Leaky ReLU with slope 0.01 on the negative side."""
    return np.maximum(0.01 * z, z)


def d_LeakyReLu(z):
    """Derivative of LeakyReLu: 0.01 where ``z < 0``, 1 elsewhere."""
    return np.where(z < 0, 0.01, 1)


def Sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Always returns an ndarray (0-d for scalar input).
    """
    z = np.asarray(z)
    # exp() is only applied to non-positive numbers, so it can underflow to
    # 0 but never overflow, however large |z| is.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))


def d_Sigmoid(z):
    """Derivative of Sigmoid: ``a * (1 - a)`` with ``a = Sigmoid(z)``."""
    a = Sigmoid(z)
    return a * (1 - a)


# Hyperbolic tangent is used straight from NumPy.
Tanh = np.tanh


def d_Tanh(z):
    """Derivative of Tanh: ``1 - a**2`` with ``a = Tanh(z)``."""
    return 1 - Tanh(z) ** 2
class NNClassifier(object):
    """Fully connected feed-forward network for binary classification.

    Every hidden layer uses the activation named by ``gfunc``; the output
    layer always uses ``Sigmoid``. ``fit`` trains with full-batch gradient
    descent on the mean binary cross-entropy.

    Parameters
    ----------
    n : sequence
        Number of nodes per layer, input layer first. ``n[0]`` is only a
        placeholder: ``fit`` overwrites it with the number of input
        features. The sequence is copied, so the caller's object (and the
        default) is never modified.
    alpha : float
        Learning rate of the gradient-descent update.
    ITERNUM : int
        Number of gradient-descent iterations performed by ``fit``.
    gfunc : str
        Name of the hidden-layer activation. Module-level callables named
        ``<gfunc>`` and ``d_<gfunc>`` must exist (this file defines 'ReLu',
        'LeakyReLu', 'Sigmoid' and 'Tanh').

    Attributes
    ----------
    dfJ : pandas.DataFrame
        Single column ``'J'`` holding the loss of every iteration.
    W, b : list
        Per-layer weights and biases after ``fit`` (index 0 is an unused
        ``nan`` placeholder); both are ``nan`` before ``fit`` is called.
    """

    def __init__(self, n=(np.nan, 5, 5, 1), alpha=0.1, ITERNUM=50000, gfunc='ReLu'):
        # Private copy: fit() writes the input width into n[0], which must
        # not leak into the caller's list or into a shared default.
        self.n = list(n)
        self.gfunc = gfunc
        self.alpha, self.ITERNUM = alpha, ITERNUM
        self.dfJ = pd.DataFrame(data=np.zeros((ITERNUM, 1)), columns=['J'])
        self.W, self.b = np.nan, np.nan

        # Per-layer activations: hidden layers share `gfunc`, the output
        # layer is Sigmoid, and the input layer has none.
        hidden_activation = self._lookup_activation(gfunc)
        self.g = [hidden_activation for _ in self.n]
        self.g[-1] = Sigmoid
        self.g[0] = np.nan

        # Derivative of the hidden-layer activation.
        self.d_gfunc = self._lookup_activation('d_' + gfunc)

    @staticmethod
    def _lookup_activation(name):
        """Return the module-level callable called ``name``.

        A plain namespace lookup replaces ``eval`` so that the string can
        only select an existing function and never execute arbitrary code.

        Raises
        ------
        ValueError
            If no module-level callable with that name exists.
        """
        func = globals().get(name)
        if not callable(func):
            raise ValueError('unknown activation function: {!r}'.format(name))
        return func

    def fit(self, X_train, y_train):
        """Train the network; stores ``W``, ``b`` and the loss curve ``dfJ``.

        Parameters
        ----------
        X_train : array-like
            Training features, one sample per row.
        y_train : array-like
            One 0/1 label per sample.

        Raises
        ------
        ValueError
            If there are no samples or the label count does not match.
        """
        # Work in column-per-sample layout: X is (features, m), Y is (1, m).
        X = np.asarray(X_train).T
        Y = np.asarray(y_train).reshape(1, -1)
        m = X.shape[1]  # number of samples
        if m == 0 or Y.shape[1] != m:
            raise ValueError(
                'X_train and y_train must hold the same, non-zero number of '
                'samples (got {} and {})'.format(m, Y.shape[1]))

        self.n[0] = X.shape[0]  # input layer width = number of features
        sizes = self.n
        depth = len(sizes)
        out = depth - 1  # index of the output layer

        # Parameters: small random weights, zero biases (index 0 unused).
        W = [np.nan] + [np.random.randn(sizes[i], sizes[i - 1]) * 0.01
                        for i in range(1, depth)]
        b = [np.nan] + [np.zeros((sizes[i], 1)) for i in range(1, depth)]

        # Per-layer caches; every slot >= 1 is overwritten on each iteration.
        A = [X] + [None] * out       # activations
        Z = [np.nan] + [None] * out  # pre-activations
        dZ = [None] * depth
        dW = [np.nan] + [None] * out
        db = [np.nan] + [None] * out

        losses = np.zeros(self.ITERNUM)
        eps = np.finfo(float).eps  # keeps log() finite if the output saturates

        for k in range(self.ITERNUM):
            # ---------- forward pass ----------
            for i in range(1, depth):
                Z[i] = np.dot(W[i], A[i - 1]) + b[i]
                A[i] = self.g[i](Z[i])

            # Mean binary cross-entropy; clipping only affects the reported
            # loss, the gradients below use the unclipped output.
            P = np.clip(A[out], eps, 1 - eps)
            losses[k] = -np.sum(Y * np.log(P) + (1 - Y) * np.log(1 - P)) / m

            # ---------- backward pass ----------
            # Sigmoid + cross-entropy: dJ/dZ of the output layer is (A - Y)/m,
            # so dJ/dA (with its division by A and 1 - A) is never needed.
            dZ[out] = (A[out] - Y) / m
            for i in range(out, 0, -1):
                if i < out:
                    dA = np.dot(W[i + 1].T, dZ[i + 1])
                    dZ[i] = dA * self.d_gfunc(Z[i])
                dW[i] = np.dot(dZ[i], A[i - 1].T)
                db[i] = np.sum(dZ[i], axis=1, keepdims=True)

            # ---------- gradient descent ----------
            # Applied only after all gradients are known, because the
            # backward pass reads the pre-update W[i + 1].
            for i in range(1, depth):
                W[i] = W[i] - self.alpha * dW[i]
                b[i] = b[i] - self.alpha * db[i]

            # Progress display.
            if (k + 1) % 1000 == 0:
                print('progress rate:{}/{}'.format(k+1,self.ITERNUM),end = '\r')

        # One assignment instead of per-iteration chained indexing
        # (`dfJ.loc[k]['J'] = J`), which writes to a temporary under pandas
        # copy-on-write and is slow in any case.
        self.dfJ = pd.DataFrame(data=losses.reshape(-1, 1), columns=['J'])
        self.W, self.b = W, b

    def predict_prob(self, X_test):
        """Return the output-layer activation for every row of ``X_test``.

        The result is a flat 1-D array (probability of class 1 when the
        output layer has a single node).
        """
        # ---------- forward pass ----------
        W, b = self.W, self.b
        Ai = np.asarray(X_test).T
        for i in range(1, len(self.n)):
            Ai = self.g[i](np.dot(W[i], Ai) + b[i])
        return Ai.reshape(-1)

    def predict(self, X_test):
        """Return hard 0.0/1.0 labels by thresholding ``predict_prob`` at 0.5."""
        Y_prob = self.predict_prob(X_test)
        Y_test = Y_prob.copy()
        Y_test[Y_prob >= 0.5] = 1
        Y_test[Y_prob < 0.5] = 0
        return Y_test
05
單隱層神經網絡
設置1個隱藏層,隱藏層節點數為5,隱藏層使用Sigmoid激活函數。
from sklearn.metrics import roc_auc_score

# One hidden layer of 5 nodes with the Sigmoid activation.
NN = NNClassifier(
    n=[np.nan, 5, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='Sigmoid',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

隱藏層使用Tanh激活函數。
from sklearn.metrics import roc_auc_score

# One hidden layer of 5 nodes with the Tanh activation.
NN = NNClassifier(
    n=[np.nan, 5, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='Tanh',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

隱藏層使用ReLu激活函數。
from sklearn.metrics import roc_auc_score

# One hidden layer of 5 nodes with the ReLu activation.
NN = NNClassifier(
    n=[np.nan, 5, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='ReLu',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

隱藏層使用LeakyReLu激活函數。
from sklearn.metrics import roc_auc_score

# One hidden layer of 5 nodes with the LeakyReLu activation.
NN = NNClassifier(
    n=[np.nan, 5, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='LeakyReLu',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

以上試驗似乎表明,在當前的數據集上,隱藏層采用ReLu激活函數是一個最好的選擇,AUC最高得分為0.99958。
06
雙隱層神經網絡
設置2個隱藏層,隱藏層節點數都為5,隱藏層都使用ReLu激活函數。
from sklearn.metrics import roc_auc_score

# Two hidden layers of 5 nodes each with the ReLu activation.
NN = NNClassifier(
    n=[np.nan, 5, 5, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='ReLu',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

AUC得分0.99874比采用單隱藏層的最優得分0.99958有所降低,可能是模型復雜度過高,我們嘗試減少隱藏層節點的個數至3以降低模型復雜度。
from sklearn.metrics import roc_auc_score

# Two hidden layers of 3 nodes each with the ReLu activation.
NN = NNClassifier(
    n=[np.nan, 3, 3, 1],
    alpha=0.02,
    ITERNUM=200000,
    gfunc='ReLu',
)
NN.fit(X_train, y_train)

# Plot the loss curve over the iterations
# (the call below is what the `%matplotlib inline` notebook magic runs).
get_ipython().run_line_magic('matplotlib', 'inline')
NN.dfJ.plot(figsize=(12, 8))

# AUC score on the validation set.
Y_prob = NN.predict_prob(X_test)
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

AUC得分0.99979,又有所提高。
和sklearn中自帶的神經網絡分類器進行對比。
# Comparison against scikit-learn's built-in neural-network classifier.
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier

# 3 neurons in the first hidden layer and 3 in the second.
MLPClf = MLPClassifier(
    hidden_layer_sizes=(3, 3),
    max_iter=200000,
    activation='relu',
)
MLPClf.fit(X_train, y_train)

# Plot the loss curve over the iterations.
loss_values = np.array(MLPClf.loss_curve_)
dfJ = pd.DataFrame(data=loss_values, columns=['J'])
dfJ.plot(figsize=(12, 8))

# AUC score on the validation set; column 1 of predict_proba is the
# probability of the positive class.
Y_prob = MLPClf.predict_proba(X_test)[:, 1]
roc_auc_score(y_true=list(y_test), y_score=list(Y_prob))

以上試驗表明,針對當前數據集,選擇ReLu激活函數,采用雙隱藏層,每個隱藏層節點數設置為3是一個不錯的選擇,AUC得分為0.99979。該得分高于采用CV交叉驗證優化超參數后的邏輯回歸模型的0.99897的AUC得分。