Simple deep learning is essentially just a multi-layer neural network. Next, let's implement a simple deep neural network in code.
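The functions that follow call two helpers, sigmoid and sigmoid_derivative, which are not shown in the original listing; here is a minimal version of them, together with the NumPy import the code needs:

import numpy as np

def sigmoid(Z):
    # element-wise logistic function
    return 1.0 / (1.0 + np.exp(-Z))

def sigmoid_derivative(Z):
    # derivative of the sigmoid, evaluated at Z
    s = sigmoid(Z)
    return s * (1.0 - s)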
# layer_num is a list giving the number of units in each layer
def initialize_parameters(layer_num):
    l = len(layer_num)  # number of layers, counting the input as a layer
    w = {}  # two dicts: one for weights, one for biases
    b = {}
    for i in range(1, l):
        w[str(i)] = np.random.randn(layer_num[i], layer_num[i-1]) * 0.01
        b[str(i)] = np.zeros((layer_num[i], 1))  # double parentheses needed: np.zeros takes a shape tuple
    return w, b
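As a quick sanity check of the shapes this produces (the layer sizes here are just an illustrative assumption):

w, b = initialize_parameters([4, 3, 1])
print(w["1"].shape, b["1"].shape)  # (3, 4) (3, 1)
print(w["2"].shape, b["2"].shape)  # (1, 3) (1, 1)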
def linear_activation(A, w, b):
    # linear step followed by a sigmoid activation
    Z = np.dot(w, A) + b
    A = sigmoid(Z)
    return Z, A
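A tiny hand-computed example of one linear_activation step (all numbers here are made up for illustration):

w1 = np.array([[0.1, -0.2]])
b1 = np.array([[0.0]])
A0 = np.array([[1.0], [2.0]])           # one example with two features
Z1, A1 = linear_activation(A0, w1, b1)
print(Z1, A1)                           # Z1 = [[-0.3]], A1 = sigmoid(-0.3) ≈ [[0.4256]]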
def forward_pass(X, w, b, layer_num):
    cachez = {}  # cache the pre-activation values Z for back propagation
    cachea = {}  # cache the activations A; the input X is treated as layer 0
    cachea[str(0)] = X
    l = len(layer_num)
    for i in range(1, l):
        cachez[str(i)], cachea[str(i)] = linear_activation(cachea[str(i-1)], w[str(i)], b[str(i)])
        A = cachea[str(i)]
    return A, cachez, cachea
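For example, running a forward pass on some random data (the sizes are assumptions for illustration) shows that the final activation has one row and one column per example:

np.random.seed(1)
X_demo = np.random.rand(4, 5)                    # 4 features, 5 examples
w, b = initialize_parameters([4, 3, 1])
A, cachez, cachea = forward_pass(X_demo, w, b, [4, 3, 1])
print(A.shape)                                   # (1, 5)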
def calculate_cost(A, Y):
    m = A.shape[1]  # number of training examples; A and Y have shape (1, m)
    cost = (1.0/m) * ((-np.dot(Y, np.log(A).T)) - np.dot(1-Y, np.log(1-A).T))
    cost = np.squeeze(cost)  # drop the size-1 dimensions so the cost is a plain scalar
    return cost
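A quick worked example of the cross-entropy cost on made-up predictions:

A_demo = np.array([[0.8, 0.2, 0.6]])
Y_demo = np.array([[1, 0, 1]])
print(calculate_cost(A_demo, Y_demo))   # (1/3)*(-ln 0.8 - ln 0.8 - ln 0.6) ≈ 0.319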
def back_propogation(Y, A, Z, a, cachea, w, b):
    # w and b are passed in, updated layer by layer, and returned
    l = len(Z)  # number of weight layers
    dZ = {}
    dZ[str(l)] = A - Y  # gradient of the cost w.r.t. Z at the output layer (sigmoid + cross-entropy)
    m = A.shape[1]  # number of training examples
    for i in range(l, 0, -1):
        dw = (1.0/m) * np.dot(dZ[str(i)], cachea[str(i-1)].T)
        db = (1.0/m) * np.sum(dZ[str(i)], axis=1, keepdims=True)
        if i > 1:
            # propagate the gradient to the previous layer before updating this layer's weights
            dA = np.dot(w[str(i)].T, dZ[str(i)])
            dZ[str(i-1)] = np.multiply(dA, sigmoid_derivative(Z[str(i-1)]))
        w[str(i)] -= a * dw
        b[str(i)] -= a * db
    return w, b
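As a rough sanity check (the data here is random and purely illustrative), one forward/backward step with a small learning rate should usually lower the cost slightly:

np.random.seed(2)
X_demo = np.random.rand(4, 5)
Y_demo = np.array([[0, 1, 0, 1, 1]])
w, b = initialize_parameters([4, 3, 1])
A, Z, cachea = forward_pass(X_demo, w, b, [4, 3, 1])
print(calculate_cost(A, Y_demo))                          # cost before the update
w, b = back_propogation(Y_demo, A, Z, 0.2, cachea, w, b)
A, _, _ = forward_pass(X_demo, w, b, [4, 3, 1])
print(calculate_cost(A, Y_demo))                          # should be slightly lower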
def train(X, Y, layer_num, learn_rate=0.2, repeat=10):
    np.random.seed(5)  # fix the random seed so the run is reproducible
    w, b = initialize_parameters(layer_num)
    for i in range(repeat):
        A, Z, cachea = forward_pass(X, w, b, layer_num)
        cost = calculate_cost(A, Y)
        w, b = back_propogation(Y, A, Z, learn_rate, cachea, w, b)
    return w, b
Let's try it out and train a small network.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # shape (4, 2): 4 input features, 2 examples
Y = np.array([[0, 1]])                          # shape (1, 2): one label per example
layer_num = [4, 3, 6, 1]
w, b = train(X, Y, layer_num)
Afterwards, you can define a separate class or function just for feeding in test data, as sketched below.
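A minimal sketch of that idea, written as a function rather than a class (the name predict and the 0.5 threshold are assumptions, not from the original):

def predict(X_test, w, b, layer_num):
    # forward pass only, then threshold the sigmoid output at 0.5
    A, _, _ = forward_pass(X_test, w, b, layer_num)
    return (A > 0.5).astype(int)

print(predict(X, w, b, layer_num))   # uses the w, b returned by train above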
You can see here that the row and column dimensions of the matrices are a key factor in the implementation. Shape checks are therefore very important, although they were not used everywhere in this exercise. For example, at every layer the following should hold:
Z.shape == (W.shape[0], A.shape[1])
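That check could be dropped straight into the linear step as an assert (a small sketch, not part of the original code):

def linear_activation_checked(A, w, b):
    Z = np.dot(w, A) + b
    assert Z.shape == (w.shape[0], A.shape[1])   # fail fast if the dimensions are wrong
    return Z, sigmoid(Z)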