Andrew Ng Deep Learning, Course 1 Week 4 assignment: L-layer network cost converges at 0.64


I have recently been working through Andrew Ng's deep learning course and ran into a problem I would like to ask about. I am on Course 1, Week 4, and I wrote the program myself by following Professor Ng's code. With the two-layer network my results are identical to his, but with the L-layer network the value of my cost function converges at about 0.64. I have been checking for two days without finding the error, so I am asking here and hoping someone can tell me where I went wrong. Many thanks.


The original course materials can be found at the link below:

https://github.com/robbertliu/deeplearning.ai-andrewNG



Here is my Python program:

import numpy as np
import h5py
import matplotlib.pyplot as plt
from dnn_app_utils_v2 import *

def Sigmoid(z):
    return 1 / (1 + np.exp(- z))

def ReLU(z):
    return np.maximum(0, z)

def SigmoidBackward(z):
    a = 1 / (1 + np.exp(- z))
    dz = a * (1 - a)
    return dz

def ReLUBackward(z):
    dz = np.ones(z.shape)
    dz[z < 0] = 0
    return dz

def InitializeParameters(dims):
    np.random.seed(1)
    parameters = {}
    L = len(dims) - 1
    for l in range(1, L + 1):
        parameters['W' + str(l)] = np.random.randn(dims[l], dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((dims[l], 1))
    return parameters

def LinearForward(A_pre,W,b):
    Z = W @ A_pre + b
    return Z

def ActivationForward(A_pre,W,b,activation):
    Z = LinearForward(A_pre, W, b)
    if activation == 'sigmoid':
        A = Sigmoid(Z)
    elif activation == 'relu':
        A = ReLU(Z)
    return Z,A

def ForwardPropagation(X,parameters):
    cache = {}
    cache['Z0'], cache['A0'] = np.zeros(X.shape), X
    L = len(parameters) // 2
    for l in range(1,L+1):
        if l == L:
            cache['Z' + str(l)],cache['A' + str(l)] = ActivationForward(cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)],activation = 'sigmoid')
        else:
            cache['Z' + str(l)], cache['A' + str(l)] = ActivationForward(cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
    #print(cache.keys())
    return cache

def ComputeCost(Y, cache):
    m = Y.shape[1]
    L = len(cache) // 2 - 1
    J = - Y @ np.log(cache['A' + str(L)].T) - (1 - Y) @ np.log(1 - cache['A' + str(L)].T)
    return J / m

def LinearBackward(A_pre,dZ,W,b):
    m = A_pre.shape[1]
    dW = dZ @ A_pre.T / m
    db = np.sum(dZ, axis = 1, keepdims = True) / m
    dA_pre = W.T @ dZ
    return dA_pre,dW,db

def ActivationBackward(Z,A_pre,dA,W,b,activation):
    if activation == 'sigmoid':
        dZ = dA * SigmoidBackward(Z)
    elif activation == 'relu':
        dZ = dA * ReLUBackward(Z)
    dA_pre, dW, db = LinearBackward(A_pre, dZ, W, b)
    return dZ,dW,db,dA_pre

def BackwardPropagation(Y,cache,parameters):
    L = len(parameters) // 2
    m = Y.shape[1]
    grads = {}
    grads['dA' + str(L)] = - Y / cache['A' + str(L)] + (1 - Y) / (1 - cache['A' + str(L)])
    for l in range(L,0,-1):
        if l == L:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z = cache['Z' + str(l)],
                A_pre = cache['A' + str(l - 1)],
                dA = grads['dA' + str(l)],
                W = parameters['W' + str(l)],
                b = parameters['b' + str(l)],
                activation = 'sigmoid')
        else:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z = cache['Z' + str(l)],
                A_pre = cache['A' + str(l - 1)],
                dA = grads['dA' + str(l)],
                W = parameters['W' + str(l)],
                b = parameters['b' + str(l)],
                activation = 'relu')
    grads['dZ0'] = np.array([0]).reshape(1,1)
    grads['dW0'] = np.array([0]).reshape(1,1)
    grads['db0'] = np.array([0]).reshape(1,1)
    #print(grads.keys())
    return grads

def UpdateParameters(parameters,grads,learning_rate):
    L = len(parameters) // 2
    for l in range(1,L+1):
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters

def Model(X,Y,dims,learning_rate,iteration_num):
    parameters = InitializeParameters(dims)
    costs = []
    for i in range(iteration_num):
        cache = ForwardPropagation(X, parameters)
        if i % 100 == 0:
            costs.append(np.squeeze(ComputeCost(Y, cache)))
            print('time:',i,'cost:',costs[len(costs) - 1])
        grads = BackwardPropagation(Y, cache, parameters)
        parameters = UpdateParameters(parameters, grads, learning_rate)
    return parameters,costs

def Prediction(parameters,X,Y):
    L = len(parameters) // 2
    cache = ForwardPropagation(X, parameters)
    A = cache['A' + str(L)]
    p = np.zeros(Y.shape)
    m = A.shape[1]
    for i in range(m):
        if A[0,i] > 0.5:
            p[0,i] = 1
        else:
            p[0,i] = 0
    print("Accuracy: " + str(np.sum((p == Y) / m)))
    return p

train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255

n_x = train_x.shape[0]
n_y = train_y.shape[0]
dims = (n_x,20,7,5,n_y)

parameters,costs = Model(train_x, train_y, dims, learning_rate = 0.0075,iteration_num = 2500)

print('train set:')
Prediction(parameters,train_x,train_y)
print('test set:')
Prediction(parameters,test_x,test_y)

plt.plot(list(range(25)),costs)
plt.show()





 

wshid7   2020-06-09 19:16



2 Answers


def ReLUBackward(z):  
    dz = np.ones(z.shape)  
    dz[z < 0] = 0  
    return dz 

In the line dz = np.ones(z.shape), the dz here should be dA.
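
A minimal sketch of what that would look like, assuming the intent is the usual convention where the ReLU backward function takes dA and zeroes out the entries where the forward input z was negative (numerically this gives the same result as the original dA * ReLUBackward(Z)):

def ReLUBackward(dA, z):
    # copy dA and zero out the positions where the forward input z was negative;
    # this is equivalent to multiplying dA by the ReLU derivative
    dz = np.array(dA, copy = True)
    dz[z < 0] = 0
    return dz

ActivationBackward would then call dZ = ReLUBackward(dA, Z) directly instead of computing dA * ReLUBackward(Z).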


juicylyyyyy   2020-07-06 17:12


I ran into the same problem. Did you manage to solve it? It has been bothering me for a long time.


lhhdh2   2021-01-09 11:42


