Suppose the loss function J looks like this:

import numpy as np
import matplotlib.pyplot as plt

plot_x = np.linspace(-1, 6, 141)
plot_y = (plot_x - 2.5) ** 2 - 1

plt.plot(plot_x, plot_y)
plt.show()
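The curve is a parabola with its minimum at theta = 2.5, where J = -1. The implementation below needs the gradient dJ/dtheta = 2 * (theta - 2.5); as a quick check of that derivative, here is a minimal sketch using sympy (sympy is an assumption here, the original derives the gradient by hand):

import sympy as sp

# Symbolically differentiate J(theta) = (theta - 2.5)^2 - 1  (sympy is not used in the original).
theta = sp.symbols('theta')
J_sym = (theta - sp.Rational(5, 2)) ** 2 - 1
print(sp.diff(J_sym, theta))  # prints 2*theta - 5, i.e. 2*(theta - 2.5)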

Implementing the gradient descent algorithm

def dJ(theta):
    # Derivative of the loss function
    return 2 * (theta - 2.5)

def J(theta):
    # Loss function
    return (theta - 2.5) ** 2 - 1

eta = 0.1        # learning rate
epsilon = 1e-8   # stopping threshold

theta = 0
while True:
    gradient = dJ(theta)
    last_theta = theta
    theta = theta - eta * gradient
    # Stop once the decrease in the loss becomes negligible
    if abs(J(theta) - J(last_theta)) < epsilon:
        break

print(theta)
print(J(theta))

Output:
2.499891109642585
-0.99999998814289
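This is essentially the analytic minimum: setting dJ/dtheta = 2*(theta - 2.5) = 0 gives theta = 2.5 with J(2.5) = -1. Each update also contracts the error geometrically, since theta_new - 2.5 = (1 - 2*eta)*(theta - 2.5) = 0.8*(theta - 2.5) for eta = 0.1. A small sanity check against the analytic answer (added here, not part of the original code):

# Sanity check (not in the original): compare with the analytic minimum.
print(abs(theta - 2.5) < 1e-3)   # True: theta has converged close to 2.5
print(abs(J(theta) + 1) < 1e-6)  # True: the loss is essentially at its minimum of -1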

Showing the full trajectory of theta on the plot

def gradient_descent(initial_theta, eta, epsilon=1e-8):
    theta = initial_theta
    theta_history = [theta]   # record every theta visited
    while True:
        gradient = dJ(theta)
        last_theta = theta
        theta = theta - eta * gradient
        theta_history.append(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break
    return theta_history

def plot_theta_history():
    # Draw the loss curve and the trajectory of theta in red
    plt.plot(plot_x, J(plot_x))
    plt.plot(np.array(theta_history), J(np.array(theta_history)), color='r')
    plt.show()

eta = 0.01
theta_history = gradient_descent(0., eta)
plot_theta_history()

Output: a plot of the loss curve with the trajectory of theta marked in red.
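A small follow-up that is not in the original: the length of theta_history tells how many updates were performed for this eta.

# Number of recorded theta values (includes the starting point).
print(len(theta_history))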

Plots for different values of eta

def gradient_descent(initial_theta, eta, n_iters=10, epsilon=1e-8):
    theta = initial_theta
    theta_history = [theta]
    i_iter = 0
    # Cap the number of iterations so that a badly chosen eta cannot loop forever
    while i_iter < n_iters:
        gradient = dJ(theta)
        last_theta = theta
        theta = theta - eta * gradient
        theta_history.append(theta)
        print(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break
        i_iter += 1
    return theta_history

Output: the trajectory plots for the different eta settings.
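The calls that produced those plots are not shown above. As a minimal sketch of how they might look (the eta values 0.01, 0.8, and 1.1 are illustrative assumptions, not taken from the original), note that with eta = 1.1 the updates diverge, which is exactly why the n_iters cap was added:

# Illustrative sketch: the eta values below are assumptions chosen for demonstration.
for eta in [0.01, 0.8, 1.1]:
    theta_history = gradient_descent(0., eta, n_iters=10)
    plot_theta_history()   # eta = 1.1 makes theta move away from 2.5 instead of toward it

Note that plot_theta_history reads the module-level theta_history, so reassigning it before each call is what updates the plot.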