Suppose the loss function J is the parabola J(theta) = (theta - 2.5)^2 - 1, whose minimum sits at theta = 2.5:
import numpy as np
import matplotlib.pyplot as plt

# Sample the curve on [-1, 6] and plot it
plot_x = np.linspace(-1, 6, 141)
plot_y = (plot_x - 2.5) ** 2 - 1
plt.plot(plot_x, plot_y)
plt.show()
Implementing gradient descent
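Gradient descent walks downhill on J: each iteration moves theta a small step against the derivative, with the learning rate eta controlling the step size:

theta = theta - eta * dJ(theta)

The loop below repeats this update until the loss changes by less than a tiny epsilon between two consecutive steps.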
def dJ(theta):
    # Derivative of the loss function J
    return 2 * (theta - 2.5)

def J(theta):
    return (theta - 2.5) ** 2 - 1

eta = 0.1        # learning rate
epsilon = 1e-8   # stop once the loss changes by less than this
theta = 0.0
while True:
    gradient = dJ(theta)
    last_theta = theta
    theta = theta - eta * gradient
    if abs(J(theta) - J(last_theta)) < epsilon:
        break

print(theta)
print(J(theta))
Output:
2.499891109642585
-0.99999998814289
theta has converged to (almost) 2.5 and J(theta) to -1, which matches the analytic minimum obtained by setting dJ(theta) = 2*(theta - 2.5) to zero.
Plotting the full trajectory of theta on the curve:
def gradient_descent(initial_theta, eta, epsilon=1e-8):
    theta = initial_theta
    theta_history = [theta]   # record every value theta takes
    while True:
        gradient = dJ(theta)
        last_theta = theta
        theta = theta - eta * gradient
        theta_history.append(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break
    return theta_history

def plot_theta_history():
    # Relies on the global theta_history assigned below
    plt.plot(plot_x, J(plot_x))
    # Overlay the visited thetas on the loss curve in red
    plt.plot(np.array(theta_history), J(np.array(theta_history)), color='r')
    plt.show()
eta = 0.01
theta_history = gradient_descent(0., eta)
plot_theta_history()
Output: the loss curve with the visited theta values overlaid in red, stepping steadily down toward the minimum.
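The learning rate shapes that trajectory. As a quick experiment (eta = 0.8 is my own choice, not a value from the original), a large step overshoots the minimum on every iteration, so the red trajectory zig-zags from one side of the parabola to the other while still converging:

eta = 0.8
theta_history = gradient_descent(0., eta)
plot_theta_history()

Comparing len(theta_history) across runs makes the trade-off concrete: tiny steps like eta = 0.01 converge slowly, large steps bounce back and forth.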
Results for different values of eta
def gradient_descent(initial_theta, eta, n_iters=10, epsilon=1e-8):
    theta = initial_theta
    theta_history = [theta]
    i_iter = 0
    # Cap the iteration count so a too-large eta cannot loop forever
    while i_iter < n_iters:
        gradient = dJ(theta)
        last_theta = theta
        theta = theta - eta * gradient
        theta_history.append(theta)
        print(theta)
        if abs(J(theta) - J(last_theta)) < epsilon:
            break
        i_iter += 1
    return theta_history
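The cap matters because once eta exceeds 1.0 (for this particular J, whose derivative is 2*(theta - 2.5)), every update overshoots by more than it corrects: theta moves further from 2.5 on each step, so the epsilon test can never be satisfied. A sketch of that failure mode, with eta = 1.1 chosen purely for illustration:

eta = 1.1
theta_history = gradient_descent(0., eta, n_iters=10)
plot_theta_history()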
Output: the ten printed theta values move further from 2.5 on every iteration, and the plotted trajectory climbs up and away from the minimum.