Gradient Descent
I. Linear Regression
The linear regression parameters can be solved for directly with the least-squares (normal equation) method, but a closed-form solution is not the machine learning way of thinking, which is why gradient descent is introduced. This article walks through each step of the process and compares the strategies experimentally.
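For reference, the closed-form least-squares solution that theta_best computes below is the normal equation, which minimizes the mean squared error in one shot:

$$\hat{\theta} = (X^{T}X)^{-1}X^{T}y$$

Gradient descent reaches the same minimum iteratively instead of inverting $X^{T}X$, which is what the rest of the article demonstrates.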
1. Initialization
import numpy as np
import os
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
import warnings
warnings.filterwarnings('ignore')
np.random.seed(42)

2. The regression equation

import numpy as np
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
plt.plot(X, y, 'b.')
plt.xlabel('X_1')
plt.ylabel('y')
plt.axis([0, 2, 0, 15])
plt.show()

X_b = np.c_[np.ones((100, 1)), X]  # prepend a column of ones for the bias term
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_best)
# Output
array([[4.21509616],[2.77011339]])

X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new]
y_predict = X_new_b.dot(theta_best)
print(y_predict)
# Output
array([[4.21509616],[9.75532293]])

plt.plot(X_new, y_predict, 'r--')
plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 15])
plt.show()

II. Calling the sklearn API
The sklearn API reference: https://scikit-learn.org/stable/modules/classes.html
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
print(lin_reg.coef_)
print(lin_reg.intercept_)
# Output
[[2.77011339]]
[4.21509616]

III. Gradient Descent
When the step size is small, training needs many iterations; when the step size is large, the updates fluctuate heavily. The learning rate should therefore be kept as small as practical, and it should decrease as the iterations proceed.
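Formally, each iteration moves the parameters one step of size η (the learning rate) along the negative gradient of the cost; for the MSE cost used throughout this article the batch gradient is:

$$\theta^{(t+1)} = \theta^{(t)} - \eta\,\nabla_{\theta}\mathrm{MSE}(\theta^{(t)}), \qquad \nabla_{\theta}\mathrm{MSE}(\theta) = \frac{2}{m}X^{T}(X\theta - y)$$

This is exactly the expression 2/m * X_b.T.dot(X_b.dot(theta) - y) that appears in the code below.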
1. Batch gradient descent
eta = 0.1            # learning rate
n_iterations = 1000  # number of iterations
m = 100
theta = np.random.randn(2, 1)  # random initialization of the parameters theta
for iteration in range(n_iterations):
    gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradients
theta
# Output
array([[4.21509616],[2.77011339]])

X_new_b.dot(theta)
# Output
array([[4.21509616],[9.75532293]])

theta_path_bgd = []
def plot_gradient_descent(theta, eta, theta_path=None):
    m = len(X_b)
    plt.plot(X, y, 'b.')
    n_iterations = 1000
    for iteration in range(n_iterations):
        y_predict = X_new_b.dot(theta)
        plt.plot(X_new, y_predict, 'b-')
        gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - eta * gradients
        if theta_path is not None:
            theta_path.append(theta)
    plt.xlabel('X_1')
    plt.axis([0, 2, 0, 15])
    plt.title('eta = {}'.format(eta))

theta = np.random.randn(2, 1)

plt.figure(figsize=(10, 4))
plt.subplot(131)
plot_gradient_descent(theta, eta=0.02)
plt.subplot(132)
plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)
plt.subplot(133)
plot_gradient_descent(theta, eta=0.5)
plt.show()

2. Stochastic gradient descent

theta_path_sgd = []
m = len(X_b)
np.random.seed(42)
n_epochs = 50
t0 = 5
t1 = 50

def learning_schedule(t):
    return t0 / (t1 + t)

theta = np.random.randn(2, 1)

for epoch in range(n_epochs):
    for i in range(m):
        if epoch < 10 and i < 10:
            y_predict = X_new_b.dot(theta)
            plt.plot(X_new, y_predict, 'r-')
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients
        theta_path_sgd.append(theta)

plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 15])
plt.show()

3. Mini-batch gradient descent
theta_path_mgd = []
n_epochs = 50
minibatch = 16
theta = np.random.randn(2, 1)
t0, t1 = 200, 1000

def learning_schedule(t):
    return t0 / (t + t1)

np.random.seed(42)
t = 0
for epoch in range(n_epochs):
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch):
        t += 1
        xi = X_b_shuffled[i:i + minibatch]
        yi = y_shuffled[i:i + minibatch]
        gradients = 2/minibatch * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        theta = theta - eta * gradients
        theta_path_mgd.append(theta)
theta
# Output
array([[4.25490684],[2.80388785]])

IV. Comparing the Three Strategies
theta_path_bgd = np.array(theta_path_bgd)
theta_path_sgd = np.array(theta_path_sgd)
theta_path_mgd = np.array(theta_path_mgd)

plt.figure(figsize=(12, 6))
plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], 'r-s', linewidth=1, label='SGD')
plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], 'g-+', linewidth=2, label='MINIGD')
plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], 'b-o', linewidth=3, label='BGD')
plt.legend(loc='upper left')
plt.axis([3.5, 4.5, 2.0, 4.0])
plt.show()

In practice, mini-batch gradient descent is used most often; as a rule, the batch size should be chosen as large as resources allow.
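For completeness, a similar stochastic-gradient fit can also be obtained through the sklearn API from section II. The sketch below uses SGDRegressor; the hyperparameter values (max_iter, tol, penalty, eta0) are illustrative choices, not values taken from the experiments above.

from sklearn.linear_model import SGDRegressor

# Illustrative settings: no regularization penalty, up to 1000 epochs,
# initial learning rate 0.1, fixed seed for reproducibility
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)
sgd_reg.fit(X, y.ravel())                  # SGDRegressor expects a 1-D target array
print(sgd_reg.intercept_, sgd_reg.coef_)   # expected to land close to the normal-equation solution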