Univariate Linear Regression via Gradient Descent, with Plotting
2023-12-13 07:51:59
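For context before the code: with m training samples, design matrix X (a bias column of ones prepended to the z-score-normalized feature), labels y, and parameters theta, the code below minimizes the half mean squared error by batch gradient descent. Matching the comments in train(), the loss and update rule are

\[
J(\theta) = \frac{1}{2m}\,(X\theta - y)^\top (X\theta - y),
\qquad
\theta \leftarrow \theta - \frac{\alpha}{m}\, X^\top (X\theta - y)
\]

where \alpha is the learning rate (0.01 here), run for 1000 iterations.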
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class LinearRegression:
    def __init__(self, dataX, dataY):
        # Keep the raw features for plotting
        self.dataOX = dataX
        # Feature matrix (normalized, bias column prepended) and label column
        self.dataX = LinearRegression.preHandle(dataX)
        self.dataY = dataY
        # Number of training samples
        self.dataSize = self.dataX.shape[0]
        # The theta values to solve for, initialized to all zeros
        self.theta = np.zeros((self.dataX.shape[1], 1))
        # Learning rate and number of iterations
        self.alpha = 0.01
        self.numIterations = 1000
        # History of loss values over the iterations
        self.lossHistory = []
        self.thetaHistory = np.empty((self.numIterations, 2))
        # Plot settings with Chinese font support
        plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
        plt.rcParams['axes.unicode_minus'] = False    # display the minus sign correctly
        self.fig = plt.figure()
        self.fig.set_size_inches(10, 10)
    # Compute the loss for the current theta. train() does not call this
    # method because that would recompute the prediction.
    def loss(self):
        # Predicted values
        prediction = np.dot(self.dataX, self.theta)
        # Residuals
        delta = prediction - self.dataY
        # Loss = squared residuals / (2 * sample count)
        return (np.dot(delta.T, delta) / (self.dataSize * 2))[0][0]
    def train(self):
        for index in range(self.numIterations):
            prediction = np.dot(self.dataX, self.theta)
            delta = prediction - self.dataY
            # new theta = old theta - learning_rate / sample_count * (X^T · (prediction - truth))
            self.theta = self.theta - self.alpha / self.dataSize * np.dot(self.dataX.T, delta)
            # Current loss: MSE with the conventional 1/2 factor
            mse = (np.dot(delta.T, delta) / (2 * self.dataSize))[0][0]
            # Record this iteration's theta and loss
            self.lossHistory.append(mse)
            self.thetaHistory[index] = self.theta.reshape(2,)
        # Report the final loss and theta
        print("loss:", self.lossHistory[-1], " theta:", self.theta)
    # Draw all plots (univariate case only)
    def draw(self):
        self.drawOriginalScatter()
        self.drawNormalScatter()
        self.drawGD()
        self.drawRegression()
        plt.show()
    # Scatter plot of the original data
    def drawOriginalScatter(self):
        ax = self.fig.add_subplot(221)
        ax.set_title("Original Data Scatter")
        ax.scatter(self.dataOX, self.dataY, label='Train Dataset', s=5)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.legend()
    # Scatter plot of the normalized data against the original data
    def drawNormalScatter(self):
        ax = self.fig.add_subplot(222)
        ax.set_title("Normal Data Scatter")
        ax.scatter(self.dataOX, self.dataY, label='Original Train Dataset', s=5)
        ax.scatter(self.dataX[:, 1], self.dataY, label='Normal Train Dataset', s=5)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.legend()
    # Loss (and theta components) versus iteration count
    def drawGD(self):
        ax = self.fig.add_subplot(223)
        ax.set_title("NumIterations loss")
        iterations = np.arange(self.numIterations)
        ax.plot(iterations, self.lossHistory, label='loss')
        ax.plot(iterations, self.thetaHistory[:, 0], label='theta_0')
        ax.plot(iterations, self.thetaHistory[:, 1], label='theta_1')
        ax.set_xlabel("numIterations")
        ax.set_ylabel("loss")
        ax.legend()
    # Plot the regression line
    def drawRegression(self):
        ax = self.fig.add_subplot(224)
        ax.set_title("Regression Line")
        ax.scatter(self.dataOX, self.dataY, label='Original Train Dataset', s=5)
        # Comparing the two scatters shows that the fitted line belongs to the
        # normalized data, not to the original data
        ax.scatter(self.dataX[:, 1], self.dataY, label='Normal Train Dataset', s=5)
        # Sample dataSize points from the min to the max of the normalized feature
        # and prepend the bias column; the feature is already scaled, so it must
        # not be run through preHandle again
        xs = np.linspace(self.dataX[:, 1].min(), self.dataX[:, 1].max(), self.dataSize).reshape(self.dataSize, 1)
        xx = np.hstack((np.ones((self.dataSize, 1)), xs))
        yy = np.dot(xx, self.theta)
        ax.plot(xx[:, -1], yy, label="Regression Line")
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.legend()
"""
预处理数据,零均值归一化处理
"""
@staticmethod
def preHandle(data):
normalData = np.copy(data)
# 平均值
mean = np.mean(normalData)
# 方差
std = np.std(normalData)
normalData = (normalData - mean) / std
normalData = np.hstack((np.ones((normalData.shape[0],1)),normalData))
return normalData
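As the comment in drawRegression() points out, the learned line lives in the normalized feature space rather than the original one. A minimal sketch of mapping theta back to the original scale (a hypothetical helper, not part of the original class; mu and sigma are the training feature's mean and standard deviation):

# Hypothetical helper (not in the original post): undo the z-score normalization
# for the univariate case. Since y = t0 + t1 * (x - mu) / sigma, the line in the
# original scale is y = (t0 - t1 * mu / sigma) + (t1 / sigma) * x.
def denormalizeTheta(theta, mu, sigma):
    slope = theta[1, 0] / sigma
    intercept = theta[0, 0] - theta[1, 0] * mu / sigma
    return intercept, slope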
df = pd.read_csv("./data/world-happiness-report-2017.csv")
xName = "Economy..GDP.per.Capita."
yName = "Happiness.Score"
trainDataX = df[[xName]].values
trainDataY = df[[yName]].values
linearRegression = LinearRegression(trainDataX,trainDataY)
linearRegression.train()
linearRegression.draw()
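If the CSV above is not at hand, a quick smoke test with synthetic data (an illustrative assumption, not from the source) exercises the same pipeline; the fitted line in the fourth subplot should track the normalized scatter of y = 3x + 4 plus noise:

# Synthetic smoke test (assumed data, not from the original post)
rng = np.random.default_rng(0)
x = rng.uniform(0, 2, size=(100, 1))
y = 3 * x + 4 + rng.normal(0, 0.3, size=(100, 1))
model = LinearRegression(x, y)
model.train()
model.draw()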
Source: https://blog.csdn.net/qq_37293230/article/details/134556087