• 手写 LASSO 回归的 Python 实现


    1. import numpy as np
    2. from matplotlib.font_manager import FontProperties
    3. from sklearn.datasets import make_regression
    4. from sklearn.model_selection import train_test_split
    5. import matplotlib.pyplot as plt
    class Lasso():
        """Linear regression with an L1 penalty, trained by subgradient descent.

        The objective minimised by ``lasso_train`` is::

            (1 / (2 * n)) * ||X w + b - y||^2 + alpha * ||w||_1

        where ``n`` is the number of training samples.
        """

        def __init__(self):
            pass

        def prepare_data(self):
            """Build a synthetic regression data set and split it.

            Returns:
                ``(X_train, X_test, y_train, y_test)``; both target arrays are
                reshaped to column vectors of shape ``(n, 1)``.
            """
            # 40 samples with 80 features: more features than samples.
            X, y = make_regression(n_samples=40, n_features=80, random_state=0, noise=0.5)
            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
            return X_train, X_test, y_train.reshape(-1, 1), y_test.reshape(-1, 1)

        def initialize_params(self, dims):
            """Return zero-initialised parameters.

            Args:
                dims: number of features.

            Returns:
                ``(w, b)`` where ``w`` is a ``(dims, 1)`` array of zeros and
                ``b`` is ``0``.
            """
            w = np.zeros((dims, 1))
            b = 0
            return w, b

        def l1_loss(self, X, y, w, b, alpha):
            """Evaluate the L1-regularised loss and its (sub)gradient.

            Args:
                X: feature matrix, one row per sample.
                y: targets as a column vector with one row per sample.
                w: weight column vector.
                b: scalar bias.
                alpha: L1 regularisation coefficient.

            Returns:
                ``(y_hat, loss, dw, db)``: the predictions, the scalar loss,
                and the (sub)gradients of that loss with respect to ``w`` and
                ``b``.
            """
            num_train = X.shape[0]
            y_hat = np.dot(X, w) + b
            residual = y_hat - y
            # The 1/(2n) factor makes dw and db below the exact gradient of the
            # data term; with a plain 1/n the reported loss and the gradient
            # would disagree by a factor of two.
            loss = np.sum(residual ** 2) / (2 * num_train) + alpha * np.sum(np.abs(w))
            # np.sign(0) == 0, so a weight that is exactly zero gets no L1 push.
            dw = np.dot(X.T, residual) / num_train + alpha * np.sign(w)
            db = np.sum(residual) / num_train
            return y_hat, loss, dw, db

        def lasso_train(self, X, y, learning_rate, epochs, alpha):
            """Fit the model on standardised features.

            Args:
                X: training features, one row per sample.
                y: training targets; a 1-D array is accepted and treated as a
                    column vector.
                learning_rate: step size of each update.
                epochs: number of update steps to run (exactly ``epochs``).
                alpha: L1 regularisation coefficient.

            Returns:
                ``(loss, loss_list, params, grads)``. ``loss_list`` holds the
                loss measured at the start of every epoch and ``loss`` is its
                last entry (or the initial loss when no epoch ran). ``params``
                has keys ``'w'`` and ``'b'`` (in standardised feature space)
                plus ``'mean'`` and ``'std'``, the per-feature statistics that
                ``predict`` needs to transform new data the same way.
                ``grads`` has keys ``'dw'`` and ``'db'`` from the last
                evaluation.
            """
            X = np.asarray(X, dtype=float)
            # Force a column vector so (n, 1) predictions never broadcast
            # against a 1-D target into an (n, n) residual.
            y = np.asarray(y, dtype=float).reshape(-1, 1)

            # Standardise features. A constant column has zero spread; divide
            # it by 1 instead so it becomes all zeros rather than NaN.
            mean = np.mean(X, axis=0)
            std = np.std(X, axis=0)
            std = np.where(std == 0, 1.0, std)
            X = (X - mean) / std

            w, b = self.initialize_params(X.shape[1])
            loss_list = []
            loss = dw = db = None
            for _ in range(epochs):
                _, loss, dw, db = self.l1_loss(X, y, w, b, alpha)
                w += -learning_rate * dw
                b += -learning_rate * db
                loss_list.append(loss)

            if not loss_list:
                # No update ran: still report the loss/gradients at the start
                # point instead of failing on unassigned names.
                _, loss, dw, db = self.l1_loss(X, y, w, b, alpha)

            params = {
                'w': w,
                'b': b,
                'mean': mean,
                'std': std
            }
            grads = {
                'dw': dw,
                'db': db
            }
            return loss, loss_list, params, grads

        def predict(self, X, params):
            """Predict targets for ``X`` from trained parameters.

            Args:
                X: feature matrix, one row per sample, in the original
                    (unstandardised) units when ``params`` carries ``'mean'``
                    and ``'std'``.
                params: dict with ``'w'`` and ``'b'``; when ``'mean'`` and
                    ``'std'`` are also present, ``X`` is standardised with
                    them first. Without them ``X`` is used as given.

            Returns:
                Column vector of predictions.
            """
            w = params['w']
            b = params['b']
            if 'mean' in params and 'std' in params:
                X = (np.asarray(X, dtype=float) - params['mean']) / params['std']
            return np.dot(X, w) + b
    if __name__ == '__main__':
        # Sweep the L1 coefficient and record, for each value, how many learned
        # weights have absolute value below 0.1 (a rough stand-in for sparsity).
        model = Lasso()
        X_train, X_test, y_train, y_test = model.prepare_data()
        alphas = np.arange(0.01, 0.11, 0.01)

        def near_zero_weights(alpha):
            # Arguments: training X, training y, learning rate, epochs, L1 coefficient.
            _, _, fitted, _ = model.lasso_train(X_train, y_train, 0.02, 3000, alpha)
            weights = np.squeeze(fitted['w'])
            return np.sum(np.abs(weights) < 1e-1)

        wc = [near_zero_weights(alpha) for alpha in alphas]

        # Select a font with Chinese glyphs for the axis labels and draw minus
        # signs as plain ASCII hyphens.
        plt.rcParams.update({
            'font.sans-serif': ['SimHei'],
            'axes.unicode_minus': False,
        })
        plt.figure(figsize=(10, 8))
        plt.plot(alphas, wc, 'o-')
        plt.xlabel('正则项系数', fontsize=15)
        plt.ylabel('参数w矩阵的稀疏度', fontsize=15)
        plt.show()

  • 相关阅读:
    React进阶之路(一)-- JSX基础、组件基础
    一个关于React与Node.js前后端分离项目部署问题
    Linux系统的FTP服务
    uniapp进行条件编译的两种方法
    对实现移动应用界面设计的思考
    【数据仓库设计基础(四)】数据仓库实施步骤
    shell脚本基础教程,快速上手
    bond0双网卡主备实验
    一幅长文细学node.js——一幅长文系列
    C#中.NET 7.0 Windows窗体应用通过EF访问已有数据库并实现追加、删除、修改、插入记录
  • 原文地址:https://blog.csdn.net/qq_58158950/article/details/134435537