• 第P6周—好莱坞明星识别(2)


    五、模型训练

    # Training loop: runs one epoch of optimization over `dataloader`.
    def train(dataloader, model, loss_fn, optimizer, device=None):
        """Train `model` for one epoch.

        Args:
            dataloader: yields (inputs, labels) batches.
            model: the network to optimize.
            loss_fn: criterion comparing predictions with true labels.
            optimizer: updates the model parameters.
            device: torch.device to run on; when omitted it is inferred
                from the model's parameters (backward compatible with the
                original code's reliance on a global ``device``).

        Returns:
            (train_acc, train_loss): accuracy over all samples and mean
            loss per batch for the epoch.
        """
        if device is None:
            # Infer the device instead of depending on a global variable.
            device = next(model.parameters()).device
        size = len(dataloader.dataset)   # size of the training set
        num_batches = len(dataloader)    # number of batches
        train_loss, train_acc = 0, 0     # running loss / correct-count
        for X, y in dataloader:          # fetch images and their labels
            X, y = X.to(device), y.to(device)
            pred = model(X)              # forward pass
            loss = loss_fn(pred, y)      # gap between prediction and target
            optimizer.zero_grad()        # reset accumulated gradients
            loss.backward()              # backpropagate
            optimizer.step()             # update parameters
            # Accumulate correct predictions and batch loss.
            train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
            train_loss += loss.item()
        train_acc /= size
        train_loss /= num_batches
        return train_acc, train_loss
    # Evaluation loop: measures loss/accuracy without updating weights.
    def test(dataloader, model, loss_fn, device=None):
        """Evaluate `model` on `dataloader`.

        Args:
            dataloader: yields (inputs, labels) batches.
            model: the network to evaluate.
            loss_fn: criterion comparing predictions with true labels.
            device: torch.device to run on; when omitted it is inferred
                from the model's parameters (backward compatible with the
                original code's reliance on a global ``device``).

        Returns:
            (test_acc, test_loss): accuracy over all samples and mean
            loss per batch (num_batches = ceil(size / batch_size)).
        """
        if device is None:
            # Infer the device instead of depending on a global variable.
            device = next(model.parameters()).device
        size = len(dataloader.dataset)   # size of the test set
        num_batches = len(dataloader)    # number of batches
        test_loss, test_acc = 0, 0
        # Disable gradient tracking: we are not training, which saves
        # memory and computation.
        with torch.no_grad():
            for imgs, target in dataloader:
                imgs, target = imgs.to(device), target.to(device)
                target_pred = model(imgs)            # forward pass
                loss = loss_fn(target_pred, target)  # batch loss
                test_loss += loss.item()
                test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()
        test_acc /= size
        test_loss /= num_batches
        return test_acc, test_loss
    # Hand-rolled dynamic learning-rate schedule.
    def adjust_learning_rate(optimizer, epoch, start_lr):
        """Decay the learning rate to 92% of its value every 2 epochs
        and write the result into every parameter group of `optimizer`.

        Args:
            optimizer: the optimizer whose param_groups are updated in place.
            epoch: zero-based epoch counter.
            start_lr: the initial learning rate the decay starts from.
        """
        decayed = start_lr * 0.92 ** (epoch // 2)
        for group in optimizer.param_groups:
            group['lr'] = decayed
    # --- Training driver ---
    learn_rate = 1e-4  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate)
    # Official scheduler: multiply the base LR by 0.92 every 4 epochs.
    lambda1 = lambda epoch: 0.92 ** (epoch // 4)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    # Classification loss.
    loss_fn = nn.CrossEntropyLoss()
    # Training parameters and per-epoch history.
    epochs = 40
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    best_acc = 0  # best test accuracy seen so far
    # Fix: pre-seed best_model so the final torch.save below cannot raise
    # a NameError even if test accuracy never improves over 0.
    best_model = copy.deepcopy(model)
    for epoch in range(epochs):
        model.train()
        epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
        scheduler.step()  # advance the LR schedule once per epoch
        model.eval()
        epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
        # Keep a deep copy of the best-performing model so far.
        if epoch_test_acc > best_acc:
            best_acc = epoch_test_acc
            best_model = copy.deepcopy(model)
        train_acc.append(epoch_train_acc)
        train_loss.append(epoch_train_loss)
        test_acc.append(epoch_test_acc)
        test_loss.append(epoch_test_loss)
        # Read back the current learning rate for logging.
        lr = optimizer.state_dict()['param_groups'][0]['lr']
        template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, lr:{:.2E}')
        print(template.format(epoch+1, epoch_train_acc*100, epoch_train_loss, epoch_test_acc*100, epoch_test_loss, lr))
    # Persist the best model's weights to disk.
    PATH = './best_model.pth'
    torch.save(best_model.state_dict(), PATH)
    print('Done')

    在非交互式环境(如直接运行 .py 脚本)中,DataLoader 开启多进程(num_workers > 0)时可能出现多进程相关报错;这类问题通常是因为脚本入口代码没有放在 if __name__ == '__main__': 块中。为了解决这个问题,我将完整代码修改如下:

    1. import torch
    2. import torch.nn as nn
    3. import torchvision.transforms as transforms
    4. import torchvision
    5. from torchvision import transforms, datasets
    6. import os, PIL, random, pathlib, warnings
    7. import copy
    8. warnings.filterwarnings("ignore")
    def main():
        """End-to-end fine-tuning of VGG-16 on the 17-class celebrity
        dataset: data loading, model setup, training, and saving the
        best checkpoint."""
        # Select GPU when available, otherwise fall back to CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(device)

        # --- Data discovery ---
        data_dir = 'D:/P6/48-data/'
        data_dir = pathlib.Path(data_dir)
        data_path = list(data_dir.glob('*'))
        print(data_path)
        # Use the directory name itself as the class label. This is robust
        # to the dataset living at any path depth, unlike the original
        # str(path).split("\\")[3], which only worked for this exact layout.
        classname = [path.name for path in data_path]
        print(classname)

        # --- Preprocessing ---
        train_transforms = transforms.Compose([
            transforms.Resize([224, 224]),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.39354826, 0.41713402, 0.48036146],
                std=[0.25076334, 0.25809455, 0.28359835]
            )
        ])
        total_data = datasets.ImageFolder("D:/P6/48-data/", transform=train_transforms)
        print(total_data)
        print(total_data.class_to_idx)

        # 80/20 random train/test split.
        train_size = int(0.8 * len(total_data))
        test_size = len(total_data) - train_size
        train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])
        print(train_dataset, test_dataset)

        batch_size = 32
        # num_workers > 0 spawns worker processes, which is exactly why the
        # script entry point must be guarded by if __name__ == '__main__'.
        train_dl = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=1)
        test_dl = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=1)

        from torchvision.models import vgg16
        print("Using {} device\n".format(device))

        # --- Use the official VGG-16 model ---
        # Load the pretrained weights, freeze the backbone, and fine-tune
        # only the replaced classifier head.
        model = vgg16(pretrained=True).to(device)
        for param in model.parameters():
            param.requires_grad = False  # freeze all pretrained parameters
        # Replace classifier layer 6 so the output matches our 17 classes.
        model.classifier._modules['6'] = nn.Linear(4096, 17)
        model.to(device)
        print(model)

        # Training loop: runs one epoch of optimization.
        def train(dataloader, model, loss_fn, optimizer):
            size = len(dataloader.dataset)   # size of the training set
            num_batches = len(dataloader)    # number of batches
            train_loss, train_acc = 0, 0
            for X, y in dataloader:          # fetch images and labels
                X, y = X.to(device), y.to(device)
                pred = model(X)              # forward pass
                loss = loss_fn(pred, y)      # gap between prediction and target
                optimizer.zero_grad()        # reset accumulated gradients
                loss.backward()              # backpropagate
                optimizer.step()             # update parameters
                train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
                train_loss += loss.item()
            train_acc /= size
            train_loss /= num_batches
            return train_acc, train_loss

        # Evaluation loop: measures loss/accuracy without weight updates.
        def test(dataloader, model, loss_fn):
            size = len(dataloader.dataset)   # size of the test set
            num_batches = len(dataloader)    # = ceil(size / batch_size)
            test_loss, test_acc = 0, 0
            # Disable gradient tracking to save memory during evaluation.
            with torch.no_grad():
                for imgs, target in dataloader:
                    imgs, target = imgs.to(device), target.to(device)
                    target_pred = model(imgs)
                    loss = loss_fn(target_pred, target)
                    test_loss += loss.item()
                    test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()
            test_acc /= size
            test_loss /= num_batches
            return test_acc, test_loss

        # Hand-rolled dynamic learning rate (kept for reference; the run
        # below uses the official LambdaLR scheduler instead).
        def adjust_learning_rate(optimizer, epoch, start_lr):
            # Decay to 92% of the starting value every 2 epochs.
            lr = start_lr * (0.92 ** (epoch // 2))
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # --- Training driver ---
        learn_rate = 1e-4  # initial learning rate
        optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate)
        # Official scheduler: multiply the base LR by 0.92 every 4 epochs.
        lambda1 = lambda epoch: 0.92 ** (epoch // 4)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
        loss_fn = nn.CrossEntropyLoss()  # classification loss

        epochs = 40
        train_loss = []
        train_acc = []
        test_loss = []
        test_acc = []
        best_acc = 0  # best test accuracy seen so far
        # Fix: pre-seed best_model so torch.save below cannot raise a
        # NameError even if test accuracy never improves over 0.
        best_model = copy.deepcopy(model)
        for epoch in range(epochs):
            model.train()
            epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
            scheduler.step()  # advance the LR schedule once per epoch
            model.eval()
            epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
            # Keep a deep copy of the best-performing model so far.
            if epoch_test_acc > best_acc:
                best_acc = epoch_test_acc
                best_model = copy.deepcopy(model)
            train_acc.append(epoch_train_acc)
            train_loss.append(epoch_train_loss)
            test_acc.append(epoch_test_acc)
            test_loss.append(epoch_test_loss)
            # Read back the current learning rate for logging.
            lr = optimizer.state_dict()['param_groups'][0]['lr']
            template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, lr:{:.2E}')
            print(template.format(epoch+1, epoch_train_acc*100, epoch_train_loss, epoch_test_acc*100, epoch_test_loss, lr))

        # Persist the best model's weights to disk.
        PATH = './best_model.pth'
        torch.save(best_model.state_dict(), PATH)
        print('Done')

    # Guard the entry point: required on Windows (spawn start method) so
    # DataLoader worker processes can re-import this module safely.
    if __name__ == '__main__':
        main()

  • 相关阅读:
    为什么生命科学企业都在陆续上云?
    零基础入门低代码后端开发,只需几行代码就可以操作数据库
    专访科杰科技CEO于洋:湖仓一体数据底座,企业构建数据能力的“最优解” | 爱分析访谈
    【教3妹学编程-java基础6】详解父子类变量、代码块、构造函数执行顺序
    开学季征文 | 一位开发实习生的真情流露
    动态尺寸模型优化实践之Shape Constraint IR Part I
    Spring基础篇:注入
    SpingBoot之替换容器为Undertow
    猿创征文|Spring Boot 整合分布式调度框架:xxl-job
    BloomFilter:布隆过滤器和Redis缓存穿透
  • 原文地址:https://blog.csdn.net/qq_60245590/article/details/133220885