• AlexNet:PyTorch 模型与 ONNX 模型推理速度对比


    1. # -*- coding: utf-8 -*-
    2. from tqdm import tqdm
    3. import pandas as pd
    4. import time
    5. import torch
    6. import torchvision
    7. import numpy as np
    8. import onnxruntime as ort
    def save_onnx(path, device):
        """Create an untrained AlexNet on ``device`` and export it to ONNX.

        The export is driven by a random ``(3, 3, 224, 224)`` input placed on
        ``device``. Axis 0 of the input ``actual_input_1`` and of the output
        ``output1`` is declared dynamic.

        Args:
            path: Destination of the exported ``.onnx`` file.
            device: Device the model and the sample input are moved to.

        Returns:
            The PyTorch model that was exported.
        """
        sample = torch.randn(3, 3, 224, 224).to(device)
        net = torchvision.models.alexnet(pretrained=False).to(device)

        # First name is the real graph input; the rest are passed on to the
        # exporter unchanged.
        names_in = ["actual_input_1"]
        names_in.extend("learned_%d" % idx for idx in range(16))

        torch.onnx.export(
            net,
            sample,
            path,
            verbose=False,
            input_names=names_in,
            output_names=["output1"],
            dynamic_axes={'actual_input_1': [0], 'output1': [0]},
        )
        return net
    def torch_t(model, batch_size, device):
        """Time ``epoch`` inference passes of ``model`` on one random batch.

        The model is switched to eval mode for the measurement and its previous
        training flag is restored afterwards. Gradients are disabled, and on a
        CUDA device the clock is only stopped after the device has finished
        all queued work.

        Args:
            model: A ``torch.nn.Module`` already placed on ``device``.
            batch_size: Number of ``3 x 224 x 224`` samples in the batch.
            device: Device the random input is moved to.

        Returns:
            Elapsed wall-clock seconds, rounded to two decimals. As before, the
            timed window also covers creating the input and moving it to
            ``device``.

        Note:
            The iteration count is read from the module-level global ``epoch``.
        """
        was_training = model.training
        # Inference benchmark: dropout must be inactive during the timed passes.
        model.eval()
        start = time.perf_counter()
        try:
            input_ = torch.randn(batch_size, 3, 224, 224).to(device)
            with torch.no_grad():  # no autograd bookkeeping while timing
                for _ in range(epoch):
                    model(input_)
            if input_.is_cuda:
                # CUDA kernels are launched asynchronously; wait for them so the
                # measurement covers the actual computation.
                torch.cuda.synchronize(input_.device)
            cost = time.perf_counter() - start
        finally:
            model.train(was_training)
        return round(cost, 2)
    def onnx_t_fun(path, batch_size, device):
        """Time ``epoch`` ONNX Runtime runs of the model stored at ``path``.

        Args:
            path: Path of the ``.onnx`` file to load.
            batch_size: Number of ``3 x 224 x 224`` samples in the batch.
            device: Anything ``torch.device`` accepts (``'cpu'``, ``'cuda:0'``,
                a device index, or a ``torch.device``). A CPU device selects
                ``CPUExecutionProvider``; anything else selects
                ``CUDAExecutionProvider``.

        Returns:
            Elapsed wall-clock seconds, rounded to two decimals. Session
            creation is excluded; creation of the random float32 input is
            included.

        Note:
            The iteration count is read from the module-level global ``epoch``.
        """
        # Compare on the normalised device type rather than on the raw argument,
        # so a torch.device('cpu') is not mistaken for a GPU request.
        if torch.device(device).type == 'cpu':
            providers = ['CPUExecutionProvider']
        else:
            providers = ['CUDAExecutionProvider']
        ort_session = ort.InferenceSession(path, providers=providers)

        s = time.time()
        input_ = np.random.randn(batch_size, 3, 224, 224).astype(np.float32)
        feed = {"actual_input_1": input_}
        for _ in range(epoch):
            ort_session.run(None, feed)
        cost = time.time() - s
        return round(cost, 2)
    if __name__ == '__main__':
        # Number of timed passes per measurement. torch_t and onnx_t_fun read
        # this as a module-level global, so the name must stay ``epoch``.
        epoch = 100
        path = "alexnet.onnx"

        batch_size_list = [2 ** exp for exp in range(8)]
        device_list = ['cpu', torch.device(0)]

        # One result row per (device, batch size) pair, grouped by device.
        rows = []
        for device in device_list:
            for batch_size in batch_size_list:
                rows.append((device, batch_size))
        df = pd.DataFrame(
            columns=['torch', 'onnx'],
            index=pd.MultiIndex.from_tuples(rows),
        )

        for batch_size in tqdm(batch_size_list):
            for device in device_list:
                # A fresh model is built and exported for every measurement.
                model = save_onnx(path, device)
                timings = [
                    torch_t(model, batch_size, device),
                    onnx_t_fun(path, batch_size, device),
                ]
                df.loc[(device, batch_size), :] = timings

        print(df)

                 torch   onnx
    cpu    1       1.2   0.89
           2      2.47   1.15
           4      2.86   1.68
           8      3.67    2.7
           16     5.93   4.72
           32     9.47   8.85
           64    17.33  17.26
           128   32.77  34.27
    cuda:0 1      0.07   0.42
           2      0.08   0.65
           4      0.12    0.6
           8       0.2   0.27
           16     0.24    0.4
           32     0.38   0.89
           64     0.69   1.72
           128    1.26   3.12

    可以发现:在 CPU 上,batch_size 较小时 ONNX 比 PyTorch 快一些;随着 batch_size 增大,这一优势逐渐缩小,到 batch_size=128 时 ONNX 反而略慢(34.27 秒对 32.77 秒)。

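    在 GPU 上,ONNX 反而比 PyTorch 慢,不确定这一结果是否合理。一个可能的原因是:ONNX 的输入是 numpy 数组,每次 run 都可能需要从主机内存拷贝到 GPU,而 PyTorch 的输入张量已经在 GPU 上。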

    cpu查询命令为cat /proc/cpuinfo | grep 'model name' |uniq

    结果为 Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz

  • 相关阅读:
    [附源码]SSM计算机毕业设计疫情背景下社区公共卫生服务系统JAVA
    SVM与基于马氏距离的径向基函数(MDRBF)核结合组合(Matlab代码实现)
    基于单片机的空气质量检测系统
    url后面直接拼接参数
    JUC并发编程:Monitor和对象结构
    设计模式最佳实践代码总结 - 结构型设计模式篇 - 代理模式最佳实践
    Linux服务器下搭建SFTP服务
    4.MySQL的数据类型
    信息服务上线渗透检测网络安全检查报告和解决方案4(XSS漏洞修复)
    第一篇博客:HTML:background的使用
  • 原文地址:https://blog.csdn.net/qq_22526061/article/details/126976847