• 玩转gpgpu-sim 04记—— __cudaRegisterBinary() of gpgpu-sim 到底做了什么


    1. 参考 nv 的 __cudaRegisterFatBinary

    nv 的这个函数的实现没有公开源码,藏在 cuda sdk 的运行时库中;但是调用这个函数的源代码可以在 cuda sdk 安装好的头文件(crt/host_runtime.h)中找到;

    比如在cuda 12.1中调用源代码如下:

    1. #define __cudaRegisterBinary(X) \
    2. __cudaFatCubinHandle = __cudaRegisterFatBinary((void*)&__fatDeviceText); \
    3. { void (*callback_fp)(void **) = (void (*)(void **))(X); (*callback_fp)(__cudaFatCubinHandle); __cudaRegisterFatBinaryEnd(__cudaFatCubinHandle); }\
    4. atexit(__cudaUnregisterBinaryUtil)
    5. #define __cudaRegisterVariable(handle, var, ext, size, constant, global) \
    6. __cudaRegisterVar(handle, (char*)&__host##var, (char*)__device##var, __name##var, ext, size, constant, global)
    7. #define __cudaRegisterManagedVariable(handle, var, ext, size, constant, global) \
    8. __cudaRegisterManagedVar(handle, (void **)&__host##var, (char*)__device##var, __name##var, ext, size, constant, global)
    9. #define __cudaRegisterGlobalTexture(handle, tex, dim, norm, ext) \
    10. __cudaRegisterTexture(handle, (const struct textureReference*)&tex, (const void**)(void*)__device##tex, __name##tex, dim, norm, ext)
    11. #define __cudaRegisterGlobalSurface(handle, surf, dim, ext) \
    12. __cudaRegisterSurface(handle, (const struct surfaceReference*)&surf, (const void**)(void*)__device##surf, __name##surf, dim, ext)
    13. #define __cudaRegisterEntry(handle, funptr, fun, thread_limit) \
    14. __cudaRegisterFunction(handle, (const char*)funptr, (char*)__device_fun(fun), #fun, -1, (uint3*)0, (uint3*)0, (dim3*)0, (dim3*)0, (int*)0)

    对这个函数的调用会被编译器注入进 cuda app,并在 main 函数之前先被调用;

    起到的作用大概是调用进 cuda runtime,把 cuda app中需要使用的 fatbin 和其中的cubin 解析出来,并发送给 gpu 预备好调用;

    fatbin 的包装结构(即上面的 __fatDeviceText)放置在 cuda app 文件的 nv 官方自定义段 .nvFatBinSegment 中,它内部的指针指向实际的 fatbin 数据(位于 .nv_fatbin 段);

    2. gpgpu-sim 的 __cudaRegisterFatBinary

    官方文档:

    GPGPU-Sim 3.x Manual

    __cudaRegisterBinary() 宏展开后会调用 __cudaRegisterFatBinary(void*),后者在 gpgpu-sim 中被执行到的代码逻辑如下:

    1. void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
    2. {
    3. #if (CUDART_VERSION < 2010)
    4. printf("GPGPU-Sim PTX: ERROR ** this version of GPGPU-Sim requires CUDA 2.1 or higher\n");
    5. exit(1);
    6. #endif
    7. CUctx_st *context = GPGPUSim_Context();
    8. static unsigned next_fat_bin_handle = 1;
    9. if(context->get_device()->get_gpgpu()->get_config().use_cuobjdump()) {
    10. // The following workaround has only been verified on 64-bit systems.
    11. if (sizeof(void*) == 4)
    12. printf("GPGPU-Sim PTX: FatBin file name extraction has not been tested on 32-bit system.\n");
    13. // FatBin handle from the .fatbin.c file (one of the intermediate files generated by NVCC)
    14. typedef struct {int m; int v; const unsigned long long* d; char* f;} __fatDeviceText __attribute__ ((aligned (8)));
    15. __fatDeviceText * fatDeviceText = (__fatDeviceText *) fatCubin;
    16. // Extract the source code file name that generate the given FatBin.
    17. // - Obtains the pointer to the actual fatbin structure from the FatBin handle (fatCubin).
    18. // - An integer inside the fatbin structure contains the relative offset to the source code file name.
    19. // - This offset differs among different CUDA and GCC versions.
    20. char * pfatbin = (char*) fatDeviceText->d;
    21. int offset = *((int*)(pfatbin+48));
    22. char * filename = (pfatbin+16+offset);
    23. // The extracted file name is associated with a fat_cubin_handle passed
    24. // into cudaLaunch(). Inside cudaLaunch(), the associated file name is
    25. // used to find the PTX/SASS section from cuobjdump, which contains the
    26. // PTX/SASS code for the launched kernel function.
    27. // This allows us to work around the fact that cuobjdump only outputs the
    28. // file name associated with each section.
    29. unsigned long long fat_cubin_handle = next_fat_bin_handle;
    30. next_fat_bin_handle++;
    31. printf("GPGPU-Sim PTX: __cudaRegisterFatBinary, fat_cubin_handle = %llu, filename=%s\n", fat_cubin_handle, filename);
    32. /*!
    33. * This function extracts all data from all files in first call
    34. * then for next calls, only returns the appropriate number
    35. */
    36. assert(fat_cubin_handle >= 1);
    37. if (fat_cubin_handle==1) cuobjdumpInit();
    38. cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);
    39. return (void**)fat_cubin_handle;
    40. }else{ ... }
    41. }

    2.1. 调用关系

    刚开始一波的调用关系如下:

    为了方便索引代码,此处按"被调用者在上、调用者在下"的顺序整理,即下面的函数会调用上面先列出的函数:

    1. class gpgpu_functional_sim_config
    2. { ...
    3. int m_ptx_use_cuobjdump;
    4. ...
    5. }
    6. void gpgpu_functional_sim_config::reg_options(class OptionParser * opp)
    7. { ...
    8. option_parser_register(opp,
    9. "-gpgpu_ptx_use_cuobjdump", OPT_BOOL,
    10. &m_ptx_use_cuobjdump,
    11. "Use cuobjdump to extract ptx and sass from binaries",
    12. "1");//CUDART_VERSION >= 4000
    13. ...
    14. }
    15. gpgpu_sim *gpgpu_ptx_sim_init_perf()
    16. { ...
    17. g_the_gpu_config.reg_options(opp);
    18. ...
    19. }
    20. class _cuda_device_id *GPGPUSim_Init()
    21. { ...
    22. gpgpu_sim *the_gpu = gpgpu_ptx_sim_init_perf();
    23. the_gpu->set_prop(prop);
    24. the_device = new _cuda_device_id(the_gpu);
    25. start_sim_thread(1);
    26. ...
    27. }
    28. void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
    29. { ...
    30. static CUctx_st* GPGPUSim_Context()
    31. class _cuda_device_id *GPGPUSim_Init()
    32. CUctx_st( _cuda_device_id *gpu ) { m_gpu = gpu; }//the_context = new CUctx_st(the_gpu);
    33. cuobjdumpInit();
    34. cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);
    35. ...
    36. }

    2.2. GPGPUSim_Context() 做了什么

    2.3. use_cuobjdump() 表示什么含义

    GPGPUSim_Context()->get_device()->get_gpgpu()->get_config().use_cuobjdump() 表示什么含义

    2.4. cuobjdumpInit() 做了什么

    2.5. cuobjdumpRegisterFatBinary() 做了什么

  • 相关阅读:
    音视频基础知识
    GoLand 2023.2.3(go语言开发)
    【20221114】【每日一题】子集
    Java 自定义Excel数据排序
    CMakeLists.txt 详解
    网站被黑后处理方法及删除批量恶意代码的方法步骤
    PerfView专题 (第十一篇):使用 Diff 功能洞察 C# 内存泄漏增量
    系列文章|云原生时代下微服务架构进阶之路 - Spring Cloud
    vue3+TS实现简易组件库
    代码随想录 | Day 48 - LeetCode 198. 打家劫舍、LeetCode 213. 打家劫舍II、LeetCode 337. 打家劫舍III
  • 原文地址:https://blog.csdn.net/eloudy/article/details/133280872