• CUDA小白 - NPP(8) 图像处理 Morphological Operations


    cuda小白
    原始API链接 NPP

    GPU架构近些年也有不少的变化,具体的可以参考别的博主的介绍,都比较详细。还有一些cuda中的专有名词的含义,可以参考《详解CUDA的Context、Stream、Warp、SM、SP、Kernel、Block、Grid》

    常见的NppStatus,可以看这里

    本系列的第 7 篇(图像的傅里叶变换)还在学习中,暂未整理

    本文主要讲述的是形态学变换

    Dilation

    膨胀操作(对二值化物体边界点进行扩充,将与物体接触的所有背景点合并到该物体中,使边界向外部扩张。如果两个物体间隔较近,会将两物体连通在一起。)

    // Dilation: each output pixel is the maximum source pixel value found under the mask.
    // Mask entries equal to 0 do not take part in the maximum search.
    NppStatus nppiDilate_8u_C3R(const Npp8u *pSrc,
    							Npp32s nSrcStep,
    							Npp8u *pDst,
    							Npp32s nDstStep,
    							NppiSize oSizeROI,
    							const Npp8u *pMask,
    							NppiSize oMaskSize,
    							NppiPoint oAnchor);
    // Differs from the previous interface by additionally taking the source image size,
    // the source offset and a border type (eBorderType).
    /*
    Border types declared by NPP:
    NppiBorderType {
      NPP_BORDER_UNDEFINED,
      NPP_BORDER_NONE,
      NPP_BORDER_CONSTANT,
      NPP_BORDER_REPLICATE,
      NPP_BORDER_WRAP,
      NPP_BORDER_MIRROR
    };
    */
    NppStatus nppiDilateBorder_8u_C3R(const Npp8u *pSrc,
    								  Npp32s nSrcStep,
    								  NppiSize oSrcSize,
    								  NppiPoint oSrcOffset,
    								  Npp8u *pDst,
    								  Npp32s nDstStep,
    								  NppiSize oSizeROI,
    								  const Npp8u *pMask,
    								  NppiSize oMaskSize,
    								  NppiPoint oAnchor,
    								  NppiBorderType eBorderType);
    // Variant with a fixed-size (3x3) kernel; no mask, mask size or anchor is passed.
    NppStatus nppiDilate3x3_8u_C3R(const Npp8u *pSrc,
    							   Npp32s nSrcStep,
    							   Npp8u *pDst,
    						       Npp32s nDstStep,
    							   NppiSize oSizeROI);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    code
    #include 
    #include 
    #include 
    #include 
    
    #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
    
    int main() {
      std::string directory = "../";
      cv::Mat image_dog = cv::imread(directory + "dog.png");
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      // input
      uint8_t *in_image;
      cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
      cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      // output
      uint8_t *out_ptr1, *out_ptr2;
      cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t));  // 三通道
    
      NppiSize in_size;
      in_size.width = image_width;
      in_size.height = image_height;
      NppiRect rc;
      rc.x = 0;
      rc.y = 0;
      rc.width = image_width;
      rc.height = image_height;
    
      int mask_size = 10;
      cv::Mat mat_mask = cv::Mat::ones(mask_size, mask_size, CV_8UC1);
      uint8_t *mask;
      cudaMalloc((void**)&mask, mask_size * mask_size * sizeof(uint8_t));
      cudaMemcpy(mask, mat_mask.data, mask_size * mask_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      NppStatus status;
      NppiSize npp_mask_size;
      npp_mask_size.width = mask_size;
      npp_mask_size.height = mask_size;
      NppiPoint pt;
      pt.x = 0;
      pt.y = 0;
      // =============== nppiDilate_8u_C3R ===============
      status = nppiDilate_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, 
                                 in_size, mask, npp_mask_size, pt);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiDilate_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "dilate.jpg", out_image);
    
      // =============== nppiDilateBorder_8u_C3R ===============
      NppiPoint src_pt;
      src_pt.x = 100;
      src_pt.y = 100;
      status = nppiDilateBorder_8u_C3R(in_image, image_width * 3, in_size, src_pt, out_ptr2, 
                                       image_width * 3, in_size, mask, npp_mask_size, pt, 
                                       NPP_BORDER_REPLICATE);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiDilateBorder_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "dilate_border.jpg", out_image);
    
      // free
      CUDA_FREE(in_image)
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)

    find_package(OpenCV REQUIRED)

    # FindCUDAToolkit (available since CMake 3.17) replaces the deprecated FindCUDA
    # module. Its imported targets carry the include directories and library
    # locations, so no hard-coded /usr/local/cuda path or *.so glob is needed.
    find_package(CUDAToolkit REQUIRED)

    add_executable(test test.cpp)
    target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
    target_link_libraries(test PRIVATE
                          ${OpenCV_LIBS}
                          CUDA::cudart  # CUDA runtime (cudaMalloc / cudaMemcpy / cudaFree)
                          CUDA::nppc    # NPP core
                          CUDA::nppim   # NPP image morphological operations
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    result

    请添加图片描述
    注意:

    1. 实测 nppiDilateBorder_8u_C3R 仅支持 border 模式为 NPP_BORDER_REPLICATE,传入其他模式会报错,返回的错误码为 -9999(即 NPP_NOT_SUPPORTED_MODE_ERROR)。
    Erode

    腐蚀操作

    // Erosion with a caller-supplied mask; the parameter list mirrors nppiDilate_8u_C3R.
    NppStatus nppiErode_8u_C3R(const Npp8u *pSrc,
    						   Npp32s nSrcStep,
    					       Npp8u *pDst,
    						   Npp32s nDstStep,
    						   NppiSize oSizeROI,
    						   const Npp8u *pMask,
    						   NppiSize oMaskSize,
    						   NppiPoint oAnchor);
    // Erosion variant that additionally takes the source image size, the source
    // offset and a border type (eBorderType).
    NppStatus nppiErodeBorder_8u_C3R(const Npp8u *pSrc,
    								 Npp32s nSrcStep,
    								 NppiSize oSrcSize,
    								 NppiPoint oSrcOffset,
    								 Npp8u *pDst,
    								 Npp32s nDstStep,
    								 NppiSize oSizeROI,
    								 const Npp8u *pMask,
    								 NppiSize oMaskSize,
    								 NppiPoint oAnchor,
    								 NppiBorderType eBorderType);
    // Erode with a fixed-size (3x3) kernel
    NppStatus nppiErode3x3_8u_C3R(const Npp8u *pSrc,
    							  Npp32s nSrcStep,
    							  Npp8u *pDst,
    							  Npp32s nDstStep,
    							  NppiSize oSizeROI);
    // nppiErode3x3Border_8u_C3R is not covered in detail here
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26

    在此使用上一个实验膨胀之后的图像(dilate.jpg)作为腐蚀的输入。

    code
    #include 
    #include 
    #include 
    #include 
    
    #define CUDA_FREE(ptr) { if (ptr != nullptr) { cudaFree(ptr); ptr = nullptr; } }
    
    int main() {
      std::string directory = "../";
      cv::Mat image_dog = cv::imread(directory + "dilate.jpg");
      int image_width = image_dog.cols;
      int image_height = image_dog.rows;
      int image_size = image_width * image_height;
    
      // =============== device memory ===============
      // input
      uint8_t *in_image;
      cudaMalloc((void**)&in_image, image_size * 3 * sizeof(uint8_t));
      cudaMemcpy(in_image, image_dog.data, image_size * 3 * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      // output
      uint8_t *out_ptr1, *out_ptr2;
      cudaMalloc((void**)&out_ptr1, image_size * 3 * sizeof(uint8_t));  // 三通道
      cudaMalloc((void**)&out_ptr2, image_size * 3 * sizeof(uint8_t));  // 三通道
    
      NppiSize in_size;
      in_size.width = image_width;
      in_size.height = image_height;
      NppiRect rc;
      rc.x = 0;
      rc.y = 0;
      rc.width = image_width;
      rc.height = image_height;
    
      int mask_size = 10;
      cv::Mat mat_mask = cv::Mat::ones(mask_size, mask_size, CV_8UC1);
      uint8_t *mask;
      cudaMalloc((void**)&mask, mask_size * mask_size * sizeof(uint8_t));
      cudaMemcpy(mask, mat_mask.data, mask_size * mask_size * sizeof(uint8_t), cudaMemcpyHostToDevice);
    
      cv::Mat out_image = cv::Mat::zeros(image_height, image_width, CV_8UC3);
      NppStatus status;
      NppiSize npp_mask_size;
      npp_mask_size.width = mask_size;
      npp_mask_size.height = mask_size;
      NppiPoint pt;
      pt.x = 0;
      pt.y = 0;
      // =============== nppiErode_8u_C3R ===============
      status = nppiErode_8u_C3R(in_image, image_width * 3, out_ptr1, image_width * 3, 
                                in_size, mask, npp_mask_size, pt);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiErode_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr1, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "erode.jpg", out_image);
    
      // =============== nppiErodeBorder_8u_C3R ===============
      NppiPoint src_pt;
      src_pt.x = 100;
      src_pt.y = 100;
      status = nppiErodeBorder_8u_C3R(in_image, image_width * 3, in_size, src_pt, out_ptr2, 
                                      image_width * 3, in_size, mask, npp_mask_size, pt, 
                                      NPP_BORDER_REPLICATE);
      if (status != NPP_SUCCESS) {
        std::cout << "[GPU] ERROR nppiErodeBorder_8u_C3R failed, status = " << status << std::endl;
        return false;
      }
      cudaMemcpy(out_image.data, out_ptr2, image_size * 3, cudaMemcpyDeviceToHost);
      cv::imwrite(directory + "erode_border.jpg", out_image);
    
      // free
      CUDA_FREE(in_image)
      CUDA_FREE(out_ptr1)
      CUDA_FREE(out_ptr2)
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    make
    cmake_minimum_required(VERSION 3.20)
    project(test)

    find_package(OpenCV REQUIRED)

    # FindCUDAToolkit (available since CMake 3.17) replaces the deprecated FindCUDA
    # module. Its imported targets carry the include directories and library
    # locations, so no hard-coded /usr/local/cuda path or *.so glob is needed.
    find_package(CUDAToolkit REQUIRED)

    add_executable(test test.cpp)
    target_include_directories(test PRIVATE ${OpenCV_INCLUDE_DIRS})
    target_link_libraries(test PRIVATE
                          ${OpenCV_LIBS}
                          CUDA::cudart  # CUDA runtime (cudaMalloc / cudaMemcpy / cudaFree)
                          CUDA::nppc    # NPP core
                          CUDA::nppim   # NPP image morphological operations
    )
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    result

    请添加图片描述
    注意点:

    1. 实测 nppiErodeBorder_8u_C3R 仅支持 border 模式为 NPP_BORDER_REPLICATE,传入其他模式会报错,返回的错误码为 -9999(即 NPP_NOT_SUPPORTED_MODE_ERROR)。
    ComplexImageMorphology

    复杂图像形态学,暂时不做介绍,后续视情况而定
    <<<链接>>>

  • 相关阅读:
    Java面试问题汇总
    关于找暑期实习后的一些反思
    时间复杂度(补充)和 空间复杂度
    操作系统【OS】调度算法对比图
    AI与大数据:智慧城市安全的护航者与变革引擎
    unity学习 -- 游戏资源导入
    【问题思考总结】武忠祥排除法漏洞(考研数学)
    E056-web安全应用-File Inclusion文件包含漏洞进阶
    C++ 构造函数
    Serverless Devs 重大更新,基于 Serverless 架构的 CI/CD 框架:Serverless-cd
  • 原文地址:https://blog.csdn.net/u011732139/article/details/132823196