• perflab 课程设计


    初始状态

    rotate

    版本I

    本题的每一步都要寻址,而且每一步访问的地址各不相同;每个地址只赋值一次,不会被再次访问。因此我首先想到的改进方向是让寻址更快:调整循环顺序,使对 dst 的写入地址连续,并把行偏移 (dim-1-j)*dim 的计算提到内层循环之外。

    1. int i, j, t,n;
    2. n = dim - 1;
    3. for (j = 0; j < dim; j++){
    4. t = (n-j)*dim;
    5. for (i = 0; i < dim; i++)
    6. dst[t+i] = src[RIDX(i, j, dim)];
    7. }

     

     版本II

    要降低 CPE,就要减少 cache miss。为此可以采用分块(blocking):每次只处理一个 8×8 的数据块,而不是完整地遍历一整行或一整列,从而改进空间局部性。

    /*
     * rotate, version II: blocked traversal. The image is processed in 8x8
     * tiles; inside a tile, src[ki][kj] is copied to dst[dim-1-kj][ki].
     * The inner loop bounds (i+8, j+8) match the outer stride of 8, so this
     * relies on dim being a multiple of 8.
     */
    int i, j, ki, kj;
    for (i = 0; i < dim; i += 8) {
        for (j = 0; j < dim; j += 8) {
            for (ki = i; ki < i + 8; ki++) {
                for (kj = j; kj < j + 8; kj++) {
                    dst[RIDX(dim - 1 - kj, ki, dim)] = src[RIDX(ki, kj, dim)];
                }
            }
        }
    }

    Rotate 的 Summary 由 5.0 提高至 7.9:Dim 规模较小时 CPE 优化不明显,Dim 规模较大时 CPE 明显下降。

    版本III

    将前两种方法结合

    1. int i, j, a, b, t;
    2. int sdim = dim - 1;
    3. for (i = 0; i < dim; i += 8)
    4. {
    5. for (j = 0; j < dim; j += 8)
    6. {
    7. for (b = j; b < j + 8; b++)
    8. {
    9. t = (sdim - b)*dim;
    10. for (a = i; a < i + 8; a++)
    11. {
    12. dst[t+a] = src[RIDX(a, b, dim)];
    13. }
    14. }
    15. }
    16. }

     版本IV

    程序中多次使用 RIDX 计算下标(在 perflab 的 defs.h 中,RIDX 是宏而不是函数,并不存在函数调用开销),这里直接写出下标表达式,不再使用 RIDX。此外,改善读写顺序。具体来说,先处理矩阵第一列的前32个元素,再处理第二列的前32个元素,以此类推,直到处理完矩阵的前32行;再以相同的方法继续处理余下的矩阵元素。

    /*
     * rotate, version IV: the image is cut into horizontal strips of 32 rows.
     * For each strip and each source column, 32 elements are read down the
     * column (stride dim) and written to 32 consecutive positions of
     * destination row dim-1-col. The strip height is not clamped, so this
     * relies on dim being a multiple of 32.
     */
    for (int strip = 0; strip < dim; strip += 32) {
        for (int col = 0; col < dim; col++) {
            int out = (dim - 1 - col) * dim + strip;  /* first destination index */
            int in = strip * dim + col;               /* first source index */
            for (int k = 0; k < 32; k++, out++, in += dim) {
                dst[out] = src[in];
            }
        }
    }

    smooth

    版本I

    虽然不同位置的像素点需要对不同数目的像素点(包括自身在内)取平均值,但数目只有4、6、9三种。对于四个顶点,取包括自身在内的2×2共四个像素点的平均值;对于位于四条边上但不是顶点的像素点,取包括自身在内的六个像素点的平均值;其余的内部像素点取包括自身在内的3×3共九个像素点的平均值。

    // smooth, version I: each dst pixel is the per-channel average of the src
    // pixels in its neighbourhood (itself included), handled case by case:
    // 4 pixels at the corners, 6 along the edges, 9 in the interior.
    // Reads src[1] and src[dim+1] unconditionally, so dim must be at least 2.
    // i and j are reused after their loops end; both equal dim-1 at that point.
    1. int i=1,j=0;
    2. // top-left corner
    3. dst[0].red=(src[0].red+src[1].red+src[dim].red+src[dim+1].red)/4;
    4. dst[0].green=(src[0].green+src[1].green+src[dim].green+src[dim+1].green)/4;
    5. dst[0].blue=(src[0].blue+src[1].blue+src[dim].blue+src[dim+1].blue)/4;
    6. // rest of the first row, excluding the top-right corner
    7. for(j=1; j<dim-1; j++) {
    8. dst[j].red=(src[j-1].red+src[j].red+src[j+1].red+src[dim+j-1].red+src[dim+j].red+src[dim+j+1].red)/6;
    9. dst[j].green=(src[j-1].green+src[j].green+src[j+1].green+src[dim+j-1].green+src[dim+j].green+src[dim+j+1].green)/6;
    10. dst[j].blue=(src[j-1].blue+src[j].blue+src[j+1].blue+src[dim+j-1].blue+src[dim+j].blue+src[dim+j+1].blue)/6;
    11. }
    12. // top-right corner (j == dim-1 here)
    13. dst[j].red=(src[j].red+src[j-1].red+src[dim+j].red+src[dim+j-1].red)/4;
    14. dst[j].green=(src[j].green+src[j-1].green+src[dim+j].green+src[dim+j-1].green)/4;
    15. dst[j].blue=(src[j].blue+src[j-1].blue+src[dim+j].blue+src[dim+j-1].blue)/4;
    16. // rows 1 through dim-2
    17. for(; i<dim-1; i++) {
    18. // first pixel of the row
    19. dst[i*dim].red=(src[(i-1)*dim].red+src[(i-1)*dim+1].red+src[i*dim].red+src[i*dim+1].red+src[(i+1)*dim].red+src[(i+1)*dim+1].red)/6;
    20. dst[i*dim].green=(src[(i-1)*dim].green+src[(i-1)*dim+1].green+src[i*dim].green+src[i*dim+1].green+src[(i+1)*dim].green+src[(i+1)*dim+1].green)/6;
    21. dst[i*dim].blue=(src[(i-1)*dim].blue+src[(i-1)*dim+1].blue+src[i*dim].blue+src[i*dim+1].blue+src[(i+1)*dim].blue+src[(i+1)*dim+1].blue)/6;
    22. // second through (dim-1)-th pixel of the row: full 3x3 neighbourhood
    23. for(j=1; j<dim-1; j++) {
    24. dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[(i-1)*dim+j+1].red+src[i*dim+j-1].red+src[i*dim+j].red+src[i*dim+j+1].red+src[(i+1)*dim+j-1].red+src[(i+1)*dim+j].red+src[(i+1)*dim+j+1].red)/9;
    25. dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[(i-1)*dim+j+1].green+src[i*dim+j-1].green+src[i*dim+j].green+src[i*dim+j+1].green+src[(i+1)*dim+j-1].green+src[(i+1)*dim+j].green+src[(i+1)*dim+j+1].green)/9;
    26. dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[(i-1)*dim+j+1].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[i*dim+j+1].blue+src[(i+1)*dim+j-1].blue+src[(i+1)*dim+j].blue+src[(i+1)*dim+j+1].blue)/9;
    27. }
    28. // last pixel of the row (j == dim-1 here)
    29. dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[i*dim+j-1].red+src[i*dim+j].red+src[(i+1)*dim+j-1].red+src[(i+1)*dim+j].red)/6;
    30. dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[i*dim+j-1].green+src[i*dim+j].green+src[(i+1)*dim+j-1].green+src[(i+1)*dim+j].green)/6;
    31. dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[(i+1)*dim+j-1].blue+src[(i+1)*dim+j].blue)/6;
    32. }
    33. // bottom-left corner (i == dim-1 here)
    34. dst[i*dim].red=(src[(i-1)*dim].red+src[(i-1)*dim+1].red+src[i*dim].red+src[i*dim+1].red)/4;
    35. dst[i*dim].green=(src[(i-1)*dim].green+src[(i-1)*dim+1].green+src[i*dim].green+src[i*dim+1].green)/4;
    36. dst[i*dim].blue=(src[(i-1)*dim].blue+src[(i-1)*dim+1].blue+src[i*dim].blue+src[i*dim+1].blue)/4;
    37. // last row, excluding the bottom-left and bottom-right corners
    38. for(j=1; j<dim-1; j++) {
    39. dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[(i-1)*dim+j+1].red+src[i*dim+j-1].red+src[i*dim+j].red+src[i*dim+j+1].red)/6;
    40. dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[(i-1)*dim+j+1].green+src[i*dim+j-1].green+src[i*dim+j].green+src[i*dim+j+1].green)/6;
    41. dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[(i-1)*dim+j+1].blue+src[i*dim+j-1].blue+src[i*dim+j].blue+src[i*dim+j+1].blue)/6;
    42. }
    43. // bottom-right corner
    44. dst[i*dim+j].red=(src[(i-1)*dim+j-1].red+src[(i-1)*dim+j].red+src[i*dim+j-1].red+src[i*dim+j].red)/4;
    45. dst[i*dim+j].green=(src[(i-1)*dim+j-1].green+src[(i-1)*dim+j].green+src[i*dim+j-1].green+src[i*dim+j].green)/4;
    46. dst[i*dim+j].blue=(src[(i-1)*dim+j-1].blue+src[(i-1)*dim+j].blue+src[i*dim+j-1].blue+src[i*dim+j].blue)/4;

    版本II

    由于上个版本在运行过程中存在较多重复计算,这里借鉴动态规划的思想:每个像素点的结果都是一个块(2x2、2x3、3x2或3x3)内各像素点的平均值,把每一块按列拆成2或3个纵向块,并用动规数组记录每个纵向块(同一列上相邻的2个或3个像素点)的RGB之和。设 dp[i][j] 表示第 j 列第 i 行至第 i+2 行三个像素点之和,则相邻两个纵向块之间的递推关系通式为:dp[i][j]=dp[i-1][j]-src[(i-1)*dim+j]+src[(i+2)*dim+j]

    // smooth, version II: precompute per-column vertical sums so that each
    // output pixel adds two or three column sums instead of up to nine pixels.
    //   r2/g2/b2[0][j]: sum of rows 0..1 of column j          (used by the first row)
    //   r2/g2/b2[1][j]: sum of rows dim-2..dim-1 of column j  (used by the last row)
    //   r3/g3/b3[i][j]: sum of rows i..i+2 of column j        (used by output row i+1)
    // Only rows 0..dim-3 of r3/g3/b3 are ever written or read.
    // Reads src[(dim<<1)+j] and r3[dim-3][j], so dim must be at least 3.
    // NOTE(review): r3/g3/b3 are three dim x dim int VLAs with automatic storage;
    // for large dim this may exceed the stack limit -- confirm against the sizes used.
    1. int i,j;
    2. int r2[2][dim],g2[2][dim],b2[2][dim];
    3. int r3[dim][dim],g3[dim][dim],b3[dim][dim];
    4. for(j=0; j<dim; j++) {
    5. // initial 2-row block of column j (rows 0 and 1)
    6. r2[0][j]=src[j].red;
    7. g2[0][j]=src[j].green;
    8. b2[0][j]=src[j].blue;
    9. r2[0][j]+=src[dim+j].red;
    10. g2[0][j]+=src[dim+j].green;
    11. b2[0][j]+=src[dim+j].blue;
    12. // initial 3-row block of column j (rows 0 through 2)
    13. r3[0][j]=r2[0][j]+src[(dim<<1)+j].red;
    14. g3[0][j]=g2[0][j]+src[(dim<<1)+j].green;
    15. b3[0][j]=b2[0][j]+src[(dim<<1)+j].blue;
    16. // remaining 3-row blocks: slide the window down one row (drop row i-1, add row i+2)
    17. for(i=1; i<dim-2; i++) {
    18. r3[i][j]=r3[i-1][j]-src[(i-1)*dim+j].red+src[(i+2)*dim+j].red;
    19. g3[i][j]=g3[i-1][j]-src[(i-1)*dim+j].green+src[(i+2)*dim+j].green;
    20. b3[i][j]=b3[i-1][j]-src[(i-1)*dim+j].blue+src[(i+2)*dim+j].blue;
    21. }
    22. // final 2-row block: last 3-row block minus its top row (row dim-3)
    23. r2[1][j]=r3[dim-3][j]-src[(dim-3)*dim+j].red;
    24. g2[1][j]=g3[dim-3][j]-src[(dim-3)*dim+j].green;
    25. b2[1][j]=b3[dim-3][j]-src[(dim-3)*dim+j].blue;
    26. }
    27. // top-left corner
    28. dst[0].red=(r2[0][0]+r2[0][1])/4;
    29. dst[0].green=(g2[0][0]+g2[0][1])/4;
    30. dst[0].blue=(b2[0][0]+b2[0][1])/4;
    31. // rest of the first row, excluding the top-right corner
    32. for(j=1; j<dim-1; j++) {
    33. dst[j].red=(r2[0][j-1]+r2[0][j]+r2[0][j+1])/6;
    34. dst[j].green=(g2[0][j-1]+g2[0][j]+g2[0][j+1])/6;
    35. dst[j].blue=(b2[0][j-1]+b2[0][j]+b2[0][j+1])/6;
    36. }
    37. // top-right corner (j == dim-1 here)
    38. dst[j].red=(r2[0][j-1]+r2[0][j])/4;
    39. dst[j].green=(g2[0][j-1]+g2[0][j])/4;
    40. dst[j].blue=(b2[0][j-1]+b2[0][j])/4;
    41. // rows 1 through dim-2; output row i uses the column sums stored at index i-1
    42. for(i=1; i<dim-1; i++) {
    43. // first pixel of the row
    44. dst[i*dim].red=(r3[i-1][0]+r3[i-1][1])/6;
    45. dst[i*dim].green=(g3[i-1][0]+g3[i-1][1])/6;
    46. dst[i*dim].blue=(b3[i-1][0]+b3[i-1][1])/6;
    47. // second through (dim-1)-th pixel of the row
    48. for(j=1; j<dim-1; j++) {
    49. dst[i*dim+j].red=(r3[i-1][j-1]+r3[i-1][j]+r3[i-1][j+1])/9;
    50. dst[i*dim+j].green=(g3[i-1][j-1]+g3[i-1][j]+g3[i-1][j+1])/9;
    51. dst[i*dim+j].blue=(b3[i-1][j-1]+b3[i-1][j]+b3[i-1][j+1])/9;
    52. }
    53. // last pixel of the row (j == dim-1 here)
    54. dst[i*dim+j].red=(r3[i-1][j-1]+r3[i-1][j])/6;
    55. dst[i*dim+j].green=(g3[i-1][j-1]+g3[i-1][j])/6;
    56. dst[i*dim+j].blue=(b3[i-1][j-1]+b3[i-1][j])/6;
    57. }
    58. // bottom-left corner (i == dim-1 here)
    59. dst[i*dim].red=(r2[1][0]+r2[1][1])/4;
    60. dst[i*dim].green=(g2[1][0]+g2[1][1])/4;
    61. dst[i*dim].blue=(b2[1][0]+b2[1][1])/4;
    62. // last row, excluding the bottom-left and bottom-right corners
    63. for(j=1; j<dim-1; j++) {
    64. dst[i*dim+j].red=(r2[1][j-1]+r2[1][j]+r2[1][j+1])/6;
    65. dst[i*dim+j].green=(g2[1][j-1]+g2[1][j]+g2[1][j+1])/6;
    66. dst[i*dim+j].blue=(b2[1][j-1]+b2[1][j]+b2[1][j+1])/6;
    67. }
    68. // bottom-right corner
    69. dst[i*dim+j].red=(r2[1][j-1]+r2[1][j])/4;
    70. dst[i*dim+j].green=(g2[1][j-1]+g2[1][j])/4;
    71. dst[i*dim+j].blue=(b2[1][j-1]+b2[1][j])/4;

    版本III

    版本I对处理方式不同的位置分别讨论,但大量的重复计算限制了性能;版本II记录纵向三个像素点的R、G、B之和,但二维数组的选择不够合理。又因为对任何像素点,待求平均的像素点所构成的块都不超过三行,每行都不超过三个像素点,所以可以使用三个指针,每个指针对应一行,负责读取该行中相邻的两个或三个像素点。

    // smooth3: writes to dst, in row-major order, the per-channel average of each
    // src pixel's neighbourhood (itself included) in a dim x dim image.
    // Three row pointers (pixelA = row above, pixelB = current row, pixelC = row
    // below) step across the image in lock-step with dst; the first and last
    // rows use only two of them. Corner sums of four values use >>2 in place
    // of /4 (equivalent for non-negative sums -- assumes the channel fields
    // are unsigned; TODO confirm against the pixel definition).
    // The local copy of dst is advanced as pixels are written.
    1. void smooth3(int dim, pixel *src, pixel *dst) {
    2. // use pointers and move them as little as possible
    3. int i,j;
    4. // one pointer per row
    5. pixel *pixelA,*pixelB,*pixelC;
    6. int size = dim-1;
    7. // first pixel of the first row (top-left corner)
    8. pixelB = src;
    9. pixelC = pixelB + dim;
    10. dst->red = (pixelB->red + (pixelB+1)->red + pixelC->red + (pixelC+1)->red)>>2;
    11. dst->green = (pixelB->green + (pixelB+1)->green + pixelC->green + (pixelC+1)->green)>>2;
    12. dst->blue = (pixelB->blue + (pixelB+1)->blue + pixelC->blue + (pixelC+1)->blue)>>2;
    13. pixelB++;
    14. pixelC++;
    15. dst++;
    16. // middle dim-2 pixels of the first row
    17. for(i = 1; i < size; i++)
    18. {
    19. dst->red = (pixelB->red + (pixelB-1)->red + (pixelB+1)->red + pixelC->red + (pixelC-1)->red + (pixelC+1)->red)/6;
    20. dst->green = (pixelB->green + (pixelB-1)->green + (pixelB+1)->green + pixelC->green + (pixelC-1)->green + (pixelC+1)->green)/6;
    21. dst->blue = (pixelB->blue + (pixelB-1)->blue + (pixelB+1)->blue + pixelC->blue + (pixelC-1)->blue + (pixelC+1)->blue)/6;
    22. pixelB++;
    23. pixelC++;
    24. dst++;
    25. }
    26. // last pixel of the first row (top-right corner)
    27. dst->red = (pixelC->red + (pixelC-1)->red + pixelB->red + (pixelB-1)->red)>>2;
    28. dst->green = (pixelC->green + (pixelC-1)->green + pixelB->green + (pixelB-1)->green)>>2;
    29. dst->blue = (pixelC->blue + (pixelC-1)->blue + pixelB->blue + (pixelB-1)->blue)>>2;
    30. dst++;
    31. // middle dim-2 rows: reset the pointers to the starts of rows 0, 1 and 2
    32. pixelA = src;
    33. pixelB = pixelA + dim;
    34. pixelC = pixelB + dim;
    35. for(i = 1; i < size; i++)
    36. {
    37. // first pixel of the row
    38. dst->red = (pixelA->red + (pixelA+1)->red + pixelB->red + (pixelB+1)->red + pixelC->red + (pixelC+1)->red)/6;
    39. dst->green = (pixelA->green + (pixelA+1)->green + pixelB->green + (pixelB+1)->green + pixelC->green + (pixelC+1)->green)/6;
    40. dst->blue = (pixelA->blue + (pixelA+1)->blue + pixelB->blue + (pixelB+1)->blue + pixelC->blue+ (pixelC+1)->blue)/6;
    41. dst++;
    42. pixelA++;
    43. pixelB++;
    44. pixelC++;
    45. // middle dim-2 pixels of the row: full 3x3 neighbourhood
    46. for(j = 1; j < dim-1; j++)
    47. {
    48. dst->red = (pixelA->red + (pixelA-1)->red + (pixelA+1)->red + pixelB->red + (pixelB-1)->red + (pixelB+1)->red + pixelC->red + (pixelC-1)->red + (pixelC+1)->red)/9;
    49. dst->green = (pixelA->green + (pixelA-1)->green + (pixelA+1)->green + pixelB->green + (pixelB-1)->green + (pixelB+1)->green + pixelC->green + (pixelC-1)->green + (pixelC+1)->green)/9;
    50. dst->blue = (pixelA->blue + (pixelA-1)->blue + (pixelA+1)->blue + pixelB->blue + (pixelB-1)->blue + (pixelB+1)->blue + pixelC->blue + (pixelC-1)->blue + (pixelC+1)->blue)/9;
    51. pixelA++;
    52. pixelB++;
    53. pixelC++;
    54. dst++;
    55. }
    56. // last pixel of the row; the increments below carry each pointer to the start of its next row
    57. dst->red = (pixelA->red + (pixelA-1)->red + pixelB->red + (pixelB-1)->red + pixelC->red + (pixelC-1)->red)/6;
    58. dst->green = (pixelA->green + (pixelA-1)->green + pixelB->green + (pixelB-1)->green + pixelC->green + (pixelC-1)->green)/6;
    59. dst->blue = (pixelA->blue + (pixelA-1)->blue + pixelB->blue + (pixelB-1)->blue + pixelC->blue+ (pixelC-1)->blue)/6;
    60. pixelA++;
    61. pixelB++;
    62. pixelC++;
    63. dst++;
    64. }
    65. // first pixel of the last row (bottom-left corner); only pixelA and pixelB are read from here on
    66. dst->red = (pixelA->red + (pixelA+1)->red + pixelB->red + (pixelB+1)->red)>>2;
    67. dst->green = (pixelA->green + (pixelA+1)->green + pixelB->green + (pixelB+1)->green)>>2;
    68. dst->blue = (pixelA->blue + (pixelA+1)->blue + pixelB->blue + (pixelB+1)->blue)>>2;
    69. dst++;
    70. pixelA++;
    71. pixelB++;
    72. // middle dim-2 pixels of the last row
    73. for(i = 1; i < size; i++)
    74. {
    75. dst->red = (pixelA->red + (pixelA-1)->red + (pixelA+1)->red + pixelB->red + (pixelB-1)->red + (pixelB+1)->red)/6;
    76. dst->green = (pixelA->green + (pixelA-1)->green + (pixelA+1)->green + pixelB->green + (pixelB-1)->green + (pixelB+1)->green)/6;
    77. dst->blue = (pixelA->blue + (pixelA-1)->blue + (pixelA+1)->blue + pixelB->blue + (pixelB-1)->blue + (pixelB+1)->blue)/6;
    78. pixelA++;
    79. pixelB++;
    80. dst++;
    81. }
    82. // last pixel of the last row (bottom-right corner)
    83. dst->red = (pixelA->red + (pixelA-1)->red + pixelB->red + (pixelB-1)->red)>>2;
    84. dst->green = (pixelA->green + (pixelA-1)->green + pixelB->green + (pixelB-1)->green)>>2;
    85. dst->blue = (pixelA->blue + (pixelA-1)->blue + pixelB->blue + (pixelB-1)->blue)>>2;
    86. }

     

  • 相关阅读:
    TI mmWave radar sensors Tutorial 笔记 | Module 2: The phase of the IF signal
    发布Python包到pypi
    山东大学人工智能导论实验四 利用神经网络分类红色和蓝色的花
    9月7日扒面经
    总线仲裁的方式
    leetcode 刷题 log day 42
    操控xls文件的jxl包下载
    数据结构初阶之顺序表、链表--C语言实现
    sync_binlog和innodb_flush_log_at_trx_commit的区别
    蓝桥杯-网络安全-练习题-crypto-rsa
  • 原文地址:https://blog.csdn.net/ykrsgs/article/details/128201833