mirror of
https://github.com/opencv/opencv.git
synced 2025-06-22 19:51:26 +08:00

[hal_rvv] Add cv::integral implementation and more types of input for test #27060 This patch introduces an RVV-optimized implementation of `cv::integral()` in hal_rvv, along with performance and accuracy tests for all valid input/output type combinations specified in `modules/imgproc/src/hal_replacement.hpp`:2a8d4b8e43/modules/imgproc/src/hal_replacement.hpp (L960-L974)
The vectorized prefix sum algorithm follows the approach described in [Prefix Sum with SIMD - Algorithmica](https://en.algorithmica.org/hpc/algorithms/prefix/). I intentionally omitted support for the following cases by returning `CV_HAL_ERROR_NOT_IMPLEMENTED`, as they are harder to implement or show limited performance gains: 1. **Tilted Sum**: The data access pattern for tilted sums requires multi-row operations, making effective vectorization difficult. 2. **3-channel images (`cn == 3`)**: Current implementation requires `VLEN/SEW` (a.k.a. number of elements in a vector register) to be a multiple of channel count, which 3-channel formats typically cannot satisfy. - Support for 1, 2 and 4 channel images is implemented. 3. **Small images (`!(width >> 8 || height >> 8)`)**: The scalar implementation demonstrates better performance for images with limited dimensions. - This is the same as `3rdparty/ndsrvp/src/integral.cpp` (09c71aed14/3rdparty/ndsrvp/src/integral.cpp, L24-L26)
Test configuration: - Platform: SpacemiT Muse Pi (K1 @ 1.60 Ghz) - Toolchain: GCC 14.2.0 - `integral_sqsum_full` test is disabled by default, so `--gtest_also_run_disabled_tests` is needed Test results: ```plaintext Geometric mean (ms) Name of Test imgproc-gcc-scalar imgproc-gcc-hal imgproc-gcc-hal vs imgproc-gcc-scalar (x-factor) integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32F) 1.973 1.415 1.39 integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32S) 1.343 1.351 0.99 integral::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_64F) 2.021 2.756 0.73 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32F) 4.695 2.874 1.63 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32S) 4.028 2.801 1.44 integral::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_64F) 5.965 4.926 1.21 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32F) 9.970 4.440 2.25 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32S) 7.934 4.244 1.87 integral::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_64F) 14.696 8.431 1.74 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32F) 5.949 4.108 1.45 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32S) 4.064 4.080 1.00 integral::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_64F) 6.137 7.975 0.77 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32F) 13.896 8.721 1.59 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32S) 10.948 8.513 1.29 integral::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_64F) 18.046 15.234 1.18 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32F) 35.105 13.778 2.55 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32S) 27.135 13.417 2.02 integral::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_64F) 43.477 25.616 1.70 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32F) 13.386 9.281 1.44 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32S) 9.159 9.194 1.00 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_64F) 13.776 17.836 0.77 
integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32F) 31.943 19.435 1.64 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32S) 24.747 18.946 1.31 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_64F) 35.925 33.943 1.06 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32F) 66.493 29.692 2.24 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32S) 54.737 28.250 1.94 integral::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_64F) 91.880 57.495 1.60 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32F) 4.384 4.016 1.09 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_32S) 3.676 3.960 0.93 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC1, CV_64F) 5.620 5.224 1.08 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32F) 9.971 7.696 1.30 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_32S) 8.934 7.632 1.17 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC2, CV_64F) 9.927 9.759 1.02 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32F) 21.556 12.288 1.75 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_32S) 21.261 12.089 1.76 integral_sqsum::Size_MatType_OutMatDepth::(640x480, 8UC4, CV_64F) 23.989 16.278 1.47 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32F) 15.232 11.752 1.30 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_32S) 12.976 11.721 1.11 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC1, CV_64F) 16.450 15.627 1.05 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32F) 25.932 23.243 1.12 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_32S) 24.750 23.019 1.08 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC2, CV_64F) 28.228 29.605 0.95 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32F) 61.665 37.477 1.65 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_32S) 61.536 37.126 1.66 integral_sqsum::Size_MatType_OutMatDepth::(1280x720, 8UC4, CV_64F) 
73.989 48.994 1.51 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32F) 49.640 26.529 1.87 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_32S) 35.869 26.417 1.36 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC1, CV_64F) 34.378 35.056 0.98 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32F) 82.138 52.661 1.56 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_32S) 54.644 52.089 1.05 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC2, CV_64F) 75.073 66.670 1.13 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32F) 143.283 83.943 1.71 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_32S) 156.851 82.378 1.90 integral_sqsum::Size_MatType_OutMatDepth::(1920x1080, 8UC4, CV_64F) 521.594 111.375 4.68 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32F_32F)) 3.529 2.787 1.27 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32F_64F)) 4.396 3.998 1.10 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_32F)) 3.229 2.774 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_32S)) 2.945 2.780 1.06 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_32S_64F)) 3.857 3.995 0.97 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC1, DEPTH_64F_64F)) 5.872 5.228 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC1, DEPTH_64F_64F)) 6.075 5.277 1.15 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC1, DEPTH_64F_64F)) 5.680 5.296 1.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, DEPTH_32F_32F)) 3.355 2.896 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, DEPTH_32F_64F)) 4.183 4.000 1.05 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC1, DEPTH_64F_64F)) 6.237 5.143 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, 
(64FC1, DEPTH_64F_64F)) 4.753 4.783 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32F_32F)) 8.021 5.793 1.38 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32F_64F)) 9.963 7.704 1.29 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_32F)) 7.864 5.720 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_32S)) 7.141 5.699 1.25 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_32S_64F)) 9.228 7.646 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC2, DEPTH_64F_64F)) 9.940 9.759 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC2, DEPTH_64F_64F)) 10.606 9.716 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC2, DEPTH_64F_64F)) 9.933 9.751 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_32F_32F)) 7.986 5.962 1.34 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_32F_64F)) 9.243 7.598 1.22 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC2, DEPTH_64F_64F)) 10.573 9.425 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (64FC2, DEPTH_64F_64F)) 11.029 8.977 1.23 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32F_32F)) 17.236 8.881 1.94 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32F_64F)) 20.905 12.322 1.70 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_32F)) 16.011 8.666 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_32S)) 15.932 8.507 1.87 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_32S_64F)) 20.713 12.115 1.71 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (8UC4, DEPTH_64F_64F)) 23.953 16.284 1.47 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16UC4, DEPTH_64F_64F)) 25.127 16.341 1.54 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (16SC4, DEPTH_64F_64F)) 24.950 16.441 1.52 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_32F_32F)) 17.261 8.906 1.94 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_32F_64F)) 21.944 12.073 1.82 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (32FC4, DEPTH_64F_64F)) 25.921 15.539 1.67 integral_sqsum_full::Size_MatType_OutMatDepthArray::(640x480, (64FC4, DEPTH_64F_64F)) 27.938 14.824 1.88 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32F_32F)) 11.156 8.260 1.35 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32F_64F)) 14.777 11.869 1.24 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_32F)) 9.693 8.221 1.18 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_32S)) 9.023 8.256 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_32S_64F)) 13.276 11.821 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC1, DEPTH_64F_64F)) 15.406 15.618 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC1, DEPTH_64F_64F)) 16.799 15.749 1.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC1, DEPTH_64F_64F)) 15.054 15.806 0.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_32F_32F)) 10.055 7.999 1.26 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_32F_64F)) 13.506 11.253 1.20 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC1, DEPTH_64F_64F)) 14.952 15.021 1.00 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC1, DEPTH_64F_64F)) 13.761 14.002 0.98 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32F_32F)) 22.677 17.330 1.31 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32F_64F)) 26.283 23.237 1.13 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_32F)) 20.126 17.118 1.18 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_32S)) 19.337 17.041 1.13 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_32S_64F)) 24.973 23.004 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC2, DEPTH_64F_64F)) 29.959 29.585 1.01 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC2, DEPTH_64F_64F)) 33.598 29.599 1.14 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC2, DEPTH_64F_64F)) 46.213 29.741 1.55 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_32F_32F)) 33.077 17.556 1.88 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_32F_64F)) 33.960 22.991 1.48 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC2, DEPTH_64F_64F)) 41.792 28.803 1.45 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC2, DEPTH_64F_64F)) 34.660 28.532 1.21 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32F_32F)) 52.989 27.659 1.92 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32F_64F)) 62.418 37.515 1.66 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_32F)) 50.902 27.310 1.86 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_32S)) 47.301 27.019 1.75 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_32S_64F)) 61.982 37.140 1.67 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (8UC4, DEPTH_64F_64F)) 79.403 49.041 1.62 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16UC4, DEPTH_64F_64F)) 86.550 49.180 1.76 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (16SC4, DEPTH_64F_64F)) 85.715 49.468 1.73 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_32F_32F)) 63.932 28.019 
2.28 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_32F_64F)) 68.180 36.858 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (32FC4, DEPTH_64F_64F)) 83.063 46.483 1.79 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1280x720, (64FC4, DEPTH_64F_64F)) 91.990 44.545 2.07 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32F_32F)) 25.503 18.609 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32F_64F)) 29.544 26.635 1.11 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_32F)) 22.581 18.514 1.22 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_32S)) 20.860 18.547 1.12 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_32S_64F)) 26.046 26.373 0.99 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC1, DEPTH_64F_64F)) 34.831 34.997 1.00 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC1, DEPTH_64F_64F)) 36.428 35.214 1.03 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC1, DEPTH_64F_64F)) 32.435 35.314 0.92 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_32F_32F)) 22.548 18.845 1.20 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_32F_64F)) 28.589 25.790 1.11 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC1, DEPTH_64F_64F)) 32.625 33.791 0.97 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC1, DEPTH_64F_64F)) 30.158 31.889 0.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32F_32F)) 53.374 38.938 1.37 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32F_64F)) 73.892 52.747 1.40 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32S_32F)) 47.392 38.572 1.23 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, 
DEPTH_32S_32S)) 45.638 38.225 1.19 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_32S_64F)) 69.966 52.156 1.34 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC2, DEPTH_64F_64F)) 68.560 66.963 1.02 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC2, DEPTH_64F_64F)) 71.487 65.420 1.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC2, DEPTH_64F_64F)) 68.127 65.718 1.04 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_32F_32F)) 72.967 39.987 1.82 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_32F_64F)) 63.933 51.408 1.24 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC2, DEPTH_64F_64F)) 73.334 63.354 1.16 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC2, DEPTH_64F_64F)) 80.983 60.778 1.33 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32F_32F)) 116.981 59.908 1.95 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32F_64F)) 155.085 83.974 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_32F)) 109.567 58.525 1.87 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_32S)) 105.457 57.124 1.85 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_32S_64F)) 157.325 82.485 1.91 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (8UC4, DEPTH_64F_64F)) 265.776 111.577 2.38 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16UC4, DEPTH_64F_64F)) 585.218 110.583 5.29 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (16SC4, DEPTH_64F_64F)) 585.418 111.302 5.26 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_32F_32F)) 126.456 60.415 2.09 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_32F_64F)) 169.278 81.460 2.08 
integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (32FC4, DEPTH_64F_64F)) 281.256 104.732 2.69 integral_sqsum_full::Size_MatType_OutMatDepthArray::(1920x1080, (64FC4, DEPTH_64F_64F)) 620.885 99.953 6.21 ``` The vectorized implementation shows progressively better acceleration for larger image sizes and higher channel counts, achieving up to 6.21× speedup for 64FC4 (1920×1080) inputs with the `DEPTH_64F_64F` configuration. This is my first time proposing a patch for the OpenCV project 🥹; if there's anything that can be improved, please tell me. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
163 lines
5.2 KiB
C++
163 lines
5.2 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
#include "perf_precomp.hpp"
|
|
|
|
namespace opencv_test {
|
|
|
|
// Output depth combinations exercised by the integral perf tests, named
// DEPTH_<sum depth>_<squared-sum depth>.
// The numeric values are used as row indices into extraOutputDepths, so they
// must stay contiguous and start at zero.
enum PerfSqMatDepth
{
    DEPTH_32S_32S = 0,  // sum: 32S, sqsum: 32S
    DEPTH_32S_32F = 1,  // sum: 32S, sqsum: 32F
    DEPTH_32S_64F = 2,  // sum: 32S, sqsum: 64F
    DEPTH_32F_32F = 3,  // sum: 32F, sqsum: 32F
    DEPTH_32F_64F = 4,  // sum: 32F, sqsum: 64F
    DEPTH_64F_64F = 5   // sum: 64F, sqsum: 64F
};
|
|
|
|
CV_ENUM(IntegralOutputDepths, DEPTH_32S_32S, DEPTH_32S_32F, DEPTH_32S_64F, DEPTH_32F_32F, DEPTH_32F_64F, DEPTH_64F_64F)
|
|
|
|
static int extraOutputDepths[6][2] = {{CV_32S, CV_32S}, {CV_32S, CV_32F}, {CV_32S, CV_64F}, {CV_32F, CV_32F}, {CV_32F, CV_64F}, {CV_64F, CV_64F}};
|
|
|
|
typedef tuple<Size, MatType, MatDepth> Size_MatType_OutMatDepth_t;
|
|
typedef perf::TestBaseWithParam<Size_MatType_OutMatDepth_t> Size_MatType_OutMatDepth;
|
|
|
|
typedef tuple<Size, std::tuple<MatType, IntegralOutputDepths>> Size_MatType_OutMatDepthArray_t;
|
|
typedef perf::TestBaseWithParam<Size_MatType_OutMatDepthArray_t> Size_MatType_OutMatDepthArray;
|
|
|
|
// Times integral(src, sum, sdepth) over typical image sizes, 8-bit sources with
// 1-4 channels and three output depths, then verifies a 4x4 corner of the result.
PERF_TEST_P(Size_MatType_OutMatDepth, integral,
            testing::Combine(
                testing::Values(TYPICAL_MAT_SIZES),
                testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4),
                testing::Values(CV_32S, CV_32F, CV_64F)
            )
)
{
    const Size_MatType_OutMatDepth_t& params = GetParam();
    const Size imageSize = get<0>(params);
    const int srcType = get<1>(params);
    const int sumDepth = get<2>(params);
    const bool isFloatSum = (sumDepth == CV_32F);

    Mat src(imageSize, srcType);
    Mat sum(imageSize, sumDepth);

    declare.in(src, WARMUP_RNG).out(sum);
    if (isFloatSum)
    {
        // FP32 calculations are not accurate (mantissa is 23-bit)
        const double scale = (1 << 23) / static_cast<double>(imageSize.area() * 256);
        src *= scale;
    }

    TEST_CYCLE()
    {
        integral(src, sum, sumDepth);
    }

    // Rebuild the bottom-right 4x4 patch of the source from four shifted 4x4
    // windows of the integral image; the result must match the source exactly.
    const Rect srcPatch(src.cols - 4, src.rows - 4, 4, 4);
    const Rect sumBottomRight(sum.cols - 4, sum.rows - 4, 4, 4);
    const Rect sumTopLeft(sum.cols - 5, sum.rows - 5, 4, 4);
    const Rect sumTopRight(sum.cols - 4, sum.rows - 5, 4, 4);
    const Rect sumBottomLeft(sum.cols - 5, sum.rows - 4, 4, 4);

    Mat expectedPatch;
    src(srcPatch).convertTo(expectedPatch, sumDepth);
    Mat restoredPatch =
        sum(sumBottomRight) + sum(sumTopLeft) - sum(sumTopRight) - sum(sumBottomLeft);
    EXPECT_EQ(0, cvtest::norm(restoredPatch, expectedPatch, NORM_INF))
        << expectedPatch << endl << restoredPatch << endl
        << sum(sumBottomRight);

    // No regression-data comparison for the 32F output; other depths are compared with 1e-6 tolerance.
    if (isFloatSum)
    {
        SANITY_CHECK_NOTHING();
    }
    else
    {
        SANITY_CHECK(sum, 1e-6);
    }
}
|
|
|
|
// Times integral(src, sum, sqsum, sdepth), which produces both the sum and the
// squared-sum outputs, over the same size / source type / output depth grid.
PERF_TEST_P(Size_MatType_OutMatDepth, integral_sqsum,
            testing::Combine(
                testing::Values(TYPICAL_MAT_SIZES),
                testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4),
                testing::Values(CV_32S, CV_32F, CV_64F)
            )
)
{
    const Size_MatType_OutMatDepth_t& params = GetParam();
    const Size imageSize = get<0>(params);
    const int srcType = get<1>(params);
    const int outDepth = get<2>(params);

    Mat src(imageSize, srcType);
    Mat sum(imageSize, outDepth);
    Mat sqsum(imageSize, outDepth);

    declare.in(src, WARMUP_RNG).out(sum, sqsum);
    declare.time(100);

    TEST_CYCLE()
    {
        integral(src, sum, sqsum, outDepth);
    }

    // Both outputs are compared against stored regression data.
    SANITY_CHECK(sum, 1e-6);
    SANITY_CHECK(sqsum, 1e-6);
}
|
|
|
|
static std::vector<std::tuple<MatType, IntegralOutputDepths>> GetFullSqsumDepthPairs() {
|
|
static int extraDepths[12][2] = {
|
|
{CV_8U, DEPTH_32S_64F},
|
|
{CV_8U, DEPTH_32S_32F},
|
|
{CV_8U, DEPTH_32S_32S},
|
|
{CV_8U, DEPTH_32F_64F},
|
|
{CV_8U, DEPTH_32F_32F},
|
|
{CV_8U, DEPTH_64F_64F},
|
|
{CV_16U, DEPTH_64F_64F},
|
|
{CV_16S, DEPTH_64F_64F},
|
|
{CV_32F, DEPTH_32F_64F},
|
|
{CV_32F, DEPTH_32F_32F},
|
|
{CV_32F, DEPTH_64F_64F},
|
|
{CV_64F, DEPTH_64F_64F}
|
|
};
|
|
std::vector<std::tuple<MatType, IntegralOutputDepths>> valid_pairs;
|
|
for (size_t i = 0; i < 12; i++) {
|
|
for (int cn = 1; cn <= 4; cn++) {
|
|
valid_pairs.emplace_back(CV_MAKETYPE(extraDepths[i][0], cn), extraDepths[i][1]);
|
|
}
|
|
}
|
|
return valid_pairs;
|
|
}
|
|
|
|
// Times integral(src, sum, sqsum, sdepth, sqdepth) for every pair produced by
// GetFullSqsumDepthPairs(). The DISABLED_ prefix keeps it out of default runs.
PERF_TEST_P(Size_MatType_OutMatDepthArray, DISABLED_integral_sqsum_full,
            testing::Combine(
                testing::Values(TYPICAL_MAT_SIZES),
                testing::ValuesIn(GetFullSqsumDepthPairs())
            )
)
{
    const Size_MatType_OutMatDepthArray_t& params = GetParam();
    const Size imageSize = get<0>(params);
    const std::tuple<MatType, IntegralOutputDepths>& typeAndDepths = get<1>(params);

    const int srcType = get<0>(typeAndDepths);
    // The output-depth parameter is a row index into extraOutputDepths: {sum depth, sqsum depth}.
    const int depthRow = get<1>(typeAndDepths);
    const int sumDepth = extraOutputDepths[depthRow][0];
    const int sqsumDepth = extraOutputDepths[depthRow][1];

    Mat src(imageSize, srcType);
    Mat sum(imageSize, sumDepth);
    Mat sqsum(imageSize, sqsumDepth);

    declare.in(src, WARMUP_RNG).out(sum, sqsum);
    declare.time(100);

    TEST_CYCLE()
    {
        integral(src, sum, sqsum, sumDepth, sqsumDepth);
    }

    // Timing only: results are not compared against regression data.
    SANITY_CHECK_NOTHING();
}
|
|
|
|
// Times integral(src, sum, sqsum, tilted, sdepth), which additionally produces
// the tilted output, over the same size / source type / output depth grid.
PERF_TEST_P(Size_MatType_OutMatDepth, integral_sqsum_tilted,
            testing::Combine(
                testing::Values(TYPICAL_MAT_SIZES),
                testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4),
                testing::Values(CV_32S, CV_32F, CV_64F)
            )
)
{
    const Size_MatType_OutMatDepth_t& params = GetParam();
    const Size imageSize = get<0>(params);
    const int srcType = get<1>(params);
    const int outDepth = get<2>(params);

    Mat src(imageSize, srcType);
    Mat sum(imageSize, outDepth);
    Mat sqsum(imageSize, outDepth);
    Mat tilted(imageSize, outDepth);

    declare.in(src, WARMUP_RNG).out(sum, sqsum, tilted);
    declare.time(100);

    TEST_CYCLE()
    {
        integral(src, sum, sqsum, tilted, outDepth);
    }

    SANITY_CHECK(sum, 1e-6);
    SANITY_CHECK(sqsum, 1e-6);
    // Depths above CV_32S (the floating-point outputs here) use a relative
    // tolerance; the integer output uses an absolute one.
    SANITY_CHECK(tilted, 1e-6, tilted.depth() > CV_32S ? ERROR_RELATIVE : ERROR_ABSOLUTE);
}
|
|
|
|
} // namespace
|