mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge pull request #25792 from asmorkalov:as/HAL_fast_GaussianBlur
Added flag to GaussianBlur for faster but not bit-exact implementation #25792 Rationale: The current implementation of GaussianBlur is almost always bit-exact. This helps to get predictable results across platforms, but prohibits most approximations and optimization tricks. The patch converts the `borderType` parameter to a more generic `flags` and introduces the `GAUSS_ALLOW_APPROXIMATIONS` flag to allow a not bit-exact implementation. With the flag, the IPP and generic HAL implementations are called first. The flag naming and location are subject to discussion. Replaces https://github.com/opencv/opencv/pull/22073 Possibly related issue: https://github.com/opencv/opencv/issues/24135 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [x] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
3ff97c5580
commit
15783d6598
@ -1258,7 +1258,11 @@ if(CMAKE_GENERATOR MATCHES "Xcode|Visual Studio|Multi-Config")
|
||||
else()
|
||||
status(" Configuration:" ${CMAKE_BUILD_TYPE})
|
||||
endif()
|
||||
|
||||
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
|
||||
status(" Algorithm Hint:" ${OPENCV_ALGO_HINT_DEFAULT})
|
||||
else()
|
||||
status(" Algorithm Hint:" " ALGO_ACCURATE")
|
||||
endif()
|
||||
|
||||
# ========================= CPU code generation mode =========================
|
||||
status("")
|
||||
|
@ -217,6 +217,7 @@ Following options can be used to produce special builds with instrumentation or
|
||||
| `ENABLE_BUILD_HARDENING` | GCC, Clang, MSVC | Enable compiler options which reduce possibility of code exploitation. |
|
||||
| `ENABLE_LTO` | GCC, Clang, MSVC | Enable Link Time Optimization (LTO). |
|
||||
| `ENABLE_THIN_LTO` | Clang | Enable thin LTO which incorporates intermediate bitcode to binaries allowing consumers optimize their applications later. |
|
||||
| `OPENCV_ALGO_HINT_DEFAULT` | Any | Set default OpenCV implementation hint value: `ALGO_ACCURATE` or `ALGO_APPROX`. Dangerous! The option changes behaviour globally and may affect accuracy of many algorithms. |
|
||||
|
||||
@see [GCC instrumentation](https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html)
|
||||
@see [Build hardening](https://en.wikipedia.org/wiki/Hardening_(computing))
|
||||
|
@ -186,6 +186,10 @@ if(OPENCV_SEMIHOSTING)
|
||||
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING")
|
||||
endif(OPENCV_SEMIHOSTING)
|
||||
|
||||
if(DEFINED OPENCV_ALGO_HINT_DEFAULT)
|
||||
ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_ALGO_HINT_DEFAULT=${OPENCV_ALGO_HINT_DEFAULT}")
|
||||
endif(DEFINED OPENCV_ALGO_HINT_DEFAULT)
|
||||
|
||||
if(HAVE_HPX)
|
||||
ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
|
||||
endif()
|
||||
|
@ -150,6 +150,18 @@ It is possible to alternate error processing by using #redirectError().
|
||||
*/
|
||||
CV_EXPORTS CV_NORETURN void error(const Exception& exc);
|
||||
|
||||
/*! @brief Flags that allow modifying the behavior of some functions. Used as a set of flags.
|
||||
*/
|
||||
enum AlgorithmHint {
|
||||
ALGO_DEFAULT = 0, //!< Default algorithm behaviour defined during OpenCV build
|
||||
ALGO_ACCURATE = 1, //!< Use generic portable implementation
|
||||
ALGO_APPROX = 2, //!< Allow alternative approximations to get faster implementation. Behaviour and result depends on a platform
|
||||
};
|
||||
|
||||
/*! @brief Returns the AlgorithmHint selected by default, a.k.a. `ALGO_DEFAULT`, defined during OpenCV compilation.
|
||||
*/
|
||||
CV_EXPORTS_W AlgorithmHint getDefaultAlgorithmHint();
|
||||
|
||||
enum SortFlags { SORT_EVERY_ROW = 0, //!< each matrix row is sorted independently
|
||||
SORT_EVERY_COLUMN = 1, //!< each matrix column is sorted
|
||||
//!< independently; this flag and the previous one are
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/core/utils/configuration.private.hpp>
|
||||
#include <opencv2/core/utils/trace.private.hpp>
|
||||
|
||||
@ -2888,6 +2889,14 @@ bool restoreFPDenormalsState(const FPDenormalsModeState& state)
|
||||
|
||||
} // namespace details
|
||||
|
||||
/** @brief Returns the algorithm hint chosen when the library was built.

If OPENCV_ALGO_HINT_DEFAULT is defined for this translation unit, its value is
returned; otherwise the function returns ALGO_ACCURATE.
*/
AlgorithmHint getDefaultAlgorithmHint()
{
#ifdef OPENCV_ALGO_HINT_DEFAULT
    return OPENCV_ALGO_HINT_DEFAULT;
#else
    return ALGO_ACCURATE;
#endif
}
|
||||
|
||||
} // namespace cv
|
||||
|
||||
|
@ -1536,12 +1536,14 @@ respectively (see #getGaussianKernel for details); to fully control the result r
|
||||
possible future modifications of all this semantics, it is recommended to specify all of ksize,
|
||||
sigmaX, and sigmaY.
|
||||
@param borderType pixel extrapolation method, see #BorderTypes. #BORDER_WRAP is not supported.
|
||||
@param hint Implementation modification flags. See #AlgorithmHint
|
||||
|
||||
@sa sepFilter2D, filter2D, blur, boxFilter, bilateralFilter, medianBlur
|
||||
*/
|
||||
CV_EXPORTS_W void GaussianBlur( InputArray src, OutputArray dst, Size ksize,
|
||||
double sigmaX, double sigmaY = 0,
|
||||
int borderType = BORDER_DEFAULT );
|
||||
int borderType = BORDER_DEFAULT,
|
||||
AlgorithmHint hint = cv::ALGO_DEFAULT );
|
||||
|
||||
/** @brief Applies the bilateral filter to an image.
|
||||
|
||||
|
@ -468,7 +468,7 @@ static bool openvx_gaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
#endif
|
||||
|
||||
#if defined ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
|
||||
#ifdef ENABLE_IPP_GAUSSIAN_BLUR // see CMake's OPENCV_IPP_GAUSSIAN_BLUR option
|
||||
|
||||
#define IPP_DISABLE_GAUSSIAN_BLUR_LARGE_KERNELS_1TH 1
|
||||
#define IPP_DISABLE_GAUSSIAN_BLUR_16SC4_1TH 1
|
||||
@ -526,14 +526,14 @@ private:
|
||||
|
||||
#endif
|
||||
|
||||
static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
static bool ipp_GaussianBlur(cv::Mat& src, cv::Mat& dst, Size ksize,
|
||||
double sigma1, double sigma2, int borderType )
|
||||
{
|
||||
#ifdef HAVE_IPP_IW
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
|
||||
#if IPP_VERSION_X100 < 201800 && ((defined _MSC_VER && defined _M_IX86) || (defined __GNUC__ && defined __i386__))
|
||||
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
|
||||
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
|
||||
return false; // bug on ia32
|
||||
#else
|
||||
if(sigma1 != sigma2)
|
||||
@ -548,8 +548,6 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
// Acquire data and begin processing
|
||||
try
|
||||
{
|
||||
Mat src = _src.getMat();
|
||||
Mat dst = _dst.getMat();
|
||||
::ipp::IwiImage iwSrc = ippiGetImage(src);
|
||||
::ipp::IwiImage iwDst = ippiGetImage(dst);
|
||||
::ipp::IwiBorderSize borderSize = ::ipp::iwiSizeToBorderSize(ippiGetSize(ksize));
|
||||
@ -589,7 +587,7 @@ static bool ipp_GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
return true;
|
||||
#endif
|
||||
#else
|
||||
CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
|
||||
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(ksize); CV_UNUSED(sigma1); CV_UNUSED(sigma2); CV_UNUSED(borderType);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
@ -610,10 +608,13 @@ static bool validateGaussianBlurKernel(std::vector<T>& kernel)
|
||||
|
||||
void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
double sigma1, double sigma2,
|
||||
int borderType)
|
||||
int borderType, AlgorithmHint hint)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
if (hint == cv::ALGO_DEFAULT)
|
||||
hint = cv::getDefaultAlgorithmHint();
|
||||
|
||||
CV_Assert(!_src.empty());
|
||||
|
||||
int type = _src.type();
|
||||
@ -693,7 +694,27 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
src2.locateROI( wsz, ofs );
|
||||
|
||||
CALL_HAL(gaussianBlurBinomial, cv_hal_gaussianBlurBinomial, src2.ptr(), src2.step, dst.ptr(), dst.step, src2.cols, src2.rows, sdepth, cn,
|
||||
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
|
||||
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width,
|
||||
borderType & ~BORDER_ISOLATED);
|
||||
}
|
||||
|
||||
if (hint == ALGO_APPROX)
|
||||
{
|
||||
Point ofs;
|
||||
Size wsz(src.cols, src.rows);
|
||||
if(!(borderType & BORDER_ISOLATED))
|
||||
src.locateROI( wsz, ofs );
|
||||
|
||||
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
|
||||
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
|
||||
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
|
||||
|
||||
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
|
||||
// IPP is not bit-exact to OpenCV implementation
|
||||
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
|
||||
#endif
|
||||
CV_OVX_RUN(true,
|
||||
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
|
||||
}
|
||||
|
||||
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint16_t*)&fkx[0], (int)fkx.size(), (const uint16_t*)&fky[0], (int)fky.size(), borderType),
|
||||
@ -747,6 +768,25 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
ofs.x, ofs.y, wsz.width - src2.cols - ofs.x, wsz.height - src2.rows - ofs.y, ksize.width, borderType&~BORDER_ISOLATED);
|
||||
}
|
||||
|
||||
if (hint == ALGO_APPROX)
|
||||
{
|
||||
Point ofs;
|
||||
Size wsz(src.cols, src.rows);
|
||||
if(!(borderType & BORDER_ISOLATED))
|
||||
src.locateROI( wsz, ofs );
|
||||
|
||||
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
|
||||
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
|
||||
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
|
||||
|
||||
#ifdef ENABLE_IPP_GAUSSIAN_BLUR
|
||||
// IPP is not bit-exact to OpenCV implementation
|
||||
CV_IPP_RUN_FAST(ipp_GaussianBlur(src, dst, ksize, sigma1, sigma2, borderType));
|
||||
#endif
|
||||
CV_OVX_RUN(true,
|
||||
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
|
||||
}
|
||||
|
||||
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
|
||||
@ -772,7 +812,7 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
|
||||
|
||||
CALL_HAL(gaussianBlur, cv_hal_gaussianBlur, src.ptr(), src.step, dst.ptr(), dst.step, src.cols, src.rows, sdepth, cn,
|
||||
ofs.x, ofs.y, wsz.width - src.cols - ofs.x, wsz.height - src.rows - ofs.y, ksize.width, ksize.height,
|
||||
sigma1, sigma2, borderType&~BORDER_ISOLATED);
|
||||
sigma1, sigma2, borderType & ~BORDER_ISOLATED);
|
||||
|
||||
CV_OVX_RUN(true,
|
||||
openvx_gaussianBlur(src, dst, ksize, sigma1, sigma2, borderType))
|
||||
|
@ -244,7 +244,7 @@ static void checkGaussianBlur_8Uvs32F(const Mat& src8u, const Mat& src32f, int N
|
||||
// Loads shared/lena.png, converts it to CV_32F and passes both the 8-bit image
// and the float copy to checkGaussianBlur_8Uvs32F.
TEST(GaussianBlur_Bitexact, regression_9863)
{
    Mat src8u = imread(cvtest::findDataFile("shared/lena.png"));
    // Declared exactly once: a second `Mat src32f` in the same scope is a redeclaration.
    Mat src32f;
    src8u.convertTo(src32f, CV_32F);

    checkGaussianBlur_8Uvs32F(src8u, src32f, 151, 30);
}
|
||||
@ -260,4 +260,58 @@ TEST(GaussianBlur_Bitexact, overflow_20792)
|
||||
EXPECT_GT(count, nintyPercent);
|
||||
}
|
||||
|
||||
CV_ENUM(GaussInputType, CV_8U, CV_16S);
|
||||
CV_ENUM(GaussBorder, BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT_101);
|
||||
|
||||
// Parameterized fixture for GaussianBlur tests.
// Tuple layout: <input depth (GaussInputType), int, double, border mode (GaussBorder)>.
struct GaussianBlurVsBitexact: public testing::TestWithParam<tuple<GaussInputType, int, double, GaussBorder>>
{
    // Reads the shared test image before each test case.
    void SetUp() override
    {
        orig = imread(findDataFile("shared/lena.png"));
        // Fatal assertion: when the image cannot be read, the test body must not
        // run on an empty matrix.
        ASSERT_FALSE(orig.empty()) << "Cannot find test image shared/lena.png";
    }

    Mat orig; // image exactly as returned by imread()
};
|
||||
|
||||
// NOTE: The test was designed for IPP (-DOPENCV_IPP_GAUSSIAN_BLUR=ON)
|
||||
// Should be extended after new HAL integration
|
||||
TEST_P(GaussianBlurVsBitexact, approx)
|
||||
{
|
||||
auto testParams = GetParam();
|
||||
int dtype = get<0>(testParams);
|
||||
int ksize = get<1>(testParams);
|
||||
double sigma = get<2>(testParams);
|
||||
int border = get<3>(testParams);
|
||||
|
||||
Mat src;
|
||||
orig.convertTo(src, dtype);
|
||||
|
||||
cv::Mat gt;
|
||||
GaussianBlur(src, gt, Size(ksize, ksize), sigma, sigma, border, ALGO_ACCURATE);
|
||||
|
||||
cv::Mat dst;
|
||||
GaussianBlur(src, dst, Size(ksize, ksize), sigma, sigma, border, ALGO_APPROX);
|
||||
|
||||
cv::Mat diff;
|
||||
cv::absdiff(dst, gt, diff);
|
||||
cv::Mat flatten_diff = diff.reshape(1, diff.rows);
|
||||
|
||||
int nz = countNonZero(flatten_diff);
|
||||
EXPECT_LE(nz, 0.06*src.total()); // Less 6% of different pixels
|
||||
|
||||
double min_val, max_val;
|
||||
minMaxLoc(flatten_diff, &min_val, &max_val);
|
||||
EXPECT_LE(max_val, 2); // expectes results floating +-1
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, GaussianBlurVsBitexact,
|
||||
testing::Combine(
|
||||
GaussInputType::all(),
|
||||
testing::Values(3, 5, 7),
|
||||
testing::Values(0.75, 1.25),
|
||||
GaussBorder::all()
|
||||
)
|
||||
);
|
||||
|
||||
}} // namespace
|
||||
|
@ -987,6 +987,10 @@ class SamplesFindFile(NewOpenCVTests):
|
||||
except cv.error as _e:
|
||||
pass
|
||||
|
||||
class AlgorithmImplHit(NewOpenCVTests):
    """Smoke test for the cv.getDefaultAlgorithmHint binding."""

    def test_callable(self):
        # Only verifies that the binding can be called and yields a value.
        res = cv.getDefaultAlgorithmHint()
        self.assertIsNotNone(res)
|
||||
|
||||
if __name__ == '__main__':
|
||||
NewOpenCVTests.bootstrap()
|
||||
|
@ -1126,6 +1126,7 @@ void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
|
||||
recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n"));
|
||||
recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
|
||||
recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
|
||||
recordPropertyVerbose("implementation_hint", "Algorithm hint", getSnippetFromConfig("Algorithm Hint:", "\n"));
|
||||
const char* parallelFramework = cv::currentParallelFramework();
|
||||
if (parallelFramework)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user