diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 57d142eb0b..06992c2de0 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -341,8 +341,13 @@ if(MSVC) string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") - if(NOT ENABLE_NOISY_WARNINGS AND MSVC_VERSION EQUAL 1400) - ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267) + if(NOT ENABLE_NOISY_WARNINGS) + if(MSVC_VERSION EQUAL 1400) + ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267) + endif() + if(MSVC_VERSION LESS 1900) # MSVS2015 + ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127) # warning C4127: conditional expression is constant + endif() endif() # allow extern "C" functions throw exceptions diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 90eb392f58..ea587b1870 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1891,13 +1891,131 @@ void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows ) dft( src, dst, flags | DFT_INVERSE, nonzero_rows ); } +namespace { + +#define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0]) +#define MUL_SPECTRUMS_COL(A, B, C) \ + VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \ + for (size_t j = 1; j <= rows - 2; j += 2) \ + { \ + double a_re = VAL(A, j), a_im = VAL(A, j + 1); \ + double b_re = VAL(B, j), b_im = VAL(B, j + 1); \ + if (conjB) b_im = -b_im; \ + double c_re = a_re * b_re - a_im * b_im; \ + double c_im = a_re * b_im + a_im * b_re; \ + VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \ + } \ + if ((rows & 1) == 0) \ + VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1) + +template static inline +void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows) +{ + MUL_SPECTRUMS_COL(A, B, C); +} + +template static inline +void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows) +{ + MUL_SPECTRUMS_COL(AC, B, AC); +} +template static inline +void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows) +{ + if (inplaceA) + mulSpectrums_processCol_inplaceA(dataB, dataC, stepB, stepC, rows); + else + mulSpectrums_processCol_noinplace(dataA, dataB, dataC, stepA, stepB, stepC, rows); +} +#undef MUL_SPECTRUMS_COL +#undef VAL + +template static inline +void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols) +{ + mulSpectrums_processCol(dataA, dataB, dataC, stepA, stepB, stepC, rows); + if ((cols & 1) == 0) + { + mulSpectrums_processCol(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows); + } +} + +#define VAL(buf, elem) (data ## buf[(elem)]) +#define MUL_SPECTRUMS_ROW(A, B, C) \ + for (size_t j = j0; j < j1; j += 2) \ + { \ + double a_re = VAL(A, j), a_im = VAL(A, j + 1); \ + double b_re = VAL(B, j), b_im = VAL(B, j + 1); \ + if (conjB) b_im = -b_im; \ + double c_re = a_re * b_re - a_im * b_im; \ + double c_im = a_re * b_im + a_im * b_re; \ + VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \ + } +template static inline +void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1) +{ + MUL_SPECTRUMS_ROW(A, B, C); +} +template static inline +void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1) +{ + MUL_SPECTRUMS_ROW(AC, B, AC); +} +template static inline +void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1) +{ + if (inplaceA) + mulSpectrums_processRow_inplaceA(dataB, dataC, j0, j1); + else + mulSpectrums_processRow_noinplace(dataA, dataB, dataC, j0, j1); +} +#undef MUL_SPECTRUMS_ROW +#undef VAL + +template static inline +void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1) +{ + while (rows-- > 0) + { + if (is_1d_CN1) + dataC[0] = dataA[0]*dataB[0]; + mulSpectrums_processRow(dataA, dataB, dataC, j0, j1); + if (is_1d_CN1 && (cols & 1) == 0) + dataC[j1] = dataA[j1]*dataB[j1]; + + dataA = (const T*)(((char*)dataA) + stepA); + dataB = (const T*)(((char*)dataB) + stepB); + dataC = (T*)(((char*)dataC) + stepC); + } +} + + +template static inline +void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1) +{ + if (!is_1d && isCN1) + { + mulSpectrums_processCols(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols); + } + mulSpectrums_processRows(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1); +} +template static inline +void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1) +{ + if (dataA == dataC) + mulSpectrums_Impl_(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1); + else + mulSpectrums_Impl_(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1); +} + +} // namespace + void cv::mulSpectrums( InputArray _srcA, InputArray _srcB, OutputArray _dst, int flags, bool conjB ) { Mat srcA = _srcA.getMat(), srcB = _srcB.getMat(); int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type(); - int rows = srcA.rows, cols = srcA.cols; - int j, k; + size_t rows = srcA.rows, cols = srcA.cols; CV_Assert( type == srcB.type() && srcA.size() == srcB.size() ); CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); @@ -1905,149 +2023,42 @@ void cv::mulSpectrums( InputArray _srcA, InputArray _srcB, _dst.create( srcA.rows, srcA.cols, type ); Mat dst = _dst.getMat(); - bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 && - srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous())); + // correct inplace support + // Case 'dst.data == srcA.data' is handled by implementation, + // because it is used frequently (filter2D, matchTemplate) + if (dst.data == srcB.data) + srcB = srcB.clone(); // workaround for B only + + bool is_1d = (flags & DFT_ROWS) + || (rows == 1) + || (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()); if( is_1d && !(flags & DFT_ROWS) ) cols = cols + rows - 1, rows = 1; - int ncols = cols*cn; - int j0 = cn == 1; - int j1 = ncols - (cols % 2 == 0 && cn == 1); + bool isCN1 = cn == 1; + size_t j0 = isCN1 ? 1 : 0; + size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0); - if( depth == CV_32F ) + if (depth == CV_32F) { const float* dataA = (const float*)srcA.data; const float* dataB = (const float*)srcB.data; float* dataC = (float*)dst.data; - - size_t stepA = srcA.step/sizeof(dataA[0]); - size_t stepB = srcB.step/sizeof(dataB[0]); - size_t stepC = dst.step/sizeof(dataC[0]); - - if( !is_1d && cn == 1 ) - { - for( k = 0; k < (cols % 2 ? 1 : 2); k++ ) - { - if( k == 1 ) - dataA += cols - 1, dataB += cols - 1, dataC += cols - 1; - dataC[0] = dataA[0]*dataB[0]; - if( rows % 2 == 0 ) - dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB]; - if( !conjB ) - for( j = 1; j <= rows - 2; j += 2 ) - { - double re = (double)dataA[j*stepA]*dataB[j*stepB] - - (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; - double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] + - (double)dataA[(j+1)*stepA]*dataB[j*stepB]; - dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im; - } - else - for( j = 1; j <= rows - 2; j += 2 ) - { - double re = (double)dataA[j*stepA]*dataB[j*stepB] + - (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; - double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] - - (double)dataA[j*stepA]*dataB[(j+1)*stepB]; - dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im; - } - if( k == 1 ) - dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1; - } - } - - for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC ) - { - if( is_1d && cn == 1 ) - { - dataC[0] = dataA[0]*dataB[0]; - if( cols % 2 == 0 ) - dataC[j1] = dataA[j1]*dataB[j1]; - } - - if( !conjB ) - for( j = j0; j < j1; j += 2 ) - { - double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1]; - double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1]; - dataC[j] = (float)re; dataC[j+1] = (float)im; - } - else - for( j = j0; j < j1; j += 2 ) - { - double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1]; - double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1]; - dataC[j] = (float)re; dataC[j+1] = (float)im; - } - } + if (!conjB) + mulSpectrums_Impl(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1); + else + mulSpectrums_Impl(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1); } else { const double* dataA = (const double*)srcA.data; const double* dataB = (const double*)srcB.data; double* dataC = (double*)dst.data; - - size_t stepA = srcA.step/sizeof(dataA[0]); - size_t stepB = srcB.step/sizeof(dataB[0]); - size_t stepC = dst.step/sizeof(dataC[0]); - - if( !is_1d && cn == 1 ) - { - for( k = 0; k < (cols % 2 ? 1 : 2); k++ ) - { - if( k == 1 ) - dataA += cols - 1, dataB += cols - 1, dataC += cols - 1; - dataC[0] = dataA[0]*dataB[0]; - if( rows % 2 == 0 ) - dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB]; - if( !conjB ) - for( j = 1; j <= rows - 2; j += 2 ) - { - double re = dataA[j*stepA]*dataB[j*stepB] - - dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; - double im = dataA[j*stepA]*dataB[(j+1)*stepB] + - dataA[(j+1)*stepA]*dataB[j*stepB]; - dataC[j*stepC] = re; dataC[(j+1)*stepC] = im; - } - else - for( j = 1; j <= rows - 2; j += 2 ) - { - double re = dataA[j*stepA]*dataB[j*stepB] + - dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; - double im = dataA[(j+1)*stepA]*dataB[j*stepB] - - dataA[j*stepA]*dataB[(j+1)*stepB]; - dataC[j*stepC] = re; dataC[(j+1)*stepC] = im; - } - if( k == 1 ) - dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1; - } - } - - for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC ) - { - if( is_1d && cn == 1 ) - { - dataC[0] = dataA[0]*dataB[0]; - if( cols % 2 == 0 ) - dataC[j1] = dataA[j1]*dataB[j1]; - } - - if( !conjB ) - for( j = j0; j < j1; j += 2 ) - { - double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1]; - double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1]; - dataC[j] = re; dataC[j+1] = im; - } - else - for( j = j0; j < j1; j += 2 ) - { - double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1]; - double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1]; - dataC[j] = re; dataC[j+1] = im; - } - } + if (!conjB) + mulSpectrums_Impl(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1); + else + mulSpectrums_Impl(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1); } } diff --git a/modules/core/test/test_dxt.cpp b/modules/core/test/test_dxt.cpp index 2aaabbf0f8..dad12ce1d0 100644 --- a/modules/core/test/test_dxt.cpp +++ b/modules/core/test/test_dxt.cpp @@ -419,9 +419,6 @@ static void fixCCS( Mat& mat, int cols, int flags ) } } -#if defined _MSC_VER && _MSC_VER >= 1700 -#pragma optimize("", off) -#endif static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags ) { dst.create(src1.rows, src1.cols, src1.type()); @@ -430,12 +427,27 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags ) CV_Assert( src1.size == src2.size && src1.type() == src2.type() && (src1.type() == CV_32FC2 || src1.type() == CV_64FC2) ); + const Mat* src1_ = &src1; + Mat src1_tmp; + if (dst.data == src1.data) + { + src1_tmp = src1.clone(); + src1_ = &src1_tmp; + } + const Mat* src2_ = &src2; + Mat src2_tmp; + if (dst.data == src2.data) + { + src2_tmp = src2.clone(); + src2_ = &src2_tmp; + } + for( i = 0; i < dst.rows; i++ ) { if( depth == CV_32F ) { - const float* a = src1.ptr(i); - const float* b = src2.ptr(i); + const float* a = src1_->ptr(i); + const float* b = src2_->ptr(i); float* c = dst.ptr(i); if( !(flags & CV_DXT_MUL_CONJ) ) @@ -459,8 +471,8 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags ) } else { - const double* a = src1.ptr(i); - const double* b = src2.ptr(i); + const double* a = src1_->ptr(i); + const double* b = src2_->ptr(i); double* c = dst.ptr(i); if( !(flags & CV_DXT_MUL_CONJ) ) @@ -484,9 +496,6 @@ static void mulComplex( const Mat& src1, const Mat& src2, Mat& dst, int flags ) } } } -#if defined _MSC_VER && _MSC_VER >= 1700 -#pragma optimize("", on) -#endif } @@ -778,9 +787,7 @@ public: protected: void run_func(); void prepare_to_validation( int test_case_idx ); -#if defined(__aarch64__) && defined(NDEBUG) double get_success_error_level( int test_case_idx, int i, int j ); -#endif }; @@ -788,31 +795,19 @@ CxCore_MulSpectrumsTest::CxCore_MulSpectrumsTest() : CxCore_DXTBaseTest( true, t { } -#if defined(__aarch64__) && defined(NDEBUG) double CxCore_MulSpectrumsTest::get_success_error_level( int test_case_idx, int i, int j ) { + (void)test_case_idx; + CV_Assert(i == OUTPUT); + CV_Assert(j == 0); int elem_depth = CV_MAT_DEPTH(cvGetElemType(test_array[i][j])); - if( elem_depth <= CV_32F ) - { - return ArrayTest::get_success_error_level( test_case_idx, i, j ); - } - switch( test_case_idx ) - { - // Usual threshold is too strict for these test cases due to the difference of fmsub and fsub - case 399: - case 420: - return DBL_EPSILON * 20000; - case 65: - case 161: - case 287: - case 351: - case 458: - return DBL_EPSILON * 10000; - default: - return ArrayTest::get_success_error_level( test_case_idx, i, j ); - } + CV_Assert(elem_depth == CV_32F || elem_depth == CV_64F); + + element_wise_relative_error = false; + double maxInputValue = 1000; // ArrayTest::get_minmax_bounds + double err = 8 * maxInputValue; // result = A*B + C*D + return (elem_depth == CV_32F ? FLT_EPSILON : DBL_EPSILON) * err; } -#endif void CxCore_MulSpectrumsTest::run_func() { diff --git a/modules/imgproc/src/phasecorr.cpp b/modules/imgproc/src/phasecorr.cpp index 87e4d3080e..48d19fa3c1 100644 --- a/modules/imgproc/src/phasecorr.cpp +++ b/modules/imgproc/src/phasecorr.cpp @@ -167,6 +167,9 @@ static void divSpectrums( InputArray _srcA, InputArray _srcB, OutputArray _dst, _dst.create( srcA.rows, srcA.cols, type ); Mat dst = _dst.getMat(); + CV_Assert(dst.data != srcA.data); // non-inplace check + CV_Assert(dst.data != srcB.data); // non-inplace check + bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));