diff --git a/modules/3d/misc/java/test/Cv3dTest.java b/modules/3d/misc/java/test/Cv3dTest.java index 7bedb50b0f..b360a0baf5 100644 --- a/modules/3d/misc/java/test/Cv3dTest.java +++ b/modules/3d/misc/java/test/Cv3dTest.java @@ -315,8 +315,8 @@ public class Cv3dTest extends OpenCVTestCase { Mat truth_tvec = new Mat(3, 1, CvType.CV_64F); truth_tvec.put(0, 0, -320, -240, 400); - assertMatEqual(truth_rvec, rvec, EPS); - assertMatEqual(truth_tvec, tvec, EPS); + assertMatEqual(truth_rvec, rvec, EPS*2); + assertMatEqual(truth_tvec, tvec, EPS*2); } public void testSolvePnPListOfPoint3ListOfPointMatMatMatMatBoolean() { diff --git a/modules/3d/test/test_odometry.cpp b/modules/3d/test/test_odometry.cpp index a8bd0ed63c..5e7365e205 100644 --- a/modules/3d/test/test_odometry.cpp +++ b/modules/3d/test/test_odometry.cpp @@ -227,7 +227,7 @@ void OdometryTest::run() } // compare rotation - double possibleError = algtype == OdometryAlgoType::COMMON ? 0.015f : 0.01f; + double possibleError = algtype == OdometryAlgoType::COMMON ? 
0.02f : 0.02f; Affine3f src = Affine3f(Vec3f(rvec), Vec3f(tvec)); Affine3f res = Affine3f(Vec3f(calcRvec), Vec3f(calcTvec)); diff --git a/modules/calib/test/test_cameracalibration.cpp b/modules/calib/test/test_cameracalibration.cpp index f8aadbf28b..fb276c547e 100644 --- a/modules/calib/test/test_cameracalibration.cpp +++ b/modules/calib/test/test_cameracalibration.cpp @@ -2010,8 +2010,8 @@ double CV_MultiviewCalibrationTest_CPP::calibrateStereoCamera( const vector image_sizes (2, imageSize); - Mat visibility_mat = Mat_::ones(2, numImgs); - std::vector is_fisheye(2, false); + Mat visibility_mat = Mat_::ones(2, numImgs); + std::vector is_fisheye(2, false); std::vector all_flags(2, flags); double rms = calibrateMultiview(objectPoints, image_points_all, image_sizes, visibility_mat, Rs, Ts, Ks, distortions, rvecs, tvecs, is_fisheye, errors_mat, noArray(), false, all_flags); diff --git a/modules/calib/test/test_fisheye.cpp b/modules/calib/test/test_fisheye.cpp index 9174ebeadf..bef557a08f 100644 --- a/modules/calib/test/test_fisheye.cpp +++ b/modules/calib/test/test_fisheye.cpp @@ -610,9 +610,9 @@ TEST_F(fisheyeTest, multiview_calibration) right_pts.copyTo(image_points_all[1][i]); } std::vector image_sizes(2, imageSize); - cv::Mat visibility_mat = cv::Mat_::ones(2, (int)leftPoints.size()), errors_mat, output_pairs; + cv::Mat visibility_mat = cv::Mat_::ones(2, (int)leftPoints.size()), errors_mat, output_pairs; std::vector Rs, Ts, Ks, distortions, rvecs0, tvecs0; - std::vector is_fisheye(2, true); + std::vector is_fisheye(2, true); int flag = 0; flag |= cv::CALIB_RECOMPUTE_EXTRINSIC; flag |= cv::CALIB_CHECK_COND; diff --git a/modules/calib/test/test_multiview_calib.cpp b/modules/calib/test/test_multiview_calib.cpp index 2683563246..963d3dadf9 100644 --- a/modules/calib/test/test_multiview_calib.cpp +++ b/modules/calib/test/test_multiview_calib.cpp @@ -65,7 +65,7 @@ TEST(multiview_calibration, accuracy) { std::vector> objPoints; std::vector> image_points_all(num_cameras); 
cv::Mat ones = cv::Mat_::ones(1, num_pts); - std::vector> visibility; + std::vector> visibility; cv::Mat centroid = cv::Mat(cv::Matx31f( (float)cv::mean(pattern.row(0)).val[0], (float)cv::mean(pattern.row(1)).val[0], @@ -83,7 +83,7 @@ TEST(multiview_calibration, accuracy) { cv::Mat pattern_new = (R * (pattern - centroid * ones) + centroid * ones + t * ones).t(); std::vector img_pts_cams(num_cameras); - std::vector visible(num_cameras, false); + std::vector visible(num_cameras, (uchar)0); int num_visible_patterns = 0; for (int c = 0; c < num_cameras; c++) { cv::Mat img_pts; @@ -108,7 +108,7 @@ TEST(multiview_calibration, accuracy) { } } if (are_all_pts_in_image) { - visible[c] = true; + visible[c] = 1; num_visible_patterns += 1; img_pts.copyTo(img_pts_cams[c]); } @@ -124,10 +124,10 @@ TEST(multiview_calibration, accuracy) { break; } } - cv::Mat visibility_mat = cv::Mat_(num_cameras, (int)objPoints.size()); + cv::Mat visibility_mat = cv::Mat_(num_cameras, (int)objPoints.size()); for (int c = 0; c < num_cameras; c++) { for (int f = 0; f < (int)objPoints.size(); f++) { - visibility_mat.at(c, f) = visibility[f][c]; + visibility_mat.at(c, f) = visibility[f][c]; } } diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 79847578b4..8a2d7d3935 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -487,9 +487,13 @@ Cv64suf; #define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT) #define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG) -/** Size of each channel item, - 0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */ -#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15) +/** Size of an array/scalar single-channel value, 4 bits per type: + CV_8U - 1 byte + CV_8S - 1 byte + CV_16U - 2 bytes + ... 
+*/ +#define CV_ELEM_SIZE1(type) ((int)(0x4881228442211ULL >> (CV_MAT_DEPTH(type) * 4)) & 15) #define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type)) @@ -963,6 +967,41 @@ protected: #endif }; +class bfloat16_t +{ +public: + bfloat16_t() : w(0) {} + explicit bfloat16_t(float x) + { + Cv32suf in; + in.f = x; + w = (ushort)(in.u >> 16); + } + + operator float() const + { + Cv32suf out; + out.u = w << 16; + return out.f; + } + + static bfloat16_t fromBits(ushort b) + { + bfloat16_t result; + result.w = b; + return result; + } + static bfloat16_t zero() + { + bfloat16_t result; + result.w = (ushort)0; + return result; + } + ushort bits() const { return w; } +protected: + ushort w; +}; + } #endif diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 0d68078d98..8eeee8bbb6 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -197,9 +197,11 @@ CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len ); CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len ); +CV_EXPORTS void cvt16bf32f( const bfloat16_t* src, float* dst, int len ); +CV_EXPORTS void cvt32f16bf( const float* src, bfloat16_t* dst, int len ); -CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ); -CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ); +CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len, int cn ); +CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len, int cn ); struct CV_EXPORTS DFT1D { diff --git a/modules/core/include/opencv2/core/hal/interface.h b/modules/core/include/opencv2/core/hal/interface.h index 6f0a83d359..ea3364d3c6 100644 --- a/modules/core/include/opencv2/core/hal/interface.h +++ b/modules/core/include/opencv2/core/hal/interface.h @@ 
-66,8 +66,8 @@ typedef signed char schar; #define CV_USRTYPE1 (void)"CV_USRTYPE1 support has been dropped in OpenCV 4.0" -#define CV_CN_MAX 512 -#define CV_CN_SHIFT 3 +#define CV_CN_MAX 128 +#define CV_CN_SHIFT 5 #define CV_DEPTH_MAX (1 << CV_CN_SHIFT) #define CV_8U 0 @@ -78,9 +78,17 @@ typedef signed char schar; #define CV_32F 5 #define CV_64F 6 #define CV_16F 7 +#define CV_16BF 8 +#define CV_Bool 9 +#define CV_64U 10 +#define CV_64S 11 +#define CV_32U 12 +#define CV_DEPTH_CURR_MAX 13 #define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) #define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) +#define CV_IS_INT_TYPE(flags) (((1 << CV_MAT_DEPTH(flags)) & 0x1e1f) != 0) +#define CV_IS_FLOAT_TYPE(flags) (((1 << CV_MAT_DEPTH(flags)) & 0x1e0) != 0) #define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) #define CV_MAKE_TYPE CV_MAKETYPE @@ -132,6 +140,37 @@ typedef signed char schar; #define CV_16FC3 CV_MAKETYPE(CV_16F,3) #define CV_16FC4 CV_MAKETYPE(CV_16F,4) #define CV_16FC(n) CV_MAKETYPE(CV_16F,(n)) + +#define CV_64SC1 CV_MAKETYPE(CV_64S,1) +#define CV_64SC2 CV_MAKETYPE(CV_64S,2) +#define CV_64SC3 CV_MAKETYPE(CV_64S,3) +#define CV_64SC4 CV_MAKETYPE(CV_64S,4) +#define CV_64SC(n) CV_MAKETYPE(CV_64S,(n)) + +#define CV_64UC1 CV_MAKETYPE(CV_64U,1) +#define CV_64UC2 CV_MAKETYPE(CV_64U,2) +#define CV_64UC3 CV_MAKETYPE(CV_64U,3) +#define CV_64UC4 CV_MAKETYPE(CV_64U,4) +#define CV_64UC(n) CV_MAKETYPE(CV_64U,(n)) + +#define CV_BoolC1 CV_MAKETYPE(CV_Bool,1) +#define CV_BoolC2 CV_MAKETYPE(CV_Bool,2) +#define CV_BoolC3 CV_MAKETYPE(CV_Bool,3) +#define CV_BoolC4 CV_MAKETYPE(CV_Bool,4) +#define CV_BoolC(n) CV_MAKETYPE(CV_Bool,(n)) + +#define CV_32UC1 CV_MAKETYPE(CV_32U,1) +#define CV_32UC2 CV_MAKETYPE(CV_32U,2) +#define CV_32UC3 CV_MAKETYPE(CV_32U,3) +#define CV_32UC4 CV_MAKETYPE(CV_32U,4) +#define CV_32UC(n) CV_MAKETYPE(CV_32U,(n)) + +#define CV_16BFC1 CV_MAKETYPE(CV_16BF,1) +#define CV_16BFC2 CV_MAKETYPE(CV_16BF,2) +#define CV_16BFC3 CV_MAKETYPE(CV_16BF,3) +#define 
CV_16BFC4 CV_MAKETYPE(CV_16BF,4) +#define CV_16BFC(n) CV_MAKETYPE(CV_16BF,(n)) + //! @} //! @name Comparison operation diff --git a/modules/core/include/opencv2/core/hal/intrin.hpp b/modules/core/include/opencv2/core/hal/intrin.hpp index ee8310b5c5..5c58ba5e5a 100644 --- a/modules/core/include/opencv2/core/hal/intrin.hpp +++ b/modules/core/include/opencv2/core/hal/intrin.hpp @@ -720,6 +720,22 @@ namespace CV__SIMD_NAMESPACE { inline v_int32 vx_load_expand_q(const schar * ptr) { return VXPREFIX(_load_expand_q)(ptr); } //! @} + #ifndef OPENCV_HAL_HAVE_LOAD_STORE_BFLOAT16 + + inline v_float32 vx_load_expand(const bfloat16_t* ptr) + { + v_uint32 v = vx_load_expand((const ushort*)ptr); + return v_reinterpret_as_f32(v_shl<16>(v)); + } + + inline void v_pack_store(const bfloat16_t* ptr, v_float32 v) + { + v_int32 iv = v_shr<16>(v_reinterpret_as_s32(v)); + v_pack_store((short*)ptr, iv); + } + + #endif + /** @brief SIMD processing state cleanup call */ inline void vx_cleanup() { VXPREFIX(_cleanup)(); } @@ -1095,6 +1111,10 @@ namespace CV__SIMD_NAMESPACE { #define CV_SIMD 0 #endif +#if (!defined CV_SIMD_64F) || (!CV_SIMD_64F) +typedef struct v_float64 { int dummy; } v_float64; +#endif + #include "simd_utils.impl.hpp" #ifndef CV_DOXYGEN diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index 979b6163d8..6a3ee5b2d7 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -937,6 +937,11 @@ OPENCV_HAL_IMPL_AVX_CMP_OP_INT(v_uint32x8, v_int32x8, epi32, (int)0x80000000) inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ { return ~(a == b); } +inline v_int64x4 operator > (const v_int64x4& a, const v_int64x4& b) +{ return v_int64x4(_mm256_cmpgt_epi64(a.val, b.val)); } +inline v_int64x4 operator < (const v_int64x4& a, const v_int64x4& b) +{ return v_int64x4(_mm256_cmpgt_epi64(b.val, a.val)); } + 
OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_uint64x4) OPENCV_HAL_IMPL_AVX_CMP_OP_64BIT(v_int64x4) @@ -3162,6 +3167,22 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a) #endif } +/*#define OPENCV_HAL_HAVE_PACK_STORE_BFLOAT16 1 + +inline v_float32x8 v256_load_expand(const bfloat16_t* ptr) +{ + __m128i bf = _mm_loadu_si128((const __m128i*)ptr); + __m256i f = _mm256_unpacklo_epi16(_mm256_setzero_si256(), _mm256_castsi128_si256(bf)); + return v_float32x8(_mm256_castsi256_ps(f)); +} + +inline void v_pack_store(bfloat16_t* ptr, const v_float32x8& a) +{ + __m256i f = _mm256_castps_si256(a.val); + f = _mm256_packs_epi32(_mm256_srai_epi32(f, 16), f); + _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(f)); +}*/ + // // end of FP16 // diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e9a09d12ae..ab78451a8f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -3250,6 +3250,8 @@ template inline v_reg v_dotprod_expand_fast(const v_reg v_load_expand(const float16_t* ptr) { diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index 6f8973231b..5f8c9afbe3 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -1057,44 +1057,61 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32) OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32) + #if defined(__aarch64__) || defined(_M_ARM64) static inline uint64x2_t vmvnq_u64(uint64x2_t a) { uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); return veorq_u64(a, vx); } -//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) 
-//OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) -static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b) -{ return v_uint64x2(vceqq_u64(a.val, b.val)); } -static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b) -{ return v_uint64x2(vmvnq_u64(vceqq_u64(a.val, b.val))); } -static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b) -{ return v_int64x2(vreinterpretq_s64_u64(vceqq_s64(a.val, b.val))); } -static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b) -{ return v_int64x2(vreinterpretq_s64_u64(vmvnq_u64(vceqq_s64(a.val, b.val)))); } +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64) +OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64) #else static inline v_uint64x2 operator == (const v_uint64x2& a, const v_uint64x2& b) { - uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val)); - uint32x4_t swapped = vrev64q_u32(cmp); - return v_uint64x2(vreinterpretq_u64_u32(vandq_u32(cmp, swapped))); + uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), + vreinterpretq_u32_u64(b.val)); + uint32x4_t v_eq = vandq_u32(cmp, vrev64q_u32(cmp)); + return v_uint64x2(vreinterpretq_u64_u32(v_eq)); } static inline v_uint64x2 operator != (const v_uint64x2& a, const v_uint64x2& b) { - uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_u64(a.val), vreinterpretq_u32_u64(b.val)); - uint32x4_t swapped = vrev64q_u32(cmp); - uint64x2_t v_eq = vreinterpretq_u64_u32(vandq_u32(cmp, swapped)); - uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF)); - return v_uint64x2(veorq_u64(v_eq, vx)); + uint64x2_t v_mask = vorrq_u64(vsubq_u64(a.val, b.val), vsubq_u64(b.val, a.val)); + int64x2_t v_smask = vshrq_n_s64(vreinterpretq_s64_u64(v_mask), 63); + return v_uint64x2(vreinterpretq_u64_s64(v_smask)); } static inline v_int64x2 operator == (const v_int64x2& a, const v_int64x2& b) { - return 
v_reinterpret_as_s64(v_reinterpret_as_u64(a) == v_reinterpret_as_u64(b)); + uint32x4_t cmp = vceqq_u32(vreinterpretq_u32_s64(a.val), + vreinterpretq_u32_s64(b.val)); + uint32x4_t v_eq = vandq_u32(cmp, vrev64q_u32(cmp)); + return v_int64x2(vreinterpretq_s64_u32(v_eq)); } static inline v_int64x2 operator != (const v_int64x2& a, const v_int64x2& b) { - return v_reinterpret_as_s64(v_reinterpret_as_u64(a) != v_reinterpret_as_u64(b)); + int64x2_t v_mask = vorrq_s64(vsubq_s64(a.val, b.val), vsubq_s64(b.val, a.val)); + int64x2_t v_smask = vshrq_n_s64(v_mask, 63); + return v_int64x2(v_smask); +} +static inline v_uint64x2 operator > (const v_uint64x2& a, const v_uint64x2& b) +{ + int64x2_t v_mask = vreinterpretq_s64_u64(vsubq_u64(b.val, a.val)); + return v_uint64x2(vreinterpretq_u64_s64(vshrq_n_s64(v_mask, 63))); +} +static inline v_uint64x2 operator < (const v_uint64x2& a, const v_uint64x2& b) +{ + int64x2_t v_mask = vreinterpretq_s64_u64(vsubq_u64(a.val, b.val)); + return v_uint64x2(vreinterpretq_u64_s64(vshrq_n_s64(v_mask, 63))); +} +static inline v_int64x2 operator > (const v_int64x2& a, const v_int64x2& b) +{ + int64x2_t v_mask = vsubq_s64(b.val, a.val); + return v_int64x2(vshrq_n_s64(v_mask, 63)); +} +static inline v_int64x2 operator < (const v_int64x2& a, const v_int64x2& b) +{ + int64x2_t v_mask = vsubq_s64(a.val, b.val); + return v_int64x2(vshrq_n_s64(v_mask, 63)); } #endif #if CV_SIMD128_64F @@ -1622,7 +1639,7 @@ inline int v_signmask(const v_uint64x2& a) const int64x2_t signPosition = {0,1}; uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), signPosition); uint64_t t0 = vaddvq_u64(v0); - return t0; + return (int)t0; #else // #if CV_NEON_AARCH64 int64x1_t m0 = vdup_n_s64(0); uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0)); diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 9d17f71666..4c2e82db1c 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ 
b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -1275,6 +1275,14 @@ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ { return ~(a == b); } #endif +inline v_int64x2 operator > (const v_int64x2& a, const v_int64x2& b) +{ + __m128i s = _mm_srli_epi64(_mm_sub_epi64(b.val, a.val), 63); + return v_int64x2(_mm_sub_epi64(_mm_setzero_si128(), s)); +} +inline v_int64x2 operator < (const v_int64x2& a, const v_int64x2& b) +{ return b > a; } + OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2) OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index a5f244e8c0..a89fcf5400 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -298,9 +298,9 @@ public: DEPTH_MASK_32F = 1 << CV_32F, DEPTH_MASK_64F = 1 << CV_64F, DEPTH_MASK_16F = 1 << CV_16F, - DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1, + DEPTH_MASK_ALL = (1 << CV_DEPTH_CURR_MAX)-1, DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S, - DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1, + DEPTH_MASK_ALL_16F = DEPTH_MASK_ALL, DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F }; diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index c9fc1d67a6..2ab9584066 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -666,9 +666,7 @@ bool Mat::isSubmatrix() const inline size_t Mat::elemSize() const { - size_t res = dims > 0 ? 
step.p[dims - 1] : 0; - CV_DbgAssert(res != 0); - return res; + return CV_ELEM_SIZE(flags); } inline diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp index 68cb4de5bb..76c214b757 100644 --- a/modules/core/include/opencv2/core/matx.hpp +++ b/modules/core/include/opencv2/core/matx.hpp @@ -442,6 +442,12 @@ typedef Vec Vec4i; typedef Vec Vec6i; typedef Vec Vec8i; +typedef Vec Vec2l; +typedef Vec Vec3l; +typedef Vec Vec4l; +typedef Vec Vec6l; +typedef Vec Vec8l; + typedef Vec Vec2f; typedef Vec Vec3f; typedef Vec Vec4f; diff --git a/modules/core/include/opencv2/core/saturate.hpp b/modules/core/include/opencv2/core/saturate.hpp index e0cc965ab6..ff2d893bfc 100644 --- a/modules/core/include/opencv2/core/saturate.hpp +++ b/modules/core/include/opencv2/core/saturate.hpp @@ -146,9 +146,8 @@ template<> inline unsigned saturate_cast(short v) { return (unsigned) template<> inline unsigned saturate_cast(int v) { return (unsigned)std::max(v, (int)0); } template<> inline unsigned saturate_cast(int64 v) { return (unsigned)((uint64)v <= (uint64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); } template<> inline unsigned saturate_cast(uint64 v) { return (unsigned)std::min(v, (uint64)UINT_MAX); } -// we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc. 
-template<> inline unsigned saturate_cast(float v) { return static_cast(cvRound(v)); } -template<> inline unsigned saturate_cast(double v) { return static_cast(cvRound(v)); } +template<> inline unsigned saturate_cast(float v) { return (unsigned)round(std::max(v, 0.f)); } +template<> inline unsigned saturate_cast(double v) { return (unsigned)round(std::max(v, 0.)); } template<> inline uint64 saturate_cast(schar v) { return (uint64)std::max(v, (schar)0); } template<> inline uint64 saturate_cast(short v) { return (uint64)std::max(v, (short)0); } @@ -156,9 +155,16 @@ template<> inline uint64 saturate_cast(int v) { return (uint64)st template<> inline uint64 saturate_cast(int64 v) { return (uint64)std::max(v, (int64)0); } template<> inline int64 saturate_cast(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); } +template<> inline int64 saturate_cast(float v) { return (int64)round((double)v); } +template<> inline int64 saturate_cast(double v) { return (int64)round(v); } +template<> inline uint64 saturate_cast(float v) { return (int64)round((double)std::max(v, 0.f)); } +template<> inline uint64 saturate_cast(double v) { return (int64)round(std::max(v, 0.)); } + /** @overload */ template static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); } +template static inline _Tp saturate_cast(bfloat16_t v) { return saturate_cast<_Tp>((float)v); } +template static inline _Tp saturate_cast(bool v) { return saturate_cast<_Tp>(v ? 
1 : 0); } // in theory, we could use a LUT for 8u/8s->16f conversion, // but with hardware support for FP32->FP16 conversion the current approach is preferable @@ -172,6 +178,32 @@ template<> inline float16_t saturate_cast(uint64 v) { return float16 template<> inline float16_t saturate_cast(int64 v) { return float16_t((float)v); } template<> inline float16_t saturate_cast(float v) { return float16_t(v); } template<> inline float16_t saturate_cast(double v) { return float16_t((float)v); } +template<> inline float16_t saturate_cast(bfloat16_t v) { return float16_t((float)v); } + +template<> inline bfloat16_t saturate_cast(uchar v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(schar v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(ushort v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(short v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(unsigned v){ return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(int v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(uint64 v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(int64 v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(float v) { return bfloat16_t(v); } +template<> inline bfloat16_t saturate_cast(double v) { return bfloat16_t((float)v); } +template<> inline bfloat16_t saturate_cast(float16_t v) { return bfloat16_t((float)v); } + +template<> inline bool saturate_cast(uchar v) { return v != 0; } +template<> inline bool saturate_cast(schar v) { return v != 0; } +template<> inline bool saturate_cast(ushort v) { return v != 0; } +template<> inline bool saturate_cast(short v) { return v != 0; } +template<> inline bool saturate_cast(unsigned v){ return v != 0; } +template<> inline bool saturate_cast(int v){ return v != 0; } +template<> inline bool saturate_cast(float v){ return v != 0; } 
+template<> inline bool saturate_cast(double v){ return v != 0; } +template<> inline bool saturate_cast(uint64_t v){ return v != 0; } +template<> inline bool saturate_cast(int64_t v){ return v != 0; } +template<> inline bool saturate_cast(float16_t v){ return (float)v != 0; } +template<> inline bool saturate_cast(bfloat16_t v){ return (float)v != 0; } //! @} diff --git a/modules/core/include/opencv2/core/traits.hpp b/modules/core/include/opencv2/core/traits.hpp index 52ab083ca4..18ceb9098d 100644 --- a/modules/core/include/opencv2/core/traits.hpp +++ b/modules/core/include/opencv2/core/traits.hpp @@ -134,9 +134,9 @@ public: typedef value_type channel_type; typedef value_type vec_type; enum { generic_type = 0, - depth = CV_8U, + depth = CV_Bool, channels = 1, - fmt = (int)'u', + fmt = (int)'b', type = CV_MAKETYPE(depth, channels) }; }; @@ -231,6 +231,51 @@ public: }; }; +template<> class DataType +{ +public: + typedef unsigned value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_32U, + channels = 1, + fmt = (int)'n', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef unsigned value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_64S, + channels = 1, + fmt = (int)'L', + type = CV_MAKETYPE(depth, channels) + }; +}; + +template<> class DataType +{ +public: + typedef unsigned value_type; + typedef value_type work_type; + typedef value_type channel_type; + typedef value_type vec_type; + enum { generic_type = 0, + depth = CV_64U, + channels = 1, + fmt = (int)'U', + type = CV_MAKETYPE(depth, channels) + }; +}; + template<> class DataType { public: @@ -276,6 +321,21 @@ public: }; }; +template<> class DataType +{ +public: + typedef bfloat16_t value_type; + typedef float work_type; + typedef value_type channel_type; + typedef value_type 
vec_type; + enum { generic_type = 0, + depth = CV_16BF, + channels = 1, + fmt = (int)'H', + type = CV_MAKETYPE(depth, channels) + }; +}; + /** @brief A helper class for cv::DataType The class is specialized for each fundamental numerical data type supported by OpenCV. It provides @@ -332,6 +392,12 @@ template<> class TypeDepth typedef int value_type; }; +template<> class TypeDepth +{ + enum { depth = CV_32U }; + typedef unsigned value_type; +}; + template<> class TypeDepth { enum { depth = CV_32F }; @@ -344,12 +410,36 @@ template<> class TypeDepth typedef double value_type; }; +template<> class TypeDepth +{ + enum { depth = CV_64U }; + typedef uint64_t value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_64S }; + typedef int64_t value_type; +}; + template<> class TypeDepth { enum { depth = CV_16F }; typedef float16_t value_type; }; +template<> class TypeDepth +{ + enum { depth = CV_16BF }; + typedef bfloat16_t value_type; +}; + +template<> class TypeDepth +{ + enum { depth = CV_Bool }; + typedef bool value_type; +}; + #endif //! 
@} diff --git a/modules/core/misc/java/src/java/core+CvType.java b/modules/core/misc/java/src/java/core+CvType.java index fcf616fe02..f7abab170e 100644 --- a/modules/core/misc/java/src/java/core+CvType.java +++ b/modules/core/misc/java/src/java/core+CvType.java @@ -30,7 +30,7 @@ public final class CvType { CV_64FC1 = CV_64FC(1), CV_64FC2 = CV_64FC(2), CV_64FC3 = CV_64FC(3), CV_64FC4 = CV_64FC(4), CV_16FC1 = CV_16FC(1), CV_16FC2 = CV_16FC(2), CV_16FC3 = CV_16FC(3), CV_16FC4 = CV_16FC(4); - private static final int CV_CN_MAX = 512, CV_CN_SHIFT = 3, CV_DEPTH_MAX = (1 << CV_CN_SHIFT); + private static final int CV_CN_MAX = 128, CV_CN_SHIFT = 5, CV_DEPTH_MAX = (1 << CV_CN_SHIFT); public static final int makeType(int depth, int channels) { if (channels <= 0 || channels >= CV_CN_MAX) { diff --git a/modules/core/misc/java/test/CvTypeTest.java b/modules/core/misc/java/test/CvTypeTest.java index 45ab4d4143..9f13324f19 100644 --- a/modules/core/misc/java/test/CvTypeTest.java +++ b/modules/core/misc/java/test/CvTypeTest.java @@ -65,7 +65,7 @@ public class CvTypeTest extends OpenCVTestCase { public void testTypeToString() { assertEquals("CV_32FC1", CvType.typeToString(CvType.CV_32F)); assertEquals("CV_32FC3", CvType.typeToString(CvType.CV_32FC3)); - assertEquals("CV_32FC(128)", CvType.typeToString(CvType.CV_32FC(128))); + assertEquals("CV_32FC(127)", CvType.typeToString(CvType.CV_32FC(127))); } } diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 8f7fd20924..67cc051e0b 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -329,7 +329,7 @@ static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst, static BinaryFuncC* getMaxTab() { - static BinaryFuncC maxTab[] = + static BinaryFuncC maxTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s), @@ -343,7 +343,7 @@ static 
BinaryFuncC* getMaxTab() static BinaryFuncC* getMinTab() { - static BinaryFuncC minTab[] = + static BinaryFuncC minTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s), @@ -617,7 +617,10 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); Size sz = getContinuousSize2D(src1, src2, dst, src1.channels()); - tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata); + BinaryFuncC func = tab[depth1]; + CV_Assert(func != 0); + func(src1.ptr(), src1.step, src2.ptr(), src2.step, + dst.ptr(), dst.step, sz.width, sz.height, usrdata); return; } @@ -868,7 +871,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, static BinaryFuncC* getAddTab() { - static BinaryFuncC addTab[] = + static BinaryFuncC addTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s), @@ -882,7 +885,7 @@ static BinaryFuncC* getAddTab() static BinaryFuncC* getSubTab() { - static BinaryFuncC subTab[] = + static BinaryFuncC subTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s), @@ -896,7 +899,7 @@ static BinaryFuncC* getSubTab() static BinaryFuncC* getAbsDiffTab() { - static BinaryFuncC absDiffTab[] = + static BinaryFuncC absDiffTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s), @@ -949,7 +952,7 @@ namespace cv static BinaryFuncC* getMulTab() 
{ - static BinaryFuncC mulTab[] = + static BinaryFuncC mulTab[CV_DEPTH_MAX] = { (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u, (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f, @@ -961,7 +964,7 @@ static BinaryFuncC* getMulTab() static BinaryFuncC* getDivTab() { - static BinaryFuncC divTab[] = + static BinaryFuncC divTab[CV_DEPTH_MAX] = { (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u, (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f, @@ -973,7 +976,7 @@ static BinaryFuncC* getDivTab() static BinaryFuncC* getRecipTab() { - static BinaryFuncC recipTab[] = + static BinaryFuncC recipTab[CV_DEPTH_MAX] = { (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u, (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f, @@ -1021,7 +1024,7 @@ UMat UMat::mul(InputArray m, double scale) const static BinaryFuncC* getAddWeightedTab() { - static BinaryFuncC addWeightedTab[] = + static BinaryFuncC addWeightedTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f, @@ -1052,7 +1055,7 @@ namespace cv static BinaryFuncC getCmpFunc(int depth) { - static BinaryFuncC cmpTab[] = + static BinaryFuncC cmpTab[CV_DEPTH_MAX] = { (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s), @@ -1588,7 +1591,7 @@ typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, static InRangeFunc getInRangeFunc(int depth) { - static InRangeFunc inRangeTab[] = + static 
InRangeFunc inRangeTab[CV_DEPTH_MAX] = { (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u), (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f), diff --git a/modules/core/src/arithm.simd.hpp b/modules/core/src/arithm.simd.hpp index 06ebfb7678..20e70e5392 100644 --- a/modules/core/src/arithm.simd.hpp +++ b/modules/core/src/arithm.simd.hpp @@ -104,10 +104,6 @@ namespace cv { namespace hal { #ifdef ARITHM_DEFINITIONS_ONLY -#if !CV_SIMD_64F -typedef int v_float64; // dummy -#endif - //======================================= // Utility //======================================= diff --git a/modules/core/src/channels.cpp b/modules/core/src/channels.cpp index efaeb91068..7953212894 100644 --- a/modules/core/src/channels.cpp +++ b/modules/core/src/channels.cpp @@ -79,7 +79,7 @@ typedef void (*MixChannelsFunc)( const void** src, const int* sdelta, static MixChannelsFunc getMixchFunc(int depth) { - static MixChannelsFunc mixchTab[] = + static MixChannelsFunc mixchTab[CV_DEPTH_MAX] = { mixChannels8u, mixChannels8u, mixChannels16u, mixChannels16u, mixChannels32s, mixChannels32s, diff --git a/modules/core/src/convert.dispatch.cpp b/modules/core/src/convert.dispatch.cpp index 345b4624cb..150b91aa35 100644 --- a/modules/core/src/convert.dispatch.cpp +++ b/modules/core/src/convert.dispatch.cpp @@ -23,117 +23,28 @@ void cvt32f16f(const float* src, float16_t* dst, int len) CV_CPU_DISPATCH(cvt32f16f, (src, dst, len), CV_CPU_DISPATCH_MODES_ALL); } -void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len) +void cvt32f16bf(const float* src, bfloat16_t* dst, int len) { CV_INSTRUMENT_REGION(); - CV_CPU_DISPATCH(addRNGBias32f, (arr, scaleBiasPairs, len), + CV_CPU_DISPATCH(cvt32f16bf, (src, dst, len), CV_CPU_DISPATCH_MODES_ALL); } -void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len) +void addRNGBias32f(float* arr, const float* 
scaleBiasPairs, int len, int cn) { CV_INSTRUMENT_REGION(); - CV_CPU_DISPATCH(addRNGBias64f, (arr, scaleBiasPairs, len), + CV_CPU_DISPATCH(addRNGBias32f, (arr, scaleBiasPairs, len, cn), + CV_CPU_DISPATCH_MODES_ALL); +} +void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len, int cn) +{ + CV_INSTRUMENT_REGION(); + CV_CPU_DISPATCH(addRNGBias64f, (arr, scaleBiasPairs, len, cn), CV_CPU_DISPATCH_MODES_ALL); } } // namespace -/* [TODO] Recover IPP calls -#if defined(HAVE_IPP) -#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \ - cvt_(src, sstep, dst, dstep, size); \ -} - -#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \ - cvt_(src, sstep, dst, dstep, size); \ -} -#else -#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - cvt_(src, sstep, dst, dstep, size); \ -} -#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F -#endif - -#define DEF_CVT_FUNC(suffix, stype, dtype) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - cvt_(src, sstep, dst, dstep, size); \ -} - -#define DEF_CPY_FUNC(suffix, stype) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - stype* dst, size_t dstep, Size size, double*) \ -{ \ - cpy_(src, 
sstep, dst, dstep, size); \ -} - -DEF_CPY_FUNC(8u, uchar) -DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs) -DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R) -DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R) -DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R) -DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs) -DEF_CVT_FUNC(64f8u, double, uchar) - -DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs) -DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs) -DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs) -DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R) -DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs) -DEF_CVT_FUNC(64f8s, double, schar) - -DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R) -DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs) -DEF_CPY_FUNC(16u, ushort) -DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs) -DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs) -DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs) -DEF_CVT_FUNC(64f16u, double, ushort) - -DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R) -DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R) -DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs) -DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs) -DEF_CVT_FUNC(32f16s, float, short) -DEF_CVT_FUNC(64f16s, double, short) - -DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R) -DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R) -DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R) -DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R) -DEF_CPY_FUNC(32s, int) -DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs) -DEF_CVT_FUNC(64f32s, double, int) - -DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R) -DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R) -DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R) -DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R) -DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R) -DEF_CVT_FUNC(64f32f, double, float) - -DEF_CVT_FUNC(8u64f, uchar, double) -DEF_CVT_FUNC(8s64f, schar, double) -DEF_CVT_FUNC(16u64f, ushort, double) -DEF_CVT_FUNC(16s64f, short, double) -DEF_CVT_FUNC(32s64f, int, 
double) -DEF_CVT_FUNC(32f64f, float, double) -DEF_CPY_FUNC(64s, int64) -*/ - BinaryFunc getConvertFunc(int sdepth, int ddepth) { CV_INSTRUMENT_REGION(); diff --git a/modules/core/src/convert.hpp b/modules/core/src/convert.hpp index 4b9ddbb413..3aa7dadac9 100644 --- a/modules/core/src/convert.hpp +++ b/modules/core/src/convert.hpp @@ -28,12 +28,26 @@ static inline void vx_load_as(const short* ptr, v_float32& a) static inline void vx_load_as(const int* ptr, v_float32& a) { a = v_cvt_f32(vx_load(ptr)); } +static inline void vx_load_as(const unsigned* ptr, v_float32& a) +{ + v_uint32 delta = vx_setall_u32(0x80000000U); + v_uint32 ua = vx_load(ptr); + v_uint32 mask_a = (ua >= delta) & delta; + v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) + a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); + // restore the original values + a -= fmask_a; // subtract 0 or a large negative number +} + static inline void vx_load_as(const float* ptr, v_float32& a) { a = vx_load(ptr); } static inline void vx_load_as(const float16_t* ptr, v_float32& a) { a = vx_load_expand(ptr); } +static inline void vx_load_as(const bfloat16_t* ptr, v_float32& a) +{ a = vx_load_expand(ptr); } + static inline void v_store_as(ushort* ptr, const v_float32& a) { v_pack_u_store(ptr, v_round(a)); } @@ -43,12 +57,40 @@ static inline void v_store_as(short* ptr, const v_float32& a) static inline void v_store_as(int* ptr, const v_float32& a) { v_store(ptr, v_round(a)); } +static inline void v_store_as(unsigned* ptr, const v_float32& a) +{ + v_float32 z = vx_setzero_f32(); + v_store(ptr, v_reinterpret_as_u32(v_round(v_max(a, z)))); +} + static inline void v_store_as(float* ptr, const v_float32& a) { v_store(ptr, a); } static inline void v_store_as(float16_t* ptr, const v_float32& a) { v_pack_store(ptr, a); } +static inline void v_store_as(bfloat16_t* ptr, const v_float32& a) +{ v_pack_store(ptr, a); } + +static inline void v_store_as(int64_t* ptr, const v_float32& a) +{ + 
v_int32 ia = v_round(a); + v_int64 ia_0, ia_1; + v_expand(ia, ia_0, ia_1); + v_store(ptr, ia_0); + v_store(ptr + v_int64::nlanes, ia_1); +} + +static inline void v_store_as(uint64_t* ptr, const v_float32& a) +{ + v_int32 ia = v_round(a); + v_uint64 ia_0, ia_1; + ia = v_max(ia, vx_setzero_s32()); + v_expand(v_reinterpret_as_u32(ia), ia_0, ia_1); + v_store(ptr, ia_0); + v_store(ptr + v_int64::nlanes, ia_1); +} + static inline void vx_load_pair_as(const uchar* ptr, v_uint16& a, v_uint16& b) { v_expand(vx_load(ptr), a, b); } @@ -147,6 +189,115 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b) b = v_cvt_f32(ib); } +static inline void vx_load_pair_as(const int64_t* ptr, v_int32& a, v_int32& b) +{ + const int int64_nlanes = v_int64::nlanes; + a = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); + b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3)); +} + +static inline void vx_load_pair_as(const int64_t* ptr, v_uint64& a, v_uint64& b) +{ + v_int64 z = vx_setzero_s64(); + v_int64 ia = vx_load(ptr), ib = vx_load(ptr + v_int64::nlanes); + ia &= (ia > z); + ib &= (ib > z); + a = v_reinterpret_as_u64(ia); + b = v_reinterpret_as_u64(ib); +} + +static inline void vx_load_pair_as(const int64_t* ptr, v_uint32& a, v_uint32& b) +{ + const int nlanes = v_int64::nlanes; + v_int64 z = vx_setzero_s64(); + v_int64 ia0 = vx_load(ptr), ia1 = vx_load(ptr + nlanes); + v_int64 ib0 = vx_load(ptr + nlanes*2), ib1 = vx_load(ptr + nlanes*3); + ia0 &= (ia0 > z); + ia1 &= (ia1 > z); + ib0 &= (ib0 > z); + ib1 &= (ib1 > z); + a = v_pack(v_reinterpret_as_u64(ia0), v_reinterpret_as_u64(ia1)); + b = v_pack(v_reinterpret_as_u64(ib0), v_reinterpret_as_u64(ib1)); +} + +static inline void vx_load_pair_as(const uint64_t* ptr, v_float32& a, v_float32& b) +{ + const int nlanes = v_uint64::nlanes; + float buf[v_uint64::nlanes*4]; + for (int i = 0; i < nlanes*4; i++) { + buf[i] = (float)ptr[i]; + } + a = vx_load(buf); + b = vx_load(buf + nlanes*2); +} + 
+static inline void vx_load_pair_as(const int64_t* ptr, v_float32& a, v_float32& b) +{ + const int nlanes = v_int64::nlanes; + float buf[v_int64::nlanes*4]; + for (int i = 0; i < nlanes*4; i++) { + buf[i] = (float)ptr[i]; + } + a = vx_load(buf); + b = vx_load(buf + nlanes*2); +} + +static inline void vx_load_pair_as(const bool* ptr, v_float32& a, v_float32& b) +{ + v_uint16 z = vx_setzero_u16(); + v_uint16 uab = vx_load_expand((const uchar*)ptr); + uab = v_shr<15>(uab > z); + v_int32 ia, ib; + v_expand(v_reinterpret_as_s16(uab), ia, ib); + a = v_cvt_f32(ia); + b = v_cvt_f32(ib); +} + +static inline void vx_load_as(const bool* ptr, v_float32& a) +{ + v_uint32 z = vx_setzero_u32(); + v_uint32 ua = vx_load_expand_q((const uchar*)ptr); + ua = v_shr<31>(ua > z); + a = v_cvt_f32(v_reinterpret_as_s32(ua)); +} + +static inline void vx_load_pair_as(const schar* ptr, v_uint32& a, v_uint32& b) +{ + v_int16 ab = v_max(vx_load_expand(ptr), vx_setzero_s16()); + v_expand(v_reinterpret_as_u16(ab), a, b); +} + +static inline void vx_load_pair_as(const short* ptr, v_uint32& a, v_uint32& b) +{ + v_int16 ab = v_max(vx_load(ptr), vx_setzero_s16()); + v_expand(v_reinterpret_as_u16(ab), a, b); +} + +static inline void vx_load_pair_as(const int* ptr, v_uint32& a, v_uint32& b) +{ + v_int32 z = vx_setzero_s32(); + v_int32 ia = v_max(vx_load(ptr), z); + v_int32 ib = v_max(vx_load(ptr + v_int32::nlanes), z); + a = v_reinterpret_as_u32(ia); + b = v_reinterpret_as_u32(ib); +} + +static inline void vx_load_pair_as(const uint64_t* ptr, v_uint32& a, v_uint32& b) +{ + const int int64_nlanes = v_int64::nlanes; + a = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); + b = v_pack(vx_load(ptr + int64_nlanes*2), vx_load(ptr + int64_nlanes*3)); +} + +static inline void vx_load_pair_as(const uint64_t* ptr, v_int32& a, v_int32& b) +{ + const int int64_nlanes = v_int64::nlanes; + v_uint32 ua = v_pack(vx_load(ptr), vx_load(ptr + int64_nlanes)); + v_uint32 ub = v_pack(vx_load(ptr + int64_nlanes*2), 
vx_load(ptr + int64_nlanes*3)); + a = v_reinterpret_as_s32(ua); + b = v_reinterpret_as_s32(ub); +} + static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b) { a = vx_load(ptr); b = vx_load(ptr + v_float32::nlanes); } @@ -156,6 +307,39 @@ static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32 b = vx_load_expand(ptr + v_float32::nlanes); } +static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float32& a, v_float32& b) +{ + a = vx_load_expand(ptr); + b = vx_load_expand(ptr + v_float32::nlanes); +} + +static inline void vx_load_pair_as(const unsigned* ptr, v_uint32& a, v_uint32& b) +{ + a = vx_load(ptr); + b = vx_load(ptr + v_uint32::nlanes); +} + +static inline void vx_load_pair_as(const unsigned* ptr, v_int32& a, v_int32& b) +{ + a = v_reinterpret_as_s32(v_min(vx_load(ptr), vx_setall_u32(0x7fffffffU))); // clamp to INT_MAX so values >= 2^31 saturate high instead of wrapping negative + b = v_reinterpret_as_s32(v_min(vx_load(ptr + v_uint32::nlanes), vx_setall_u32(0x7fffffffU))); +} + +static inline void vx_load_pair_as(const unsigned* ptr, v_float32& a, v_float32& b) +{ + v_uint32 delta = vx_setall_u32(0x80000000U); + v_uint32 ua = vx_load(ptr); + v_uint32 ub = vx_load(ptr + v_uint32::nlanes); + v_uint32 mask_a = (ua >= delta) & delta, mask_b = (ub >= delta) & delta; + v_float32 fmask_a = v_cvt_f32(v_reinterpret_as_s32(mask_a)); // 0.f or (float)(-(1 << 31)) + v_float32 fmask_b = v_cvt_f32(v_reinterpret_as_s32(mask_b)); // 0.f or (float)(-(1 << 31)) + a = v_cvt_f32(v_reinterpret_as_s32(ua - mask_a)); + b = v_cvt_f32(v_reinterpret_as_s32(ub - mask_b)); + // restore the original values + a -= fmask_a; // subtract 0 or a large negative number + b -= fmask_b; // subtract 0 or a large negative number +} + static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b) { v_store(ptr, v_pack(a, b)); @@ -198,12 +382,33 @@ static inline void v_store_pair_as(int* ptr, const v_int32& a, const v_int32& b) v_store(ptr + v_int32::nlanes, b); } +static inline void v_store_pair_as(int64_t* ptr, const v_int32& a, const v_int32& b) +{ + v_int64 q0, q1, q2,
q3; + v_expand(a, q0, q1); + v_expand(b, q2, q3); + const int nlanes = v_int64::nlanes; + v_store(ptr, q0); + v_store(ptr + nlanes, q1); + v_store(ptr + nlanes*2, q2); + v_store(ptr + nlanes*3, q3); +} + static inline void v_store_pair_as(uchar* ptr, const v_float32& a, const v_float32& b) { v_pack_u_store(ptr, v_pack(v_round(a), v_round(b))); } static inline void v_store_pair_as(schar* ptr, const v_float32& a, const v_float32& b) { v_pack_store(ptr, v_pack(v_round(a), v_round(b))); } +static inline void v_store_pair_as(bool* ptr, const v_float32& a, const v_float32& b) +{ + v_float32 z = vx_setzero_f32(); + v_uint32 ma = v_shr<31>(v_reinterpret_as_u32(a != z)); + v_uint32 mb = v_shr<31>(v_reinterpret_as_u32(b != z)); + v_uint16 mab = v_pack(ma, mb); + v_pack_store((uchar*)ptr, mab); +} + static inline void v_store_pair_as(ushort* ptr, const v_float32& a, const v_float32& b) { v_store(ptr, v_pack_u(v_round(a), v_round(b))); } @@ -220,14 +425,95 @@ static inline void v_store_pair_as(int* ptr, const v_float32& a, const v_float32 static inline void v_store_pair_as(float* ptr, const v_float32& a, const v_float32& b) { v_store(ptr, a); v_store(ptr + v_float32::nlanes, b); } +static inline void v_store_pair_as(unsigned* ptr, const v_float32& a, const v_float32& b) +{ + v_int32 z = vx_setzero_s32(); + v_int32 ia = v_max(v_round(a), z); + v_int32 ib = v_max(v_round(b), z); + v_store(ptr, v_reinterpret_as_u32(ia)); + v_store(ptr + v_int32::nlanes, v_reinterpret_as_u32(ib)); +} + +static inline void v_store_pair_as(uchar* ptr, const v_uint32& a, const v_uint32& b) +{ + v_pack_store(ptr, v_pack(a, b)); +} + +static inline void v_store_pair_as(ushort* ptr, const v_uint32& a, const v_uint32& b) +{ + v_store(ptr, v_pack(a, b)); +} + +static inline void v_store_pair_as(unsigned* ptr, const v_uint32& a, const v_uint32& b) +{ + v_store(ptr, a); + v_store(ptr + v_uint32::nlanes, b); +} + +static inline void v_store_pair_as(uint64_t* ptr, const v_uint32& a, const v_uint32& b) +{ + 
v_uint64 q0, q1, q2, q3; + v_expand(a, q0, q1); + v_expand(b, q2, q3); + const int nlanes = v_uint64::nlanes; + v_store(ptr, q0); + v_store(ptr + nlanes, q1); + v_store(ptr + nlanes*2, q2); + v_store(ptr + nlanes*3, q3); +} + +static inline void v_store_pair_as(uint64_t* ptr, const v_uint64& a, const v_uint64& b) +{ + v_store(ptr, a); + v_store(ptr + v_uint64::nlanes, b); +} + #if CV_SIMD_64F +static inline void vx_load_as(const uint64_t* ptr, v_float32& a) +{ + v_float64 a_0 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr))); + v_float64 a_1 = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + v_uint64::nlanes))); + a = v_cvt_f32(a_0, a_1); +} + +static inline void vx_load_as(const int64_t* ptr, v_float32& a) +{ + v_float64 a_0 = v_cvt_f64(vx_load(ptr)); + v_float64 a_1 = v_cvt_f64(vx_load(ptr + v_uint64::nlanes)); + a = v_cvt_f32(a_0, a_1); +} + static inline void vx_load_as(const double* ptr, v_float32& a) { v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); a = v_cvt_f32(v0, v1); } +static inline void vx_load_pair_as(const bool* ptr, v_float64& a, v_float64& b) +{ + v_uint32 z = vx_setzero_u32(); + v_uint32 uab = vx_load_expand_q((const uchar*)ptr); + uab = v_shr<31>(uab > z); + v_float32 fab = v_cvt_f32(v_reinterpret_as_s32(uab)); + a = v_cvt_f64(fab); + b = v_cvt_f64_high(fab); +} + +static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b) +{ + v_float32 fab = vx_load_expand(ptr); + a = v_cvt_f64(fab); + b = v_cvt_f64_high(fab); +} + +static inline void vx_load_pair_as(const bfloat16_t* ptr, v_float64& a, v_float64& b) +{ + v_float32 fab = vx_load_expand(ptr); + a = v_cvt_f64(fab); + b = v_cvt_f64_high(fab); +} + static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) { v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); @@ -238,6 +524,13 @@ static inline void vx_load_pair_as(const double* ptr, v_int32& a, v_int32& b) b = v_combine_low(iv2, iv3); } +static inline void 
vx_load_pair_as(const uint64_t* ptr, v_float64& a, v_float64& b) +{ + const int int64_nlanes = v_int64::nlanes; + a = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr))); + b = v_cvt_f64(v_reinterpret_as_s64(vx_load(ptr + int64_nlanes))); +} + static inline void vx_load_pair_as(const double* ptr, v_float32& a, v_float32& b) { v_float64 v0 = vx_load(ptr), v1 = vx_load(ptr + v_float64::nlanes); @@ -294,11 +587,20 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b b = vx_load(ptr + v_float64::nlanes); } -static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b) +static inline void vx_load_pair_as(const int64_t* ptr, v_float64& a, v_float64& b) { - v_float32 v0 = vx_load_expand(ptr); - a = v_cvt_f64(v0); - b = v_cvt_f64_high(v0); + a = v_cvt_f64(vx_load(ptr)); + b = v_cvt_f64(vx_load(ptr + v_float64::nlanes)); +} + +static inline void vx_load_pair_as(const unsigned* ptr, v_float64& a, v_float64& b) +{ + const int nlanes = v_uint64::nlanes; + double buf[v_uint64::nlanes*2]; + for (int i = 0; i < nlanes*2; i++) + buf[i] = (double)ptr[i]; + a = vx_load(buf); + b = vx_load(buf + nlanes); } static inline void v_store_as(double* ptr, const v_float32& a) @@ -354,6 +656,29 @@ static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_f v_pack_store(ptr, v); } +static inline void v_store_pair_as(uint64_t* ptr, const v_float64& a, const v_float64& b) +{ + v_float64 z = vx_setzero_f64(); + v_int64 ia, ib; + v_expand(v_round(v_max(a, z), v_max(b, z)), ia, ib); + v_store(ptr, v_reinterpret_as_u64(ia)); + v_store(ptr + v_int64::nlanes, v_reinterpret_as_u64(ib)); +} + +static inline void v_store_pair_as(int64_t* ptr, const v_float64& a, const v_float64& b) +{ + v_int64 ia, ib; + v_expand(v_round(a, b), ia, ib); + v_store(ptr, ia); + v_store(ptr + v_int64::nlanes, ib); +} + +static inline void v_store_pair_as(unsigned* ptr, const v_float64& a, const v_float64& b) +{ + v_int32 iab = v_max(v_round(a, b), 
vx_setzero_s32()); + v_store(ptr, v_reinterpret_as_u32(iab)); +} + #else static inline void vx_load_as(const double* ptr, v_float32& a) @@ -366,6 +691,26 @@ static inline void vx_load_as(const double* ptr, v_float32& a) a = vx_load(buf); } +static inline void vx_load_as(const uint64_t* ptr, v_float32& a) +{ + const int VECSZ = v_float32::nlanes; + float buf[VECSZ*2]; + + for( int i = 0; i < VECSZ; i++ ) + buf[i] = saturate_cast(ptr[i]); + a = vx_load(buf); +} + +static inline void vx_load_as(const int64_t* ptr, v_float32& a) +{ + const int VECSZ = v_float32::nlanes; + float buf[VECSZ*2]; + + for( int i = 0; i < VECSZ; i++ ) + buf[i] = saturate_cast(ptr[i]); + a = vx_load(buf); +} + template static inline void vx_load_pair_as(const double* ptr, _Tdvec& a, _Tdvec& b) { diff --git a/modules/core/src/convert.simd.hpp b/modules/core/src/convert.simd.hpp index 5154041b6d..c776918846 100644 --- a/modules/core/src/convert.simd.hpp +++ b/modules/core/src/convert.simd.hpp @@ -16,8 +16,10 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN void cvt16f32f(const float16_t* src, float* dst, int len); void cvt32f16f(const float* src, float16_t* dst, int len); -void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len); -void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len); +void cvt16bf32f(const bfloat16_t* src, float* dst, int len); +void cvt32f16bf(const float* src, bfloat16_t* dst, int len); +void addRNGBias32f(float* arr, const float* scaleBiasPairs, int len, int cn); +void addRNGBias64f(double* arr, const double* scaleBiasPairs, int len, int cn); CV_CPU_OPTIMIZATION_NAMESPACE_END } // namespace cv::hal @@ -77,20 +79,63 @@ void cvt32f16f( const float* src, float16_t* dst, int len ) dst[j] = float16_t(src[j]); } -void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len ) +void cvt32f16bf( const float* src, bfloat16_t* dst, int len ) { CV_INSTRUMENT_REGION(); - // the loop is simple enough, so we let the compiler to vectorize it - for( int i = 0; i < 
len; i++ ) - arr[i] += scaleBiasPairs[i*2 + 1]; + int j = 0; +#if CV_SIMD + const int VECSZ = v_float32::nlanes; + for( ; j < len; j += VECSZ ) + { + if( j > len - VECSZ ) + { + if( j == 0 ) + break; + j = len - VECSZ; + } + v_pack_store(dst + j, vx_load(src + j)); + } +#endif + for( ; j < len; j++ ) + dst[j] = bfloat16_t(src[j]); } -void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len ) +void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len, int cn ) { CV_INSTRUMENT_REGION(); - // the loop is simple enough, so we let the compiler to vectorize it - for( int i = 0; i < len; i++ ) - arr[i] += scaleBiasPairs[i*2 + 1]; + if (cn == 1) { + float bias = scaleBiasPairs[1]; + for( int i = 0; i < len; i++ ) { + arr[i] += bias; + } + } else { + int k = 0; + len *= cn; + cn--; + for( int i = 0; i < len; i++ ) { + arr[i] += scaleBiasPairs[k*2 + 1]; + k = (k + 1) & ((k >= cn) - 1); + } + } +} + +void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len, int cn ) +{ + CV_INSTRUMENT_REGION(); + if (cn == 1) { + double bias = scaleBiasPairs[1]; + for( int i = 0; i < len; i++ ) { + arr[i] += bias; + } + } else { + int k = 0; + len *= cn; + cn--; + for( int i = 0; i < len; i++ ) { + arr[i] += scaleBiasPairs[k*2 + 1]; + k = (k + 1) & ((k >= cn) - 1); + } + } } CV_CPU_OPTIMIZATION_NAMESPACE_END @@ -128,6 +173,35 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) } } +template static inline void +cvt_64f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size ) +{ + sstep /= sizeof(src[0]); + dstep /= sizeof(dst[0]); + + for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) + { + int j = 0; +#if CV_SIMD_64F + const int VECSZ = v_float64::nlanes*2; + for( ; j < size.width; j += VECSZ ) + { + if( j > size.width - VECSZ ) + { + if( j == 0 || src == (_Ts*)dst ) + break; + j = size.width - VECSZ; + } + v_float64 v0, v1; + vx_load_pair_as(src + j, v0, v1); + v_store_pair_as(dst + j, v0, v1); + } 
+#endif + for( ; j < size.width; j++ ) + dst[j] = saturate_cast<_Td>(src[j]); + } +} + // in order to reduce the code size, for (16f <-> ...) conversions // we add a conversion function without loop unrolling template static inline void @@ -180,25 +254,102 @@ static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \ cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size); \ } +#define DEF_CVT2BOOL_FUNC(suffix, _Ts, shift) \ +static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \ + uchar* dst, size_t dstep, Size size, void*) \ +{ \ + CV_INSTRUMENT_REGION(); \ + const _Ts* src = (const _Ts*)src_; \ + sstep /= sizeof(src[0]); \ + \ + for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { \ + for ( int j = 0; j < size.width; j++ ) \ + dst[j] = (src[j]<(src[j]); \ + } \ +} + +#define DEF_CVT_SCALAR_FUNC_S2U(suffix, _Ts, _Td, _Tw) \ +static void cvt##suffix(const uchar* src_, size_t sstep, const uchar*, size_t, \ + uchar* dst_, size_t dstep, Size size, void*) \ +{ \ + CV_INSTRUMENT_REGION(); \ + const _Ts* src = (const _Ts*)src_; \ + _Td* dst = (_Td*)dst_; \ + sstep /= sizeof(src[0]); \ + dstep /= sizeof(dst[0]); \ + \ + for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { \ + for ( int j = 0; j < size.width; j++ ) \ + dst[j] = saturate_cast<_Td>(std::max((_Tw)src[j], (_Tw)0)); \ + } \ +} + ////////////////////// 8u -> ... //////////////////////// DEF_CVT_FUNC(8u8s, cvt_, uchar, schar, v_int16) -DEF_CVT_FUNC(8u16u, cvt_, uchar, ushort, v_uint16) DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16) DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32) DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32) DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32) +DEF_CVT_SCALAR_FUNC(8u64s, uchar, int64_t) DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32) +DEF_CVT_FUNC(8u16bf, cvt1_, uchar, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(8u8b, uchar, 0) ////////////////////// 8s -> ... 
//////////////////////// DEF_CVT_FUNC(8s8u, cvt_, schar, uchar, v_int16) DEF_CVT_FUNC(8s16u, cvt_, schar, ushort, v_uint16) DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16) +DEF_CVT_FUNC(8s32u, cvt_, schar, unsigned, v_uint32) DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32) DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32) DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32) +DEF_CVT_FUNC(8s64u, cvt_, schar, uint64_t, v_uint32) +DEF_CVT_FUNC(8s64s, cvt_, schar, int64_t, v_int32) DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32) +DEF_CVT_FUNC(8s16bf, cvt1_, schar, bfloat16_t, v_float32) + +////////////////////// 8b -> ... //////////////////////// + +DEF_CVTBOOL2_FUNC(8b8u, uchar, 1) +DEF_CVTBOOL2_FUNC(8b16s, short, 1) +DEF_CVTBOOL2_FUNC(8b32s, int, 1) +DEF_CVTBOOL2_FUNC(8b32f, float, 1) +DEF_CVTBOOL2_FUNC(8b64f, double, 1) +DEF_CVTBOOL2_FUNC(8b64s, int64_t, 1) +DEF_CVTBOOL2_FUNC(8b16f, uint16_t, 0x3c00) // float16_t(1.0f) +DEF_CVTBOOL2_FUNC(8b16bf, uint16_t, 0x3f80) // bfloat16_t(1.0f) ////////////////////// 16u -> ... //////////////////////// @@ -208,17 +359,37 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32) DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32) DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32) DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32) +DEF_CVT_SCALAR_FUNC(16u64s, ushort, int64_t) DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32) +DEF_CVT_FUNC(16u16bf, cvt1_, ushort, bfloat16_t, v_float32) ////////////////////// 16s -> ... 
//////////////////////// DEF_CVT_FUNC(16s8u, cvt_, short, uchar, v_int16) DEF_CVT_FUNC(16s8s, cvt_, short, schar, v_int16) DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32) +DEF_CVT_FUNC(16s32u, cvt_, short, unsigned, v_uint32) DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32) DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32) DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32) +DEF_CVT_FUNC(16s64u, cvt_, short, uint64_t, v_uint32) +DEF_CVT_FUNC(16s64s, cvt_, short, int64_t, v_int32) DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32) +DEF_CVT_FUNC(16s16bf, cvt1_, short, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(16s8b, short, 0) + +////////////////////// 32u -> ... //////////////////////// + +DEF_CVT_FUNC(32u8u, cvt_, unsigned, uchar, v_uint32) +DEF_CVT_FUNC(32u8s, cvt_, unsigned, schar, v_int32) +DEF_CVT_FUNC(32u16u, cvt_, unsigned, ushort, v_uint32) +DEF_CVT_FUNC(32u16s, cvt_, unsigned, short, v_int32) +DEF_CVT_SCALAR_FUNC(32u32s, unsigned, int) +DEF_CVT_FUNC(32u32f, cvt_, unsigned, float, v_float32) +DEF_CVT_FUNC(32u64f, cvt_, unsigned, double, v_float32) +DEF_CVT_SCALAR_FUNC(32u64s, unsigned, int64_t) +DEF_CVT_FUNC(32u16f, cvt1_, unsigned, float16_t, v_float32) +DEF_CVT_FUNC(32u16bf, cvt1_, int, bfloat16_t, v_float32) ////////////////////// 32s -> ... //////////////////////// @@ -226,9 +397,14 @@ DEF_CVT_FUNC(32s8u, cvt_, int, uchar, v_int32) DEF_CVT_FUNC(32s8s, cvt_, int, schar, v_int32) DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32) DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32) +DEF_CVT_FUNC(32s32u, cvt_, int, unsigned, v_uint32) DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32) DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32) +DEF_CVT_FUNC(32s64u, cvt_, int, uint64_t, v_uint32) +DEF_CVT_FUNC(32s64s, cvt_, int, int64_t, v_int32) DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32) +DEF_CVT_FUNC(32s16bf, cvt1_, int, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(32s8b, int, 0) ////////////////////// 32f -> ... 
//////////////////////// @@ -236,9 +412,14 @@ DEF_CVT_FUNC(32f8u, cvt_, float, uchar, v_float32) DEF_CVT_FUNC(32f8s, cvt_, float, schar, v_float32) DEF_CVT_FUNC(32f16u, cvt_, float, ushort, v_float32) DEF_CVT_FUNC(32f16s, cvt_, float, short, v_float32) +DEF_CVT_FUNC(32f32u, cvt_, float, unsigned, v_float32) DEF_CVT_FUNC(32f32s, cvt_, float, int, v_float32) DEF_CVT_FUNC(32f64f, cvt_, float, double, v_float32) +DEF_CVT_FUNC(32f64u, cvt_64f, float, uint64_t, v_float64) +DEF_CVT_FUNC(32f64s, cvt_64f, float, int64_t, v_float64) DEF_CVT_FUNC(32f16f, cvt1_,float, float16_t, v_float32) +DEF_CVT_FUNC(32f16bf, cvt1_,float, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(32f8b, int, 1) ////////////////////// 64f -> ... //////////////////////// @@ -246,9 +427,14 @@ DEF_CVT_FUNC(64f8u, cvt_, double, uchar, v_int32) DEF_CVT_FUNC(64f8s, cvt_, double, schar, v_int32) DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32) DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32) +DEF_CVT_FUNC(64f32u, cvt_64f, double, unsigned, v_float32) DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32) DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32) +DEF_CVT_FUNC(64f64u, cvt_64f, double, uint64_t, v_float64) +DEF_CVT_FUNC(64f64s, cvt_64f, double, int64_t, v_float32) DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32) +DEF_CVT_FUNC(64f16bf, cvt1_,double, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(64f8b, int64_t, 1) ////////////////////// 16f -> ... 
//////////////////////// @@ -256,9 +442,56 @@ DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32) DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32) DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32) DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32) +DEF_CVT_FUNC(16f32u, cvt1_, float16_t, unsigned, v_float32) DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32) DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32) DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32) +DEF_CVT_FUNC(16f64u, cvt1_, float16_t, uint64_t, v_float32) +DEF_CVT_FUNC(16f64s, cvt1_, float16_t, int64_t, v_float32) +DEF_CVT_FUNC(16f16bf, cvt1_, float16_t, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(16f8b, short, 1) + +////////////////////// 16bf -> ... //////////////////////// + +DEF_CVT_FUNC(16bf8u, cvt_, bfloat16_t, uchar, v_float32) +DEF_CVT_FUNC(16bf8s, cvt_, bfloat16_t, schar, v_float32) +DEF_CVT_FUNC(16bf16u, cvt1_, bfloat16_t, ushort, v_float32) +DEF_CVT_FUNC(16bf16s, cvt1_, bfloat16_t, short, v_float32) +DEF_CVT_FUNC(16bf32u, cvt1_, bfloat16_t, unsigned, v_float32) +DEF_CVT_FUNC(16bf32s, cvt1_, bfloat16_t, int, v_float32) +DEF_CVT_FUNC(16bf32f, cvt1_, bfloat16_t, float, v_float32) +DEF_CVT_FUNC(16bf64f, cvt1_, bfloat16_t, double, v_float32) +DEF_CVT_FUNC(16bf64u, cvt1_, bfloat16_t, uint64_t, v_float32) +DEF_CVT_FUNC(16bf64s, cvt1_, bfloat16_t, int64_t, v_float32) +DEF_CVT_FUNC(16bf16f, cvt1_, bfloat16_t, float16_t, v_float32) + +////////////////////// 64s -> ... 
//////////////////////// + +DEF_CVT_FUNC(64s8u, cvt_, int64_t, uchar, v_int32) +DEF_CVT_FUNC(64s8s, cvt_, int64_t, schar, v_int32) +DEF_CVT_FUNC(64s16u, cvt_, int64_t, ushort, v_int32) +DEF_CVT_FUNC(64s16s, cvt_, int64_t, short, v_int32) +DEF_CVT_FUNC(64s32u, cvt_, int64_t, unsigned, v_uint32) +DEF_CVT_FUNC(64s32s, cvt_, int64_t, int, v_int32) +DEF_CVT_FUNC(64s32f, cvt_64f, int64_t, float, v_float32) +DEF_CVT_FUNC(64s64f, cvt_64f, int64_t, double, v_float64) +DEF_CVT_FUNC(64s64u, cvt_, int64_t, uint64_t, v_uint64) +DEF_CVT_FUNC(64s16f, cvt1_,int64_t, float16_t, v_float32) +DEF_CVT_FUNC(64s16bf, cvt1_, int64_t, bfloat16_t, v_float32) +DEF_CVT2BOOL_FUNC(64s8b, int64_t, 0) + +////////////////////// 64u -> ... //////////////////////// + +DEF_CVT_FUNC(64u8u, cvt_, uint64_t, uchar, v_int32) +DEF_CVT_FUNC(64u8s, cvt_, uint64_t, schar, v_int32) +DEF_CVT_FUNC(64u16u, cvt_, uint64_t, ushort, v_int32) +DEF_CVT_FUNC(64u16s, cvt_, uint64_t, short, v_int32) +DEF_CVT_FUNC(64u32u, cvt_, uint64_t, unsigned, v_uint32) +DEF_CVT_FUNC(64u32s, cvt_, uint64_t, int, v_int32) +DEF_CVT_FUNC(64u32f, cvt_64f, uint64_t, float, v_float64) +DEF_CVT_FUNC(64u64f, cvt_64f, uint64_t, double, v_float64) +DEF_CVT_FUNC(64u16f, cvt1_,uint64_t, float16_t, v_float32) +DEF_CVT_FUNC(64u16bf, cvt1_, uint64_t, bfloat16_t, v_float32) ///////////// "conversion" w/o conversion /////////////// @@ -274,147 +507,210 @@ static void cvt32s(const uchar* src, size_t sstep, const uchar*, size_t, uchar* static void cvt64s(const uchar* src, size_t sstep, const uchar*, size_t, uchar* dst, size_t dstep, Size size, void*) { CV_INSTRUMENT_REGION(); cvtCopy((const uchar*)src, sstep, (uchar*)dst, dstep, size, 8); } - -/* [TODO] Recover IPP calls -#if defined(HAVE_IPP) -#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - CV_IPP_RUN(src && dst, 
CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \ - cvt_(src, sstep, dst, dstep, size); \ -} - -#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \ - cvt_(src, sstep, dst, dstep, size); \ -} -#else -#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - cvt_(src, sstep, dst, dstep, size); \ -} -#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F -#endif - -#define DEF_CVT_FUNC(suffix, stype, dtype) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - dtype* dst, size_t dstep, Size size, double*) \ -{ \ - cvt_(src, sstep, dst, dstep, size); \ -} - -#define DEF_CPY_FUNC(suffix, stype) \ -static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \ - stype* dst, size_t dstep, Size size, double*) \ -{ \ - cpy_(src, sstep, dst, dstep, size); \ -} - -DEF_CPY_FUNC(8u, uchar) -DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs) -DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R) -DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R) -DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R) -DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs) -DEF_CVT_FUNC(64f8u, double, uchar) - -DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs) -DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs) -DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs) -DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R) -DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs) -DEF_CVT_FUNC(64f8s, double, schar) - -DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R) -DEF_CVT_FUNC_F(8s16u, schar, 
ushort, 8s16u_C1Rs) -DEF_CPY_FUNC(16u, ushort) -DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs) -DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs) -DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs) -DEF_CVT_FUNC(64f16u, double, ushort) - -DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R) -DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R) -DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs) -DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs) -DEF_CVT_FUNC(32f16s, float, short) -DEF_CVT_FUNC(64f16s, double, short) - -DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R) -DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R) -DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R) -DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R) -DEF_CPY_FUNC(32s, int) -DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs) -DEF_CVT_FUNC(64f32s, double, int) - -DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R) -DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R) -DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R) -DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R) -DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R) -DEF_CVT_FUNC(64f32f, double, float) - -DEF_CVT_FUNC(8u64f, uchar, double) -DEF_CVT_FUNC(8s64f, schar, double) -DEF_CVT_FUNC(16u64f, ushort, double) -DEF_CVT_FUNC(16s64f, short, double) -DEF_CVT_FUNC(32s64f, int, double) -DEF_CVT_FUNC(32f64f, float, double) -DEF_CPY_FUNC(64s, int64) -*/ - -BinaryFunc getConvertFunc(int sdepth, int ddepth) +BinaryFunc getConvertFunc(int sdepth_, int ddepth_) { - static BinaryFunc cvtTab[][8] = - { - { - (cvt8u), (cvt8s8u), (cvt16u8u), - (cvt16s8u), (cvt32s8u), (cvt32f8u), - (cvt64f8u), (cvt16f8u) - }, - { - (cvt8u8s), cvt8u, (cvt16u8s), - (cvt16s8s), (cvt32s8s), (cvt32f8s), - (cvt64f8s), (cvt16f8s) - }, - { - (cvt8u16u), (cvt8s16u), cvt16u, - (cvt16s16u), (cvt32s16u), (cvt32f16u), - (cvt64f16u), (cvt16f16u) - }, - { - (cvt8u16s), (cvt8s16s), (cvt16u16s), - cvt16u, (cvt32s16s), (cvt32f16s), - (cvt64f16s), (cvt16f16s) - }, - { - (cvt8u32s), (cvt8s32s), (cvt16u32s), - (cvt16s32s), 
cvt32s, (cvt32f32s), - (cvt64f32s), (cvt16f32s) - }, - { - (cvt8u32f), (cvt8s32f), (cvt16u32f), - (cvt16s32f), (cvt32s32f), cvt32s, - (cvt64f32f), (cvt16f32f) - }, - { - (cvt8u64f), (cvt8s64f), (cvt16u64f), - (cvt16s64f), (cvt32s64f), (cvt32f64f), - (cvt64s), (cvt16f64f) - }, - { - (cvt8u16f), (cvt8s16f), (cvt16u16f), (cvt16s16f), - (cvt32s16f), (cvt32f16f), (cvt64f16f), (cvt16u) - } - }; - return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)]; + int sdepth = CV_MAT_DEPTH(sdepth_); + int ddepth = CV_MAT_DEPTH(ddepth_); + BinaryFunc func = + ddepth == CV_8U ? ( + sdepth == CV_8U ? cvt8u : + sdepth == CV_8S ? cvt8s8u : + sdepth == CV_16U ? cvt16u8u : + sdepth == CV_16S ? cvt16s8u : + sdepth == CV_32U ? cvt32u8u : + sdepth == CV_32S ? cvt32s8u : + sdepth == CV_32F ? cvt32f8u : + sdepth == CV_64F ? cvt64f8u : + sdepth == CV_16F ? cvt16f8u : + sdepth == CV_16BF ? cvt16bf8u : + sdepth == CV_Bool ? cvt8b8u : + sdepth == CV_64U ? cvt64u8u : + sdepth == CV_64S ? cvt64s8u : + 0) : + ddepth == CV_8S ? ( + sdepth == CV_8U ? cvt8u8s : + sdepth == CV_8S ? cvt8u : + sdepth == CV_16U ? cvt16u8s : + sdepth == CV_16S ? cvt16s8s : + sdepth == CV_32U ? cvt32u8s : + sdepth == CV_32S ? cvt32s8s : + sdepth == CV_32F ? cvt32f8s : + sdepth == CV_64F ? cvt64f8s : + sdepth == CV_16F ? cvt16f8s : + sdepth == CV_16BF ? cvt16bf8s : + sdepth == CV_Bool ? cvt8b8u : + sdepth == CV_64U ? cvt64u8s : + sdepth == CV_64S ? cvt64s8s : + 0) : + ddepth == CV_16U ? ( + sdepth == CV_8U ? cvt8u16s : // same as cvt8u16u + sdepth == CV_8S ? cvt8s16u : + sdepth == CV_16U ? cvt16u : + sdepth == CV_16S ? cvt16s16u : + sdepth == CV_32U ? cvt32u16u : + sdepth == CV_32S ? cvt32s16u : + sdepth == CV_32F ? cvt32f16u : + sdepth == CV_64F ? cvt64f16u : + sdepth == CV_16F ? cvt16f16u : + sdepth == CV_16BF ? cvt16bf16u : + sdepth == CV_Bool ? cvt8b16s : + sdepth == CV_64U ? cvt64u16u : + sdepth == CV_64S ? cvt64s16u : + 0) : + ddepth == CV_16S ? ( + sdepth == CV_8U ? cvt8u16s : + sdepth == CV_8S ? 
cvt8s16s : + sdepth == CV_16U ? cvt16u16s : + sdepth == CV_16S ? cvt16u : + sdepth == CV_32U ? cvt32u16s : + sdepth == CV_32S ? cvt32s16s : + sdepth == CV_32F ? cvt32f16s : + sdepth == CV_64F ? cvt64f16s : + sdepth == CV_16F ? cvt16f16s : + sdepth == CV_16BF ? cvt16bf16s : + sdepth == CV_Bool ? cvt8b16s : + sdepth == CV_64U ? cvt64u16s : + sdepth == CV_64S ? cvt64s16s : + 0) : + ddepth == CV_32U ? ( + sdepth == CV_8U ? cvt8u32s : // same as cvt8u32u + sdepth == CV_8S ? cvt8s32u : + sdepth == CV_16U ? cvt16u32s : // same as cvt16u32u + sdepth == CV_16S ? cvt16s32u : + sdepth == CV_32U ? cvt32s : + sdepth == CV_32S ? cvt32s32u : + sdepth == CV_32F ? cvt32f32u : + sdepth == CV_64F ? cvt64f32u : + sdepth == CV_16F ? cvt16f32u : + sdepth == CV_16BF ? cvt16bf32u : + sdepth == CV_Bool ? cvt8b32s : + sdepth == CV_64U ? cvt64u32u : + sdepth == CV_64S ? cvt64s32u : + + 0) : + ddepth == CV_32S ? ( + sdepth == CV_8U ? cvt8u32s : + sdepth == CV_8S ? cvt8s32s : + sdepth == CV_16U ? cvt16u32s : + sdepth == CV_16S ? cvt16s32s : + sdepth == CV_32U ? cvt32u32s : + sdepth == CV_32S ? cvt32s : + sdepth == CV_32F ? cvt32f32s : + sdepth == CV_64F ? cvt64f32s : + sdepth == CV_16F ? cvt16f32s : + sdepth == CV_16BF ? cvt16bf32s : + sdepth == CV_Bool ? cvt8b32s : + sdepth == CV_64U ? cvt64u32s : + sdepth == CV_64S ? cvt64s32s : + 0) : + ddepth == CV_32F ? ( + sdepth == CV_8U ? cvt8u32f : + sdepth == CV_8S ? cvt8s32f : + sdepth == CV_16U ? cvt16u32f : + sdepth == CV_16S ? cvt16s32f : + sdepth == CV_32U ? cvt32u32f : + sdepth == CV_32S ? cvt32s32f : + sdepth == CV_32F ? cvt32s : + sdepth == CV_64F ? cvt64f32f : + sdepth == CV_16F ? cvt16f32f : + sdepth == CV_16BF ? cvt16bf32f : + sdepth == CV_Bool ? cvt8b32f : + sdepth == CV_64U ? cvt64u32f : + sdepth == CV_64S ? cvt64s32f : + 0) : + ddepth == CV_64F ? ( + sdepth == CV_8U ? cvt8u64f : + sdepth == CV_8S ? cvt8s64f : + sdepth == CV_16U ? cvt16u64f : + sdepth == CV_16S ? cvt16s64f : + sdepth == CV_32U ? cvt32u64f : + sdepth == CV_32S ? 
cvt32s64f : + sdepth == CV_32F ? cvt32f64f : + sdepth == CV_64F ? cvt64s : + sdepth == CV_16F ? cvt16f64f : + sdepth == CV_16BF ? cvt16bf64f : + sdepth == CV_Bool ? cvt8b64f : + sdepth == CV_64U ? cvt64u64f : + sdepth == CV_64S ? cvt64s64f : + 0) : + ddepth == CV_16F ? ( + sdepth == CV_8U ? cvt8u16f : + sdepth == CV_8S ? cvt8s16f : + sdepth == CV_16U ? cvt16u16f : + sdepth == CV_16S ? cvt16s16f : + sdepth == CV_32U ? cvt32u16f : + sdepth == CV_32S ? cvt32s16f : + sdepth == CV_32F ? cvt32f16f : + sdepth == CV_64F ? cvt64f16f : + sdepth == CV_16F ? cvt16u : + sdepth == CV_16BF ? cvt16bf16f : + sdepth == CV_Bool ? cvt8b16f : + sdepth == CV_64U ? cvt64u16f : + sdepth == CV_64S ? cvt64s16f : + 0) : + ddepth == CV_16BF ? ( + sdepth == CV_8U ? cvt8u16bf : + sdepth == CV_8S ? cvt8s16bf : + sdepth == CV_16U ? cvt16u16bf : + sdepth == CV_16S ? cvt16s16bf : + sdepth == CV_32U ? cvt32u16bf : + sdepth == CV_32S ? cvt32s16bf : + sdepth == CV_32F ? cvt32f16bf : + sdepth == CV_64F ? cvt64f16bf : + sdepth == CV_16F ? cvt16f16bf : + sdepth == CV_16BF ? cvt16u : + sdepth == CV_Bool ? cvt8b16bf : + sdepth == CV_64U ? cvt64u16bf : + sdepth == CV_64S ? cvt64s16bf : + 0) : + ddepth == CV_Bool ? ( + sdepth == CV_8U ? cvt8u8b : + sdepth == CV_8S ? cvt8u8b : + sdepth == CV_16U ? cvt16s8b : + sdepth == CV_16S ? cvt16s8b : + sdepth == CV_32U ? cvt32s8b : + sdepth == CV_32S ? cvt32s8b : + sdepth == CV_32F ? cvt32f8b : + sdepth == CV_64F ? cvt64f8b : + sdepth == CV_16F ? cvt16f8b : + sdepth == CV_16BF ? cvt16f8b : // same as cvt16f8b + sdepth == CV_Bool ? cvt8u : + sdepth == CV_64U ? cvt64s8b : + sdepth == CV_64S ? cvt64s8b : + 0) : + ddepth == CV_64U ? ( + sdepth == CV_8U ? cvt8u64s : // same as cvt8u64u + sdepth == CV_8S ? cvt8s64u : + sdepth == CV_16U ? cvt16u64s : // same as cvt16u64u + sdepth == CV_16S ? cvt16s64u : + sdepth == CV_32U ? cvt32u64s : // same as cvt32u64u + sdepth == CV_32S ? cvt32s64u : + sdepth == CV_32F ? cvt32f64u : + sdepth == CV_64F ? cvt64f64u : + sdepth == CV_16F ? 
cvt16f64u : + sdepth == CV_16BF ? cvt16bf64u : + sdepth == CV_Bool ? cvt8b64s : + sdepth == CV_64U ? cvt64s : + sdepth == CV_64S ? cvt64s64u : + 0) : + ddepth == CV_64S ? ( + sdepth == CV_8U ? cvt8u64s : + sdepth == CV_8S ? cvt8s64s : + sdepth == CV_16U ? cvt16u64s : + sdepth == CV_16S ? cvt16s64s : + sdepth == CV_32U ? cvt32u64s : + sdepth == CV_32S ? cvt32s64s : + sdepth == CV_32F ? cvt32f64s : + sdepth == CV_64F ? cvt64f64s : + sdepth == CV_16F ? cvt16f64s : + sdepth == CV_16BF ? cvt16bf64s : + sdepth == CV_Bool ? cvt8b64s : + sdepth == CV_64U ? cvt64s : + sdepth == CV_64S ? cvt64s : + 0) : + 0; + CV_Assert(func != 0); + return func; } CV_CPU_OPTIMIZATION_NAMESPACE_END diff --git a/modules/core/src/convert_scale.simd.hpp b/modules/core/src/convert_scale.simd.hpp index 2c6d55462b..f1ee7635e7 100644 --- a/modules/core/src/convert_scale.simd.hpp +++ b/modules/core/src/convert_scale.simd.hpp @@ -53,38 +53,18 @@ cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, } } -// variant for conversions 16f <-> ... 
w/o unrolling -template inline void -cvtabs1_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, - Size size, float a, float b ) +static void +cvtabs_32f( const bool* src_, size_t sstep, + uchar* dst, size_t dstep, + Size size, float a, float b ) { -#if CV_SIMD - v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); - const int VECSZ = v_float32::nlanes*2; -#endif - sstep /= sizeof(src[0]); - dstep /= sizeof(dst[0]); - + const uchar* src = (const uchar*)src_; + uchar v0 = saturate_cast(std::abs(b)); + uchar v1 = saturate_cast(std::abs(a + b)); for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { - int j = 0; -#if CV_SIMD - for( ; j < size.width; j += VECSZ ) - { - if( j > size.width - VECSZ ) - { - if( j == 0 || src == (_Ts*)dst ) - break; - j = size.width - VECSZ; - } - v_float32 v0; - vx_load_as(src + j, v0); - v0 = v_fma(v0, va, vb); - v_store_as(dst + j, v_abs(v0)); - } -#endif - for( ; j < size.width; j++ ) - dst[j] = saturate_cast<_Td>(src[j]*a + b); + for (int j = 0; j < size.width; j++) + dst[j] = src[j] != 0 ? 
v1 : v0; } } @@ -217,145 +197,454 @@ static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, siz cvt(src, sstep, dst, dstep, size, (wtype)scale[0], (wtype)scale[1]); \ } +#define DEF_CVT_SCALE2BOOL_FUNC(suffix, stype, wtype) \ +static void cvtScale##suffix( const uchar* src_, size_t sstep, const uchar*, size_t, \ + uchar* dst, size_t dstep, Size size, void* scale_) \ +{ \ + const stype* src = (const stype*)src_; \ + const double* scale = (const double*)scale_; \ + wtype a = (wtype)scale[0], b = (wtype)scale[1]; \ + sstep /= sizeof(src[0]); \ + for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) \ + for (int j = 0; j < size.width; j++) \ + dst[j] = (bool)((wtype)src[j]*a + b != 0); \ +} + +#define DEF_CVT_SCALEBOOL2_FUNC(suffix, dtype, wtype) \ +static void cvtScale##suffix( const uchar* src, size_t sstep, const uchar*, size_t, \ + uchar* dst_, size_t dstep, Size size, void* scale_) \ +{ \ + dtype* dst = (dtype*)dst_; \ + const double* scale = (const double*)scale_; \ + wtype a = (wtype)scale[0], b = (wtype)scale[1]; \ + dtype v0 = saturate_cast(b), v1 = saturate_cast(a + b); \ + dstep /= sizeof(dst[0]); \ + for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) \ + for (int j = 0; j < size.width; j++) \ + dst[j] = src[j] != 0 ? 
v1 : v0; \ +} + DEF_CVT_SCALE_ABS_FUNC(8u, cvtabs_32f, uchar, uchar, float) DEF_CVT_SCALE_ABS_FUNC(8s8u, cvtabs_32f, schar, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(8b8u, cvtabs_32f, bool, uchar, float) DEF_CVT_SCALE_ABS_FUNC(16u8u, cvtabs_32f, ushort, uchar, float) DEF_CVT_SCALE_ABS_FUNC(16s8u, cvtabs_32f, short, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(32u8u, cvtabs_32f, unsigned, uchar, float) DEF_CVT_SCALE_ABS_FUNC(32s8u, cvtabs_32f, int, uchar, float) DEF_CVT_SCALE_ABS_FUNC(32f8u, cvtabs_32f, float, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(64u8u, cvtabs_32f, uint64_t, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(64s8u, cvtabs_32f, int64_t, uchar, float) DEF_CVT_SCALE_ABS_FUNC(64f8u, cvtabs_32f, double, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(16f8u, cvtabs_32f, float16_t, uchar, float) +DEF_CVT_SCALE_ABS_FUNC(16bf8u, cvtabs_32f, bfloat16_t, uchar, float) DEF_CVT_SCALE_FUNC(8u, cvt_32f, uchar, uchar, float) DEF_CVT_SCALE_FUNC(8s8u, cvt_32f, schar, uchar, float) DEF_CVT_SCALE_FUNC(16u8u, cvt_32f, ushort, uchar, float) DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float) +DEF_CVT_SCALE_FUNC(32u8u, cvt_32f, unsigned, uchar, float) DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float) DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float) DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float) +DEF_CVT_SCALE_FUNC(64u8u, cvt_32f, uint64_t, uchar, float) +DEF_CVT_SCALE_FUNC(64s8u, cvt_32f, int64_t, uchar, float) DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float) +DEF_CVT_SCALE_FUNC(16bf8u, cvt_32f, bfloat16_t, uchar, float) DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float) DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float) DEF_CVT_SCALE_FUNC(16u8s, cvt_32f, ushort, schar, float) DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float) +DEF_CVT_SCALE_FUNC(32u8s, cvt_32f, unsigned, schar, float) DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float) DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float) DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float) 
+DEF_CVT_SCALE_FUNC(64u8s, cvt_32f, uint64_t, schar, float) +DEF_CVT_SCALE_FUNC(64s8s, cvt_32f, int64_t, schar, float) DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float) +DEF_CVT_SCALE_FUNC(16bf8s, cvt_32f, bfloat16_t, schar, float) + +DEF_CVT_SCALE2BOOL_FUNC(8u8b, uchar, float) +DEF_CVT_SCALE2BOOL_FUNC(8s8b, schar, float) +DEF_CVT_SCALE2BOOL_FUNC(16u8b, ushort, float) +DEF_CVT_SCALE2BOOL_FUNC(16s8b, short, float) +DEF_CVT_SCALE2BOOL_FUNC(32u8b, unsigned, float) +DEF_CVT_SCALE2BOOL_FUNC(32s8b, int, float) +DEF_CVT_SCALE2BOOL_FUNC(32f8b, float, float) +DEF_CVT_SCALE2BOOL_FUNC(64f8b, double, float) +DEF_CVT_SCALE2BOOL_FUNC(64u8b, uint64_t, float) +DEF_CVT_SCALE2BOOL_FUNC(64s8b, int64_t, float) +DEF_CVT_SCALE2BOOL_FUNC(16f8b, float16_t, float) +DEF_CVT_SCALE2BOOL_FUNC(16bf8b, bfloat16_t, float) DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float) DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float) DEF_CVT_SCALE_FUNC(16u, cvt_32f, ushort, ushort, float) DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float) +DEF_CVT_SCALE_FUNC(32u16u, cvt_32f, unsigned, ushort, float) DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float) DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float) DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float) +DEF_CVT_SCALE_FUNC(64u16u, cvt_32f, uint64_t, ushort, float) +DEF_CVT_SCALE_FUNC(64s16u, cvt_32f, int64_t, ushort, float) DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float) +DEF_CVT_SCALE_FUNC(16bf16u, cvt1_32f, bfloat16_t, ushort, float) DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float) DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float) DEF_CVT_SCALE_FUNC(16u16s, cvt_32f, ushort, short, float) DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float) +DEF_CVT_SCALE_FUNC(32u16s, cvt_32f, unsigned, short, float) DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float) DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float) DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float) 
+DEF_CVT_SCALE_FUNC(64u16s, cvt_32f, uint64_t, short, float) +DEF_CVT_SCALE_FUNC(64s16s, cvt_32f, int64_t, short, float) DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float) +DEF_CVT_SCALE_FUNC(16bf16s, cvt1_32f, bfloat16_t, short, float) + +DEF_CVT_SCALE_FUNC(8u32u, cvt_32f, uchar, unsigned, float) +DEF_CVT_SCALE_FUNC(8s32u, cvt_32f, schar, unsigned, float) +DEF_CVT_SCALE_FUNC(16u32u, cvt_32f, ushort, unsigned, float) +DEF_CVT_SCALE_FUNC(16s32u, cvt_32f, short, unsigned, float) +DEF_CVT_SCALE_FUNC(32u, cvt_32f, unsigned, unsigned, float) +DEF_CVT_SCALE_FUNC(32s32u, cvt_64f, int, unsigned, double) +DEF_CVT_SCALE_FUNC(32f32u, cvt_32f, float, unsigned, float) +DEF_CVT_SCALE_FUNC(64f32u, cvt_64f, double, unsigned, double) +DEF_CVT_SCALE_FUNC(64u32u, cvt_32f, uint64_t, unsigned, float) +DEF_CVT_SCALE_FUNC(64s32u, cvt_32f, int64_t, unsigned, float) +DEF_CVT_SCALE_FUNC(16f32u, cvt1_32f, float16_t, unsigned, float) +DEF_CVT_SCALE_FUNC(16bf32u, cvt1_32f, bfloat16_t, unsigned, float) DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float) DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float) DEF_CVT_SCALE_FUNC(16u32s, cvt_32f, ushort, int, float) DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float) +DEF_CVT_SCALE_FUNC(32u32s, cvt_32f, unsigned, int, float) DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double) DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float) DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double) +DEF_CVT_SCALE_FUNC(64u32s, cvt_32f, uint64_t, int, float) +DEF_CVT_SCALE_FUNC(64s32s, cvt_32f, int64_t, int, float) DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float) +DEF_CVT_SCALE_FUNC(16bf32s, cvt1_32f, bfloat16_t, int, float) DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float) DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float) DEF_CVT_SCALE_FUNC(16u32f, cvt_32f, ushort, float, float) DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float) +DEF_CVT_SCALE_FUNC(32u32f, cvt_32f, unsigned, float, float) DEF_CVT_SCALE_FUNC(32s32f, 
cvt_32f, int, float, float) DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float) DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double) +DEF_CVT_SCALE_FUNC(64u32f, cvt_32f, uint64_t, float, float) +DEF_CVT_SCALE_FUNC(64s32f, cvt_32f, int64_t, float, float) DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float) +DEF_CVT_SCALE_FUNC(16bf32f, cvt1_32f, bfloat16_t, float, float) DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double) DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double) DEF_CVT_SCALE_FUNC(16u64f, cvt_64f, ushort, double, double) DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double) +DEF_CVT_SCALE_FUNC(32u64f, cvt_64f, unsigned, double, double) DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double) DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double) DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double) +DEF_CVT_SCALE_FUNC(64u64f, cvt_64f, uint64_t, double, double) +DEF_CVT_SCALE_FUNC(64s64f, cvt_64f, int64_t, double, double) DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double) +DEF_CVT_SCALE_FUNC(16bf64f, cvt_64f, bfloat16_t, double, double) + +DEF_CVT_SCALE_FUNC(8u64u, cvt_64f, uchar, uint64_t, double) +DEF_CVT_SCALE_FUNC(8s64u, cvt_64f, schar, uint64_t, double) +DEF_CVT_SCALE_FUNC(16u64u, cvt_64f, ushort, uint64_t, double) +DEF_CVT_SCALE_FUNC(16s64u, cvt_64f, short, uint64_t, double) +DEF_CVT_SCALE_FUNC(32u64u, cvt_64f, unsigned, uint64_t, double) +DEF_CVT_SCALE_FUNC(32s64u, cvt_64f, int, uint64_t, double) +DEF_CVT_SCALE_FUNC(32f64u, cvt_64f, float, uint64_t, double) +DEF_CVT_SCALE_FUNC(64f64u, cvt_64f, double, uint64_t, double) +DEF_CVT_SCALE_FUNC(64u, cvt_64f, uint64_t, uint64_t, double) +DEF_CVT_SCALE_FUNC(64s64u, cvt_64f, int64_t, uint64_t, double) +DEF_CVT_SCALE_FUNC(16f64u, cvt_64f, float16_t, uint64_t, double) +DEF_CVT_SCALE_FUNC(16bf64u, cvt_64f, bfloat16_t, uint64_t, double) + +DEF_CVT_SCALE_FUNC(8u64s, cvt_64f, uchar, int64_t, double) +DEF_CVT_SCALE_FUNC(8s64s, cvt_64f, schar, int64_t, double) 
+DEF_CVT_SCALE_FUNC(16u64s, cvt_64f, ushort, int64_t, double) +DEF_CVT_SCALE_FUNC(16s64s, cvt_64f, short, int64_t, double) +DEF_CVT_SCALE_FUNC(32u64s, cvt_64f, unsigned, int64_t, double) +DEF_CVT_SCALE_FUNC(32s64s, cvt_64f, int, int64_t, double) +DEF_CVT_SCALE_FUNC(32f64s, cvt_64f, float, int64_t, double) +DEF_CVT_SCALE_FUNC(64f64s, cvt_64f, double, int64_t, double) +DEF_CVT_SCALE_FUNC(64u64s, cvt_64f, uint64_t, int64_t, double) +DEF_CVT_SCALE_FUNC(64s, cvt_64f, int64_t, int64_t, double) +DEF_CVT_SCALE_FUNC(16f64s, cvt_64f, float16_t, int64_t, double) +DEF_CVT_SCALE_FUNC(16bf64s, cvt_64f, bfloat16_t, int64_t, double) DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float) DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, float16_t, float) DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float) DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, float16_t, float) +DEF_CVT_SCALE_FUNC(32u16f, cvt1_32f, unsigned, float16_t, float) DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, float16_t, float) DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float) -DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double) +DEF_CVT_SCALE_FUNC(64f16f, cvt1_32f, double, float16_t, float) +DEF_CVT_SCALE_FUNC(64u16f, cvt1_32f, uint64_t, float16_t, float) +DEF_CVT_SCALE_FUNC(64s16f, cvt1_32f, int64_t, float16_t, float) DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float) +DEF_CVT_SCALE_FUNC(16bf16f, cvt1_32f, bfloat16_t, float16_t, float) + +DEF_CVT_SCALE_FUNC(8u16bf, cvt1_32f, uchar, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(8s16bf, cvt1_32f, schar, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(16u16bf, cvt1_32f, ushort, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(16s16bf, cvt1_32f, short, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(32u16bf, cvt1_32f, unsigned, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(32s16bf, cvt1_32f, int, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(32f16bf, cvt1_32f, float, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(64f16bf, cvt1_32f, double, bfloat16_t, float) 
+DEF_CVT_SCALE_FUNC(64u16bf, cvt1_32f, uint64_t, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(64s16bf, cvt1_32f, int64_t, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(16f16bf, cvt1_32f, float16_t, bfloat16_t, float) +DEF_CVT_SCALE_FUNC(16bf, cvt1_32f, bfloat16_t, bfloat16_t, float) + +DEF_CVT_SCALEBOOL2_FUNC(8b8u, uchar, float) +DEF_CVT_SCALEBOOL2_FUNC(8b8s, schar, float) +DEF_CVT_SCALEBOOL2_FUNC(8b, bool, float) +DEF_CVT_SCALEBOOL2_FUNC(8b16u, ushort, float) +DEF_CVT_SCALEBOOL2_FUNC(8b16s, short, float) +DEF_CVT_SCALEBOOL2_FUNC(8b32u, unsigned, float) +DEF_CVT_SCALEBOOL2_FUNC(8b32s, int, float) +DEF_CVT_SCALEBOOL2_FUNC(8b32f, float, float) +DEF_CVT_SCALEBOOL2_FUNC(8b64u, uint64_t, double) +DEF_CVT_SCALEBOOL2_FUNC(8b64s, int64_t, double) +DEF_CVT_SCALEBOOL2_FUNC(8b64f, double, double) +DEF_CVT_SCALEBOOL2_FUNC(8b16f, float16_t, float) +DEF_CVT_SCALEBOOL2_FUNC(8b16bf, bfloat16_t, float) BinaryFunc getCvtScaleAbsFunc(int depth) { - static BinaryFunc cvtScaleAbsTab[] = - { - (BinaryFunc)cvtScaleAbs8u, (BinaryFunc)cvtScaleAbs8s8u, (BinaryFunc)cvtScaleAbs16u8u, - (BinaryFunc)cvtScaleAbs16s8u, (BinaryFunc)cvtScaleAbs32s8u, (BinaryFunc)cvtScaleAbs32f8u, - (BinaryFunc)cvtScaleAbs64f8u, 0 - }; - - return cvtScaleAbsTab[depth]; + BinaryFunc func = + depth == CV_8U ? (BinaryFunc)cvtScaleAbs8u : + depth == CV_8S ? (BinaryFunc)cvtScaleAbs8s8u : + depth == CV_Bool ? (BinaryFunc)cvtScaleAbs8b8u : + depth == CV_16U ? (BinaryFunc)cvtScaleAbs16u8u : + depth == CV_16S ? (BinaryFunc)cvtScaleAbs16s8u : + depth == CV_16F ? (BinaryFunc)cvtScaleAbs16f8u : + depth == CV_16BF ? (BinaryFunc)cvtScaleAbs16bf8u : + depth == CV_32U ? (BinaryFunc)cvtScaleAbs32u8u : + depth == CV_32S ? (BinaryFunc)cvtScaleAbs32s8u : + depth == CV_32F ? (BinaryFunc)cvtScaleAbs32f8u : + depth == CV_64U ? (BinaryFunc)cvtScaleAbs64u8u : + depth == CV_64S ? (BinaryFunc)cvtScaleAbs64s8u : + depth == CV_64F ? 
(BinaryFunc)cvtScaleAbs64f8u : 0; + CV_Assert(func != 0); + return func; } -BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) +BinaryFunc getConvertScaleFunc(int sdepth_, int ddepth_) { - static BinaryFunc cvtScaleTab[][8] = - { - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u), - (BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u), - (BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u - }, - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s), - (BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s), - (BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s - }, - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u), - (BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u), - (BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u - }, - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s), - (BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s), - (BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s - }, - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s), - (BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s), - (BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s - }, - { - (BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f), - 
(BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f), - (BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f - }, - { - (BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f, - (BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f, - (BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f - }, - { - (BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f, - (BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f, - (BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f - }, - }; + int sdepth = CV_MAT_DEPTH(sdepth_); + int ddepth = CV_MAT_DEPTH(ddepth_); + BinaryFunc func = + ddepth == CV_8U ? ( + sdepth == CV_8U ? cvtScale8u : + sdepth == CV_8S ? cvtScale8s8u : + sdepth == CV_Bool ? cvtScale8b8u : + sdepth == CV_16U ? cvtScale16u8u : + sdepth == CV_16S ? cvtScale16s8u : + sdepth == CV_32U ? cvtScale32u8u : + sdepth == CV_32S ? cvtScale32s8u : + sdepth == CV_32F ? cvtScale32f8u : + sdepth == CV_64F ? cvtScale64f8u : + sdepth == CV_16F ? cvtScale16f8u : + sdepth == CV_16BF ? cvtScale16bf8u : + sdepth == CV_64U ? cvtScale64u8u : + sdepth == CV_64S ? cvtScale64s8u : + 0) : + ddepth == CV_8S ? ( + sdepth == CV_8U ? cvtScale8u8s : + sdepth == CV_8S ? cvtScale8s : + sdepth == CV_Bool ? cvtScale8b8s : + sdepth == CV_16U ? cvtScale16u8s : + sdepth == CV_16S ? cvtScale16s8s : + sdepth == CV_32U ? cvtScale32u8s : + sdepth == CV_32S ? cvtScale32s8s : + sdepth == CV_32F ? cvtScale32f8s : + sdepth == CV_64F ? cvtScale64f8s : + sdepth == CV_16F ? cvtScale16f8s : + sdepth == CV_16BF ? cvtScale16bf8s : + sdepth == CV_64U ? cvtScale64u8s : + sdepth == CV_64S ? cvtScale64s8s : + 0) : + ddepth == CV_16U ? ( + sdepth == CV_8U ? cvtScale8u16u : + sdepth == CV_8S ? cvtScale8s16u : + sdepth == CV_Bool ? cvtScale8b16u : + sdepth == CV_16U ? cvtScale16u : + sdepth == CV_16S ? cvtScale16s16u : + sdepth == CV_32U ? 
cvtScale32u16u : + sdepth == CV_32S ? cvtScale32s16u : + sdepth == CV_32F ? cvtScale32f16u : + sdepth == CV_64F ? cvtScale64f16u : + sdepth == CV_16F ? cvtScale16f16u : + sdepth == CV_16BF ? cvtScale16bf16u : + sdepth == CV_64U ? cvtScale64u16u : + sdepth == CV_64S ? cvtScale64s16u : + 0) : + ddepth == CV_16S ? ( + sdepth == CV_8U ? cvtScale8u16s : + sdepth == CV_8S ? cvtScale8s16s : + sdepth == CV_Bool ? cvtScale8b16s : + sdepth == CV_16U ? cvtScale16u16s : + sdepth == CV_16S ? cvtScale16s : + sdepth == CV_32U ? cvtScale32u16s : + sdepth == CV_32S ? cvtScale32s16s : + sdepth == CV_32F ? cvtScale32f16s : + sdepth == CV_64F ? cvtScale64f16s : + sdepth == CV_16F ? cvtScale16f16s : + sdepth == CV_16BF ? cvtScale16bf16s : + sdepth == CV_64U ? cvtScale64u16s : + sdepth == CV_64S ? cvtScale64s16s : + 0) : + ddepth == CV_32U ? ( + sdepth == CV_8U ? cvtScale8u32u : + sdepth == CV_8S ? cvtScale8s32u : + sdepth == CV_Bool ? cvtScale8b32u : + sdepth == CV_16U ? cvtScale16u32u : + sdepth == CV_16S ? cvtScale16s32u : + sdepth == CV_32U ? cvtScale32u : + sdepth == CV_32S ? cvtScale32s32u : + sdepth == CV_32F ? cvtScale32f32u : + sdepth == CV_64F ? cvtScale64f32u : + sdepth == CV_16F ? cvtScale16f32u : + sdepth == CV_16BF ? cvtScale16bf32u : + sdepth == CV_64U ? cvtScale64u32u : + sdepth == CV_64S ? cvtScale64s32u : - return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)]; + 0) : + ddepth == CV_32S ? ( + sdepth == CV_8U ? cvtScale8u32s : + sdepth == CV_8S ? cvtScale8s32s : + sdepth == CV_Bool ? cvtScale8b32s : + sdepth == CV_16U ? cvtScale16u32s : + sdepth == CV_16S ? cvtScale16s32s : + sdepth == CV_32U ? cvtScale32u32s : + sdepth == CV_32S ? cvtScale32s : + sdepth == CV_32F ? cvtScale32f32s : + sdepth == CV_64F ? cvtScale64f32s : + sdepth == CV_16F ? cvtScale16f32s : + sdepth == CV_16BF ? cvtScale16bf32s : + sdepth == CV_64U ? cvtScale64u32s : + sdepth == CV_64S ? cvtScale64s32s : + 0) : + ddepth == CV_32F ? ( + sdepth == CV_8U ? cvtScale8u32f : + sdepth == CV_8S ? 
cvtScale8s32f : + sdepth == CV_Bool ? cvtScale8b32f : + sdepth == CV_16U ? cvtScale16u32f : + sdepth == CV_16S ? cvtScale16s32f : + sdepth == CV_32U ? cvtScale32u32f : + sdepth == CV_32S ? cvtScale32s32f : + sdepth == CV_32F ? cvtScale32f : + sdepth == CV_64F ? cvtScale64f32f : + sdepth == CV_16F ? cvtScale16f32f : + sdepth == CV_16BF ? cvtScale16bf32f : + sdepth == CV_64U ? cvtScale64u32f : + sdepth == CV_64S ? cvtScale64s32f : + 0) : + ddepth == CV_64F ? ( + sdepth == CV_8U ? cvtScale8u64f : + sdepth == CV_8S ? cvtScale8s64f : + sdepth == CV_Bool ? cvtScale8b64f : + sdepth == CV_16U ? cvtScale16u64f : + sdepth == CV_16S ? cvtScale16s64f : + sdepth == CV_32U ? cvtScale32u64f : + sdepth == CV_32S ? cvtScale32s64f : + sdepth == CV_32F ? cvtScale32f64f : + sdepth == CV_64F ? cvtScale64f : + sdepth == CV_16F ? cvtScale16f64f : + sdepth == CV_16BF ? cvtScale16bf64f : + sdepth == CV_64U ? cvtScale64u64f : + sdepth == CV_64S ? cvtScale64s64f : + 0) : + ddepth == CV_16F ? ( + sdepth == CV_8U ? cvtScale8u16f : + sdepth == CV_8S ? cvtScale8s16f : + sdepth == CV_Bool ? cvtScale8b16f : + sdepth == CV_16U ? cvtScale16u16f : + sdepth == CV_16S ? cvtScale16s16f : + sdepth == CV_32U ? cvtScale32u16f : + sdepth == CV_32S ? cvtScale32s16f : + sdepth == CV_32F ? cvtScale32f16f : + sdepth == CV_64F ? cvtScale64f16f : + sdepth == CV_16F ? cvtScale16f : + sdepth == CV_16BF ? cvtScale16bf16f : + sdepth == CV_64U ? cvtScale64u16f : + sdepth == CV_64S ? cvtScale64s16f : + 0) : + ddepth == CV_16BF ? ( + sdepth == CV_8U ? cvtScale8u16bf : + sdepth == CV_8S ? cvtScale8s16bf : + sdepth == CV_Bool ? cvtScale8b16bf : + sdepth == CV_16U ? cvtScale16u16bf : + sdepth == CV_16S ? cvtScale16s16bf : + sdepth == CV_32U ? cvtScale32u16bf : + sdepth == CV_32S ? cvtScale32s16bf : + sdepth == CV_32F ? cvtScale32f16bf : + sdepth == CV_64F ? cvtScale64f16bf : + sdepth == CV_16F ? cvtScale16f16bf : + sdepth == CV_16BF ? cvtScale16bf : + sdepth == CV_64U ? cvtScale64u16bf : + sdepth == CV_64S ? 
cvtScale64s16bf : + 0) : + ddepth == CV_Bool ? ( + sdepth == CV_8U ? cvtScale8u8b : + sdepth == CV_8S ? cvtScale8s8b : + sdepth == CV_Bool ? cvtScale8b : + sdepth == CV_16U ? cvtScale16u8b : + sdepth == CV_16S ? cvtScale16s8b : + sdepth == CV_32U ? cvtScale32u8b : + sdepth == CV_32S ? cvtScale32s8b : + sdepth == CV_32F ? cvtScale32f8b : + sdepth == CV_64F ? cvtScale64f8b : + sdepth == CV_16F ? cvtScale16f8b : + sdepth == CV_16BF ? cvtScale16bf8b : + sdepth == CV_64U ? cvtScale64u8b : + sdepth == CV_64S ? cvtScale64s8b : + 0) : + ddepth == CV_64U ? ( + sdepth == CV_8U ? cvtScale8u64u : + sdepth == CV_8S ? cvtScale8s64u : + sdepth == CV_Bool ? cvtScale8b64u : + sdepth == CV_16U ? cvtScale16u64u : + sdepth == CV_16S ? cvtScale16s64u : + sdepth == CV_32U ? cvtScale32u64u : + sdepth == CV_32S ? cvtScale32s64u : + sdepth == CV_32F ? cvtScale32f64u : + sdepth == CV_64F ? cvtScale64f64u : + sdepth == CV_16F ? cvtScale16f64u : + sdepth == CV_16BF ? cvtScale16bf64u : + sdepth == CV_64U ? cvtScale64u : + sdepth == CV_64S ? cvtScale64s64u : + 0) : + ddepth == CV_64S ? ( + sdepth == CV_8U ? cvtScale8u64s : + sdepth == CV_8S ? cvtScale8s64s : + sdepth == CV_Bool ? cvtScale8b64s : + sdepth == CV_16U ? cvtScale16u64s : + sdepth == CV_16S ? cvtScale16s64s : + sdepth == CV_32U ? cvtScale32u64s : + sdepth == CV_32S ? cvtScale32s64s : + sdepth == CV_32F ? cvtScale32f64s : + sdepth == CV_64F ? cvtScale64f64s : + sdepth == CV_16F ? cvtScale16f64s : + sdepth == CV_16BF ? cvtScale16bf64s : + sdepth == CV_64U ? cvtScale64u64s : + sdepth == CV_64S ? 
cvtScale64s : + 0) : + 0; + CV_Assert(func != 0); + return func; } #endif diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 3a6a1a7ac6..09250b8585 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -72,28 +72,43 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to) switch(depth) { case CV_8U: - scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); + scalarToRawData_(s, (uchar*)_buf, cn, unroll_to); break; case CV_8S: - scalarToRawData_(s, (schar*)_buf, cn, unroll_to); + scalarToRawData_(s, (schar*)_buf, cn, unroll_to); + break; + case CV_Bool: + scalarToRawData_(s, (bool*)_buf, cn, unroll_to); break; case CV_16U: - scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); + scalarToRawData_(s, (ushort*)_buf, cn, unroll_to); break; case CV_16S: - scalarToRawData_(s, (short*)_buf, cn, unroll_to); - break; - case CV_32S: - scalarToRawData_(s, (int*)_buf, cn, unroll_to); - break; - case CV_32F: - scalarToRawData_(s, (float*)_buf, cn, unroll_to); - break; - case CV_64F: - scalarToRawData_(s, (double*)_buf, cn, unroll_to); + scalarToRawData_(s, (short*)_buf, cn, unroll_to); break; case CV_16F: - scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to); + scalarToRawData_(s, (float16_t*)_buf, cn, unroll_to); + break; + case CV_16BF: + scalarToRawData_(s, (bfloat16_t*)_buf, cn, unroll_to); + break; + case CV_32U: + scalarToRawData_(s, (unsigned*)_buf, cn, unroll_to); + break; + case CV_32S: + scalarToRawData_(s, (int*)_buf, cn, unroll_to); + break; + case CV_32F: + scalarToRawData_(s, (float*)_buf, cn, unroll_to); + break; + case CV_64U: + scalarToRawData_(s, (uint64_t*)_buf, cn, unroll_to); + break; + case CV_64S: + scalarToRawData_(s, (int64_t*)_buf, cn, unroll_to); + break; + case CV_64F: + scalarToRawData_(s, (double*)_buf, cn, unroll_to); break; default: CV_Error(CV_StsUnsupportedFormat,""); diff --git a/modules/core/src/matmul.dispatch.cpp b/modules/core/src/matmul.dispatch.cpp index a213ca06c7..3758f6816d 100644 
--- a/modules/core/src/matmul.dispatch.cpp +++ b/modules/core/src/matmul.dispatch.cpp @@ -647,7 +647,7 @@ void scaleAdd(InputArray _src1, double alpha, InputArray _src2, OutputArray _dst CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(), ocl_scaleAdd(_src1, alpha, _src2, _dst, type)) - if( depth < CV_32F ) + if( depth != CV_32F && depth != CV_64F ) { addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth); return; @@ -979,7 +979,7 @@ typedef double (*DotProdFunc)(const uchar* src1, const uchar* src2, int len); static DotProdFunc getDotProdFunc(int depth) { - static DotProdFunc dotProdTab[] = + static DotProdFunc dotProdTab[CV_DEPTH_MAX] = { (DotProdFunc)GET_OPTIMIZED(dotProd_8u), (DotProdFunc)GET_OPTIMIZED(dotProd_8s), (DotProdFunc)dotProd_16u, (DotProdFunc)dotProd_16s, diff --git a/modules/core/src/matmul.simd.hpp b/modules/core/src/matmul.simd.hpp index 5a7f36d12b..e32096cf71 100644 --- a/modules/core/src/matmul.simd.hpp +++ b/modules/core/src/matmul.simd.hpp @@ -1791,7 +1791,7 @@ diagtransform_64f(const double* src, double* dst, const double* m, int len, int TransformFunc getTransformFunc(int depth) { - static TransformFunc transformTab[] = + static TransformFunc transformTab[CV_DEPTH_MAX] = { (TransformFunc)transform_8u, (TransformFunc)transform_8s, (TransformFunc)transform_16u, (TransformFunc)transform_16s, (TransformFunc)transform_32s, (TransformFunc)transform_32f, @@ -1803,7 +1803,7 @@ TransformFunc getTransformFunc(int depth) TransformFunc getDiagTransformFunc(int depth) { - static TransformFunc diagTransformTab[] = + static TransformFunc diagTransformTab[CV_DEPTH_MAX] = { (TransformFunc)diagtransform_8u, (TransformFunc)diagtransform_8s, (TransformFunc)diagtransform_16u, (TransformFunc)diagtransform_16s, (TransformFunc)diagtransform_32s, (TransformFunc)diagtransform_32f, diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 8111dc2230..704979b714 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp 
@@ -1151,7 +1151,7 @@ Mat Mat::reshape(int new_cn, int new_rows) const } if( new_rows > 0 ) { - int sz[] = { new_rows, (int)(total()/new_rows) }; + int sz[] = { new_rows, (int)(total()*cn/new_rows) }; return reshape(new_cn, 2, sz); } } diff --git a/modules/core/src/mean.simd.hpp b/modules/core/src/mean.simd.hpp index bb815adc1c..60dba7afcf 100644 --- a/modules/core/src/mean.simd.hpp +++ b/modules/core/src/mean.simd.hpp @@ -311,7 +311,7 @@ static int sqsum64f( const double* src, const uchar* mask, double* sum, double* SumSqrFunc getSumSqrFunc(int depth) { CV_INSTRUMENT_REGION(); - static SumSqrFunc sumSqrTab[] = + static SumSqrFunc sumSqrTab[CV_DEPTH_MAX] = { (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s, (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0 diff --git a/modules/core/src/merge.dispatch.cpp b/modules/core/src/merge.dispatch.cpp index b95dc7345d..6b8c2d8135 100644 --- a/modules/core/src/merge.dispatch.cpp +++ b/modules/core/src/merge.dispatch.cpp @@ -50,12 +50,15 @@ typedef void (*MergeFunc)(const uchar** src, uchar* dst, int len, int cn); static MergeFunc getMergeFunc(int depth) { - static MergeFunc mergeTab[] = + static MergeFunc mergeTab[CV_DEPTH_MAX] = { (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), - (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u) + (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), + (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), + (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), + (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), 0, 0, 0, }; return mergeTab[depth]; diff 
--git a/modules/core/src/minmax.cpp b/modules/core/src/minmax.cpp index 092c5e9234..d694d99d3c 100644 --- a/modules/core/src/minmax.cpp +++ b/modules/core/src/minmax.cpp @@ -1002,7 +1002,8 @@ bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc CV_Assert(!haveSrc2 || _src2.type() == type); - if (depth == CV_32S) + if (depth == CV_32S || depth == CV_8S || depth == CV_32U || depth == CV_64U || + depth == CV_64S || depth == CV_16F || depth == CV_16BF) return false; if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport) diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 69da85f291..72d6fd9abc 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -367,7 +367,7 @@ typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, in static NormFunc getNormFunc(int normType, int depth) { - static NormFunc normTab[3][8] = + static NormFunc normTab[3][CV_DEPTH_MAX] = { { (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s), @@ -388,7 +388,7 @@ static NormFunc getNormFunc(int normType, int depth) static NormDiffFunc getNormDiffFunc(int normType, int depth) { - static NormDiffFunc normDiffTab[3][8] = + static NormDiffFunc normDiffTab[3][CV_DEPTH_MAX] = { { (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s, diff --git a/modules/core/src/out.cpp b/modules/core/src/out.cpp index 8a7d7e1636..1307ff9d03 100644 --- a/modules/core/src/out.cpp +++ b/modules/core/src/out.cpp @@ -70,14 +70,19 @@ namespace cv char braces[5]; void (FormattedImpl::*valueToStr)(); + void valueToStrBool() { snprintf(buf, sizeof(buf), "%d", (int)mtx.ptr(row, col)[cn] != 0); } void valueToStr8u() { snprintf(buf, sizeof(buf), "%3d", (int)mtx.ptr(row, col)[cn]); } void valueToStr8s() { snprintf(buf, sizeof(buf), "%3d", (int)mtx.ptr(row, col)[cn]); } void valueToStr16u() { snprintf(buf, sizeof(buf), "%d", 
(int)mtx.ptr(row, col)[cn]); } void valueToStr16s() { snprintf(buf, sizeof(buf), "%d", (int)mtx.ptr(row, col)[cn]); } + void valueToStr32u() { snprintf(buf, sizeof(buf), "%u", mtx.ptr(row, col)[cn]); } void valueToStr32s() { snprintf(buf, sizeof(buf), "%d", mtx.ptr(row, col)[cn]); } void valueToStr32f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr(row, col)[cn]); } void valueToStr64f() { snprintf(buf, sizeof(buf), floatFormat, mtx.ptr(row, col)[cn]); } + void valueToStr64u() { snprintf(buf, sizeof(buf), "%llu", (unsigned long long)mtx.ptr(row, col)[cn]); } + void valueToStr64s() { snprintf(buf, sizeof(buf), "%lld", (long long)mtx.ptr(row, col)[cn]); } void valueToStr16f() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr(row, col)[cn]); } + void valueToStr16bf() { snprintf(buf, sizeof(buf), floatFormat, (float)mtx.ptr(row, col)[cn]); } void valueToStrOther() { buf[0] = 0; } public: @@ -111,13 +116,19 @@ namespace cv { case CV_8U: valueToStr = &FormattedImpl::valueToStr8u; break; case CV_8S: valueToStr = &FormattedImpl::valueToStr8s; break; + case CV_Bool: valueToStr = &FormattedImpl::valueToStrBool; break; case CV_16U: valueToStr = &FormattedImpl::valueToStr16u; break; case CV_16S: valueToStr = &FormattedImpl::valueToStr16s; break; + case CV_32U: valueToStr = &FormattedImpl::valueToStr32u; break; case CV_32S: valueToStr = &FormattedImpl::valueToStr32s; break; case CV_32F: valueToStr = &FormattedImpl::valueToStr32f; break; case CV_64F: valueToStr = &FormattedImpl::valueToStr64f; break; - default: CV_Assert(mtx.depth() == CV_16F); - valueToStr = &FormattedImpl::valueToStr16f; + case CV_64U: valueToStr = &FormattedImpl::valueToStr64u; break; + case CV_64S: valueToStr = &FormattedImpl::valueToStr64s; break; + case CV_16F: valueToStr = &FormattedImpl::valueToStr16f; break; + case CV_16BF: valueToStr = &FormattedImpl::valueToStr16bf; break; + default: + CV_Error_(Error::StsError, ("unsupported matrix type %d\n", mtx.depth())); } } diff --git 
a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 0d64bab094..cf0a6466ea 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -56,6 +56,28 @@ char* itoa( int _val, char* buffer, int /*radix*/ ) return ptr; } +char* itoa( int64_t _val, char* buffer, int /*radix*/, bool _signed) +{ + const int radix = 10; + char* ptr=buffer + 23 /* enough even for 64-bit integers */; + int sign = _signed && _val < 0 ? -1 : 1; + uint64_t val = !_signed ? (uint64_t)_val : abs(_val); + + *ptr = '\0'; + do + { + uint64_t r = val / radix; + *--ptr = (char)(val - (r*radix) + '0'); + val = r; + } + while( val != 0 ); + + if( sign < 0 ) + *--ptr = '-'; + + return ptr; +} + char* doubleToString( char* buf, size_t bufSize, double value, bool explicitZero ) { Cv64suf val; @@ -142,12 +164,12 @@ char* floatToString( char* buf, size_t bufSize, float value, bool halfprecision, return buf; } -static const char symbols[9] = "ucwsifdh"; +static const char symbols[] = "ucwsifdhHbLUn"; static char typeSymbol(int depth) { CV_StaticAssert(CV_64F == 6, ""); - CV_CheckDepth(depth, depth >=0 && depth <= CV_16F, ""); + CV_CheckDepth(depth, depth >= 0 && depth <= CV_32U, ""); return symbols[depth]; } @@ -264,13 +286,18 @@ int calcStructSize( const char* dt, int initial_size ) switch (v) { case 'u': { elem_max_size = std::max( elem_max_size, sizeof(uchar ) ); break; } + case 'b': { elem_max_size = std::max( elem_max_size, sizeof(bool ) ); break; } case 'c': { elem_max_size = std::max( elem_max_size, sizeof(schar ) ); break; } case 'w': { elem_max_size = std::max( elem_max_size, sizeof(ushort) ); break; } case 's': { elem_max_size = std::max( elem_max_size, sizeof(short ) ); break; } case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; } + case 'n': { elem_max_size = std::max( elem_max_size, sizeof(unsigned) ); break; } case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; } case 'd': { elem_max_size = 
std::max( elem_max_size, sizeof(double) ); break; } - case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; } + case 'h': { elem_max_size = std::max( elem_max_size, sizeof(float16_t)); break; } + case 'H': { elem_max_size = std::max( elem_max_size, sizeof(bfloat16_t)); break; } + case 'I': { elem_max_size = std::max( elem_max_size, sizeof(int64_t)); break; } + case 'U': { elem_max_size = std::max( elem_max_size, sizeof(uint64_t)); break; } default: CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt)); } @@ -1097,6 +1124,10 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s ptr = fs::itoa(*(uchar *) data, buf, 10); data++; break; + case CV_Bool: + ptr = fs::itoa(*(uchar *) data != 0, buf, 10); + data++; + break; case CV_8S: ptr = fs::itoa(*(char *) data, buf, 10); data++; @@ -1109,10 +1140,22 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s ptr = fs::itoa(*(short *) data, buf, 10); data += sizeof(short); break; + case CV_32U: + ptr = fs::itoa((int64_t)*(unsigned*) data, buf, 10, false); + data += sizeof(unsigned); + break; case CV_32S: ptr = fs::itoa(*(int *) data, buf, 10); data += sizeof(int); break; + case CV_64U: + ptr = fs::itoa(*(uint64_t*) data, buf, 10, false); + data += sizeof(uint64_t); + break; + case CV_64S: + ptr = fs::itoa(*(int64_t*) data, buf, 10, true); + data += sizeof(int64_t); + break; case CV_32F: ptr = fs::floatToString(buf, sizeof(buf), *(float *) data, false, explicitZero); data += sizeof(float); @@ -1121,10 +1164,14 @@ void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, s ptr = fs::doubleToString(buf, sizeof(buf), *(double *) data, explicitZero); data += sizeof(double); break; - case CV_16F: /* reference */ + case CV_16F: ptr = fs::floatToString(buf, sizeof(buf), (float) *(float16_t *) data, true, explicitZero); data += sizeof(float16_t); break; + case CV_16BF: + ptr = 
fs::floatToString(buf, sizeof(buf), (float) *(bfloat16_t *) data, true, explicitZero); + data += sizeof(bfloat16_t); + break; default: CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type"); return; @@ -2572,6 +2619,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(char*)data = saturate_cast(ival); data++; break; + case CV_Bool: + *(bool*)data = ival != 0; + data++; + break; case CV_16U: *(ushort*)data = saturate_cast(ival); data += sizeof(ushort); @@ -2580,6 +2631,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(short*)data = saturate_cast(ival); data += sizeof(short); break; + case CV_32U: + *(unsigned*)data = (unsigned)std::max(ival, 0); + data += sizeof(unsigned); + break; case CV_32S: *(int*)data = ival; data += sizeof(int); @@ -2588,6 +2643,14 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(float*)data = (float)ival; data += sizeof(float); break; + case CV_64U: + *(uint64_t*)data = (uint64_t)ival; + data += sizeof(uint64_t); + break; + case CV_64S: + *(int64_t*)data = (int64_t)ival; + data += sizeof(int64_t); + break; case CV_64F: *(double*)data = (double)ival; data += sizeof(double); @@ -2596,6 +2659,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(float16_t*)data = float16_t((float)ival); data += sizeof(float16_t); break; + case CV_16BF: + *(bfloat16_t*)data = bfloat16_t((float)ival); + data += sizeof(bfloat16_t); + break; default: CV_Error( Error::StsUnsupportedFormat, "Unsupported type" ); } @@ -2622,6 +2689,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(short*)data = saturate_cast(fval); data += sizeof(short); break; + case CV_32U: + *(int*)data = saturate_cast(fval); + data += sizeof(int); + break; case CV_32S: *(int*)data = saturate_cast(fval); data += sizeof(int); @@ -2630,6 +2701,14 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, 
void* _data0, si *(float*)data = (float)fval; data += sizeof(float); break; + case CV_64U: + *(uint64_t*)data = (uint64_t)round(std::max(fval, 0.)); + data += sizeof(uint64_t); + break; + case CV_64S: + *(int64_t*)data = (int64_t)round(std::max(fval, 0.)); + data += sizeof(int64_t); + break; case CV_64F: *(double*)data = fval; data += sizeof(double); @@ -2638,6 +2717,10 @@ FileNodeIterator& FileNodeIterator::readRaw( const String& fmt, void* _data0, si *(float16_t*)data = float16_t((float)fval); data += sizeof(float16_t); break; + case CV_16BF: + *(bfloat16_t*)data = bfloat16_t((float)fval); + data += sizeof(bfloat16_t); + break; default: CV_Error( Error::StsUnsupportedFormat, "Unsupported type" ); } diff --git a/modules/core/src/persistence.hpp b/modules/core/src/persistence.hpp index 4b579303fa..c08ddb5472 100644 --- a/modules/core/src/persistence.hpp +++ b/modules/core/src/persistence.hpp @@ -86,6 +86,7 @@ namespace fs { int strcasecmp(const char* str1, const char* str2); char* itoa( int _val, char* buffer, int /*radix*/ ); +char* itoa( int64_t _val, char* buffer, int /*radix*/, bool _signed ); char* floatToString( char* buf, size_t bufSize, float value, bool halfprecision, bool explicitZero ); char* doubleToString( char* buf, size_t bufSize, double value, bool explicitZero ); diff --git a/modules/core/src/rand.cpp b/modules/core/src/rand.cpp index 3e4f761f4a..ed93f88d4f 100644 --- a/modules/core/src/rand.cpp +++ b/modules/core/src/rand.cpp @@ -51,38 +51,53 @@ namespace cv Multiply-with-carry generator is used here: temp = ( A*X(n) + carry ) X(n+1) = temp mod (2^32) - carry = temp / (2^32) + carry = floor (temp / (2^32)) */ #define RNG_NEXT(x) ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32)) +// make it jump-less +#define CN_NEXT(k) (((k) + 1) & (((k) >= cn) - 1)) + +enum +{ + RNG_FLAG_SMALL = 0x40000000, + RNG_FLAG_STDMTX = 0x80000000 +}; /***************************************************************************************\ * Pseudo-Random Number 
Generators (PRNGs) * \***************************************************************************************/ template static void -randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag ) +randBits_( T* arr, int len, int cn, uint64* state, const Vec2l* p, int flags ) { + bool small_flag = (flags & RNG_FLAG_SMALL) != 0; uint64 temp = *state; - int i; + int i, k = 0; + len *= cn; + --cn; if( !small_flag ) { for( i = 0; i <= len - 4; i += 4 ) { - int t0, t1; + int64_t t0, t1; temp = RNG_NEXT(temp); - t0 = ((int)temp & p[i][0]) + p[i][1]; + t0 = ((int64_t)temp & p[k][0]) + p[k][1]; + k = CN_NEXT(k); temp = RNG_NEXT(temp); - t1 = ((int)temp & p[i+1][0]) + p[i+1][1]; + t1 = ((int64_t)temp & p[k][0]) + p[k][1]; + k = CN_NEXT(k); arr[i] = saturate_cast(t0); arr[i+1] = saturate_cast(t1); temp = RNG_NEXT(temp); - t0 = ((int)temp & p[i+2][0]) + p[i+2][1]; + t0 = ((int64_t)temp & p[k][0]) + p[k][1]; + k = CN_NEXT(k); temp = RNG_NEXT(temp); - t1 = ((int)temp & p[i+3][0]) + p[i+3][1]; + t1 = ((int64_t)temp & p[k][0]) + p[k][1]; + k = CN_NEXT(k); arr[i+2] = saturate_cast(t0); arr[i+3] = saturate_cast(t1); } @@ -91,16 +106,23 @@ randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag ) { for( i = 0; i <= len - 4; i += 4 ) { - int t0, t1, t; + int64_t t0, t1, t; temp = RNG_NEXT(temp); - t = (int)temp; - t0 = (t & p[i][0]) + p[i][1]; - t1 = ((t >> 8) & p[i+1][0]) + p[i+1][1]; + t = temp; + // p[i+...][0] is within 0..255 in this branch (small_flag==true), + // so we don't need to do (t>>...)&255, + // the upper bits will be cleaned with ... & p[i+...][0]. 
+ t0 = (t & p[k][0]) + p[k][1]; + k = CN_NEXT(k); + t1 = ((t >> 8) & p[k][0]) + p[k][1]; + k = CN_NEXT(k); arr[i] = saturate_cast(t0); arr[i+1] = saturate_cast(t1); - t0 = ((t >> 16) & p[i+2][0]) + p[i+2][1]; - t1 = ((t >> 24) & p[i+3][0]) + p[i+3][1]; + t0 = ((t >> 16) & p[k][0]) + p[k][1]; + k = CN_NEXT(k); + t1 = ((t >> 24) & p[k][0]) + p[k][1]; + k = CN_NEXT(k); arr[i+2] = saturate_cast(t0); arr[i+3] = saturate_cast(t1); } @@ -108,10 +130,11 @@ randBits_( T* arr, int len, uint64* state, const Vec2i* p, bool small_flag ) for( ; i < len; i++ ) { - int t0; + int64_t t0; temp = RNG_NEXT(temp); - t0 = ((int)temp & p[i][0]) + p[i][1]; + t0 = ((int64_t)temp & p[k][0]) + p[k][1]; + k = CN_NEXT(k); arr[i] = saturate_cast(t0); } @@ -123,101 +146,145 @@ struct DivStruct unsigned d; unsigned M; int sh1, sh2; - int delta; + int64_t delta; + uint64_t diff; }; template static void -randi_( T* arr, int len, uint64* state, const DivStruct* p ) +randi_( T* arr, int len, int cn, uint64* state, const DivStruct* p ) { uint64 temp = *state; + int k = 0; + len *= cn; + cn--; for( int i = 0; i < len; i++ ) { temp = RNG_NEXT(temp); unsigned t = (unsigned)temp; - unsigned v = (unsigned)(((uint64)t * p[i].M) >> 32); - v = (v + ((t - v) >> p[i].sh1)) >> p[i].sh2; - v = t - v*p[i].d + p[i].delta; - arr[i] = saturate_cast((int)v); + unsigned v = (unsigned)(((uint64)t * p[k].M) >> 32); + v = (v + ((t - v) >> p[k].sh1)) >> p[k].sh2; + int64_t res = (int64_t)(t - v*p[k].d) + p[k].delta; + k = CN_NEXT(k); + arr[i] = saturate_cast(res); } *state = temp; } - -#define DEF_RANDI_FUNC(suffix, type) \ -static void randBits_##suffix(type* arr, int len, uint64* state, \ - const Vec2i* p, void*, bool small_flag) \ -{ randBits_(arr, len, state, p, small_flag); } \ -\ -static void randi_##suffix(type* arr, int len, uint64* state, \ - const DivStruct* p, void*, bool ) \ -{ randi_(arr, len, state, p); } - -DEF_RANDI_FUNC(8u, uchar) -DEF_RANDI_FUNC(8s, schar) -DEF_RANDI_FUNC(16u, ushort) -DEF_RANDI_FUNC(16s, 
short) -DEF_RANDI_FUNC(32s, int) - -static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, void*, bool ) -{ - uint64 temp = *state; - for( int i = 0; i < len; i++ ) - { - int t = (int)(temp = RNG_NEXT(temp)); - arr[i] = (float)(t*p[i][0]); - } - *state = temp; - - // add bias separately to make the generated random numbers - // more deterministic, independent of - // architecture details (FMA instruction use etc.) - hal::addRNGBias32f(arr, &p[0][0], len); -} - static void -randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool ) +randi_( int64_t* arr, int len, int cn, uint64* state, const DivStruct* p ) { uint64 temp = *state; + int k = 0; + len *= cn; + cn--; for( int i = 0; i < len; i++ ) { temp = RNG_NEXT(temp); - int64 v = (temp >> 32)|(temp << 32); - arr[i] = v*p[i][0]; + unsigned t0 = (unsigned)temp; + temp = RNG_NEXT(temp); + unsigned t1 = (unsigned)temp; + int64_t t = (int64_t)((((uint64_t)t0 << 32) | t1) % p[k].diff) + p[k].delta; + k = CN_NEXT(k); + arr[i] = t; } *state = temp; - - hal::addRNGBias64f(arr, &p[0][0], len); } -static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool ) +static void +randi_( uint64_t* arr, int len, int cn, uint64* state, const DivStruct* p ) { uint64 temp = *state; + int k = 0; + len *= cn; + cn--; for( int i = 0; i < len; i++ ) { - float f = (float)(int)(temp = RNG_NEXT(temp)); - fbuf[i] = f*p[i][0]; + temp = RNG_NEXT(temp); + unsigned t0 = (unsigned)temp; + temp = RNG_NEXT(temp); + unsigned t1 = (unsigned)temp; + uint64_t t = (((uint64_t)t0 << 32) | t1) % p[k].diff; + int64_t delta = p[k].delta; + k = CN_NEXT(k); + arr[i] = delta >= 0 || t >= (uint64_t)-delta ? t + (uint64_t)delta : 0; } *state = temp; - - // add bias separately to make the generated random numbers - // more deterministic, independent of - // architecture details (FMA instruction use etc.) 
- hal::addRNGBias32f(fbuf, &p[0][0], len); - hal::cvt32f16f(fbuf, arr, len); } -typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, void* tempbuf, bool small_flag); +#define DEF_RANDI_FUNC(suffix, type) \ +static void randBits_##suffix(type* arr, int len, int cn, uint64* state, \ + const Vec2l* p, void*, int flags) \ +{ randBits_(arr, len, cn, state, p, flags); } \ +\ +static void randi_##suffix(type* arr, int len, int cn, uint64* state, \ + const DivStruct* p, void*, int) \ +{ randi_(arr, len, cn, state, p); } +DEF_RANDI_FUNC(8u, uchar) +DEF_RANDI_FUNC(8b, bool) +DEF_RANDI_FUNC(8s, schar) +DEF_RANDI_FUNC(16u, ushort) +DEF_RANDI_FUNC(16s, short) +DEF_RANDI_FUNC(32u, unsigned) +DEF_RANDI_FUNC(32s, int) +DEF_RANDI_FUNC(64u, uint64_t) +DEF_RANDI_FUNC(64s, int64_t) -static RandFunc randTab[][8] = +static void randf_16_or_32f( void* dst, int len_, int cn, uint64* state, const Vec2f* p, float* fbuf, int flags ) +{ + int depth = CV_MAT_DEPTH(flags); + uint64 temp = *state; + int k = 0, len = len_*cn; + float* arr = depth == CV_16F || depth == CV_16BF ? 
fbuf : (float*)dst; + cn--; + for( int i = 0; i < len; i++ ) + { + int t = (int)(temp = RNG_NEXT(temp)); + arr[i] = (float)(t*p[k][0]); + k = CN_NEXT(k); + } + *state = temp; + hal::addRNGBias32f(arr, &p[0][0], len_, cn+1); + if (depth == CV_16F) + hal::cvt32f16f(fbuf, (float16_t*)dst, len); + else if (depth == CV_16BF) + hal::cvt32f16bf(fbuf, (bfloat16_t*)dst, len); +} + +static void +randf_64f( double* arr, int len_, int cn, uint64* state, const Vec2d* p, void*, int ) +{ + uint64 temp = *state; + int k = 0, len = len_*cn; + cn--; + for( int i = 0; i < len; i++ ) + { + temp = RNG_NEXT(temp); + int64_t v = (int64_t)((temp >> 32) | (temp << 32)); + arr[i] = v*p[k][0]; + k = CN_NEXT(k); + } + *state = temp; + hal::addRNGBias64f(arr, &p[0][0], len_, cn+1); +} + +typedef void (*RandFunc)(uchar* arr, int len, int cn, uint64* state, + const void* p, void* tempbuf, int flags); + +static RandFunc randTab[][16] = { { - (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s, - (RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, (RandFunc)randf_16f + (RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, + (RandFunc)randi_16s, (RandFunc)randi_32s, (RandFunc)randf_16_or_32f, + (RandFunc)randf_64f, (RandFunc)randf_16_or_32f, (RandFunc)randf_16_or_32f, + (RandFunc)randi_8b, (RandFunc)randi_64u, (RandFunc)randi_64s, + (RandFunc)randi_32u, 0, 0, 0 }, { - (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s, - (RandFunc)randBits_32s, 0, 0, 0 + (RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, + (RandFunc)randBits_16s, (RandFunc)randBits_32s, 0, 0, 0, 0, + (RandFunc)randBits_8b, (RandFunc)randBits_64u, (RandFunc)randBits_64s, + (RandFunc)randBits_32u, 0, 0, 0 } }; @@ -309,90 +376,153 @@ double RNG::gaussian(double sigma) return temp*sigma; } - template static void -randnScale_( const float* src, T* dst, int len, int cn, const PT* mean, const PT* stddev, bool stdmtx ) 
+randnScale_(float* src, T* dst, int len, int cn, + const PT* mean, const PT* stddev, int flags ) { + bool stdmtx = (flags & RNG_FLAG_STDMTX) != 0; int i, j, k; - if( !stdmtx ) + if( !stdmtx || cn == 1 ) { if( cn == 1 ) { - PT b = mean[0], a = stddev[0]; + PT a = stddev[0], b = mean[0]; for( i = 0; i < len; i++ ) dst[i] = saturate_cast(src[i]*a + b); } else { - for( i = 0; i < len; i++, src += cn, dst += cn ) - for( k = 0; k < cn; k++ ) - dst[k] = saturate_cast(src[k]*stddev[k] + mean[k]); + len *= cn; + cn--; + for( i = k = 0; i < len; i++ ) { + dst[i] = saturate_cast(src[i]*stddev[k] + mean[k]); + k = CN_NEXT(k); + } } } else { - for( i = 0; i < len; i++, src += cn, dst += cn ) + len *= cn; + cn--; + for( i = j = 0; i < len; i++ ) { - for( j = 0; j < cn; j++ ) - { - PT s = mean[j]; - for( k = 0; k < cn; k++ ) - s += src[k]*stddev[j*cn + k]; - dst[j] = saturate_cast(s); - } + PT s = mean[j]; + int i0 = i - j; + for( k = 0; k <= cn; k++ ) + s += src[i0 + k]*stddev[j*(cn+1) + k]; + dst[i] = saturate_cast(s); + j = CN_NEXT(j); } } } -static void randnScale_8u( const float* src, uchar* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } +// special version for 16f, 16bf and 32f +static void +randnScale_16_or_32f(float* fbuf, float* dst, int len, int cn, + const float* mean, const float* stddev, int flags) +{ + bool stdmtx = (flags & RNG_FLAG_STDMTX) != 0; + int depth = CV_MAT_DEPTH(flags); + float* arr = depth == CV_16F || depth == CV_16BF ? 
fbuf : dst; + int i, j, k; -static void randnScale_8s( const float* src, schar* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } + if( !stdmtx || cn == 1 ) + { + if( cn == 1 ) + { + float a = stddev[0], b = mean[0]; + for( i = 0; i < len; i++ ) + arr[i] = fbuf[i]*a + b; + } + else + { + len *= cn; + cn--; + for( i = k = 0; i < len; i++ ) { + arr[i] = fbuf[i]*stddev[k] + mean[k]; + k = CN_NEXT(k); + } + } + } + else if( depth == CV_32F ) + { + len *= cn; + cn--; + for( i = j = 0; i < len; i++ ) + { + float s = mean[j]; + int i0 = i - j; + for( k = 0; k <= cn; k++ ) + s += fbuf[i0 + k]*stddev[j*(cn+1) + k]; + dst[i] = s; + j = CN_NEXT(j); + } + } + else + { + float elembuf[CV_CN_MAX]; + len *= cn; + for( i = 0; i < len; i += cn ) + { + // since we process fbuf in-place, + // we need to copy each cn-channel element + // prior to matrix multiplication + for (j = 0; j < cn; j++) + elembuf[j] = fbuf[i + j]; + for (j = 0; j < cn; j++) { + float s = mean[j]; + for( k = 0; k < cn; k++ ) + s += elembuf[k]*stddev[j*cn + k]; + fbuf[i + j] = s; + } + } + } + if (depth == CV_16F) + hal::cvt32f16f(fbuf, (float16_t*)dst, len); + else if (depth == CV_16BF) + hal::cvt32f16bf(fbuf, (bfloat16_t*)dst, len); +} -static void randnScale_16u( const float* src, ushort* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } +#define DEF_RANDNSCALE_FUNC(suffix, T, PT) \ +static void randnScale_##suffix( float* src, T* dst, int len, int cn, \ + const PT* mean, const PT* stddev, int flags ) \ +{ randnScale_(src, dst, len, cn, mean, stddev, flags); } -static void randnScale_16s( const float* src, short* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } +DEF_RANDNSCALE_FUNC(8u, uchar, float) +DEF_RANDNSCALE_FUNC(8b, bool, float) 
+DEF_RANDNSCALE_FUNC(8s, schar, float) +DEF_RANDNSCALE_FUNC(16u, ushort, float) +DEF_RANDNSCALE_FUNC(16s, short, float) +DEF_RANDNSCALE_FUNC(32u, unsigned, float) +DEF_RANDNSCALE_FUNC(32s, int, float) +DEF_RANDNSCALE_FUNC(64u, uint64_t, double) +DEF_RANDNSCALE_FUNC(64s, int64_t, double) +DEF_RANDNSCALE_FUNC(64f, double, double) -static void randnScale_32s( const float* src, int* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } - -static void randnScale_32f( const float* src, float* dst, int len, int cn, - const float* mean, const float* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } - -static void randnScale_64f( const float* src, double* dst, int len, int cn, - const double* mean, const double* stddev, bool stdmtx ) -{ randnScale_(src, dst, len, cn, mean, stddev, stdmtx); } - -typedef void (*RandnScaleFunc)(const float* src, uchar* dst, int len, int cn, - const uchar*, const uchar*, bool); +typedef void (*RandnScaleFunc)(float* src, void* dst, int len, int cn, + const void* mean, const void* stddev, int flags); static RandnScaleFunc randnScaleTab[] = { (RandnScaleFunc)randnScale_8u, (RandnScaleFunc)randnScale_8s, (RandnScaleFunc)randnScale_16u, - (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_32f, - (RandnScaleFunc)randnScale_64f, 0 + (RandnScaleFunc)randnScale_16s, (RandnScaleFunc)randnScale_32s, (RandnScaleFunc)randnScale_16_or_32f, + (RandnScaleFunc)randnScale_64f, (RandnScaleFunc)randnScale_16_or_32f, (RandnScaleFunc)randnScale_16_or_32f, + (RandnScaleFunc)randnScale_8b, (RandnScaleFunc)randnScale_64u, (RandnScaleFunc)randnScale_64s, + (RandnScaleFunc)randnScale_32u, 0, 0, 0 }; void RNG::fill( InputOutputArray _mat, int disttype, - InputArray _param1arg, InputArray _param2arg, bool saturateRange ) + InputArray _param1arg, InputArray _param2arg, + bool saturateRange ) { CV_Assert(!_mat.empty()); 
Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat(); - int depth = mat.depth(), cn = mat.channels(); + int j, depth = mat.depth(), cn = mat.channels(); + int esz1 = CV_ELEM_SIZE(depth); AutoBuffer _parambuf; - int j, k; bool fast_int_mode = false; - bool smallFlag = true; + bool small_flag = false; RandFunc func = 0; RandnScaleFunc scaleFunc = 0; @@ -405,10 +535,7 @@ void RNG::fill( InputOutputArray _mat, int disttype, (_param1.size() == Size(1, 4) && _param1.type() == CV_64F && cn <= 4))) || (_param2.rows == cn && _param2.cols == cn && disttype == NORMAL))); - Vec2i* ip = 0; - Vec2d* dp = 0; - Vec2f* fp = 0; - DivStruct* ds = 0; + const void* uni_param = 0; uchar* mean = 0; uchar* stddev = 0; bool stdmtx = false; @@ -417,47 +544,48 @@ void RNG::fill( InputOutputArray _mat, int disttype, if( disttype == UNIFORM ) { - _parambuf.allocate(cn*8 + n1 + n2); + _parambuf.allocate((sizeof(DivStruct)+sizeof(double)-1)/sizeof(double) + cn*2 + n1 + n2); double* parambuf = _parambuf.data(); double* p1 = _param1.ptr(); double* p2 = _param2.ptr(); if( !_param1.isContinuous() || _param1.type() != CV_64F || n1 != cn ) { - Mat tmp(_param1.size(), CV_64F, parambuf); - _param1.convertTo(tmp, CV_64F); p1 = parambuf; - if( n1 < cn ) - for( j = n1; j < cn; j++ ) - p1[j] = p1[j-n1]; + Mat tmp(_param1.size(), CV_64F, p1); + _param1.convertTo(tmp, CV_64F); + for( j = n1; j < cn; j++ ) + p1[j] = p1[j-n1]; } if( !_param2.isContinuous() || _param2.type() != CV_64F || n2 != cn ) { - Mat tmp(_param2.size(), CV_64F, parambuf + cn); - _param2.convertTo(tmp, CV_64F); p2 = parambuf + cn; - if( n2 < cn ) - for( j = n2; j < cn; j++ ) - p2[j] = p2[j-n2]; + Mat tmp(_param2.size(), CV_64F, p2); + _param2.convertTo(tmp, CV_64F); + for( j = n2; j < cn; j++ ) + p2[j] = p2[j-n2]; } - if( depth <= CV_32S ) + if( CV_IS_INT_TYPE(depth) ) { - ip = (Vec2i*)(parambuf + cn*2); + Vec2l* ip = (Vec2l*)(parambuf + cn*2); for( j = 0, fast_int_mode = true; j < cn; j++ ) { double a = 
std::min(p1[j], p2[j]); double b = std::max(p1[j], p2[j]); if( saturateRange ) { - a = std::max(a, depth == CV_8U || depth == CV_16U ? 0. : - depth == CV_8S ? -128. : depth == CV_16S ? -32768. : (double)INT_MIN); - b = std::min(b, depth == CV_8U ? 256. : depth == CV_16U ? 65536. : - depth == CV_8S ? 128. : depth == CV_16S ? 32768. : (double)INT_MAX); + a = std::max(a, depth == CV_8U || depth == CV_16U || depth == CV_32U || + depth == CV_64U || depth == CV_Bool ? 0. : + depth == CV_8S ? -128. : depth == CV_16S ? -32768. : + depth == CV_32S ? (double)INT_MIN : (double)INT64_MIN); + b = std::min(b, depth == CV_8U ? 256. : depth == CV_Bool ? 2. : depth == CV_16U ? 65536. : + depth == CV_8S ? 128. : depth == CV_16S ? 32768. : depth == CV_32U ? (double)UINT_MAX : + depth == CV_32S ? (double)INT_MAX : (double)INT64_MAX); } - ip[j][1] = cvCeil(a); - int idiff = ip[j][0] = cvFloor(b) - ip[j][1] - 1; + ip[j][1] = (int64_t)ceil(a); + int64_t idiff = ip[j][0] = (int64_t)floor(b) - ip[j][1] - 1; if (idiff < 0) { idiff = 0; @@ -467,30 +595,41 @@ void RNG::fill( InputOutputArray _mat, int disttype, fast_int_mode = fast_int_mode && diff <= 4294967296. 
&& (idiff & (idiff+1)) == 0; if( fast_int_mode ) - smallFlag = smallFlag && (idiff <= 255); + small_flag = idiff <= 255; else { - if( diff > INT_MAX ) - ip[j][0] = INT_MAX; - if( a < INT_MIN/2 ) - ip[j][1] = INT_MIN/2; + int64_t minval = INT32_MIN/2, maxval = INT32_MAX; + if (depth == CV_64S || depth == CV_64U) + { + minval = INT64_MIN/2; + maxval = INT64_MAX; + } + if( diff > (double)maxval ) + ip[j][0] = maxval; + if( a < (double)minval ) + ip[j][1] = minval; } } + uni_param = ip; if( !fast_int_mode ) { - ds = (DivStruct*)(ip + cn); + DivStruct* ds = (DivStruct*)(ip + cn); for( j = 0; j < cn; j++ ) { ds[j].delta = ip[j][1]; - unsigned d = ds[j].d = (unsigned)(ip[j][0]+1); - int l = 0; - while(((uint64)1 << l) < d) - l++; - ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1; - ds[j].sh1 = std::min(l, 1); - ds[j].sh2 = std::max(l - 1, 0); + ds[j].diff = ip[j][0]; + if (depth != CV_64U && depth != CV_64S) { + unsigned d = ds[j].d = (unsigned)(ip[j][0]+1); + int l = 0; + while(((uint64)1 << l) < d) + l++; + ds[j].M = (unsigned)(((uint64)1 << 32)*(((uint64)1 << l) - d)/d) + 1; + ds[j].sh1 = std::min(l, 1); + ds[j].sh2 = std::max(l - 1, 0); + } } + uni_param = ds; } func = randTab[fast_int_mode ? 
1 : 0][depth]; @@ -508,21 +647,23 @@ void RNG::fill( InputOutputArray _mat, int disttype, // dparam[0][i]*X + dparam[1][i] if( depth != CV_64F ) { - fp = (Vec2f*)(parambuf + cn*2); + Vec2f* fp = (Vec2f*)(parambuf + cn*2); for( j = 0; j < cn; j++ ) { fp[j][0] = (float)(std::min(maxdiff, p2[j] - p1[j])*scale); fp[j][1] = (float)((p2[j] + p1[j])*0.5); } + uni_param = fp; } else { - dp = (Vec2d*)(parambuf + cn*2); + Vec2d* dp = (Vec2d*)(parambuf + cn*2); for( j = 0; j < cn; j++ ) { dp[j][0] = std::min(DBL_MAX, p2[j] - p1[j])*scale; dp[j][1] = ((p2[j] + p1[j])*0.5); } + uni_param = dp; } func = randTab[0][depth]; @@ -534,8 +675,7 @@ void RNG::fill( InputOutputArray _mat, int disttype, _parambuf.allocate(MAX(n1, cn) + MAX(n2, cn)); double* parambuf = _parambuf.data(); - int ptype = depth == CV_64F ? CV_64F : CV_32F; - int esz = (int)CV_ELEM_SIZE(ptype); + int ptype = esz1 == 8 ? CV_64F : CV_32F; if( _param1.isContinuous() && _param1.type() == ptype && n1 >= cn) mean = _param1.ptr(); @@ -547,8 +687,8 @@ void RNG::fill( InputOutputArray _mat, int disttype, } if( n1 < cn ) - for( j = n1*esz; j < cn*esz; j++ ) - mean[j] = mean[j - n1*esz]; + for( j = n1*esz1; j < cn*esz1; j++ ) + mean[j] = mean[j - n1*esz1]; if( _param2.isContinuous() && _param2.type() == ptype && n2 >= cn) stddev = _param2.ptr(); @@ -560,8 +700,8 @@ void RNG::fill( InputOutputArray _mat, int disttype, } if( n2 < cn ) - for( j = n2*esz; j < cn*esz; j++ ) - stddev[j] = stddev[j - n2*esz]; + for( j = n2*esz1; j < cn*esz1; j++ ) + stddev[j] = stddev[j - n2*esz1]; stdmtx = _param2.rows == cn && _param2.cols == cn; scaleFunc = randnScaleTab[depth]; @@ -571,59 +711,18 @@ void RNG::fill( InputOutputArray _mat, int disttype, CV_Error( CV_StsBadArg, "Unknown distribution type" ); const Mat* arrays[] = {&mat, 0}; - uchar* ptr; + uchar* ptr = 0; NAryMatIterator it(arrays, &ptr, 1); - int total = (int)it.size, blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total); - size_t esz = mat.elemSize(); - AutoBuffer buf; - 
uchar* param = 0; - float* nbuf = 0; - float* tmpbuf = 0; + float fbuf[BLOCK_SIZE + CV_CN_MAX]; + int total = (int)it.size; + int blockSize = std::min((BLOCK_SIZE + cn - 1)/cn, total); + size_t esz = (size_t)esz1*cn; + int flags = mat.type(); if( disttype == UNIFORM ) - { - buf.allocate(blockSize*cn*4); - param = (uchar*)(double*)buf.data(); - - if( depth <= CV_32S ) - { - if( !fast_int_mode ) - { - DivStruct* p = (DivStruct*)param; - for( j = 0; j < blockSize*cn; j += cn ) - for( k = 0; k < cn; k++ ) - p[j + k] = ds[k]; - } - else - { - Vec2i* p = (Vec2i*)param; - for( j = 0; j < blockSize*cn; j += cn ) - for( k = 0; k < cn; k++ ) - p[j + k] = ip[k]; - } - } - else if( depth != CV_64F ) - { - Vec2f* p = (Vec2f*)param; - for( j = 0; j < blockSize*cn; j += cn ) - for( k = 0; k < cn; k++ ) - p[j + k] = fp[k]; - if( depth == CV_16F ) - tmpbuf = (float*)p + blockSize*cn*2; - } - else - { - Vec2d* p = (Vec2d*)param; - for( j = 0; j < blockSize*cn; j += cn ) - for( k = 0; k < cn; k++ ) - p[j + k] = dp[k]; - } - } + flags |= (small_flag ? (int)RNG_FLAG_SMALL : 0); else - { - buf.allocate((blockSize*cn+1)/2); - nbuf = (float*)(double*)buf.data(); - } + flags |= (stdmtx ? 
(int)RNG_FLAG_STDMTX : 0); for( size_t i = 0; i < it.nplanes; i++, ++it ) { @@ -631,14 +730,13 @@ void RNG::fill( InputOutputArray _mat, int disttype, { int len = std::min(total - j, blockSize); - if( disttype == CV_RAND_UNI ) - func( ptr, len*cn, &state, param, tmpbuf, smallFlag ); + if( disttype == UNIFORM ) + func(ptr + j*esz, len, cn, &state, uni_param, fbuf, flags); else { - randn_0_1_32f(nbuf, len*cn, &state); - scaleFunc(nbuf, ptr, len, cn, mean, stddev, stdmtx); + randn_0_1_32f(fbuf, len*cn, &state); + scaleFunc(fbuf, ptr + j*esz, len, cn, mean, stddev, flags); } - ptr += len*esz; } } } diff --git a/modules/core/src/split.dispatch.cpp b/modules/core/src/split.dispatch.cpp index fc5e073497..42a07ed2e3 100644 --- a/modules/core/src/split.dispatch.cpp +++ b/modules/core/src/split.dispatch.cpp @@ -53,12 +53,15 @@ typedef void (*SplitFunc)(const uchar* src, uchar** dst, int len, int cn); static SplitFunc getSplitFunc(int depth) { - static SplitFunc splitTab[] = + static SplitFunc splitTab[CV_DEPTH_MAX] = { (SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split32s), (SplitFunc)GET_OPTIMIZED(cv::hal::split32s), - (SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u) + (SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u), + (SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u), + (SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split64s), + (SplitFunc)GET_OPTIMIZED(cv::hal::split32s), 0, 0, 0 }; return splitTab[depth]; diff --git a/modules/core/src/sum.simd.hpp b/modules/core/src/sum.simd.hpp index 2232013b24..045f40ebed 100644 --- a/modules/core/src/sum.simd.hpp +++ b/modules/core/src/sum.simd.hpp @@ -434,7 +434,7 @@ static int sum64f( const double* src, const uchar* 
mask, double* dst, int len, i SumFunc getSumFunc(int depth) { - static SumFunc sumTab[] = + static SumFunc sumTab[CV_DEPTH_MAX] = { (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s, (SumFunc)sum16u, (SumFunc)sum16s, diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ea9cda56be..c6756f6502 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -40,7 +40,11 @@ struct BaseElemWiseOp ninputs > 1 ? ARITHM_MAX_CHANNELS : 4); } - virtual double getMaxErr(int depth) { return depth < CV_32F ? 1 : depth == CV_32F ? 1e-5 : 1e-12; } + virtual double getMaxErr(int depth) + { + return depth < CV_32F || depth == CV_32U || depth == CV_64U || depth == CV_64S ? 1 : + depth == CV_16F || depth == CV_16BF ? 1e-2 : depth == CV_32F ? 1e-5 : 1e-12; + } virtual void generateScalars(int depth, RNG& rng) { const double m = 3.; @@ -93,11 +97,31 @@ struct BaseElemWiseOp int context; }; +static const _OutputArray::DepthMask baseArithmTypeMask = + _OutputArray::DepthMask( + _OutputArray::DEPTH_MASK_8U | + _OutputArray::DEPTH_MASK_16U | + _OutputArray::DEPTH_MASK_16S | + _OutputArray::DEPTH_MASK_32S | + _OutputArray::DEPTH_MASK_32F | + _OutputArray::DEPTH_MASK_64F); -struct BaseAddOp : public BaseElemWiseOp +struct BaseArithmOp : public BaseElemWiseOp +{ + BaseArithmOp(int _ninputs, int _flags, double _alpha, double _beta, Scalar _gamma=Scalar::all(0)) + : BaseElemWiseOp(_ninputs, _flags, _alpha, _beta, _gamma) {} + + int getRandomType(RNG& rng) + { + return cvtest::randomType(rng, baseArithmTypeMask, 1, + ninputs > 1 ? 
ARITHM_MAX_CHANNELS : 4); + } +}; + +struct BaseAddOp : public BaseArithmOp { BaseAddOp(int _ninputs, int _flags, double _alpha, double _beta, Scalar _gamma=Scalar::all(0)) - : BaseElemWiseOp(_ninputs, _flags, _alpha, _beta, _gamma) {} + : BaseArithmOp(_ninputs, _flags, _alpha, _beta, _gamma) {} void refop(const vector& src, Mat& dst, const Mat& mask) { @@ -192,9 +216,9 @@ struct AddWeightedOp : public BaseAddOp } }; -struct MulOp : public BaseElemWiseOp +struct MulOp : public BaseArithmOp { - MulOp() : BaseElemWiseOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + MulOp() : BaseArithmOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void getValueRange(int depth, double& minval, double& maxval) { minval = depth < CV_32S ? cvtest::getMinVal(depth) : depth == CV_32S ? -1000000 : -1000.; @@ -216,9 +240,9 @@ struct MulOp : public BaseElemWiseOp } }; -struct DivOp : public BaseElemWiseOp +struct DivOp : public BaseArithmOp { - DivOp() : BaseElemWiseOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + DivOp() : BaseArithmOp(2, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::divide(src[0], src[1], dst, alpha); @@ -233,9 +257,9 @@ struct DivOp : public BaseElemWiseOp } }; -struct RecipOp : public BaseElemWiseOp +struct RecipOp : public BaseArithmOp { - RecipOp() : BaseElemWiseOp(1, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + RecipOp() : BaseArithmOp(1, FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::divide(alpha, src[0], dst); @@ -339,9 +363,9 @@ struct LogicSOp : public BaseElemWiseOp char opcode; }; -struct MinOp : public BaseElemWiseOp +struct MinOp : public BaseArithmOp { - MinOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + MinOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::min(src[0], src[1], dst); @@ -356,9 +380,9 @@ struct MinOp : public 
BaseElemWiseOp } }; -struct MaxOp : public BaseElemWiseOp +struct MaxOp : public BaseArithmOp { - MaxOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + MaxOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::max(src[0], src[1], dst); @@ -373,9 +397,9 @@ struct MaxOp : public BaseElemWiseOp } }; -struct MinSOp : public BaseElemWiseOp +struct MinSOp : public BaseArithmOp { - MinSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {} + MinSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::min(src[0], gamma[0], dst); @@ -390,9 +414,9 @@ struct MinSOp : public BaseElemWiseOp } }; -struct MaxSOp : public BaseElemWiseOp +struct MaxSOp : public BaseArithmOp { - MaxSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {} + MaxSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::max(src[0], gamma[0], dst); @@ -407,9 +431,9 @@ struct MaxSOp : public BaseElemWiseOp } }; -struct CmpOp : public BaseElemWiseOp +struct CmpOp : public BaseArithmOp { - CmpOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; } + CmpOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; } void generateScalars(int depth, RNG& rng) { BaseElemWiseOp::generateScalars(depth, rng); @@ -425,7 +449,7 @@ struct CmpOp : public BaseElemWiseOp } int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1); + return cvtest::randomType(rng, baseArithmTypeMask, 1, 1); } double getMaxErr(int) @@ -435,9 +459,9 @@ struct CmpOp : public BaseElemWiseOp int cmpop; }; -struct CmpSOp : public BaseElemWiseOp +struct CmpSOp : public BaseArithmOp { - CmpSOp() : BaseElemWiseOp(1, 
FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; } + CmpSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+REAL_GAMMA, 1, 1, Scalar::all(0)) { cmpop = 0; } void generateScalars(int depth, RNG& rng) { BaseElemWiseOp::generateScalars(depth, rng); @@ -455,7 +479,7 @@ struct CmpSOp : public BaseElemWiseOp } int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1); + return cvtest::randomType(rng, baseArithmTypeMask, 1, 1); } double getMaxErr(int) { @@ -478,7 +502,7 @@ struct CopyOp : public BaseElemWiseOp } int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS); + return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS); } double getMaxErr(int) { @@ -500,7 +524,7 @@ struct SetOp : public BaseElemWiseOp } int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS); + return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS); } double getMaxErr(int) { @@ -650,9 +674,9 @@ static void inRangeS(const Mat& src, const Scalar& lb, const Scalar& rb, Mat& ds } // namespace CVTEST_GUARD_SYMBOL(inRange); -struct InRangeSOp : public BaseElemWiseOp +struct InRangeSOp : public BaseArithmOp { - InRangeSOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA, 1, 1, Scalar::all(0)) {} + InRangeSOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { cv::inRange(src[0], gamma, gamma1, dst); @@ -680,9 +704,9 @@ struct InRangeSOp : public BaseElemWiseOp }; -struct InRangeOp : public BaseElemWiseOp +struct InRangeOp : public BaseArithmOp { - InRangeOp() : BaseElemWiseOp(3, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + InRangeOp() : BaseArithmOp(3, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} void op(const vector& src, Mat& dst, const Mat&) { Mat lb, rb; @@ -725,7 +749,7 @@ struct 
ConvertScaleOp : public BaseElemWiseOp } double getMaxErr(int) { - return ddepth <= CV_32S ? 2 : ddepth < CV_64F ? 1e-3 : 1e-12; + return ddepth <= CV_32S || ddepth == CV_32U || ddepth == CV_64U || ddepth == CV_64S ? 2 : ddepth == CV_64F ? 1e-12 : ddepth == CV_Bool ? 0 : ddepth == CV_16BF ? 1e-2 : 2e-3; } void generateScalars(int depth, RNG& rng) { @@ -1018,9 +1042,9 @@ static void log(const Mat& src, Mat& dst) } // namespace -struct ExpOp : public BaseElemWiseOp +struct ExpOp : public BaseArithmOp { - ExpOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + ExpOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} int getRandomType(RNG& rng) { return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, ARITHM_MAX_CHANNELS); @@ -1045,9 +1069,9 @@ struct ExpOp : public BaseElemWiseOp }; -struct LogOp : public BaseElemWiseOp +struct LogOp : public BaseArithmOp { - LogOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} + LogOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) {} int getRandomType(RNG& rng) { return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_FLT, 1, ARITHM_MAX_CHANNELS); @@ -1129,9 +1153,9 @@ static void cartToPolar(const Mat& mx, const Mat& my, Mat& mmag, Mat& mangle, bo } // namespace -struct CartToPolarToCartOp : public BaseElemWiseOp +struct CartToPolarToCartOp : public BaseArithmOp { - CartToPolarToCartOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) + CartToPolarToCartOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)) { context = 3; angleInDegrees = true; @@ -1173,9 +1197,9 @@ struct CartToPolarToCartOp : public BaseElemWiseOp }; -struct MeanOp : public BaseElemWiseOp +struct MeanOp : public BaseArithmOp { - MeanOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) + MeanOp() : BaseArithmOp(1, 
FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) { context = 3; }; @@ -1196,9 +1220,9 @@ struct MeanOp : public BaseElemWiseOp }; -struct SumOp : public BaseElemWiseOp +struct SumOp : public BaseArithmOp { - SumOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) + SumOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) { context = 3; }; @@ -1219,13 +1243,13 @@ struct SumOp : public BaseElemWiseOp }; -struct CountNonZeroOp : public BaseElemWiseOp +struct CountNonZeroOp : public BaseArithmOp { - CountNonZeroOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT+SUPPORT_MASK, 1, 1, Scalar::all(0)) + CountNonZeroOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SCALAR_OUTPUT+SUPPORT_MASK, 1, 1, Scalar::all(0)) {} int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, 1); + return cvtest::randomType(rng, baseArithmTypeMask, 1, 1); } void op(const vector& src, Mat& dst, const Mat& mask) { @@ -1252,12 +1276,12 @@ struct CountNonZeroOp : public BaseElemWiseOp }; -struct MeanStdDevOp : public BaseElemWiseOp +struct MeanStdDevOp : public BaseArithmOp { Scalar sqmeanRef; int cn; - MeanStdDevOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) + MeanStdDevOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) { cn = 0; context = 7; @@ -1296,16 +1320,16 @@ struct MeanStdDevOp : public BaseElemWiseOp }; -struct NormOp : public BaseElemWiseOp +struct NormOp : public BaseArithmOp { - NormOp() : BaseElemWiseOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) + NormOp() : BaseArithmOp(2, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) { context = 1; normType = 0; }; int getRandomType(RNG& rng) { - int type = cvtest::randomType(rng, 
_OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 4); + int type = cvtest::randomType(rng, baseArithmTypeMask, 1, 4); for(;;) { normType = rng.uniform(1, 8); @@ -1343,15 +1367,15 @@ struct NormOp : public BaseElemWiseOp }; -struct MinMaxLocOp : public BaseElemWiseOp +struct MinMaxLocOp : public BaseArithmOp { - MinMaxLocOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) + MinMaxLocOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA+SUPPORT_MASK+SCALAR_OUTPUT, 1, 1, Scalar::all(0)) { context = ARITHM_MAX_NDIMS*2 + 2; }; int getRandomType(RNG& rng) { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1); + return cvtest::randomType(rng, baseArithmTypeMask, 1, 1); } void saveOutput(const vector& minidx, const vector& maxidx, double minval, double maxval, Mat& dst) @@ -1389,16 +1413,16 @@ struct MinMaxLocOp : public BaseElemWiseOp } }; -struct reduceArgMinMaxOp : public BaseElemWiseOp +struct reduceArgMinMaxOp : public BaseArithmOp { - reduceArgMinMaxOp() : BaseElemWiseOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)), + reduceArgMinMaxOp() : BaseArithmOp(1, FIX_ALPHA+FIX_BETA+FIX_GAMMA, 1, 1, Scalar::all(0)), isLast(false), isMax(false), axis(0) { context = ARITHM_MAX_NDIMS*2 + 2; }; int getRandomType(RNG& rng) override { - return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_BUT_8S, 1, 1); + return cvtest::randomType(rng, baseArithmTypeMask, 1, 1); } void getRandomSize(RNG& rng, vector& size) override { @@ -1568,82 +1592,82 @@ INSTANTIATE_TEST_CASE_P(Core_CartToPolarToCart, ElemWiseTest, ::testing::Values( TEST(Core_ArithmMask, uninitialized) { - RNG& rng = theRNG(); - const int MAX_DIM=3; - int sizes[MAX_DIM]; - for( int iter = 0; iter < 100; iter++ ) - { - int dims = rng.uniform(1, MAX_DIM+1); - int depth = rng.uniform(CV_8U, CV_64F+1); - int cn = rng.uniform(1, 6); - int type = CV_MAKETYPE(depth, cn); - int op = rng.uniform(0, depth < CV_32F ? 
5 : 2); // don't run binary operations between floating-point values - int depth1 = op <= 1 ? CV_64F : depth; - for (int k = 0; k < MAX_DIM; k++) - { - sizes[k] = k < dims ? rng.uniform(1, 30) : 0; - } - SCOPED_TRACE(cv::format("iter=%d dims=%d depth=%d cn=%d type=%d op=%d depth1=%d dims=[%d; %d; %d]", - iter, dims, depth, cn, type, op, depth1, sizes[0], sizes[1], sizes[2])); + RNG& rng = theRNG(); + const int MAX_DIM=3; + int sizes[MAX_DIM]; + for( int iter = 0; iter < 100; iter++ ) + { + int dims = rng.uniform(1, MAX_DIM+1); + int depth = rng.uniform(CV_8U, CV_64F+1); + int cn = rng.uniform(1, 6); + int type = CV_MAKETYPE(depth, cn); + int op = rng.uniform(0, depth < CV_32F ? 5 : 2); // don't run binary operations between floating-point values + int depth1 = op <= 1 ? CV_64F : depth; + for (int k = 0; k < MAX_DIM; k++) + { + sizes[k] = k < dims ? rng.uniform(1, 30) : 0; + } + SCOPED_TRACE(cv::format("iter=%d dims=%d depth=%d cn=%d type=%d op=%d depth1=%d dims=[%d; %d; %d]", + iter, dims, depth, cn, type, op, depth1, sizes[0], sizes[1], sizes[2])); - Mat a(dims, sizes, type), a1; - Mat b(dims, sizes, type), b1; - Mat mask(dims, sizes, CV_8U); - Mat mask1; - Mat c, d; + Mat a(dims, sizes, type), a1; + Mat b(dims, sizes, type), b1; + Mat mask(dims, sizes, CV_8U); + Mat mask1; + Mat c, d; - rng.fill(a, RNG::UNIFORM, 0, 100); - rng.fill(b, RNG::UNIFORM, 0, 100); + rng.fill(a, RNG::UNIFORM, 0, 100); + rng.fill(b, RNG::UNIFORM, 0, 100); - // [-2,2) range means that the each generated random number - // will be one of -2, -1, 0, 1. Saturated to [0,255], it will become - // 0, 0, 0, 1 => the mask will be filled by ~25%. - rng.fill(mask, RNG::UNIFORM, -2, 2); + // [-2,2) range means that the each generated random number + // will be one of -2, -1, 0, 1. Saturated to [0,255], it will become + // 0, 0, 0, 1 => the mask will be filled by ~25%. 
+ rng.fill(mask, RNG::UNIFORM, -2, 2); - a.convertTo(a1, depth1); - b.convertTo(b1, depth1); - // invert the mask - cv::compare(mask, 0, mask1, CMP_EQ); - a1.setTo(0, mask1); - b1.setTo(0, mask1); + a.convertTo(a1, depth1); + b.convertTo(b1, depth1); + // invert the mask + cv::compare(mask, 0, mask1, CMP_EQ); + a1.setTo(0, mask1); + b1.setTo(0, mask1); - if( op == 0 ) - { - cv::add(a, b, c, mask); - cv::add(a1, b1, d); - } - else if( op == 1 ) - { - cv::subtract(a, b, c, mask); - cv::subtract(a1, b1, d); - } - else if( op == 2 ) - { - cv::bitwise_and(a, b, c, mask); - cv::bitwise_and(a1, b1, d); - } - else if( op == 3 ) - { - cv::bitwise_or(a, b, c, mask); - cv::bitwise_or(a1, b1, d); - } - else if( op == 4 ) - { - cv::bitwise_xor(a, b, c, mask); - cv::bitwise_xor(a1, b1, d); - } - Mat d1; - d.convertTo(d1, depth); - EXPECT_LE(cvtest::norm(c, d1, CV_C), DBL_EPSILON); - } + if( op == 0 ) + { + cv::add(a, b, c, mask); + cv::add(a1, b1, d); + } + else if( op == 1 ) + { + cv::subtract(a, b, c, mask); + cv::subtract(a1, b1, d); + } + else if( op == 2 ) + { + cv::bitwise_and(a, b, c, mask); + cv::bitwise_and(a1, b1, d); + } + else if( op == 3 ) + { + cv::bitwise_or(a, b, c, mask); + cv::bitwise_or(a1, b1, d); + } + else if( op == 4 ) + { + cv::bitwise_xor(a, b, c, mask); + cv::bitwise_xor(a1, b1, d); + } + Mat d1; + d.convertTo(d1, depth); + EXPECT_LE(cvtest::norm(c, d1, CV_C), DBL_EPSILON); + } - Mat_ tmpSrc(100,100); - tmpSrc = 124; - Mat_ tmpMask(100,100); - tmpMask = 255; - Mat_ tmpDst(100,100); - tmpDst = 2; - tmpSrc.copyTo(tmpDst,tmpMask); + Mat_ tmpSrc(100,100); + tmpSrc = 124; + Mat_ tmpMask(100,100); + tmpMask = 255; + Mat_ tmpDst(100,100); + tmpDst = 2; + tmpSrc.copyTo(tmpDst,tmpMask); } TEST(Multiply, FloatingPointRounding) @@ -2273,35 +2297,35 @@ TEST(Core_minMaxIdx, regression_9207_2) const int rows = 13; const int cols = 15; uchar mask_[rows*cols] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, - 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, - 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, - 255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, - 255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 255, - 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 0, - 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 255, 0, - 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 255, 255, 0, - 255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 0, - 255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 255, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, + 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, + 255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, + 255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 255, + 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 255, 0, + 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 255, 0, + 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 255, 255, 0, + 255, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 255, 0, + 255, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 255, 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; uchar src_[15*13] = { - 5, 5, 5, 5, 5, 6, 5, 2, 0, 4, 6, 6, 4, 1, 0, - 6, 5, 4, 4, 5, 6, 6, 5, 2, 0, 4, 6, 5, 2, 0, - 3, 2, 1, 1, 2, 4, 6, 6, 4, 2, 3, 4, 4, 2, 0, - 1, 0, 0, 0, 0, 1, 4, 5, 4, 4, 4, 4, 3, 2, 0, - 0, 0, 0, 0, 0, 0, 2, 3, 4, 4, 4, 3, 2, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 3, 2, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 3, 3, 1, 0, 1, - 0, 0, 0, 0, 0, 0, 1, 4, 5, 6, 5, 4, 3, 2, 0, - 1, 0, 0, 0, 0, 0, 3, 5, 5, 4, 3, 4, 4, 3, 0, - 2, 0, 0, 0, 0, 2, 5, 6, 5, 2, 2, 5, 4, 3, 0 -}; + 5, 5, 5, 5, 5, 6, 5, 2, 0, 4, 6, 6, 4, 1, 0, + 6, 5, 4, 4, 5, 6, 6, 5, 
2, 0, 4, 6, 5, 2, 0, + 3, 2, 1, 1, 2, 4, 6, 6, 4, 2, 3, 4, 4, 2, 0, + 1, 0, 0, 0, 0, 1, 4, 5, 4, 4, 4, 4, 3, 2, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 4, 4, 4, 3, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 3, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 1, 2, 4, 3, 3, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 1, 4, 5, 6, 5, 4, 3, 2, 0, + 1, 0, 0, 0, 0, 0, 3, 5, 5, 4, 3, 4, 4, 3, 0, + 2, 0, 0, 0, 0, 2, 5, 6, 5, 2, 2, 5, 4, 3, 0 + }; Mat mask(Size(cols, rows), CV_8UC1, mask_); Mat src(Size(cols, rows), CV_8UC1, src_); double minVal = -0.0, maxVal = -0.0; @@ -2715,7 +2739,6 @@ TEST(Core_CartPolar, inplace) EXPECT_THROW(cv::polarToCart(uA[0], uA[1], uA[1], uA[0]), cv::Exception); EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); EXPECT_THROW(cv::cartToPolar(uA[0], uA[1], uA[0], uA[1]), cv::Exception); - } }} // namespace diff --git a/modules/core/test/test_dxt.cpp b/modules/core/test/test_dxt.cpp index 05d1f3062c..a1d40e0ac9 100644 --- a/modules/core/test/test_dxt.cpp +++ b/modules/core/test/test_dxt.cpp @@ -589,7 +589,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx, { if( cn == 1 ) { - types[OUTPUT][0] = depth + 8; + types[OUTPUT][0] = CV_MAKETYPE(depth, 2); sizes[TEMP][0] = size; } sizes[INPUT][0] = sizes[INPUT][1] = size; @@ -597,7 +597,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx, } else if( /*(cn == 2 && (bits&32)) ||*/ (cn == 1 && allow_complex) ) { - types[TEMP][0] = depth + 8; // CV_??FC2 + types[TEMP][0] = CV_MAKETYPE(depth, 2); // CV_??FC2 sizes[TEMP][0] = size; size = cvSize(size.width/2+1, size.height); @@ -614,7 +614,7 @@ void CxCore_DXTBaseTest::get_test_array_types_and_sizes( int test_case_idx, else { if( allow_complex ) - types[OUTPUT][0] = depth + 8; + types[OUTPUT][0] = CV_MAKETYPE(depth, 2); if( cn == 2 ) { diff --git a/modules/core/test/test_io.cpp 
b/modules/core/test/test_io.cpp index 5e1f6d7a8e..4def1a0a0a 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -680,7 +680,9 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo reference.read(&reference_data[0], ref_sz); reference.close(); - EXPECT_EQ(reference_data, test_data); + if (useMemory) { + EXPECT_EQ(reference_data, test_data); + } } std::cout << "Storage size: " << sz << std::endl; EXPECT_LE(sz, (size_t)6000); @@ -736,16 +738,14 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo { for (int j = 0; j < _2d_out.cols; ++j) { - EXPECT_EQ(_2d_in.at(i, j), _2d_out.at(i, j)); - if (::testing::Test::HasNonfatalFailure()) - { + if (_2d_in.at(i, j) != _2d_out.at(i, j)) { + EXPECT_EQ(_2d_in.at(i, j), _2d_out.at(i, j)); printf("i = %d, j = %d\n", i, j); - errors++; - } - if (errors >= 3) - { - i = _2d_out.rows; - break; + if (++errors >= 3) + { + i = _2d_out.rows; + break; + } } } } @@ -760,7 +760,10 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo ASSERT_EQ(_rd_in.cols , _rd_out.cols); ASSERT_EQ(_rd_in.dims , _rd_out.dims); ASSERT_EQ(_rd_in.depth(), _rd_out.depth()); - EXPECT_EQ(0, cv::norm(_rd_in, _rd_out, NORM_INF)); + + if (useMemory) { + EXPECT_EQ(0, cv::norm(_rd_in, _rd_out, NORM_INF)); + } } } @@ -1901,15 +1904,25 @@ static void test_20279(FileStorage& fs) EXPECT_EQ(CV_16FC3, m16fc3.type()) << typeToString(m16fc3.type()); //std::cout << m16fc3 << std::endl; + Mat m16bfc1, m16bfc3; + m16fc1.convertTo(m16bfc1, CV_16BF); + m16fc3.convertTo(m16bfc3, CV_16BF); + fs << "m16fc1" << m16fc1; fs << "m16fc3" << m16fc3; + fs << "m16bfc1" << m16bfc1; + fs << "m16bfc3" << m16bfc3; string content = fs.releaseAndGetString(); if (cvtest::debugLevel > 0) std::cout << content << std::endl; FileStorage fs_read(content, FileStorage::READ + FileStorage::MEMORY); + Mat m16fc1_result; Mat m16fc3_result; + Mat m16bfc1_result; + Mat m16bfc3_result; + 
fs_read["m16fc1"] >> m16fc1_result; ASSERT_FALSE(m16fc1_result.empty()); EXPECT_EQ(CV_16FC1, m16fc1_result.type()) << typeToString(m16fc1_result.type()); @@ -1919,6 +1932,16 @@ static void test_20279(FileStorage& fs) ASSERT_FALSE(m16fc3_result.empty()); EXPECT_EQ(CV_16FC3, m16fc3_result.type()) << typeToString(m16fc3_result.type()); EXPECT_LE(cvtest::norm(m16fc3_result, m16fc3, NORM_INF), 1e-2); + + fs_read["m16bfc1"] >> m16bfc1_result; + ASSERT_FALSE(m16bfc1_result.empty()); + EXPECT_EQ(CV_16BFC1, m16bfc1_result.type()) << typeToString(m16bfc1_result.type()); + EXPECT_LE(cvtest::norm(m16bfc1_result, m16bfc1, NORM_INF), 2e-2); + + fs_read["m16bfc3"] >> m16bfc3_result; + ASSERT_FALSE(m16bfc3_result.empty()); + EXPECT_EQ(CV_16BFC3, m16bfc3_result.type()) << typeToString(m16bfc3_result.type()); + EXPECT_LE(cvtest::norm(m16bfc3_result, m16bfc3, NORM_INF), 2e-2); } TEST(Core_InputOutput, FileStorage_16F_xml) diff --git a/modules/core/test/test_misc.cpp b/modules/core/test/test_misc.cpp index 8ed0afe771..f508f51ac4 100644 --- a/modules/core/test/test_misc.cpp +++ b/modules/core/test/test_misc.cpp @@ -31,12 +31,12 @@ TEST(Core_OutputArrayCreate, _1997) ASSERT_NO_THROW(local::create( mat(Rect(Point(), submatSize)), submatSize, mat.type() )); } -TEST(Core_SaturateCast, NegativeNotClipped) +TEST(Core_SaturateCast, NegativesAreClipped) { double d = -1.0; unsigned int val = cv::saturate_cast(d); - ASSERT_EQ(0xffffffff, val); + ASSERT_EQ(0u, val); } template diff --git a/modules/imgproc/misc/java/test/ImgprocTest.java b/modules/imgproc/misc/java/test/ImgprocTest.java index 873292bc65..5ccf0f53d5 100644 --- a/modules/imgproc/misc/java/test/ImgprocTest.java +++ b/modules/imgproc/misc/java/test/ImgprocTest.java @@ -216,19 +216,19 @@ public class ImgprocTest extends OpenCVTestCase { public void testBoxFilterMatMatIntSize() { Size size = new Size(3, 3); - Imgproc.boxFilter(gray0, dst, 8, size); + Imgproc.boxFilter(gray0, dst, 0, size); assertMatEqual(gray0, dst); // TODO_: write 
better test } public void testBoxFilterMatMatIntSizePointBoolean() { - Imgproc.boxFilter(gray255, dst, 8, size, anchorPoint, false); + Imgproc.boxFilter(gray255, dst, 0, size, anchorPoint, false); assertMatEqual(gray255, dst); // TODO_: write better test } public void testBoxFilterMatMatIntSizePointBooleanInt() { - Imgproc.boxFilter(gray255, dst, 8, size, anchorPoint, false, Core.BORDER_REFLECT); + Imgproc.boxFilter(gray255, dst, 0, size, anchorPoint, false, Core.BORDER_REFLECT); assertMatEqual(gray255, dst); // TODO_: write better test } diff --git a/modules/imgproc/test/test_pc.cpp b/modules/imgproc/test/test_pc.cpp index 7b06e3bd65..173866ac58 100644 --- a/modules/imgproc/test/test_pc.cpp +++ b/modules/imgproc/test/test_pc.cpp @@ -186,10 +186,10 @@ void CV_DivSpectrumsTest::get_test_array_types_and_sizes( int test_case_idx, vec // Inputs are CCS-packed arrays. Prepare outputs and temporary inputs as complex matrices. if( type == CV_32FC1 || type == CV_64FC1 ) { - types[OUTPUT][0] += 8; - types[REF_OUTPUT][0] += 8; - types[TEMP][0] += 8; - types[TEMP][1] += 8; + types[OUTPUT][0] += CV_DEPTH_MAX; + types[REF_OUTPUT][0] += CV_DEPTH_MAX; + types[TEMP][0] += CV_DEPTH_MAX; + types[TEMP][1] += CV_DEPTH_MAX; } } diff --git a/modules/stitching/src/exposure_compensate.cpp b/modules/stitching/src/exposure_compensate.cpp index 59542d95ba..40cb58fd15 100644 --- a/modules/stitching/src/exposure_compensate.cpp +++ b/modules/stitching/src/exposure_compensate.cpp @@ -129,7 +129,7 @@ void GainCompensator::singleFeed(const std::vector &corners, const std::v const int num_images = static_cast(images.size()); Mat_ N(num_images, num_images); N.setTo(0); Mat_ I(num_images, num_images); I.setTo(0); - Mat_ skip(num_images, 1); skip.setTo(true); + Mat_ skip(num_images, 1); skip.setTo(1); Mat subimg1, subimg2; Mat_ submask1, submask2, intersect; diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index cd02766148..4c6cf95858 100644 --- a/modules/ts/src/ts_func.cpp +++ 
b/modules/ts/src/ts_func.cpp @@ -72,10 +72,10 @@ int randomType(RNG& rng, _OutputArray::DepthMask typeMask, int minChannels, int { int channels = rng.uniform(minChannels, maxChannels+1); int depth = 0; - CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL_16F) != 0); + CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL) != 0); for(;;) { - depth = rng.uniform(CV_8U, CV_16F+1); + depth = rng.uniform(CV_8U, CV_DEPTH_CURR_MAX); if( ((1 << depth) & typeMask) != 0 ) break; } @@ -246,8 +246,43 @@ convert_(const _Tp1* src, _Tp2* dst, size_t total, double alpha, double beta) dst[i] = saturate_cast<_Tp2>(src[i]*alpha + beta); } +template inline void +convert_to_bool(const _Tp1* src, bool* dst, + size_t total, double alpha, double beta) +{ + size_t i; + if( alpha == 1 && beta == 0 ) + for( i = 0; i < total; i++ ) + dst[i] = src[i] != 0; + else if( beta == 0 ) + for( i = 0; i < total; i++ ) + dst[i] = src[i]*alpha != 0; + else + for( i = 0; i < total; i++ ) + dst[i] = src[i]*alpha + beta != 0; +} + +template +inline void +convert_(const bool* src_, _Tp2* dst, + size_t total, double alpha, double beta) +{ + size_t i; + const uint8_t* src = (const uint8_t*)src_; + if( alpha == 1 && beta == 0 ) + for( i = 0; i < total; i++ ) + dst[i] = saturate_cast<_Tp2>(src[i] != 0); + else if( beta == 0 ) + for( i = 0; i < total; i++ ) + dst[i] = saturate_cast<_Tp2>((src[i] != 0)*alpha); + else + for( i = 0; i < total; i++ ) + dst[i] = saturate_cast<_Tp2>((src[i] != 0)*alpha + beta); +} + template inline void -convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, double beta) +convertTo(const _Tp* src, void* dst, int dtype, + size_t total, double alpha, double beta) { switch( CV_MAT_DEPTH(dtype) ) { @@ -263,6 +298,9 @@ convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, doub case CV_16S: convert_(src, (short*)dst, total, alpha, beta); break; + case CV_32U: + convert_(src, (unsigned*)dst, total, alpha, beta); + break; case CV_32S: convert_(src, 
(int*)dst, total, alpha, beta); break; @@ -272,16 +310,35 @@ convertTo(const _Tp* src, void* dst, int dtype, size_t total, double alpha, doub case CV_64F: convert_(src, (double*)dst, total, alpha, beta); break; + case CV_64U: + convert_(src, (uint64_t*)dst, total, alpha, beta); + break; + case CV_64S: + convert_(src, (int64_t*)dst, total, alpha, beta); + break; + case CV_16F: + convert_(src, (cv::float16_t*)dst, total, alpha, beta); + break; + case CV_16BF: + convert_(src, (cv::bfloat16_t*)dst, total, alpha, beta); + break; + case CV_Bool: + convert_to_bool(src, (bool*)dst, total, alpha, beta); + break; default: CV_Assert(0); } } -void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, double beta) +void convert(const Mat& src, cv::OutputArray _dst, + int dtype, double alpha, double beta) { if (dtype < 0) dtype = _dst.depth(); - dtype = CV_MAKETYPE(CV_MAT_DEPTH(dtype), src.channels()); + int sdepth = src.depth(); + int ddepth = CV_MAT_DEPTH(dtype); + + dtype = CV_MAKETYPE(ddepth, src.channels()); _dst.create(src.dims, &src.size[0], dtype); Mat dst = _dst.getMat(); if( alpha == 0 ) @@ -307,7 +364,7 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub const uchar* sptr = planes[0].ptr(); uchar* dptr = planes[1].ptr(); - switch( src.depth() ) + switch( sdepth ) { case CV_8U: convertTo((const uchar*)sptr, dptr, dtype, total, alpha, beta); @@ -315,12 +372,18 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub case CV_8S: convertTo((const schar*)sptr, dptr, dtype, total, alpha, beta); break; + case CV_Bool: + convertTo((const bool*)sptr, dptr, dtype, total, alpha, beta); + break; case CV_16U: convertTo((const ushort*)sptr, dptr, dtype, total, alpha, beta); break; case CV_16S: convertTo((const short*)sptr, dptr, dtype, total, alpha, beta); break; + case CV_32U: + convertTo((const unsigned*)sptr, dptr, dtype, total, alpha, beta); + break; case CV_32S: convertTo((const int*)sptr, dptr, dtype, 
total, alpha, beta); break; @@ -330,6 +393,20 @@ void convert(const Mat& src, cv::OutputArray _dst, int dtype, double alpha, doub case CV_64F: convertTo((const double*)sptr, dptr, dtype, total, alpha, beta); break; + case CV_64U: + convertTo((const uint64_t*)sptr, dptr, dtype, total, alpha, beta); + break; + case CV_64S: + convertTo((const int64_t*)sptr, dptr, dtype, total, alpha, beta); + break; + case CV_16F: + convertTo((const cv::float16_t*)sptr, dptr, dtype, total, alpha, beta); + break; + case CV_16BF: + convertTo((const cv::bfloat16_t*)sptr, dptr, dtype, total, alpha, beta); + break; + default: + CV_Error(CV_StsNotImplemented, "unknown/unsupported depth"); } } } @@ -1351,7 +1428,7 @@ double norm(InputArray _src, int normType, InputArray _mask) double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask) { Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); - if( src1.depth() == CV_16F ) + if( src1.depth() == CV_16F || src1.depth() == CV_16BF ) { Mat src1_32f, src2_32f; src1.convertTo(src1_32f, CV_32F); @@ -1769,10 +1846,10 @@ cmpUlpsInt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff, size_t startidx, size_t& idx) { size_t i; - int realmaxdiff = 0; + int64_t realmaxdiff = 0; for( i = 0; i < total; i++ ) { - int diff = std::abs(src1[i] - src2[i]); + int64_t diff = (int64_t)std::abs((int64_t)src1[i] - (int64_t)src2[i]); if( realmaxdiff < diff ) { realmaxdiff = diff; @@ -1780,7 +1857,7 @@ cmpUlpsInt_(const _Tp* src1, const _Tp* src2, size_t total, int imaxdiff, idx = i + startidx; } } - return realmaxdiff; + return (double)realmaxdiff; } @@ -2008,7 +2085,7 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff, { Mat arr = arr_, refarr = refarr_; CV_Assert( arr.type() == refarr.type() && arr.size == refarr.size ); - if( arr.depth() == CV_16F ) + if( arr.depth() == CV_16F || arr.depth() == CV_16BF ) { Mat arr32f, refarr32f; arr.convertTo(arr32f, CV_32F); @@ -2017,7 +2094,8 @@ int cmpEps( 
const Mat& arr_, const Mat& refarr_, double* _realmaxdiff, refarr = refarr32f; } - int ilevel = refarr.depth() <= CV_32S ? cvFloor(success_err_level) : 0; + int depth = refarr.depth(); + int ilevel = depth <= CV_32S || depth == CV_32U || depth == CV_64U || depth == CV_64S ? cvFloor(success_err_level) : 0; int result = CMP_EPS_OK; const Mat *arrays[]={&arr, &refarr, 0}; @@ -2025,14 +2103,13 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff, NAryMatIterator it(arrays, planes); size_t total = planes[0].total()*planes[0].channels(), j = total; size_t i, nplanes = it.nplanes; - int depth = arr.depth(); size_t startidx = 1, idx = 0; double realmaxdiff = 0, maxval = 0; if(_realmaxdiff) *_realmaxdiff = 0; - if( refarr.depth() >= CV_32F && !element_wise_relative_error ) + if( !CV_IS_INT_TYPE(depth) && !element_wise_relative_error ) { maxval = cvtest::norm( refarr, NORM_INF ); maxval = MAX(maxval, 1.); @@ -2048,6 +2125,9 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff, case CV_8U: realmaxdiff = cmpUlpsInt_((const uchar*)sptr1, (const uchar*)sptr2, total, ilevel, startidx, idx); break; + case CV_Bool: + realmaxdiff = cmpUlpsInt_((const uchar*)sptr1, (const uchar*)sptr2, total, ilevel, startidx, idx); + break; case CV_8S: realmaxdiff = cmpUlpsInt_((const schar*)sptr1, (const schar*)sptr2, total, ilevel, startidx, idx); break; @@ -2060,6 +2140,15 @@ int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff, case CV_32S: realmaxdiff = cmpUlpsInt_((const int*)sptr1, (const int*)sptr2, total, ilevel, startidx, idx); break; + case CV_32U: + realmaxdiff = cmpUlpsInt_((const unsigned*)sptr1, (const unsigned*)sptr2, total, ilevel, startidx, idx); + break; + case CV_64S: + realmaxdiff = cmpUlpsInt_((const int64_t*)sptr1, (const int64_t*)sptr2, total, ilevel, startidx, idx); + break; + case CV_64U: + realmaxdiff = cmpUlpsInt_((const uint64_t*)sptr1, (const uint64_t*)sptr2, total, ilevel, startidx, idx); + break; case CV_32F: 
for( j = 0; j < total; j++ ) { @@ -2887,7 +2976,7 @@ std::ostream& operator << (std::ostream& out, const MatInfo& m) out << ""; else { - static const char* depthstr[] = {"8u", "8s", "16u", "16s", "32s", "32f", "64f", "?"}; + static const char* depthstr[] = {"8u", "8s", "16u", "16s", "32s", "32f", "64f", "16f", "16bf", "Bool", "64u", "64s", "32u", "?", "?", "?"}; out << depthstr[m.m->depth()] << "C" << m.m->channels() << " " << m.m->dims << "-dim ("; for( int i = 0; i < m.m->dims; i++ ) out << m.m->size[i] << (i < m.m->dims-1 ? " x " : ")"); @@ -2930,7 +3019,6 @@ writeElems(std::ostream& out, const void* data, int nelems, int starpos) } } - static void writeElems(std::ostream& out, const void* data, int nelems, int depth, int starpos) { if(depth == CV_8U) @@ -2943,6 +3031,28 @@ static void writeElems(std::ostream& out, const void* data, int nelems, int dept writeElems(out, data, nelems, starpos); else if(depth == CV_32S) writeElems(out, data, nelems, starpos); + else if(depth == CV_32U) + writeElems(out, data, nelems, starpos); + else if(depth == CV_64U) + writeElems(out, data, nelems, starpos); + else if(depth == CV_64S) + writeElems(out, data, nelems, starpos); + else if(depth == CV_Bool) + writeElems(out, data, nelems, starpos); + else if(depth == CV_16F) + { + std::streamsize pp = out.precision(); + out.precision(4); + writeElems(out, data, nelems, starpos); + out.precision(pp); + } + else if(depth == CV_16BF) + { + std::streamsize pp = out.precision(); + out.precision(4); + writeElems(out, data, nelems, starpos); + out.precision(pp); + } else if(depth == CV_32F) { std::streamsize pp = out.precision(); diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 39147228b8..09b9ac1a9b 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -465,6 +465,15 @@ void Regression::verify(cv::FileNode node, cv::InputArray array, double eps, ERR { int expected_kind = (int)node["kind"]; int expected_type = (int)node["type"]; + int 
array_type = array.type(); + if (array_type != expected_type) { + // temporary hack; we optimistically assume that type in the computed and expected array should be the same. + // if they are different, it must be because of the change in type representation between OpenCV 5.x and OpenCV 2.x,3.x,4.x. + // need to add "type5" or something like that and use it in the newer files. Then type will always mean 'earlier than 5.x type'. + int depth = expected_type & 7; + int channels = ((expected_type >> 3) & 127) + 1; + expected_type = CV_MAKETYPE(depth, channels); + } ASSERT_EQ(expected_kind, array.kind()) << " Argument \"" << node.name() << "\" has unexpected kind"; ASSERT_EQ(expected_type, array.type()) << " Argument \"" << node.name() << "\" has unexpected type"; diff --git a/modules/videoio/src/backend_plugin.cpp b/modules/videoio/src/backend_plugin.cpp index 71756ac158..5e65137cd4 100644 --- a/modules/videoio/src/backend_plugin.cpp +++ b/modules/videoio/src/backend_plugin.cpp @@ -535,6 +535,12 @@ public: cv::_OutputArray* dst = static_cast(userdata); if (!dst) return CV_ERROR_FAIL; + int depth = CV_MAT_DEPTH(type); + // [TODO] Remove this condition after rebuilding plugins or add a new + // version of plugins. 
Convert type from the old one to the new one (5 bits) + if (depth > 7) { + type = CV_MAKETYPE((type & 7), (type >> 3) + 1); + } cv::Mat(cv::Size(width, height), type, (void*)data, step).copyTo(*dst); return CV_ERROR_OK; } diff --git a/modules/videoio/test/test_precomp.hpp b/modules/videoio/test/test_precomp.hpp index b4f340897e..9bd613d8f0 100644 --- a/modules/videoio/test/test_precomp.hpp +++ b/modules/videoio/test/test_precomp.hpp @@ -54,7 +54,11 @@ static inline void PrintTo(const cv::VideoCaptureAPIs& api, std::ostream* os) inline std::string fourccToString(int fourcc) { - return cv::format("%c%c%c%c", fourcc & 255, (fourcc >> 8) & 255, (fourcc >> 16) & 255, (fourcc >> 24) & 255); + return cv::format("%c%c%c%c", + (char)(fourcc & 255), + (char)((fourcc >> 8) & 255), + (char)((fourcc >> 16) & 255), + (char)((fourcc >> 24) & 255)); } inline std::string fourccToStringSafe(int fourcc) @@ -71,19 +75,19 @@ inline int fourccFromString(const std::string &fourcc) return cv::VideoWriter::fourcc(fourcc[0], fourcc[1], fourcc[2], fourcc[3]); } -inline void generateFrame(int i, int FRAME_COUNT, cv::Mat & frame) +inline void generateFrame(int i, int frame_count, cv::Mat & frame) { using namespace cv; using namespace std; - int offset = (((i * 5) % FRAME_COUNT) - FRAME_COUNT / 2) * (frame.cols / 2) / FRAME_COUNT; + int offset = (((i * 5) % frame_count) - frame_count / 2) * (frame.cols / 2) / frame_count; frame(cv::Rect(0, 0, frame.cols / 2 + offset, frame.rows)) = Scalar(255, 255, 255); frame(cv::Rect(frame.cols / 2 + offset, 0, frame.cols - frame.cols / 2 - offset, frame.rows)) = Scalar(0, 0, 0); - ostringstream buf; buf << "Frame " << setw(2) << setfill('0') << i + 1; + std::string str = cv::format("%02d", i+1); int baseLine = 0; - Size box = getTextSize(buf.str(), FONT_HERSHEY_COMPLEX, 2, 5, &baseLine); - putText(frame, buf.str(), Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine), + Size box = getTextSize(str, FONT_HERSHEY_COMPLEX, 2, 5, 
&baseLine); + putText(frame, str, Point((frame.cols - box.width) / 2, (frame.rows - box.height) / 2 + baseLine), FONT_HERSHEY_COMPLEX, 2, Scalar(0, 0, 255), 5, LINE_AA); - Point p(i * frame.cols / (FRAME_COUNT - 1), i * frame.rows / (FRAME_COUNT - 1)); + Point p(i * frame.cols / (frame_count - 1), i * frame.rows / (frame_count - 1)); circle(frame, p, 50, Scalar(200, 25, 55), 8, LINE_AA); #if 0 imshow("frame", frame);