mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 14:36:36 +08:00
added basic support for CV_16F (the new datatype etc.) (#12463)
* added basic support for CV_16F (the new datatype etc.). CV_USRTYPE1 is now equal to CV_16F, which may break some [rarely used] functionality. We'll see * fixed just introduced bug in norm; reverted erroneous changes in Torch importer (need to find a better solution) * addressed some issues found during the PR review * restored the patch to fix some perf test failures
This commit is contained in:
parent
dca657a2fd
commit
6d7f5871db
@ -3009,6 +3009,7 @@ public:
|
||||
|
||||
virtual Ptr<Formatted> format(const Mat& mtx) const = 0;
|
||||
|
||||
virtual void set16fPrecision(int p = 4) = 0;
|
||||
virtual void set32fPrecision(int p = 8) = 0;
|
||||
virtual void set64fPrecision(int p = 16) = 0;
|
||||
virtual void setMultiline(bool ml = true) = 0;
|
||||
|
@ -317,13 +317,10 @@ Cv64suf;
|
||||
#define CV_IS_SUBMAT(flags) ((flags) & CV_MAT_SUBMAT_FLAG)
|
||||
|
||||
/** Size of each channel item,
|
||||
0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
|
||||
#define CV_ELEM_SIZE1(type) \
|
||||
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
|
||||
0x28442211 = 0010 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
|
||||
#define CV_ELEM_SIZE1(type) ((0x28442211 >> CV_MAT_DEPTH(type)*4) & 15)
|
||||
|
||||
/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
|
||||
#define CV_ELEM_SIZE(type) \
|
||||
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
|
||||
#define CV_ELEM_SIZE(type) (CV_MAT_CN(type)*CV_ELEM_SIZE1(type))
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(a,b) ((a) > (b) ? (b) : (a))
|
||||
|
@ -195,6 +195,12 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2,
|
||||
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
|
||||
|
||||
CV_EXPORTS void cvt16f32f( const float16_t* src, float* dst, int len );
|
||||
CV_EXPORTS void cvt32f16f( const float* src, float16_t* dst, int len );
|
||||
|
||||
CV_EXPORTS void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len );
|
||||
CV_EXPORTS void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len );
|
||||
|
||||
struct CV_EXPORTS DFT1D
|
||||
{
|
||||
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
|
||||
|
@ -76,6 +76,7 @@ typedef signed char schar;
|
||||
#define CV_32F 5
|
||||
#define CV_64F 6
|
||||
#define CV_USRTYPE1 7
|
||||
#define CV_16F 7
|
||||
|
||||
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
|
||||
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
|
||||
@ -124,6 +125,12 @@ typedef signed char schar;
|
||||
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
|
||||
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
|
||||
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
|
||||
|
||||
#define CV_16FC1 CV_MAKETYPE(CV_16F,1)
|
||||
#define CV_16FC2 CV_MAKETYPE(CV_16F,2)
|
||||
#define CV_16FC3 CV_MAKETYPE(CV_16F,3)
|
||||
#define CV_16FC4 CV_MAKETYPE(CV_16F,4)
|
||||
#define CV_16FC(n) CV_MAKETYPE(CV_16F,(n))
|
||||
//! @}
|
||||
|
||||
//! @name Comparison operation
|
||||
|
@ -296,8 +296,10 @@ public:
|
||||
DEPTH_MASK_32S = 1 << CV_32S,
|
||||
DEPTH_MASK_32F = 1 << CV_32F,
|
||||
DEPTH_MASK_64F = 1 << CV_64F,
|
||||
DEPTH_MASK_16F = 1 << CV_16F,
|
||||
DEPTH_MASK_ALL = (DEPTH_MASK_64F<<1)-1,
|
||||
DEPTH_MASK_ALL_BUT_8S = DEPTH_MASK_ALL & ~DEPTH_MASK_8S,
|
||||
DEPTH_MASK_ALL_16F = (DEPTH_MASK_16F<<1)-1,
|
||||
DEPTH_MASK_FLT = DEPTH_MASK_32F + DEPTH_MASK_64F
|
||||
};
|
||||
|
||||
|
@ -158,6 +158,22 @@ template<> inline uint64 saturate_cast<uint64>(int64 v) { return (uint64)st
|
||||
|
||||
template<> inline int64 saturate_cast<int64>(uint64 v) { return (int64)std::min(v, (uint64)LLONG_MAX); }
|
||||
|
||||
/** @overload */
|
||||
template<typename _Tp> static inline _Tp saturate_cast(float16_t v) { return saturate_cast<_Tp>((float)v); }
|
||||
|
||||
// in theory, we could use a LUT for 8u/8s->16f conversion,
|
||||
// but with hardware support for FP32->FP16 conversion the current approach is preferable
|
||||
template<> inline float16_t saturate_cast<float16_t>(uchar v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(schar v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(ushort v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(short v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(unsigned v){ return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(int v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(uint64 v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(int64 v) { return float16_t((float)v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(float v) { return float16_t(v); }
|
||||
template<> inline float16_t saturate_cast<float16_t>(double v) { return float16_t((float)v); }
|
||||
|
||||
//! @}
|
||||
|
||||
} // cv
|
||||
|
@ -261,6 +261,20 @@ public:
|
||||
};
|
||||
};
|
||||
|
||||
template<> class DataType<float16_t>
|
||||
{
|
||||
public:
|
||||
typedef float16_t value_type;
|
||||
typedef float work_type;
|
||||
typedef value_type channel_type;
|
||||
typedef value_type vec_type;
|
||||
enum { generic_type = 0,
|
||||
depth = CV_16F,
|
||||
channels = 1,
|
||||
fmt = (int)'h',
|
||||
type = CV_MAKETYPE(depth, channels)
|
||||
};
|
||||
};
|
||||
|
||||
/** @brief A helper class for cv::DataType
|
||||
|
||||
@ -330,6 +344,12 @@ template<> class TypeDepth<CV_64F>
|
||||
typedef double value_type;
|
||||
};
|
||||
|
||||
template<> class TypeDepth<CV_16F>
|
||||
{
|
||||
enum { depth = CV_16F };
|
||||
typedef float16_t value_type;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
//! @}
|
||||
|
@ -3262,6 +3262,9 @@ void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
|
||||
case CV_64F:
|
||||
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16F:
|
||||
scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsUnsupportedFormat,"");
|
||||
}
|
||||
|
@ -43,15 +43,15 @@ static const char* getTestOpMath(unsigned testOp)
|
||||
|
||||
const char* depthToString_(int depth)
|
||||
{
|
||||
static const char* depthNames[] = { "CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F", "CV_USRTYPE1" };
|
||||
return (depth <= CV_USRTYPE1 && depth >= 0) ? depthNames[depth] : NULL;
|
||||
static const char* depthNames[] = { "CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F", "CV_16F" };
|
||||
return (depth <= CV_16F && depth >= 0) ? depthNames[depth] : NULL;
|
||||
}
|
||||
|
||||
const cv::String typeToString_(int type)
|
||||
{
|
||||
int depth = CV_MAT_DEPTH(type);
|
||||
int cn = CV_MAT_CN(type);
|
||||
if (depth >= 0 && depth <= CV_USRTYPE1)
|
||||
if (depth >= 0 && depth <= CV_16F)
|
||||
return cv::format("%sC%d", depthToString_(depth), cn);
|
||||
return cv::String();
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace cv {
|
||||
|
||||
/*namespace hal {
|
||||
namespace hal {
|
||||
|
||||
void cvt16f32f( const float16_t* src, float* dst, int len )
|
||||
{
|
||||
@ -50,21 +50,21 @@ void cvt32f16f( const float* src, float16_t* dst, int len )
|
||||
dst[j] = float16_t(src[j]);
|
||||
}
|
||||
|
||||
/*void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
|
||||
void addRNGBias32f( float* arr, const float* scaleBiasPairs, int len )
|
||||
{
|
||||
// the loop is simple enough, so we let the compiler to vectorize it
|
||||
for( int i = 0; i < len; i++ )
|
||||
arr[i] = scaleBiasPairs[i*2 + 1];
|
||||
arr[i] += scaleBiasPairs[i*2 + 1];
|
||||
}
|
||||
|
||||
void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len )
|
||||
{
|
||||
// the loop is simple enough, so we let the compiler to vectorize it
|
||||
for( int i = 0; i < len; i++ )
|
||||
arr[i] = scaleBiasPairs[i*2 + 1];
|
||||
arr[i] += scaleBiasPairs[i*2 + 1];
|
||||
}
|
||||
|
||||
}*/
|
||||
}
|
||||
|
||||
template<typename _Ts, typename _Td, typename _Twvec> inline void
|
||||
cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
|
||||
@ -150,7 +150,7 @@ DEF_CVT_FUNC(8u16s, cvt_, uchar, short, v_int16)
|
||||
DEF_CVT_FUNC(8u32s, cvt_, uchar, int, v_int32)
|
||||
DEF_CVT_FUNC(8u32f, cvt_, uchar, float, v_float32)
|
||||
DEF_CVT_FUNC(8u64f, cvt_, uchar, double, v_int32)
|
||||
//DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(8u16f, cvt1_, uchar, float16_t, v_float32)
|
||||
|
||||
////////////////////// 8s -> ... ////////////////////////
|
||||
|
||||
@ -160,7 +160,7 @@ DEF_CVT_FUNC(8s16s, cvt_, schar, short, v_int16)
|
||||
DEF_CVT_FUNC(8s32s, cvt_, schar, int, v_int32)
|
||||
DEF_CVT_FUNC(8s32f, cvt_, schar, float, v_float32)
|
||||
DEF_CVT_FUNC(8s64f, cvt_, schar, double, v_int32)
|
||||
//DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(8s16f, cvt1_, schar, float16_t, v_float32)
|
||||
|
||||
////////////////////// 16u -> ... ////////////////////////
|
||||
|
||||
@ -170,7 +170,7 @@ DEF_CVT_FUNC(16u16s, cvt_, ushort, short, v_int32)
|
||||
DEF_CVT_FUNC(16u32s, cvt_, ushort, int, v_int32)
|
||||
DEF_CVT_FUNC(16u32f, cvt_, ushort, float, v_float32)
|
||||
DEF_CVT_FUNC(16u64f, cvt_, ushort, double, v_int32)
|
||||
//DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(16u16f, cvt1_,ushort, float16_t, v_float32)
|
||||
|
||||
////////////////////// 16s -> ... ////////////////////////
|
||||
|
||||
@ -180,7 +180,7 @@ DEF_CVT_FUNC(16s16u, cvt_, short, ushort, v_int32)
|
||||
DEF_CVT_FUNC(16s32s, cvt_, short, int, v_int32)
|
||||
DEF_CVT_FUNC(16s32f, cvt_, short, float, v_float32)
|
||||
DEF_CVT_FUNC(16s64f, cvt_, short, double, v_int32)
|
||||
//DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(16s16f, cvt1_,short, float16_t, v_float32)
|
||||
|
||||
////////////////////// 32s -> ... ////////////////////////
|
||||
|
||||
@ -190,7 +190,7 @@ DEF_CVT_FUNC(32s16u, cvt_, int, ushort, v_int32)
|
||||
DEF_CVT_FUNC(32s16s, cvt_, int, short, v_int32)
|
||||
DEF_CVT_FUNC(32s32f, cvt_, int, float, v_float32)
|
||||
DEF_CVT_FUNC(32s64f, cvt_, int, double, v_int32)
|
||||
//DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(32s16f, cvt1_,int, float16_t, v_float32)
|
||||
|
||||
////////////////////// 32f -> ... ////////////////////////
|
||||
|
||||
@ -210,17 +210,17 @@ DEF_CVT_FUNC(64f16u, cvt_, double, ushort, v_int32)
|
||||
DEF_CVT_FUNC(64f16s, cvt_, double, short, v_int32)
|
||||
DEF_CVT_FUNC(64f32s, cvt_, double, int, v_int32)
|
||||
DEF_CVT_FUNC(64f32f, cvt_, double, float, v_float32)
|
||||
//DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
|
||||
DEF_CVT_FUNC(64f16f, cvt1_,double, float16_t, v_float32)
|
||||
|
||||
////////////////////// 16f -> ... ////////////////////////
|
||||
|
||||
//DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
|
||||
//DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
|
||||
//DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
|
||||
//DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
|
||||
//DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
|
||||
DEF_CVT_FUNC(16f8u, cvt_, float16_t, uchar, v_float32)
|
||||
DEF_CVT_FUNC(16f8s, cvt_, float16_t, schar, v_float32)
|
||||
DEF_CVT_FUNC(16f16u, cvt1_, float16_t, ushort, v_float32)
|
||||
DEF_CVT_FUNC(16f16s, cvt1_, float16_t, short, v_float32)
|
||||
DEF_CVT_FUNC(16f32s, cvt1_, float16_t, int, v_float32)
|
||||
DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
|
||||
//DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
|
||||
DEF_CVT_FUNC(16f64f, cvt1_, float16_t, double, v_float32)
|
||||
|
||||
///////////// "conversion" w/o conversion ///////////////
|
||||
|
||||
@ -339,42 +339,41 @@ BinaryFunc getConvertFunc(int sdepth, int ddepth)
|
||||
{
|
||||
(BinaryFunc)(cvt8u), (BinaryFunc)GET_OPTIMIZED(cvt8s8u), (BinaryFunc)GET_OPTIMIZED(cvt16u8u),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s8u), (BinaryFunc)GET_OPTIMIZED(cvt32s8u), (BinaryFunc)GET_OPTIMIZED(cvt32f8u),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f8u), 0 //(BinaryFunc)(cvt16f8u)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f8u), (BinaryFunc)(cvt16f8u)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u8s), (BinaryFunc)cvt8u, (BinaryFunc)GET_OPTIMIZED(cvt16u8s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s8s), (BinaryFunc)GET_OPTIMIZED(cvt32s8s), (BinaryFunc)GET_OPTIMIZED(cvt32f8s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f8s), 0 //(BinaryFunc)(cvt16f8s)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f8s), (BinaryFunc)(cvt16f8s)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u16u), (BinaryFunc)GET_OPTIMIZED(cvt8s16u), (BinaryFunc)cvt16u,
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s16u), (BinaryFunc)GET_OPTIMIZED(cvt32s16u), (BinaryFunc)GET_OPTIMIZED(cvt32f16u),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f16u), 0 //(BinaryFunc)(cvt16f16u)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f16u), (BinaryFunc)(cvt16f16u)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u16s), (BinaryFunc)GET_OPTIMIZED(cvt8s16s), (BinaryFunc)GET_OPTIMIZED(cvt16u16s),
|
||||
(BinaryFunc)cvt16u, (BinaryFunc)GET_OPTIMIZED(cvt32s16s), (BinaryFunc)GET_OPTIMIZED(cvt32f16s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f16s), 0 //(BinaryFunc)(cvt16f16s)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f16s), (BinaryFunc)(cvt16f16s)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u32s), (BinaryFunc)GET_OPTIMIZED(cvt8s32s), (BinaryFunc)GET_OPTIMIZED(cvt16u32s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s32s), (BinaryFunc)cvt32s, (BinaryFunc)GET_OPTIMIZED(cvt32f32s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f32s), 0 //(BinaryFunc)(cvt16f32s)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f32s), (BinaryFunc)(cvt16f32s)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u32f), (BinaryFunc)GET_OPTIMIZED(cvt8s32f), (BinaryFunc)GET_OPTIMIZED(cvt16u32f),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s32f), (BinaryFunc)GET_OPTIMIZED(cvt32s32f), (BinaryFunc)cvt32s,
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f32f), 0 //(BinaryFunc)(cvt16f32f)
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt64f32f), (BinaryFunc)(cvt16f32f)
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt8u64f), (BinaryFunc)GET_OPTIMIZED(cvt8s64f), (BinaryFunc)GET_OPTIMIZED(cvt16u64f),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvt16s64f), (BinaryFunc)GET_OPTIMIZED(cvt32s64f), (BinaryFunc)GET_OPTIMIZED(cvt32f64f),
|
||||
(BinaryFunc)(cvt64s), 0 //(BinaryFunc)(cvt16f64f)
|
||||
(BinaryFunc)(cvt64s), (BinaryFunc)(cvt16f64f)
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0
|
||||
//(BinaryFunc)(cvt8u16f), (BinaryFunc)(cvt8s16f), (BinaryFunc)(cvt16u16f), (BinaryFunc)(cvt16s16f),
|
||||
//(BinaryFunc)(cvt32s16f), (BinaryFunc)(cvt32f16f), (BinaryFunc)(cvt64f16f), (BinaryFunc)(cvt16u)
|
||||
(BinaryFunc)(cvt8u16f), (BinaryFunc)(cvt8s16f), (BinaryFunc)(cvt16u16f), (BinaryFunc)(cvt16s16f),
|
||||
(BinaryFunc)(cvt32s16f), (BinaryFunc)(cvt32f16f), (BinaryFunc)(cvt64f16f), (BinaryFunc)(cvt16u)
|
||||
}
|
||||
};
|
||||
return cvtTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
|
||||
@ -481,7 +480,7 @@ void cv::convertFp16( InputArray _src, OutputArray _dst )
|
||||
if(_dst.fixedType())
|
||||
{
|
||||
ddepth = _dst.depth();
|
||||
CV_Assert(ddepth == CV_16S /*|| ddepth == CV_16F*/);
|
||||
CV_Assert(ddepth == CV_16S || ddepth == CV_16F);
|
||||
CV_Assert(_dst.channels() == _src.channels());
|
||||
}
|
||||
else
|
||||
@ -489,7 +488,7 @@ void cv::convertFp16( InputArray _src, OutputArray _dst )
|
||||
func = (BinaryFunc)cvt32f16f;
|
||||
break;
|
||||
case CV_16S:
|
||||
//case CV_16F:
|
||||
case CV_16F:
|
||||
ddepth = CV_32F;
|
||||
func = (BinaryFunc)cvt16f32f;
|
||||
break;
|
||||
|
@ -150,12 +150,11 @@ static inline void vx_load_pair_as(const int* ptr, v_float32& a, v_float32& b)
|
||||
static inline void vx_load_pair_as(const float* ptr, v_float32& a, v_float32& b)
|
||||
{ a = vx_load(ptr); b = vx_load(ptr + v_float32::nlanes); }
|
||||
|
||||
//static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b)
|
||||
//{
|
||||
// a = vx_load_expand(ptr);
|
||||
// b = vx_load_expand(ptr + v_float32::nlanes);
|
||||
//}
|
||||
|
||||
static inline void vx_load_pair_as(const float16_t* ptr, v_float32& a, v_float32& b)
|
||||
{
|
||||
a = vx_load_expand(ptr);
|
||||
b = vx_load_expand(ptr + v_float32::nlanes);
|
||||
}
|
||||
|
||||
static inline void v_store_pair_as(uchar* ptr, const v_uint16& a, const v_uint16& b)
|
||||
{
|
||||
@ -295,12 +294,12 @@ static inline void vx_load_pair_as(const double* ptr, v_float64& a, v_float64& b
|
||||
b = vx_load(ptr + v_float64::nlanes);
|
||||
}
|
||||
|
||||
//static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
|
||||
//{
|
||||
// v_float32 v0 = vx_load_expand(ptr);
|
||||
// a = v_cvt_f64(v0);
|
||||
// b = v_cvt_f64_high(v0);
|
||||
//}
|
||||
static inline void vx_load_pair_as(const float16_t* ptr, v_float64& a, v_float64& b)
|
||||
{
|
||||
v_float32 v0 = vx_load_expand(ptr);
|
||||
a = v_cvt_f64(v0);
|
||||
b = v_cvt_f64_high(v0);
|
||||
}
|
||||
|
||||
static inline void v_store_as(double* ptr, const v_float32& a)
|
||||
{
|
||||
@ -349,11 +348,11 @@ static inline void v_store_pair_as(float* ptr, const v_float64& a, const v_float
|
||||
v_store(ptr, v);
|
||||
}
|
||||
|
||||
//static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_float64& b)
|
||||
//{
|
||||
// v_float32 v = v_cvt_f32(a, b);
|
||||
// v_pack_store(ptr, v);
|
||||
//}
|
||||
static inline void v_store_pair_as(float16_t* ptr, const v_float64& a, const v_float64& b)
|
||||
{
|
||||
v_float32 v = v_cvt_f32(a, b);
|
||||
v_pack_store(ptr, v);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
@ -222,7 +222,7 @@ DEF_CVT_SCALE_FUNC(16s8u, cvt_32f, short, uchar, float)
|
||||
DEF_CVT_SCALE_FUNC(32s8u, cvt_32f, int, uchar, float)
|
||||
DEF_CVT_SCALE_FUNC(32f8u, cvt_32f, float, uchar, float)
|
||||
DEF_CVT_SCALE_FUNC(64f8u, cvt_32f, double, uchar, float)
|
||||
//DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float)
|
||||
DEF_CVT_SCALE_FUNC(16f8u, cvt_32f, float16_t, uchar, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u8s, cvt_32f, uchar, schar, float)
|
||||
DEF_CVT_SCALE_FUNC(8s, cvt_32f, schar, schar, float)
|
||||
@ -231,7 +231,7 @@ DEF_CVT_SCALE_FUNC(16s8s, cvt_32f, short, schar, float)
|
||||
DEF_CVT_SCALE_FUNC(32s8s, cvt_32f, int, schar, float)
|
||||
DEF_CVT_SCALE_FUNC(32f8s, cvt_32f, float, schar, float)
|
||||
DEF_CVT_SCALE_FUNC(64f8s, cvt_32f, double, schar, float)
|
||||
//DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float)
|
||||
DEF_CVT_SCALE_FUNC(16f8s, cvt_32f, float16_t, schar, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u16u, cvt_32f, uchar, ushort, float)
|
||||
DEF_CVT_SCALE_FUNC(8s16u, cvt_32f, schar, ushort, float)
|
||||
@ -240,7 +240,7 @@ DEF_CVT_SCALE_FUNC(16s16u, cvt_32f, short, ushort, float)
|
||||
DEF_CVT_SCALE_FUNC(32s16u, cvt_32f, int, ushort, float)
|
||||
DEF_CVT_SCALE_FUNC(32f16u, cvt_32f, float, ushort, float)
|
||||
DEF_CVT_SCALE_FUNC(64f16u, cvt_32f, double, ushort, float)
|
||||
//DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
|
||||
DEF_CVT_SCALE_FUNC(16f16u, cvt1_32f, float16_t, ushort, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u16s, cvt_32f, uchar, short, float)
|
||||
DEF_CVT_SCALE_FUNC(8s16s, cvt_32f, schar, short, float)
|
||||
@ -249,7 +249,7 @@ DEF_CVT_SCALE_FUNC(16s, cvt_32f, short, short, float)
|
||||
DEF_CVT_SCALE_FUNC(32s16s, cvt_32f, int, short, float)
|
||||
DEF_CVT_SCALE_FUNC(32f16s, cvt_32f, float, short, float)
|
||||
DEF_CVT_SCALE_FUNC(64f16s, cvt_32f, double, short, float)
|
||||
//DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
|
||||
DEF_CVT_SCALE_FUNC(16f16s, cvt1_32f, float16_t, short, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u32s, cvt_32f, uchar, int, float)
|
||||
DEF_CVT_SCALE_FUNC(8s32s, cvt_32f, schar, int, float)
|
||||
@ -258,7 +258,7 @@ DEF_CVT_SCALE_FUNC(16s32s, cvt_32f, short, int, float)
|
||||
DEF_CVT_SCALE_FUNC(32s, cvt_64f, int, int, double)
|
||||
DEF_CVT_SCALE_FUNC(32f32s, cvt_32f, float, int, float)
|
||||
DEF_CVT_SCALE_FUNC(64f32s, cvt_64f, double, int, double)
|
||||
//DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
|
||||
DEF_CVT_SCALE_FUNC(16f32s, cvt1_32f, float16_t, int, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u32f, cvt_32f, uchar, float, float)
|
||||
DEF_CVT_SCALE_FUNC(8s32f, cvt_32f, schar, float, float)
|
||||
@ -267,7 +267,7 @@ DEF_CVT_SCALE_FUNC(16s32f, cvt_32f, short, float, float)
|
||||
DEF_CVT_SCALE_FUNC(32s32f, cvt_32f, int, float, float)
|
||||
DEF_CVT_SCALE_FUNC(32f, cvt_32f, float, float, float)
|
||||
DEF_CVT_SCALE_FUNC(64f32f, cvt_64f, double, float, double)
|
||||
//DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
|
||||
DEF_CVT_SCALE_FUNC(16f32f, cvt1_32f, float16_t, float, float)
|
||||
|
||||
DEF_CVT_SCALE_FUNC(8u64f, cvt_64f, uchar, double, double)
|
||||
DEF_CVT_SCALE_FUNC(8s64f, cvt_64f, schar, double, double)
|
||||
@ -276,16 +276,16 @@ DEF_CVT_SCALE_FUNC(16s64f, cvt_64f, short, double, double)
|
||||
DEF_CVT_SCALE_FUNC(32s64f, cvt_64f, int, double, double)
|
||||
DEF_CVT_SCALE_FUNC(32f64f, cvt_64f, float, double, double)
|
||||
DEF_CVT_SCALE_FUNC(64f, cvt_64f, double, double, double)
|
||||
//DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
|
||||
DEF_CVT_SCALE_FUNC(16f64f, cvt_64f, float16_t, double, double)
|
||||
|
||||
/*DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(8u16f, cvt1_32f, uchar, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(8s16f, cvt1_32f, schar, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(16u16f, cvt1_32f, ushort, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(16s16f, cvt1_32f, short, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(32s16f, cvt1_32f, int, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float)
|
||||
DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double)
|
||||
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float)*/
|
||||
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float)
|
||||
|
||||
static BinaryFunc getCvtScaleAbsFunc(int depth)
|
||||
{
|
||||
@ -306,43 +306,42 @@ BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8u),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8u),
|
||||
(BinaryFunc)cvtScale64f8u, 0 //(BinaryFunc)cvtScale16f8u
|
||||
(BinaryFunc)cvtScale64f8u, (BinaryFunc)cvtScale16f8u
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u8s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u8s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s8s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f8s),
|
||||
(BinaryFunc)cvtScale64f8s, 0 //(BinaryFunc)cvtScale16f8s
|
||||
(BinaryFunc)cvtScale64f8s, (BinaryFunc)cvtScale16f8s
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u16u), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale16u),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16u), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16u),
|
||||
(BinaryFunc)cvtScale64f16u, 0 //(BinaryFunc)cvtScale16f16u
|
||||
(BinaryFunc)cvtScale64f16u, (BinaryFunc)cvtScale16f16u
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u16s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u16s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s16s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f16s),
|
||||
(BinaryFunc)cvtScale64f16s, 0 //(BinaryFunc)cvtScale16f16s
|
||||
(BinaryFunc)cvtScale64f16s, (BinaryFunc)cvtScale16f16s
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u32s), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32s),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32s), (BinaryFunc)GET_OPTIMIZED(cvtScale32f32s),
|
||||
(BinaryFunc)cvtScale64f32s, 0 //(BinaryFunc)cvtScale16f32s
|
||||
(BinaryFunc)cvtScale64f32s, (BinaryFunc)cvtScale16f32s
|
||||
},
|
||||
{
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale8u32f), (BinaryFunc)GET_OPTIMIZED(cvtScale8s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale16u32f),
|
||||
(BinaryFunc)GET_OPTIMIZED(cvtScale16s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32s32f), (BinaryFunc)GET_OPTIMIZED(cvtScale32f),
|
||||
(BinaryFunc)cvtScale64f32f, 0 //(BinaryFunc)cvtScale16f32f
|
||||
(BinaryFunc)cvtScale64f32f, (BinaryFunc)cvtScale16f32f
|
||||
},
|
||||
{
|
||||
(BinaryFunc)cvtScale8u64f, (BinaryFunc)cvtScale8s64f, (BinaryFunc)cvtScale16u64f,
|
||||
(BinaryFunc)cvtScale16s64f, (BinaryFunc)cvtScale32s64f, (BinaryFunc)cvtScale32f64f,
|
||||
(BinaryFunc)cvtScale64f, 0 //(BinaryFunc)cvtScale16f64f
|
||||
(BinaryFunc)cvtScale64f, (BinaryFunc)cvtScale16f64f
|
||||
},
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0
|
||||
/*(BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
|
||||
(BinaryFunc)cvtScale8u16f, (BinaryFunc)cvtScale8s16f, (BinaryFunc)cvtScale16u16f,
|
||||
(BinaryFunc)cvtScale16s16f, (BinaryFunc)cvtScale32s16f, (BinaryFunc)cvtScale32f16f,
|
||||
(BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f*/
|
||||
(BinaryFunc)cvtScale64f16f, (BinaryFunc)cvtScale16f
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -216,8 +216,10 @@ static MergeFunc getMergeFunc(int depth)
|
||||
{
|
||||
static MergeFunc mergeTab[] =
|
||||
{
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u),
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), 0
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge8u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge8u),
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge16u), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u),
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge32s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge32s),
|
||||
(MergeFunc)GET_OPTIMIZED(cv::hal::merge64s), (MergeFunc)GET_OPTIMIZED(cv::hal::merge16u)
|
||||
};
|
||||
|
||||
return mergeTab[depth];
|
||||
|
@ -723,7 +723,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
return result;
|
||||
}
|
||||
|
||||
NormFunc func = getNormFunc(normType >> 1, depth);
|
||||
NormFunc func = getNormFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src, &mask, 0};
|
||||
@ -737,19 +737,31 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
result;
|
||||
result.d = 0;
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
|
||||
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
int j, total = (int)it.size, blockSize = total;
|
||||
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
|
||||
int isum = 0;
|
||||
int *ibuf = &result.i;
|
||||
AutoBuffer<float> fltbuf_;
|
||||
float* fltbuf = 0;
|
||||
size_t esz = 0;
|
||||
|
||||
if( blockSum )
|
||||
{
|
||||
intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
esz = src.elemSize();
|
||||
|
||||
if( depth == CV_16F )
|
||||
{
|
||||
blockSize = std::min(blockSize, 1024);
|
||||
fltbuf_.allocate(blockSize);
|
||||
fltbuf = fltbuf_.data();
|
||||
}
|
||||
else
|
||||
{
|
||||
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
}
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
@ -757,13 +769,17 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
for( j = 0; j < total; j += blockSize )
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
|
||||
count += bsz;
|
||||
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
|
||||
const uchar* data = ptrs[0];
|
||||
if( depth == CV_16F )
|
||||
{
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
|
||||
data = (const uchar*)fltbuf;
|
||||
}
|
||||
func( data, ptrs[1], (uchar*)ibuf, bsz, cn );
|
||||
if( blockSum && depth != CV_16F )
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
count = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
if( ptrs[1] )
|
||||
@ -1181,7 +1197,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
return result;
|
||||
}
|
||||
|
||||
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
|
||||
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
|
||||
CV_Assert( func != 0 );
|
||||
|
||||
const Mat* arrays[] = {&src1, &src2, &mask, 0};
|
||||
@ -1196,19 +1212,31 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
result;
|
||||
result.d = 0;
|
||||
NAryMatIterator it(arrays, ptrs);
|
||||
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
|
||||
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
int j, total = (int)it.size, blockSize = total;
|
||||
bool blockSum = depth == CV_16F || (normType == NORM_L1 && depth <= CV_16S) ||
|
||||
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
|
||||
unsigned isum = 0;
|
||||
unsigned *ibuf = &result.u;
|
||||
AutoBuffer<float> fltbuf_;
|
||||
float* fltbuf = 0;
|
||||
size_t esz = 0;
|
||||
|
||||
if( blockSum )
|
||||
{
|
||||
intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
esz = src1.elemSize();
|
||||
|
||||
if( depth == CV_16F )
|
||||
{
|
||||
blockSize = std::min(blockSize, 1024);
|
||||
fltbuf_.allocate(blockSize*2);
|
||||
fltbuf = fltbuf_.data();
|
||||
}
|
||||
else
|
||||
{
|
||||
int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
|
||||
blockSize = std::min(blockSize, intSumBlockSize);
|
||||
ibuf = &isum;
|
||||
}
|
||||
}
|
||||
|
||||
for( size_t i = 0; i < it.nplanes; i++, ++it )
|
||||
@ -1216,13 +1244,19 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
for( j = 0; j < total; j += blockSize )
|
||||
{
|
||||
int bsz = std::min(total - j, blockSize);
|
||||
func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
|
||||
count += bsz;
|
||||
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
|
||||
const uchar *data0 = ptrs[0], *data1 = ptrs[1];
|
||||
if( depth == CV_16F )
|
||||
{
|
||||
hal::cvt16f32f((const float16_t*)ptrs[0], fltbuf, bsz);
|
||||
hal::cvt16f32f((const float16_t*)ptrs[1], fltbuf + bsz, bsz);
|
||||
data0 = (const uchar*)fltbuf;
|
||||
data1 = (const uchar*)(fltbuf + bsz);
|
||||
}
|
||||
func( data0, data1, ptrs[2], (uchar*)ibuf, bsz, cn );
|
||||
if( blockSum && depth != CV_16F )
|
||||
{
|
||||
result.d += isum;
|
||||
isum = 0;
|
||||
count = 0;
|
||||
}
|
||||
ptrs[0] += bsz*esz;
|
||||
ptrs[1] += bsz*esz;
|
||||
|
@ -77,6 +77,7 @@ namespace cv
|
||||
void valueToStr32s() { sprintf(buf, "%d", mtx.ptr<int>(row, col)[cn]); }
|
||||
void valueToStr32f() { sprintf(buf, floatFormat, mtx.ptr<float>(row, col)[cn]); }
|
||||
void valueToStr64f() { sprintf(buf, floatFormat, mtx.ptr<double>(row, col)[cn]); }
|
||||
// Formats channel `cn` of the matrix element at (row, col) into `buf` using `floatFormat`.
// The element is read as float16_t and explicitly widened to float before it is handed
// to sprintf.
void valueToStr16f() { sprintf(buf, floatFormat, (float)mtx.ptr<float16_t>(row, col)[cn]); }
|
||||
void valueToStrOther() { buf[0] = 0; }
|
||||
|
||||
public:
|
||||
@ -115,7 +116,8 @@ namespace cv
|
||||
case CV_32S: valueToStr = &FormattedImpl::valueToStr32s; break;
|
||||
case CV_32F: valueToStr = &FormattedImpl::valueToStr32f; break;
|
||||
case CV_64F: valueToStr = &FormattedImpl::valueToStr64f; break;
|
||||
default: valueToStr = &FormattedImpl::valueToStrOther; break;
|
||||
default: CV_Assert(mtx.depth() == CV_16F);
|
||||
valueToStr = &FormattedImpl::valueToStr16f;
|
||||
}
|
||||
}
|
||||
|
||||
@ -256,7 +258,12 @@ namespace cv
|
||||
class FormatterBase : public Formatter
|
||||
{
|
||||
public:
|
||||
FormatterBase() : prec32f(8), prec64f(16), multiline(true) {}
|
||||
FormatterBase() : prec16f(4), prec32f(8), prec64f(16), multiline(true) {}
|
||||
|
||||
// Overrides Formatter::set16fPrecision: records `p` in the prec16f member.
// NOTE(review): presumably the number of digits used when printing CV_16F elements,
// mirroring prec32f/prec64f -- the code that consumes prec16f is not visible here.
void set16fPrecision(int p) CV_OVERRIDE
|
||||
{
|
||||
prec16f = p;
|
||||
}
|
||||
|
||||
void set32fPrecision(int p) CV_OVERRIDE
|
||||
{
|
||||
@ -274,6 +281,7 @@ namespace cv
|
||||
}
|
||||
|
||||
protected:
|
||||
int prec16f;
|
||||
int prec32f;
|
||||
int prec64f;
|
||||
int multiline;
|
||||
@ -325,7 +333,7 @@ namespace cv
|
||||
{
|
||||
static const char* numpyTypes[] =
|
||||
{
|
||||
"uint8", "int8", "uint16", "int16", "int32", "float32", "float64", "uint64"
|
||||
"uint8", "int8", "uint16", "int16", "int32", "float32", "float64", "float16"
|
||||
};
|
||||
char braces[5] = {'[', ']', ',', '[', ']'};
|
||||
if (mtx.cols == 1)
|
||||
|
@ -48,18 +48,6 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#if defined _WIN32 || defined WINCE
|
||||
#include <windows.h>
|
||||
#undef small
|
||||
#undef min
|
||||
#undef max
|
||||
#undef abs
|
||||
#endif
|
||||
|
||||
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
|
||||
#include "emmintrin.h"
|
||||
#endif
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
@ -74,12 +62,6 @@ namespace cv
|
||||
|
||||
#define RNG_NEXT(x) ((uint64)(unsigned)(x)*CV_RNG_COEFF + ((x) >> 32))
|
||||
|
||||
#ifdef __PPC64__
|
||||
#define PPC_MUL_ADD(ret, tmp, p0, p1) \
|
||||
asm volatile("fmuls %0,%1,%2\n\t fadds %0,%0,%3" : "=&f" (ret) \
|
||||
: "f" (tmp), "f" (p0), "f" (p1))
|
||||
#endif
|
||||
|
||||
/***************************************************************************************\
|
||||
* Pseudo-Random Number Generators (PRNGs) *
|
||||
\***************************************************************************************/
|
||||
@ -154,59 +136,26 @@ template<typename T> static void
|
||||
randi_( T* arr, int len, uint64* state, const DivStruct* p )
|
||||
{
|
||||
uint64 temp = *state;
|
||||
int i = 0;
|
||||
unsigned t0, t1, v0, v1;
|
||||
|
||||
for( i = 0; i <= len - 4; i += 4 )
|
||||
for( int i = 0; i < len; i++ )
|
||||
{
|
||||
temp = RNG_NEXT(temp);
|
||||
t0 = (unsigned)temp;
|
||||
temp = RNG_NEXT(temp);
|
||||
t1 = (unsigned)temp;
|
||||
v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
|
||||
v1 = (unsigned)(((uint64)t1 * p[i+1].M) >> 32);
|
||||
v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
|
||||
v1 = (v1 + ((t1 - v1) >> p[i+1].sh1)) >> p[i+1].sh2;
|
||||
v0 = t0 - v0*p[i].d + p[i].delta;
|
||||
v1 = t1 - v1*p[i+1].d + p[i+1].delta;
|
||||
arr[i] = saturate_cast<T>((int)v0);
|
||||
arr[i+1] = saturate_cast<T>((int)v1);
|
||||
|
||||
temp = RNG_NEXT(temp);
|
||||
t0 = (unsigned)temp;
|
||||
temp = RNG_NEXT(temp);
|
||||
t1 = (unsigned)temp;
|
||||
v0 = (unsigned)(((uint64)t0 * p[i+2].M) >> 32);
|
||||
v1 = (unsigned)(((uint64)t1 * p[i+3].M) >> 32);
|
||||
v0 = (v0 + ((t0 - v0) >> p[i+2].sh1)) >> p[i+2].sh2;
|
||||
v1 = (v1 + ((t1 - v1) >> p[i+3].sh1)) >> p[i+3].sh2;
|
||||
v0 = t0 - v0*p[i+2].d + p[i+2].delta;
|
||||
v1 = t1 - v1*p[i+3].d + p[i+3].delta;
|
||||
arr[i+2] = saturate_cast<T>((int)v0);
|
||||
arr[i+3] = saturate_cast<T>((int)v1);
|
||||
unsigned t = (unsigned)temp;
|
||||
unsigned v = (unsigned)(((uint64)t * p[i].M) >> 32);
|
||||
v = (v + ((t - v) >> p[i].sh1)) >> p[i].sh2;
|
||||
v = t - v*p[i].d + p[i].delta;
|
||||
arr[i] = saturate_cast<T>((int)v);
|
||||
}
|
||||
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
temp = RNG_NEXT(temp);
|
||||
t0 = (unsigned)temp;
|
||||
v0 = (unsigned)(((uint64)t0 * p[i].M) >> 32);
|
||||
v0 = (v0 + ((t0 - v0) >> p[i].sh1)) >> p[i].sh2;
|
||||
v0 = t0 - v0*p[i].d + p[i].delta;
|
||||
arr[i] = saturate_cast<T>((int)v0);
|
||||
}
|
||||
|
||||
*state = temp;
|
||||
}
|
||||
|
||||
|
||||
#define DEF_RANDI_FUNC(suffix, type) \
|
||||
static void randBits_##suffix(type* arr, int len, uint64* state, \
|
||||
const Vec2i* p, bool small_flag) \
|
||||
const Vec2i* p, void*, bool small_flag) \
|
||||
{ randBits_(arr, len, state, p, small_flag); } \
|
||||
\
|
||||
static void randi_##suffix(type* arr, int len, uint64* state, \
|
||||
const DivStruct* p, bool ) \
|
||||
const DivStruct* p, void*, bool ) \
|
||||
{ randi_(arr, len, state, p); }
|
||||
|
||||
DEF_RANDI_FUNC(8u, uchar)
|
||||
@ -215,131 +164,62 @@ DEF_RANDI_FUNC(16u, ushort)
|
||||
DEF_RANDI_FUNC(16s, short)
|
||||
DEF_RANDI_FUNC(32s, int)
|
||||
|
||||
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, bool )
|
||||
static void randf_32f( float* arr, int len, uint64* state, const Vec2f* p, void*, bool )
|
||||
{
|
||||
uint64 temp = *state;
|
||||
int i = 0;
|
||||
|
||||
for( ; i <= len - 4; i += 4 )
|
||||
for( int i = 0; i < len; i++ )
|
||||
{
|
||||
float f[4];
|
||||
f[0] = (float)(int)(temp = RNG_NEXT(temp));
|
||||
f[1] = (float)(int)(temp = RNG_NEXT(temp));
|
||||
f[2] = (float)(int)(temp = RNG_NEXT(temp));
|
||||
f[3] = (float)(int)(temp = RNG_NEXT(temp));
|
||||
|
||||
// handwritten SSE is required not for performance but for numerical stability!
|
||||
// both 32-bit gcc and MSVC compilers trend to generate double precision SSE
|
||||
// while 64-bit compilers generate single precision SIMD instructions
|
||||
// so manual vectorisation forces all compilers to the single precision
|
||||
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
|
||||
__m128 q0 = _mm_loadu_ps((const float*)(p + i));
|
||||
__m128 q1 = _mm_loadu_ps((const float*)(p + i + 2));
|
||||
|
||||
__m128 q01l = _mm_unpacklo_ps(q0, q1);
|
||||
__m128 q01h = _mm_unpackhi_ps(q0, q1);
|
||||
|
||||
__m128 p0 = _mm_unpacklo_ps(q01l, q01h);
|
||||
__m128 p1 = _mm_unpackhi_ps(q01l, q01h);
|
||||
|
||||
_mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
|
||||
#elif defined __ARM_NEON && defined __aarch64__
|
||||
// handwritten NEON is required not for performance but for numerical stability!
|
||||
// 64bit gcc tends to use fmadd instead of separate multiply and add
|
||||
// use volatile to ensure to separate the multiply and add
|
||||
float32x4x2_t q = vld2q_f32((const float*)(p + i));
|
||||
|
||||
float32x4_t p0 = q.val[0];
|
||||
float32x4_t p1 = q.val[1];
|
||||
|
||||
volatile float32x4_t v0 = vmulq_f32(vld1q_f32(f), p0);
|
||||
vst1q_f32(arr+i, vaddq_f32(v0, p1));
|
||||
#elif defined __PPC64__
|
||||
// inline asm is required for numerical stability!
|
||||
// compilers tends to use floating multiply-add single(fmadds)
|
||||
// instead of separate multiply and add
|
||||
PPC_MUL_ADD(arr[i+0], f[0], p[i+0][0], p[i+0][1]);
|
||||
PPC_MUL_ADD(arr[i+1], f[1], p[i+1][0], p[i+1][1]);
|
||||
PPC_MUL_ADD(arr[i+2], f[2], p[i+2][0], p[i+2][1]);
|
||||
PPC_MUL_ADD(arr[i+3], f[3], p[i+3][0], p[i+3][1]);
|
||||
#else
|
||||
arr[i+0] = f[0]*p[i+0][0] + p[i+0][1];
|
||||
arr[i+1] = f[1]*p[i+1][0] + p[i+1][1];
|
||||
arr[i+2] = f[2]*p[i+2][0] + p[i+2][1];
|
||||
arr[i+3] = f[3]*p[i+3][0] + p[i+3][1];
|
||||
#endif
|
||||
int t = (int)(temp = RNG_NEXT(temp));
|
||||
arr[i] = (float)(t*p[i][0]);
|
||||
}
|
||||
|
||||
for( ; i < len; i++ )
|
||||
{
|
||||
temp = RNG_NEXT(temp);
|
||||
#if defined __SSE2__ || (defined _M_IX86_FP && 2 == _M_IX86_FP)
|
||||
_mm_store_ss(arr + i, _mm_add_ss(
|
||||
_mm_mul_ss(_mm_set_ss((float)(int)temp), _mm_set_ss(p[i][0])),
|
||||
_mm_set_ss(p[i][1]))
|
||||
);
|
||||
#elif defined __ARM_NEON && defined __aarch64__
|
||||
float32x2_t t = vadd_f32(vmul_f32(
|
||||
vdup_n_f32((float)(int)temp), vdup_n_f32(p[i][0])),
|
||||
vdup_n_f32(p[i][1]));
|
||||
arr[i] = vget_lane_f32(t, 0);
|
||||
#elif defined __PPC64__
|
||||
PPC_MUL_ADD(arr[i], (float)(int)temp, p[i][0], p[i][1]);
|
||||
#else
|
||||
arr[i] = (int)temp*p[i][0] + p[i][1];
|
||||
#endif
|
||||
}
|
||||
|
||||
*state = temp;
|
||||
}
|
||||
|
||||
// add bias separately to make the generated random numbers
|
||||
// more deterministic, independent of
|
||||
// architecture details (FMA instruction use etc.)
|
||||
hal::addRNGBias32f(arr, &p[0][0], len);
|
||||
}
|
||||
|
||||
static void
|
||||
randf_64f( double* arr, int len, uint64* state, const Vec2d* p, bool )
|
||||
randf_64f( double* arr, int len, uint64* state, const Vec2d* p, void*, bool )
|
||||
{
|
||||
uint64 temp = *state;
|
||||
int64 v = 0;
|
||||
int i;
|
||||
|
||||
for( i = 0; i <= len - 4; i += 4 )
|
||||
{
|
||||
double f0, f1;
|
||||
|
||||
temp = RNG_NEXT(temp);
|
||||
v = (temp >> 32)|(temp << 32);
|
||||
f0 = v*p[i][0] + p[i][1];
|
||||
temp = RNG_NEXT(temp);
|
||||
v = (temp >> 32)|(temp << 32);
|
||||
f1 = v*p[i+1][0] + p[i+1][1];
|
||||
arr[i] = f0; arr[i+1] = f1;
|
||||
|
||||
temp = RNG_NEXT(temp);
|
||||
v = (temp >> 32)|(temp << 32);
|
||||
f0 = v*p[i+2][0] + p[i+2][1];
|
||||
temp = RNG_NEXT(temp);
|
||||
v = (temp >> 32)|(temp << 32);
|
||||
f1 = v*p[i+3][0] + p[i+3][1];
|
||||
arr[i+2] = f0; arr[i+3] = f1;
|
||||
}
|
||||
|
||||
for( ; i < len; i++ )
|
||||
for( int i = 0; i < len; i++ )
|
||||
{
|
||||
temp = RNG_NEXT(temp);
|
||||
v = (temp >> 32)|(temp << 32);
|
||||
arr[i] = v*p[i][0] + p[i][1];
|
||||
int64 v = (temp >> 32)|(temp << 32);
|
||||
arr[i] = v*p[i][0];
|
||||
}
|
||||
|
||||
*state = temp;
|
||||
|
||||
hal::addRNGBias64f(arr, &p[0][0], len);
|
||||
}
|
||||
|
||||
typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, bool small_flag);
|
||||
// Fills arr[0..len) with half-precision random values.
//   arr   - float16_t output buffer; receives len elements from hal::cvt32f16f
//   len   - number of values to generate
//   state - RNG state; advanced once per element with RNG_NEXT and written back on exit
//   p     - per-element parameters; p[i][0] is the multiplier applied in the loop, and the
//           array is then handed to hal::addRNGBias32f for the separate bias step
//   fbuf  - caller-provided float scratch buffer; must hold at least len floats
//   (unnamed bool) - unused here; NOTE(review): apparently present only so the signature
//           lines up with the RandFunc typedef -- confirm
static void randf_16f( float16_t* arr, int len, uint64* state, const Vec2f* p, float* fbuf, bool )
|
||||
{
|
||||
uint64 temp = *state;
|
||||
for( int i = 0; i < len; i++ )
|
||||
{
|
||||
// advance the generator, truncate the new state to a signed 32-bit int, convert to float
float f = (float)(int)(temp = RNG_NEXT(temp));
|
||||
// scale only; the bias is added after the loop
fbuf[i] = f*p[i][0];
|
||||
}
|
||||
*state = temp;
|
||||
|
||||
// add bias separately to make the generated random numbers
|
||||
// more deterministic, independent of
|
||||
// architecture details (FMA instruction use etc.)
|
||||
hal::addRNGBias32f(fbuf, &p[0][0], len);
|
||||
// narrow the float results into the float16_t output
// NOTE(review): direction inferred from the hal function name -- confirm
hal::cvt32f16f(fbuf, arr, len);
|
||||
}
|
||||
|
||||
typedef void (*RandFunc)(uchar* arr, int len, uint64* state, const void* p, void* tempbuf, bool small_flag);
|
||||
|
||||
|
||||
static RandFunc randTab[][8] =
|
||||
{
|
||||
{
|
||||
(RandFunc)randi_8u, (RandFunc)randi_8s, (RandFunc)randi_16u, (RandFunc)randi_16s,
|
||||
(RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, 0
|
||||
(RandFunc)randi_32s, (RandFunc)randf_32f, (RandFunc)randf_64f, (RandFunc)randf_16f
|
||||
},
|
||||
{
|
||||
(RandFunc)randBits_8u, (RandFunc)randBits_8s, (RandFunc)randBits_16u, (RandFunc)randBits_16s,
|
||||
@ -350,7 +230,7 @@ static RandFunc randTab[][8] =
|
||||
/*
|
||||
The code below implements the algorithm described in
|
||||
"The Ziggurat Method for Generating Random Variables"
|
||||
by Marsaglia and Tsang, Journal of Statistical Software.
|
||||
by George Marsaglia and Wai Wan Tsang, Journal of Statistical Software, 2007.
|
||||
*/
|
||||
static void
|
||||
randn_0_1_32f( float* arr, int len, uint64* state )
|
||||
@ -631,8 +511,8 @@ void RNG::fill( InputOutputArray _mat, int disttype,
|
||||
// for each channel i compute such dparam[0][i] & dparam[1][i],
|
||||
// so that a signed 32/64-bit integer X is transformed to
|
||||
// the range [param1.val[i], param2.val[i]) using
|
||||
// dparam[1][i]*X + dparam[0][i]
|
||||
if( depth == CV_32F )
|
||||
// dparam[0][i]*X + dparam[1][i]
|
||||
if( depth != CV_64F )
|
||||
{
|
||||
fp = (Vec2f*)(parambuf + cn*2);
|
||||
for( j = 0; j < cn; j++ )
|
||||
@ -704,6 +584,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
|
||||
AutoBuffer<double> buf;
|
||||
uchar* param = 0;
|
||||
float* nbuf = 0;
|
||||
float* tmpbuf = 0;
|
||||
|
||||
if( disttype == UNIFORM )
|
||||
{
|
||||
@ -727,12 +608,14 @@ void RNG::fill( InputOutputArray _mat, int disttype,
|
||||
p[j + k] = ip[k];
|
||||
}
|
||||
}
|
||||
else if( depth == CV_32F )
|
||||
else if( depth != CV_64F )
|
||||
{
|
||||
Vec2f* p = (Vec2f*)param;
|
||||
for( j = 0; j < blockSize*cn; j += cn )
|
||||
for( k = 0; k < cn; k++ )
|
||||
p[j + k] = fp[k];
|
||||
if( depth == CV_16F )
|
||||
tmpbuf = (float*)p + blockSize*cn*2;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -755,7 +638,7 @@ void RNG::fill( InputOutputArray _mat, int disttype,
|
||||
int len = std::min(total - j, blockSize);
|
||||
|
||||
if( disttype == CV_RAND_UNI )
|
||||
func( ptr, len*cn, &state, param, smallFlag );
|
||||
func( ptr, len*cn, &state, param, tmpbuf, smallFlag );
|
||||
else
|
||||
{
|
||||
randn_0_1_32f(nbuf, len*cn, &state);
|
||||
|
@ -224,8 +224,10 @@ static SplitFunc getSplitFunc(int depth)
|
||||
{
|
||||
static SplitFunc splitTab[] =
|
||||
{
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u),
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split32s), (SplitFunc)GET_OPTIMIZED(cv::hal::split32s), (SplitFunc)GET_OPTIMIZED(cv::hal::split64s), 0
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split8u), (SplitFunc)GET_OPTIMIZED(cv::hal::split8u),
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split16u), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u),
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split32s), (SplitFunc)GET_OPTIMIZED(cv::hal::split32s),
|
||||
(SplitFunc)GET_OPTIMIZED(cv::hal::split64s), (SplitFunc)GET_OPTIMIZED(cv::hal::split16u)
|
||||
};
|
||||
|
||||
return splitTab[depth];
|
||||
|
@ -78,7 +78,7 @@ OCL_TEST_P(UMatExpr, Ones)
|
||||
|
||||
//////////////////////////////// Instantiation /////////////////////////////////////////////////
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, UMatExpr, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, UMatExpr, Combine(OCL_ALL_DEPTHS_16F, OCL_ALL_CHANNELS));
|
||||
|
||||
} } // namespace opencv_test::ocl
|
||||
|
||||
|
@ -476,7 +476,7 @@ struct CopyOp : public BaseElemWiseOp
|
||||
}
|
||||
int getRandomType(RNG& rng)
|
||||
{
|
||||
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS);
|
||||
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS);
|
||||
}
|
||||
double getMaxErr(int)
|
||||
{
|
||||
@ -498,7 +498,7 @@ struct SetOp : public BaseElemWiseOp
|
||||
}
|
||||
int getRandomType(RNG& rng)
|
||||
{
|
||||
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL, 1, ARITHM_MAX_CHANNELS);
|
||||
return cvtest::randomType(rng, _OutputArray::DEPTH_MASK_ALL_16F, 1, ARITHM_MAX_CHANNELS);
|
||||
}
|
||||
double getMaxErr(int)
|
||||
{
|
||||
|
@ -372,6 +372,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int)
|
||||
#define OCL_ON(...) cv::ocl::setUseOpenCL(true); __VA_ARGS__ ;
|
||||
|
||||
#define OCL_ALL_DEPTHS Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F)
|
||||
#define OCL_ALL_DEPTHS_16F Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F)
|
||||
#define OCL_ALL_CHANNELS Values(1, 2, 3, 4)
|
||||
|
||||
CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA, INTER_LINEAR_EXACT)
|
||||
|
@ -160,7 +160,7 @@ private:
|
||||
}; \
|
||||
static inline void PrintTo(const class_name& t, std::ostream* os) { t.PrintTo(os); } }
|
||||
|
||||
CV_ENUM(MatDepth, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_USRTYPE1)
|
||||
CV_ENUM(MatDepth, CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F, CV_16F)
|
||||
|
||||
/*****************************************************************************************\
|
||||
* Regression control utility for performance testing *
|
||||
|
@ -72,10 +72,10 @@ int randomType(RNG& rng, int typeMask, int minChannels, int maxChannels)
|
||||
{
|
||||
int channels = rng.uniform(minChannels, maxChannels+1);
|
||||
int depth = 0;
|
||||
CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL) != 0);
|
||||
CV_Assert((typeMask & _OutputArray::DEPTH_MASK_ALL_16F) != 0);
|
||||
for(;;)
|
||||
{
|
||||
depth = rng.uniform(CV_8U, CV_64F+1);
|
||||
depth = rng.uniform(CV_8U, CV_16F+1);
|
||||
if( ((1 << depth) & typeMask) != 0 )
|
||||
break;
|
||||
}
|
||||
@ -1260,6 +1260,13 @@ norm_(const _Tp* src1, const _Tp* src2, size_t total, int cn, int normType, doub
|
||||
double norm(InputArray _src, int normType, InputArray _mask)
|
||||
{
|
||||
Mat src = _src.getMat(), mask = _mask.getMat();
|
||||
if( src.depth() == CV_16F )
|
||||
{
|
||||
Mat src32f;
|
||||
src.convertTo(src32f, CV_32F);
|
||||
return cvtest::norm(src32f, normType, _mask);
|
||||
}
|
||||
|
||||
if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
|
||||
{
|
||||
if( !mask.empty() )
|
||||
@ -1340,6 +1347,14 @@ double norm(InputArray _src, int normType, InputArray _mask)
|
||||
double norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask)
|
||||
{
|
||||
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
|
||||
if( src1.depth() == CV_16F )
|
||||
{
|
||||
Mat src1_32f, src2_32f;
|
||||
src1.convertTo(src1_32f, CV_32F);
|
||||
src2.convertTo(src2_32f, CV_32F);
|
||||
return cvtest::norm(src1_32f, src2_32f, normType, _mask);
|
||||
}
|
||||
|
||||
bool isRelative = (normType & NORM_RELATIVE) != 0;
|
||||
normType &= ~NORM_RELATIVE;
|
||||
|
||||
@ -1982,11 +1997,20 @@ int check( const Mat& a, double fmin, double fmax, vector<int>* _idx )
|
||||
// success_err_level is maximum allowed difference, idx is the index of the first
|
||||
// element for which difference is >success_err_level
|
||||
// (or index of element with the maximum difference)
|
||||
int cmpEps( const Mat& arr, const Mat& refarr, double* _realmaxdiff,
|
||||
int cmpEps( const Mat& arr_, const Mat& refarr_, double* _realmaxdiff,
|
||||
double success_err_level, vector<int>* _idx,
|
||||
bool element_wise_relative_error )
|
||||
{
|
||||
Mat arr = arr_, refarr = refarr_;
|
||||
CV_Assert( arr.type() == refarr.type() && arr.size == refarr.size );
|
||||
if( arr.depth() == CV_16F )
|
||||
{
|
||||
Mat arr32f, refarr32f;
|
||||
arr.convertTo(arr32f, CV_32F);
|
||||
refarr.convertTo(refarr32f, CV_32F);
|
||||
arr = arr32f;
|
||||
refarr = refarr32f;
|
||||
}
|
||||
|
||||
int ilevel = refarr.depth() <= CV_32S ? cvFloor(success_err_level) : 0;
|
||||
int result = CMP_EPS_OK;
|
||||
|
@ -594,11 +594,11 @@ Regression& Regression::operator() (const std::string& name, cv::InputArray arra
|
||||
// exit if current test is already failed
|
||||
if(::testing::UnitTest::GetInstance()->current_test_info()->result()->Failed()) return *this;
|
||||
|
||||
if(!array.empty() && array.depth() == CV_USRTYPE1)
|
||||
/*if(!array.empty() && array.depth() == CV_USRTYPE1)
|
||||
{
|
||||
ADD_FAILURE() << " Can not check regression for CV_USRTYPE1 data type for " << name;
|
||||
return *this;
|
||||
}
|
||||
}*/
|
||||
|
||||
std::string nodename = getCurrentTestNodeName();
|
||||
|
||||
@ -2207,7 +2207,7 @@ void PrintTo(const MatType& t, ::std::ostream* os)
|
||||
case CV_32S: *os << "32S"; break;
|
||||
case CV_32F: *os << "32F"; break;
|
||||
case CV_64F: *os << "64F"; break;
|
||||
case CV_USRTYPE1: *os << "USRTYPE1"; break;
|
||||
case CV_USRTYPE1: *os << "16F"; break;
|
||||
default: *os << "INVALID_TYPE"; break;
|
||||
}
|
||||
*os << 'C' << CV_MAT_CN((int)t);
|
||||
|
Loading…
Reference in New Issue
Block a user