mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 14:13:15 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
85ade61ef7
@ -3559,6 +3559,10 @@ CV_EXPORTS MatExpr operator + (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator + (const MatExpr& e, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator + (const Scalar& s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator + (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator + (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator + (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator - (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator - (const Mat& a, const Scalar& s);
|
||||
@ -3568,6 +3572,10 @@ CV_EXPORTS MatExpr operator - (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator - (const Scalar& s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator - (const Mat& a, const Matx<_Tp, m, n>& b) { return a - Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator - (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) - b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator - (const Mat& m);
|
||||
CV_EXPORTS MatExpr operator - (const MatExpr& e);
|
||||
@ -3580,6 +3588,10 @@ CV_EXPORTS MatExpr operator * (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator * (const MatExpr& e, double s);
|
||||
CV_EXPORTS MatExpr operator * (double s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator * (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator * (const Mat& a, const Matx<_Tp, m, n>& b) { return a + Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator * (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) + b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator / (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator / (const Mat& a, double s);
|
||||
@ -3589,52 +3601,100 @@ CV_EXPORTS MatExpr operator / (const Mat& m, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator / (const MatExpr& e, double s);
|
||||
CV_EXPORTS MatExpr operator / (double s, const MatExpr& e);
|
||||
CV_EXPORTS MatExpr operator / (const MatExpr& e1, const MatExpr& e2);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator / (const Mat& a, const Matx<_Tp, m, n>& b) { return a / Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator / (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) / b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator < (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator < (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator < (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator < (const Mat& a, const Matx<_Tp, m, n>& b) { return a < Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator < (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) < b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator <= (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator <= (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator <= (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator <= (const Mat& a, const Matx<_Tp, m, n>& b) { return a <= Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator <= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) <= b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator == (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator == (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator == (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator == (const Mat& a, const Matx<_Tp, m, n>& b) { return a == Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator == (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) == b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator != (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator != (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator != (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator != (const Mat& a, const Matx<_Tp, m, n>& b) { return a != Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator != (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) != b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator >= (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator >= (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator >= (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator >= (const Mat& a, const Matx<_Tp, m, n>& b) { return a >= Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator >= (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) >= b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator > (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator > (const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr operator > (double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator > (const Mat& a, const Matx<_Tp, m, n>& b) { return a > Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator > (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) > b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator & (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator & (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator & (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator & (const Mat& a, const Matx<_Tp, m, n>& b) { return a & Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator & (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) & b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator | (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator | (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator | (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator | (const Mat& a, const Matx<_Tp, m, n>& b) { return a | Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator | (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) | b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator ^ (const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr operator ^ (const Mat& a, const Scalar& s);
|
||||
CV_EXPORTS MatExpr operator ^ (const Scalar& s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator ^ (const Mat& a, const Matx<_Tp, m, n>& b) { return a ^ Mat(b); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr operator ^ (const Matx<_Tp, m, n>& a, const Mat& b) { return Mat(a) ^ b; }
|
||||
|
||||
CV_EXPORTS MatExpr operator ~(const Mat& m);
|
||||
|
||||
CV_EXPORTS MatExpr min(const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr min(const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr min(double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr min (const Mat& a, const Matx<_Tp, m, n>& b) { return min(a, Mat(b)); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr min (const Matx<_Tp, m, n>& a, const Mat& b) { return min(Mat(a), b); }
|
||||
|
||||
CV_EXPORTS MatExpr max(const Mat& a, const Mat& b);
|
||||
CV_EXPORTS MatExpr max(const Mat& a, double s);
|
||||
CV_EXPORTS MatExpr max(double s, const Mat& a);
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr max (const Mat& a, const Matx<_Tp, m, n>& b) { return max(a, Mat(b)); }
|
||||
template<typename _Tp, int m, int n> static inline
|
||||
MatExpr max (const Matx<_Tp, m, n>& a, const Mat& b) { return max(Mat(a), b); }
|
||||
|
||||
/** @brief Calculates an absolute value of each matrix element.
|
||||
|
||||
|
@ -258,48 +258,67 @@ Matx<_Tp, n, l> Matx<_Tp, m, n>::solve(const Matx<_Tp, m, l>& rhs, int method) c
|
||||
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, A, B) \
|
||||
template<typename _Tp> CV_MAT_AUG_OPERATOR1(op, cvop, const A, B)
|
||||
|
||||
#define CV_MAT_AUG_OPERATOR_TN(op, cvop, A) \
|
||||
template<typename _Tp, int m, int n> static inline A& operator op (A& a, const Matx<_Tp,m,n>& b) { cvop; return a; } \
|
||||
template<typename _Tp, int m, int n> static inline const A& operator op (const A& a, const Matx<_Tp,m,n>& b) { cvop; return a; }
|
||||
|
||||
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (+=, cv::add(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(+=, cv::add(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a,Mat(b),a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(+=, cv::add(a,Mat(b),a), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (-=, cv::subtract(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(-=, cv::subtract(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a,Mat(b),a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(-=, cv::subtract(a,Mat(b),a), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, cv::gemm(a, b, 1, Mat(), 0, a, 0), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR (*=, a.convertTo(a, -1, b), Mat, double)
|
||||
CV_MAT_AUG_OPERATOR_T(*=, a.convertTo(a, -1, b), Mat_<_Tp>, double)
|
||||
CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(*=, cv::gemm(a, Mat(b), 1, Mat(), 0, a, 0), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (/=, cv::divide(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, cv::divide(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR (/=, a.convertTo((Mat&)a, -1, 1./b), Mat, double)
|
||||
CV_MAT_AUG_OPERATOR_T(/=, a.convertTo((Mat&)a, -1, 1./b), Mat_<_Tp>, double)
|
||||
CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(/=, cv::divide(a, Mat(b), a), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (&=, cv::bitwise_and(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(&=, cv::bitwise_and(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(&=, cv::bitwise_and(a, Mat(b), a), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (|=, cv::bitwise_or(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(|=, cv::bitwise_or(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(|=, cv::bitwise_or(a, Mat(b), a), Mat_<_Tp>)
|
||||
|
||||
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Mat)
|
||||
CV_MAT_AUG_OPERATOR (^=, cv::bitwise_xor(a,b,a), Mat, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Scalar)
|
||||
CV_MAT_AUG_OPERATOR_T(^=, cv::bitwise_xor(a,b,a), Mat_<_Tp>, Mat_<_Tp>)
|
||||
CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), a), Mat)
|
||||
CV_MAT_AUG_OPERATOR_TN(^=, cv::bitwise_xor(a, Mat(b), a), Mat_<_Tp>)
|
||||
|
||||
#undef CV_MAT_AUG_OPERATOR_TN
|
||||
#undef CV_MAT_AUG_OPERATOR_T
|
||||
#undef CV_MAT_AUG_OPERATOR
|
||||
#undef CV_MAT_AUG_OPERATOR1
|
||||
|
@ -69,6 +69,7 @@ protected:
|
||||
bool TestVec();
|
||||
bool TestMatxMultiplication();
|
||||
bool TestMatxElementwiseDivison();
|
||||
bool TestMatMatxCastSum();
|
||||
bool TestSubMatAccess();
|
||||
bool TestExp();
|
||||
bool TestSVD();
|
||||
@ -885,6 +886,74 @@ bool CV_OperationsTest::TestMatxMultiplication()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CV_OperationsTest::TestMatMatxCastSum()
|
||||
{
|
||||
try
|
||||
{
|
||||
Mat ref1 = (Mat_<double>(3, 1) << 1, 2, 3);
|
||||
Mat ref2 = (Mat_<double>(3, 1) << 3, 4, 5);
|
||||
Mat ref3 = Mat::ones(3, 1, CV_64FC1);
|
||||
|
||||
Mat mat = Mat::zeros(3, 1, CV_64FC1);
|
||||
|
||||
Mat tst1 = ref1.clone();
|
||||
Mat_<double> tst2 = ref2.clone();
|
||||
Matx<double, 3, 1> tst3(1, 2, 3);
|
||||
Vec3d tst4(3, 4, 5);
|
||||
Scalar tst5(1, 2, 3);
|
||||
Mat res;
|
||||
|
||||
res = mat + tst1;
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
res = mat + tst2;
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
res = mat + tst3;
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
res = mat + tst4;
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
|
||||
res = mat + tst5;
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
res = mat + 1;
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
|
||||
cv::add(mat, tst1, res);
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
cv::add(mat, tst2, res);
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
cv::add(mat, tst3, res);
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
cv::add(mat, tst4, res);
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
|
||||
cv::add(mat, tst5, res);
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
cv::add(mat, 1, res);
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
|
||||
res = mat.clone(); res += tst1;
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
res = mat.clone(); res += tst2;
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
res = mat.clone(); res += tst3;
|
||||
CHECK_DIFF_FLT(res, ref1);
|
||||
res = mat.clone(); res += tst4;
|
||||
CHECK_DIFF_FLT(res, ref2);
|
||||
|
||||
res = mat.clone(); res += tst5;
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
res = mat.clone(); res += 1;
|
||||
CHECK_DIFF_FLT(res, ref3);
|
||||
}
|
||||
catch (const test_excep& e)
|
||||
{
|
||||
ts->printf(cvtest::TS::LOG, "%s\n", e.s.c_str());
|
||||
ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CV_OperationsTest::TestMatxElementwiseDivison()
|
||||
{
|
||||
try
|
||||
@ -1135,6 +1204,9 @@ void CV_OperationsTest::run( int /* start_from */)
|
||||
if (!TestMatxElementwiseDivison())
|
||||
return;
|
||||
|
||||
if (!TestMatMatxCastSum())
|
||||
return;
|
||||
|
||||
if (!TestSubMatAccess())
|
||||
return;
|
||||
|
||||
|
@ -95,9 +95,9 @@ void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, In
|
||||
|
||||
if( !(flags & DrawMatchesFlags::DRAW_OVER_OUTIMG) )
|
||||
{
|
||||
if( image.type() == CV_8UC3 )
|
||||
if (image.type() == CV_8UC3 || image.type() == CV_8UC4)
|
||||
{
|
||||
image.copyTo( outImage );
|
||||
image.copyTo(outImage);
|
||||
}
|
||||
else if( image.type() == CV_8UC1 )
|
||||
{
|
||||
@ -105,7 +105,7 @@ void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, In
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error( Error::StsBadArg, "Incorrect type of input image.\n" );
|
||||
CV_Error( Error::StsBadArg, "Incorrect type of input image: " + typeToString(image.type()) );
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,6 +122,25 @@ void drawKeypoints( InputArray image, const std::vector<KeyPoint>& keypoints, In
|
||||
}
|
||||
}
|
||||
|
||||
static void _prepareImage(InputArray src, const Mat& dst)
|
||||
{
|
||||
CV_CheckType(src.type(), src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4, "Unsupported source image");
|
||||
CV_CheckType(dst.type(), dst.type() == CV_8UC3 || dst.type() == CV_8UC4, "Unsupported destination image");
|
||||
const int src_cn = src.channels();
|
||||
const int dst_cn = dst.channels();
|
||||
|
||||
if (src_cn == dst_cn)
|
||||
src.copyTo(dst);
|
||||
else if (src_cn == 1)
|
||||
cvtColor(src, dst, dst_cn == 3 ? COLOR_GRAY2BGR : COLOR_GRAY2BGRA);
|
||||
else if (src_cn == 3 && dst_cn == 4)
|
||||
cvtColor(src, dst, COLOR_BGR2BGRA);
|
||||
else if (src_cn == 4 && dst_cn == 3)
|
||||
cvtColor(src, dst, COLOR_BGRA2BGR);
|
||||
else
|
||||
CV_Error(Error::StsInternal, "");
|
||||
}
|
||||
|
||||
static void _prepareImgAndDrawKeypoints( InputArray img1, const std::vector<KeyPoint>& keypoints1,
|
||||
InputArray img2, const std::vector<KeyPoint>& keypoints2,
|
||||
InputOutputArray _outImg, Mat& outImg1, Mat& outImg2,
|
||||
@ -140,21 +159,16 @@ static void _prepareImgAndDrawKeypoints( InputArray img1, const std::vector<KeyP
|
||||
}
|
||||
else
|
||||
{
|
||||
_outImg.create( size, CV_MAKETYPE(img1.depth(), 3) );
|
||||
const int cn1 = img1.channels(), cn2 = img2.channels();
|
||||
const int out_cn = std::max(3, std::max(cn1, cn2));
|
||||
_outImg.create(size, CV_MAKETYPE(img1.depth(), out_cn));
|
||||
outImg = _outImg.getMat();
|
||||
outImg = Scalar::all(0);
|
||||
outImg1 = outImg( Rect(0, 0, img1size.width, img1size.height) );
|
||||
outImg2 = outImg( Rect(img1size.width, 0, img2size.width, img2size.height) );
|
||||
|
||||
if( img1.type() == CV_8U )
|
||||
cvtColor( img1, outImg1, COLOR_GRAY2BGR );
|
||||
else
|
||||
img1.copyTo( outImg1 );
|
||||
|
||||
if( img2.type() == CV_8U )
|
||||
cvtColor( img2, outImg2, COLOR_GRAY2BGR );
|
||||
else
|
||||
img2.copyTo( outImg2 );
|
||||
_prepareImage(img1, outImg1);
|
||||
_prepareImage(img2, outImg2);
|
||||
}
|
||||
|
||||
// draw keypoints
|
||||
|
78
modules/features2d/test/test_drawing.cpp
Normal file
78
modules/features2d/test/test_drawing.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
//
|
||||
// Copyright (C) 2018, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
namespace opencv_test { namespace {
|
||||
|
||||
static
|
||||
Mat getReference_DrawKeypoint(int cn)
|
||||
{
|
||||
static Mat ref = (Mat_<uint8_t>(11, 11) <<
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 15, 54, 15, 1, 1, 1, 1,
|
||||
1, 1, 1, 76, 217, 217, 221, 81, 1, 1, 1,
|
||||
1, 1, 100, 224, 111, 57, 115, 225, 101, 1, 1,
|
||||
1, 44, 215, 100, 1, 1, 1, 101, 214, 44, 1,
|
||||
1, 54, 212, 57, 1, 1, 1, 55, 212, 55, 1,
|
||||
1, 40, 215, 104, 1, 1, 1, 105, 215, 40, 1,
|
||||
1, 1, 102, 221, 111, 55, 115, 222, 103, 1, 1,
|
||||
1, 1, 1, 76, 218, 217, 220, 81, 1, 1, 1,
|
||||
1, 1, 1, 1, 15, 55, 15, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||
Mat res;
|
||||
cvtColor(ref, res, (cn == 4) ? COLOR_GRAY2BGRA : COLOR_GRAY2BGR);
|
||||
return res;
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<MatType> Features2D_drawKeypoints;
|
||||
TEST_P(Features2D_drawKeypoints, Accuracy)
|
||||
{
|
||||
const int cn = CV_MAT_CN(GetParam());
|
||||
Mat inpImg(11, 11, GetParam(), Scalar(1, 1, 1, 255)), outImg;
|
||||
|
||||
std::vector<KeyPoint> keypoints(1, KeyPoint(5, 5, 1));
|
||||
drawKeypoints(inpImg, keypoints, outImg, Scalar::all(255));
|
||||
ASSERT_EQ(outImg.channels(), (cn == 4) ? 4 : 3);
|
||||
|
||||
Mat ref_ = getReference_DrawKeypoint(cn);
|
||||
EXPECT_EQ(0, cv::norm(outImg, ref_, NORM_INF));
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Features2D_drawKeypoints, Values(CV_8UC1, CV_8UC3, CV_8UC4));
|
||||
|
||||
typedef testing::TestWithParam<tuple<MatType, MatType> > Features2D_drawMatches;
|
||||
TEST_P(Features2D_drawMatches, Accuracy)
|
||||
{
|
||||
Mat inpImg1(11, 11, get<0>(GetParam()), Scalar(1, 1, 1, 255));
|
||||
Mat inpImg2(11, 11, get<1>(GetParam()), Scalar(2, 2, 2, 255)), outImg2, outImg;
|
||||
|
||||
std::vector<KeyPoint> keypoints(1, KeyPoint(5, 5, 1));
|
||||
|
||||
// Get outImg2 using drawKeypoints assuming that it works correctly (see the test above).
|
||||
drawKeypoints(inpImg2, keypoints, outImg2, Scalar::all(255));
|
||||
ASSERT_EQ(outImg2.channels(), (inpImg2.channels() == 4) ? 4 : 3);
|
||||
|
||||
// Merge both references.
|
||||
const int cn = max(3, max(inpImg1.channels(), inpImg2.channels()));
|
||||
if (cn == 4 && outImg2.channels() == 3)
|
||||
cvtColor(outImg2, outImg2, COLOR_BGR2BGRA);
|
||||
Mat ref_ = getReference_DrawKeypoint(cn);
|
||||
Mat concattedRef;
|
||||
hconcat(ref_, outImg2, concattedRef);
|
||||
|
||||
std::vector<DMatch> matches;
|
||||
drawMatches(inpImg1, keypoints, inpImg2, keypoints, matches, outImg,
|
||||
Scalar::all(255), Scalar::all(255));
|
||||
ASSERT_EQ(outImg.channels(), cn);
|
||||
|
||||
EXPECT_EQ(0, cv::norm(outImg, concattedRef, NORM_INF));
|
||||
}
|
||||
INSTANTIATE_TEST_CASE_P(/**/, Features2D_drawMatches, Combine(
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4),
|
||||
Values(CV_8UC1, CV_8UC3, CV_8UC4)
|
||||
));
|
||||
|
||||
}} // namespace
|
@ -66,6 +66,7 @@
|
||||
#include <functional>
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include <GL/gl.h>
|
||||
#include "opencv2/core/opengl.hpp"
|
||||
#endif
|
||||
|
||||
static const char* trackbar_text =
|
||||
@ -1144,20 +1145,20 @@ static void icvUpdateWindowPos( CvWindow* window )
|
||||
{
|
||||
RECT rmw, rw = icvCalcWindowRect(window );
|
||||
MoveWindow(window->hwnd, rw.left, rw.top,
|
||||
rw.right - rw.left + 1, rw.bottom - rw.top + 1, FALSE);
|
||||
rw.right - rw.left, rw.bottom - rw.top, FALSE);
|
||||
GetClientRect(window->hwnd, &rw);
|
||||
GetWindowRect(window->frame, &rmw);
|
||||
// Resize the mainhWnd window in order to make the bitmap fit into the child window
|
||||
MoveWindow(window->frame, rmw.left, rmw.top,
|
||||
rmw.right - rmw.left + size.cx - rw.right + rw.left,
|
||||
rmw.bottom - rmw.top + size.cy - rw.bottom + rw.top, TRUE );
|
||||
size.cx + (rmw.right - rmw.left) - (rw.right - rw.left),
|
||||
size.cy + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE );
|
||||
}
|
||||
}
|
||||
|
||||
rect = icvCalcWindowRect(window);
|
||||
MoveWindow(window->hwnd, rect.left, rect.top,
|
||||
rect.right - rect.left + 1,
|
||||
rect.bottom - rect.top + 1, TRUE );
|
||||
rect.right - rect.left,
|
||||
rect.bottom - rect.top, TRUE );
|
||||
}
|
||||
|
||||
CV_IMPL void
|
||||
@ -1263,18 +1264,18 @@ CV_IMPL void cvResizeWindow(const char* name, int width, int height )
|
||||
{
|
||||
rw = icvCalcWindowRect(window);
|
||||
MoveWindow(window->hwnd, rw.left, rw.top,
|
||||
rw.right - rw.left + 1, rw.bottom - rw.top + 1, FALSE);
|
||||
rw.right - rw.left, rw.bottom - rw.top, FALSE);
|
||||
GetClientRect(window->hwnd, &rw);
|
||||
GetWindowRect(window->frame, &rmw);
|
||||
// Resize the mainhWnd window in order to make the bitmap fit into the child window
|
||||
MoveWindow(window->frame, rmw.left, rmw.top,
|
||||
rmw.right - rmw.left + width - rw.right + rw.left,
|
||||
rmw.bottom - rmw.top + height - rw.bottom + rw.top, TRUE);
|
||||
width + (rmw.right - rmw.left) - (rw.right - rw.left),
|
||||
height + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE);
|
||||
}
|
||||
|
||||
rect = icvCalcWindowRect(window);
|
||||
MoveWindow(window->hwnd, rect.left, rect.top,
|
||||
rect.right - rect.left + 1, rect.bottom - rect.top + 1, TRUE);
|
||||
rect.right - rect.left, rect.bottom - rect.top, TRUE);
|
||||
|
||||
__END__;
|
||||
}
|
||||
@ -1421,7 +1422,20 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam )
|
||||
GetClientRect( window->hwnd, &rect );
|
||||
|
||||
SIZE size = {0,0};
|
||||
icvGetBitmapData( window, &size, 0, 0 );
|
||||
#ifdef HAVE_OPENGL
|
||||
if (window->useGl)
|
||||
{
|
||||
cv::ogl::Texture2D* texObj = static_cast<cv::ogl::Texture2D*>(window->glDrawData);
|
||||
size.cx = texObj->cols();
|
||||
size.cy = texObj->rows();
|
||||
}
|
||||
else
|
||||
{
|
||||
icvGetBitmapData(window, &size, 0, 0);
|
||||
}
|
||||
#else
|
||||
icvGetBitmapData(window, &size, 0, 0);
|
||||
#endif
|
||||
|
||||
window->on_mouse( event, pt.x*size.cx/MAX(rect.right - rect.left,1),
|
||||
pt.y*size.cy/MAX(rect.bottom - rect.top,1), flags,
|
||||
@ -1561,8 +1575,8 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM
|
||||
RECT rect = icvCalcWindowRect(window);
|
||||
pos->x = rect.left;
|
||||
pos->y = rect.top;
|
||||
pos->cx = rect.right - rect.left + 1;
|
||||
pos->cy = rect.bottom - rect.top + 1;
|
||||
pos->cx = rect.right - rect.left;
|
||||
pos->cy = rect.bottom - rect.top;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -1615,7 +1629,21 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM
|
||||
SIZE size = {0, 0};
|
||||
|
||||
GetClientRect( window->hwnd, &rect );
|
||||
|
||||
#ifdef HAVE_OPENGL
|
||||
if (window->useGl)
|
||||
{
|
||||
cv::ogl::Texture2D* texObj = static_cast<cv::ogl::Texture2D*>(window->glDrawData);
|
||||
size.cx = texObj->cols();
|
||||
size.cy = texObj->rows();
|
||||
}
|
||||
else
|
||||
{
|
||||
icvGetBitmapData(window, &size, 0, 0);
|
||||
}
|
||||
#else
|
||||
icvGetBitmapData( window, &size, 0, 0 );
|
||||
#endif
|
||||
|
||||
window->on_mouse( event, pt.x*size.cx/MAX(rect.right - rect.left,1),
|
||||
pt.y*size.cy/MAX(rect.bottom - rect.top,1), flags,
|
||||
|
@ -587,6 +587,7 @@ struct RowVec_8u32s
|
||||
i += v_uint32::nlanes;
|
||||
}
|
||||
}
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1083,6 +1084,7 @@ struct SymmRowSmallVec_8u32s
|
||||
}
|
||||
}
|
||||
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1106,6 +1108,8 @@ struct SymmColumnVec_32s8u
|
||||
int operator()(const uchar** _src, uchar* dst, int width) const
|
||||
{
|
||||
int _ksize = kernel.rows + kernel.cols - 1;
|
||||
if( _ksize == 1 )
|
||||
return 0;
|
||||
int ksize2 = _ksize/2;
|
||||
const float* ky = kernel.ptr<float>() + ksize2;
|
||||
int i = 0, k;
|
||||
@ -1115,9 +1119,8 @@ struct SymmColumnVec_32s8u
|
||||
v_float32 d4 = vx_setall_f32(delta);
|
||||
if( symmetrical )
|
||||
{
|
||||
if (_ksize == 1)
|
||||
return 0;
|
||||
v_float32 f0 = vx_setall_f32(ky[0]);
|
||||
v_float32 f1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
|
||||
{
|
||||
const int* S = src[0] + i;
|
||||
@ -1125,11 +1128,17 @@ struct SymmColumnVec_32s8u
|
||||
v_float32 s1 = v_muladd(v_cvt_f32(vx_load(S + v_int32::nlanes)), f0, d4);
|
||||
v_float32 s2 = v_muladd(v_cvt_f32(vx_load(S + 2*v_int32::nlanes)), f0, d4);
|
||||
v_float32 s3 = v_muladd(v_cvt_f32(vx_load(S + 3*v_int32::nlanes)), f0, d4);
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
const int* S0 = src[1] + i;
|
||||
const int* S1 = src[-1] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) + vx_load(S1)), f1, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) + vx_load(S1 + v_int32::nlanes)), f1, s1);
|
||||
s2 = v_muladd(v_cvt_f32(vx_load(S0 + 2 * v_int32::nlanes) + vx_load(S1 + 2 * v_int32::nlanes)), f1, s2);
|
||||
s3 = v_muladd(v_cvt_f32(vx_load(S0 + 3 * v_int32::nlanes) + vx_load(S1 + 3 * v_int32::nlanes)), f1, s3);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(ky[k]);
|
||||
const int* S0 = src[k] + i;
|
||||
const int* S1 = src[-k] + i;
|
||||
S0 = src[k] + i;
|
||||
S1 = src[-k] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) + vx_load(S1)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) + vx_load(S1 + v_int32::nlanes)), f, s1);
|
||||
s2 = v_muladd(v_cvt_f32(vx_load(S0 + 2*v_int32::nlanes) + vx_load(S1 + 2*v_int32::nlanes)), f, s2);
|
||||
@ -1142,11 +1151,15 @@ struct SymmColumnVec_32s8u
|
||||
const int* S = src[0] + i;
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(vx_load(S)), f0, d4);
|
||||
v_float32 s1 = v_muladd(v_cvt_f32(vx_load(S + v_int32::nlanes)), f0, d4);
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
const int* S0 = src[1] + i;
|
||||
const int* S1 = src[-1] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) + vx_load(S1)), f1, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) + vx_load(S1 + v_int32::nlanes)), f1, s1);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(ky[k]);
|
||||
const int* S0 = src[k] + i;
|
||||
const int* S1 = src[-k] + i;
|
||||
S0 = src[k] + i;
|
||||
S1 = src[-k] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) + vx_load(S1)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) + vx_load(S1 + v_int32::nlanes)), f, s1);
|
||||
}
|
||||
@ -1160,7 +1173,8 @@ struct SymmColumnVec_32s8u
|
||||
#endif
|
||||
{
|
||||
v_float32x4 s0 = v_muladd(v_cvt_f32(v_load(src[0] + i)), v_setall_f32(ky[0]), v_setall_f32(delta));
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
s0 = v_muladd(v_cvt_f32(v_load(src[1] + i) + v_load(src[-1] + i)), v_setall_f32(ky[1]), s0);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
s0 = v_muladd(v_cvt_f32(v_load(src[k] + i) + v_load(src[-k] + i)), v_setall_f32(ky[k]), s0);
|
||||
v_int32x4 s32 = v_round(s0);
|
||||
v_int16x8 s16 = v_pack(s32, s32);
|
||||
@ -1170,17 +1184,20 @@ struct SymmColumnVec_32s8u
|
||||
}
|
||||
else
|
||||
{
|
||||
v_float32 f1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
|
||||
{
|
||||
v_float32 s0 = d4;
|
||||
v_float32 s1 = d4;
|
||||
v_float32 s2 = d4;
|
||||
v_float32 s3 = d4;
|
||||
for ( k = 1; k <= ksize2; k++ )
|
||||
const int* S0 = src[1] + i;
|
||||
const int* S1 = src[-1] + i;
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(vx_load(S0) - vx_load(S1)), f1, d4);
|
||||
v_float32 s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) - vx_load(S1 + v_int32::nlanes)), f1, d4);
|
||||
v_float32 s2 = v_muladd(v_cvt_f32(vx_load(S0 + 2 * v_int32::nlanes) - vx_load(S1 + 2 * v_int32::nlanes)), f1, d4);
|
||||
v_float32 s3 = v_muladd(v_cvt_f32(vx_load(S0 + 3 * v_int32::nlanes) - vx_load(S1 + 3 * v_int32::nlanes)), f1, d4);
|
||||
for ( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(ky[k]);
|
||||
const int* S0 = src[k] + i;
|
||||
const int* S1 = src[-k] + i;
|
||||
S0 = src[k] + i;
|
||||
S1 = src[-k] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) - vx_load(S1)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) - vx_load(S1 + v_int32::nlanes)), f, s1);
|
||||
s2 = v_muladd(v_cvt_f32(vx_load(S0 + 2*v_int32::nlanes) - vx_load(S1 + 2*v_int32::nlanes)), f, s2);
|
||||
@ -1190,13 +1207,15 @@ struct SymmColumnVec_32s8u
|
||||
}
|
||||
if( i <= width - v_uint16::nlanes )
|
||||
{
|
||||
v_float32 s0 = d4;
|
||||
v_float32 s1 = d4;
|
||||
for ( k = 1; k <= ksize2; k++ )
|
||||
const int* S0 = src[1] + i;
|
||||
const int* S1 = src[-1] + i;
|
||||
v_float32 s0 = v_muladd(v_cvt_f32(vx_load(S0) - vx_load(S1)), f1, d4);
|
||||
v_float32 s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) - vx_load(S1 + v_int32::nlanes)), f1, d4);
|
||||
for ( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(ky[k]);
|
||||
const int* S0 = src[k] + i;
|
||||
const int* S1 = src[-k] + i;
|
||||
S0 = src[k] + i;
|
||||
S1 = src[-k] + i;
|
||||
s0 = v_muladd(v_cvt_f32(vx_load(S0) - vx_load(S1)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(vx_load(S0 + v_int32::nlanes) - vx_load(S1 + v_int32::nlanes)), f, s1);
|
||||
}
|
||||
@ -1209,8 +1228,8 @@ struct SymmColumnVec_32s8u
|
||||
if( i <= width - v_int32x4::nlanes )
|
||||
#endif
|
||||
{
|
||||
v_float32x4 s0 = v_setall_f32(delta);
|
||||
for (k = 1; k <= ksize2; k++)
|
||||
v_float32x4 s0 = v_muladd(v_cvt_f32(v_load(src[1] + i) - v_load(src[-1] + i)), v_setall_f32(ky[1]), v_setall_f32(delta));
|
||||
for (k = 2; k <= ksize2; k++)
|
||||
s0 = v_muladd(v_cvt_f32(v_load(src[k] + i) - v_load(src[-k] + i)), v_setall_f32(ky[k]), s0);
|
||||
v_int32x4 s32 = v_round(s0);
|
||||
v_int16x8 s16 = v_pack(s32, s32);
|
||||
@ -1219,6 +1238,7 @@ struct SymmColumnVec_32s8u
|
||||
}
|
||||
}
|
||||
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1250,57 +1270,104 @@ struct SymmColumnSmallVec_32s16s
|
||||
short* dst = (short*)_dst;
|
||||
|
||||
v_float32 df4 = vx_setall_f32(delta);
|
||||
v_int32 d4 = v_round(df4);
|
||||
int d = cvRound(delta);
|
||||
v_int16 d8 = vx_setall_s16((short)d);
|
||||
if( symmetrical )
|
||||
{
|
||||
if( ky[0] == 2 && ky[1] == 1 )
|
||||
{
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_int32 s0 = vx_load(S1 + i);
|
||||
v_int32 s1 = vx_load(S1 + i + v_int32::nlanes);
|
||||
v_int32 s2 = vx_load(S1 + i + 2*v_int32::nlanes);
|
||||
v_int32 s3 = vx_load(S1 + i + 3*v_int32::nlanes);
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) + (s0 + s0), vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) + (s1 + s1)) + d8);
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(vx_load(S0 + i + 2*v_int32::nlanes) + vx_load(S2 + i + 2*v_int32::nlanes) + (s2 + s2),
|
||||
vx_load(S0 + i + 3*v_int32::nlanes) + vx_load(S2 + i + 3*v_int32::nlanes) + (s3 + s3)) + d8);
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_int32 sl = vx_load(S1 + i);
|
||||
v_int32 sh = vx_load(S1 + i + v_int32::nlanes);
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) + d4 + (sl + sl), vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) + d4 + (sh + sh)));
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) + (sl + sl), vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) + (sh + sh)) + d8);
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_int32 s = vx_load(S1 + i);
|
||||
v_pack_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + d4 + (s + s));
|
||||
v_pack_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + vx_setall_s32(d) + (s + s));
|
||||
i += v_int32::nlanes;
|
||||
}
|
||||
}
|
||||
else if( ky[0] == -2 && ky[1] == 1 )
|
||||
{
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_int32 s0 = vx_load(S1 + i);
|
||||
v_int32 s1 = vx_load(S1 + i + v_int32::nlanes);
|
||||
v_int32 s2 = vx_load(S1 + i + 2*v_int32::nlanes);
|
||||
v_int32 s3 = vx_load(S1 + i + 3*v_int32::nlanes);
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) - (s0 + s0),
|
||||
vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) - (s1 + s1)) + d8);
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(vx_load(S0 + i + 2*v_int32::nlanes) + vx_load(S2 + i + 2*v_int32::nlanes) - (s2 + s2),
|
||||
vx_load(S0 + i + 3*v_int32::nlanes) + vx_load(S2 + i + 3*v_int32::nlanes) - (s3 + s3)) + d8);
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_int32 sl = vx_load(S1 + i);
|
||||
v_int32 sh = vx_load(S1 + i + v_int32::nlanes);
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) + d4 - (sl + sl), vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) + d4 - (sh + sh)));
|
||||
v_store(dst + i, v_pack(vx_load(S0 + i) + vx_load(S2 + i) - (sl + sl), vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes) - (sh + sh)) + d8);
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_int32 s = vx_load(S1 + i);
|
||||
v_pack_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + d4 - (s + s));
|
||||
v_pack_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + vx_setall_s32(d) - (s + s));
|
||||
i += v_int32::nlanes;
|
||||
}
|
||||
}
|
||||
#if CV_NEON
|
||||
else if( ky[0] == (float)((int)ky[0]) && ky[1] == (float)((int)ky[1]) )
|
||||
{
|
||||
v_int32 k0 = vx_setall_s32((int)ky[0]), k1 = vx_setall_s32((int)ky[1]);
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
v_int32 d4 = vx_setall_s32(d);
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_muladd(vx_load(S0 + i) + vx_load(S2 + i), k1, v_muladd(vx_load(S1 + i), k0, d4)),
|
||||
v_muladd(vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes), k1, v_muladd(vx_load(S1 + i + v_int32::nlanes), k0, d4))));
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(v_muladd(vx_load(S0 + i + 2*v_int32::nlanes) + vx_load(S2 + i + 2*v_int32::nlanes), k1, v_muladd(vx_load(S1 + i + 2*v_int32::nlanes), k0, d4)),
|
||||
v_muladd(vx_load(S0 + i + 3*v_int32::nlanes) + vx_load(S2 + i + 3*v_int32::nlanes), k1, v_muladd(vx_load(S1 + i + 3*v_int32::nlanes), k0, d4))));
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_muladd(vx_load(S0 + i) + vx_load(S2 + i), k1, v_muladd(vx_load(S1 + i), k0, d4)),
|
||||
v_muladd(vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes), k1, v_muladd(vx_load(S1 + i + v_int32::nlanes), k0, d4))));
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_pack_store(dst + i, v_muladd(vx_load(S0 + i) + vx_load(S2 + i), k1, v_muladd(vx_load(S1 + i), k0, d4)));
|
||||
i += v_int32::nlanes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
v_float32 k0 = vx_setall_f32(ky[0]), k1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S0 + i) + vx_load(S2 + i)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i)), k0, df4))),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i + v_int32::nlanes)), k0, df4)))));
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S0 + i + 2*v_int32::nlanes) + vx_load(S2 + i + 2*v_int32::nlanes)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i + 2*v_int32::nlanes)), k0, df4))),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S0 + i + 3*v_int32::nlanes) + vx_load(S2 + i + 3*v_int32::nlanes)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i + 3*v_int32::nlanes)), k0, df4)))));
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S0 + i) + vx_load(S2 + i)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i)), k0, df4))),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S0 + i + v_int32::nlanes) + vx_load(S2 + i + v_int32::nlanes)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i + v_int32::nlanes)), k0, df4)))));
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_pack_store(dst + i, v_round(v_muladd(v_cvt_f32(vx_load(S0 + i) + vx_load(S2 + i)), k1, v_muladd(v_cvt_f32(vx_load(S1 + i)), k0, df4))));
|
||||
@ -1314,20 +1381,38 @@ struct SymmColumnSmallVec_32s16s
|
||||
{
|
||||
if( ky[1] < 0 )
|
||||
std::swap(S0, S2);
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
v_store(dst + i, v_pack(vx_load(S2 + i) - vx_load(S0 + i) + d4, vx_load(S2 + i + v_int32::nlanes) - vx_load(S0 + i + v_int32::nlanes) + d4));
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(vx_load(S2 + i) - vx_load(S0 + i), vx_load(S2 + i + v_int32::nlanes) - vx_load(S0 + i + v_int32::nlanes)) + d8);
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(vx_load(S2 + i + 2*v_int32::nlanes) - vx_load(S0 + i + 2*v_int32::nlanes), vx_load(S2 + i + 3*v_int32::nlanes) - vx_load(S0 + i + 3*v_int32::nlanes)) + d8);
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(vx_load(S2 + i) - vx_load(S0 + i), vx_load(S2 + i + v_int32::nlanes) - vx_load(S0 + i + v_int32::nlanes)) + d8);
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_pack_store(dst + i, vx_load(S2 + i) - vx_load(S0 + i) + d4);
|
||||
v_pack_store(dst + i, vx_load(S2 + i) - vx_load(S0 + i) + vx_setall_s32(d));
|
||||
i += v_int32::nlanes;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
v_float32 k1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S2 + i) - vx_load(S0 + i)), k1, df4)),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S2 + i + v_int32::nlanes) - vx_load(S0 + i + v_int32::nlanes)), k1, df4))));
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S2 + i + 2*v_int32::nlanes) - vx_load(S0 + i + 2*v_int32::nlanes)), k1, df4)),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S2 + i + 3*v_int32::nlanes) - vx_load(S0 + i + 3*v_int32::nlanes)), k1, df4))));
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_store(dst + i, v_pack(v_round(v_muladd(v_cvt_f32(vx_load(S2 + i) - vx_load(S0 + i)), k1, df4)),
|
||||
v_round(v_muladd(v_cvt_f32(vx_load(S2 + i + v_int32::nlanes) - vx_load(S0 + i + v_int32::nlanes)), k1, df4))));
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_int32::nlanes )
|
||||
{
|
||||
v_pack_store(dst + i, v_round(v_muladd(v_cvt_f32(vx_load(S2 + i) - vx_load(S0 + i)), k1, df4)));
|
||||
@ -1336,6 +1421,7 @@ struct SymmColumnSmallVec_32s16s
|
||||
}
|
||||
}
|
||||
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1362,19 +1448,43 @@ struct RowVec_16s32f
|
||||
const float* _kx = kernel.ptr<float>();
|
||||
width *= cn;
|
||||
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
const short* src = (const short*)_src + i;
|
||||
v_float32 s0 = vx_setzero_f32();
|
||||
v_float32 s1 = vx_setzero_f32();
|
||||
v_float32 s2 = vx_setzero_f32();
|
||||
v_float32 s3 = vx_setzero_f32();
|
||||
for( k = 0; k < _ksize; k++, src += cn )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(_kx[k]);
|
||||
v_int16 xl = vx_load(src);
|
||||
v_int16 xh = vx_load(src + v_int16::nlanes);
|
||||
s0 = v_muladd(v_cvt_f32(v_expand_low(xl)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(v_expand_high(xl)), f, s1);
|
||||
s2 = v_muladd(v_cvt_f32(v_expand_low(xh)), f, s2);
|
||||
s3 = v_muladd(v_cvt_f32(v_expand_high(xh)), f, s3);
|
||||
}
|
||||
v_store(dst + i, s0);
|
||||
v_store(dst + i + v_float32::nlanes, s1);
|
||||
v_store(dst + i + 2*v_float32::nlanes, s2);
|
||||
v_store(dst + i + 3*v_float32::nlanes, s3);
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
const short* src = (const short*)_src + i;
|
||||
v_float32 s0 = vx_setzero_f32();
|
||||
v_float32 s1 = vx_setzero_f32();
|
||||
for( k = 0; k < _ksize; k++, src += cn )
|
||||
{
|
||||
v_float32 f = vx_setall_f32(_kx[k]);
|
||||
v_int16 x = vx_load(src);
|
||||
s0 = v_muladd(v_cvt_f32(v_expand_low(x)), vx_setall_f32(_kx[k]), s0);
|
||||
s1 = v_muladd(v_cvt_f32(v_expand_high(x)), vx_setall_f32(_kx[k]), s1);
|
||||
s0 = v_muladd(v_cvt_f32(v_expand_low(x)), f, s0);
|
||||
s1 = v_muladd(v_cvt_f32(v_expand_high(x)), f, s1);
|
||||
}
|
||||
v_store(dst + i, s0);
|
||||
v_store(dst + i + v_float32::nlanes, s1);
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_float32::nlanes )
|
||||
{
|
||||
@ -1385,6 +1495,7 @@ struct RowVec_16s32f
|
||||
v_store(dst + i, s0);
|
||||
i += v_float32::nlanes;
|
||||
}
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1406,6 +1517,8 @@ struct SymmColumnVec_32f16s
|
||||
int operator()(const uchar** _src, uchar* _dst, int width) const
|
||||
{
|
||||
int _ksize = kernel.rows + kernel.cols - 1;
|
||||
if( _ksize == 1 )
|
||||
return 0;
|
||||
int ksize2 = _ksize / 2;
|
||||
const float* ky = kernel.ptr<float>() + ksize2;
|
||||
int i = 0, k;
|
||||
@ -1416,25 +1529,49 @@ struct SymmColumnVec_32f16s
|
||||
v_float32 d4 = vx_setall_f32(delta);
|
||||
if( symmetrical )
|
||||
{
|
||||
if (_ksize == 1)
|
||||
return 0;
|
||||
v_float32 k0 = vx_setall_f32(ky[0]);
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
v_float32 k1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
|
||||
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), k0, d4);
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
v_float32 s2 = v_muladd(vx_load(src[0] + i + 2*v_float32::nlanes), k0, d4);
|
||||
v_float32 s3 = v_muladd(vx_load(src[0] + i + 3*v_float32::nlanes), k0, d4);
|
||||
s0 = v_muladd(vx_load(src[1] + i) + vx_load(src[-1] + i), k1, s0);
|
||||
s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) + vx_load(src[-1] + i + v_float32::nlanes), k1, s1);
|
||||
s2 = v_muladd(vx_load(src[1] + i + 2*v_float32::nlanes) + vx_load(src[-1] + i + 2*v_float32::nlanes), k1, s2);
|
||||
s3 = v_muladd(vx_load(src[1] + i + 3*v_float32::nlanes) + vx_load(src[-1] + i + 3*v_float32::nlanes), k1, s3);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 k1 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), k1, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) + vx_load(src[-k] + i + v_float32::nlanes), k1, s1);
|
||||
v_float32 k2 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), k2, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) + vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
|
||||
s2 = v_muladd(vx_load(src[k] + i + 2*v_float32::nlanes) + vx_load(src[-k] + i + 2*v_float32::nlanes), k2, s2);
|
||||
s3 = v_muladd(vx_load(src[k] + i + 3*v_float32::nlanes) + vx_load(src[-k] + i + 3*v_float32::nlanes), k2, s3);
|
||||
}
|
||||
v_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(v_round(s2), v_round(s3)));
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
|
||||
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), k0, d4);
|
||||
s0 = v_muladd(vx_load(src[1] + i) + vx_load(src[-1] + i), k1, s0);
|
||||
s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) + vx_load(src[-1] + i + v_float32::nlanes), k1, s1);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 k2 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), k2, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) + vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
|
||||
}
|
||||
v_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_float32::nlanes )
|
||||
{
|
||||
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
s0 = v_muladd(vx_load(src[1] + i) + vx_load(src[-1] + i), k1, s0);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), vx_setall_f32(ky[k]), s0);
|
||||
v_pack_store(dst + i, v_round(s0));
|
||||
i += v_float32::nlanes;
|
||||
@ -1442,28 +1579,48 @@ struct SymmColumnVec_32f16s
|
||||
}
|
||||
else
|
||||
{
|
||||
for( ; i <= width - v_int16::nlanes; i += v_int16::nlanes )
|
||||
v_float32 k1 = vx_setall_f32(ky[1]);
|
||||
for( ; i <= width - 2*v_int16::nlanes; i += 2*v_int16::nlanes )
|
||||
{
|
||||
v_float32 s0 = d4;
|
||||
v_float32 s1 = d4;
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
|
||||
v_float32 s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) - vx_load(src[-1] + i + v_float32::nlanes), k1, d4);
|
||||
v_float32 s2 = v_muladd(vx_load(src[1] + i + 2*v_float32::nlanes) - vx_load(src[-1] + i + 2*v_float32::nlanes), k1, d4);
|
||||
v_float32 s3 = v_muladd(vx_load(src[1] + i + 3*v_float32::nlanes) - vx_load(src[-1] + i + 3*v_float32::nlanes), k1, d4);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 k1 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), k1, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) - vx_load(src[-k] + i + v_float32::nlanes), k1, s1);
|
||||
v_float32 k2 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), k2, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) - vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
|
||||
s2 = v_muladd(vx_load(src[k] + i + 2*v_float32::nlanes) - vx_load(src[-k] + i + 2*v_float32::nlanes), k2, s2);
|
||||
s3 = v_muladd(vx_load(src[k] + i + 3*v_float32::nlanes) - vx_load(src[-k] + i + 3*v_float32::nlanes), k2, s3);
|
||||
}
|
||||
v_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
v_store(dst + i + v_int16::nlanes, v_pack(v_round(s2), v_round(s3)));
|
||||
}
|
||||
if( i <= width - v_int16::nlanes )
|
||||
{
|
||||
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
|
||||
v_float32 s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) - vx_load(src[-1] + i + v_float32::nlanes), k1, d4);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
{
|
||||
v_float32 k2 = vx_setall_f32(ky[k]);
|
||||
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), k2, s0);
|
||||
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) - vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
|
||||
}
|
||||
v_store(dst + i, v_pack(v_round(s0), v_round(s1)));
|
||||
i += v_int16::nlanes;
|
||||
}
|
||||
if( i <= width - v_float32::nlanes )
|
||||
{
|
||||
v_float32 s0 = d4;
|
||||
for( k = 1; k <= ksize2; k++ )
|
||||
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
|
||||
for( k = 2; k <= ksize2; k++ )
|
||||
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), vx_setall_f32(ky[k]), s0);
|
||||
v_pack_store(dst + i, v_round(s0));
|
||||
i += v_float32::nlanes;
|
||||
}
|
||||
}
|
||||
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1505,6 +1662,7 @@ struct RowVec_32f
|
||||
}
|
||||
#endif
|
||||
int _ksize = kernel.rows + kernel.cols - 1;
|
||||
CV_DbgAssert(_ksize > 0);
|
||||
const float* src0 = (const float*)_src;
|
||||
float* dst = (float*)_dst;
|
||||
const float* _kx = kernel.ptr<float>();
|
||||
@ -1516,14 +1674,55 @@ struct RowVec_32f
|
||||
if (haveAVX2)
|
||||
return RowVec_32f_AVX(src0, _kx, dst, width, cn, _ksize);
|
||||
#endif
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
|
||||
v_float32 k0 = vx_setall_f32(_kx[0]);
|
||||
for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes )
|
||||
{
|
||||
const float* src = src0 + i;
|
||||
v_float32 s0 = vx_setzero_f32();
|
||||
for( k = 0; k < _ksize; k++, src += cn )
|
||||
v_float32 s0 = vx_load(src) * k0;
|
||||
v_float32 s1 = vx_load(src + v_float32::nlanes) * k0;
|
||||
v_float32 s2 = vx_load(src + 2*v_float32::nlanes) * k0;
|
||||
v_float32 s3 = vx_load(src + 3*v_float32::nlanes) * k0;
|
||||
src += cn;
|
||||
for( k = 1; k < _ksize; k++, src += cn )
|
||||
{
|
||||
v_float32 k1 = vx_setall_f32(_kx[k]);
|
||||
s0 = v_muladd(vx_load(src), k1, s0);
|
||||
s1 = v_muladd(vx_load(src + v_float32::nlanes), k1, s1);
|
||||
s2 = v_muladd(vx_load(src + 2*v_float32::nlanes), k1, s2);
|
||||
s3 = v_muladd(vx_load(src + 3*v_float32::nlanes), k1, s3);
|
||||
}
|
||||
v_store(dst + i, s0);
|
||||
v_store(dst + i + v_float32::nlanes, s1);
|
||||
v_store(dst + i + 2*v_float32::nlanes, s2);
|
||||
v_store(dst + i + 3*v_float32::nlanes, s3);
|
||||
}
|
||||
if( i <= width - 2*v_float32::nlanes )
|
||||
{
|
||||
const float* src = src0 + i;
|
||||
v_float32 s0 = vx_load(src) * k0;
|
||||
v_float32 s1 = vx_load(src + v_float32::nlanes) * k0;
|
||||
src += cn;
|
||||
for( k = 1; k < _ksize; k++, src += cn )
|
||||
{
|
||||
v_float32 k1 = vx_setall_f32(_kx[k]);
|
||||
s0 = v_muladd(vx_load(src), k1, s0);
|
||||
s1 = v_muladd(vx_load(src + v_float32::nlanes), k1, s1);
|
||||
}
|
||||
v_store(dst + i, s0);
|
||||
v_store(dst + i + v_float32::nlanes, s1);
|
||||
i += 2*v_float32::nlanes;
|
||||
}
|
||||
if( i <= width - v_float32::nlanes )
|
||||
{
|
||||
const float* src = src0 + i;
|
||||
v_float32 s0 = vx_load(src) * k0;
|
||||
src += cn;
|
||||
for( k = 1; k < _ksize; k++, src += cn )
|
||||
s0 = v_muladd(vx_load(src), vx_setall_f32(_kx[k]), s0);
|
||||
v_store(dst + i, s0);
|
||||
i += v_float32::nlanes;
|
||||
}
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1584,6 +1783,8 @@ struct SymmRowSmallVec_32f
|
||||
int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
|
||||
{
|
||||
int i = 0, _ksize = kernel.rows + kernel.cols - 1;
|
||||
if( _ksize == 1 )
|
||||
return 0;
|
||||
float* dst = (float*)_dst;
|
||||
const float* src = (const float*)_src + (_ksize/2)*cn;
|
||||
bool symmetrical = (symmetryType & KERNEL_SYMMETRICAL) != 0;
|
||||
@ -1592,15 +1793,28 @@ struct SymmRowSmallVec_32f
|
||||
|
||||
if( symmetrical )
|
||||
{
|
||||
if( _ksize == 1 )
|
||||
return 0;
|
||||
if( _ksize == 3 )
|
||||
{
|
||||
if( fabs(kx[0]) == 2 && kx[1] == 1 )
|
||||
{
|
||||
#if CV_FMA3 || CV_AVX2
|
||||
v_float32 k0 = vx_setall_f32(kx[0]);
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes, src += v_float32::nlanes )
|
||||
v_store(dst + i, v_muladd(vx_load(src), k0, vx_load(src - cn) + vx_load(src + cn)));
|
||||
#else
|
||||
if( kx[0] > 0 )
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes, src += v_float32::nlanes )
|
||||
{
|
||||
v_float32 x = vx_load(src);
|
||||
v_store(dst + i, vx_load(src - cn) + vx_load(src + cn) + (x + x));
|
||||
}
|
||||
else
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes, src += v_float32::nlanes )
|
||||
{
|
||||
v_float32 x = vx_load(src);
|
||||
v_store(dst + i, vx_load(src - cn) + vx_load(src + cn) - (x + x));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1613,9 +1827,17 @@ struct SymmRowSmallVec_32f
|
||||
{
|
||||
if( kx[0] == -2 && kx[1] == 0 && kx[2] == 1 )
|
||||
{
|
||||
#if CV_FMA3 || CV_AVX2
|
||||
v_float32 k0 = vx_setall_f32(-2);
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes, src += v_float32::nlanes )
|
||||
v_store(dst + i, v_muladd(vx_load(src), k0, vx_load(src - 2*cn) + vx_load(src + 2*cn)));
|
||||
#else
|
||||
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes, src += v_float32::nlanes )
|
||||
{
|
||||
v_float32 x = vx_load(src);
|
||||
v_store(dst + i, vx_load(src - 2*cn) + vx_load(src + 2*cn) - (x + x));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1647,6 +1869,7 @@ struct SymmRowSmallVec_32f
|
||||
}
|
||||
}
|
||||
|
||||
vx_cleanup();
|
||||
return i;
|
||||
}
|
||||
|
||||
@ -1688,12 +1911,47 @@ struct SymmColumnVec_32f
return SymmColumnVec_32f_Symm_AVX(src, ky, dst, delta, width, ksize2);
#endif
const v_float32 d4 = vx_setall_f32(delta);
for ( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
const v_float32 k0 = vx_setall_f32(ky[0]);
for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[0] + i), vx_setall_f32(ky[0]), d4);
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), k0, d4);
v_float32 s2 = v_muladd(vx_load(src[0] + i + 2*v_float32::nlanes), k0, d4);
v_float32 s3 = v_muladd(vx_load(src[0] + i + 3*v_float32::nlanes), k0, d4);
for( k = 1; k <= ksize2; k++ )
{
v_float32 k1 = vx_setall_f32(ky[k]);
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), k1, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) + vx_load(src[-k] + i + v_float32::nlanes), k1, s1);
s2 = v_muladd(vx_load(src[k] + i + 2*v_float32::nlanes) + vx_load(src[-k] + i + 2*v_float32::nlanes), k1, s2);
s3 = v_muladd(vx_load(src[k] + i + 3*v_float32::nlanes) + vx_load(src[-k] + i + 3*v_float32::nlanes), k1, s3);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
v_store(dst + i + 2*v_float32::nlanes, s2);
v_store(dst + i + 3*v_float32::nlanes, s3);
}
if( i <= width - 2*v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), k0, d4);
for( k = 1; k <= ksize2; k++ )
{
v_float32 k1 = vx_setall_f32(ky[k]);
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), k1, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) + vx_load(src[-k] + i + v_float32::nlanes), k1, s1);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
i += 2*v_float32::nlanes;
}
if( i <= width - v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[0] + i), k0, d4);
for( k = 1; k <= ksize2; k++ )
s0 = v_muladd(vx_load(src[k] + i) + vx_load(src[-k] + i), vx_setall_f32(ky[k]), s0);
v_store(dst + i, s0);
i += v_float32::nlanes;
}
}
else
@ -1702,16 +1960,53 @@ struct SymmColumnVec_32f
if (haveAVX2)
return SymmColumnVec_32f_Unsymm_AVX(src, ky, dst, delta, width, ksize2);
#endif
CV_DbgAssert(ksize2 > 0);
const v_float32 d4 = vx_setall_f32(delta);
for ( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
const v_float32 k1 = vx_setall_f32(ky[1]);
for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes )
{
v_float32 s0 = d4;
for( k = 1; k <= ksize2; k++ )
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
v_float32 s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) - vx_load(src[-1] + i + v_float32::nlanes), k1, d4);
v_float32 s2 = v_muladd(vx_load(src[1] + i + 2*v_float32::nlanes) - vx_load(src[-1] + i + 2*v_float32::nlanes), k1, d4);
v_float32 s3 = v_muladd(vx_load(src[1] + i + 3*v_float32::nlanes) - vx_load(src[-1] + i + 3*v_float32::nlanes), k1, d4);
for( k = 2; k <= ksize2; k++ )
{
v_float32 k2 = vx_setall_f32(ky[k]);
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), k2, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) - vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
s2 = v_muladd(vx_load(src[k] + i + 2*v_float32::nlanes) - vx_load(src[-k] + i + 2*v_float32::nlanes), k2, s2);
s3 = v_muladd(vx_load(src[k] + i + 3*v_float32::nlanes) - vx_load(src[-k] + i + 3*v_float32::nlanes), k2, s3);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
v_store(dst + i + 2*v_float32::nlanes, s2);
v_store(dst + i + 3*v_float32::nlanes, s3);
}
if( i <= width - 2*v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
v_float32 s1 = v_muladd(vx_load(src[1] + i + v_float32::nlanes) - vx_load(src[-1] + i + v_float32::nlanes), k1, d4);
for( k = 2; k <= ksize2; k++ )
{
v_float32 k2 = vx_setall_f32(ky[k]);
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), k2, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes) - vx_load(src[-k] + i + v_float32::nlanes), k2, s1);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
i += 2*v_float32::nlanes;
}
if( i <= width - v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[1] + i) - vx_load(src[-1] + i), k1, d4);
for( k = 2; k <= ksize2; k++ )
s0 = v_muladd(vx_load(src[k] + i) - vx_load(src[-k] + i), vx_setall_f32(ky[k]), s0);
v_store(dst + i, s0);
i += v_float32::nlanes;
}
}

vx_cleanup();
return i;
}

@ -1748,9 +2043,24 @@ struct SymmColumnSmallVec_32f
{
if( fabs(ky[0]) == 2 && ky[1] == 1 )
{
#if CV_FMA3 || CV_AVX2
v_float32 k0 = vx_setall_f32(ky[0]);
for ( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
v_store(dst + i, v_muladd(vx_load(S1 + i), k0, vx_load(S0 + i) + vx_load(S2 + i) + d4));
#else
if(ky[0] > 0)
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
{
v_float32 x = vx_load(S1 + i);
v_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + d4 + (x + x));
}
else
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
{
v_float32 x = vx_load(S1 + i);
v_store(dst + i, vx_load(S0 + i) + vx_load(S2 + i) + d4 - (x + x));
}
#endif
}
else
{
@ -1776,6 +2086,7 @@ struct SymmColumnSmallVec_32f
}
}

vx_cleanup();
return i;
}

@ -1804,19 +2115,27 @@ struct FilterVec_8u

int operator()(const uchar** src, uchar* dst, int width) const
{
CV_DbgAssert(_nz > 0);
const float* kf = (const float*)&coeffs[0];
int i = 0, k, nz = _nz;

v_float32 d4 = vx_setall_f32(delta);
v_float32 f0 = vx_setall_f32(kf[0]);
for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
{
v_float32 s0 = d4, s1 = d4, s2 = d4, s3 = d4;
for( k = 0; k < nz; k++ )
v_uint16 xl, xh;
v_expand(vx_load(src[0] + i), xl, xh);
v_uint32 x0, x1, x2, x3;
v_expand(xl, x0, x1);
v_expand(xh, x2, x3);
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x0)), f0, d4);
v_float32 s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x1)), f0, d4);
v_float32 s2 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x2)), f0, d4);
v_float32 s3 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x3)), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f = vx_setall_f32(kf[k]);
v_uint16 xl, xh;
v_expand(vx_load(src[k] + i), xl, xh);
v_uint32 x0, x1, x2, x3;
v_expand(xl, x0, x1);
v_expand(xh, x2, x3);
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x0)), f, s0);
@ -1828,11 +2147,13 @@ struct FilterVec_8u
}
if( i <= width - v_uint16::nlanes )
{
v_float32 s0 = d4, s1 = d4;
for( k = 0; k < nz; k++ )
v_uint32 x0, x1;
v_expand(vx_load_expand(src[0] + i), x0, x1);
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x0)), f0, d4);
v_float32 s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x1)), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f = vx_setall_f32(kf[k]);
v_uint32 x0, x1;
v_expand(vx_load_expand(src[k] + i), x0, x1);
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x0)), f, s0);
s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(x1)), f, s1);
@ -1846,8 +2167,8 @@ struct FilterVec_8u
if( i <= width - v_int32x4::nlanes )
#endif
{
v_float32x4 s0 = v_setall_f32(delta);
for( k = 0; k < nz; k++ )
v_float32x4 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(src[0] + i))), v_setall_f32(kf[0]), v_setall_f32(delta));
for( k = 1; k < nz; k++ )
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_load_expand_q(src[k] + i))), v_setall_f32(kf[k]), s0);
v_int32x4 s32 = v_round(s0);
v_int16x8 s16 = v_pack(s32, s32);
@ -1855,6 +2176,7 @@ struct FilterVec_8u
i += v_int32x4::nlanes;
}

vx_cleanup();
return i;
}

@ -1879,18 +2201,24 @@ struct FilterVec_8u16s

int operator()(const uchar** src, uchar* _dst, int width) const
{
CV_DbgAssert(_nz > 0);
const float* kf = (const float*)&coeffs[0];
short* dst = (short*)_dst;
int i = 0, k, nz = _nz;

v_float32 d4 = vx_setall_f32(delta);
v_float32 f0 = vx_setall_f32(kf[0]);
for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
{
v_float32 s0 = d4, s1 = d4, s2 = d4, s3 = d4;
for( k = 0; k < nz; k++ )
v_uint16 xl, xh;
v_expand(vx_load(src[0] + i), xl, xh);
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_low(xl))), f0, d4);
v_float32 s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_high(xl))), f0, d4);
v_float32 s2 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_low(xh))), f0, d4);
v_float32 s3 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_high(xh))), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f = vx_setall_f32(kf[k]);
v_uint16 xl, xh;
v_expand(vx_load(src[k] + i), xl, xh);
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_low(xl))), f, s0);
s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_high(xl))), f, s1);
@ -1902,11 +2230,13 @@ struct FilterVec_8u16s
}
if( i <= width - v_uint16::nlanes )
{
v_float32 s0 = d4, s1 = d4;
for( k = 0; k < nz; k++ )
v_uint16 x = vx_load_expand(src[0] + i);
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_low(x))), f0, d4);
v_float32 s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_high(x))), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f = vx_setall_f32(kf[k]);
v_uint16 x = vx_load_expand(src[k] + i);
x = vx_load_expand(src[k] + i);
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_low(x))), f, s0);
s1 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(v_expand_high(x))), f, s1);
}
@ -1915,13 +2245,14 @@ struct FilterVec_8u16s
}
if( i <= width - v_int32::nlanes )
{
v_float32 s0 = d4;
for( k = 0; k < nz; k++ )
v_float32 s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[0] + i))), f0, d4);
for( k = 1; k < nz; k++ )
s0 = v_muladd(v_cvt_f32(v_reinterpret_as_s32(vx_load_expand_q(src[k] + i))), vx_setall_f32(kf[k]), s0);
v_pack_store(dst + i, v_round(s0));
i += v_int32::nlanes;
}

vx_cleanup();
return i;
}

@ -1950,14 +2281,50 @@ struct FilterVec_32f
int i = 0, k, nz = _nz;

v_float32 d4 = vx_setall_f32(delta);
for( ; i <= width - v_float32::nlanes; i += v_float32::nlanes )
v_float32 f0 = vx_setall_f32(kf[0]);
for( ; i <= width - 4*v_float32::nlanes; i += 4*v_float32::nlanes )
{
v_float32 s0 = d4;
for( k = 0; k < nz; k++ )
v_float32 s0 = v_muladd(vx_load(src[0] + i), f0, d4);
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), f0, d4);
v_float32 s2 = v_muladd(vx_load(src[0] + i + 2*v_float32::nlanes), f0, d4);
v_float32 s3 = v_muladd(vx_load(src[0] + i + 3*v_float32::nlanes), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f1 = vx_setall_f32(kf[k]);
s0 = v_muladd(vx_load(src[k] + i), f1, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes), f1, s1);
s2 = v_muladd(vx_load(src[k] + i + 2*v_float32::nlanes), f1, s2);
s3 = v_muladd(vx_load(src[k] + i + 3*v_float32::nlanes), f1, s3);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
v_store(dst + i + 2*v_float32::nlanes, s2);
v_store(dst + i + 3*v_float32::nlanes, s3);
}
if( i <= width - 2*v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[0] + i), f0, d4);
v_float32 s1 = v_muladd(vx_load(src[0] + i + v_float32::nlanes), f0, d4);
for( k = 1; k < nz; k++ )
{
v_float32 f1 = vx_setall_f32(kf[k]);
s0 = v_muladd(vx_load(src[k] + i), f1, s0);
s1 = v_muladd(vx_load(src[k] + i + v_float32::nlanes), f1, s1);
}
v_store(dst + i, s0);
v_store(dst + i + v_float32::nlanes, s1);
i += 2*v_float32::nlanes;
}
if( i <= width - v_float32::nlanes )
{
v_float32 s0 = v_muladd(vx_load(src[0] + i), f0, d4);
for( k = 1; k < nz; k++ )
s0 = v_muladd(vx_load(src[k] + i), vx_setall_f32(kf[k]), s0);
v_store(dst + i, s0);
i += v_float32::nlanes;
}

vx_cleanup();
return i;
}
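
Note: a minimal illustrative sketch (not the library code) of the accumulation pattern that the hunks above unroll by four vectors: one v_muladd chain per output vector, with delta folded into the first term via kf[0]. It assumes OpenCV's universal intrinsics header with CV_SIMD128 enabled; the names and the kf/nz/src/dst/delta setup are placeholders mirroring FilterVec_32f.

    #include <opencv2/core/hal/intrin.hpp>

    static int filterRowSketch(const float** src, const float* kf, int nz,
                               float* dst, float delta, int width)
    {
        int i = 0;
    #if CV_SIMD128
        const cv::v_float32x4 d4 = cv::v_setall_f32(delta);
        const cv::v_float32x4 f0 = cv::v_setall_f32(kf[0]);
        for( ; i <= width - 4; i += 4 )
        {
            // start from kf[0]*src[0] + delta, then fold in the remaining taps
            cv::v_float32x4 s0 = cv::v_muladd(cv::v_load(src[0] + i), f0, d4);
            for( int k = 1; k < nz; k++ )
                s0 = cv::v_muladd(cv::v_load(src[k] + i), cv::v_setall_f32(kf[k]), s0);
            cv::v_store(dst + i, s0);
        }
    #endif
        return i;  // the scalar tail is handled by the caller
    }
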
@ -403,9 +403,9 @@ void CV_FilterTest::get_test_array_types_and_sizes( int test_case_idx,
{
CV_FilterBaseTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
RNG& rng = ts->get_rng();
int depth = cvtest::randInt(rng)%3;
int depth = cvtest::randInt(rng)%4;
int cn = CV_MAT_CN(types[INPUT][0]);
depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : CV_32F;
depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : depth == 2 ? CV_16S : CV_32F;
types[INPUT][0] = types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_MAKETYPE(depth, cn);
}

@ -457,10 +457,11 @@ void CV_DerivBaseTest::get_test_array_types_and_sizes( int test_case_idx,
{
RNG& rng = ts->get_rng();
CV_FilterBaseTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
int depth = cvtest::randInt(rng) % 2;
depth = depth == 0 ? CV_8U : CV_32F;
int depth = cvtest::randInt(rng) % 4;
depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : depth == 2 ? CV_16S : CV_32F;
types[INPUT][0] = CV_MAKETYPE(depth,1);
types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_MAKETYPE(depth==CV_8U?CV_16S:CV_32F,1);
int sameDepth = cvtest::randInt(rng) % 2;
types[OUTPUT][0] = types[REF_OUTPUT][0] = sameDepth ? depth : CV_MAKETYPE(depth==CV_8U?CV_16S:CV_32F,1);
_aperture_size = (cvtest::randInt(rng)%5)*2 - 1;
sizes[INPUT][1] = aperture_size = cvSize(_aperture_size, _aperture_size);
}
@ -2211,4 +2212,27 @@ TEST(Imgproc_MedianBlur, hires_regression_13409)

ASSERT_EQ(0.0, cvtest::norm(dst_hires(Rect(516, 516, 1016, 1016)), dst_ref(Rect(4, 4, 1016, 1016)), NORM_INF));
}

TEST(Imgproc_Sobel, s16_regression_13506)
{
Mat src = (Mat_<short>(8, 16) << 127, 138, 130, 102, 118, 97, 76, 84, 124, 90, 146, 63, 130, 87, 212, 85,
164, 3, 51, 124, 151, 89, 154, 117, 36, 88, 116, 117, 180, 112, 147, 124,
63, 50, 115, 103, 83, 148, 106, 79, 213, 106, 135, 53, 79, 106, 122, 112,
218, 107, 81, 126, 78, 138, 85, 142, 151, 108, 104, 158, 155, 81, 112, 178,
184, 96, 187, 148, 150, 112, 138, 162, 222, 146, 128, 49, 124, 46, 165, 104,
119, 164, 77, 144, 186, 98, 106, 148, 155, 157, 160, 151, 156, 149, 43, 122,
106, 155, 120, 132, 159, 115, 126, 188, 44, 79, 164, 201, 153, 97, 139, 133,
133, 98, 111, 165, 66, 106, 131, 85, 176, 156, 67, 108, 142, 91, 74, 137);
Mat ref = (Mat_<short>(8, 16) << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1020, -796, -489, -469, -247, 317, 760, 1429, 1983, 1384, 254, -459, -899, -1197, -1172, -1058,
2552, 2340, 1617, 591, 9, 96, 722, 1985, 2746, 1916, 676, 9, -635, -1115, -779, -380,
3546, 3349, 2838, 2206, 1388, 669, 938, 1880, 2252, 1785, 1083, 606, 180, -298, -464, -418,
816, 966, 1255, 1652, 1619, 924, 535, 288, 5, 601, 1581, 1870, 1520, 625, -627, -1260,
-782, -610, -395, -267, -122, -42, -317, -1378, -2293, -1451, 596, 1870, 1679, 763, -69, -394,
-882, -681, -463, -818, -1167, -732, -463, -1042, -1604, -1592, -1047, -334, -104, -117, 229, 512,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
Mat dst;
Sobel(src, dst, CV_16S, 0, 1, 5);
ASSERT_EQ(0.0, cvtest::norm(dst, ref, NORM_INF));
}
}} // namespace
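
Note: a rough usage sketch (not the test itself) of the depth combination the new s16_regression_13506 test above covers: Sobel on a CV_16S single-channel source with a CV_16S destination and a 5x5 aperture. The helper name is illustrative.

    #include <opencv2/imgproc.hpp>

    static cv::Mat sobelOn16s(const cv::Mat& src16s)
    {
        CV_Assert(src16s.type() == CV_16SC1);
        cv::Mat dst;
        cv::Sobel(src16s, dst, CV_16S, 0, 1, 5);  // first y-derivative, ksize = 5
        return dst;
    }
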
@ -351,6 +351,7 @@ void FeaturesMatcher::operator ()(const std::vector<ImageFeatures> &features, st
if (features[i].keypoints.size() > 0 && features[j].keypoints.size() > 0 && mask_(i, j))
near_pairs.push_back(std::make_pair(i, j));

pairwise_matches.clear(); // clear history values
pairwise_matches.resize(num_images * num_images);
MatchPairsBody body(*this, features, pairwise_matches, near_pairs);

@ -57,21 +57,26 @@ const int POSE_PAIRS[3][20][2] = {
int main(int argc, char **argv)
{
CommandLineParser parser(argc, argv,
"{ h help | false | print this help message }"
"{ p proto | | (required) model configuration, e.g. hand/pose.prototxt }"
"{ m model | | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
"{ i image | | (required) path to image file (containing a single person, or hand) }"
"{ width | 368 | Preprocess input image by resizing to a specific width. }"
"{ height | 368 | Preprocess input image by resizing to a specific height. }"
"{ t threshold | 0.1 | threshold or confidence value for the heatmap }"
"{ h help | false | print this help message }"
"{ p proto | | (required) model configuration, e.g. hand/pose.prototxt }"
"{ m model | | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
"{ i image | | (required) path to image file (containing a single person, or hand) }"
"{ d dataset | | specify what kind of model was trained. It could be (COCO, MPI, HAND) depends on dataset. }"
"{ width | 368 | Preprocess input image by resizing to a specific width. }"
"{ height | 368 | Preprocess input image by resizing to a specific height. }"
"{ t threshold | 0.1 | threshold or confidence value for the heatmap }"
"{ s scale | 0.003922 | scale for blob }"
);

String modelTxt = samples::findFile(parser.get<string>("proto"));
String modelBin = samples::findFile(parser.get<string>("model"));
String imageFile = samples::findFile(parser.get<String>("image"));
String dataset = parser.get<String>("dataset");
int W_in = parser.get<int>("width");
int H_in = parser.get<int>("height");
float thresh = parser.get<float>("threshold");
float scale = parser.get<float>("scale");

if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
{
cout << "A sample app to demonstrate human or hand pose detection with a pretrained OpenPose dnn." << endl;
@ -79,9 +84,18 @@ int main(int argc, char **argv)
return 0;
}

// read the network model
Net net = readNetFromCaffe(modelTxt, modelBin);
int midx, npairs, nparts;
if (!dataset.compare("COCO")) { midx = 0; npairs = 17; nparts = 18; }
else if (!dataset.compare("MPI")) { midx = 1; npairs = 14; nparts = 16; }
else if (!dataset.compare("HAND")) { midx = 2; npairs = 20; nparts = 22; }
else
{
std::cerr << "Can't interpret dataset parameter: " << dataset << std::endl;
exit(-1);
}

// read the network model
Net net = readNet(modelBin, modelTxt);
// and the image
Mat img = imread(imageFile);
if (img.empty())
@ -91,39 +105,14 @@ int main(int argc, char **argv)
}

// send it through the network
Mat inputBlob = blobFromImage(img, 1.0 / 255, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
Mat inputBlob = blobFromImage(img, scale, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
net.setInput(inputBlob);
Mat result = net.forward();
// the result is an array of "heatmaps", the probability of a body part being in location x,y

int midx, npairs;
int nparts = result.size[1];
int H = result.size[2];
int W = result.size[3];

// find out, which model we have
if (nparts == 19)
{ // COCO body
midx = 0;
npairs = 17;
nparts = 18; // skip background
}
else if (nparts == 16)
{ // MPI body
midx = 1;
npairs = 14;
}
else if (nparts == 22)
{ // hand
midx = 2;
npairs = 20;
}
else
{
cerr << "there should be 19 parts for the COCO model, 16 for MPI, or 22 for the hand one, but this model has " << nparts << " parts." << endl;
return (0);
}

// find the position of the body parts
vector<Point> points(22);
for (int n=0; n<nparts; n++)
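
Note: a hedged sketch (illustrative helper, not the sample's code) of the revised loading and preprocessing flow above: readNet() replaces readNetFromCaffe(), and the blob scale becomes a command-line parameter instead of the hard-coded 1.0/255.

    #include <opencv2/dnn.hpp>

    static cv::Mat poseHeatmaps(const std::string& modelBin, const std::string& modelTxt,
                                const cv::Mat& img, int W_in, int H_in, double scale)
    {
        cv::dnn::Net net = cv::dnn::readNet(modelBin, modelTxt);
        cv::Mat blob = cv::dnn::blobFromImage(img, scale, cv::Size(W_in, H_in),
                                              cv::Scalar(0, 0, 0), false, false);
        net.setInput(blob);
        return net.forward();  // heatmaps, 1 x nparts x H x W
    }
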
@ -1,5 +1,5 @@
# To use Inference Engine backend, specify location of plugins:
# export LD_LIBRARY_PATH=/opt/intel/deeplearning_deploymenttoolkit/deployment_tools/external/mklml_lnx/lib:$LD_LIBRARY_PATH
# source /opt/intel/computer_vision_sdk/bin/setupvars.sh
import cv2 as cv
import numpy as np
import argparse
@ -12,10 +12,11 @@ parser.add_argument('--input', help='Path to image or video. Skip to capture fra
parser.add_argument('--proto', help='Path to .prototxt')
parser.add_argument('--model', help='Path to .caffemodel')
parser.add_argument('--dataset', help='Specify what kind of model was trained. '
'It could be (COCO, MPI) depends on dataset.')
'It could be (COCO, MPI, HAND) depends on dataset.')
parser.add_argument('--thr', default=0.1, type=float, help='Threshold value for pose parts heat map')
parser.add_argument('--width', default=368, type=int, help='Resize input to specific width.')
parser.add_argument('--height', default=368, type=int, help='Resize input to specific height.')
parser.add_argument('--scale', default=0.003922, type=float, help='Scale for blob.')

args = parser.parse_args()

@ -30,8 +31,7 @@ if args.dataset == 'COCO':
["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["Neck", "LHip"],
["LHip", "LKnee"], ["LKnee", "LAnkle"], ["Neck", "Nose"], ["Nose", "REye"],
["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"] ]
else:
assert(args.dataset == 'MPI')
elif args.dataset == 'MPI':
BODY_PARTS = { "Head": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
"LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
"RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "Chest": 14,
@ -41,11 +41,33 @@ else:
["RElbow", "RWrist"], ["Neck", "LShoulder"], ["LShoulder", "LElbow"],
["LElbow", "LWrist"], ["Neck", "Chest"], ["Chest", "RHip"], ["RHip", "RKnee"],
["RKnee", "RAnkle"], ["Chest", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"] ]
else:
assert(args.dataset == 'HAND')
BODY_PARTS = { "Wrist": 0,
"ThumbMetacarpal": 1, "ThumbProximal": 2, "ThumbMiddle": 3, "ThumbDistal": 4,
"IndexFingerMetacarpal": 5, "IndexFingerProximal": 6, "IndexFingerMiddle": 7, "IndexFingerDistal": 8,
"MiddleFingerMetacarpal": 9, "MiddleFingerProximal": 10, "MiddleFingerMiddle": 11, "MiddleFingerDistal": 12,
"RingFingerMetacarpal": 13, "RingFingerProximal": 14, "RingFingerMiddle": 15, "RingFingerDistal": 16,
"LittleFingerMetacarpal": 17, "LittleFingerProximal": 18, "LittleFingerMiddle": 19, "LittleFingerDistal": 20,
}

POSE_PAIRS = [ ["Wrist", "ThumbMetacarpal"], ["ThumbMetacarpal", "ThumbProximal"],
["ThumbProximal", "ThumbMiddle"], ["ThumbMiddle", "ThumbDistal"],
["Wrist", "IndexFingerMetacarpal"], ["IndexFingerMetacarpal", "IndexFingerProximal"],
["IndexFingerProximal", "IndexFingerMiddle"], ["IndexFingerMiddle", "IndexFingerDistal"],
["Wrist", "MiddleFingerMetacarpal"], ["MiddleFingerMetacarpal", "MiddleFingerProximal"],
["MiddleFingerProximal", "MiddleFingerMiddle"], ["MiddleFingerMiddle", "MiddleFingerDistal"],
["Wrist", "RingFingerMetacarpal"], ["RingFingerMetacarpal", "RingFingerProximal"],
["RingFingerProximal", "RingFingerMiddle"], ["RingFingerMiddle", "RingFingerDistal"],
["Wrist", "LittleFingerMetacarpal"], ["LittleFingerMetacarpal", "LittleFingerProximal"],
["LittleFingerProximal", "LittleFingerMiddle"], ["LittleFingerMiddle", "LittleFingerDistal"] ]


inWidth = args.width
inHeight = args.height
inScale = args.scale

net = cv.dnn.readNetFromCaffe(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))
net = cv.dnn.readNet(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))

cap = cv.VideoCapture(args.input if args.input else 0)

@ -57,12 +79,12 @@ while cv.waitKey(1) < 0:

frameWidth = frame.shape[1]
frameHeight = frame.shape[0]
inp = cv.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
inp = cv.dnn.blobFromImage(frame, inScale, (inWidth, inHeight),
(0, 0, 0), swapRB=False, crop=False)
net.setInput(inp)
out = net.forward()

assert(len(BODY_PARTS) == out.shape[1])
assert(len(BODY_PARTS) <= out.shape[1])

points = []
for i in range(len(BODY_PARTS)):