Mirror of https://github.com/opencv/opencv.git
Merge remote-tracking branch 'upstream/3.4' into merge-3.4

commit 9787ab598b
@@ -1044,12 +1044,10 @@ void cvCreateTrainingSamples( const char* filename,
     output = fopen( filename, "wb" );
     if( output != NULL )
     {
-        int hasbg;
         int i;
         int inverse;
 
-        hasbg = 0;
-        hasbg = (bgfilename != NULL && icvInitBackgroundReaders( bgfilename,
+        const int hasbg = (bgfilename != NULL && icvInitBackgroundReaders( bgfilename,
                  Size( winwidth,winheight ) ) );
 
         Mat sample( winheight, winwidth, CV_8UC1 );
@@ -740,7 +740,7 @@ macro(ocv_compiler_optimization_fill_cpu_config)
 ")
 
 
-  set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
+  set(__file "${OpenCV_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
   if(EXISTS "${__file}")
     file(READ "${__file}" __content)
   endif()
@@ -220,7 +220,7 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
     CV_Assert(!corners.empty());
     outsideCorners.clear();
     //find two pairs of the most nearest corners
-    int i, j, n = (int)corners.size();
+    const size_t n = corners.size();
 
 #ifdef DEBUG_CIRCLES
     Mat cornersImage(1024, 1248, CV_8UC1, Scalar(0));
@@ -228,22 +228,22 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
     imshow("corners", cornersImage);
 #endif
 
-    std::vector<Point2f> tangentVectors(corners.size());
-    for(size_t k=0; k<corners.size(); k++)
+    std::vector<Point2f> tangentVectors(n);
+    for(size_t k=0; k < n; k++)
     {
-        Point2f diff = corners[(k + 1) % corners.size()] - corners[k];
+        Point2f diff = corners[(k + 1) % n] - corners[k];
         tangentVectors[k] = diff * (1.0f / norm(diff));
     }
 
     //compute angles between all sides
-    Mat cosAngles(n, n, CV_32FC1, 0.0f);
-    for(i = 0; i < n; i++)
+    Mat cosAngles((int)n, (int)n, CV_32FC1, 0.0f);
+    for(size_t i = 0; i < n; i++)
     {
-        for(j = i + 1; j < n; j++)
+        for(size_t j = i + 1; j < n; j++)
         {
             float val = fabs(tangentVectors[i].dot(tangentVectors[j]));
-            cosAngles.at<float>(i, j) = val;
-            cosAngles.at<float>(j, i) = val;
+            cosAngles.at<float>((int)i, (int)j) = val;
+            cosAngles.at<float>((int)j, (int)i) = val;
         }
     }
 
@@ -272,10 +272,10 @@ void CirclesGridClusterFinder::findOutsideCorners(const std::vector<cv::Point2f>
     const int bigDiff = 4;
     if(maxIdx - minIdx == bigDiff)
     {
-        minIdx += n;
+        minIdx += (int)n;
         std::swap(maxIdx, minIdx);
     }
-    if(maxIdx - minIdx != n - bigDiff)
+    if(maxIdx - minIdx != (int)n - bigDiff)
     {
         return;
     }
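A note on the circlesgrid hunks above: the loop counters move from int to size_t to match std::vector::size(), and the narrowing back to int now happens explicitly at the cv::Mat boundary, whose constructor and at<>() accessor take int. A minimal sketch of the resulting convention (illustration only, not part of the patch):

    const size_t n = corners.size();                    // unsigned count from the container
    cv::Mat cosAngles((int)n, (int)n, CV_32FC1, 0.0f);  // cv::Mat dimensions are int
    for (size_t i = 0; i < n; i++)
        for (size_t j = i + 1; j < n; j++)
            cosAngles.at<float>((int)i, (int)j) = 0.f;  // cast only at the API edge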
@@ -206,7 +206,7 @@ void dls::run_kernel(const cv::Mat& pp)
 
 void dls::build_coeff_matrix(const cv::Mat& pp, cv::Mat& Mtilde, cv::Mat& D)
 {
-    CV_Assert(!pp.empty());
+    CV_Assert(!pp.empty() && N > 0);
     cv::Mat eye = cv::Mat::eye(3, 3, CV_64F);
 
     // build coeff matrix
@@ -334,19 +334,19 @@ bool validateData(const ChessBoardGenerator& cbg, const Size& imgSz,
 
             tmp = cv::norm(cur - mat(i + 1, j + 1)); // TODO cvtest
             if (tmp < minNeibDist)
-                tmp = minNeibDist;
+                minNeibDist = tmp;
 
             tmp = cv::norm(cur - mat(i - 1, j + 1)); // TODO cvtest
             if (tmp < minNeibDist)
-                tmp = minNeibDist;
+                minNeibDist = tmp;
 
             tmp = cv::norm(cur - mat(i + 1, j - 1)); // TODO cvtest
             if (tmp < minNeibDist)
-                tmp = minNeibDist;
+                minNeibDist = tmp;
 
             tmp = cv::norm(cur - mat(i - 1, j - 1)); // TODO cvtest
             if (tmp < minNeibDist)
-                tmp = minNeibDist;
+                minNeibDist = tmp;
         }
 
         const double threshold = 0.25;
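The four parallel fixes above correct an inverted assignment: the old `tmp = minNeibDist;` overwrote the freshly computed distance, so the running minimum never changed. The intended pattern, in isolation (illustration only):

    double minNeibDist = tmp;                 // seeded with the first distance
    tmp = cv::norm(cur - mat(i + 1, j + 1));  // next neighbour distance
    if (tmp < minNeibDist)
        minNeibDist = tmp;                    // keep the smaller value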
@@ -526,13 +526,13 @@ inline void v256_zip(const _Tpvec& a, const _Tpvec& b, _Tpvec& ab0, _Tpvec& ab1)
 
 template<typename _Tpvec>
 inline _Tpvec v256_combine_diagonal(const _Tpvec& a, const _Tpvec& b)
-{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0b11110000)); }
+{ return _Tpvec(_mm256_blend_epi32(a.val, b.val, 0xf0)); }
 
 inline v_float32x8 v256_combine_diagonal(const v_float32x8& a, const v_float32x8& b)
-{ return v256_blend<0b11110000>(a, b); }
+{ return v256_blend<0xf0>(a, b); }
 
 inline v_float64x4 v256_combine_diagonal(const v_float64x4& a, const v_float64x4& b)
-{ return v256_blend<0b1100>(a, b); }
+{ return v256_blend<0xc>(a, b); }
 
 template<typename _Tpvec>
 inline _Tpvec v256_alignr_128(const _Tpvec& a, const _Tpvec& b)
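The three replacements above only re-spell C++14 binary literals as hexadecimal constants of the same value, presumably to keep the header building on pre-C++14 compilers. A quick sanity check (a sketch, not part of the patch):

    // 0b11110000 == 0xf0 == 240, and 0b1100 == 0xc == 12
    static_assert(0xf0 == 240, "blend mask for the upper four 32-bit lanes");
    static_assert(0xc  == 12,  "blend mask for the upper two 64-bit lanes");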
@@ -1609,392 +1609,592 @@ OPENCV_HAL_IMPL_AVX_EXTRACT(v_float32x8)
 OPENCV_HAL_IMPL_AVX_EXTRACT(v_float64x4)
 
 
 /** Reinterpret **/
 // its up there with load and store operations
 
-/* de&interleave */
-#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_2CH(_Tpvec, _Tp, suffix) \
-inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \
-{ return v256_load_deinterleave_##suffix(ptr, a, b); } \
-inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
-{ return v256_store_interleave_2ch(ptr, a, b); }
-
-#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_3CH(_Tpvec, _Tp, suffix) \
-inline void v_load_deinterleave \
-(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c) \
-{ return v256_load_deinterleave_##suffix(ptr, a, b, c); } \
-inline void v_store_interleave \
-(_Tp* ptr, const _Tpvec& a,const _Tpvec& b, const _Tpvec& c) \
-{ return v256_store_interleave_##suffix(ptr, a, b, c); }
-
-#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_4CH(_Tpvec, _Tp, suffix) \
-inline void v_load_deinterleave \
-(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d) \
-{ return v256_load_deinterleave_##suffix(ptr, a, b, c, d); } \
-inline void v_store_interleave \
-(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d) \
-{ return v256_store_interleave_##suffix(ptr, a, b, c, d); }
-
-#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_3n4CH(_Tpvec, _Tp, suffix) \
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_3CH(_Tpvec, _Tp, suffix) \
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_4CH(_Tpvec, _Tp, suffix)
-
-#define OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(_Tpvec, _Tp, suffix) \
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_2CH(_Tpvec, _Tp, suffix) \
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_3n4CH(_Tpvec, _Tp, suffix)
-
-/* **** */
-//
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_2ch(_Tp* ptr, const _Tpvec& a, const _Tpvec& b)
-{
-    _Tpvec ab0, ab1;
-    v_zip(a, b, ab0, ab1);
-    v_store(ptr, ab0);
-    v_store(ptr + _Tpvec::nlanes, ab1);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
-{
-    _Tpvec ab0 = v256_load(ptr);
-    _Tpvec ab1 = v256_load(ptr + _Tpvec::nlanes);
-    _Tpvec ab00, ab11;
-    v_recombine(ab0, ab1, ab00, ab11);
-    v256_zip(ab00, ab11, a, b);
-}
-
-///
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c)
-{
-    _Tpvec abc0 = v256_load(ptr);
-    _Tpvec abc1 = v256_load(ptr + _Tpvec::nlanes);
-    _Tpvec abc2 = v256_load(ptr + _Tpvec::nlanes * 2);
-
-    _Tpvec ab0 = v256_combine_diagonal(abc0, abc1);
-    _Tpvec bc1 = v256_combine_diagonal(abc1, abc2);
-    _Tpvec ac1 = v256_reverse_64(v256_combine_diagonal(abc2, abc0));
-
-    a = v256_unpacklo(ab0, ac1);
-    c = v256_unpackhi(ac1, bc1);
-    b = v256_alignr_64(bc1, ab0);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l4(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
-{
-    _Tpvec ab0 = v256_unpacklo(a, b);
-    _Tpvec bc1 = v256_unpackhi(b, c);
-    _Tpvec ca10 = v256_swap_halves(v256_blend<0b1010>(c, a));
-
-    v_store(ptr, v256_combine_diagonal(ab0, ca10));
-    v_store(ptr + _Tpvec::nlanes, v256_combine_diagonal(bc1, ab0));
-    v_store(ptr + _Tpvec::nlanes * 2, v256_combine_diagonal(ca10, bc1));
-}
-
-////
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l4(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
-{
-    _Tpvec abcd0 = v256_load(ptr);
-    _Tpvec abcd1 = v256_load(ptr + _Tpvec::nlanes);
-    _Tpvec abcd2 = v256_load(ptr + _Tpvec::nlanes * 2);
-    _Tpvec abcd3 = v256_load(ptr + _Tpvec::nlanes * 3);
-
-    _Tpvec cd0ab0 = v256_alignr_128(abcd0, abcd2);
-    _Tpvec cd1ab1 = v256_alignr_128(abcd1, abcd3);
-
-    _Tpvec ab0 = v256_combine_diagonal(abcd0, cd0ab0);
-    _Tpvec ab1 = v256_combine_diagonal(abcd1, cd1ab1);
-    _Tpvec cd0 = v256_combine_diagonal(cd0ab0, abcd2);
-    _Tpvec cd1 = v256_combine_diagonal(cd1ab1, abcd3);
-
-    v256_zip(ab0, ab1, a, b);
-    v256_zip(cd0, cd1, c, d);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l4(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
-{
-    _Tpvec ab0, ab1, cd0, cd1;
-    v256_zip(a, b, ab0, ab1);
-    v256_zip(c, d, cd0, cd1);
-
-    _Tpvec ab0cd0 = v256_alignr_128(ab0, cd0);
-    _Tpvec ab1cd1 = v256_alignr_128(ab1, cd1);
-
-    v_store(ptr, v256_combine_diagonal(ab0, ab0cd0));
-    v_store(ptr + _Tpvec::nlanes, v256_combine_diagonal(ab1, ab1cd1));
-    v_store(ptr + _Tpvec::nlanes * 2, v256_combine_diagonal(ab0cd0, cd0));
-    v_store(ptr + _Tpvec::nlanes * 3, v256_combine_diagonal(ab1cd1, cd1));
-}
-
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint64x4, uint64, l4)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int64x4, int64, l4)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_float64x4, double, l4)
-
-/* **** **** */
-//
-inline void v256_load_deinterleave_l8(const float* ptr, v_float32x8& a, v_float32x8& b)
-{
-    v_float32x8 ab0 = v256_load(ptr);
-    v_float32x8 ab1 = v256_load(ptr + 8);
-
-    v_float32x8 ab0ab2, ab1ab3;
-    v_recombine(ab0, ab1, ab0ab2, ab1ab3);
-
-    a.val = _mm256_shuffle_ps(ab0ab2.val, ab1ab3.val, _MM_SHUFFLE(2, 0, 2, 0));
-    b.val = _mm256_shuffle_ps(ab0ab2.val, ab1ab3.val, _MM_SHUFFLE(3, 1, 3, 1));
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
-{
-    v_float32x8 fa, fb;
-    v256_load_deinterleave_l8((float*)ptr, fa, fb);
-    a.val = v_reinterpret_as_u32(fa).val;
-    b.val = v_reinterpret_as_u32(fb).val;
-}
-///
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l8(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
-{
-    _Tpvec ab0, ab1, bc0, bc1;
-    v256_zip(a, b, ab0, ab1);
-    v256_zip(b, c, bc0, bc1);
-
-    _Tpvec cazg = v256_blend<0b10101010>(c, a);
-    _Tpvec abc0abc1(_mm256_unpacklo_epi64(ab0.val, cazg.val));
-    _Tpvec abc1abc2(_mm256_unpackhi_epi64(cazg.val, bc1.val));
-    _Tpvec abc2abc0 = v256_reverse_64(v256_blend<0b11001100>(ab1, bc0));
-
-    _Tpvec abc0 = v256_combine_diagonal(abc0abc1, abc2abc0);
-    _Tpvec abc1 = v256_combine_diagonal(abc1abc2, abc0abc1);
-    _Tpvec abc2 = v256_combine_diagonal(abc2abc0, abc1abc2);
-
-    v_store(ptr, abc0);
-    v_store(ptr + _Tpvec::nlanes, abc1);
-    v_store(ptr + _Tpvec::nlanes * 2, abc2);
-}
-
-inline void v256_store_interleave_l8(float* ptr, const v_float32x8& a, const v_float32x8& b, const v_float32x8& c)
-{
-    v_float32x8 ab0, ab1, bc0, bc1;
-    v256_zip(a, b, ab0, ab1);
-    v256_zip(b, c, bc0, bc1);
-
-    v_float32x8 cazg = v256_blend<0b10101010>(c, a);
-    v_float32x8 abc0abc1(_mm256_shuffle_ps(ab0.val, cazg.val, _MM_SHUFFLE(1, 0, 1, 0)));
-    v_float32x8 abc1abc2(_mm256_shuffle_ps(cazg.val, bc1.val, _MM_SHUFFLE(3, 2, 3, 2)));
-
-    v_float32x8 abc0abc2(_mm256_shuffle_ps(bc0.val, ab1.val, _MM_SHUFFLE(1, 0, 3, 2)));
-    v_float32x8 abc2abc0 = v256_swap_halves(abc0abc2);
-
-    v_float32x8 abc0 = v256_combine_diagonal(abc0abc1, abc2abc0);
-    v_float32x8 abc1 = v256_combine_diagonal(abc1abc2, abc0abc1);
-    v_float32x8 abc2 = v256_combine_diagonal(abc2abc0, abc1abc2);
-
-    v_store(ptr, abc0);
-    v_store(ptr + 8, abc1);
-    v_store(ptr + 16, abc2);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c)
-{
-    _Tpvec abc02 = v256_load(ptr);
-    _Tpvec abc1 = v256_load(ptr + _Tpvec::nlanes);
-    _Tpvec abc20 = v256_load(ptr + _Tpvec::nlanes * 2);
-
-    _Tpvec abc2 = v256_alignr_128(abc02, abc20);
-    _Tpvec abc0 = v256_combine_diagonal(abc02, abc20);
-
-    a = v256_blend<0b10010010>(abc0, abc1);
-    a = v256_blend<0b01000100>(a, abc2);
-
-    b = v256_blend<0b00100100>(abc0, abc1);
-    b = v256_blend<0b10011001>(b, abc2);
-
-    c = v256_blend<0b01001001>(abc0, abc1);
-    c = v256_blend<0b00100010>(c, abc2);
-
-    a = v256_shuffle<_MM_SHUFFLE(1, 2, 3, 0)>(a);
-    b = v256_shuffle<_MM_SHUFFLE(2, 3, 0, 1)>(b);
-    c = v256_shuffle<_MM_SHUFFLE(3, 0, 1, 2)>(c);
-}
-/////
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l8(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
-{
-    _Tpvec ab0, ab1, cd0, cd1;
-    v256_load_deinterleave_l4(ptr, ab0, cd0, ab1, cd1);
-    v256_zip(ab0, ab1, a, b);
-    v256_zip(cd0, cd1, c, d);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l8(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
-{
-    _Tpvec ac0, ac1, bd0, bd1;
-    v256_zip(a, c, ac0, ac1);
-    v256_zip(b, d, bd0, bd1);
-
-    _Tpvec abcd0, abcd1, abcd2, abcd3;
-    v256_zip(ac0, bd0, abcd0, abcd1);
-    v256_zip(ac1, bd1, abcd2, abcd3);
-
-    _Tpvec abcd01, abcd23, abcd45, abcd67;
-    v_recombine(abcd0, abcd1, abcd01, abcd45);
-    v_recombine(abcd2, abcd3, abcd23, abcd67);
-
-    v_store(ptr, abcd01);
-    v_store(ptr + _Tpvec::nlanes, abcd23);
-    v_store(ptr + _Tpvec::nlanes * 2, abcd45);
-    v_store(ptr + _Tpvec::nlanes * 3, abcd67);
-}
-
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint32x8, unsigned, l8)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int32x8, int, l8)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_float32x8, float, l8)
-
-/* ******** ******** */
-//
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l16(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
-{
-    const __m256i sep = _mm256_setr_epi8(
-        0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
-        0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
-    );
-
-    _Tpvec ab0, ab1;
-    v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes), ab0, ab1);
-
-    __m256i a0b0 = _mm256_shuffle_epi8(ab0.val, sep);
-    __m256i a1b1 = _mm256_shuffle_epi8(ab1.val, sep);
-
-    a.val = _mm256_unpacklo_epi64(a0b0, a1b1);
-    b.val = _mm256_unpackhi_epi64(a0b0, a1b1);
-}
-///
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l16(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c)
-{
-    v_uint32x8 ab0 = v_reinterpret_as_u32(v256_unpacklo(a, b));
-    v_uint32x8 ab1 = v_reinterpret_as_u32(v256_unpackhi(a, b));
-    v_uint32x8 bc0 = v_reinterpret_as_u32(v256_unpacklo(b, c));
-    v_uint32x8 bc1 = v_reinterpret_as_u32(v256_unpackhi(b, c));
-
-    v_uint32x8 cazg = v_reinterpret_as_u32(v256_blend<0b10101010>(c, a));
-    cazg = v256_shuffle<_MM_SHUFFLE(2, 1, 0, 3)>(cazg);
-
-    v_uint32x8 ac1ab1 = v256_blend<0b10101010>(ab1, bc1);
-    ac1ab1 = v256_shuffle<_MM_SHUFFLE(2, 1, 0, 3)>(ac1ab1);
-
-    v_uint32x8 abc001 = v256_blend<0b10101010>(ab0, cazg);
-    v_uint32x8 cabc0 = v256_blend<0b10101010>(cazg, bc0);
-
-    v_uint32x8 cabc1 = v256_unpacklo(cabc0, ac1ab1);
-    v_uint32x8 bcab0 = v256_unpackhi(cabc1, abc001);
-
-    v_uint64x4 abc01 = v256_unpacklo(v_reinterpret_as_u64(abc001), v_reinterpret_as_u64(bcab0));
-    v_uint64x4 abc21 = v256_unpackhi(v_reinterpret_as_u64(cabc0), v_reinterpret_as_u64(bcab0));
-    abc21 = v256_swap_halves(abc21);
-    v_uint64x4 abc12 = v_reinterpret_as_u64(v256_alignr_64(cabc1, ac1ab1));
-
-    v_uint64x4 abc0 = v256_combine_diagonal(abc01, abc21);
-    v_uint64x4 abc1 = v256_combine_diagonal(abc12, abc01);
-    v_uint64x4 abc2 = v256_combine_diagonal(abc21, abc12);
-
-    v_store(ptr, _Tpvec(abc0.val));
-    v_store(ptr + _Tpvec::nlanes, _Tpvec(abc1.val));
-    v_store(ptr + _Tpvec::nlanes * 2, _Tpvec(abc2.val));
-}
-// todo:
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l16(const _Tp*, _Tpvec&, _Tpvec&, _Tpvec&)
-{}
-////
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l16(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
-{
-    _Tpvec ab0, ab1, cd0, cd1;
-    v256_load_deinterleave_l8(ptr, ab0, cd0, ab1, cd1);
-    v256_zip(ab0, ab1, a, b);
-    v256_zip(cd0, cd1, c, d);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l16(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
-{ v256_store_interleave_l8(ptr, a, b, c, d); }
-
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint16x16, ushort, l16)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int16x16, short, l16)
-
-/* **************** **************** */
-//
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l32(const _Tp* ptr, _Tpvec& a, _Tpvec& b)
-{
-    const __m256i sep = _mm256_setr_epi8(
-        0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
-        0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
-    );
-
-    _Tpvec ab0, ab1;
-    v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes), ab0, ab1);
-
-    __m256i a0b0 = _mm256_shuffle_epi8(ab0.val, sep);
-    __m256i a1b1 = _mm256_shuffle_epi8(ab1.val, sep);
-
-    a.val = _mm256_unpacklo_epi64(a0b0, a1b1);
-    b.val = _mm256_unpackhi_epi64(a0b0, a1b1);
-}
-
-/// todo
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l32(_Tp*, const _Tpvec&, const _Tpvec&, const _Tpvec&)
-{}
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l32(const _Tp*, _Tpvec&, _Tpvec&, _Tpvec&)
-{}
-////
-template<typename _Tp, typename _Tpvec>
-inline void v256_load_deinterleave_l32(const _Tp* ptr, _Tpvec& a, _Tpvec& b, _Tpvec& c, _Tpvec& d)
-{
-    const __m256i sep = _mm256_setr_epi8(
-        0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
-        0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15
-    );
-
-    _Tpvec abcd0, abcd1, abcd2, abcd3;
-    v_recombine(v256_load(ptr), v256_load(ptr + _Tpvec::nlanes * 2), abcd0, abcd1);
-    v_recombine(v256_load(ptr + _Tpvec::nlanes), v256_load(ptr + _Tpvec::nlanes * 3), abcd2, abcd3);
-
-    __m256i ab0cd0 = _mm256_shuffle_epi8(abcd0.val, sep);
-    __m256i ab1cd1 = _mm256_shuffle_epi8(abcd1.val, sep);
-    __m256i ab2cd2 = _mm256_shuffle_epi8(abcd2.val, sep);
-    __m256i ab3cd3 = _mm256_shuffle_epi8(abcd3.val, sep);
-
-    __m256i ab0 = _mm256_unpacklo_epi32(ab0cd0, ab1cd1);
-    __m256i ab1 = _mm256_unpacklo_epi32(ab2cd2, ab3cd3);
-    __m256i cd0 = _mm256_unpackhi_epi32(ab0cd0, ab1cd1);
-    __m256i cd1 = _mm256_unpackhi_epi32(ab2cd2, ab3cd3);
-
-    a.val = _mm256_unpacklo_epi64(ab0, ab1);
-    b.val = _mm256_unpackhi_epi64(ab0, ab1);
-    c.val = _mm256_unpacklo_epi64(cd0, cd1);
-    d.val = _mm256_unpackhi_epi64(cd0, cd1);
-}
-
-template<typename _Tp, typename _Tpvec>
-inline void v256_store_interleave_l32(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, const _Tpvec& c, const _Tpvec& d)
-{ v256_store_interleave_l8(ptr, a, b, c, d); }
-
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_uint8x32, uchar, l32)
-OPENCV_HAL_IMPL_AVX_INTERLEAVE_ACH(v_int8x32, schar, l32)
+///////////////////// load deinterleave /////////////////////////////
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b )
+{
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+
+    static const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
+                                               0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
+    __m256i p0 = _mm256_shuffle_epi8(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph);
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph);
+    a = v_uint8x32(a0);
+    b = v_uint8x32(b0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16& b )
+{
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+
+    static const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+                                               0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
+    __m256i p0 = _mm256_shuffle_epi8(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph);
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph);
+    a = v_uint16x16(a0);
+    b = v_uint16x16(b0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& a, v_uint32x8& b )
+{
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+
+    const int sh = 0+2*4+1*16+3*64;
+    __m256i p0 = _mm256_shuffle_epi32(ab0, sh);
+    __m256i p1 = _mm256_shuffle_epi32(ab1, sh);
+    __m256i pl = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
+    __m256i ph = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph);
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph);
+    a = v_uint32x8(a0);
+    b = v_uint32x8(b0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& a, v_uint64x4& b )
+{
+    __m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+
+    __m256i pl = _mm256_permute2x128_si256(ab0, ab1, 0 + 2*16);
+    __m256i ph = _mm256_permute2x128_si256(ab0, ab1, 1 + 3*16);
+    __m256i a0 = _mm256_unpacklo_epi64(pl, ph);
+    __m256i b0 = _mm256_unpackhi_epi64(pl, ph);
+    a = v_uint64x4(a0);
+    b = v_uint64x4(b0);
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, v_uint8x32& r )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
+
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+
+    static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
+                                               0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    static const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
+                                               -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
+
+    __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
+    __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
+    __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
+
+    static const __m256i
+    sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
+                            0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13),
+    sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
+                            1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14),
+    sh_r = _mm256_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15,
+                            2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
+    b0 = _mm256_shuffle_epi8(b0, sh_b);
+    g0 = _mm256_shuffle_epi8(g0, sh_g);
+    r0 = _mm256_shuffle_epi8(r0, sh_r);
+
+    b = v_uint8x32(b0);
+    g = v_uint8x32(g0);
+    r = v_uint8x32(r0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& g, v_uint16x16& r )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+
+    static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
+                                               0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
+    static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
+                                               -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
+    __m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
+    __m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
+    __m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
+    static const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
+                                                 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    static const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13,
+                                                 2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
+    static const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
+                                                 4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    b0 = _mm256_shuffle_epi8(b0, sh_b);
+    g0 = _mm256_shuffle_epi8(g0, sh_g);
+    r0 = _mm256_shuffle_epi8(r0, sh_r);
+
+    b = v_uint16x16(b0);
+    g = v_uint16x16(g0);
+    r = v_uint16x16(r0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& b, v_uint32x8& g, v_uint32x8& r )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+
+    __m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
+    __m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
+
+    __m256i b0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_low, s02_high, 0x24), bgr1, 0x92);
+    __m256i g0 = _mm256_blend_epi32(_mm256_blend_epi32(s02_high, s02_low, 0x92), bgr1, 0x24);
+    __m256i r0 = _mm256_blend_epi32(_mm256_blend_epi32(bgr1, s02_low, 0x24), s02_high, 0x92);
+
+    b0 = _mm256_shuffle_epi32(b0, 0x6c);
+    g0 = _mm256_shuffle_epi32(g0, 0xb1);
+    r0 = _mm256_shuffle_epi32(r0, 0xc6);
+
+    b = v_uint32x8(b0);
+    g = v_uint32x8(g0);
+    r = v_uint32x8(r0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g, v_uint64x4& r )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+
+    __m256i s01 = _mm256_blend_epi32(bgr0, bgr1, 0xf0);
+    __m256i s12 = _mm256_blend_epi32(bgr1, bgr2, 0xf0);
+    __m256i s20r = _mm256_permute4x64_epi64(_mm256_blend_epi32(bgr2, bgr0, 0xf0), 0x1b);
+    __m256i b0 = _mm256_unpacklo_epi64(s01, s20r);
+    __m256i g0 = _mm256_alignr_epi8(s12, s01, 8);
+    __m256i r0 = _mm256_unpackhi_epi64(s20r, s12);
+
+    b = v_uint64x4(b0);
+    g = v_uint64x4(g0);
+    r = v_uint64x4(r0);
+}
+
+inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g, v_uint8x32& r, v_uint8x32& a )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
+    __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96));
+    static const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+                                               0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
+
+    __m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
+    __m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
+    __m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
+
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    b = v_uint8x32(b0);
+    g = v_uint8x32(g0);
+    r = v_uint8x32(r0);
+    a = v_uint8x32(a0);
+}
+
+inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16& g, v_uint16x16& r, v_uint16x16& a )
+{
+    __m256i bgr0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
+    __m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48));
+    static const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
+                                               0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
+    __m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
+    __m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
+    __m256i p2 = _mm256_shuffle_epi8(bgr2, sh);
+    __m256i p3 = _mm256_shuffle_epi8(bgr3, sh);
+
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    b = v_uint16x16(b0);
+    g = v_uint16x16(g0);
+    r = v_uint16x16(r0);
+    a = v_uint16x16(a0);
+}
+
+inline void v_load_deinterleave( const unsigned* ptr, v_uint32x8& b, v_uint32x8& g, v_uint32x8& r, v_uint32x8& a )
+{
+    __m256i p0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i p1 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i p2 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
+    __m256i p3 = _mm256_loadu_si256((const __m256i*)(ptr + 24));
+
+    __m256i p01l = _mm256_unpacklo_epi32(p0, p1);
+    __m256i p01h = _mm256_unpackhi_epi32(p0, p1);
+    __m256i p23l = _mm256_unpacklo_epi32(p2, p3);
+    __m256i p23h = _mm256_unpackhi_epi32(p2, p3);
+
+    __m256i pll = _mm256_permute2x128_si256(p01l, p23l, 0 + 2*16);
+    __m256i plh = _mm256_permute2x128_si256(p01l, p23l, 1 + 3*16);
+    __m256i phl = _mm256_permute2x128_si256(p01h, p23h, 0 + 2*16);
+    __m256i phh = _mm256_permute2x128_si256(p01h, p23h, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi32(pll, plh);
+    __m256i g0 = _mm256_unpackhi_epi32(pll, plh);
+    __m256i r0 = _mm256_unpacklo_epi32(phl, phh);
+    __m256i a0 = _mm256_unpackhi_epi32(phl, phh);
+
+    b = v_uint32x8(b0);
+    g = v_uint32x8(g0);
+    r = v_uint32x8(r0);
+    a = v_uint32x8(a0);
+}
+
+inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g, v_uint64x4& r, v_uint64x4& a )
+{
+    __m256i bgra0 = _mm256_loadu_si256((const __m256i*)ptr);
+    __m256i bgra1 = _mm256_loadu_si256((const __m256i*)(ptr + 4));
+    __m256i bgra2 = _mm256_loadu_si256((const __m256i*)(ptr + 8));
+    __m256i bgra3 = _mm256_loadu_si256((const __m256i*)(ptr + 12));
+
+    __m256i l02 = _mm256_permute2x128_si256(bgra0, bgra2, 0 + 2*16);
+    __m256i h02 = _mm256_permute2x128_si256(bgra0, bgra2, 1 + 3*16);
+    __m256i l13 = _mm256_permute2x128_si256(bgra1, bgra3, 0 + 2*16);
+    __m256i h13 = _mm256_permute2x128_si256(bgra1, bgra3, 1 + 3*16);
+
+    __m256i b0 = _mm256_unpacklo_epi64(l02, l13);
+    __m256i g0 = _mm256_unpackhi_epi64(l02, l13);
+    __m256i r0 = _mm256_unpacklo_epi64(h02, h13);
+    __m256i a0 = _mm256_unpackhi_epi64(h02, h13);
+
+    b = v_uint64x4(b0);
+    g = v_uint64x4(g0);
+    r = v_uint64x4(r0);
+    a = v_uint64x4(a0);
+}
+
+///////////////////////////// store interleave /////////////////////////////////////
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y )
+{
+    __m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val);
+    __m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val);
+
+    __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
+    __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, xy0);
+    _mm256_storeu_si256((__m256i*)(ptr + 32), xy1);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y )
+{
+    __m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val);
+    __m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val);
+
+    __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
+    __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, xy0);
+    _mm256_storeu_si256((__m256i*)(ptr + 16), xy1);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y )
+{
+    __m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val);
+    __m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val);
+
+    __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
+    __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, xy0);
+    _mm256_storeu_si256((__m256i*)(ptr + 8), xy1);
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y )
+{
+    __m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val);
+    __m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val);
+
+    __m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
+    __m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, xy0);
+    _mm256_storeu_si256((__m256i*)(ptr + 4), xy1);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r )
+{
+    static const __m256i sh_b = _mm256_setr_epi8(
+        0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
+        0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
+    static const __m256i sh_g = _mm256_setr_epi8(
+        5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
+        5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
+    static const __m256i sh_r = _mm256_setr_epi8(
+        10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
+        10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
+
+    __m256i b0 = _mm256_shuffle_epi8(b.val, sh_b);
+    __m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
+    __m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
+
+    static const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
+                                               0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    static const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
+                                               0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
+
+    __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
+    __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1);
+    __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1);
+
+    __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
+    __m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16);
+    __m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgr0);
+    _mm256_storeu_si256((__m256i*)(ptr + 32), bgr1);
+    _mm256_storeu_si256((__m256i*)(ptr + 64), bgr2);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r )
+{
+    static const __m256i sh_b = _mm256_setr_epi8(
+        0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
+        0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    static const __m256i sh_g = _mm256_setr_epi8(
+        10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5,
+        10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
+    static const __m256i sh_r = _mm256_setr_epi8(
+        4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
+        4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+
+    __m256i b0 = _mm256_shuffle_epi8(b.val, sh_b);
+    __m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
+    __m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
+
+    static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
+                                               0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
+    static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
+                                               -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
+
+    __m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
+    __m256i p1 = _mm256_blendv_epi8(_mm256_blendv_epi8(g0, r0, m0), b0, m1);
+    __m256i p2 = _mm256_blendv_epi8(_mm256_blendv_epi8(r0, b0, m0), g0, m1);
+
+    __m256i bgr0 = _mm256_permute2x128_si256(p0, p2, 0 + 2*16);
+    //__m256i bgr1 = p1;
+    __m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgr0);
+    _mm256_storeu_si256((__m256i*)(ptr + 16), p1);
+    _mm256_storeu_si256((__m256i*)(ptr + 32), bgr2);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r )
+{
+    __m256i b0 = _mm256_shuffle_epi32(b.val, 0x6c);
+    __m256i g0 = _mm256_shuffle_epi32(g.val, 0xb1);
+    __m256i r0 = _mm256_shuffle_epi32(r.val, 0xc6);
+
+    __m256i p0 = _mm256_blend_epi32(_mm256_blend_epi32(b0, g0, 0x92), r0, 0x24);
+    __m256i p1 = _mm256_blend_epi32(_mm256_blend_epi32(g0, r0, 0x92), b0, 0x24);
+    __m256i p2 = _mm256_blend_epi32(_mm256_blend_epi32(r0, b0, 0x92), g0, 0x24);
+
+    __m256i bgr0 = _mm256_permute2x128_si256(p0, p1, 0 + 2*16);
+    //__m256i bgr1 = p2;
+    __m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgr0);
+    _mm256_storeu_si256((__m256i*)(ptr + 8), p2);
+    _mm256_storeu_si256((__m256i*)(ptr + 16), bgr2);
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r )
+{
+    __m256i s01 = _mm256_unpacklo_epi64(b.val, g.val);
+    __m256i s12 = _mm256_unpackhi_epi64(g.val, r.val);
+    __m256i s20 = _mm256_blend_epi32(r.val, b.val, 0xcc);
+
+    __m256i bgr0 = _mm256_permute2x128_si256(s01, s20, 0 + 2*16);
+    __m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f);
+    __m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgr0);
+    _mm256_storeu_si256((__m256i*)(ptr + 4), bgr1);
+    _mm256_storeu_si256((__m256i*)(ptr + 8), bgr2);
+}
+
+inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r, const v_uint8x32& a )
+{
+    __m256i bg0 = _mm256_unpacklo_epi8(b.val, g.val);
+    __m256i bg1 = _mm256_unpackhi_epi8(b.val, g.val);
+    __m256i ra0 = _mm256_unpacklo_epi8(r.val, a.val);
+    __m256i ra1 = _mm256_unpackhi_epi8(r.val, a.val);
+
+    __m256i bgra0_ = _mm256_unpacklo_epi16(bg0, ra0);
+    __m256i bgra1_ = _mm256_unpackhi_epi16(bg0, ra0);
+    __m256i bgra2_ = _mm256_unpacklo_epi16(bg1, ra1);
+    __m256i bgra3_ = _mm256_unpackhi_epi16(bg1, ra1);
+
+    __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
+    __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
+    __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
+    __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgra0);
+    _mm256_storeu_si256((__m256i*)(ptr + 32), bgra1);
+    _mm256_storeu_si256((__m256i*)(ptr + 64), bgra2);
+    _mm256_storeu_si256((__m256i*)(ptr + 96), bgra3);
+}
+
+inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g,
+                                const v_uint16x16& r, const v_uint16x16& a )
+{
+    __m256i bg0 = _mm256_unpacklo_epi16(b.val, g.val);
+    __m256i bg1 = _mm256_unpackhi_epi16(b.val, g.val);
+    __m256i ra0 = _mm256_unpacklo_epi16(r.val, a.val);
+    __m256i ra1 = _mm256_unpackhi_epi16(r.val, a.val);
+
+    __m256i bgra0_ = _mm256_unpacklo_epi32(bg0, ra0);
+    __m256i bgra1_ = _mm256_unpackhi_epi32(bg0, ra0);
+    __m256i bgra2_ = _mm256_unpacklo_epi32(bg1, ra1);
+    __m256i bgra3_ = _mm256_unpackhi_epi32(bg1, ra1);
+
+    __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
+    __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
+    __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
+    __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgra0);
+    _mm256_storeu_si256((__m256i*)(ptr + 16), bgra1);
+    _mm256_storeu_si256((__m256i*)(ptr + 32), bgra2);
+    _mm256_storeu_si256((__m256i*)(ptr + 48), bgra3);
+}
+
+inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g,
+                                const v_uint32x8& r, const v_uint32x8& a )
+{
+    __m256i bg0 = _mm256_unpacklo_epi32(b.val, g.val);
+    __m256i bg1 = _mm256_unpackhi_epi32(b.val, g.val);
+    __m256i ra0 = _mm256_unpacklo_epi32(r.val, a.val);
+    __m256i ra1 = _mm256_unpackhi_epi32(r.val, a.val);
+
+    __m256i bgra0_ = _mm256_unpacklo_epi64(bg0, ra0);
+    __m256i bgra1_ = _mm256_unpackhi_epi64(bg0, ra0);
+    __m256i bgra2_ = _mm256_unpacklo_epi64(bg1, ra1);
+    __m256i bgra3_ = _mm256_unpackhi_epi64(bg1, ra1);
+
+    __m256i bgra0 = _mm256_permute2x128_si256(bgra0_, bgra1_, 0 + 2*16);
+    __m256i bgra2 = _mm256_permute2x128_si256(bgra0_, bgra1_, 1 + 3*16);
+    __m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
+    __m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgra0);
+    _mm256_storeu_si256((__m256i*)(ptr + 8), bgra1);
+    _mm256_storeu_si256((__m256i*)(ptr + 16), bgra2);
+    _mm256_storeu_si256((__m256i*)(ptr + 24), bgra3);
+}
+
+inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g,
+                                const v_uint64x4& r, const v_uint64x4& a )
+{
+    __m256i bg0 = _mm256_unpacklo_epi64(b.val, g.val);
+    __m256i bg1 = _mm256_unpackhi_epi64(b.val, g.val);
+    __m256i ra0 = _mm256_unpacklo_epi64(r.val, a.val);
+    __m256i ra1 = _mm256_unpackhi_epi64(r.val, a.val);
+
+    __m256i bgra0 = _mm256_permute2x128_si256(bg0, ra0, 0 + 2*16);
+    __m256i bgra1 = _mm256_permute2x128_si256(bg1, ra1, 0 + 2*16);
+    __m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16);
+    __m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16);
+
+    _mm256_storeu_si256((__m256i*)ptr, bgra0);
+    _mm256_storeu_si256((__m256i*)(ptr + 4), bgra1);
+    _mm256_storeu_si256((__m256i*)(ptr + 8), bgra2);
+    _mm256_storeu_si256((__m256i*)(ptr + 12), bgra3);
+}
+
+#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 a1, b1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 a1, b1, c1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+} \
+inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
+{ \
+    _Tpvec1 a1, b1, c1, d1; \
+    v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
+    a0 = v_reinterpret_as_##suffix0(a1); \
+    b0 = v_reinterpret_as_##suffix0(b1); \
+    c0 = v_reinterpret_as_##suffix0(c1); \
+    d0 = v_reinterpret_as_##suffix0(d1); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
+} \
+inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
+                                const _Tpvec0& c0, const _Tpvec0& d0 ) \
+{ \
+    _Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
+    _Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
+    _Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
+    _Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
+    v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
+}
+
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int16x16, short, s16, v_uint16x16, ushort, u16)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int32x8, int, s32, v_uint32x8, unsigned, u32)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float32x8, float, f32, v_uint32x8, unsigned, u32)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int64x4, int64, s64, v_uint64x4, uint64, u64)
+OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_float64x4, double, f64, v_uint64x4, uint64, u64)
 
 inline void v256_cleanup() { _mm256_zeroupper(); }
 
 
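Throughout the new AVX2 code above, the _mm256_permute2x128_si256 immediates are spelled `0 + 2*16` and `1 + 3*16`. The low nibble selects the destination's low 128-bit lane and the high nibble the high lane; selectors 0/1 name the halves of the first operand and 2/3 the halves of the second. A minimal illustration (not from the patch):

    __m256i lo = _mm256_permute2x128_si256(x, y, 0 + 2*16); // low(x)  | low(y)
    __m256i hi = _mm256_permute2x128_si256(x, y, 1 + 3*16); // high(x) | high(y)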
@@ -1318,6 +1318,80 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
     vst4q_##suffix(ptr, v); \
 }
 
+#define OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(tp, suffix) \
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 2); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 3); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+} \
+\
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, \
+                                 v_##tp##x2& b, v_##tp##x2& c ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t c0 = vld1_##suffix(ptr + 2); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 3); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 4); \
+    tp##x1_t c1 = vld1_##suffix(ptr + 5); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+    c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
+} \
+\
+inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
+                                 v_##tp##x2& c, v_##tp##x2& d ) \
+{ \
+    tp##x1_t a0 = vld1_##suffix(ptr); \
+    tp##x1_t b0 = vld1_##suffix(ptr + 1); \
+    tp##x1_t c0 = vld1_##suffix(ptr + 2); \
+    tp##x1_t d0 = vld1_##suffix(ptr + 3); \
+    tp##x1_t a1 = vld1_##suffix(ptr + 4); \
+    tp##x1_t b1 = vld1_##suffix(ptr + 5); \
+    tp##x1_t c1 = vld1_##suffix(ptr + 6); \
+    tp##x1_t d1 = vld1_##suffix(ptr + 7); \
+    a = v_##tp##x2(vcombine_##suffix(a0, a1)); \
+    b = v_##tp##x2(vcombine_##suffix(b0, b1)); \
+    c = v_##tp##x2(vcombine_##suffix(c0, c1)); \
+    d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
+} \
+\
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b ) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 3, vget_high_##suffix(b.val)); \
+} \
+\
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
+                                const v_##tp##x2& b, const v_##tp##x2& c ) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
+    vst1_##suffix(ptr + 3, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 4, vget_high_##suffix(b.val)); \
+    vst1_##suffix(ptr + 5, vget_high_##suffix(c.val)); \
+} \
+\
+inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
+                                const v_##tp##x2& c, const v_##tp##x2& d ) \
+{ \
+    vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
+    vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
+    vst1_##suffix(ptr + 2, vget_low_##suffix(c.val)); \
+    vst1_##suffix(ptr + 3, vget_low_##suffix(d.val)); \
+    vst1_##suffix(ptr + 4, vget_high_##suffix(a.val)); \
+    vst1_##suffix(ptr + 5, vget_high_##suffix(b.val)); \
+    vst1_##suffix(ptr + 6, vget_high_##suffix(c.val)); \
+    vst1_##suffix(ptr + 7, vget_high_##suffix(d.val)); \
+}
+
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
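For reference, the instantiation OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64) in the next hunk expands the two-channel load to roughly the following; plain vld1 loads and vcombine stand in for the structured vld2 family, which is not universally available for 64-bit lanes:

    inline void v_load_deinterleave( const uint64* ptr, v_uint64x2& a, v_uint64x2& b )
    {
        uint64x1_t a0 = vld1_u64(ptr);      // a[0]
        uint64x1_t b0 = vld1_u64(ptr + 1);  // b[0]
        uint64x1_t a1 = vld1_u64(ptr + 2);  // a[1]
        uint64x1_t b1 = vld1_u64(ptr + 3);  // b[1]
        a = v_uint64x2(vcombine_u64(a0, a1));
        b = v_uint64x2(vcombine_u64(b0, b1));
    }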
@@ -1329,6 +1403,9 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
 OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
 #endif
 
+OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(int64, s64)
+OPENCV_HAL_IMPL_NEON_INTERLEAVED_INT64(uint64, u64)
+
 inline v_float32x4 v_cvt_f32(const v_int32x4& a)
 {
     return v_float32x4(vcvtq_f32_s32(a.val));
@@ -58,17 +58,6 @@ namespace cv
 
 CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
 
-struct v_uint8x16;
-struct v_int8x16;
-struct v_uint16x8;
-struct v_int16x8;
-struct v_uint32x4;
-struct v_int32x4;
-struct v_float32x4;
-struct v_uint64x2;
-struct v_int64x2;
-struct v_float64x2;
-
 struct v_uint8x16
 {
     typedef uchar lane_type;
@@ -1660,7 +1649,7 @@ OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_uint32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_N
 OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
 OPENCV_HAL_IMPL_SSE_TRANSPOSE4x4(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
 
-// adopted from sse_utils.hpp
+// load deinterleave
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
 {
     __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
@@ -1681,7 +1670,25 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
 
 inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
 {
-#if CV_SSSE3
+#if CV_SSE4_1
+    static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
+    static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
+    __m128i s0 = _mm_loadu_si128((const __m128i*)ptr);
+    __m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
+    __m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
+    __m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1);
+    __m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1);
+    __m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1);
+    static const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
+    static const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
+    static const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
+    a0 = _mm_shuffle_epi8(a0, sh_b);
+    b0 = _mm_shuffle_epi8(b0, sh_g);
+    c0 = _mm_shuffle_epi8(c0, sh_r);
+    a.val = a0;
+    b.val = b0;
+    c.val = c0;
+#elif CV_SSSE3
     static const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
     static const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
    static const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
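The SSE4.1 path above leans on _mm_blendv_epi8, which picks each byte from its second source wherever the top bit of the corresponding mask byte is set; the -1 entries in m0/m1 are exactly those selector bytes. In short (illustration only):

    __m128i sel = _mm_blendv_epi8(x, y, m); // per byte: (m & 0x80) ? y : x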
@@ -1753,8 +1760,41 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
     d.val = _mm_unpackhi_epi8(v2, v3);
 }
 
+inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
+{
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));     // a0 b0 a1 b1 a2 b2 a3 b3
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7
+
+    __m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
+    __m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
+    __m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
+    __m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7
+
+    a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
+    b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
+}
+
 inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c)
 {
+#if CV_SSE4_1
+    __m128i v0 = _mm_loadu_si128((__m128i*)(ptr));
+    __m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8));
+    __m128i v2 = _mm_loadu_si128((__m128i*)(ptr + 16));
+    __m128i a0 = _mm_blend_epi16(_mm_blend_epi16(v0, v1, 0x92), v2, 0x24);
+    __m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
+    __m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
+
+    static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
+    static const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
+    static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
+    a0 = _mm_shuffle_epi8(a0, sh_a);
+    b0 = _mm_shuffle_epi8(b0, sh_b);
+    c0 = _mm_shuffle_epi8(c0, sh_c);
+
+    a.val = a0;
+    b.val = b0;
+    c.val = c0;
+#else
     __m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
     __m128i t01 = _mm_loadu_si128((const __m128i*)(ptr + 8));
     __m128i t02 = _mm_loadu_si128((const __m128i*)(ptr + 16));
@ -1770,6 +1810,7 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
|
||||
a.val = _mm_unpacklo_epi16(t20, _mm_unpackhi_epi64(t21, t21));
|
||||
b.val = _mm_unpacklo_epi16(_mm_unpackhi_epi64(t20, t20), t22);
|
||||
c.val = _mm_unpacklo_epi16(t21, _mm_unpackhi_epi64(t22, t22));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b, v_uint16x8& c, v_uint16x8& d)
|
||||
@ -1795,6 +1836,18 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
|
||||
d.val = _mm_unpackhi_epi16(u2, u3);
|
||||
}
|
||||
|
||||
inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b)
{
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 4)); // a2 b2 a3 b3

__m128i v2 = _mm_unpacklo_epi32(v0, v1); // a0 a2 b0 b2
__m128i v3 = _mm_unpackhi_epi32(v0, v1); // a1 a3 b1 b3

a.val = _mm_unpacklo_epi32(v2, v3); // a0 a1 a2 a3
b.val = _mm_unpackhi_epi32(v2, v3); // b0 b1 b2 b3
}

inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c)
{
__m128i t00 = _mm_loadu_si128((const __m128i*)ptr);
@ -1812,12 +1865,23 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4&

inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& b, v_uint32x4& c, v_uint32x4& d)
{
v_uint32x4 u0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0
v_uint32x4 u1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
v_uint32x4 u2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
v_uint32x4 u3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3
v_uint32x4 s0(_mm_loadu_si128((const __m128i*)ptr)); // a0 b0 c0 d0
v_uint32x4 s1(_mm_loadu_si128((const __m128i*)(ptr + 4))); // a1 b1 c1 d1
v_uint32x4 s2(_mm_loadu_si128((const __m128i*)(ptr + 8))); // a2 b2 c2 d2
v_uint32x4 s3(_mm_loadu_si128((const __m128i*)(ptr + 12))); // a3 b3 c3 d3

v_transpose4x4(u0, u1, u2, u3, a, b, c, d);
v_transpose4x4(s0, s1, s2, s3, a, b, c, d);
}
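
// Editorial sketch (not part of the upstream diff): the 4-channel case above is just a
// 4x4 transpose -- the four loaded vectors of (a,b,c,d) quadruplets form a row-major
// 4x4 matrix, and transposing it groups each channel into its own vector. A scalar
// model of what v_transpose4x4 computes (identifiers below are illustrative):
static inline void transpose4x4_model(const unsigned in[16], unsigned out[16])
{
    for (int r = 0; r < 4; r++)          // rows: the four loaded vectors
        for (int c = 0; c < 4; c++)      // cols: the four interleaved channels
            out[c * 4 + r] = in[r * 4 + c];  // out = a0..a3 b0..b3 c0..c3 d0..d3
}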

inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);

__m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1
__m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3

a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 b2 b3
}

inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
@ -1853,77 +1917,43 @@ inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b
d.val = _mm_unpackhi_ps(t02hi, t13hi);
}

inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b)
{
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2));
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4));

a = v_uint64x2(_mm_unpacklo_epi64(t0, _mm_unpackhi_epi64(t1, t1)));
a = v_uint64x2(_mm_unpacklo_epi64(t0, t1));
b = v_uint64x2(_mm_unpackhi_epi64(t0, t1));
}

inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a, v_uint64x2& b, v_uint64x2& c)
{
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0, b0
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0, a1
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // b1, c1

t1 = _mm_shuffle_epi32(t1, 0x4e); // a1, c0

a = v_uint64x2(_mm_unpacklo_epi64(t0, t1));
b = v_uint64x2(_mm_unpacklo_epi64(_mm_unpackhi_epi64(t0, t0), t2));
c = v_uint64x2(_mm_unpacklo_epi64(t1, _mm_unpackhi_epi64(t2, t2)));
c = v_uint64x2(_mm_unpackhi_epi64(t1, t2));
}

inline void v_load_deinterleave(const int64 *ptr, v_int64x2& a, v_int64x2& b, v_int64x2& c)
inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
v_uint64x2& b, v_uint64x2& c, v_uint64x2& d)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_s64(t0);
b = v_reinterpret_as_s64(t1);
c = v_reinterpret_as_s64(t2);
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr); // a0 b0
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 2)); // c0 d0
__m128i t2 = _mm_loadu_si128((const __m128i*)(ptr + 4)); // a1 b1
__m128i t3 = _mm_loadu_si128((const __m128i*)(ptr + 6)); // c1 d1

a = v_uint64x2(_mm_unpacklo_epi64(t0, t2));
b = v_uint64x2(_mm_unpackhi_epi64(t0, t2));
c = v_uint64x2(_mm_unpacklo_epi64(t1, t3));
d = v_uint64x2(_mm_unpackhi_epi64(t1, t3));
}

inline void v_load_deinterleave(const double *ptr, v_float64x2& a, v_float64x2& b, v_float64x2& c)
{
v_uint64x2 t0, t1, t2;
v_load_deinterleave((const uint64*)ptr, t0, t1, t2);
a = v_reinterpret_as_f64(t0);
b = v_reinterpret_as_f64(t1);
c = v_reinterpret_as_f64(t2);
}

// 2-channel
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);

__m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1
__m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3

a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 b2 b3
}

inline void v_load_deinterleave(const short* ptr, v_int16x8& a, v_int16x8& b)
{
__m128i v0 = _mm_loadu_si128((__m128i*)(ptr)); // a0 b0 a1 b1 a2 b2 a3 b3
__m128i v1 = _mm_loadu_si128((__m128i*)(ptr + 8)); // a4 b4 a5 b5 a6 b6 a7 b7

__m128i v2 = _mm_unpacklo_epi16(v0, v1); // a0 a4 b0 b4 a1 a5 b1 b5
__m128i v3 = _mm_unpackhi_epi16(v0, v1); // a2 a6 b2 b6 a3 a7 b3 b7
__m128i v4 = _mm_unpacklo_epi16(v2, v3); // a0 a2 a4 a6 b0 b2 b4 b6
__m128i v5 = _mm_unpackhi_epi16(v2, v3); // a1 a3 a5 a7 b1 b3 b5 b7

a.val = _mm_unpacklo_epi16(v4, v5); // a0 a1 a2 a3 a4 a5 a6 a7
b.val = _mm_unpackhi_epi16(v4, v5); // b0 b1 b2 b3 b4 b5 b6 b7
}

inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b)
{
v_int16x8 sa, sb;
v_load_deinterleave((const short*)ptr, sa, sb);
a = v_reinterpret_as_u16(sa);
b = v_reinterpret_as_u16(sb);
}

inline void v_store_interleave(short* ptr, const v_int16x8& a, const v_int16x8& b)
{
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(a.val, b.val);
t1 = _mm_unpackhi_epi16(a.val, b.val);
_mm_storeu_si128((__m128i*)(ptr), t0);
_mm_storeu_si128((__m128i*)(ptr + 8), t1);
}
// store interleave

inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b)
{
@ -1937,7 +1967,24 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
const v_uint8x16& c )
{
#if CV_SSSE3
#if CV_SSE4_1
static const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
static const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
static const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);

static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
__m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0);
__m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0);
__m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0);

_mm_storeu_si128((__m128i*)(ptr), v0);
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
#elif CV_SSSE3
static const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
static const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
static const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
@ -2025,10 +2072,35 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
_mm_storeu_si128((__m128i*)(ptr + 48), v3);
}

inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b )
{
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(a.val, b.val);
t1 = _mm_unpackhi_epi16(a.val, b.val);
_mm_storeu_si128((__m128i*)(ptr), t0);
_mm_storeu_si128((__m128i*)(ptr + 8), t1);
}

inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
const v_uint16x8& b,
const v_uint16x8& c )
{
#if CV_SSE4_1
static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
static const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);

__m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24);
__m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24);
__m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24);

_mm_storeu_si128((__m128i*)ptr, v0);
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
#else
__m128i z = _mm_setzero_si128();
__m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
__m128i ab1 = _mm_unpackhi_epi16(a.val, b.val);
@ -2060,6 +2132,7 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
_mm_storeu_si128((__m128i*)(ptr), v0);
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
#endif
}

inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
@ -2085,6 +2158,15 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16
_mm_storeu_si128((__m128i*)(ptr + 24), v3);
}

inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b )
{
__m128i t0 = _mm_unpacklo_epi32(a.val, b.val);
__m128i t1 = _mm_unpackhi_epi32(a.val, b.val);

_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 4), t1);
}

inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
const v_uint32x4& c )
{
@ -2158,6 +2240,15 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
_mm_storeu_ps(ptr + 12, v3);
}

inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b)
{
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
__m128i t1 = _mm_unpackhi_epi64(a.val, b.val);

_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
}

inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
{
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
@ -2169,58 +2260,72 @@ inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
}

inline void v_store_interleave(int64 *ptr, const v_int64x2& a, const v_int64x2& b, const v_int64x2& c)
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, const v_uint64x2& d)
{
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
__m128i t1 = _mm_unpacklo_epi64(c.val, d.val);
__m128i t2 = _mm_unpackhi_epi64(a.val, b.val);
__m128i t3 = _mm_unpackhi_epi64(c.val, d.val);

_mm_storeu_si128((__m128i*)ptr, t0);
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
_mm_storeu_si128((__m128i*)(ptr + 6), t3);
}

inline void v_store_interleave(double *ptr, const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
{
v_store_interleave((uint64*)ptr, v_reinterpret_as_u64(a), v_reinterpret_as_u64(b), v_reinterpret_as_u64(c));
}

#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec, _Tp, suffix, _Tpuvec, _Tpu, usuffix) \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0 ) \
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0 ) \
{ \
_Tpuvec a1, b1, c1; \
v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1); \
a0 = v_reinterpret_as_##suffix(a1); \
b0 = v_reinterpret_as_##suffix(b1); \
c0 = v_reinterpret_as_##suffix(c1); \
_Tpvec1 a1, b1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
} \
inline void v_load_deinterleave( const _Tp* ptr, _Tpvec& a0, \
_Tpvec& b0, _Tpvec& c0, _Tpvec& d0 ) \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0 ) \
{ \
_Tpuvec a1, b1, c1, d1; \
v_load_deinterleave((const _Tpu*)ptr, a1, b1, c1, d1); \
a0 = v_reinterpret_as_##suffix(a1); \
b0 = v_reinterpret_as_##suffix(b1); \
c0 = v_reinterpret_as_##suffix(c1); \
d0 = v_reinterpret_as_##suffix(d1); \
_Tpvec1 a1, b1, c1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
} \
inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, \
const _Tpvec& b0, const _Tpvec& c0 ) \
inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpvec0& c0, _Tpvec0& d0 ) \
{ \
_Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
_Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
_Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
v_store_interleave((_Tpu*)ptr, a1, b1, c1); \
_Tpvec1 a1, b1, c1, d1; \
v_load_deinterleave((const _Tp1*)ptr, a1, b1, c1, d1); \
a0 = v_reinterpret_as_##suffix0(a1); \
b0 = v_reinterpret_as_##suffix0(b1); \
c0 = v_reinterpret_as_##suffix0(c1); \
d0 = v_reinterpret_as_##suffix0(d1); \
} \
inline void v_store_interleave( _Tp* ptr, const _Tpvec& a0, const _Tpvec& b0, \
const _Tpvec& c0, const _Tpvec& d0 ) \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
{ \
_Tpuvec a1 = v_reinterpret_as_##usuffix(a0); \
_Tpuvec b1 = v_reinterpret_as_##usuffix(b0); \
_Tpuvec c1 = v_reinterpret_as_##usuffix(c0); \
_Tpuvec d1 = v_reinterpret_as_##usuffix(d0); \
v_store_interleave((_Tpu*)ptr, a1, b1, c1, d1); \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
v_store_interleave((_Tp1*)ptr, a1, b1); \
} \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
{ \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
} \
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
const _Tpvec0& c0, const _Tpvec0& d0 ) \
{ \
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
}

OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int16x8, short, s16, v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int32x4, int, s32, v_uint32x4, unsigned, u32)
//OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float32x4, float, f32, v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int64x2, int64, s64, v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2, uint64, u64)
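
// Editorial sketch (not part of the upstream diff): with the reworked macro above,
// every signed/float interleave call is a thin wrapper over the unsigned kernel of
// the same element width. Expanded by hand for the s16 case, it is equivalent to:
inline void v_load_deinterleave_s16_sketch(const short* ptr, v_int16x8& a0, v_int16x8& b0)
{
    v_uint16x8 a1, b1;
    v_load_deinterleave((const ushort*)ptr, a1, b1); // the unsigned kernel does the work
    a0 = v_reinterpret_as_s16(a1); // bit-exact reinterpretation, no value conversion
    b0 = v_reinterpret_as_s16(b1);
}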

inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{

@ -298,6 +298,8 @@ OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int64, v_int64x2)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint64, v_uint64x2)

/* Expand */
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \

@ -871,6 +871,13 @@ public:
*/
TermCriteria(int type, int maxCount, double epsilon);

inline bool isValid() const
{
const bool isCount = (type & COUNT) && maxCount > 0;
const bool isEps = (type & EPS) && !cvIsNaN(epsilon);
return isCount || isEps;
}

int type; //!< the type of termination criteria: COUNT, EPS or COUNT + EPS
int maxCount; //!< the maximum number of iterations/elements
double epsilon; //!< the desired accuracy
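
// Editorial sketch (illustrative, not part of the upstream diff) of the new check:
//   cv::TermCriteria ok(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 1e-3);
//   CV_Assert(ok.isValid());    // positive maxCount and a non-NaN epsilon
//   cv::TermCriteria bad(cv::TermCriteria::EPS, 0, std::nan("")); // needs <cmath>
//   CV_Assert(!bad.isValid());  // EPS requested but epsilon is NaN, COUNT unset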

@ -629,7 +629,6 @@ CV_INLINE int cvIplDepth( int type )
#define CV_TYPE_NAME_MATND "opencv-nd-matrix"

#define CV_MAX_DIM 32
#define CV_MAX_DIM_HEAP 1024

/**
@deprecated consider using cv::Mat instead

@ -1725,8 +1725,8 @@ cvPtr1D( const CvArr* arr, int idx, int* _type )
else
{
int i, n = m->dims;
CV_DbgAssert( n <= CV_MAX_DIM_HEAP );
int _idx[CV_MAX_DIM_HEAP];
CV_DbgAssert( n <= CV_MAX_DIM );
int _idx[CV_MAX_DIM];

for( i = n - 1; i >= 0; i-- )
{

@ -8,223 +8,49 @@

namespace cv { namespace hal {

#if CV_NEON
template<typename T> struct VMerge2;
template<typename T> struct VMerge3;
template<typename T> struct VMerge4;

#define MERGE2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
store_func(dst, r); \
} \
}

#define MERGE3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
store_func(dst, r); \
} \
}

#define MERGE4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, const data_type* src3, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
r.val[3] = load_func(src3); \
store_func(dst, r); \
} \
}

MERGE2_KERNEL_TEMPLATE(VMerge2, uchar , uint8x16x2_t, vld1q_u8 , vst2q_u8 );
MERGE2_KERNEL_TEMPLATE(VMerge2, ushort, uint16x8x2_t, vld1q_u16, vst2q_u16);
MERGE2_KERNEL_TEMPLATE(VMerge2, int , int32x4x2_t, vld1q_s32, vst2q_s32);
MERGE2_KERNEL_TEMPLATE(VMerge2, int64 , int64x1x2_t, vld1_s64 , vst2_s64 );

MERGE3_KERNEL_TEMPLATE(VMerge3, uchar , uint8x16x3_t, vld1q_u8 , vst3q_u8 );
MERGE3_KERNEL_TEMPLATE(VMerge3, ushort, uint16x8x3_t, vld1q_u16, vst3q_u16);
MERGE3_KERNEL_TEMPLATE(VMerge3, int , int32x4x3_t, vld1q_s32, vst3q_s32);
MERGE3_KERNEL_TEMPLATE(VMerge3, int64 , int64x1x3_t, vld1_s64 , vst3_s64 );

MERGE4_KERNEL_TEMPLATE(VMerge4, uchar , uint8x16x4_t, vld1q_u8 , vst4q_u8 );
MERGE4_KERNEL_TEMPLATE(VMerge4, ushort, uint16x8x4_t, vld1q_u16, vst4q_u16);
MERGE4_KERNEL_TEMPLATE(VMerge4, int , int32x4x4_t, vld1q_s32, vst4q_s32);
MERGE4_KERNEL_TEMPLATE(VMerge4, int64 , int64x1x4_t, vld1_s64 , vst4_s64 );

#elif CV_SSE2

template <typename T>
struct VMerge2
#if CV_SIMD
template<typename T, typename VecT> static void
vecmerge_( const T** src, T* dst, int len, int cn )
{
VMerge2() : support(false) { }
void operator()(const T *, const T *, T *) const { }
int i;
const T* src0 = src[0];
const T* src1 = src[1];

bool support;
};

template <typename T>
struct VMerge3
{
VMerge3() : support(false) { }
void operator()(const T *, const T *, const T *, T *) const { }

bool support;
};

template <typename T>
struct VMerge4
{
VMerge4() : support(false) { }
void operator()(const T *, const T *, const T *, const T *, T *) const { }

bool support;
};

#define MERGE2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge2<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge2() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, \
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, v_src3); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
} \
\
bool support; \
const int VECSZ = VecT::nlanes;
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
v_store_interleave(dst + i*cn, a, b);
}
}
else if( cn == 3 )
{
const T* src2 = src[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
v_store_interleave(dst + i*cn, a, b, c);
}
}
else
{
CV_Assert( cn == 4 );
const T* src2 = src[2];
const T* src3 = src[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
v_store_interleave(dst + i*cn, a, b, c, d);
}
}
vx_cleanup();
}
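
// Editorial note (not part of the upstream diff): the loops above intentionally step
// past the last full vector and clamp with i = std::min(len - VECSZ, i), so the final
// iteration re-reads and re-writes a few already-processed elements instead of taking
// a scalar tail. This assumes len >= VECSZ, which the merge8u/16u/32s/64s wrappers
// below guarantee with their len >= nlanes guard. A stripped-down scalar model of the
// pattern (needs <algorithm>; identifiers are illustrative):
static void copy_with_overlapping_tail(const int* src, int* dst, int len, int vecsz)
{
    for( int i = 0; i < len; i += vecsz )
    {
        i = std::min( len - vecsz, i );  // clamp: last block overlaps the previous one
        for( int k = 0; k < vecsz; k++ ) // stands in for one vector load + store
            dst[i + k] = src[i + k];
    }
}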

#define MERGE3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge3<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge3() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, const data_type * src2,\
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, \
v_src3, v_src4, v_src5); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
} \
\
bool support; \
}

#define MERGE4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge4<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VMerge4() \
{ \
support = checkHardwareSupport(se); \
} \
\
void operator()(const data_type * src0, const data_type * src1, \
const data_type * src2, const data_type * src3, \
data_type * dst) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
reg_type v_src6 = _mm_loadu_##flavor((const cast_type *)(src3)); \
reg_type v_src7 = _mm_loadu_##flavor((const cast_type *)(src3 + ELEMS_IN_VEC)); \
\
_mm_interleave(v_src0, v_src1, v_src2, v_src3, \
v_src4, v_src5, v_src6, v_src7); \
\
_mm_storeu_##flavor((cast_type *)(dst), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 6), v_src6); \
_mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 7), v_src7); \
} \
\
bool support; \
}

MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_interleave_epi8, si128, CV_CPU_SSE2);

#if CV_SSE4_1
MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
#endif

MERGE2_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE( int, __m128, float, _mm_interleave_ps, ps, CV_CPU_SSE2);

#endif

template<typename T> static void
@ -242,28 +68,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1];
i = j = 0;
#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;

VMerge2<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, dst + j);
}
#elif CV_SSE2
if(cn == 2)
{
int inc_i = 32/sizeof(T);
int inc_j = 2 * inc_i;

VMerge2<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
@ -274,28 +78,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
i = j = 0;
#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;

VMerge3<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
}
#elif CV_SSE2
if(cn == 3)
{
int inc_i = 32/sizeof(T);
int inc_j = 3 * inc_i;

VMerge3<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
@ -307,28 +89,6 @@ merge_( const T** src, T* dst, int len, int cn )
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
i = j = 0;
#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;

VMerge4<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
}
#elif CV_SSE2
if(cn == 4)
{
int inc_i = 32/sizeof(T);
int inc_j = 4 * inc_i;

VMerge4<T> vmerge;
if (vmerge.support)
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i]; dst[j+1] = src1[i];
@ -347,29 +107,48 @@ merge_( const T** src, T* dst, int len, int cn )
}
}


void merge8u(const uchar** src, uchar* dst, int len, int cn )
{
CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<uchar, v_uint8>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}

void merge16u(const ushort** src, ushort* dst, int len, int cn )
{
CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<ushort, v_uint16>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}

void merge32s(const int** src, int* dst, int len, int cn )
{
CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int32::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<int, v_int32>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}

void merge64s(const int64** src, int64* dst, int len, int cn )
{
CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn)
merge_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 )
vecmerge_<int64, v_int64>(src, dst, len, cn);
else
#endif
merge_(src, dst, len, cn);
}
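
// Editorial note (not part of the upstream diff): each wrapper above tries the HAL
// hook first, then the universal-intrinsics path only when len >= nlanes (the overlap
// trick in vecmerge_ needs at least one full vector) and 2 <= cn <= 4; anything else
// falls back to the scalar merge_. A typical way to reach this code (illustrative):
//   std::vector<cv::Mat> planes(3, cv::Mat::zeros(480, 640, CV_8UC1));
//   cv::Mat bgr;
//   cv::merge(planes, bgr); // dispatches to cv::hal::merge8u for CV_8U data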

}} // cv::hal::

@ -123,7 +123,6 @@ static char* icvJSONParseKey( CvFileStorage* fs, char* ptr, CvFileNode* map, CvF
CV_PARSE_ERROR( "Key must start with \'\"\'" );

char * beg = ptr + 1;
char * end = beg;

do {
++ptr;
@ -133,7 +132,7 @@ static char* icvJSONParseKey( CvFileStorage* fs, char* ptr, CvFileNode* map, CvF
if( *ptr != '"' )
CV_PARSE_ERROR( "Key must end with \'\"\'" );

end = ptr;
const char * end = ptr;
ptr++;
ptr = icvJSONSkipSpaces( fs, ptr );
if ( ptr == 0 || fs->dummy_eof )
@ -576,12 +575,12 @@ void icvJSONParse( CvFileStorage* fs )
if ( *ptr == '{' )
{
CvFileNode* root_node = (CvFileNode*)cvSeqPush( fs->roots, 0 );
ptr = icvJSONParseMap( fs, ptr, root_node );
icvJSONParseMap( fs, ptr, root_node );
}
else if ( *ptr == '[' )
{
CvFileNode* root_node = (CvFileNode*)cvSeqPush( fs->roots, 0 );
ptr = icvJSONParseSeq( fs, ptr, root_node );
icvJSONParseSeq( fs, ptr, root_node );
}
else
{
@ -668,7 +667,7 @@ void icvJSONWrite( CvFileStorage* fs, const char* key, const char* data )
*ptr++ = '\n';
*ptr++ = '\0';
::icvPuts( fs, fs->buffer_start );
ptr = fs->buffer = fs->buffer_start;
fs->buffer = fs->buffer_start;
}
ptr = icvFSFlush(fs);
}

@ -302,7 +302,7 @@ static void* icvReadSparseMat( CvFileStorage* fs, CvFileNode* node )
CvFileNode* sizes_node;
CvSeqReader reader;
CvSeq* elements;
int sizes[CV_MAX_DIM_HEAP], dims, elem_type, cn;
int sizes[CV_MAX_DIM], dims, elem_type, cn;
int i;

sizes_node = cvGetFileNodeByName( fs, node, "sizes" );
@ -327,7 +327,7 @@ static void* icvReadSparseMat( CvFileStorage* fs, CvFileNode* node )
mat = cvCreateSparseMat( dims, sizes, elem_type );

cn = CV_MAT_CN(elem_type);
int idx[CV_MAX_DIM_HEAP];
int idx[CV_MAX_DIM];
elements = data->data.seq;
cvStartReadRawData( fs, data, &reader );

@ -8,222 +8,57 @@

namespace cv { namespace hal {

#if CV_NEON
template<typename T> struct VSplit2;
template<typename T> struct VSplit3;
template<typename T> struct VSplit4;

#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, \
data_type* dst1) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
} \
}

#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
} \
}

#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type> \
{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2, data_type* dst3) const \
{ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
store_func(dst3, r.val[3]); \
} \
}

SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 );
SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 );

SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 );

SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 );

#elif CV_SSE2

template <typename T>
struct VSplit2
#if CV_SIMD
template<typename T, typename VecT> static void
vecsplit_( const T* src, T** dst, int len, int cn )
{
VSplit2() : support(false) { }
void operator()(const T *, T *, T *) const { }
int i;
T* dst0 = dst[0];
T* dst1 = dst[1];

bool support;
};

template <typename T>
struct VSplit3
{
VSplit3() : support(false) { }
void operator()(const T *, T *, T *, T *) const { }

bool support;
};

template <typename T>
struct VSplit4
{
VSplit4() : support(false) { }
void operator()(const T *, T *, T *, T *, T *) const { }

bool support;
};

#define SPLIT2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit2<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit2() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, \
data_type * dst0, data_type * dst1) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, v_src3); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
} \
\
bool support; \
const int VECSZ = VecT::nlanes;
if( cn == 2 )
{
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b;
v_load_deinterleave(src + i*cn, a, b);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
}
}
else if( cn == 3 )
{
T* dst2 = dst[2];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b, c;
v_load_deinterleave(src + i*cn, a, b, c);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
}
}
else
{
CV_Assert( cn == 4 );
T* dst2 = dst[2];
T* dst3 = dst[3];
for( i = 0; i < len; i += VECSZ )
{
i = std::min( len - VECSZ, i );
VecT a, b, c, d;
v_load_deinterleave(src + i*cn, a, b, c, d);
v_store(dst0 + i, a);
v_store(dst1 + i, b);
v_store(dst2 + i, c);
v_store(dst3 + i, d);
}
}
vx_cleanup();
}

#define SPLIT3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit3<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit3() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, \
data_type * dst0, data_type * dst1, data_type * dst2) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, \
v_src3, v_src4, v_src5); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst2), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \
} \
\
bool support; \
}

#define SPLIT4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit4<data_type> \
{ \
enum \
{ \
ELEMS_IN_VEC = 16 / sizeof(data_type) \
}; \
\
VSplit4() \
{ \
support = checkHardwareSupport(CV_CPU_SSE2); \
} \
\
void operator()(const data_type * src, data_type * dst0, data_type * dst1, \
data_type * dst2, data_type * dst3) const \
{ \
reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \
reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
reg_type v_src6 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 6)); \
reg_type v_src7 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 7)); \
\
_mm_deinterleave(v_src0, v_src1, v_src2, v_src3, \
v_src4, v_src5, v_src6, v_src7); \
\
_mm_storeu_##flavor((cast_type *)(dst0), v_src0); \
_mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \
_mm_storeu_##flavor((cast_type *)(dst1), v_src2); \
_mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \
_mm_storeu_##flavor((cast_type *)(dst2), v_src4); \
_mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \
_mm_storeu_##flavor((cast_type *)(dst3), v_src6); \
_mm_storeu_##flavor((cast_type *)(dst3 + ELEMS_IN_VEC), v_src7); \
} \
\
bool support; \
}

SPLIT2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);

SPLIT3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT3_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);

SPLIT4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
SPLIT4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
SPLIT4_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps);

#endif

template<typename T> static void
@ -250,30 +85,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1];
i = j = 0;

#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;

VSplit2<T> vsplit;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i);
}
#elif CV_SSE2
if (cn == 2)
{
int inc_i = 32/sizeof(T);
int inc_j = 2 * inc_i;

VSplit2<T> vsplit;
if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
@ -285,31 +96,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
i = j = 0;

#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;

VSplit3<T> vsplit;
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
}
#elif CV_SSE2
if (cn == 3)
{
int inc_i = 32/sizeof(T);
int inc_j = 3 * inc_i;

VSplit3<T> vsplit;

if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
@ -322,30 +108,6 @@ split_( const T* src, T** dst, int len, int cn )
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
i = j = 0;

#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;

VSplit4<T> vsplit;
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
}
#elif CV_SSE2
if (cn == 4)
{
int inc_i = 32/sizeof(T);
int inc_j = 4 * inc_i;

VSplit4<T> vsplit;
if (vsplit.support)
{
for( ; i <= len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
}
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j]; dst1[i] = src[j+1];
@ -367,25 +129,46 @@ split_( const T* src, T** dst, int len, int cn )
void split8u(const uchar* src, uchar** dst, int len, int cn )
{
CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn)
split_(src, dst, len, cn);

#if CV_SIMD
if( len >= v_uint8::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<uchar, v_uint8>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}

void split16u(const ushort* src, ushort** dst, int len, int cn )
{
CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint16::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<ushort, v_uint16>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}

void split32s(const int* src, int** dst, int len, int cn )
{
CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_uint32::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<int, v_int32>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}

void split64s(const int64* src, int64** dst, int len, int cn )
{
CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn)
split_(src, dst, len, cn);
#if CV_SIMD
if( len >= v_int64::nlanes && 2 <= cn && cn <= 4 )
vecsplit_<int64, v_int64>(src, dst, len, cn);
else
#endif
split_(src, dst, len, cn);
}

}} // cv::hal::

@ -1014,8 +1014,8 @@ protected:
Size mSize(rng.uniform(minMSize, maxMSize), rng.uniform(minMSize, maxMSize));
size_t mvSize = rng.uniform(1, maxMvSize);

int res = cvtest::TS::OK, curRes = res;
curRes = run_case(CV_8U, mvSize, mSize, rng);
int res = cvtest::TS::OK;
int curRes = run_case(CV_8U, mvSize, mSize, rng);
res = curRes != cvtest::TS::OK ? curRes : res;

curRes = run_case(CV_8S, mvSize, mSize, rng);

@ -173,7 +173,7 @@ void Core_RandTest::run( int )
dsz = slice+1 < maxSlice ? (int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz;
Mat aslice = arr[k].colRange(sz, sz + dsz);
tested_rng.fill(aslice, dist_type, A, B);
printf("%d - %d\n", sz, sz + dsz);
//printf("%d - %d\n", sz, sz + dsz);
}
}

@ -375,9 +375,11 @@ TEST(Core_Rand, Regression_Stack_Corruption)
int bufsz = 128; //enough for 14 doubles
AutoBuffer<uchar> buffer(bufsz);
size_t offset = 0;
cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer.data()+offset)); offset += x.total()*x.elemSize();
double& param1 = *(double*)(buffer.data()+offset); offset += sizeof(double);
double& param2 = *(double*)(buffer.data()+offset); offset += sizeof(double);
cv::Mat_<cv::Point2d> x(2, 3, (cv::Point2d*)(buffer.data()+offset));
offset += x.total()*x.elemSize();
double& param1 = *(double*)(buffer.data()+offset);
offset += sizeof(double);
double& param2 = *(double*)(buffer.data()+offset);
param1 = -9; param2 = 2;

cv::theRNG().fill(x, cv::RNG::NORMAL, param1, param2);

@ -120,3 +120,9 @@ if(BUILD_PERF_TESTS)
endif()
endif()
endif()

# Test Intel's Inference Engine models
if(HAVE_INF_ENGINE AND TARGET opencv_test_dnn)
ocv_target_include_directories(opencv_test_dnn PRIVATE ${INF_ENGINE_INCLUDE_DIRS})
ocv_target_link_libraries(opencv_test_dnn LINK_PRIVATE ${INF_ENGINE_LIBRARIES})
endif()

@ -46,9 +46,9 @@
#include <opencv2/core.hpp>

#if !defined CV_DOXYGEN && !defined CV_DNN_DONT_ADD_EXPERIMENTAL_NS
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v5 {
#define CV__DNN_EXPERIMENTAL_NS_BEGIN namespace experimental_dnn_v6 {
#define CV__DNN_EXPERIMENTAL_NS_END }
namespace cv { namespace dnn { namespace experimental_dnn_v5 { } using namespace experimental_dnn_v5; }}
namespace cv { namespace dnn { namespace experimental_dnn_v6 { } using namespace experimental_dnn_v6; }}
#else
#define CV__DNN_EXPERIMENTAL_NS_BEGIN
#define CV__DNN_EXPERIMENTAL_NS_END
@ -487,14 +487,19 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_WRAP void setPreferableTarget(int targetId);

/** @brief Sets the new value for the layer output blob
* @param name descriptor of the updating layer output blob.
* @param blob new blob.
/** @brief Sets the new input value for the network
* @param blob A new blob. Should have CV_32F or CV_8U depth.
* @param name A name of input layer.
* @param scalefactor An optional normalization scale.
* @param mean Optional mean subtraction values.
* @see connect(String, String) to know format of the descriptor.
* @note If updating blob is not empty then @p blob must have the same shape,
* because network reshaping is not implemented yet.
*
* If scale or mean values are specified, a final input blob is computed
* as:
* \f[input(n,c,h,w) = scalefactor \times (blob(n,c,h,w) - mean_c)\f]
*/
CV_WRAP void setInput(InputArray blob, const String& name = "");
CV_WRAP void setInput(InputArray blob, const String& name = "",
double scalefactor = 1.0, const Scalar& mean = Scalar());
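
// Editorial sketch (illustrative, not part of the upstream diff): with the extended
// signature, per-network normalization can move out of blobFromImage into setInput;
// the model file names and the "data" layer name below are placeholders:
//   cv::dnn::Net net = cv::dnn::readNetFromCaffe("deploy.prototxt", "model.caffemodel");
//   cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(224, 224),
//                                         cv::Scalar(), true, false, CV_8U);
//   net.setInput(blob, "data", 1.0 / 255, cv::Scalar(104, 117, 123));
//   // effective input(n,c,h,w) = scalefactor * (blob(n,c,h,w) - mean_c), per the doc above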

/** @brief Sets the new value for the learned param of the layer.
* @param layer name or id of the layer.
@ -805,13 +810,15 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* @param swapRB flag which indicates that swap first and last channels
* in 3-channel image is necessary.
* @param crop flag which indicates whether image will be cropped after resize or not
* @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
* @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
* dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);
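
// Editorial sketch (illustrative, not part of the upstream diff): requesting a CV_8U
// blob keeps the data 8-bit through preprocessing; pairing it with a trivial
// scalefactor and mean here and doing the normalization later in setInput is the
// intended split (an assumption based on the parameters shown in this diff; the file
// name is a placeholder):
//   cv::Mat img = cv::imread("input.jpg");
//   cv::Mat blob = cv::dnn::blobFromImage(img, /*scalefactor*/ 1.0, cv::Size(300, 300),
//                                         /*mean*/ cv::Scalar(), /*swapRB*/ true,
//                                         /*crop*/ false, /*ddepth*/ CV_8U); // NCHW, 8-bit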

/** @brief Creates 4-dimensional blob from image.
* @details This is an overloaded member function, provided for convenience.
@ -819,7 +826,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_EXPORTS void blobFromImage(InputArray image, OutputArray blob, double scalefactor=1.0,
const Size& size = Size(), const Scalar& mean = Scalar(),
bool swapRB=true, bool crop=true);
bool swapRB=true, bool crop=true, int ddepth=CV_32F);

/** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
@ -833,13 +840,15 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
* @param swapRB flag which indicates that swap first and last channels
* in 3-channel image is necessary.
* @param crop flag which indicates whether image will be cropped after resize or not
* @param ddepth Depth of output blob. Choose CV_32F or CV_8U.
* @details if @p crop is true, input image is resized so one side after resize is equal to corresponding
* dimension in @p size and another one is equal or larger. Then, crop from the center is performed.
* If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
* @returns 4-dimansional Mat with NCHW dimensions order.
* @returns 4-dimensional Mat with NCHW dimensions order.
*/
CV_EXPORTS_W Mat blobFromImages(InputArrayOfArrays images, double scalefactor=1.0,
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
Size size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);
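
A sketch of the batched variant under the same assumptions (frame0 and frame1 are assumed to be two already loaded, same-sized BGR images):

    std::vector<cv::Mat> frames = {frame0, frame1};
    cv::Mat batch = cv::dnn::blobFromImages(frames, 1.0, cv::Size(224, 224),
                                            cv::Scalar(), true, false, CV_8U);
    // NCHW layout: batch.size[0] == 2, one plane set per input image.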

/** @brief Creates 4-dimensional blob from series of images.
* @details This is an overloaded member function, provided for convenience.
@ -847,7 +856,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
*/
CV_EXPORTS void blobFromImages(InputArrayOfArrays images, OutputArray blob,
double scalefactor=1.0, Size size = Size(),
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true,
int ddepth=CV_32F);

/** @brief Parse a 4D blob and output the images it contains as 2D arrays through a simpler data structure
* (std::vector<cv::Mat>).

@ -97,35 +97,42 @@ namespace
}

Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
const Scalar& mean, bool swapRB, bool crop)
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop);
blobFromImage(image, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}

void blobFromImage(InputArray image, OutputArray blob, double scalefactor,
const Size& size, const Scalar& mean, bool swapRB, bool crop)
const Size& size, const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
std::vector<Mat> images(1, image.getMat());
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop);
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
}

Mat blobFromImages(InputArrayOfArrays images, double scalefactor, Size size,
const Scalar& mean, bool swapRB, bool crop)
const Scalar& mean, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
Mat blob;
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop);
blobFromImages(images, blob, scalefactor, size, mean, swapRB, crop, ddepth);
return blob;
}

void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
Size size, const Scalar& mean_, bool swapRB, bool crop)
Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
CV_TRACE_FUNCTION();
CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
if (ddepth == CV_8U)
{
CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
CV_Assert(mean_ == Scalar(), "Mean subtraction is not supported for CV_8U blob depth");
}

std::vector<Mat> images;
images_.getMatVector(images);
CV_Assert(!images.empty());
@ -149,7 +156,7 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
else
resize(images[i], images[i], size, 0, 0, INTER_LINEAR);
}
if(images[i].depth() == CV_8U)
if(images[i].depth() == CV_8U && ddepth == CV_32F)
images[i].convertTo(images[i], CV_32F);
Scalar mean = mean_;
if (swapRB)
@ -167,20 +174,20 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
if (nch == 3 || nch == 4)
{
int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
blob_.create(4, sz, CV_32F);
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();
Mat ch[4];

for( i = 0; i < nimages; i++ )
{
image = images[i];
CV_Assert(image.depth() == CV_32F);
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 3 || nch == 4));
CV_Assert(image.size() == image0.size());

for( int j = 0; j < nch; j++ )
ch[j] = Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, j));
ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
if(swapRB)
std::swap(ch[0], ch[2]);
split(image, ch);
@ -190,18 +197,18 @@ void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalef
{
CV_Assert(nch == 1);
int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
blob_.create(4, sz, CV_32F);
blob_.create(4, sz, ddepth);
Mat blob = blob_.getMat();

for( i = 0; i < nimages; i++ )
{
Mat image = images[i];
CV_Assert(image.depth() == CV_32F);
CV_Assert(image.depth() == blob_.depth());
nch = image.channels();
CV_Assert(image.dims == 2 && (nch == 1));
CV_Assert(image.size() == image0.size());

image.copyTo(Mat(image.rows, image.cols, CV_32F, blob.ptr((int)i, 0)));
image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
}
}
}
@ -408,7 +415,16 @@ struct LayerData
//fake layer containing network input blobs
struct DataLayer : public Layer
{
void finalize(const std::vector<Mat*>&, std::vector<Mat>&) CV_OVERRIDE {}
DataLayer() : Layer()
{
skip = false;
}

virtual bool supportBackend(int backendId) CV_OVERRIDE
{
return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && inputsData.size() == 1;
}

void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
{
@ -423,11 +439,36 @@ struct DataLayer : public Layer

void forward(std::vector<Mat*>&, std::vector<Mat>& outputs, std::vector<Mat> &) CV_OVERRIDE
{
// Supported modes:
// | Input type | Output type |
// | fp32 | fp32 |
// | uint8 | fp32 |
for (int i = 0; i < inputsData.size(); ++i)
{
if (inputsData[i].type() == CV_32F && outputs[i].type() == CV_16S)
double scale = scaleFactors[i];
Scalar& mean = means[i];
CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4,
outputs[i].type() == CV_32F);

bool singleMean = true;
for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
{
convertFp16(inputsData[i], outputs[i]);
singleMean = mean[j] == mean[j - 1];
}

if (singleMean)
{
inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
}
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);
Mat out = getPlane(outputs[i], n, c);
inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
}
}
}
}
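
The single-mean fast path above relies on convertTo() computing dst = alpha*src + beta, so passing alpha = scale and beta = -mean[0]*scale evaluates scale*(src - mean) in one pass; a standalone sketch of that identity:

    cv::Mat src(1, 4, CV_8U, cv::Scalar(100));
    cv::Mat dst;
    double scale = 0.5, mean = 10.0;
    src.convertTo(dst, CV_32F, scale, -mean * scale);  // each element: 0.5 * (100 - 10) = 45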
@ -435,13 +476,66 @@ struct DataLayer : public Layer
#ifdef HAVE_OPENCL
bool forward_ocl(InputArrayOfArrays, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_)
{
if (outputs_.depth() == CV_16S)
// Supported modes:
// | Input type | Output type |
// | fp32 | fp32 |
// | fp32 | fp16 |
// | uint8 | fp32 |
std::vector<UMat> outputs;
outputs_.getUMatVector(outputs);

for (int i = 0; i < inputsData.size(); ++i)
{
std::vector<UMat> outputs;
outputs_.getUMatVector(outputs);
for (int i = 0; i < inputsData.size(); ++i)
double scale = scaleFactors[i];
Scalar& mean = means[i];

CV_Assert(mean == Scalar() || inputsData[i].size[1] <= 4);
bool singleMean = true;
for (int j = 1; j < std::min(4, inputsData[i].size[1]) && singleMean; ++j)
{
convertFp16(inputsData[i], outputs[i]);
singleMean = mean[j] == mean[j - 1];
}

if (outputs_.depth() == CV_16S)
{
if (singleMean)
convertFp16(scale * (inputsData[i] - mean[0]), outputs[i]);
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);

std::vector<cv::Range> plane(4, Range::all());
plane[0] = Range(n, n + 1);
plane[1] = Range(c, c + 1);
UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

convertFp16(scale * (inp - mean[c]), out);
}
}
}
else
{
CV_Assert(outputs_.depth() == CV_32F);
if (singleMean)
inputsData[i].convertTo(outputs[i], CV_32F, scale, -mean[0] * scale);
else
{
for (int n = 0; n < inputsData[i].size[0]; ++n)
for (int c = 0; c < inputsData[i].size[1]; ++c)
{
Mat inp = getPlane(inputsData[i], n, c);

std::vector<cv::Range> plane(4, Range::all());
plane[0] = Range(n, n + 1);
plane[1] = Range(c, c + 1);
UMat out = outputs[i](plane).reshape(1, inp.dims, inp.size);

inp.convertTo(out, CV_32F, scale, -mean[c] * scale);
}
}
}
}
return true;
@ -469,8 +563,61 @@ struct DataLayer : public Layer
return false;
}

void finalize(const std::vector<Mat*>&, std::vector<Mat>& outputs) CV_OVERRIDE
{
CV_Assert(outputs.size() == scaleFactors.size(), outputs.size() == means.size(),
inputsData.size() == outputs.size());
skip = true;
for (int i = 0; skip && i < inputsData.size(); ++i)
{
if (inputsData[i].data != outputs[i].data || scaleFactors[i] != 1.0 || means[i] != Scalar())
skip = false;
}
}

virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));

CV_Assert(inputsData.size() == 1, inputsData[0].dims == 4);
const size_t numChannels = inputsData[0].size[1];
CV_Assert(numChannels <= 4);

// Scale
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
weights->allocate();
weights->set(std::vector<float>(numChannels, scaleFactors[0]));
ieLayer->_weights = weights;

// Mean subtraction
auto biases = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{numChannels});
biases->allocate();
std::vector<float> biasesVec(numChannels);
for (int i = 0; i < numChannels; ++i)
{
biasesVec[i] = -means[0][i] * scaleFactors[0];
}
biases->set(biasesVec);
ieLayer->_biases = biases;

return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif  // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}

std::vector<String> outNames;
// Preprocessing parameters for each network's input.
std::vector<double> scaleFactors;
std::vector<Scalar> means;
std::vector<Mat> inputsData;
bool skip;
};

struct BlobManager
@ -739,7 +886,7 @@ struct Net::Impl
netInputLayer = Ptr<DataLayer>(new DataLayer());
LayerData &inpl = layers.insert( make_pair(0, LayerData()) ).first->second;
inpl.id = 0;
inpl.name = "_input";
netInputLayer->name = inpl.name = "_input";
inpl.type = "__NetInputLayer__";
inpl.layerInstance = netInputLayer;
layerNameToId.insert(std::make_pair(inpl.name, inpl.id));
@ -930,6 +1077,11 @@ struct Net::Impl
clear();

allocateLayers(blobsToKeep_);

MapIdToLayerData::iterator it = layers.find(0);
CV_Assert(it != layers.end());
it->second.skip = netInputLayer->skip;

initBackend();

if (!netWasAllocated )
@ -1179,6 +1331,29 @@ struct Net::Impl
MapIdToLayerData::iterator it;
Ptr<InfEngineBackendNet> net;

for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
if (ld.id == 0)
{
CV_Assert((netInputLayer->outNames.empty() && ld.outputBlobsWrappers.size() == 1) ||
(netInputLayer->outNames.size() == ld.outputBlobsWrappers.size()));
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = netInputLayer->outNames.empty() ? ld.name : netInputLayer->outNames[i];
}
}
else
{
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.name;
}
}
}

if (skipInfEngineInit)
{
Ptr<BackendNode> node = layers[lastLayerId].backendNodes[preferableBackend];
@ -1190,11 +1365,21 @@ struct Net::Impl
for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;

for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
if (ld.id == 0)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.id == 0 ? netInputLayer->outNames[i] : ld.name;
for (int i = 0; i < ld.inputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
dataPtr->name = netInputLayer->outNames[i];
}
}
else
{
for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
{
InferenceEngine::DataPtr dataPtr = infEngineDataNode(ld.outputBlobsWrappers[i]);
dataPtr->name = ld.name;
}
}
ieNode->net->addBlobs(ld.inputBlobsWrappers);
ieNode->net->addBlobs(ld.outputBlobsWrappers);
@ -1210,11 +1395,11 @@ struct Net::Impl
// some of layers is not implemented.

// Set of all input and output blobs wrappers for current network.
std::map<int, Ptr<BackendWrapper> > netBlobsWrappers;
std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
for (it = layers.begin(); it != layers.end(); ++it)
{
LayerData &ld = it->second;
if (ld.id == 0)
if (ld.id == 0 && ld.skip)
continue;
bool fused = ld.skip;

@ -1251,20 +1436,17 @@ struct Net::Impl
// So we need to rewrap all the external blobs.
for (int i = 0; i < ld.inputBlobsId.size(); ++i)
{
int lid = ld.inputBlobsId[i].lid;
LayerData &inpLd = layers[lid];
auto it = netBlobsWrappers.find(lid);
LayerPin inPin = ld.inputBlobsId[i];
auto it = netBlobsWrappers.find(inPin);
if (it == netBlobsWrappers.end())
{
ld.inputBlobsWrappers[i] = wrap(*ld.inputBlobs[i]);
auto dataPtr = infEngineDataNode(ld.inputBlobsWrappers[i]);
dataPtr->name = inpLd.name;
netBlobsWrappers[lid] = ld.inputBlobsWrappers[i];
ld.inputBlobsWrappers[i] = InfEngineBackendWrapper::create(ld.inputBlobsWrappers[i]);
netBlobsWrappers[inPin] = ld.inputBlobsWrappers[i];
}
else
ld.inputBlobsWrappers[i] = it->second;
}
netBlobsWrappers[ld.id] = ld.outputBlobsWrappers[0];
netBlobsWrappers[LayerPin(ld.id, 0)] = ld.outputBlobsWrappers[0];

Ptr<BackendNode> node;
if (!net.empty())
@ -2343,7 +2525,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)
impl->netInputLayer->setNames(inputBlobNames);
}

void Net::setInput(InputArray blob, const String& name)
void Net::setInput(InputArray blob, const String& name, double scalefactor, const Scalar& mean)
{
CV_TRACE_FUNCTION();
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
@ -2360,6 +2542,8 @@ void Net::setInput(InputArray blob, const String& name)
ld.outputBlobs.resize(numInputs);
ld.outputBlobsWrappers.resize(numInputs);
impl->netInputLayer->inputsData.resize(numInputs);
impl->netInputLayer->scaleFactors.resize(numInputs);
impl->netInputLayer->means.resize(numInputs);

MatShape prevShape = shape(impl->netInputLayer->inputsData[pin.oid]);
Mat blob_ = blob.getMat();
@ -2378,6 +2562,8 @@ void Net::setInput(InputArray blob, const String& name)
{
ld.outputBlobsWrappers[pin.oid]->setHostDirty();
}
impl->netInputLayer->scaleFactors[pin.oid] = scalefactor;
impl->netInputLayer->means[pin.oid] = mean;
impl->netWasAllocated = impl->netWasAllocated && oldShape;
}

@ -560,7 +560,7 @@ public:
int ngroups = ngroups_, batchSize = input_->size[0]*ngroups;
int outW = output_->size[3], outH = output_->size[2], outCn = output_->size[1]/ngroups;
int width = input_->size[3], height = input_->size[2], inpCn = input_->size[1]/ngroups;
int nstripes = nstripes_;
const int nstripes = nstripes_;
int kernel_w = kernel_.width, kernel_h = kernel_.height;
int pad_w = pad_.width, pad_h = pad_.height;
int stride_w = stride_.width, stride_h = stride_.height;
@ -587,7 +587,6 @@ public:
int samplesPerStripe = std::max((batchSize + nstripes - 1)/nstripes, 1);
r.start *= samplesPerStripe;
r.end *= samplesPerStripe;
nstripes *= samplesPerStripe;
stripeSize = outPlaneSize;
}

@ -866,6 +865,16 @@ public:
for (int i = 0; i < inputs.size(); ++i)
CV_Assert(inputs[i].u != outputs[0].u);

if (umat_blobs.empty())
{
size_t n = blobs.size();
umat_blobs.resize(n);
for (size_t i = 0; i < n; i++)
{
blobs[i].copyTo(umat_blobs[i]);
}
}

if (convolutionOp.empty())
{
OCL4DNNConvConfig config;
@ -1637,14 +1646,6 @@ public:
Ptr<BaseConvolutionLayer> ConvolutionLayer::create(const LayerParams &params)
{
Ptr<ConvolutionLayerImpl> l(new ConvolutionLayerImpl(params));

#ifdef HAVE_OPENCL
size_t n = params.blobs.size();
l->umat_blobs.resize(n);
for (int i = 0; i < n; i++)
l->umat_blobs[i] = params.blobs[i].getUMat(ACCESS_READ);
#endif

return l;
}

@ -187,7 +187,7 @@ public:
int c, j, k, n = nsrcs;
const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0;
float* dstptr0 = dst->ptr<float>();
int blockSize0 = 1 << 12, blockSize = blockSize0;
int blockSize0 = 1 << 12, blockSize;

for( size_t ofs = stripeStart; ofs < stripeEnd; ofs += blockSize )
{

@ -190,6 +190,7 @@ public:

size_t num = total(shape(inp0.size), 0, startAxis);
size_t numPlanes = total(shape(inp0.size), startAxis, endAxis + 1);
CV_Assert(num * numPlanes != 0);
size_t planeSize = inp0.total() / (num * numPlanes);
for (size_t n = 0; n < num; ++n)
{

@ -189,18 +189,16 @@ public:
else
outTailShape_.assign(1, _numOut);

int _numTimeStamps, _numSamples;
int _numSamples;
if (useTimestampDim)
{
CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp);
_numTimeStamps = inp0[0];
_numSamples = inp0[1];
outResShape.push_back(_numTimeStamps);
outResShape.push_back(inp0[0]);
}
else
{
CV_Assert(inp0.size() >= 2 && total(inp0, 1) == _numInp);
_numTimeStamps = 1;
_numSamples = inp0[0];
}

@ -14,7 +14,7 @@ namespace cv { namespace dnn {
class ResizeLayerImpl : public ResizeLayer
{
public:
ResizeLayerImpl(const LayerParams& params) : scaleWidth(0), scaleHeight(0)
ResizeLayerImpl(const LayerParams& params) : zoomFactorWidth(0), zoomFactorHeight(0), scaleWidth(0), scaleHeight(0)
{
setParamsFrom(params);
outWidth = params.get<float>("width", 0);

@ -563,10 +563,10 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
}

if (use_half_ && bias_half.empty() && !bias.empty())
convertFp16((UMat&)bias, bias_half);
convertFp16(bias, bias_half);

if (use_half_ && weights_half.empty())
convertFp16((UMat&)weight, weights_half);
convertFp16(weight, weights_half);

prepareKernel(bottom, top, weight, (use_half_) ? bias_half : bias, numImages);
if (bestKernelConfig.empty())

@ -68,19 +68,32 @@ static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std:
{
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
std::reverse(reversedShape.begin(), reversedShape.end());
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, estimateLayout(m))
);
if (m.type() == CV_32F)
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32, estimateLayout(m))
);
else if (m.type() == CV_8U)
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::U8, estimateLayout(m))
);
else
CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
}

InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
InferenceEngine::Layout layout)
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape,
InferenceEngine::Layout layout)
{
return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
layout, shape, (float*)m.data);
if (m.type() == CV_32F)
return InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
layout, shape, (float*)m.data);
else if (m.type() == CV_8U)
return InferenceEngine::make_shared_blob<uint8_t>(InferenceEngine::Precision::U8,
layout, shape, (uint8_t*)m.data);
else
CV_Error(Error::StsNotImplemented, format("Unsupported data type %d", m.type()));
}

InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout)
{
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
std::reverse(reversedShape.begin(), reversedShape.end());
@ -102,6 +115,24 @@ InfEngineBackendWrapper::InfEngineBackendWrapper(int targetId, const cv::Mat& m)
blob = wrapToInfEngineBlob(m, estimateLayout(m));
}

InfEngineBackendWrapper::InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper)
: BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE, wrapper->targetId)
{
Ptr<InfEngineBackendWrapper> ieWrapper = wrapper.dynamicCast<InfEngineBackendWrapper>();
CV_Assert(!ieWrapper.empty());
InferenceEngine::DataPtr srcData = ieWrapper->dataPtr;
dataPtr = InferenceEngine::DataPtr(
new InferenceEngine::Data(srcData->name, srcData->dims, srcData->precision,
srcData->layout)
);
blob = ieWrapper->blob;
}

Ptr<BackendWrapper> InfEngineBackendWrapper::create(Ptr<BackendWrapper> wrapper)
{
return Ptr<BackendWrapper>(new InfEngineBackendWrapper(wrapper));
}

InfEngineBackendWrapper::~InfEngineBackendWrapper()
{

@ -149,10 +180,15 @@ InferenceEngine::Precision InfEngineBackendNet::getPrecision() noexcept
return precision;
}

InferenceEngine::Precision InfEngineBackendNet::getPrecision() const noexcept
{
return precision;
}

// Assume that outputs of network is unconnected blobs.
void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &outputs_) noexcept
{
outputs_ = outputs;
const_cast<const InfEngineBackendNet*>(this)->getOutputsInfo(outputs_);
}
void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &outputs_) const noexcept
{
@ -162,7 +198,7 @@ void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &output
// Returns input references that aren't connected to internal outputs.
void InfEngineBackendNet::getInputsInfo(InferenceEngine::InputsDataMap &inputs_) noexcept
{
inputs_ = inputs;
const_cast<const InfEngineBackendNet*>(this)->getInputsInfo(inputs_);
}

// Returns input references that aren't connected to internal outputs.
@ -173,7 +209,11 @@ void InfEngineBackendNet::getInputsInfo(InferenceEngine::InputsDataMap &inputs_)

InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string &inputName) noexcept
{
getInputsInfo(inputs);
return const_cast<const InfEngineBackendNet*>(this)->getInput(inputName);
}

InferenceEngine::InputInfo::Ptr InfEngineBackendNet::getInput(const std::string &inputName) const noexcept
{
const auto& it = inputs.find(inputName);
CV_Assert(it != inputs.end());
return it->second;
@ -187,7 +227,17 @@ void InfEngineBackendNet::getName(char*, size_t) const noexcept
{
}

const std::string& InfEngineBackendNet::getName() const noexcept
{
return name;
}

size_t InfEngineBackendNet::layerCount() noexcept
{
return const_cast<const InfEngineBackendNet*>(this)->layerCount();
}

size_t InfEngineBackendNet::layerCount() const noexcept
{
return layers.size();
}
@ -227,6 +277,13 @@ InfEngineBackendNet::addOutput(const std::string &layerName, size_t outputIndex,
InferenceEngine::StatusCode
InfEngineBackendNet::getLayerByName(const char *layerName, InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) noexcept
{
return const_cast<const InfEngineBackendNet*>(this)->getLayerByName(layerName, out, resp);
}

InferenceEngine::StatusCode InfEngineBackendNet::getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) const noexcept
{
for (auto& l : layers)
{
@ -254,7 +311,12 @@ InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() noexcept
return targetDevice;
}

InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t size) noexcept
InferenceEngine::TargetDevice InfEngineBackendNet::getTargetDevice() const noexcept
{
return targetDevice;
}

InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t) noexcept
{
CV_Error(Error::StsNotImplemented, "");
return InferenceEngine::StatusCode::OK;
@ -329,6 +391,7 @@ void InfEngineBackendNet::init(int targetId)
{
CV_Assert(allBlobs.find(it.first) != allBlobs.end());
inpBlobs[it.first] = allBlobs[it.first];
it.second->setPrecision(inpBlobs[it.first]->precision());
}

// Set up output blobs.
@ -342,7 +405,9 @@ void InfEngineBackendNet::init(int targetId)
switch (targetId)
{
case DNN_TARGET_CPU: setTargetDevice(InferenceEngine::TargetDevice::eCPU); break;
case DNN_TARGET_OPENCL_FP16: setPrecision(InferenceEngine::Precision::FP16); // Fallback to the next.
case DNN_TARGET_OPENCL_FP16:
setPrecision(InferenceEngine::Precision::FP16);
/* Falls through. */
case DNN_TARGET_OPENCL: setTargetDevice(InferenceEngine::TargetDevice::eGPU); break;
case DNN_TARGET_MYRIAD:
{
@ -363,9 +428,8 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)

try
{
static std::map<std::string, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
std::string deviceName = InferenceEngine::getDeviceName(targetDevice);
auto pluginIt = sharedPlugins.find(deviceName);
static std::map<InferenceEngine::TargetDevice, InferenceEngine::InferenceEnginePluginPtr> sharedPlugins;
auto pluginIt = sharedPlugins.find(targetDevice);
if (pluginIt != sharedPlugins.end())
{
enginePtr = pluginIt->second;
@ -373,7 +437,7 @@ void InfEngineBackendNet::initPlugin(InferenceEngine::ICNNNetwork& net)
else
{
enginePtr = InferenceEngine::PluginDispatcher({""}).getSuitablePlugin(targetDevice);
sharedPlugins[deviceName] = enginePtr;
sharedPlugins[targetDevice] = enginePtr;

if (targetDevice == InferenceEngine::TargetDevice::eCPU)
{
@ -427,7 +491,7 @@ void InfEngineBackendNet::addBlobs(const std::vector<Ptr<BackendWrapper> >& ptrs
auto wrappers = infEngineWrappers(ptrs);
for (const auto& wrapper : wrappers)
{
allBlobs[wrapper->dataPtr->name] = wrapper->blob;
allBlobs.insert({wrapper->dataPtr->name, wrapper->blob});
}
}

@ -8,6 +8,8 @@
#ifndef __OPENCV_DNN_OP_INF_ENGINE_HPP__
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__

#include "opencv2/core/cvdef.h"

#ifdef HAVE_INF_ENGINE
#if defined(__GNUC__) && __GNUC__ >= 5
//#pragma GCC diagnostic push
@ -34,7 +36,9 @@ public:

void setPrecision(InferenceEngine::Precision p) noexcept;

virtual InferenceEngine::Precision getPrecision() noexcept CV_OVERRIDE;
virtual InferenceEngine::Precision getPrecision() noexcept;

virtual InferenceEngine::Precision getPrecision() const noexcept;

virtual void getOutputsInfo(InferenceEngine::OutputsDataMap &out) noexcept /*CV_OVERRIDE*/;

@ -44,13 +48,19 @@ public:

virtual void getInputsInfo(InferenceEngine::InputsDataMap &inputs) const noexcept /*CV_OVERRIDE*/;

virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) noexcept CV_OVERRIDE;
virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) noexcept;

virtual InferenceEngine::InputInfo::Ptr getInput(const std::string &inputName) const noexcept;

virtual void getName(char *pName, size_t len) noexcept;

virtual void getName(char *pName, size_t len) const noexcept;

virtual size_t layerCount() noexcept CV_OVERRIDE;
virtual const std::string& getName() const noexcept;

virtual size_t layerCount() noexcept;

virtual size_t layerCount() const noexcept;

virtual InferenceEngine::DataPtr& getData(const char *dname) noexcept CV_OVERRIDE;

@ -58,15 +68,21 @@ public:

virtual InferenceEngine::StatusCode addOutput(const std::string &layerName,
size_t outputIndex = 0,
InferenceEngine::ResponseDesc *resp = nullptr) noexcept CV_OVERRIDE;
InferenceEngine::ResponseDesc *resp = nullptr) noexcept;

virtual InferenceEngine::StatusCode getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) noexcept CV_OVERRIDE;
InferenceEngine::ResponseDesc *resp) noexcept;

virtual InferenceEngine::StatusCode getLayerByName(const char *layerName,
InferenceEngine::CNNLayerPtr &out,
InferenceEngine::ResponseDesc *resp) const noexcept;

virtual void setTargetDevice(InferenceEngine::TargetDevice device) noexcept CV_OVERRIDE;

virtual InferenceEngine::TargetDevice getTargetDevice() noexcept CV_OVERRIDE;
virtual InferenceEngine::TargetDevice getTargetDevice() noexcept;

virtual InferenceEngine::TargetDevice getTargetDevice() const noexcept;

virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE;

@ -94,6 +110,8 @@ private:
InferenceEngine::ExecutableNetwork netExec;
InferenceEngine::InferRequest infRequest;

std::string name;

void initPlugin(InferenceEngine::ICNNNetwork& net);
};

@ -115,19 +133,23 @@ class InfEngineBackendWrapper : public BackendWrapper
public:
InfEngineBackendWrapper(int targetId, const Mat& m);

InfEngineBackendWrapper(Ptr<BackendWrapper> wrapper);

~InfEngineBackendWrapper();

static Ptr<BackendWrapper> create(Ptr<BackendWrapper> wrapper);

virtual void copyToHost() CV_OVERRIDE;

virtual void setHostDirty() CV_OVERRIDE;

InferenceEngine::DataPtr dataPtr;
InferenceEngine::TBlob<float>::Ptr blob;
InferenceEngine::Blob::Ptr blob;
};

InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, InferenceEngine::Layout layout = InferenceEngine::Layout::ANY);

InferenceEngine::TBlob<float>::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);
InferenceEngine::Blob::Ptr wrapToInfEngineBlob(const Mat& m, const std::vector<size_t>& shape, InferenceEngine::Layout layout);

InferenceEngine::DataPtr infEngineDataNode(const Ptr<BackendWrapper>& ptr);

@ -771,6 +771,13 @@ void TFImporter::populateNet(Net dstNet)
type = layer.op();
}

// For the object detection networks, TensorFlow Object Detection API
// predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax)
// order. We can manage it at DetectionOutput layer parsing predictions
// or shuffle last convolution's weights.
bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") &&
getLayerAttr(layer, "loc_pred_transposed").b();

layerParams.set("bias_term", false);
layerParams.blobs.resize(1);

@ -784,18 +791,32 @@ void TFImporter::populateNet(Net dstNet)
blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]);
ExcludeLayer(net, weights_layer_index, 0, false);
layers_to_ignore.insert(next_layers[0].first);

// Shuffle bias from yxYX to xyXY.
if (locPredTransposed)
{
const int numWeights = layerParams.blobs[1].total();
float* biasData = reinterpret_cast<float*>(layerParams.blobs[1].data);
CV_Assert(numWeights % 4 == 0);
for (int i = 0; i < numWeights; i += 2)
{
std::swap(biasData[i], biasData[i + 1]);
}
}
}

const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id);
kernelFromTensor(kernelTensor, layerParams.blobs[0]);
releaseTensor(const_cast<tensorflow::TensorProto*>(&kernelTensor));
int* kshape = layerParams.blobs[0].size.p;
const int outCh = kshape[0];
const int inCh = kshape[1];
const int height = kshape[2];
const int width = kshape[3];
if (type == "DepthwiseConv2dNative")
{
CV_Assert(!locPredTransposed);
const int chMultiplier = kshape[0];
const int inCh = kshape[1];
const int height = kshape[2];
const int width = kshape[3];

Mat copy = layerParams.blobs[0].clone();
float* src = (float*)copy.data;
@ -814,9 +835,21 @@ void TFImporter::populateNet(Net dstNet)
size_t* kstep = layerParams.blobs[0].step.p;
kstep[0] = kstep[1]; // fix steps too
}
layerParams.set("kernel_h", kshape[2]);
layerParams.set("kernel_w", kshape[3]);
layerParams.set("num_output", kshape[0]);
layerParams.set("kernel_h", height);
layerParams.set("kernel_w", width);
layerParams.set("num_output", outCh);

// Shuffle output channels from yxYX to xyXY.
if (locPredTransposed)
{
const int slice = height * width * inCh;
for (int i = 0; i < outCh; i += 2)
{
cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i));
cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr<float>(i + 1));
std::swap_ranges(src.begin<float>(), src.end<float>(), dst.begin<float>());
}
}

setStrides(layerParams, layer);
setPadding(layerParams, layer);

@ -107,12 +107,10 @@ TEST_P(Convolution, Accuracy)
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
throw SkipTestException("");

// TODO: unstable test cases
if (backendId == DNN_BACKEND_OPENCV && (targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
inChannels == 6 && outChannels == 9 && group == 1 && inSize == Size(5, 6) &&
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1) && dilation == Size(1, 1) &&
hasBias)
throw SkipTestException("");
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1))
throw SkipTestException("Skip unstable test");

int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
Mat weights(4, &sz[0], CV_32F);

modules/dnn/test/test_ie_models.cpp (new file)
@ -0,0 +1,238 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
#include "test_precomp.hpp"

#ifdef HAVE_INF_ENGINE
#include <opencv2/core/utils/filesystem.hpp>

#include <inference_engine.hpp>
#include <ie_icnn_network.hpp>
#include <ie_extension.h>

namespace opencv_test { namespace {

static void initDLDTDataPath()
{
#ifndef WINRT
static bool initialized = false;
if (!initialized)
{
const char* dldtTestDataPath = getenv("INTEL_CVSDK_DIR");
if (dldtTestDataPath)
cvtest::addDataSearchPath(cv::utils::fs::join(dldtTestDataPath, "deployment_tools"));
initialized = true;
}
#endif
}

using namespace cv;
using namespace cv::dnn;
using namespace InferenceEngine;

static inline void genData(const std::vector<size_t>& dims, Mat& m, Blob::Ptr& dataPtr)
{
std::vector<int> reversedDims(dims.begin(), dims.end());
std::reverse(reversedDims.begin(), reversedDims.end());

m.create(reversedDims, CV_32F);
randu(m, -1, 1);

dataPtr = make_shared_blob<float>(Precision::FP32, dims, (float*)m.data);
}

void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
std::map<std::string, cv::Mat>& inputsMap, std::map<std::string, cv::Mat>& outputsMap)
{
CNNNetReader reader;
reader.ReadNetwork(xmlPath);
reader.ReadWeights(binPath);

CNNNetwork net = reader.getNetwork();

InferenceEnginePluginPtr enginePtr;
InferencePlugin plugin;
ExecutableNetwork netExec;
InferRequest infRequest;
TargetDevice targetDevice;
switch (target)
{
case DNN_TARGET_CPU:
targetDevice = TargetDevice::eCPU;
break;
case DNN_TARGET_OPENCL:
case DNN_TARGET_OPENCL_FP16:
targetDevice = TargetDevice::eGPU;
break;
case DNN_TARGET_MYRIAD:
targetDevice = TargetDevice::eMYRIAD;
break;
default:
CV_Error(Error::StsNotImplemented, "Unknown target");
};

try
{
enginePtr = PluginDispatcher({""}).getSuitablePlugin(targetDevice);

if (targetDevice == TargetDevice::eCPU)
{
std::string suffixes[] = {"_avx2", "_sse4", ""};
bool haveFeature[] = {
checkHardwareSupport(CPU_AVX2),
checkHardwareSupport(CPU_SSE4_2),
true
};
for (int i = 0; i < 3; ++i)
{
if (!haveFeature[i])
continue;
#ifdef _WIN32
std::string libName = "cpu_extension" + suffixes[i] + ".dll";
#else
std::string libName = "libcpu_extension" + suffixes[i] + ".so";
#endif  // _WIN32
try
{
IExtensionPtr extension = make_so_pointer<IExtension>(libName);
enginePtr->AddExtension(extension, 0);
break;
}
catch(...) {}
}
// Some of networks can work without a library of extra layers.
}
plugin = InferencePlugin(enginePtr);

netExec = plugin.LoadNetwork(net, {});
infRequest = netExec.CreateInferRequest();
}
catch (const std::exception& ex)
{
CV_Error(Error::StsAssert, format("Failed to initialize Inference Engine backend: %s", ex.what()));
}

// Fill input blobs.
inputsMap.clear();
BlobMap inputBlobs;
for (auto& it : net.getInputsInfo())
{
genData(it.second->getDims(), inputsMap[it.first], inputBlobs[it.first]);
}
infRequest.SetInput(inputBlobs);

// Fill output blobs.
outputsMap.clear();
BlobMap outputBlobs;
for (auto& it : net.getOutputsInfo())
{
genData(it.second->dims, outputsMap[it.first], outputBlobs[it.first]);
}
infRequest.SetOutput(outputBlobs);

infRequest.Infer();
}

std::vector<String> getOutputsNames(const Net& net)
{
std::vector<String> names;
if (names.empty())
{
std::vector<int> outLayers = net.getUnconnectedOutLayers();
std::vector<String> layersNames = net.getLayerNames();
names.resize(outLayers.size());
for (size_t i = 0; i < outLayers.size(); ++i)
names[i] = layersNames[outLayers[i] - 1];
}
return names;
}

void runCV(Target target, const std::string& xmlPath, const std::string& binPath,
const std::map<std::string, cv::Mat>& inputsMap,
std::map<std::string, cv::Mat>& outputsMap)
{
Net net = readNet(xmlPath, binPath);
for (auto& it : inputsMap)
net.setInput(it.second, it.first);
net.setPreferableTarget(target);

std::vector<String> outNames = getOutputsNames(net);
std::vector<Mat> outs;
net.forward(outs, outNames);

outputsMap.clear();
EXPECT_EQ(outs.size(), outNames.size());
for (int i = 0; i < outs.size(); ++i)
{
EXPECT_TRUE(outputsMap.insert({outNames[i], outs[i]}).second);
}
}

typedef TestWithParam<tuple<Target, String> > DNNTestOpenVINO;
TEST_P(DNNTestOpenVINO, models)
{
Target target = (dnn::Target)(int)get<0>(GetParam());
std::string modelName = get<1>(GetParam());

if (modelName == "semantic-segmentation-adas-0001" && target == DNN_TARGET_OPENCL_FP16)
throw SkipTestException("");

std::string precision = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? "FP16" : "FP32";
std::string prefix = utils::fs::join("intel_models",
utils::fs::join(modelName,
utils::fs::join(precision, modelName)));
std::string xmlPath = findDataFile(prefix + ".xml");
std::string binPath = findDataFile(prefix + ".bin");

std::map<std::string, cv::Mat> inputsMap;
std::map<std::string, cv::Mat> ieOutputsMap, cvOutputsMap;
runIE(target, xmlPath, binPath, inputsMap, ieOutputsMap);
runCV(target, xmlPath, binPath, inputsMap, cvOutputsMap);

EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size());
for (auto& srcIt : ieOutputsMap)
{
auto dstIt = cvOutputsMap.find(srcIt.first);
CV_Assert(dstIt != cvOutputsMap.end());
double normInf = cvtest::norm(srcIt.second, dstIt->second, cv::NORM_INF);
EXPECT_EQ(normInf, 0);
}
}

static testing::internal::ParamGenerator<String> intelModels()
{
initDLDTDataPath();
std::vector<String> modelsNames;

std::string path;
try
{
path = findDataDirectory("intel_models", false);
}
catch (...)
{
std::cerr << "ERROR: Can't find OpenVINO models. Check INTEL_CVSDK_DIR environment variable (run setup.sh)" << std::endl;
return ValuesIn(modelsNames); // empty list
}

cv::utils::fs::glob_relative(path, "", modelsNames, false, true);

modelsNames.erase(
std::remove_if(modelsNames.begin(), modelsNames.end(),
[&](const String& dir){ return !utils::fs::isDirectory(utils::fs::join(path, dir)); }),
modelsNames.end()
);
CV_Assert(!modelsNames.empty());

return ValuesIn(modelsNames);
}

INSTANTIATE_TEST_CASE_P(/**/, DNNTestOpenVINO, Combine(
Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL, DNN_TARGET_OPENCL_FP16), intelModels()
));

}}
#endif // HAVE_INF_ENGINE
@ -291,7 +291,7 @@ TEST_P(Test_Caffe_layers, Fused_Concat)
|
||||
|
||||
TEST_P(Test_Caffe_layers, Eltwise)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||
throw SkipTestException("");
|
||||
testLayerUsingCaffeModels("layer_eltwise");
|
||||
}
|
||||
@ -939,6 +939,25 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
|
||||
ASSERT_EQ(net.getLayer(outLayers[0])->type, "Concat");
|
||||
}
|
||||
|
||||
TEST(Layer_Test_Convolution_DLDT, setInput_uint8)
|
||||
{
|
||||
Mat inp = blobFromNPY(_tf("blob.npy"));
|
||||
|
||||
Mat inputs[] = {Mat(inp.dims, inp.size, CV_8U), Mat()};
|
||||
randu(inputs[0], 0, 255);
|
||||
inputs[0].convertTo(inputs[1], CV_32F);
|
||||
|
||||
Mat outs[2];
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
Net net = readNet(_tf("layer_convolution.xml"), _tf("layer_convolution.bin"));
|
||||
net.setInput(inputs[i]);
|
||||
outs[i] = net.forward();
|
||||
ASSERT_EQ(outs[i].type(), CV_32F);
|
||||
}
|
||||
normAssert(outs[0], outs[1]);
|
||||
}
|
||||
|
||||
// 1. Create a .prototxt file with the following network:
|
||||
// layer {
|
||||
// type: "Input" name: "data" top: "data"
|
||||
@ -961,22 +980,65 @@ TEST(Layer_Test_Convolution_DLDT, Accuracy)
|
||||
// net.save('/path/to/caffemodel')
|
||||
//
|
||||
// 3. Convert using ModelOptimizer.
|
||||
TEST(Test_DLDT, two_inputs)
|
||||
typedef testing::TestWithParam<tuple<int, int> > Test_DLDT_two_inputs;
|
||||
TEST_P(Test_DLDT_two_inputs, as_IR)
|
||||
{
|
||||
int firstInpType = get<0>(GetParam());
|
||||
int secondInpType = get<1>(GetParam());
|
||||
// TODO: It looks like a bug in Inference Engine.
|
||||
if (secondInpType == CV_8U)
|
||||
throw SkipTestException("");
|
||||
|
||||
Net net = readNet(_tf("net_two_inputs.xml"), _tf("net_two_inputs.bin"));
|
||||
int inpSize[] = {1, 2, 3};
|
||||
Mat firstInp(3, &inpSize[0], CV_32F);
|
||||
Mat secondInp(3, &inpSize[0], CV_32F);
|
||||
randu(firstInp, -1, 1);
|
||||
randu(secondInp, -1, 1);
|
||||
Mat firstInp(3, &inpSize[0], firstInpType);
|
||||
Mat secondInp(3, &inpSize[0], secondInpType);
|
||||
randu(firstInp, 0, 255);
|
||||
randu(secondInp, 0, 255);
|
||||
|
||||
net.setInput(firstInp, "data");
|
||||
net.setInput(secondInp, "second_input");
|
||||
Mat out = net.forward();
|
||||
|
||||
normAssert(out, firstInp + secondInp);
|
||||
Mat ref;
|
||||
cv::add(firstInp, secondInp, ref, Mat(), CV_32F);
|
||||
normAssert(out, ref);
|
||||
}
|
||||
|
||||
TEST_P(Test_DLDT_two_inputs, as_backend)
|
||||
{
|
||||
static const float kScale = 0.5f;
|
||||
static const float kScaleInv = 1.0f / kScale;
|
||||
|
||||
Net net;
|
||||
LayerParams lp;
|
||||
lp.type = "Eltwise";
|
||||
lp.name = "testLayer";
|
||||
lp.set("operation", "sum");
|
||||
int eltwiseId = net.addLayerToPrev(lp.name, lp.type, lp); // connect to a first input
|
||||
net.connect(0, 1, eltwiseId, 1); // connect to a second input
|
||||
|
||||
int inpSize[] = {1, 2, 3};
|
||||
Mat firstInp(3, &inpSize[0], get<0>(GetParam()));
|
||||
Mat secondInp(3, &inpSize[0], get<1>(GetParam()));
|
||||
randu(firstInp, 0, 255);
|
||||
randu(secondInp, 0, 255);
|
||||
|
||||
net.setInputsNames({"data", "second_input"});
|
||||
net.setInput(firstInp, "data", kScale);
|
||||
net.setInput(secondInp, "second_input", kScaleInv);
|
||||
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
|
||||
Mat out = net.forward();
|
||||
|
||||
Mat ref;
|
||||
addWeighted(firstInp, kScale, secondInp, kScaleInv, 0, ref, CV_32F);
|
||||
normAssert(out, ref);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/*nothing*/, Test_DLDT_two_inputs, Combine(
|
||||
Values(CV_8U, CV_32F), Values(CV_8U, CV_32F)
|
||||
));
|
||||
|
||||
class UnsupportedLayer : public Layer
|
||||
{
|
||||
public:
|
||||
|
@ -138,4 +138,44 @@ TEST(LayerFactory, custom_layers)
|
||||
LayerFactory::unregisterLayer("CustomType");
|
||||
}
|
||||
|
||||
typedef testing::TestWithParam<tuple<float, Vec3f, int, tuple<Backend, Target> > > setInput;
|
||||
TEST_P(setInput, normalization)
|
||||
{
|
||||
const float kScale = get<0>(GetParam());
|
||||
const Scalar kMean = get<1>(GetParam());
|
||||
const int dtype = get<2>(GetParam());
|
||||
const int backend = get<0>(get<3>(GetParam()));
|
||||
const int target = get<1>(get<3>(GetParam()));
|
||||
const bool kSwapRB = true;
|
||||
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD && !checkMyriadTarget())
|
||||
throw SkipTestException("Myriad is not available/disabled in OpenCV");
|
||||
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16 && dtype != CV_32F)
|
||||
throw SkipTestException("");
|
||||
|
||||
Mat inp(5, 5, CV_8UC3);
|
||||
randu(inp, 0, 255);
|
||||
Mat ref = blobFromImage(inp, kScale, Size(), kMean, kSwapRB, /*crop*/false);
|
||||
|
||||
LayerParams lp;
|
||||
Net net;
|
||||
net.addLayerToPrev("testLayer", "Identity", lp);
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat blob = blobFromImage(inp, 1.0, Size(), Scalar(), kSwapRB, /*crop*/false, dtype);
|
||||
ASSERT_EQ(blob.type(), dtype);
|
||||
net.setInput(blob, "", kScale, kMean);
|
||||
Mat out = net.forward();
|
||||
ASSERT_EQ(out.type(), CV_32F);
|
||||
normAssert(ref, out, "", 4e-4, 1e-3);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(/**/, setInput, Combine(
|
||||
Values(1.0f, 1.0 / 127.5),
|
||||
Values(Vec3f(), Vec3f(50, 50, 50), Vec3f(10, 50, 140)),
|
||||
Values(CV_32F, CV_8U),
|
||||
dnnBackendsAndTargets()
|
||||
));
|
||||
|
||||
}} // namespace
|
||||
|
@ -309,7 +309,7 @@ TEST_P(Test_TensorFlow_nets, Inception_v2_SSD)
|
||||
0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
|
||||
0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
double scoreDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 5e-3 : default_l1;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.025 : default_lInf;
double iouDiff = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.09 : default_lInf;
normAssertDetections(ref, out, "", 0.5, scoreDiff, iouDiff);
}

@ -1236,7 +1236,6 @@ BriskScaleSpace::isMax2D(const int layer, const int x_layer, const int y_layer)
{
// in this case, we have to analyze the situation more carefully:
// the values are gaussian blurred and then we really decide
data = scores.ptr() + y_layer * scorescols + x_layer;
int smoothedcenter = 4 * center + 2 * (s_10 + s10 + s0_1 + s01) + s_1_1 + s1_1 + s_11 + s11;
for (unsigned int i = 0; i < deltasize; i += 2)
{

@ -1312,8 +1311,7 @@ BriskScaleSpace::refine3D(const int layer, const int x_layer, const int y_layer,
int s_2_2 = l.getAgastScore_5_8(x_layer + 1, y_layer + 1, 1);
max_below = std::max(s_2_2, max_below);

max_below_float = subpixel2D(s_0_0, s_0_1, s_0_2, s_1_0, s_1_1, s_1_2, s_2_0, s_2_1, s_2_2, delta_x_below,
delta_y_below);
subpixel2D(s_0_0, s_0_1, s_0_2, s_1_0, s_1_1, s_1_2, s_2_0, s_2_1, s_2_2, delta_x_below, delta_y_below);
max_below_float = (float)max_below;
}
else

@ -373,8 +373,6 @@ void KAZEFeatures::Determinant_Hessian(std::vector<KeyPoint>& kpts)
is_out = true;
}

is_out = false;

if (is_out == false) {
if (is_repeated == false) {
kpts.push_back(kpts_par_[i][j]);

@ -175,7 +175,6 @@ std::map<int, ExifEntry_t > ExifReader::getExif()
CV_THROW (ExifParsingError());
}
m_stream.read( reinterpret_cast<char*>(&m_data[0]), exifSize - offsetToTiffHeader );
count = m_stream.gcount();
exifFound = true;
break;

@ -265,7 +265,7 @@ bool BmpDecoder::readData( Mat& img )
for(;;)
{
int code = m_strm.getWord();
int len = code & 255;
const int len = code & 255;
code >>= 8;
if( len != 0 ) // encoded mode
{

@ -304,16 +304,13 @@ bool BmpDecoder::readData( Mat& img )
else
{
int x_shift3 = (int)(line_end - data);
int y_shift = m_height - y;

if( code == 2 )
{
x_shift3 = m_strm.getByte()*nch;
y_shift = m_strm.getByte();
m_strm.getByte();
}

len = x_shift3 + ((y_shift * width3) & ((code == 0) - 1));

if( color )
data = FillUniColor( data, line_end, step, width3,
y, m_height, x_shift3,

@ -689,7 +689,7 @@ bool PAMEncoder::write( const Mat& img, const std::vector<int>& params )
tmp += sprintf( buffer + tmp, "MAXVAL %d\n", (1 << img.elemSize1()*8) - 1);
if (fmt)
tmp += sprintf( buffer + tmp, "TUPLTYPE %s\n", fmt->name );
tmp += sprintf( buffer + tmp, "ENDHDR\n" );
sprintf( buffer + tmp, "ENDHDR\n" );

strm.putBytes( buffer, (int)strlen(buffer) );
/* write data */

@ -255,22 +255,21 @@ bool TiffDecoder::readHeader()
{
case 8:
m_type = CV_MAKETYPE(CV_8U, photometric > 1 ? wanted_channels : 1);
result = true;
break;
case 16:
m_type = CV_MAKETYPE(CV_16U, photometric > 1 ? wanted_channels : 1);
result = true;
break;

case 32:
m_type = CV_MAKETYPE(CV_32F, photometric > 1 ? 3 : 1);
result = true;
break;
case 64:
m_type = CV_MAKETYPE(CV_64F, photometric > 1 ? 3 : 1);
result = true;
break;

default:
result = false;
}
result = true;
}
}

@ -855,7 +855,6 @@ icvTraceContour_32s( int *ptr, int step, int *stop_ptr, int is_hole )
for( ;; )
{
CV_Assert(i3 != NULL);
s_end = s;
s = std::min(s, MAX_SIZE - 1);

while( s < MAX_SIZE - 1 )

@ -1479,7 +1478,7 @@ icvFindContoursInInterval( const CvArr* src,
cv::Ptr<CvMemStorage> storage01;
CvSeq* first = 0;

int i, j, k, n;
int j, k, n;

uchar* src_data = 0;
int img_step = 0;

@ -1547,7 +1546,6 @@ icvFindContoursInInterval( const CvArr* src,

// First line. None of runs is binded
tmp.pt.y = 0;
i = 0;
CV_WRITE_SEQ_ELEM( tmp, writer );
upper_line = (CvLinkedRunPoint*)CV_GET_WRITTEN_ELEM( writer );

@ -1580,7 +1578,7 @@ icvFindContoursInInterval( const CvArr* src,
last_elem = tmp_prev;
tmp_prev->next = 0;

for( i = 1; i < img_size.height; i++ )
for( int i = 1; i < img_size.height; i++ )
{
//------// Find runs in next line
src_data += img_step;

@ -338,7 +338,6 @@ LineAA( Mat& img, Point2l pt1, Point2l pt2, const void* color )

if( ax > ay )
{
dx = ax;
dy = (dy ^ j) - j;
pt1.x ^= pt2.x & j;
pt2.x ^= pt1.x & j;

@ -362,7 +361,6 @@ LineAA( Mat& img, Point2l pt1, Point2l pt2, const void* color )
}
else
{
dy = ay;
dx = (dx ^ i) - i;
pt1.x ^= pt2.x & i;
pt2.x ^= pt1.x & i;

@ -677,7 +675,6 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)

if( ax > ay )
{
dx = ax;
dy = (dy ^ j) - j;
pt1.x ^= pt2.x & j;
pt2.x ^= pt1.x & j;

@ -692,7 +689,6 @@ Line2( Mat& img, Point2l pt1, Point2l pt2, const void* color)
}
else
{
dy = ay;
dx = (dx ^ i) - i;
pt1.x ^= pt2.x & i;
pt2.x ^= pt1.x & i;

@ -128,8 +128,6 @@ int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, f
for( k = 1; k <= ksize2; k++ )
{
f = _mm_set1_ps(ky[k]);
S = src[k] + i;
S2 = src[-k] + i;
x0 = _mm_add_ps(_mm_load_ps(src[k]+i), _mm_load_ps(src[-k] + i));
s0 = _mm_add_ps(s0, _mm_mul_ps(x0, f));
}

@ -144,7 +142,7 @@ int SymmColumnVec_32f_Symm_AVX(const float** src, const float* ky, float* dst, f
int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst, float delta, int width, int ksize2)
{
int i = 0, k;
const float *S, *S2;
const float *S2;
const __m128 d4 = _mm_set1_ps(delta);
const __m256 d8 = _mm256_set1_ps(delta);

@ -152,11 +150,10 @@ int SymmColumnVec_32f_Unsymm_AVX(const float** src, const float* ky, float* dst,
{
__m256 f, s0 = d8, s1 = d8;
__m256 x0;
S = src[0] + i;

for (k = 1; k <= ksize2; k++)
{
S = src[k] + i;
const float *S = src[k] + i;
S2 = src[-k] + i;
f = _mm256_set1_ps(ky[k]);
x0 = _mm256_sub_ps(_mm256_loadu_ps(S), _mm256_loadu_ps(S2));

@ -4284,7 +4284,7 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
size_t src_step = _src.step(), src_offset = _src.offset();
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

if (esz == 0
if (esz == 0 || src_step == 0
|| (src_offset % src_step) % esz != 0
|| (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|| !(borderType == BORDER_CONSTANT

@ -467,7 +467,7 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
if( rect )
*rect = Rect();

int i, connectivity = flags & 255;
int i;
union {
uchar b[4];
int i[4];

@ -491,9 +491,8 @@ int cv::floodFill( InputOutputArray _image, InputOutputArray _mask,
CV_Error( CV_StsBadArg, "Number of channels in input image must be 1 or 3" );
}

if( connectivity == 0 )
connectivity = 4;
else if( connectivity != 4 && connectivity != 8 )
const int connectivity = flags & 255;
if( connectivity != 0 && connectivity != 4 && connectivity != 8 )
CV_Error( CV_StsBadFlag, "Connectivity must be 4, 0(=4) or 8" );

bool is_simple = mask.empty() && (flags & FLOODFILL_MASK_ONLY) == 0;

@ -1930,7 +1930,7 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
Mat planes[2];
NAryMatIterator it(arrays, planes);
double result = 0;
int j, len = (int)it.size;
int j;

CV_Assert( H1.type() == H2.type() && H1.depth() == CV_32F );

@ -1946,7 +1946,7 @@ double cv::compareHist( InputArray _H1, InputArray _H2, int method )
{
const float* h1 = it.planes[0].ptr<float>();
const float* h2 = it.planes[1].ptr<float>();
len = it.planes[0].rows*it.planes[0].cols*H1.channels();
const int len = it.planes[0].rows*it.planes[0].cols*H1.channels();
j = 0;

if( (method == CV_COMP_CHISQR) || (method == CV_COMP_CHISQR_ALT))

@ -413,7 +413,6 @@ HoughLinesSDiv( InputArray image, OutputArray lines, int type,
// Find peaks in maccum...
for( index = 0; index < sfn; index++ )
{
i = 0;
int pos = (int)(lst.size() - 1);
if( pos < 0 || lst[pos].value < mcaccum[index] )
{

@ -401,7 +401,6 @@ static void findMinimumAreaEnclosingTriangle(const std::vector<cv::Point2f> &pol

a = 1;
b = 2;
c = 0;

// Main algorithm steps

@ -370,6 +370,7 @@ static void cvUndistortPointsInternal( const CvMat* _src, CvMat* _dst, const CvM
const CvMat* _distCoeffs,
const CvMat* matR, const CvMat* matP, cv::TermCriteria criteria)
{
CV_Assert(criteria.isValid());
double A[3][3], RR[3][3], k[14]={0,0,0,0,0,0,0,0,0,0,0,0,0,0};
CvMat matA=cvMat(3, 3, CV_64F, A), _Dk;
CvMat _RR=cvMat(3, 3, CV_64F, RR);

@ -1187,7 +1187,7 @@ public:
prev_dEdw_sign[i] = Mat::zeros(weights[i].size(), CV_8S);
dEdw[i] = Mat::zeros(weights[i].size(), CV_64F);
}

CV_Assert(total > 0);
int dcount0 = max_buf_size/(2*total);
dcount0 = std::max( dcount0, 1 );
dcount0 = std::min( dcount0, count );
@ -5,10 +5,9 @@
#include "test_precomp.hpp"

namespace opencv_test
{
namespace opencv_test { namespace {

String qrcode_images_name[] = {
std::string qrcode_images_name[] = {
"20110817_030.jpg",
"20110817_048.jpg",
"img_20120226_161648.jpg",

@ -25,24 +24,25 @@ String qrcode_images_name[] = {

TEST(Objdetect_QRCode, generate_test_data)
{
String root = cvtest::TS::ptr()->get_data_path() + "qrcode/";
String dataset_config = cvtest::TS::ptr()->get_data_path() + "qrcode/dataset_config.json";
const std::string root = "qrcode/";
const std::string dataset_config = findDataFile(root + "dataset_config.json");
FileStorage file_config(dataset_config, FileStorage::WRITE);

file_config << "test_images" << "[";
size_t images_count = sizeof(qrcode_images_name) / sizeof(String);
size_t images_count = sizeof(qrcode_images_name) / sizeof(qrcode_images_name[0]);
for (size_t i = 0; i < images_count; i++)
{
file_config << "{:" << "image_name" << qrcode_images_name[i];
String image_path = root + qrcode_images_name[i];
std::vector<Point> transform;
std::string image_path = findDataFile(root + qrcode_images_name[i]);
std::vector<Point> corners;
Mat src = imread(image_path, IMREAD_GRAYSCALE);
EXPECT_TRUE(detectQRCode(src, transform));
ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;
EXPECT_TRUE(detectQRCode(src, corners));
file_config << "x" << "[:";
for (size_t j = 0; j < transform.size(); j++) { file_config << transform[j].x; }
for (size_t j = 0; j < corners.size(); j++) { file_config << corners[j].x; }
file_config << "]";
file_config << "y" << "[:";
for (size_t j = 0; j < transform.size(); j++) { file_config << transform[j].y; }
for (size_t j = 0; j < corners.size(); j++) { file_config << corners[j].y; }
file_config << "]" << "}";
}
file_config << "]";

@ -51,65 +51,65 @@ TEST(Objdetect_QRCode, generate_test_data)

#else

typedef testing::TestWithParam< String > Objdetect_QRCode;
typedef testing::TestWithParam< std::string > Objdetect_QRCode;
TEST_P(Objdetect_QRCode, regression)
{
String root = cvtest::TS::ptr()->get_data_path() + "qrcode/";
String dataset_config = cvtest::TS::ptr()->get_data_path() + "qrcode/dataset_config.json";
FileStorage file_config(dataset_config, FileStorage::READ);
const std::string name_current_image = GetParam();
const std::string root = "qrcode/";
const int pixels_error = 3;

std::vector<Point> corners;
String image_path = root + String(GetParam());
std::string image_path = findDataFile(root + name_current_image);
Mat src = imread(image_path, IMREAD_GRAYSCALE);
ASSERT_FALSE(src.empty()) << "Can't read image: " << image_path;

std::vector<Point> corners;
EXPECT_TRUE(detectQRCode(src, corners));

if (file_config.isOpened())
const std::string dataset_config = findDataFile(root + "dataset_config.json", false);
FileStorage file_config(dataset_config, FileStorage::READ);
ASSERT_TRUE(file_config.isOpened()) << "Can't read validation data: " << dataset_config;
{
FileNode images_list = file_config["test_images"];
int index = 0, images_count = static_cast<int>(images_list.size());
ASSERT_GT(images_count, 0);
size_t images_count = static_cast<size_t>(images_list.size());
ASSERT_GT(images_count, 0u) << "Can't find validation data entries in 'test_images': " << dataset_config;

bool runTestsFlag = false;
String name_current_image = String(GetParam());
for (; index < images_count; index++)
for (size_t index = 0; index < images_count; index++)
{
String name_test_image = images_list[index]["image_name"];
FileNode config = images_list[(int)index];
std::string name_test_image = config["image_name"];
if (name_test_image == name_current_image)
{
for (int i = 0; i < 4; i++)
{
int x = images_list[index]["x"][i];
int y = images_list[index]["y"][i];
int x = config["x"][i];
int y = config["y"][i];
EXPECT_NEAR(x, corners[i].x, pixels_error);
EXPECT_NEAR(y, corners[i].y, pixels_error);
}
runTestsFlag = true;
return; // done
}
}
if (!runTestsFlag)
{
std::cout << "Not found results for " << name_current_image;
std::cout << " image in dataset_config.json file." << std::endl;
}

file_config.release();
}
else
{
std::cout << " Not found dataset_config.json file." << std::endl;
std::cerr
<< "Not found results for '" << name_current_image
<< "' image in config file:" << dataset_config << std::endl
<< "Re-run tests with enabled UPDATE_QRCODE_TEST_DATA macro to update test data."
<< std::endl;
}
}

INSTANTIATE_TEST_CASE_P(objdetect, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name));
INSTANTIATE_TEST_CASE_P(/**/, Objdetect_QRCode, testing::ValuesIn(qrcode_images_name));

TEST(Objdetect_QRCode, not_found_qrcode)

TEST(Objdetect_QRCode_basic, not_found_qrcode)
{
std::vector<Point> corners;
Mat zero_image = Mat::zeros(256, 256, CV_8UC1);
EXPECT_FALSE(detectQRCode(zero_image, corners));
}

#endif

} // namespace

#endif // UPDATE_QRCODE_TEST_DATA

}} // namespace

@ -1563,8 +1563,6 @@ PyObject* pyopencv_from(const Moments& m)
"nu30", m.nu30, "nu21", m.nu21, "nu12", m.nu12, "nu03", m.nu03);
}

#include "pyopencv_custom_headers.h"

static int OnError(int status, const char *func_name, const char *err_msg, const char *file_name, int line, void *userdata)
{
PyGILState_STATE gstate;

@ -1802,6 +1800,7 @@ static int convert_to_char(PyObject *o, char *dst, const char *name = "no_name")
# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif

#include "pyopencv_custom_headers.h"
#include "pyopencv_generated_types.h"
#include "pyopencv_generated_funcs.h"
25 modules/python/test/test_videoio.py Normal file

@ -0,0 +1,25 @@
#!/usr/bin/env python
from __future__ import print_function

import numpy as np
import cv2 as cv

from tests_common import NewOpenCVTests

class Bindings(NewOpenCVTests):

    def check_name(self, name):
        #print(name)
        self.assertFalse(name == None)
        self.assertFalse(name == "")

    def test_registry(self):
        self.check_name(cv.videoio_registry.getBackendName(cv.CAP_ANY));
        self.check_name(cv.videoio_registry.getBackendName(cv.CAP_FFMPEG))
        self.check_name(cv.videoio_registry.getBackendName(cv.CAP_OPENCV_MJPEG))
        backends = cv.videoio_registry.getBackends()
        for backend in backends:
            self.check_name(cv.videoio_registry.getBackendName(backend))

if __name__ == '__main__':
    NewOpenCVTests.bootstrap()
@ -103,6 +103,7 @@ using std::pair;
using std::make_pair;
using testing::TestWithParam;
using testing::Values;
using testing::ValuesIn;
using testing::Combine;

using cv::Mat;

@ -654,6 +655,11 @@ void addDataSearchSubDirectory(const std::string& subdir);
*/
std::string findDataFile(const std::string& relative_path, bool required = true);

/*! @brief Try to find requested data directory
@sa findDataFile
*/
std::string findDataDirectory(const std::string& relative_path, bool required = true);

#ifndef __CV_TEST_EXEC_ARGS
#if defined(_MSC_VER) && (_MSC_VER <= 1400)

@ -44,13 +44,13 @@ extern int testThreads;

#undef TEST
#define TEST_(test_case_name, test_name, BODY_IMPL) \
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public ::testing::Test {\
#define TEST_(test_case_name, test_name, parent_class, bodyMethodName, BODY_IMPL) \
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
public:\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
private:\
virtual void TestBody() CV_OVERRIDE;\
virtual void Body();\
virtual void bodyMethodName();\
static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
GTEST_DISALLOW_COPY_AND_ASSIGN_(\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\

@ -62,14 +62,14 @@ extern int testThreads;
#test_case_name, #test_name, NULL, NULL, \
::testing::internal::CodeLocation(__FILE__, __LINE__), \
(::testing::internal::GetTestTypeId()), \
::testing::Test::SetUpTestCase, \
::testing::Test::TearDownTestCase, \
parent_class::SetUpTestCase, \
parent_class::TearDownTestCase, \
new ::testing::internal::TestFactoryImpl<\
GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() BODY_IMPL( #test_case_name "_" #test_name ) \
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::Body()
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::bodyMethodName()

#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, CV__TEST_BODY_IMPL)
#define TEST(test_case_name, test_name) TEST_(test_case_name, test_name, ::testing::Test, Body, CV__TEST_BODY_IMPL)

#define CV__TEST_BIGDATA_BODY_IMPL(name) \
{ \

@ -92,9 +92,9 @@ extern int testThreads;

// Special type of tests which require / use or validate processing of huge amount of data (>= 2Gb)
#if defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, CV__TEST_BIGDATA_BODY_IMPL)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL)
#else
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, CV__TEST_BIGDATA_BODY_IMPL)
#define BIGDATA_TEST(test_case_name, test_name) TEST_(BigData_ ## test_case_name, DISABLED_ ## test_name, ::testing::Test, Body, CV__TEST_BIGDATA_BODY_IMPL)
#endif

#undef TEST_F

@ -546,17 +546,7 @@ void PrintTo(const Size& sz, ::std::ostream* os);
// EXPECT_TRUE(foo.StatusIsOK());
// }
#define PERF_TEST(test_case_name, test_name)\
namespace PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name) {\
class TestBase {/*compile error for this class means that you are trying to use perf::TestBase as a fixture*/};\
class test_case_name : public ::perf::TestBase {\
public:\
test_case_name() {}\
protected:\
virtual void PerfTestBody();\
};\
TEST_F(test_case_name, test_name){ CV__PERF_TEST_BODY_IMPL(#test_case_name "_" #test_name); }\
}\
void PERF_PROXY_NAMESPACE_NAME_(test_case_name, test_name)::test_case_name::PerfTestBody()
TEST_(test_case_name, test_name, ::perf::TestBase, PerfTestBody, CV__PERF_TEST_BODY_IMPL)

// Defines a performance test that uses a test fixture.
//
@ -772,16 +772,24 @@ void addDataSearchSubDirectory(const std::string& subdir)
TS::ptr()->data_search_subdir.push_back(subdir);
}

std::string findDataFile(const std::string& relative_path, bool required)
static std::string findData(const std::string& relative_path, bool required, bool findDirectory)
{
#define TEST_TRY_FILE_WITH_PREFIX(prefix) \
{ \
std::string path = path_join(prefix, relative_path); \
/*printf("Trying %s\n", path.c_str());*/ \
FILE* f = fopen(path.c_str(), "rb"); \
if(f) { \
fclose(f); \
return path; \
if (findDirectory) \
{ \
if (isDirectory(path)) \
return path; \
} \
else \
{ \
FILE* f = fopen(path.c_str(), "rb"); \
if(f) { \
fclose(f); \
return path; \
} \
} \
}

@ -842,11 +850,21 @@ std::string findDataFile(const std::string& relative_path, bool required)
}
#endif
#endif
const char* type = findDirectory ? "directory" : "data file";
if (required)
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required data file: %s", relative_path.c_str()));
throw SkipTestException(cv::format("OpenCV tests: Can't find data file: %s", relative_path.c_str()));
CV_Error(cv::Error::StsError, cv::format("OpenCV tests: Can't find required %s: %s", type, relative_path.c_str()));
throw SkipTestException(cv::format("OpenCV tests: Can't find %s: %s", type, relative_path.c_str()));
}

std::string findDataFile(const std::string& relative_path, bool required)
{
return findData(relative_path, required, false);
}

std::string findDataDirectory(const std::string& relative_path, bool required)
{
return findData(relative_path, required, true);
}

} //namespace cvtest

@ -59,6 +59,7 @@
@defgroup videoio_c C API for video I/O
@defgroup videoio_ios iOS glue for video I/O
@defgroup videoio_winrt WinRT glue for video I/O
@defgroup videoio_registry Query I/O API backends registry
@}
*/
44 modules/videoio/include/opencv2/videoio/registry.hpp Normal file

@ -0,0 +1,44 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_VIDEOIO_REGISTRY_HPP
#define OPENCV_VIDEOIO_REGISTRY_HPP

#include <opencv2/videoio.hpp>

namespace cv { namespace videoio_registry {
/** @addtogroup videoio_registry
This section contains API description how to query/configure available Video I/O backends.

Runtime configuration options:
- enable debug mode: `OPENCV_VIDEOIO_DEBUG=1`
- change backend priority: `OPENCV_VIDEOIO_PRIORITY_<backend>=9999`
- disable backend: `OPENCV_VIDEOIO_PRIORITY_<backend>=0`
- specify list of backends with high priority (>100000): `OPENCV_VIDEOIO_PRIORITY_LIST=FFMPEG,GSTREAMER`

@{
*/

/** @brief Returns backend API name or "unknown"
@param api backend ID (#VideoCaptureAPIs)
*/
CV_EXPORTS_W cv::String getBackendName(VideoCaptureAPIs api);

/** @brief Returns list of all builtin backends */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getBackends();

/** @brief Returns list of available backends which works via `cv::VideoCapture(int index)` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getCameraBackends();

/** @brief Returns list of available backends which works via `cv::VideoCapture(filename)` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getStreamBackends();

/** @brief Returns list of available backends which works via `cv::VideoWriter()` */
CV_EXPORTS_W std::vector<VideoCaptureAPIs> getWriterBackends();

//! @}
}} // namespace

#endif // OPENCV_VIDEOIO_REGISTRY_HPP
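As a rough usage sketch (not part of this commit), code built against the header above could enumerate the enabled backends; the main() wrapper and the output formatting are illustrative assumptions:

// Minimal sketch: list Video I/O backends via the new registry API.
#include <opencv2/videoio/registry.hpp>
#include <iostream>

int main()
{
    std::vector<cv::VideoCaptureAPIs> apis = cv::videoio_registry::getBackends();
    for (size_t i = 0; i < apis.size(); i++)
        std::cout << cv::videoio_registry::getBackendName(apis[i]) << std::endl;
    return 0;
}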
50 modules/videoio/misc/python/pyopencv_videoio.hpp Normal file

@ -0,0 +1,50 @@
#ifdef HAVE_OPENCV_VIDEOIO
typedef std::vector<VideoCaptureAPIs> vector_VideoCaptureAPIs;

template<>
bool pyopencv_to(PyObject *o, cv::VideoCaptureAPIs &v, const char *name)
{
(void)name;
v = CAP_ANY;
if (!o || o == Py_None)
return false;
else if (PyLong_Check(o))
{
v = VideoCaptureAPIs((int64)PyLong_AsLongLong(o));
return true;
}
else if (PyInt_Check(o))
{
v = VideoCaptureAPIs((int64)PyInt_AS_LONG(o));
return true;
}
else
return false;
}

template<>
PyObject* pyopencv_from(const cv::VideoCaptureAPIs &v)
{
return pyopencv_from((int)(v));
}

template<> struct pyopencvVecConverter<cv::VideoCaptureAPIs>
{
static bool to(PyObject* obj, std::vector<cv::VideoCaptureAPIs>& value, const ArgInfo info)
{
return pyopencv_to_generic_vec(obj, value, info);
}

static PyObject* from(const std::vector<cv::VideoCaptureAPIs>& value)
{
return pyopencv_from_generic_vec(value);
}
};

template<>
bool pyopencv_to(PyObject *o, std::vector<cv::VideoCaptureAPIs>& apis, const char *name)
{
return pyopencvVecConverter<cv::VideoCaptureAPIs>::to(o, apis, ArgInfo(name, false));
}

#endif // HAVE_OPENCV_VIDEOIO
@ -2351,9 +2351,6 @@ AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC
c->codec_type = AVMEDIA_TYPE_VIDEO;

// put sample parameters
unsigned long long lbit_rate = static_cast<unsigned long long>(bitrate);
lbit_rate += (bitrate / 4);
lbit_rate = std::min(lbit_rate, static_cast<unsigned long long>(std::numeric_limits<int>::max()));
c->bit_rate = bitrate;

// took advice from

@ -158,8 +158,9 @@ public:
data.resize(size);
}

void put(unsigned bits, int len)
inline void put_bits(unsigned bits, int len)
{
CV_Assert(len >=0 && len < 32);
if((m_pos == (data.size() - 1) && len > bits_free) || m_pos == data.size())
{
resize(int(2*data.size()));

@ -182,6 +183,12 @@ public:
}
}

inline void put_val(int val, const unsigned * table)
{
unsigned code = table[(val) + 2];
put_bits(code >> 8, (int)(code & 255));
}

void finish()
{
if(bits_free == 32)

@ -1188,13 +1195,6 @@ public:
void operator()( const cv::Range& range ) const CV_OVERRIDE
{
const int CAT_TAB_SIZE = 4096;
unsigned code = 0;

#define JPUT_BITS(val, bits) output_buffer.put(val, bits)

#define JPUT_HUFF(val, table) \
code = table[(val) + 2]; \
JPUT_BITS(code >> 8, (int)(code & 255))

int x, y;
int i, j;

@ -1300,8 +1300,8 @@ public:
int cat = cat_table[val + CAT_TAB_SIZE];

//CV_Assert( cat <= 11 );
JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
output_buffer.put_val(cat, huff_dc_tab[is_chroma] );
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
}

for( j = 1; j < 64; j++ )

@ -1316,15 +1316,15 @@ public:
{
while( run >= 16 )
{
JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
output_buffer.put_val( 0xF0, htable ); // encode 16 zeros
run -= 16;
}

{
int cat = cat_table[val + CAT_TAB_SIZE];
//CV_Assert( cat <= 10 );
JPUT_HUFF( cat + run*16, htable );
JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
output_buffer.put_val( cat + run*16, htable );
output_buffer.put_bits( val - (val < 0 ? 1 : 0), cat );
}

run = 0;

@ -1333,7 +1333,7 @@ public:

if( run )
{
JPUT_HUFF( 0x00, htable ); // encode EOB
output_buffer.put_val( 0x00, htable ); // encode EOB
}
}
}
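For reference, put_val() depends on the packed layout of the Huffman tables that the replaced JPUT_HUFF macro also assumed: each entry stores the code bits in its upper bits and the bit length in the low byte, looked up at index val + 2. A standalone sketch of that convention (the pack() helper and the sample entries are hypothetical, not values from the encoder):

// Standalone illustration of the packed-table convention consumed by put_val():
// entry = (code_bits << 8) | code_length, indexed at val + 2.
#include <cstdio>

static unsigned pack(unsigned bits, unsigned len) { return (bits << 8) | len; }

int main()
{
    // Hypothetical table with entries for val = 0 and val = 1 (offset by 2).
    const unsigned table[] = { 0, 0, pack(0x6, 3), pack(0x1E, 5) };
    for (int val = 0; val <= 1; val++)
    {
        unsigned code = table[val + 2];
        printf("val=%d -> bits=0x%X, length=%u\n", val, code >> 8, code & 255u);
    }
    return 0;
}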
@ -277,6 +277,7 @@ struct CvCaptureCAM_V4L CV_FINAL : public CvCapture

__u32 palette;
int width, height;
int width_set, height_set;
int bufferSize;
__u32 fps;
bool convert_rgb;

@ -797,6 +798,7 @@ bool CvCaptureCAM_V4L::open(const char* _deviceName)
FirstCapture = 1;
width = DEFAULT_V4L_WIDTH;
height = DEFAULT_V4L_HEIGHT;
width_set = height_set = 0;
bufferSize = DEFAULT_V4L_BUFFERS;
fps = DEFAULT_V4L_FPS;
convert_rgb = true;

@ -1769,7 +1771,6 @@ static bool icvSetControl (CvCaptureCAM_V4L* capture,

static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
int property_id, double value ){
static int width = 0, height = 0;
bool retval = false;
bool possible;

@ -1778,6 +1779,9 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,

switch (property_id) {
case CV_CAP_PROP_FRAME_WIDTH:
{
int& width = capture->width_set;
int& height = capture->height_set;
width = cvRound(value);
retval = width != 0;
if(width !=0 && height != 0) {

@ -1786,8 +1790,12 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
retval = v4l2_reset(capture);
width = height = 0;
}
break;
}
break;
case CV_CAP_PROP_FRAME_HEIGHT:
{
int& width = capture->width_set;
int& height = capture->height_set;
height = cvRound(value);
retval = height != 0;
if(width !=0 && height != 0) {

@ -1796,7 +1804,8 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
retval = v4l2_reset(capture);
width = height = 0;
}
break;
}
break;
case CV_CAP_PROP_FPS:
capture->fps = value;
retval = v4l2_reset(capture);

@ -12,7 +12,7 @@ namespace cv

// Utility function for safe integer conversions
template <typename D, typename S>
inline D safe_int_cast(S val)
inline D safe_int_cast(S val, const char * msg = 0)
{
typedef std::numeric_limits<S> st;
typedef std::numeric_limits<D> dt;

@ -21,7 +21,10 @@ inline D safe_int_cast(S val)
const bool in_range_l = (double)val >= (double)dt::min();
if (!in_range_r || !in_range_l)
{
CV_Error_(cv::Error::StsOutOfRange, ("Can not convert integer values (%s -> %s), value 0x%llx is out of range", typeid(S).name(), typeid(D).name(), val));
if (!msg)
CV_Error_(Error::StsOutOfRange, ("Can not convert integer values (%s -> %s), value 0x%llx is out of range", typeid(S).name(), typeid(D).name(), val));
else
CV_Error(Error::StsOutOfRange, msg);
}
return static_cast<D>(val);
}
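The contract above is easy to restate in isolation. A minimal self-contained sketch (checked_cast and the sample values are hypothetical stand-ins; the real safe_int_cast lives in OpenCV's private AVI container header and reports failures through CV_Error):

// Hypothetical standalone restatement of the safe_int_cast contract.
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>

template <typename D, typename S>
inline D checked_cast(S val, const char* msg = 0)
{
    typedef std::numeric_limits<D> dt;
    // Compare in double, as the original does, to handle signed/unsigned mixes.
    const bool in_range_r = (double)val <= (double)dt::max();
    const bool in_range_l = (double)val >= (double)dt::min();
    if (!in_range_r || !in_range_l)
        throw std::out_of_range(msg ? msg : "integer conversion out of range");
    return static_cast<D>(val);
}

int main()
{
    std::cout << checked_cast<int32_t>(uint64_t(42)) << std::endl; // fits: prints 42
    try {
        checked_cast<int32_t>(std::numeric_limits<uint64_t>::max(),
                              "requested chunk size is too large"); // does not fit
    } catch (const std::out_of_range& e) {
        std::cout << e.what() << std::endl;
    }
    return 0;
}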
@ -128,7 +131,7 @@ public:
VideoInputStream();
VideoInputStream(const String& filename);
~VideoInputStream();
VideoInputStream& read(char*, uint64_t);
VideoInputStream& read(char*, uint32_t);
VideoInputStream& seekg(uint64_t);
uint64_t tellg();
bool isOpened() const;

@ -229,11 +232,11 @@ void VideoInputStream::close()
}
}

VideoInputStream& VideoInputStream::read(char* buf, uint64_t count)
VideoInputStream& VideoInputStream::read(char* buf, uint32_t count)
{
if(isOpened())
{
input.read(buf, safe_int_cast<std::streamsize>(count));
input.read(buf, safe_int_cast<std::streamsize>(count, "Failed to read AVI file: requested chunk size is too large"));
m_is_valid = (input.gcount() == (std::streamsize)count);
}

@ -243,7 +246,7 @@ VideoInputStream& VideoInputStream::read(char* buf, uint64_t count)
VideoInputStream& VideoInputStream::seekg(uint64_t pos)
{
input.clear();
input.seekg(safe_int_cast<std::streamoff>(pos));
input.seekg(safe_int_cast<std::streamoff>(pos, "Failed to seek in AVI file: position is out of range"));
m_is_valid = !input.eof();
return *this;
}

@ -322,9 +325,6 @@ bool AVIReadContainer::parseStrl(char stream_id, Codecs codec_)

if(m_file_stream && strh.m_four_cc == STRH_CC)
{
uint64_t next_strl_list = m_file_stream->tellg();
next_strl_list += strh.m_size;

AviStreamHeader strm_hdr;
*m_file_stream >> strm_hdr;

@ -668,7 +668,7 @@ void BitStream::writeBlock()
}

size_t BitStream::getPos() const {
return safe_int_cast<size_t>(m_current - m_start) + m_pos;
return safe_int_cast<size_t>(m_current - m_start, "Failed to determine AVI bufer position: value is out of range") + m_pos;
}

void BitStream::putByte(int val)

@ -737,7 +737,7 @@ void BitStream::patchInt(uint32_t val, size_t pos)
{
if( pos >= m_pos )
{
ptrdiff_t delta = safe_int_cast<ptrdiff_t>(pos - m_pos);
ptrdiff_t delta = safe_int_cast<ptrdiff_t>(pos - m_pos, "Failed to seek in AVI buffer: value is out of range");
CV_Assert( delta < m_current - m_start );
m_start[delta] = (uchar)val;
m_start[delta+1] = (uchar)(val >> 8);

@ -747,7 +747,7 @@ void BitStream::patchInt(uint32_t val, size_t pos)
else
{
std::streamoff fpos = output.tellp();
output.seekp(safe_int_cast<std::streamoff>(pos));
output.seekp(safe_int_cast<std::streamoff>(pos, "Failed to seek in AVI file: value is out of range"));
uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
output.write((char *)buf, 4);
output.seekp(fpos);

@ -960,7 +960,7 @@ void AVIWriteContainer::endWriteChunk()
size_t pospos = AVIChunkSizeIndex.back();
AVIChunkSizeIndex.pop_back();
CV_Assert(currpos >= pospos);
uint32_t chunksz = safe_int_cast<uint32_t>(currpos - pospos);
uint32_t chunksz = safe_int_cast<uint32_t>(currpos - pospos, "Failed to write AVI file: chunk size is out of bounds");
strm->patchInt(chunksz, pospos);
}
}

@ -996,7 +996,7 @@ void AVIWriteContainer::writeIndex(int stream_number, StreamType strm_type)

void AVIWriteContainer::finishWriteAVI()
{
uint32_t nframes = safe_int_cast<uint32_t>(frameOffset.size());
uint32_t nframes = safe_int_cast<uint32_t>(frameOffset.size(), "Failed to write AVI file: number of frames is too large");
// Record frames numbers to AVI Header
while (!frameNumIndexes.empty())
{
@ -6,6 +6,8 @@

#include "videoio_registry.hpp"

#include "opencv2/videoio/registry.hpp"

#include "cap_intelperc.hpp"
#include "cap_librealsense.hpp"
#include "cap_dshow.hpp"

@ -250,6 +252,8 @@ public:
return g_instance;
}

inline std::vector<VideoBackendInfo> getEnabledBackends() const { return enabledBackends; }

inline std::vector<VideoBackendInfo> getAvailableBackends_CaptureByIndex() const
{
std::vector<VideoBackendInfo> result;

@ -305,6 +309,58 @@ std::vector<VideoBackendInfo> getAvailableBackends_Writer()
return result;
}

cv::String getBackendName(VideoCaptureAPIs api)
{
if (api == CAP_ANY)
return "CAP_ANY"; // special case, not a part of backends list
const int N = sizeof(builtin_backends)/sizeof(builtin_backends[0]);
for (size_t i = 0; i < N; i++)
{
const VideoBackendInfo& backend = builtin_backends[i];
if (backend.id == api)
return backend.name;
}
return cv::format("UnknownVideoAPI(%d)", (int)api);
}

std::vector<VideoCaptureAPIs> getBackends()
{
std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getEnabledBackends();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}

std::vector<VideoCaptureAPIs> getCameraBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_CaptureByIndex();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}

std::vector<VideoCaptureAPIs> getStreamBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_CaptureByFilename();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}

std::vector<VideoCaptureAPIs> getWriterBackends()
{
const std::vector<VideoBackendInfo> backends = VideoBackendRegistry::getInstance().getAvailableBackends_Writer();
std::vector<VideoCaptureAPIs> result;
for (size_t i = 0; i < backends.size(); i++)
result.push_back((VideoCaptureAPIs)backends[i].id);
return result;
}

} // namespace registry

#define TRY_OPEN(backend_func) \
@ -6,10 +6,26 @@

#include "opencv2/ts.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/videoio/registry.hpp"
#include "opencv2/imgproc/imgproc_c.h"

#include "opencv2/core/private.hpp"

namespace cv {

inline std::ostream &operator<<(std::ostream &out, const VideoCaptureAPIs& api)
{
out << cv::videoio_registry::getBackendName(api); return out;
}

static inline void PrintTo(const cv::VideoCaptureAPIs& api, std::ostream* os)
{
*os << cv::videoio_registry::getBackendName(api);
}

} // namespace

inline std::string fourccToString(int fourcc)
{
return cv::format("%c%c%c%c", fourcc & 255, (fourcc >> 8) & 255, (fourcc >> 16) & 255, (fourcc >> 24) & 255);

@ -55,4 +71,15 @@ public:
}
};

static inline bool isBackendAvailable(cv::VideoCaptureAPIs api, const std::vector<cv::VideoCaptureAPIs>& api_list)
{
for (size_t i = 0; i < api_list.size(); i++)
{
if (api_list[i] == api)
return true;
}
return false;
}

#endif
@ -46,62 +46,12 @@
namespace opencv_test
{

struct VideoCaptureAPI
{
VideoCaptureAPIs api;

inline const char * toString() const
{
switch (api)
{
case CAP_ANY: return "CAP_ANY";
#ifdef __linux__
case CAP_V4L2: return "CAP_V4L/CAP_V4L2";
#else
case CAP_VFW: return "CAP_VFW";
#endif
case CAP_FIREWIRE: return "CAP_FIREWIRE";
case CAP_QT: return "CAP_QT";
case CAP_UNICAP: return "CAP_UNICAP";
case CAP_DSHOW: return "CAP_DSHOW";
case CAP_PVAPI: return "CAP_PVAPI";
case CAP_OPENNI: return "CAP_OPENNI";
case CAP_OPENNI_ASUS: return "CAP_OPENNI_ASUS";
case CAP_ANDROID: return "CAP_ANDROID";
case CAP_XIAPI: return "CAP_XIAPI";
case CAP_AVFOUNDATION: return "CAP_AVFOUNDATION";
case CAP_GIGANETIX: return "CAP_GIGANETIX";
case CAP_MSMF: return "CAP_MSMF";
case CAP_WINRT: return "CAP_WINRT";
case CAP_INTELPERC: return "CAP_INTELPERC";
case CAP_OPENNI2: return "CAP_OPENNI2";
case CAP_OPENNI2_ASUS: return "CAP_OPENNI2_ASUS";
case CAP_GPHOTO2: return "CAP_GPHOTO2";
case CAP_GSTREAMER: return "CAP_GSTREAMER";
case CAP_FFMPEG: return "CAP_FFMPEG";
case CAP_IMAGES: return "CAP_IMAGES";
case CAP_ARAVIS: return "CAP_ARAVIS";
case CAP_OPENCV_MJPEG: return "CAP_OPENCV_MJPEG";
case CAP_INTEL_MFX: return "CAP_INTEL_MFX";
case CAP_XINE: return "CAP_XINE";
}
return "unknown";
}
VideoCaptureAPI(int api_ = CAP_ANY) : api((VideoCaptureAPIs)api_) {}
operator int() { return api; }
};

inline std::ostream &operator<<(std::ostream &out, const VideoCaptureAPI & api)
{
out << api.toString(); return out;
}

class Videoio_Test_Base
{
protected:
string ext;
string video_file;
VideoCaptureAPI apiPref;
VideoCaptureAPIs apiPref;
protected:
Videoio_Test_Base() {}
virtual ~Videoio_Test_Base() {}

@ -131,6 +81,8 @@ protected:
public:
void doTest()
{
if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends()))
throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref));
VideoCapture cap;
ASSERT_NO_THROW(cap.open(video_file, apiPref));
if (!cap.isOpened())

@ -200,7 +152,7 @@ public:
};

//==================================================================================================
typedef tuple<string, VideoCaptureAPI> Backend_Type_Params;
typedef tuple<string, VideoCaptureAPIs> Backend_Type_Params;

class Videoio_Bunny : public Videoio_Test_Base, public testing::TestWithParam<Backend_Type_Params>
{

@ -214,6 +166,8 @@ public:
}
void doFrameCountTest()
{
if (!isBackendAvailable(apiPref, cv::videoio_registry::getStreamBackends()))
throw SkipTestException(cv::String("Backend is not available/disabled: ") + cv::videoio_registry::getBackendName(apiPref));
VideoCapture cap;
EXPECT_NO_THROW(cap.open(video_file, apiPref));
if (!cap.isOpened())

@ -274,7 +228,7 @@ struct Ext_Fourcc_PSNR
string ext;
string fourcc;
float PSNR;
VideoCaptureAPI api;
VideoCaptureAPIs api;
};
typedef tuple<Size, Ext_Fourcc_PSNR> Size_Ext_Fourcc_PSNR;

@ -348,7 +302,7 @@ public:

//==================================================================================================

static VideoCaptureAPI backend_params[] = {
static const VideoCaptureAPIs backend_params[] = {
#ifdef HAVE_QUICKTIME
CAP_QT,
#endif

@ -383,7 +337,7 @@ static VideoCaptureAPI backend_params[] = {
// CAP_INTEL_MFX
};

static string bunny_params[] = {
static const string bunny_params[] = {
#ifdef HAVE_VIDEO_INPUT
string("wmv"),
string("mov"),
@ -7,6 +7,9 @@
#include <vector>
#include <map>
#include <iostream>
#include <iomanip>
#include <limits>
#include <stdint.h>
#ifdef HAVE_OPENGL
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN 1

@ -36,17 +39,17 @@ static void help()
cout << "\n This program demonstrates how to use MSER to detect extremal regions \n"
"Usage: \n"
" ./detect_mser <image1(without parameter a syntehtic image is used as default)>\n"
"Press esc key when image window is active to change descriptor parameter\n"
"Press esc key when image window is active to change descriptor parameter\n"
"Press 2, 8, 4, 6, +,- or 5 keys in openGL windows to change view or use mouse\n";
}

struct MSERParams
{
MSERParams(int _delta = 5, int _min_area = 60, int _max_area = 14400,
double _max_variation = 0.25, double _min_diversity = .2,
int _max_evolution = 200, double _area_threshold = 1.01,
double _min_margin = 0.003, int _edge_blur_size = 5)
{
double _max_variation = 0.25, double _min_diversity = .2,
int _max_evolution = 200, double _area_threshold = 1.01,
double _min_margin = 0.003, int _edge_blur_size = 5)
{
delta = _delta;
minArea = _min_area;
maxArea = _max_area;

@ -57,7 +60,7 @@ struct MSERParams
minMargin = _min_margin;
edgeBlurSize = _edge_blur_size;
pass2Only = false;
}
}

int delta;
int minArea;

@ -72,30 +75,20 @@ struct MSERParams
int edgeBlurSize;
};

static String Legende(MSERParams &pAct)
static String Legende(const MSERParams &pAct)
{
String s="";
String inf = static_cast<const ostringstream&>(ostringstream() << pAct.minArea).str();
String sup = static_cast<const ostringstream&>(ostringstream() << pAct.maxArea).str();
s = " Area[" + inf + "," + sup + "]";
ostringstream ss;
ss << "Area[" << pAct.minArea << "," << pAct.maxArea << "] ";
ss << "del. [" << pAct.delta << "] ";
ss << "var. [" << pAct.maxVariation << "] ";
ss << "div. [" << (int)pAct.minDiversity << "] ";
ss << "pas. [" << (int)pAct.pass2Only << "] ";
ss << "RGb->evo. [" << pAct.maxEvolution << "] ";
ss << "are. [" << (int)pAct.areaThreshold << "] ";
ss << "mar. [" << (int)pAct.minMargin << "] ";
ss << "siz. [" << pAct.edgeBlurSize << "]";

inf = static_cast<const ostringstream&>(ostringstream() << pAct.delta).str();
s += " del. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << pAct.maxVariation).str();
s += " var. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.minDiversity).str();
s += " div. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.pass2Only).str();
s += " pas. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.maxEvolution).str();
s += "RGb-> evo. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.areaThreshold).str();
s += " are. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.minMargin).str();
s += " mar. [" + inf + "]";
inf = static_cast<const ostringstream&>(ostringstream() << (int)pAct.edgeBlurSize).str();
s += " siz. [" + inf + "]";
return s;
return ss.str();
}

@ -109,18 +102,28 @@ bool keyPressed=false;
Vec4f rotAxis(1,0,1,0);
Vec3f zoom(1,0,0);

float obsX = (float)0, obsY = (float)0, obsZ = (float)-10, tx = (float)0, ty = (float)0;
float thetaObs = (float)-1.570, phiObs = (float)1.570, rObs = (float)10;
int prevX=-1,prevY=-1,prevTheta=-1000,prevPhi=-1000;
float obsX = 0.f;
float obsY = 0.f;
float obsZ = -10.f;
float tx = 0.f;
float ty = 0.f;

float thetaObs = -1.570f;
float phiObs = 1.570f;
float rObs = 10.f;

int prevX = -1;
int prevY = -1;
int prevTheta = -1000;
int prevPhi = -1000;

#ifdef HAVE_OPENGL
struct DrawData
{
{
ogl::Arrays arr;
ogl::Texture2D tex;
ogl::Buffer indices;
};
};

static void draw(void* userdata)

@ -167,19 +170,19 @@ static void onMouse(int event, int x, int y, int flags, void*)
{
if (x - prevTheta<0)
{
thetaObs +=(float)0.02;
thetaObs += 0.02f;
}
else if (x - prevTheta>0)
{
thetaObs -= (float)0.02;
thetaObs -= 0.02f;
}
if (y - prevPhi<0)
{
phiObs -= (float)0.02;
phiObs -= 0.02f;
}
else if (y - prevPhi>0)
{
phiObs += (float)0.02;
phiObs += 0.02f;
}
prevTheta = x;
prevPhi = y;

@ -187,9 +190,9 @@ static void onMouse(int event, int x, int y, int flags, void*)
if (event==EVENT_MOUSEWHEEL)
{
if (getMouseWheelDelta(flags)>0)
rObs += (float)0.1;
rObs += 0.1f;
else
rObs -= (float)0.1;
rObs -= 0.1f;
}
float pi = static_cast<float>(CV_PI);
if (thetaObs>pi)

@ -202,11 +205,11 @@ static void onMouse(int event, int x, int y, int flags, void*)
}
if (phiObs>pi / 2)
{
phiObs = pi / 2 - (float)0.0001;
phiObs = pi / 2 - 0.0001f;
}
if (phiObs<-pi / 2)
{
phiObs = -pi / 2 + (float)0.00001;
phiObs = -pi / 2 + 0.00001f;
}
if (rObs<0)
{

@ -224,36 +227,37 @@ static void DrawOpenGLMSER(Mat img, Mat result)
cvtColor(img, imgGray, COLOR_BGR2GRAY);
else
imgGray = img;

namedWindow("OpenGL", WINDOW_OPENGL);
setMouseCallback("OpenGL", onMouse, NULL);

Mat_<Vec3f> vertex(1, img.cols*img.rows);
Mat_<Vec2f> texCoords(1, img.cols*img.rows);
for (int i = 0, nbPix = 0; i<img.rows; i++)
{
{
for (int j = 0; j<img.cols; j++, nbPix++)
{
{
float x = (j) / (float)img.cols;
float y = (i) / (float)img.rows;
vertex.at< Vec3f >(0, nbPix) = Vec3f(float(2 * (x - 0.5)), float(2 * (0.5 - y)), float(imgGray.at<uchar>(i, j) / 512.0));
texCoords.at< Vec2f>(0, nbPix) = Vec2f(x, y);
}
}
}

Mat_<int> indices(1, (img.rows - 1)*(6 * img.cols));
for (int i = 1, nbPix = 0; i<img.rows; i++)
{
{
for (int j = 1; j<img.cols; j++)
{
{
int c = i*img.cols + j;
indices.at<int>(0, nbPix++) = c ;
indices.at<int>(0, nbPix++) = c;
indices.at<int>(0, nbPix++) = c - 1;
indices.at<int>(0, nbPix++) = c- img.cols - 1;
indices.at<int>(0, nbPix++) = c- img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols - 1;
indices.at<int>(0, nbPix++) = c - img.cols;
indices.at<int>(0, nbPix++) = c ;
}
indices.at<int>(0, nbPix++) = c;
}
}

DrawData *data = new DrawData;

@ -279,7 +283,7 @@ static void DrawOpenGLMSER(Mat img, Mat result)
setOpenGlDrawCallback("OpenGL", draw, data);

for (;;)
{
{
updateWindow("OpenGL");
char key = (char)waitKey(40);
if (key == 27)

@ -292,27 +296,28 @@ static void DrawOpenGLMSER(Mat img, Mat result)
case '5':
obsX = 0, obsY = 0, obsZ = -10;
thetaObs = -pi/2, phiObs = pi/2, rObs = 10;
tx=0;ty=0;
tx=0; ty=0;
break;
case '4':
thetaObs += (float)0.1;
thetaObs += 0.1f;
break;
case '6':
thetaObs -= (float)0.1;
thetaObs -= 0.1f;
break;
case '2':
phiObs -= (float).1;
phiObs -= 0.1f;
break;
case '8':
phiObs += (float).1;
phiObs += 0.1f;
break;
case '+':
rObs -= (float).1;
rObs -= 0.1f;
break;
case '-':
rObs += (float).1;
rObs += 0.1f;
break;
}

if (thetaObs>pi)
{
thetaObs = -2 * pi + thetaObs;

@ -320,9 +325,9 @@ static void DrawOpenGLMSER(Mat img, Mat result)
if (thetaObs<-pi)
thetaObs = 2 * pi + thetaObs;
if (phiObs>pi / 2)
phiObs = pi / 2 - (float)0.0001;
phiObs = pi / 2 - 0.0001f;
if (phiObs<-pi / 2)
phiObs = -pi / 2 + (float)0.00001;
phiObs = -pi / 2 + 0.00001f;
if (rObs<0)
rObs = 0;
obsX = rObs*cos(thetaObs)*cos(phiObs);

@ -334,67 +339,59 @@ static void DrawOpenGLMSER(Mat img, Mat result)
}
#endif
||||
static Mat MakeSyntheticImage()
|
||||
{
|
||||
Mat img(800, 800, CV_8UC1);
|
||||
map<int, char> val;
|
||||
int fond = 0;
|
||||
img = Scalar(fond);
|
||||
val[fond] = 1;
|
||||
int width1[] = { 390, 380, 300, 290, 280, 270, 260, 250, 210, 190, 150, 100, 80, 70 };
|
||||
int color1[] = { 80, 180, 160, 140, 120, 100, 90, 110, 170, 150, 140, 100, 220 };
|
||||
Point p0(10, 10);
|
||||
int *width, *color;
|
||||
|
||||
width = width1;
|
||||
color = color1;
|
||||
for (int i = 0; i<13; i++)
|
||||
{
|
||||
// Add nested rectangles of different widths and colors to an image
|
||||
static void addNestedRectangles(Mat &img, Point p0, int* width, int *color, int n) {
|
||||
for (int i = 0; i<n; i++)
|
||||
{
|
||||
rectangle(img, Rect(p0, Size(width[i], width[i])), Scalar(color[i]), 1);
|
||||
p0 += Point((width[i] - width[i + 1]) / 2, (width[i] - width[i + 1]) / 2);
|
||||
floodFill(img, p0, Scalar(color[i]));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
int color2[] = { 81, 181, 161, 141, 121, 101, 91, 111, 171, 151, 141, 101, 221 };
|
||||
color = color2;
|
||||
p0 = Point(200, 600);
|
||||
for (int i = 0; i<13; i++)
|
||||
{
|
||||
circle(img, p0, width[i] / 2, Scalar(color[i]), 1);
|
||||
floodFill(img, p0, Scalar(color[i]));
|
||||
|
||||
}
|
||||
int color3[] = { 175,75,95,115,135,155,165,145,85,105,115,156 };
|
||||
color = color3;
|
||||
p0 = Point(410, 10);
|
||||
for (int i = 0; i<13; i++)
|
||||
{
|
||||
rectangle(img, Rect(p0, Size(width[i], width[i])), Scalar(color[i]), 1);
|
||||
p0 += Point((width[i] - width[i + 1]) / 2, (width[i] - width[i + 1]) / 2);
|
||||
floodFill(img, p0, Scalar(color[i]));
|
||||
|
||||
}
|
||||
int color4[] = { 173,73,93,113,133,153,163,143,83,103,114,154 };
|
||||
color = color4;
|
||||
|
||||
p0 = Point(600, 600);
|
||||
for (int i = 0; i<13; i++)
|
||||
// Add nested circles of different widths and colors to an image
|
||||
static void addNestedCircles(Mat &img, Point p0, int *width, int *color, int n) {
|
||||
for (int i = 0; i<n; i++)
|
||||
{
|
||||
circle(img, p0, width[i] / 2, Scalar(color[i]), 1);
|
||||
floodFill(img, p0, Scalar(color[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static Mat MakeSyntheticImage()
|
||||
{
|
||||
const int fond = 0;
|
||||
|
||||
Mat img(800, 800, CV_8UC1);
|
||||
img = Scalar(fond);
|
||||
|
||||
int width[] = { 390, 380, 300, 290, 280, 270, 260, 250, 210, 190, 150, 100, 80, 70 };
|
||||
|
||||
int color1[] = { 80, 180, 160, 140, 120, 100, 90, 110, 170, 150, 140, 100, 220 };
|
||||
int color2[] = { 81, 181, 161, 141, 121, 101, 91, 111, 171, 151, 141, 101, 221 };
|
||||
int color3[] = { 175, 75, 95, 115, 135, 155, 165, 145, 85, 105, 115, 155, 35 };
|
||||
int color4[] = { 173, 73, 93, 113, 133, 153, 163, 143, 83, 103, 113, 153, 33 };
|
||||
|
||||
addNestedRectangles(img, Point(10, 10), width, color1, 13);
|
||||
addNestedCircles(img, Point(200, 600), width, color2, 13);
|
||||
|
||||
addNestedRectangles(img, Point(410, 10), width, color3, 13);
|
||||
addNestedCircles(img, Point(600, 600), width, color4, 13);
|
||||
|
||||
int histSize = 256;
|
||||
float range[] = { 0, 256 };
|
||||
const float* histRange[] = { range };
|
||||
Mat hist;
|
||||
|
||||
// we compute the histogram
|
||||
calcHist(&img, 1, 0, Mat(), hist, 1, &histSize, histRange, true, false);
|
||||
|
||||
cout << "****************Maximal region************************\n";
|
||||
for (int i = 0; i < hist.rows ; i++)
|
||||
for (int i = 0; i < hist.rows; i++)
|
||||
{
|
||||
if (hist.at<float>(i, 0)!=0)
|
||||
{
|
||||
cout << "h" << i << "=\t" << hist.at<float>(i, 0) << "\n";
|
||||
cout << "h" << setw(3) << left << i << "\t=\t" << hist.at<float>(i, 0) << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
@ -403,68 +400,60 @@ static Mat MakeSyntheticImage()

int main(int argc, char *argv[])
{
vector<String> fileName;
Mat imgOrig,img;
Size blurSize(5,5);
Mat imgOrig, img;
Size blurSize(5, 5);
cv::CommandLineParser parser(argc, argv, "{ help h | | }{ @input | | }");
if (parser.has("help"))
{
help();
return 0;
}

string input = parser.get<string>("@input");
if (!input.empty())
{
fileName.push_back(input);
imgOrig = imread(fileName[0], IMREAD_GRAYSCALE);
imgOrig = imread(input, IMREAD_GRAYSCALE);
blur(imgOrig, img, blurSize);
}
else
{
fileName.push_back("SyntheticImage.bmp");
imgOrig = MakeSyntheticImage();
img=imgOrig;
img = imgOrig;
}

MSERParams pDefaultMSER;
// Descriptor array MSER
vector<String> typeDesc;
// Param array for MSER
vector<MSERParams> pMSER;
vector<MSERParams>::iterator itMSER;

// Color palette
vector<Vec3b> palette;
for (int i = 0; i<65536; i++)
vector<Vec3b> palette;
for (int i = 0; i<=numeric_limits<uint16_t>::max(); i++)
palette.push_back(Vec3b((uchar)rand(), (uchar)rand(), (uchar)rand()));

help();

MSERParams params;

params.delta = 10;
params.minArea = 100;
params.maxArea = 5000;
params.maxVariation = 2;
params.minDiversity = 0;
params.pass2Only = true;

typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 10;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = true;
pMSER.push_back(params);

params.pass2Only = false;
typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 10;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = false;
pMSER.push_back(params);

params.delta = 100;
typeDesc.push_back("MSER");
pMSER.push_back(pDefaultMSER);
pMSER.back().delta = 100;
pMSER.back().minArea = 100;
pMSER.back().maxArea = 5000;
pMSER.back().maxVariation = 2;
pMSER.back().minDiversity = 0;
pMSER.back().pass2Only = false;
itMSER = pMSER.begin();
vector<double> desMethCmp;
pMSER.push_back(params);

vector<MSERParams>::iterator itMSER = pMSER.begin();
Ptr<Feature2D> b;
String label;
// Descriptor loop
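The three parameter sets built above map onto the Python bindings roughly as follows (an assumed sketch; keyword names vary across OpenCV versions, so positional MSER_create arguments are used: delta, min_area, max_area, max_variation, min_diversity):

import cv2 as cv

# configuration 1: delta=10, area in [100, 5000], maxVariation=2, pass2Only on
mser = cv.MSER_create(10, 100, 5000, 2.0, 0.0)
mser.setPass2Only(True)
# configuration 2: same thresholds, pass2Only off
mser.setPass2Only(False)
# configuration 3: delta raised to 100
mser.setDelta(100)
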
@ -473,14 +462,14 @@ int main(int argc, char *argv[])
for (itDesc = typeDesc.begin(); itDesc != typeDesc.end(); ++itDesc)
{
vector<KeyPoint> keyImg1;
if (*itDesc == "MSER"){
if (*itDesc == "MSER")
{
if (img.type() == CV_8UC3)
{
b = MSER::create(itMSER->delta, itMSER->minArea, itMSER->maxArea, itMSER->maxVariation, itMSER->minDiversity, itMSER->maxEvolution,
itMSER->areaThreshold, itMSER->minMargin, itMSER->edgeBlurSize);
label = Legende(*itMSER);
++itMSER;

}
else
{
@ -490,6 +479,7 @@ int main(int argc, char *argv[])
++itMSER;
}
}

if (img.type()==CV_8UC3)
{
img.copyTo(result);
@ -505,36 +495,37 @@ int main(int argc, char *argv[])
try
{
// We can detect regions using detectRegions method
vector<KeyPoint> keyImg;
vector<Rect> zone;
vector<vector <Point> > region;
Mat desc;
vector<KeyPoint> keyImg;
vector<Rect> zone;
vector<vector <Point> > region;
Mat desc;

if (b.dynamicCast<MSER>() != NULL)
{
Ptr<MSER> sbd = b.dynamicCast<MSER>();
sbd->detectRegions(img, region, zone);
int i = 0;
//result = Scalar(0, 0, 0);
int nbPixelInMSER=0;
for (vector<vector <Point> >::iterator itr = region.begin(); itr != region.end(); ++itr, ++i)
for (vector<vector <Point> >::iterator itr = region.begin(); itr != region.end(); ++itr)
{
for (vector <Point>::iterator itp = region[i].begin(); itp != region[i].end(); ++itp)
for (vector <Point>::iterator itp = itr->begin(); itp != itr->end(); ++itp)
{
// all pixels belonging to region become blue
result.at<Vec3b>(itp->y, itp->x) = Vec3b(128, 0, 0);
nbPixelInMSER++;
}
}
cout << "Number of MSER region " << region.size()<<" Number of pixels in all MSER region : "<<nbPixelInMSER<<"\n";
cout << "Number of MSER region: " << region.size() << "; Number of pixels in all MSER region: " << nbPixelInMSER << "\n";
}
namedWindow(*itDesc + label, WINDOW_AUTOSIZE);
imshow(*itDesc + label, result);

const string winName = *itDesc + label;
namedWindow(winName, WINDOW_AUTOSIZE);
imshow(winName, result);
imshow("Original", img);
}
catch (Exception& e)
{
cout << "Feature : " << *itDesc << "\n";
cout << "Feature: " << *itDesc << "\n";
cout << e.msg << endl;
}
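The detectRegions loop above has a compact Python analogue; a minimal sketch (the input file name is hypothetical, and default MSER parameters are assumed):

import cv2 as cv

img = cv.imread("SyntheticImage.bmp", cv.IMREAD_GRAYSCALE)  # hypothetical input
mser = cv.MSER_create()
regions, bboxes = mser.detectRegions(img)   # regions: list of Nx2 (x, y) arrays
result = cv.cvtColor(img, cv.COLOR_GRAY2BGR)
nbPixelInMSER = 0
for pts in regions:
    for x, y in pts:
        result[y, x] = (128, 0, 0)          # pixels belonging to a region turn blue
        nbPixelInMSER += 1
print("Number of MSER region: %d; Number of pixels in all MSER region: %d"
      % (len(regions), nbPixelInMSER))
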
#ifdef HAVE_OPENGL

@ -208,12 +208,18 @@ for label in ['ClassPredictor', 'BoxEncodingPredictor']:
graph_def.node.extend([flatten])
addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten')

idx = 0
for node in graph_def.node:
    if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx):
        text_format.Merge('b: true', node.attr["loc_pred_transposed"])
        idx += 1
assert(idx == args.num_layers)
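The tagging idiom above, parsing a textual proto fragment straight into a node attribute, works on any NodeDef; a self-contained sketch (the node name and attribute value are illustrative):

from tensorflow.core.framework import node_def_pb2
from google.protobuf import text_format

node = node_def_pb2.NodeDef()
node.name = 'BoxPredictor_0/BoxEncodingPredictor/Conv2D'
node.op = 'Conv2D'
text_format.Merge('b: true', node.attr['loc_pred_transposed'])  # attr maps to AttrValue
assert node.attr['loc_pred_transposed'].b
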

# Add layers that generate anchors (bounding boxes proposals).
scales = [args.min_scale + (args.max_scale - args.min_scale) * i / (args.num_layers - 1)
          for i in range(args.num_layers)] + [1.0]

priorBoxes = []
addConstNode('reshape_prior_boxes_to_4d', [1, 2, -1, 1])
for i in range(args.num_layers):
    priorBox = NodeDef()
    priorBox.name = 'PriorBox_%d' % i
@ -240,18 +246,9 @@ for i in range(args.num_layers):
    text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])

    graph_def.node.extend([priorBox])
    priorBoxes.append(priorBox.name)

    # Reshape from 1x2xN to 1x2xNx1
    reshape = NodeDef()
    reshape.name = priorBox.name + '/4d'
    reshape.op = 'Reshape'
    reshape.input.append(priorBox.name)
    reshape.input.append('reshape_prior_boxes_to_4d')
    graph_def.node.extend([reshape])

    priorBoxes.append(reshape.name)

addConcatNode('PriorBox/concat', priorBoxes, 'PriorBox/concat/axis')
addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten')

# Sigmoid for classes predictions and DetectionOutput layer
sigmoid = NodeDef()
@ -276,7 +273,6 @@ text_format.Merge('i: 100', detectionOut.attr['top_k'])
text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])
text_format.Merge('i: 100', detectionOut.attr['keep_top_k'])
text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])
text_format.Merge('b: true', detectionOut.attr['loc_pred_transposed'])

graph_def.node.extend([detectionOut])
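For orientation, the per-layer Reshape nodes deleted above turned each 1x2xN prior-box blob into the 1x2xNx1 layout before concatenation; in plain numpy terms (N=12 is an arbitrary example):

import numpy as np

prior = np.zeros((1, 2, 12))            # hypothetical 1x2xN prior-box blob
prior4d = prior.reshape(1, 2, -1, 1)    # the 1x2xNx1 shape the deleted nodes produced
assert prior4d.shape == (1, 2, 12, 1)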