opencv/modules/core/src/stat.cpp

4496 lines
153 KiB
C++
Raw Normal View History

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
2015-01-12 15:59:30 +08:00
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#include <climits>
2014-04-07 20:45:35 +08:00
#include <limits>
#include "opencv2/core/hal/intrin.hpp"
2014-08-01 22:11:20 +08:00
#include "opencl_kernels_core.hpp"
#include "opencv2/core/openvx/ovx_defs.hpp"
namespace cv
{
/****************************************************************************************\
* sum *
\****************************************************************************************/
2014-09-22 03:38:02 +08:00
template <typename T, typename ST>
struct Sum_SIMD
{
int operator () (const T *, const uchar *, ST *, int, int) const
{
return 0;
}
};
template <typename ST, typename DT>
inline void addChannels(DT * dst, ST * buf, int cn)
{
for (int i = 0; i < 4; ++i)
dst[i % cn] += buf[i];
}
2015-01-12 15:59:31 +08:00
#if CV_SSE2
template <>
struct Sum_SIMD<schar, int>
{
int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
return 0;
int x = 0;
__m128i v_zero = _mm_setzero_si128(), v_sum = v_zero;
for ( ; x <= len - 16; x += 16)
{
__m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
__m128i v_half = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src), 8);
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));
v_half = _mm_srai_epi16(_mm_unpackhi_epi8(v_zero, v_src), 8);
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));
}
for ( ; x <= len - 8; x += 8)
{
__m128i v_src = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, _mm_loadl_epi64((__m128i const *)(src0 + x))), 8);
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src), 16));
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src), 16));
}
int CV_DECL_ALIGNED(16) ar[4];
_mm_store_si128((__m128i*)ar, v_sum);
addChannels(dst, ar, cn);
2015-01-12 15:59:31 +08:00
return x / cn;
}
};
template <>
struct Sum_SIMD<int, double>
{
int operator () (const int * src0, const uchar * mask, double * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
return 0;
int x = 0;
__m128d v_zero = _mm_setzero_pd(), v_sum0 = v_zero, v_sum1 = v_zero;
for ( ; x <= len - 4; x += 4)
{
__m128i v_src = _mm_loadu_si128((__m128i const *)(src0 + x));
v_sum0 = _mm_add_pd(v_sum0, _mm_cvtepi32_pd(v_src));
v_sum1 = _mm_add_pd(v_sum1, _mm_cvtepi32_pd(_mm_srli_si128(v_src, 8)));
}
double CV_DECL_ALIGNED(16) ar[4];
_mm_store_pd(ar, v_sum0);
_mm_store_pd(ar + 2, v_sum1);
addChannels(dst, ar, cn);
2015-01-12 15:59:31 +08:00
return x / cn;
}
};
template <>
struct Sum_SIMD<float, double>
{
int operator () (const float * src0, const uchar * mask, double * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
return 0;
int x = 0;
__m128d v_zero = _mm_setzero_pd(), v_sum0 = v_zero, v_sum1 = v_zero;
for ( ; x <= len - 4; x += 4)
{
__m128 v_src = _mm_loadu_ps(src0 + x);
v_sum0 = _mm_add_pd(v_sum0, _mm_cvtps_pd(v_src));
v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8));
v_sum1 = _mm_add_pd(v_sum1, _mm_cvtps_pd(v_src));
}
double CV_DECL_ALIGNED(16) ar[4];
_mm_store_pd(ar, v_sum0);
_mm_store_pd(ar + 2, v_sum1);
addChannels(dst, ar, cn);
2015-01-12 15:59:31 +08:00
return x / cn;
}
};
#elif CV_NEON
2014-09-22 03:38:02 +08:00
template <>
struct Sum_SIMD<uchar, int>
{
int operator () (const uchar * src0, const uchar * mask, int * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4))
return 0;
int x = 0;
uint32x4_t v_sum = vdupq_n_u32(0u);
for ( ; x <= len - 16; x += 16)
{
uint8x16_t v_src = vld1q_u8(src0 + x);
uint16x8_t v_half = vmovl_u8(vget_low_u8(v_src));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_u16(v_sum, vget_low_u16(v_half));
v_sum = vaddw_u16(v_sum, vget_high_u16(v_half));
2014-09-22 03:38:02 +08:00
v_half = vmovl_u8(vget_high_u8(v_src));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_u16(v_sum, vget_low_u16(v_half));
v_sum = vaddw_u16(v_sum, vget_high_u16(v_half));
2014-09-22 03:38:02 +08:00
}
for ( ; x <= len - 8; x += 8)
{
uint16x8_t v_src = vmovl_u8(vld1_u8(src0 + x));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_u16(v_sum, vget_low_u16(v_src));
v_sum = vaddw_u16(v_sum, vget_high_u16(v_src));
2014-09-22 03:38:02 +08:00
}
unsigned int CV_DECL_ALIGNED(16) ar[4];
vst1q_u32(ar, v_sum);
addChannels(dst, ar, cn);
2014-09-22 03:38:02 +08:00
return x / cn;
}
};
template <>
struct Sum_SIMD<schar, int>
{
int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4))
return 0;
int x = 0;
int32x4_t v_sum = vdupq_n_s32(0);
for ( ; x <= len - 16; x += 16)
{
int8x16_t v_src = vld1q_s8(src0 + x);
int16x8_t v_half = vmovl_s8(vget_low_s8(v_src));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_s16(v_sum, vget_low_s16(v_half));
v_sum = vaddw_s16(v_sum, vget_high_s16(v_half));
2014-09-22 03:38:02 +08:00
v_half = vmovl_s8(vget_high_s8(v_src));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_s16(v_sum, vget_low_s16(v_half));
v_sum = vaddw_s16(v_sum, vget_high_s16(v_half));
2014-09-22 03:38:02 +08:00
}
for ( ; x <= len - 8; x += 8)
{
int16x8_t v_src = vmovl_s8(vld1_s8(src0 + x));
2014-09-30 22:34:48 +08:00
v_sum = vaddw_s16(v_sum, vget_low_s16(v_src));
v_sum = vaddw_s16(v_sum, vget_high_s16(v_src));
2014-09-22 03:38:02 +08:00
}
int CV_DECL_ALIGNED(16) ar[4];
vst1q_s32(ar, v_sum);
addChannels(dst, ar, cn);
2014-09-22 03:38:02 +08:00
return x / cn;
}
};
template <>
struct Sum_SIMD<ushort, int>
{
int operator () (const ushort * src0, const uchar * mask, int * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4))
return 0;
int x = 0;
uint32x4_t v_sum = vdupq_n_u32(0u);
for ( ; x <= len - 8; x += 8)
{
uint16x8_t v_src = vld1q_u16(src0 + x);
2014-09-30 22:34:48 +08:00
v_sum = vaddw_u16(v_sum, vget_low_u16(v_src));
v_sum = vaddw_u16(v_sum, vget_high_u16(v_src));
2014-09-22 03:38:02 +08:00
}
for ( ; x <= len - 4; x += 4)
2014-09-30 22:34:48 +08:00
v_sum = vaddw_u16(v_sum, vld1_u16(src0 + x));
2014-09-28 21:36:07 +08:00
2014-09-22 03:38:02 +08:00
unsigned int CV_DECL_ALIGNED(16) ar[4];
vst1q_u32(ar, v_sum);
addChannels(dst, ar, cn);
2014-09-22 03:38:02 +08:00
return x / cn;
}
};
template <>
struct Sum_SIMD<short, int>
{
int operator () (const short * src0, const uchar * mask, int * dst, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2 && cn != 4))
return 0;
int x = 0;
int32x4_t v_sum = vdupq_n_s32(0u);
for ( ; x <= len - 8; x += 8)
{
int16x8_t v_src = vld1q_s16(src0 + x);
2014-09-30 22:34:48 +08:00
v_sum = vaddw_s16(v_sum, vget_low_s16(v_src));
v_sum = vaddw_s16(v_sum, vget_high_s16(v_src));
2014-09-22 03:38:02 +08:00
}
for ( ; x <= len - 4; x += 4)
2014-09-30 22:34:48 +08:00
v_sum = vaddw_s16(v_sum, vld1_s16(src0 + x));
2014-09-28 21:36:07 +08:00
2014-09-22 03:38:02 +08:00
int CV_DECL_ALIGNED(16) ar[4];
vst1q_s32(ar, v_sum);
addChannels(dst, ar, cn);
2014-09-22 03:38:02 +08:00
return x / cn;
}
};
#endif
template<typename T, typename ST>
static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
{
const T* src = src0;
if( !mask )
{
2014-09-22 03:38:02 +08:00
Sum_SIMD<T, ST> vop;
int i = vop(src0, mask, dst, len, cn), k = cn % 4;
src += i * cn;
if( k == 1 )
{
ST s0 = dst[0];
2012-02-10 14:05:04 +08:00
2012-06-08 01:21:29 +08:00
#if CV_ENABLE_UNROLLED
2012-02-10 14:05:04 +08:00
for(; i <= len - 4; i += 4, src += cn*4 )
s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
2012-02-10 14:05:04 +08:00
#endif
for( ; i < len; i++, src += cn )
s0 += src[0];
dst[0] = s0;
}
else if( k == 2 )
{
ST s0 = dst[0], s1 = dst[1];
2014-09-22 03:38:02 +08:00
for( ; i < len; i++, src += cn )
{
s0 += src[0];
s1 += src[1];
}
dst[0] = s0;
dst[1] = s1;
}
else if( k == 3 )
{
ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
2014-09-22 03:38:02 +08:00
for( ; i < len; i++, src += cn )
{
s0 += src[0];
s1 += src[1];
s2 += src[2];
}
dst[0] = s0;
dst[1] = s1;
dst[2] = s2;
}
2012-06-08 01:21:29 +08:00
for( ; k < cn; k += 4 )
{
2014-09-22 03:38:02 +08:00
src = src0 + i*cn + k;
ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
2014-09-22 03:38:02 +08:00
for( ; i < len; i++, src += cn )
{
s0 += src[0]; s1 += src[1];
s2 += src[2]; s3 += src[3];
}
dst[k] = s0;
dst[k+1] = s1;
dst[k+2] = s2;
dst[k+3] = s3;
}
return len;
}
2012-06-08 01:21:29 +08:00
int i, nzm = 0;
if( cn == 1 )
{
ST s = dst[0];
for( i = 0; i < len; i++ )
if( mask[i] )
{
s += src[i];
nzm++;
}
dst[0] = s;
}
else if( cn == 3 )
2010-10-12 20:31:40 +08:00
{
ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
for( i = 0; i < len; i++, src += 3 )
if( mask[i] )
{
s0 += src[0];
s1 += src[1];
s2 += src[2];
nzm++;
}
dst[0] = s0;
dst[1] = s1;
dst[2] = s2;
2010-10-12 20:31:40 +08:00
}
else
{
for( i = 0; i < len; i++, src += cn )
if( mask[i] )
{
int k = 0;
2012-06-08 01:21:29 +08:00
#if CV_ENABLE_UNROLLED
for( ; k <= cn - 4; k += 4 )
{
ST s0, s1;
s0 = dst[k] + src[k];
s1 = dst[k+1] + src[k+1];
dst[k] = s0; dst[k+1] = s1;
s0 = dst[k+2] + src[k+2];
s1 = dst[k+3] + src[k+3];
dst[k+2] = s0; dst[k+3] = s1;
}
2012-02-10 14:05:04 +08:00
#endif
for( ; k < cn; k++ )
dst[k] += src[k];
nzm++;
}
}
return nzm;
}
2011-12-22 20:51:01 +08:00
static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }
typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
static SumFunc getSumFunc(int depth)
{
static SumFunc sumTab[] =
{
(SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
(SumFunc)sum16u, (SumFunc)sum16s,
(SumFunc)sum32s,
(SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
0
};
return sumTab[depth];
}
2011-12-22 20:51:01 +08:00
template<typename T>
static int countNonZero_(const T* src, int len )
{
2012-02-10 14:05:04 +08:00
int i=0, nz = 0;
2012-06-08 01:21:29 +08:00
#if CV_ENABLE_UNROLLED
2012-02-10 14:05:04 +08:00
for(; i <= len - 4; i += 4 )
nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
2012-02-10 14:05:04 +08:00
#endif
for( ; i < len; i++ )
nz += src[i] != 0;
return nz;
}
static int countNonZero8u( const uchar* src, int len )
{
int i=0, nz = 0;
#if CV_SSE2
if(USE_SSE2)//5x-6x
{
__m128i v_zero = _mm_setzero_si128();
__m128i sum = _mm_setzero_si128();
2012-10-17 07:18:30 +08:00
for (; i<=len-16; i+=16)
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi8(r0, v_zero)), v_zero));
}
nz = i - _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum)));
}
2014-09-28 21:36:07 +08:00
#elif CV_NEON
int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
const uchar * src0 = src;
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint8x16_t v_pz = v_zero;
for( ; k <= blockSizej - 16; k += 16 )
v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));
uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);
src0 += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
for( ; i < len; i++ )
nz += src[i] != 0;
return nz;
}
static int countNonZero16u( const ushort* src, int len )
2014-09-28 21:36:07 +08:00
{
int i = 0, nz = 0;
2015-01-12 15:59:28 +08:00
#if CV_SSE2
if (USE_SSE2)
{
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
2015-01-12 15:59:28 +08:00
for ( ; i <= len - 8; i += 8)
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi16(r0, v_zero)), v_zero));
2015-01-12 15:59:28 +08:00
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 1);
2015-01-12 15:59:28 +08:00
src += i;
}
#elif CV_NEON
2014-09-28 21:36:07 +08:00
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zero;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
return nz + countNonZero_(src, len - i);
}
static int countNonZero32s( const int* src, int len )
2014-09-28 21:36:07 +08:00
{
int i = 0, nz = 0;
2015-01-12 15:59:28 +08:00
#if CV_SSE2
if (USE_SSE2)
{
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
2015-01-12 15:59:28 +08:00
for ( ; i <= len - 4; i += 4)
2015-01-12 15:59:28 +08:00
{
__m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi32(r0, v_zero)), v_zero));
2015-01-12 15:59:28 +08:00
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
2015-01-12 15:59:28 +08:00
src += i;
}
#elif CV_NEON
2014-09-28 21:36:07 +08:00
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
int32x4_t v_zero = vdupq_n_s32(0.0f);
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zerou;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
return nz + countNonZero_(src, len - i);
}
static int countNonZero32f( const float* src, int len )
2014-09-28 21:36:07 +08:00
{
int i = 0, nz = 0;
2015-01-12 15:59:28 +08:00
#if CV_SSE2
if (USE_SSE2)
{
__m128 v_zero_f = _mm_setzero_ps();
__m128i v_zero = _mm_setzero_si128 ();
__m128i sum = _mm_setzero_si128();
2015-01-12 15:59:28 +08:00
for ( ; i <= len - 4; i += 4)
2015-01-12 15:59:28 +08:00
{
__m128 r0 = _mm_loadu_ps(src + i);
sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_castps_si128(_mm_cmpeq_ps(r0, v_zero_f))), v_zero));
2015-01-12 15:59:28 +08:00
}
nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
2015-01-12 15:59:28 +08:00
src += i;
}
#elif CV_NEON
2014-09-28 21:36:07 +08:00
int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
uint32x4_t v_nz = vdupq_n_u32(0u);
float32x4_t v_zero = vdupq_n_f32(0.0f);
uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
while( i < len0 )
{
int blockSizei = std::min(len0 - i, blockSize0), j = 0;
while (j < blockSizei)
{
int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
uint16x8_t v_pz = v_zerou;
for( ; k <= blockSizej - 8; k += 8 )
v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));
v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
src += blockSizej;
j += blockSizej;
}
i += blockSizei;
}
CV_DECL_ALIGNED(16) unsigned int buf[4];
vst1q_u32(buf, v_nz);
nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
return nz + countNonZero_(src, len - i);
}
static int countNonZero64f( const double* src, int len )
2015-01-12 15:59:31 +08:00
{
return countNonZero_(src, len);
2015-01-12 15:59:28 +08:00
}
typedef int (*CountNonZeroFunc)(const uchar*, int);
2011-12-22 20:51:01 +08:00
static CountNonZeroFunc getCountNonZeroTab(int depth)
{
static CountNonZeroFunc countNonZeroTab[] =
{
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
(CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
};
return countNonZeroTab[depth];
}
2011-12-22 20:51:01 +08:00
2015-01-12 15:59:31 +08:00
template <typename T, typename ST, typename SQT>
struct SumSqr_SIMD
{
int operator () (const T *, const uchar *, ST *, SQT *, int, int) const
{
return 0;
}
};
template <typename T>
inline void addSqrChannels(T * sum, T * sqsum, T * buf, int cn)
{
for (int i = 0; i < 4; ++i)
{
sum[i % cn] += buf[i];
sqsum[i % cn] += buf[4 + i];
}
}
2015-01-12 15:59:31 +08:00
#if CV_SSE2
template <>
struct SumSqr_SIMD<uchar, int, int>
{
int operator () (const uchar * src0, const uchar * mask, int * sum, int * sqsum, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2) || !USE_SSE2)
return 0;
int x = 0;
__m128i v_zero = _mm_setzero_si128(), v_sum = v_zero, v_sqsum = v_zero;
2016-07-29 19:10:14 +08:00
const int len_16 = len & ~15;
2015-01-12 15:59:31 +08:00
2016-07-29 19:10:14 +08:00
for ( ; x <= len_16 - 16; )
2015-01-12 15:59:31 +08:00
{
2016-07-29 19:10:14 +08:00
const int len_tmp = min(x + 2048, len_16);
__m128i v_sum_tmp = v_zero;
for ( ; x <= len_tmp - 16; x += 16)
{
__m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
__m128i v_half_0 = _mm_unpacklo_epi8(v_src, v_zero);
__m128i v_half_1 = _mm_unpackhi_epi8(v_src, v_zero);
v_sum_tmp = _mm_add_epi16(v_sum_tmp, _mm_add_epi16(v_half_0, v_half_1));
__m128i v_half_2 = _mm_unpacklo_epi16(v_half_0, v_half_1);
__m128i v_half_3 = _mm_unpackhi_epi16(v_half_0, v_half_1);
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_2, v_half_2));
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_3, v_half_3));
}
v_sum = _mm_add_epi32(v_sum, _mm_unpacklo_epi16(v_sum_tmp, v_zero));
v_sum = _mm_add_epi32(v_sum, _mm_unpackhi_epi16(v_sum_tmp, v_zero));
2015-01-12 15:59:31 +08:00
}
for ( ; x <= len - 8; x += 8)
{
__m128i v_src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i const *)(src0 + x)), v_zero);
2016-07-29 19:10:14 +08:00
__m128i v_half_0 = _mm_unpackhi_epi64(v_src, v_src);
__m128i v_sum_tmp = _mm_add_epi16(v_src, v_half_0);
__m128i v_half_1 = _mm_unpacklo_epi16(v_src, v_half_0);
2015-01-12 15:59:31 +08:00
2016-07-29 19:10:14 +08:00
v_sum = _mm_add_epi32(v_sum, _mm_unpacklo_epi16(v_sum_tmp, v_zero));
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_1, v_half_1));
2015-01-12 15:59:31 +08:00
}
int CV_DECL_ALIGNED(16) ar[8];
_mm_store_si128((__m128i*)ar, v_sum);
_mm_store_si128((__m128i*)(ar + 4), v_sqsum);
addSqrChannels(sum, sqsum, ar, cn);
2015-01-12 15:59:31 +08:00
return x / cn;
}
};
template <>
struct SumSqr_SIMD<schar, int, int>
{
int operator () (const schar * src0, const uchar * mask, int * sum, int * sqsum, int len, int cn) const
{
if (mask || (cn != 1 && cn != 2) || !USE_SSE2)
return 0;
int x = 0;
__m128i v_zero = _mm_setzero_si128(), v_sum = v_zero, v_sqsum = v_zero;
2016-07-29 22:18:02 +08:00
const int len_16 = len & ~15;
2015-01-12 15:59:31 +08:00
2016-07-29 22:18:02 +08:00
for ( ; x <= len_16 - 16; )
2015-01-12 15:59:31 +08:00
{
2016-07-29 22:18:02 +08:00
const int len_tmp = min(x + 2048, len_16);
__m128i v_sum_tmp = v_zero;
for ( ; x <= len_tmp - 16; x += 16)
{
__m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
__m128i v_half_0 = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src), 8);
__m128i v_half_1 = _mm_srai_epi16(_mm_unpackhi_epi8(v_zero, v_src), 8);
v_sum_tmp = _mm_add_epi16(v_sum_tmp, _mm_add_epi16(v_half_0, v_half_1));
__m128i v_half_2 = _mm_unpacklo_epi16(v_half_0, v_half_1);
__m128i v_half_3 = _mm_unpackhi_epi16(v_half_0, v_half_1);
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_2, v_half_2));
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_3, v_half_3));
}
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_sum_tmp), 16));
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_sum_tmp), 16));
2015-01-12 15:59:31 +08:00
}
for ( ; x <= len - 8; x += 8)
{
__m128i v_src = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, _mm_loadl_epi64((__m128i const *)(src0 + x))), 8);
2016-07-29 22:18:02 +08:00
__m128i v_half_0 = _mm_unpackhi_epi64(v_src, v_src);
__m128i v_sum_tmp = _mm_add_epi16(v_src, v_half_0);
__m128i v_half_1 = _mm_unpacklo_epi16(v_src, v_half_0);
2015-01-12 15:59:31 +08:00
2016-07-29 22:18:02 +08:00
v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_sum_tmp), 16));
v_sqsum = _mm_add_epi32(v_sqsum, _mm_madd_epi16(v_half_1, v_half_1));
2015-01-12 15:59:31 +08:00
}
int CV_DECL_ALIGNED(16) ar[8];
_mm_store_si128((__m128i*)ar, v_sum);
_mm_store_si128((__m128i*)(ar + 4), v_sqsum);
addSqrChannels(sum, sqsum, ar, cn);
2015-01-12 15:59:31 +08:00
return x / cn;
}
};
#endif
template<typename T, typename ST, typename SQT>
static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
{
const T* src = src0;
2012-06-08 01:21:29 +08:00
if( !mask )
2010-10-12 20:31:40 +08:00
{
2015-01-12 15:59:31 +08:00
SumSqr_SIMD<T, ST, SQT> vop;
int i = vop(src0, mask, sum, sqsum, len, cn), k = cn % 4;
src += i * cn;
2012-06-08 01:21:29 +08:00
if( k == 1 )
{
ST s0 = sum[0];
SQT sq0 = sqsum[0];
2015-01-12 15:59:31 +08:00
for( ; i < len; i++, src += cn )
{
T v = src[0];
s0 += v; sq0 += (SQT)v*v;
}
sum[0] = s0;
sqsum[0] = sq0;
}
else if( k == 2 )
{
ST s0 = sum[0], s1 = sum[1];
SQT sq0 = sqsum[0], sq1 = sqsum[1];
2015-01-12 15:59:31 +08:00
for( ; i < len; i++, src += cn )
{
T v0 = src[0], v1 = src[1];
s0 += v0; sq0 += (SQT)v0*v0;
s1 += v1; sq1 += (SQT)v1*v1;
}
sum[0] = s0; sum[1] = s1;
sqsum[0] = sq0; sqsum[1] = sq1;
}
else if( k == 3 )
{
ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
2015-01-12 15:59:31 +08:00
for( ; i < len; i++, src += cn )
{
T v0 = src[0], v1 = src[1], v2 = src[2];
s0 += v0; sq0 += (SQT)v0*v0;
s1 += v1; sq1 += (SQT)v1*v1;
s2 += v2; sq2 += (SQT)v2*v2;
}
sum[0] = s0; sum[1] = s1; sum[2] = s2;
sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
}
2012-06-08 01:21:29 +08:00
for( ; k < cn; k += 4 )
2010-10-12 20:31:40 +08:00
{
src = src0 + k;
ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
2015-01-12 15:59:31 +08:00
for( ; i < len; i++, src += cn )
{
T v0, v1;
v0 = src[0], v1 = src[1];
s0 += v0; sq0 += (SQT)v0*v0;
s1 += v1; sq1 += (SQT)v1*v1;
v0 = src[2], v1 = src[3];
s2 += v0; sq2 += (SQT)v0*v0;
s3 += v1; sq3 += (SQT)v1*v1;
}
sum[k] = s0; sum[k+1] = s1;
sum[k+2] = s2; sum[k+3] = s3;
sqsum[k] = sq0; sqsum[k+1] = sq1;
sqsum[k+2] = sq2; sqsum[k+3] = sq3;
2010-10-12 20:31:40 +08:00
}
return len;
2010-10-12 20:31:40 +08:00
}
2012-06-08 01:21:29 +08:00
int i, nzm = 0;
if( cn == 1 )
{
ST s0 = sum[0];
SQT sq0 = sqsum[0];
for( i = 0; i < len; i++ )
if( mask[i] )
{
T v = src[i];
s0 += v; sq0 += (SQT)v*v;
nzm++;
}
sum[0] = s0;
sqsum[0] = sq0;
}
else if( cn == 3 )
{
ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
for( i = 0; i < len; i++, src += 3 )
if( mask[i] )
{
T v0 = src[0], v1 = src[1], v2 = src[2];
s0 += v0; sq0 += (SQT)v0*v0;
s1 += v1; sq1 += (SQT)v1*v1;
s2 += v2; sq2 += (SQT)v2*v2;
nzm++;
}
sum[0] = s0; sum[1] = s1; sum[2] = s2;
sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
}
else
{
for( i = 0; i < len; i++, src += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
{
T v = src[k];
ST s = sum[k] + v;
SQT sq = sqsum[k] + (SQT)v*v;
sum[k] = s; sqsum[k] = sq;
}
nzm++;
}
}
return nzm;
2012-06-08 01:21:29 +08:00
}
static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }
typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
static SumSqrFunc getSumSqrTab(int depth)
{
static SumSqrFunc sumSqrTab[] =
{
(SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
(SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
};
return sumSqrTab[depth];
}
2014-02-05 23:10:02 +08:00
#ifdef HAVE_OPENCL
2013-12-01 18:23:07 +08:00
template <typename T> Scalar ocl_part_sum(Mat m)
{
CV_Assert(m.rows == 1);
Scalar s = Scalar::all(0);
int cn = m.channels();
const T * const ptr = m.ptr<T>(0);
for (int x = 0, w = m.cols * cn; x < w; )
for (int c = 0; c < cn; ++c, ++x)
s[c] += ptr[x];
return s;
}
2013-12-01 18:45:48 +08:00
enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
2013-12-01 18:23:07 +08:00
2014-06-07 19:51:41 +08:00
static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
2013-12-01 18:23:07 +08:00
{
2013-12-01 18:45:48 +08:00
CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
2013-12-01 18:23:07 +08:00
2014-06-07 19:51:41 +08:00
const ocl::Device & dev = ocl::Device::getDefault();
bool doubleSupport = dev.doubleFPConfig() > 0,
haveMask = _mask.kind() != _InputArray::NONE,
haveSrc2 = _src2.kind() != _InputArray::NONE;
2014-05-28 23:23:13 +08:00
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
2014-07-28 16:21:00 +08:00
kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1,
2014-05-28 23:23:13 +08:00
mcn = std::max(cn, kercn);
2014-06-07 19:51:41 +08:00
CV_Assert(!haveSrc2 || _src2.type() == type);
2014-07-25 18:51:30 +08:00
int convert_cn = haveSrc2 ? mcn : cn;
2013-12-01 18:23:07 +08:00
2014-03-12 05:00:29 +08:00
if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
2013-12-01 18:23:07 +08:00
return false;
2014-06-07 19:51:41 +08:00
int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
size_t wgs = dev.maxWorkGroupSize();
2013-12-01 18:23:07 +08:00
2014-01-02 03:46:19 +08:00
int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
dtype = CV_MAKE_TYPE(ddepth, cn);
CV_Assert(!haveMask || _mask.type() == CV_8UC1);
2013-12-01 18:23:07 +08:00
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1;
wgs2_aligned >>= 1;
static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
2014-06-11 23:30:10 +08:00
char cvt[2][40];
2014-05-28 23:23:13 +08:00
String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
2014-06-11 23:30:10 +08:00
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
2014-05-28 23:23:13 +08:00
ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
ocl::typeToStr(ddepth), ddepth, cn,
2014-06-11 23:30:10 +08:00
ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
2013-12-01 18:23:07 +08:00
opMap[sum_op], (int)wgs, wgs2_aligned,
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2014-05-28 20:43:58 +08:00
haveMask ? " -D HAVE_MASK" : "",
_src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
2014-06-07 19:51:41 +08:00
haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
2014-06-11 23:30:10 +08:00
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
2014-07-25 18:51:30 +08:00
depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert");
2014-05-28 23:23:13 +08:00
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
2013-12-03 04:41:07 +08:00
if (k.empty())
return false;
2014-06-07 19:51:41 +08:00
UMat src = _src.getUMat(), src2 = _src2.getUMat(),
db(1, dbsize, dtype), mask = _mask.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dbarg = ocl::KernelArg::PtrWriteOnly(db),
2014-06-07 19:51:41 +08:00
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);
if (haveMask)
2014-06-07 19:51:41 +08:00
{
if (haveSrc2)
k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
else
k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
}
else
2014-06-07 19:51:41 +08:00
{
if (haveSrc2)
k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
else
k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);
}
2013-12-01 18:23:07 +08:00
2014-06-07 19:51:41 +08:00
size_t globalsize = ngroups * wgs;
2014-01-02 03:46:19 +08:00
if (k.run(1, &globalsize, &wgs, false))
2013-12-01 18:23:07 +08:00
{
typedef Scalar (*part_sum)(Mat m);
part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
func = funcs[ddepth - CV_32S];
2014-06-07 19:51:41 +08:00
Mat mres = db.getMat(ACCESS_READ);
if (calc2)
2014-06-09 04:50:14 +08:00
const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));
2014-06-07 19:51:41 +08:00
2014-06-09 04:50:14 +08:00
res = func(mres.colRange(0, ngroups));
2013-12-01 18:23:07 +08:00
return true;
}
return false;
}
2014-01-25 01:03:31 +08:00
#endif
#ifdef HAVE_IPP
static bool ipp_sum(Mat &src, Scalar &_res)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
int cn = src.channels();
if (cn > 4)
return false;
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
{
IppiSize sz = { cols, rows };
int type = src.type();
typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
ippiSumFuncHint ippiSumHint =
type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
0;
ippiSumFuncNoHint ippiSum =
type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
0;
CV_Assert(!ippiSumHint || !ippiSum);
if( ippiSumHint || ippiSum )
{
Ipp64f res[4];
IppStatus ret = ippiSumHint ?
CV_INSTRUMENT_FUN_IPP(ippiSumHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
CV_INSTRUMENT_FUN_IPP(ippiSum, src.ptr(), (int)src.step[0], sz, res);
if( ret >= 0 )
{
for( int i = 0; i < cn; i++ )
_res[i] = res[i];
return true;
}
}
}
#else
CV_UNUSED(src); CV_UNUSED(_res);
#endif
return false;
}
#endif
}
cv::Scalar cv::sum( InputArray _src )
{
CV_INSTRUMENT_REGION()
#if defined HAVE_OPENCL || defined HAVE_IPP
2013-12-01 18:23:07 +08:00
Scalar _res;
#endif
#ifdef HAVE_OPENCL
2014-06-19 19:18:52 +08:00
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
2014-02-25 23:32:55 +08:00
ocl_sum(_src, _res, OCL_OP_SUM),
_res)
2014-01-25 01:03:31 +08:00
#endif
2013-12-01 18:23:07 +08:00
Mat src = _src.getMat();
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_sum(src, _res), _res);
2013-08-21 20:44:09 +08:00
int k, cn = src.channels(), depth = src.depth();
SumFunc func = getSumFunc(depth);
CV_Assert( cn <= 4 && func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1];
NAryMatIterator it(arrays, ptrs);
Scalar s;
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
int j, count = 0;
AutoBuffer<int> _buf;
int* buf = (int*)&s[0];
size_t esz = 0;
bool blockSum = depth < CV_32S;
2012-06-08 01:21:29 +08:00
if( blockSum )
{
intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
blockSize = std::min(blockSize, intSumBlockSize);
_buf.allocate(cn);
buf = _buf;
2011-12-22 20:51:01 +08:00
for( k = 0; k < cn; k++ )
buf[k] = 0;
esz = src.elemSize();
}
2011-12-22 20:51:01 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
for( j = 0; j < total; j += blockSize )
{
int bsz = std::min(total - j, blockSize);
func( ptrs[0], 0, (uchar*)buf, bsz, cn );
count += bsz;
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
{
for( k = 0; k < cn; k++ )
{
s[k] += buf[k];
buf[k] = 0;
}
count = 0;
}
ptrs[0] += bsz*esz;
}
}
return s;
}
2014-01-25 01:03:31 +08:00
#ifdef HAVE_OPENCL
2013-12-01 07:02:13 +08:00
namespace cv {
static bool ocl_countNonZero( InputArray _src, int & res )
{
2014-05-28 22:39:02 +08:00
int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
2013-12-01 07:02:13 +08:00
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (depth == CV_64F && !doubleSupport)
return false;
int dbsize = ocl::Device::getDefault().maxComputeUnits();
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1;
wgs2_aligned >>= 1;
2013-12-01 18:23:07 +08:00
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
2014-05-28 23:23:13 +08:00
format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
" -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
2014-05-28 22:39:02 +08:00
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
ocl::typeToStr(depth), (int)wgs, kercn,
2014-05-28 20:43:58 +08:00
wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
_src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
2013-12-03 04:41:07 +08:00
if (k.empty())
return false;
UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
2013-12-01 07:02:13 +08:00
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
dbsize, ocl::KernelArg::PtrWriteOnly(db));
size_t globalsize = dbsize * wgs;
if (k.run(1, &globalsize, &wgs, true))
return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
2013-12-01 07:02:13 +08:00
return false;
}
}
2014-01-25 01:03:31 +08:00
#endif
#if defined HAVE_IPP
namespace cv {
static bool ipp_countNonZero( Mat &src, int &res )
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201801
// Poor performance of SSE42
if(cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
Ipp32s count = 0;
int depth = src.depth();
if(src.dims <= 2)
{
IppStatus status;
IppiSize size = {src.cols*src.channels(), src.rows};
if(depth == CV_8U)
status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, (const Ipp8u *)src.ptr(), (int)src.step, size, &count, 0, 0);
else if(depth == CV_32F)
status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, (const Ipp32f *)src.ptr(), (int)src.step, size, &count, 0, 0);
else
return false;
if(status < 0)
return false;
res = size.width*size.height - count;
}
else
{
IppStatus status;
const Mat *arrays[] = {&src, NULL};
Mat planes[1];
NAryMatIterator it(arrays, planes, 1);
IppiSize size = {(int)it.size*src.channels(), 1};
res = 0;
for (size_t i = 0; i < it.nplanes; i++, ++it)
{
if(depth == CV_8U)
status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_8u_C1R, it.planes->ptr<Ipp8u>(), (int)it.planes->step, size, &count, 0, 0);
else if(depth == CV_32F)
status = CV_INSTRUMENT_FUN_IPP(ippiCountInRange_32f_C1R, it.planes->ptr<Ipp32f>(), (int)it.planes->step, size, &count, 0, 0);
else
return false;
if(status < 0 || (int)it.planes->total()*src.channels() < count)
return false;
res += (int)it.planes->total()*src.channels() - count;
}
}
return true;
}
}
#endif
int cv::countNonZero( InputArray _src )
{
CV_INSTRUMENT_REGION()
2014-04-09 19:21:35 +08:00
int type = _src.type(), cn = CV_MAT_CN(type);
CV_Assert( cn == 1 );
2013-12-01 07:02:13 +08:00
#if defined HAVE_OPENCL || defined HAVE_IPP
2013-12-01 07:02:13 +08:00
int res = -1;
#endif
#ifdef HAVE_OPENCL
2014-06-19 19:18:52 +08:00
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_countNonZero(_src, res),
res)
2014-01-25 01:03:31 +08:00
#endif
2013-12-01 07:02:13 +08:00
Mat src = _src.getMat();
CV_IPP_RUN_FAST(ipp_countNonZero(src, res), res);
2014-04-09 19:21:35 +08:00
CountNonZeroFunc func = getCountNonZeroTab(src.depth());
2013-12-01 07:02:13 +08:00
CV_Assert( func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, nz = 0;
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
nz += func( ptrs[0], total );
2012-06-08 01:21:29 +08:00
return nz;
2012-06-08 01:21:29 +08:00
}
2011-12-22 20:51:01 +08:00
#if defined HAVE_IPP
namespace cv
{
static bool ipp_mean( Mat &src, Mat &mask, Scalar &ret )
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
size_t total_size = src.total();
int cn = src.channels();
if (cn > 4)
return false;
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
2013-08-21 22:52:15 +08:00
{
IppiSize sz = { cols, rows };
int type = src.type();
if( !mask.empty() )
2013-08-21 22:52:15 +08:00
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiMaskMeanFuncC1 ippiMean_C1MR =
type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
0;
if( ippiMean_C1MR )
2013-08-21 22:52:15 +08:00
{
Ipp64f res;
if( CV_INSTRUMENT_FUN_IPP(ippiMean_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
{
ret = Scalar(res);
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
ippiMaskMeanFuncC3 ippiMean_C3MR =
type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
0;
if( ippiMean_C3MR )
{
Ipp64f res1, res2, res3;
if( CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_C3MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
2013-08-21 22:52:15 +08:00
{
ret = Scalar(res1, res2, res3);
return true;
2013-08-21 22:52:15 +08:00
}
}
}
else
{
typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
ippiMeanFuncHint ippiMeanHint =
type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
0;
ippiMeanFuncNoHint ippiMean =
type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
0;
// Make sure only zero or one version of the function pointer is valid
CV_Assert(!ippiMeanHint || !ippiMean);
if( ippiMeanHint || ippiMean )
2013-08-21 22:52:15 +08:00
{
Ipp64f res[4];
IppStatus status = ippiMeanHint ? CV_INSTRUMENT_FUN_IPP(ippiMeanHint, src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
CV_INSTRUMENT_FUN_IPP(ippiMean, src.ptr(), (int)src.step[0], sz, res);
if( status >= 0 )
2013-08-21 22:52:15 +08:00
{
for( int i = 0; i < cn; i++ )
ret[i] = res[i];
return true;
2013-08-21 22:52:15 +08:00
}
}
}
}
return false;
#else
return false;
#endif
}
}
2013-07-26 01:27:35 +08:00
#endif
2013-08-21 20:44:09 +08:00
cv::Scalar cv::mean( InputArray _src, InputArray _mask )
{
CV_INSTRUMENT_REGION()
Mat src = _src.getMat(), mask = _mask.getMat();
CV_Assert( mask.empty() || mask.type() == CV_8U );
int k, cn = src.channels(), depth = src.depth();
Scalar s;
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_mean(src, mask, s), s)
SumFunc func = getSumFunc(depth);
2012-06-08 01:21:29 +08:00
CV_Assert( cn <= 4 && func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
int j, count = 0;
AutoBuffer<int> _buf;
int* buf = (int*)&s[0];
bool blockSum = depth <= CV_16S;
size_t esz = 0, nz0 = 0;
2012-06-08 01:21:29 +08:00
if( blockSum )
{
intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
blockSize = std::min(blockSize, intSumBlockSize);
_buf.allocate(cn);
buf = _buf;
2012-06-08 01:21:29 +08:00
for( k = 0; k < cn; k++ )
buf[k] = 0;
esz = src.elemSize();
}
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
for( j = 0; j < total; j += blockSize )
{
int bsz = std::min(total - j, blockSize);
int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
count += nz;
nz0 += nz;
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
{
for( k = 0; k < cn; k++ )
{
s[k] += buf[k];
buf[k] = 0;
}
count = 0;
}
ptrs[0] += bsz*esz;
if( ptrs[1] )
ptrs[1] += bsz;
}
}
return s*(nz0 ? 1./nz0 : 0);
2012-06-08 01:21:29 +08:00
}
2014-01-25 01:03:31 +08:00
#ifdef HAVE_OPENCL
2013-12-01 18:45:48 +08:00
namespace cv {
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
2013-12-01 18:45:48 +08:00
{
CV_INSTRUMENT_REGION_OPENCL()
bool haveMask = _mask.kind() != _InputArray::NONE;
2014-05-21 22:12:26 +08:00
int nz = haveMask ? -1 : (int)_src.total();
2017-07-21 21:42:28 +08:00
Scalar mean(0), stddev(0);
const int cn = _src.channels();
if (cn > 4)
return false;
2013-12-01 18:45:48 +08:00
2014-05-21 22:12:26 +08:00
{
int type = _src.type(), depth = CV_MAT_DEPTH(type);
2014-05-21 22:12:26 +08:00
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
isContinuous = _src.isContinuous(),
isMaskContinuous = _mask.isContinuous();
2014-07-03 19:50:56 +08:00
const ocl::Device &defDev = ocl::Device::getDefault();
int groups = defDev.maxComputeUnits();
if (defDev.isIntel())
{
static const int subSliceEUCount = 10;
groups = (groups / subSliceEUCount) * 2;
}
size_t wgs = defDev.maxWorkGroupSize();
2014-05-21 22:12:26 +08:00
int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
dtype = CV_MAKE_TYPE(ddepth, cn),
sqdtype = CV_MAKETYPE(sqddepth, cn);
CV_Assert(!haveMask || _mask.type() == CV_8UC1);
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1;
wgs2_aligned >>= 1;
if ( (!doubleSupport && depth == CV_64F) )
2014-05-21 22:12:26 +08:00
return false;
char cvt[2][40];
String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
2014-05-21 22:12:26 +08:00
" -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
ocl::typeToStr(type), ocl::typeToStr(depth),
ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
2014-05-21 22:12:26 +08:00
ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
(int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
if (k.empty())
return false;
int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dbarg = ocl::KernelArg::PtrWriteOnly(db),
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
if (haveMask)
k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
else
k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
size_t globalsize = groups * wgs;
if(!k.run(1, &globalsize, &wgs, false))
2014-05-21 22:12:26 +08:00
return false;
typedef Scalar (* part_sum)(Mat m);
part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
Mat dbm = db.getMat(ACCESS_READ);
mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));
2014-05-21 22:12:26 +08:00
if (haveMask)
nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
2014-05-21 22:12:26 +08:00
groups * (CV_ELEM_SIZE(dtype) +
CV_ELEM_SIZE(sqdtype))))[0]);
}
2014-01-09 21:32:13 +08:00
double total = nz != 0 ? 1.0 / nz : 0;
int k, j;
2013-12-01 18:45:48 +08:00
for (int i = 0; i < cn; ++i)
{
mean[i] *= total;
stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
}
for( j = 0; j < 2; j++ )
{
const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
_OutputArray _dst = j == 0 ? _mean : _sdv;
if( !_dst.needed() )
continue;
if( !_dst.fixedSize() )
_dst.create(cn, 1, CV_64F, -1, true);
Mat dst = _dst.getMat();
int dcn = (int)dst.total();
CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
(dst.cols == 1 || dst.rows == 1) && dcn >= cn );
double* dptr = dst.ptr<double>();
for( k = 0; k < cn; k++ )
dptr[k] = sptr[k];
for( ; k < dcn; k++ )
dptr[k] = 0;
}
return true;
}
}
2011-12-22 20:51:01 +08:00
2014-01-25 01:03:31 +08:00
#endif
#ifdef HAVE_OPENVX
namespace cv
{
static bool openvx_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size / rows) : 0;
if (src.type() != CV_8UC1|| !mask.empty() ||
(src.dims != 2 && !(src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size))
)
return false;
try
{
ivx::Context ctx = ovx::getOpenVXContext();
#ifndef VX_VERSION_1_1
if (ctx.vendorID() == VX_ID_KHRONOS)
return false; // Do not use OpenVX meanStdDev estimation for sample 1.0.1 implementation due to lack of accuracy
#endif
ivx::Image
ia = ivx::Image::createFromHandle(ctx, VX_DF_IMAGE_U8,
ivx::Image::createAddressing(cols, rows, 1, (vx_int32)(src.step[0])), src.ptr());
vx_float32 mean_temp, stddev_temp;
ivx::IVX_CHECK_STATUS(vxuMeanStdDev(ctx, ia, &mean_temp, &stddev_temp));
if (_mean.needed())
{
if (!_mean.fixedSize())
_mean.create(1, 1, CV_64F, -1, true);
Mat mean = _mean.getMat();
CV_Assert(mean.type() == CV_64F && mean.isContinuous() &&
(mean.cols == 1 || mean.rows == 1) && mean.total() >= 1);
double *pmean = mean.ptr<double>();
pmean[0] = mean_temp;
for (int c = 1; c < (int)mean.total(); c++)
pmean[c] = 0;
}
if (_sdv.needed())
{
if (!_sdv.fixedSize())
_sdv.create(1, 1, CV_64F, -1, true);
Mat stddev = _sdv.getMat();
CV_Assert(stddev.type() == CV_64F && stddev.isContinuous() &&
(stddev.cols == 1 || stddev.rows == 1) && stddev.total() >= 1);
double *pstddev = stddev.ptr<double>();
pstddev[0] = stddev_temp;
for (int c = 1; c < (int)stddev.total(); c++)
pstddev[c] = 0;
}
}
catch (ivx::RuntimeError & e)
{
VX_DbgThrow(e.what());
}
catch (ivx::WrapperError & e)
{
VX_DbgThrow(e.what());
}
return true;
}
}
#endif
#ifdef HAVE_IPP
namespace cv
{
static bool ipp_meanStdDev(Mat& src, OutputArray _mean, OutputArray _sdv, Mat& mask)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
int cn = src.channels();
#if IPP_VERSION_X100 < 201801
// IPP_DISABLE: C3C functions can read outside of allocated memory
if (cn > 1)
return false;
#endif
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
{
Ipp64f mean_temp[3];
Ipp64f stddev_temp[3];
Ipp64f *pmean = &mean_temp[0];
Ipp64f *pstddev = &stddev_temp[0];
Mat mean, stddev;
int dcn_mean = -1;
if( _mean.needed() )
{
if( !_mean.fixedSize() )
_mean.create(cn, 1, CV_64F, -1, true);
mean = _mean.getMat();
dcn_mean = (int)mean.total();
pmean = mean.ptr<Ipp64f>();
}
int dcn_stddev = -1;
if( _sdv.needed() )
{
if( !_sdv.fixedSize() )
_sdv.create(cn, 1, CV_64F, -1, true);
stddev = _sdv.getMat();
dcn_stddev = (int)stddev.total();
pstddev = stddev.ptr<Ipp64f>();
}
for( int c = cn; c < dcn_mean; c++ )
pmean[c] = 0;
for( int c = cn; c < dcn_stddev; c++ )
pstddev[c] = 0;
IppiSize sz = { cols, rows };
int type = src.type();
if( !mask.empty() )
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC1 ippiMean_StdDev_C1MR =
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
0;
if( ippiMean_StdDev_C1MR )
{
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC3 ippiMean_StdDev_C3CMR =
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
0;
if( ippiMean_StdDev_C3CMR )
{
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CMR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
}
}
else
{
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
ippiMeanStdDevFuncC1 ippiMean_StdDev_C1R =
type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 810)
type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
#endif
0;
if( ippiMean_StdDev_C1R )
{
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C1R, src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
{
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
ippiMeanStdDevFuncC3 ippiMean_StdDev_C3CR =
type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
0;
if( ippiMean_StdDev_C3CR )
{
if( CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiMean_StdDev_C3CR, src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
{
return true;
}
}
}
}
#else
CV_UNUSED(src); CV_UNUSED(_mean); CV_UNUSED(_sdv); CV_UNUSED(mask);
#endif
return false;
}
}
#endif
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
CV_INSTRUMENT_REGION()
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_meanStdDev(_src, _mean, _sdv, _mask))
Mat src = _src.getMat(), mask = _mask.getMat();
CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows),
openvx_meanStdDev(src, _mean, _sdv, mask))
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_meanStdDev(src, _mean, _sdv, mask));
int k, cn = src.channels(), depth = src.depth();
SumSqrFunc func = getSumSqrTab(depth);
2012-06-08 01:21:29 +08:00
CV_Assert( func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
int j, count = 0, nz0 = 0;
AutoBuffer<double> _buf(cn*4);
double *s = (double*)_buf, *sq = s + cn;
int *sbuf = (int*)s, *sqbuf = (int*)sq;
bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
size_t esz = 0;
2012-06-08 01:21:29 +08:00
for( k = 0; k < cn; k++ )
s[k] = sq[k] = 0;
2012-06-08 01:21:29 +08:00
if( blockSum )
{
intSumBlockSize = 1 << 15;
blockSize = std::min(blockSize, intSumBlockSize);
sbuf = (int*)(sq + cn);
if( blockSqSum )
sqbuf = sbuf + cn;
for( k = 0; k < cn; k++ )
sbuf[k] = sqbuf[k] = 0;
esz = src.elemSize();
}
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
{
for( j = 0; j < total; j += blockSize )
2010-10-12 20:31:40 +08:00
{
int bsz = std::min(total - j, blockSize);
int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
count += nz;
nz0 += nz;
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
{
for( k = 0; k < cn; k++ )
{
s[k] += sbuf[k];
sbuf[k] = 0;
}
if( blockSqSum )
{
for( k = 0; k < cn; k++ )
{
sq[k] += sqbuf[k];
sqbuf[k] = 0;
}
}
count = 0;
}
ptrs[0] += bsz*esz;
if( ptrs[1] )
ptrs[1] += bsz;
2010-10-12 20:31:40 +08:00
}
}
2012-06-08 01:21:29 +08:00
double scale = nz0 ? 1./nz0 : 0.;
for( k = 0; k < cn; k++ )
{
s[k] *= scale;
sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
}
2012-06-08 01:21:29 +08:00
for( j = 0; j < 2; j++ )
{
const double* sptr = j == 0 ? s : sq;
_OutputArray _dst = j == 0 ? _mean : _sdv;
if( !_dst.needed() )
continue;
if( !_dst.fixedSize() )
_dst.create(cn, 1, CV_64F, -1, true);
Mat dst = _dst.getMat();
int dcn = (int)dst.total();
CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
2011-12-22 20:51:01 +08:00
(dst.cols == 1 || dst.rows == 1) && dcn >= cn );
double* dptr = dst.ptr<double>();
for( k = 0; k < cn; k++ )
dptr[k] = sptr[k];
for( ; k < dcn; k++ )
dptr[k] = 0;
}
}
/****************************************************************************************\
* minMaxLoc *
\****************************************************************************************/
namespace cv
{
template<typename T, typename WT> static void
minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
{
WT minVal = *_minVal, maxVal = *_maxVal;
size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
2012-06-08 01:21:29 +08:00
if( !mask )
{
for( int i = 0; i < len; i++ )
{
T val = src[i];
if( val < minVal )
{
minVal = val;
minIdx = startIdx + i;
}
if( val > maxVal )
{
maxVal = val;
maxIdx = startIdx + i;
}
}
}
else
{
for( int i = 0; i < len; i++ )
{
T val = src[i];
if( mask[i] && val < minVal )
{
minVal = val;
minIdx = startIdx + i;
}
if( mask[i] && val > maxVal )
{
maxVal = val;
maxIdx = startIdx + i;
}
}
}
*_minIdx = minIdx;
*_maxIdx = maxIdx;
*_minVal = minVal;
*_maxVal = maxVal;
}
static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
size_t* minidx, size_t* maxidx, int len, size_t startidx )
2012-06-08 01:21:29 +08:00
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
2011-12-22 20:51:01 +08:00
typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
static MinMaxIdxFunc getMinmaxTab(int depth)
{
static MinMaxIdxFunc minmaxTab[] =
{
(MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
(MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
(MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
(MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
0
};
return minmaxTab[depth];
}
2011-12-22 20:51:01 +08:00
2010-10-12 20:31:40 +08:00
static void ofs2idx(const Mat& a, size_t ofs, int* idx)
{
int i, d = a.dims;
if( ofs > 0 )
{
ofs--;
for( i = d-1; i >= 0; i-- )
{
int sz = a.size[i];
idx[i] = (int)(ofs % sz);
ofs /= sz;
}
}
else
2010-10-12 20:31:40 +08:00
{
for( i = d-1; i >= 0; i-- )
idx[i] = -1;
2010-10-12 20:31:40 +08:00
}
}
2011-12-22 20:51:01 +08:00
2014-01-25 01:03:31 +08:00
#ifdef HAVE_OPENCL
2013-12-05 20:34:23 +08:00
#define MINMAX_STRUCT_ALIGNMENT 8 // sizeof double
2013-12-05 20:34:23 +08:00
template <typename T>
2014-06-08 00:53:20 +08:00
void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
2014-06-04 22:22:55 +08:00
int* minLoc, int* maxLoc,
2014-06-08 00:53:20 +08:00
int groupnum, int cols, double * maxVal2)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
uint index_max = std::numeric_limits<uint>::max();
T minval = std::numeric_limits<T>::max();
2014-06-08 00:53:20 +08:00
T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
2014-06-04 22:22:55 +08:00
uint minloc = index_max, maxloc = index_max;
size_t index = 0;
2014-06-08 00:53:20 +08:00
const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
2014-06-04 22:22:55 +08:00
const uint * minlocptr = NULL, * maxlocptr = NULL;
if (minVal || minLoc)
{
minptr = db.ptr<T>();
2014-06-04 22:22:55 +08:00
index += sizeof(T) * groupnum;
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
2014-06-04 22:22:55 +08:00
}
if (maxVal || maxLoc)
{
maxptr = (const T *)(db.ptr() + index);
2014-06-04 22:22:55 +08:00
index += sizeof(T) * groupnum;
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
2014-06-04 22:22:55 +08:00
}
if (minLoc)
{
minlocptr = (const uint *)(db.ptr() + index);
2014-06-04 22:22:55 +08:00
index += sizeof(uint) * groupnum;
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
2014-06-04 22:22:55 +08:00
}
if (maxLoc)
2014-06-08 00:53:20 +08:00
{
maxlocptr = (const uint *)(db.ptr() + index);
2014-06-08 00:53:20 +08:00
index += sizeof(uint) * groupnum;
index = alignSize(index, MINMAX_STRUCT_ALIGNMENT);
2014-06-08 00:53:20 +08:00
}
if (maxVal2)
maxptr2 = (const T *)(db.ptr() + index);
2014-06-04 22:22:55 +08:00
for (int i = 0; i < groupnum; i++)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
if (minptr && minptr[i] <= minval)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
if (minptr[i] == minval)
{
if (minlocptr)
minloc = std::min(minlocptr[i], minloc);
}
else
{
if (minlocptr)
minloc = minlocptr[i];
minval = minptr[i];
}
}
if (maxptr && maxptr[i] >= maxval)
{
if (maxptr[i] == maxval)
{
if (maxlocptr)
maxloc = std::min(maxlocptr[i], maxloc);
}
else
{
if (maxlocptr)
maxloc = maxlocptr[i];
maxval = maxptr[i];
}
2013-12-05 20:34:23 +08:00
}
2014-06-08 00:53:20 +08:00
if (maxptr2 && maxptr2[i] > maxval2)
maxval2 = maxptr2[i];
2013-12-05 20:34:23 +08:00
}
2014-06-04 22:22:55 +08:00
bool zero_mask = (minLoc && minloc == index_max) ||
(maxLoc && maxloc == index_max);
if (minVal)
2014-06-04 22:22:55 +08:00
*minVal = zero_mask ? 0 : (double)minval;
if (maxVal)
2014-06-04 22:22:55 +08:00
*maxVal = zero_mask ? 0 : (double)maxval;
2014-06-08 00:53:20 +08:00
if (maxVal2)
*maxVal2 = zero_mask ? 0 : (double)maxval2;
2014-06-04 22:22:55 +08:00
if (minLoc)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
minLoc[0] = zero_mask ? -1 : minloc / cols;
minLoc[1] = zero_mask ? -1 : minloc % cols;
2013-12-05 20:34:23 +08:00
}
if (maxLoc)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
maxLoc[0] = zero_mask ? -1 : maxloc / cols;
maxLoc[1] = zero_mask ? -1 : maxloc % cols;
2013-12-05 20:34:23 +08:00
}
}
2014-06-08 00:53:20 +08:00
typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal,
int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2);
2013-12-05 20:34:23 +08:00
2014-06-06 21:15:19 +08:00
static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
2014-06-08 00:53:20 +08:00
int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL)
2013-12-05 20:34:23 +08:00
{
2014-06-04 22:22:55 +08:00
const ocl::Device & dev = ocl::Device::getDefault();
#ifdef __ANDROID__
if (dev.isNVidia())
return false;
#endif
2014-06-07 19:51:41 +08:00
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
haveSrc2 = _src2.kind() != _InputArray::NONE;
2014-06-04 22:22:55 +08:00
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
2014-06-08 00:53:20 +08:00
// disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014)
if ((haveMask || type == CV_32FC1) && dev.isAMD())
return false;
2014-06-08 00:53:20 +08:00
2014-06-09 04:50:14 +08:00
CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
(cn >= 1 && !minLoc && !maxLoc) );
2014-06-08 00:53:20 +08:00
2014-06-06 21:15:19 +08:00
if (ddepth < 0)
ddepth = depth;
2013-12-05 20:34:23 +08:00
2014-06-07 19:51:41 +08:00
CV_Assert(!haveSrc2 || _src2.type() == type);
if (depth == CV_32S)
2014-07-28 23:32:06 +08:00
return false;
2014-06-06 21:15:19 +08:00
if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
2013-12-05 20:34:23 +08:00
return false;
2014-06-04 22:22:55 +08:00
int groupnum = dev.maxComputeUnits();
size_t wgs = dev.maxWorkGroupSize();
2013-12-05 20:34:23 +08:00
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
wgs2_aligned <<= 1;
wgs2_aligned >>= 1;
2014-06-04 22:22:55 +08:00
bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
// in case of mask we must know whether mask is filled with zeros or not
// so let's calculate min or max location, if it's undefined, so mask is zeros
if (!(needMaxLoc || needMinLoc) && haveMask)
2014-06-06 21:15:19 +08:00
{
2014-06-04 22:22:55 +08:00
if (needMinVal)
needMinLoc = true;
else
needMaxLoc = true;
2014-06-06 21:15:19 +08:00
}
2014-06-04 22:22:55 +08:00
2014-06-11 23:30:10 +08:00
char cvt[2][40];
2014-06-04 22:22:55 +08:00
String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
2014-06-06 21:15:19 +08:00
" -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
" -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s"
" -D MINMAX_STRUCT_ALIGNMENT=%d",
2014-06-04 22:22:55 +08:00
depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
2014-05-28 20:43:58 +08:00
_src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
2014-06-04 22:22:55 +08:00
_mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
2014-06-06 21:15:19 +08:00
needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
2014-06-11 23:30:10 +08:00
ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]),
absValues ? " -D OP_ABS" : "",
2014-06-08 00:53:20 +08:00
haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
2014-06-11 23:30:10 +08:00
haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert",
MINMAX_STRUCT_ALIGNMENT);
2013-12-05 20:34:23 +08:00
2014-06-04 22:22:55 +08:00
ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
2013-12-05 20:34:23 +08:00
if (k.empty())
return false;
2014-06-06 21:15:19 +08:00
int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
2014-06-04 22:22:55 +08:00
dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
2014-06-07 19:51:41 +08:00
(needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
(maxVal2 ? esz : 0))
+ 5 * MINMAX_STRUCT_ALIGNMENT;
2014-06-07 19:51:41 +08:00
UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
2013-12-05 20:34:23 +08:00
if (cn > 1 && !haveMask)
2014-06-07 19:51:41 +08:00
{
2013-12-05 20:34:23 +08:00
src = src.reshape(1);
2014-06-07 19:51:41 +08:00
src2 = src2.reshape(1);
}
2013-12-05 20:34:23 +08:00
2014-06-07 19:51:41 +08:00
if (haveSrc2)
{
if (!haveMask)
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));
else
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask),
ocl::KernelArg::ReadOnlyNoSize(src2));
}
2013-12-05 20:34:23 +08:00
else
2014-06-07 19:51:41 +08:00
{
if (!haveMask)
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
groupnum, ocl::KernelArg::PtrWriteOnly(db));
else
k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
}
2013-12-05 20:34:23 +08:00
size_t globalsize = groupnum * wgs;
if (!k.run(1, &globalsize, &wgs, true))
2013-12-05 20:34:23 +08:00
return false;
2014-06-04 22:22:55 +08:00
static const getMinMaxResFunc functab[7] =
2013-12-05 20:34:23 +08:00
{
getMinMaxRes<uchar>,
getMinMaxRes<char>,
getMinMaxRes<ushort>,
getMinMaxRes<short>,
getMinMaxRes<int>,
getMinMaxRes<float>,
getMinMaxRes<double>
};
2014-06-08 00:53:20 +08:00
getMinMaxResFunc func = functab[ddepth];
2013-12-05 20:34:23 +08:00
2014-06-04 22:22:55 +08:00
int locTemp[2];
func(db.getMat(ACCESS_READ), minVal, maxVal,
needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
2014-06-08 00:53:20 +08:00
needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc,
groupnum, src.cols, maxVal2);
2013-12-05 20:34:23 +08:00
return true;
}
2014-01-25 01:03:31 +08:00
#endif
#ifdef HAVE_OPENVX
namespace ovx {
template <> inline bool skipSmallImages<VX_KERNEL_MINMAXLOC>(int w, int h) { return w*h < 3840 * 2160; }
}
static bool openvx_minMaxIdx(Mat &src, double* minVal, double* maxVal, int* minIdx, int* maxIdx, Mat &mask)
{
int stype = src.type();
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size / rows) : 0;
if ((stype != CV_8UC1 && stype != CV_16SC1) || !mask.empty() ||
(src.dims != 2 && !(src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size))
)
return false;
try
{
ivx::Context ctx = ovx::getOpenVXContext();
ivx::Image
ia = ivx::Image::createFromHandle(ctx, stype == CV_8UC1 ? VX_DF_IMAGE_U8 : VX_DF_IMAGE_S16,
ivx::Image::createAddressing(cols, rows, stype == CV_8UC1 ? 1 : 2, (vx_int32)(src.step[0])), src.ptr());
ivx::Scalar vxMinVal = ivx::Scalar::create(ctx, stype == CV_8UC1 ? VX_TYPE_UINT8 : VX_TYPE_INT16, 0);
ivx::Scalar vxMaxVal = ivx::Scalar::create(ctx, stype == CV_8UC1 ? VX_TYPE_UINT8 : VX_TYPE_INT16, 0);
ivx::Array vxMinInd, vxMaxInd;
ivx::Scalar vxMinCount, vxMaxCount;
if (minIdx)
{
vxMinInd = ivx::Array::create(ctx, VX_TYPE_COORDINATES2D, 1);
vxMinCount = ivx::Scalar::create(ctx, VX_TYPE_UINT32, 0);
}
if (maxIdx)
{
vxMaxInd = ivx::Array::create(ctx, VX_TYPE_COORDINATES2D, 1);
vxMaxCount = ivx::Scalar::create(ctx, VX_TYPE_UINT32, 0);
}
ivx::IVX_CHECK_STATUS(vxuMinMaxLoc(ctx, ia, vxMinVal, vxMaxVal, vxMinInd, vxMaxInd, vxMinCount, vxMaxCount));
if (minVal)
{
*minVal = stype == CV_8UC1 ? vxMinVal.getValue<vx_uint8>() : vxMinVal.getValue<vx_int16>();
}
if (maxVal)
{
*maxVal = stype == CV_8UC1 ? vxMaxVal.getValue<vx_uint8>() : vxMaxVal.getValue<vx_int16>();
}
if (minIdx)
{
if(vxMinCount.getValue<vx_uint32>()<1) throw ivx::RuntimeError(VX_ERROR_INVALID_VALUE, std::string(__func__) + "(): minimum value location not found");
vx_coordinates2d_t loc;
vxMinInd.copyRangeTo(0, 1, &loc);
size_t minidx = loc.y * cols + loc.x + 1;
ofs2idx(src, minidx, minIdx);
}
if (maxIdx)
{
if (vxMaxCount.getValue<vx_uint32>()<1) throw ivx::RuntimeError(VX_ERROR_INVALID_VALUE, std::string(__func__) + "(): maximum value location not found");
vx_coordinates2d_t loc;
vxMaxInd.copyRangeTo(0, 1, &loc);
size_t maxidx = loc.y * cols + loc.x + 1;
ofs2idx(src, maxidx, maxIdx);
}
}
catch (ivx::RuntimeError & e)
{
VX_DbgThrow(e.what());
}
catch (ivx::WrapperError & e)
{
VX_DbgThrow(e.what());
}
return true;
}
#endif
#ifdef HAVE_IPP
static IppStatus ipp_minMaxIndex_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u*, int)
2010-10-12 20:31:40 +08:00
{
switch(dataType)
{
case ipp8u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
default: return ippStsDataTypeErr;
}
}
static IppStatus ipp_minMaxIndexMask_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep)
{
switch(dataType)
{
case ipp8u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_8u_C1MR, (const Ipp8u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
case ipp16u: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_16u_C1MR, (const Ipp16u*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMaxIndx_32f_C1MR, (const Ipp32f*)pSrc, srcStep, pMask, maskStep, size, pMinVal, pMaxVal, pMinIndex, pMaxIndex);
default: return ippStsDataTypeErr;
}
}
static IppStatus ipp_minMax_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float* pMinVal, float* pMaxVal, IppiPoint*, IppiPoint*, const Ipp8u*, int)
{
IppStatus status;
switch(dataType)
{
#if IPP_VERSION_X100 > 201701 // wrong min values
case ipp8u:
{
Ipp8u val[2];
status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val[0], &val[1]);
*pMinVal = val[0];
*pMaxVal = val[1];
return status;
}
#endif
case ipp16u:
{
Ipp16u val[2];
status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val[0], &val[1]);
*pMinVal = val[0];
*pMaxVal = val[1];
return status;
}
case ipp16s:
{
Ipp16s val[2];
status = CV_INSTRUMENT_FUN_IPP(ippiMinMax_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val[0], &val[1]);
*pMinVal = val[0];
*pMaxVal = val[1];
return status;
}
case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinMax_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, pMaxVal);
default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, pMaxVal, NULL, NULL, NULL, 0);
}
}
static IppStatus ipp_minIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float* pMinVal, float*, IppiPoint* pMinIndex, IppiPoint*, const Ipp8u*, int)
{
IppStatus status;
switch(dataType)
{
case ipp8u:
{
Ipp8u val;
status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
*pMinVal = val;
return status;
}
case ipp16u:
{
Ipp16u val;
status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
*pMinVal = val;
return status;
}
case ipp16s:
{
Ipp16s val;
status = CV_INSTRUMENT_FUN_IPP(ippiMinIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMinIndex->x, &pMinIndex->y);
*pMinVal = val;
return status;
}
case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMinIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMinVal, &pMinIndex->x, &pMinIndex->y);
default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, pMinVal, NULL, pMinIndex, NULL, NULL, 0);
}
}
static IppStatus ipp_maxIdx_wrap(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float*, float* pMaxVal, IppiPoint*, IppiPoint* pMaxIndex, const Ipp8u*, int)
{
IppStatus status;
switch(dataType)
{
case ipp8u:
{
Ipp8u val;
status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_8u_C1R, (const Ipp8u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
*pMaxVal = val;
return status;
}
case ipp16u:
{
Ipp16u val;
status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16u_C1R, (const Ipp16u*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
*pMaxVal = val;
return status;
}
case ipp16s:
{
Ipp16s val;
status = CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_16s_C1R, (const Ipp16s*)pSrc, srcStep, size, &val, &pMaxIndex->x, &pMaxIndex->y);
*pMaxVal = val;
return status;
}
case ipp32f: return CV_INSTRUMENT_FUN_IPP(ippiMaxIndx_32f_C1R, (const Ipp32f*)pSrc, srcStep, size, pMaxVal, &pMaxIndex->x, &pMaxIndex->y);
default: return ipp_minMaxIndex_wrap(pSrc, srcStep, size, dataType, NULL, pMaxVal, NULL, pMaxIndex, NULL, 0);
}
}
typedef IppStatus (*IppMinMaxSelector)(const void* pSrc, int srcStep, IppiSize size, IppDataType dataType,
float* pMinVal, float* pMaxVal, IppiPoint* pMinIndex, IppiPoint* pMaxIndex, const Ipp8u* pMask, int maskStep);
static bool ipp_minMaxIdx(Mat &src, double* _minVal, double* _maxVal, int* _minIdx, int* _maxIdx, Mat &mask)
{
#if IPP_VERSION_X100 >= 700
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 < 201800
// cv::minMaxIdx problem with NaN input
// Disable 32F processing only
if(src.depth() == CV_32F && cv::ipp::getIppTopFeatures() == ippCPUID_SSE42)
return false;
#endif
#if IPP_VERSION_X100 < 201801
// cv::minMaxIdx problem with index positions on AVX
if(!mask.empty() && _maxIdx && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42)
return false;
#endif
IppStatus status;
IppDataType dataType = ippiGetDataType(src.depth());
float minVal = 0;
float maxVal = 0;
IppiPoint minIdx = {-1, -1};
IppiPoint maxIdx = {-1, -1};
float *pMinVal = (_minVal || _minIdx)?&minVal:NULL;
float *pMaxVal = (_maxVal || _maxIdx)?&maxVal:NULL;
IppiPoint *pMinIdx = (_minIdx)?&minIdx:NULL;
IppiPoint *pMaxIdx = (_maxIdx)?&maxIdx:NULL;
IppMinMaxSelector ippMinMaxFun = ipp_minMaxIndexMask_wrap;
if(mask.empty())
{
if(_maxVal && _maxIdx && !_minVal && !_minIdx)
ippMinMaxFun = ipp_maxIdx_wrap;
else if(!_maxVal && !_maxIdx && _minVal && _minIdx)
ippMinMaxFun = ipp_minIdx_wrap;
else if(_maxVal && !_maxIdx && _minVal && !_minIdx)
ippMinMaxFun = ipp_minMax_wrap;
else if(!_maxVal && !_maxIdx && !_minVal && !_minIdx)
return false;
else
ippMinMaxFun = ipp_minMaxIndex_wrap;
}
if(src.dims <= 2)
{
IppiSize size = ippiSize(src.size());
size.width *= src.channels();
status = ippMinMaxFun(src.ptr(), (int)src.step, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, (Ipp8u*)mask.ptr(), (int)mask.step);
if(status < 0)
return false;
if(_minVal)
*_minVal = minVal;
if(_maxVal)
*_maxVal = maxVal;
if(_minIdx)
{
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMinIdx->x && !pMinIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{
_minIdx[0] = -1;
_minIdx[1] = -1;
}
else
{
_minIdx[0] = minIdx.y;
_minIdx[1] = minIdx.x;
}
}
if(_maxIdx)
{
#if IPP_VERSION_X100 < 201801
// Should be just ippStsNoOperation check, but there is a bug in the function so we need additional checks
if(status == ippStsNoOperation && !mask.empty() && !pMaxIdx->x && !pMaxIdx->y)
#else
if(status == ippStsNoOperation)
#endif
{
_maxIdx[0] = -1;
_maxIdx[1] = -1;
}
else
{
_maxIdx[0] = maxIdx.y;
_maxIdx[1] = maxIdx.x;
}
}
}
else
{
const Mat *arrays[] = {&src, mask.empty()?NULL:&mask, NULL};
uchar *ptrs[3] = {NULL};
NAryMatIterator it(arrays, ptrs);
IppiSize size = ippiSize(it.size*src.channels(), 1);
int srcStep = (int)(size.width*src.elemSize1());
int maskStep = size.width;
size_t idxPos = 1;
size_t minIdxAll = 0;
size_t maxIdxAll = 0;
float minValAll = IPP_MAXABS_32F;
float maxValAll = -IPP_MAXABS_32F;
for(size_t i = 0; i < it.nplanes; i++, ++it, idxPos += size.width)
{
status = ippMinMaxFun(ptrs[0], srcStep, size, dataType, pMinVal, pMaxVal, pMinIdx, pMaxIdx, ptrs[1], maskStep);
if(status < 0)
return false;
#if IPP_VERSION_X100 > 201701
// Zero-mask check, function should return ippStsNoOperation warning
if(status == ippStsNoOperation)
continue;
#else
// Crude zero-mask check, waiting for fix in IPP function
if(ptrs[1])
{
Mat localMask(Size(size.width, 1), CV_8U, ptrs[1], maskStep);
if(!cv::countNonZero(localMask))
continue;
}
#endif
if(_minVal && minVal < minValAll)
{
minValAll = minVal;
minIdxAll = idxPos+minIdx.x;
}
if(_maxVal && maxVal > maxValAll)
{
maxValAll = maxVal;
maxIdxAll = idxPos+maxIdx.x;
}
}
if(!src.empty() && mask.empty())
{
if(minIdxAll == 0)
minIdxAll = 1;
if(maxValAll == 0)
maxValAll = 1;
}
if(_minVal)
*_minVal = minValAll;
if(_maxVal)
*_maxVal = maxValAll;
if(_minIdx)
ofs2idx(src, minIdxAll, _minIdx);
if(_maxIdx)
ofs2idx(src, maxIdxAll, _maxIdx);
}
return true;
#else
CV_UNUSED(src); CV_UNUSED(minVal); CV_UNUSED(maxVal); CV_UNUSED(minIdx); CV_UNUSED(maxIdx); CV_UNUSED(mask);
return false;
#endif
}
#endif
}
void cv::minMaxIdx(InputArray _src, double* minVal,
double* maxVal, int* minIdx, int* maxIdx,
InputArray _mask)
{
CV_INSTRUMENT_REGION()
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
(cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
Mat src = _src.getMat(), mask = _mask.getMat();
CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MINMAXLOC>(src.cols, src.rows),
openvx_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))
CV_IPP_RUN_FAST(ipp_minMaxIdx(src, minVal, maxVal, minIdx, maxIdx, mask))
MinMaxIdxFunc func = getMinmaxTab(depth);
CV_Assert( func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
2012-06-08 01:21:29 +08:00
size_t minidx = 0, maxidx = 0;
int iminval = INT_MAX, imaxval = INT_MIN;
2015-07-20 20:10:46 +08:00
float fminval = std::numeric_limits<float>::infinity(), fmaxval = -fminval;
double dminval = std::numeric_limits<double>::infinity(), dmaxval = -dminval;
size_t startidx = 1;
int *minval = &iminval, *maxval = &imaxval;
int planeSize = (int)it.size*cn;
2012-06-08 01:21:29 +08:00
if( depth == CV_32F )
minval = (int*)&fminval, maxval = (int*)&fmaxval;
else if( depth == CV_64F )
minval = (int*)&dminval, maxval = (int*)&dmaxval;
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
2012-06-08 01:21:29 +08:00
2015-07-20 20:10:46 +08:00
if (!src.empty() && mask.empty())
{
if( minidx == 0 )
minidx = 1;
if( maxidx == 0 )
maxidx = 1;
}
if( minidx == 0 )
dminval = dmaxval = 0;
else if( depth == CV_32F )
dminval = fminval, dmaxval = fmaxval;
else if( depth <= CV_32S )
dminval = iminval, dmaxval = imaxval;
2012-06-08 01:21:29 +08:00
2010-10-12 20:31:40 +08:00
if( minVal )
*minVal = dminval;
2010-10-12 20:31:40 +08:00
if( maxVal )
*maxVal = dmaxval;
2012-06-08 01:21:29 +08:00
2010-10-12 20:31:40 +08:00
if( minIdx )
ofs2idx(src, minidx, minIdx);
2010-10-12 20:31:40 +08:00
if( maxIdx )
ofs2idx(src, maxidx, maxIdx);
2012-06-08 01:21:29 +08:00
}
2011-12-22 20:51:01 +08:00
void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
2011-12-22 20:51:01 +08:00
Point* minLoc, Point* maxLoc, InputArray mask )
{
CV_INSTRUMENT_REGION()
2013-12-05 20:34:23 +08:00
CV_Assert(_img.dims() <= 2);
2012-06-08 01:21:29 +08:00
minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
if( minLoc )
std::swap(minLoc->x, minLoc->y);
if( maxLoc )
std::swap(maxLoc->x, maxLoc->y);
}
2011-12-22 20:51:01 +08:00
/****************************************************************************************\
* norm *
\****************************************************************************************/
namespace cv
{
template<typename T, typename ST> int
normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result = std::max(result, normInf<T, ST>(src, len*cn));
}
else
{
for( int i = 0; i < len; i++, src += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
result = std::max(result, ST(cv_abs(src[k])));
}
}
*_result = result;
return 0;
}
2011-12-22 20:51:01 +08:00
template<typename T, typename ST> int
normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result += normL1<T, ST>(src, len*cn);
}
else
{
for( int i = 0; i < len; i++, src += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
result += cv_abs(src[k]);
}
}
*_result = result;
return 0;
}
template<typename T, typename ST> int
normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result += normL2Sqr<T, ST>(src, len*cn);
}
else
{
for( int i = 0; i < len; i++, src += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
{
T v = src[k];
result += (ST)v*v;
}
}
}
*_result = result;
return 0;
2012-06-08 01:21:29 +08:00
}
2011-12-22 20:51:01 +08:00
template<typename T, typename ST> int
normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
}
else
{
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
2011-06-10 14:53:34 +08:00
result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
}
}
*_result = result;
return 0;
}
template<typename T, typename ST> int
normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result += normL1<T, ST>(src1, src2, len*cn);
}
else
{
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
result += std::abs(src1[k] - src2[k]);
}
}
*_result = result;
return 0;
}
template<typename T, typename ST> int
normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
{
ST result = *_result;
if( !mask )
{
result += normL2Sqr<T, ST>(src1, src2, len*cn);
}
else
{
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
if( mask[i] )
{
for( int k = 0; k < cn; k++ )
{
ST v = src1[k] - src2[k];
result += v*v;
}
}
}
*_result = result;
return 0;
2012-06-08 01:21:29 +08:00
}
2015-03-26 20:25:42 +08:00
Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
{
return cv::hal::normHamming(a, b, size);
2015-03-26 20:25:42 +08:00
}
#define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
2011-12-22 20:51:01 +08:00
static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
{ return norm##L##_(src, mask, r, len, cn); } \
2011-12-22 20:51:01 +08:00
static int normDiff##L##_##suffix(const type* src1, const type* src2, \
const uchar* mask, ntype* r, int len, int cn) \
{ return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
2011-12-22 20:51:01 +08:00
#define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
2011-12-22 20:51:01 +08:00
CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
CV_DEF_NORM_ALL(8u, uchar, int, int, int)
CV_DEF_NORM_ALL(8s, schar, int, int, int)
CV_DEF_NORM_ALL(16u, ushort, int, int, double)
CV_DEF_NORM_ALL(16s, short, int, int, double)
CV_DEF_NORM_ALL(32s, int, int, double, double)
CV_DEF_NORM_ALL(32f, float, float, double, double)
CV_DEF_NORM_ALL(64f, double, double, double, double)
2011-12-22 20:51:01 +08:00
typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
2012-06-08 01:21:29 +08:00
typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
static NormFunc getNormFunc(int normType, int depth)
{
static NormFunc normTab[3][8] =
{
{
(NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
(NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
},
{
(NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
(NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
},
{
(NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
(NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
}
};
return normTab[normType][depth];
}
static NormDiffFunc getNormDiffFunc(int normType, int depth)
{
static NormDiffFunc normDiffTab[3][8] =
{
{
(NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
(NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
(NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
(NormDiffFunc)normDiffInf_64f, 0
},
{
(NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
(NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
(NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
(NormDiffFunc)normDiffL1_64f, 0
},
{
(NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
(NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
(NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
(NormDiffFunc)normDiffL2_64f, 0
}
};
return normDiffTab[normType][depth];
}
2014-01-25 01:03:31 +08:00
#ifdef HAVE_OPENCL
2013-12-06 23:04:50 +08:00
static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
{
2014-05-14 19:42:30 +08:00
const ocl::Device & d = ocl::Device::getDefault();
#ifdef __ANDROID__
if (d.isNVidia())
return false;
#endif
const int cn = _src.channels();
if (cn > 4)
return false;
int type = _src.type(), depth = CV_MAT_DEPTH(type);
2014-05-14 19:42:30 +08:00
bool doubleSupport = d.doubleFPConfig() > 0,
haveMask = _mask.kind() != _InputArray::NONE;
2012-06-08 01:21:29 +08:00
2014-01-08 00:32:22 +08:00
if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
(!doubleSupport && depth == CV_64F))
2013-12-06 23:04:50 +08:00
return false;
UMat src = _src.getUMat();
if (normType == NORM_INF)
{
if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
return false;
2013-12-06 23:04:50 +08:00
}
2014-01-08 00:32:22 +08:00
else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
2013-12-06 23:04:50 +08:00
{
Scalar sc;
2013-12-06 23:04:50 +08:00
bool unstype = depth == CV_8U || depth == CV_16U;
2014-01-08 00:32:22 +08:00
if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
2014-01-02 03:46:19 +08:00
return false;
double s = 0.0;
for (int i = 0; i < (haveMask ? cn : 1); ++i)
s += sc[i];
2014-01-08 00:32:22 +08:00
result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
2013-12-06 23:04:50 +08:00
}
return true;
}
2014-01-25 01:03:31 +08:00
#endif
#ifdef HAVE_IPP
static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
size_t total_size = src.total();
int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
&& cols > 0 && (size_t)rows*cols == total_size )
{
if( !mask.empty() )
{
IppiSize sz = { cols, rows };
int type = src.type();
typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiMaskNormFuncC1 ippiNorm_C1MR =
normType == NORM_INF ?
(type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
0) :
normType == NORM_L1 ?
(type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
0) : 0;
if( ippiNorm_C1MR )
{
Ipp64f norm;
if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
{
result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
ippiMaskNormFuncC3 ippiNorm_C3CMR =
normType == NORM_INF ?
(type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
0) :
normType == NORM_L1 ?
(type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
0) : 0;
if( ippiNorm_C3CMR )
{
Ipp64f norm1, norm2, norm3;
if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
{
Ipp64f norm =
normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
normType == NORM_L1 ? norm1 + norm2 + norm3 :
normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
0;
result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
return true;
}
}
}
else
{
IppiSize sz = { cols*src.channels(), rows };
int type = src.depth();
typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
ippiNormFuncHint ippiNormHint =
normType == NORM_L1 ?
(type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
0) : 0;
ippiNormFuncNoHint ippiNorm =
normType == NORM_INF ?
(type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
0) :
normType == NORM_L1 ?
(type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
0) : 0;
if( ippiNormHint || ippiNorm )
{
Ipp64f norm;
IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) :
CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm);
if( ret >= 0 )
{
result = (normType == NORM_L2SQR) ? norm * norm : norm;
return true;
}
}
}
}
#else
CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
#endif
return false;
}
#endif
}
double cv::norm( InputArray _src, int normType, InputArray _mask )
{
CV_INSTRUMENT_REGION()
normType &= NORM_TYPE_MASK;
CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
#if defined HAVE_OPENCL || defined HAVE_IPP
double _result = 0;
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_norm(_src, normType, _mask, _result),
_result)
#endif
Mat src = _src.getMat(), mask = _mask.getMat();
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);
int depth = src.depth(), cn = src.channels();
2012-04-30 22:33:52 +08:00
if( src.isContinuous() && mask.empty() )
2010-10-12 20:31:40 +08:00
{
size_t len = src.total()*cn;
if( len == (size_t)(int)len )
2010-10-12 20:31:40 +08:00
{
2012-04-30 22:33:52 +08:00
if( depth == CV_32F )
{
2012-04-30 22:33:52 +08:00
const float* data = src.ptr<float>();
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_L2 )
{
double result = 0;
GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
return std::sqrt(result);
}
if( normType == NORM_L2SQR )
{
double result = 0;
GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_L1 )
{
double result = 0;
GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_INF )
{
float result = 0;
GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
return result;
}
}
2012-04-30 22:33:52 +08:00
if( depth == CV_8U )
{
2012-04-30 22:33:52 +08:00
const uchar* data = src.ptr<uchar>();
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_HAMMING )
2015-03-26 20:25:42 +08:00
{
return hal::normHamming(data, (int)len);
2015-03-26 20:25:42 +08:00
}
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_HAMMING2 )
2015-03-26 20:25:42 +08:00
{
return hal::normHamming(data, (int)len, 2);
2015-03-26 20:25:42 +08:00
}
}
2010-10-12 20:31:40 +08:00
}
}
2012-06-08 01:21:29 +08:00
CV_Assert( mask.empty() || mask.type() == CV_8U );
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
{
if( !mask.empty() )
{
Mat temp;
bitwise_and(src, mask, temp);
return norm(temp, normType);
}
int cellSize = normType == NORM_HAMMING ? 1 : 2;
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
const Mat* arrays[] = {&src, 0};
uchar* ptrs[1];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
2015-03-26 20:25:42 +08:00
{
result += hal::normHamming(ptrs[0], total, cellSize);
2015-03-26 20:25:42 +08:00
}
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
return result;
}
2012-06-08 01:21:29 +08:00
NormFunc func = getNormFunc(normType >> 1, depth);
CV_Assert( func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src, &mask, 0};
uchar* ptrs[2];
union
{
double d;
int i;
float f;
}
result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2012-04-30 22:33:52 +08:00
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
int isum = 0;
int *ibuf = &result.i;
size_t esz = 0;
2012-06-08 01:21:29 +08:00
if( blockSum )
{
intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
blockSize = std::min(blockSize, intSumBlockSize);
ibuf = &isum;
esz = src.elemSize();
}
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
2010-10-12 20:31:40 +08:00
{
for( j = 0; j < total; j += blockSize )
2010-10-12 20:31:40 +08:00
{
int bsz = std::min(total - j, blockSize);
2011-12-22 20:51:01 +08:00
func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
count += bsz;
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
{
result.d += isum;
isum = 0;
count = 0;
}
ptrs[0] += bsz*esz;
if( ptrs[1] )
ptrs[1] += bsz;
2010-10-12 20:31:40 +08:00
}
}
2012-06-08 01:21:29 +08:00
if( normType == NORM_INF )
2010-10-12 20:31:40 +08:00
{
if( depth == CV_64F )
;
else if( depth == CV_32F )
result.d = result.f;
else
result.d = result.i;
2010-10-12 20:31:40 +08:00
}
else if( normType == NORM_L2 )
result.d = std::sqrt(result.d);
2012-06-08 01:21:29 +08:00
return result.d;
}
2014-01-25 01:03:31 +08:00
#ifdef HAVE_OPENCL
2013-12-06 23:04:50 +08:00
namespace cv {
2014-02-25 23:32:55 +08:00
static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2013-12-06 23:04:50 +08:00
{
#ifdef __ANDROID__
if (ocl::Device::getDefault().isNVidia())
return false;
#endif
2014-06-07 19:51:41 +08:00
Scalar sc1, sc2;
int cn = _src1.channels();
if (cn > 4)
return false;
int type = _src1.type(), depth = CV_MAT_DEPTH(type);
2014-06-09 04:50:14 +08:00
bool relative = (normType & NORM_RELATIVE) != 0;
2013-12-06 23:04:50 +08:00
normType &= ~NORM_RELATIVE;
2014-06-09 04:50:14 +08:00
bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
2013-12-06 23:04:50 +08:00
2014-06-07 19:51:41 +08:00
if (normsum)
{
if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
return false;
}
else
{
2014-06-08 00:53:20 +08:00
if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
false, _src2, relative ? &sc2[0] : NULL))
2014-06-07 19:51:41 +08:00
return false;
2014-06-08 00:53:20 +08:00
cn = 1;
2014-06-07 19:51:41 +08:00
}
2013-12-06 23:04:50 +08:00
2014-06-07 19:51:41 +08:00
double s2 = 0;
for (int i = 0; i < cn; ++i)
{
result += sc1[i];
if (relative)
s2 += sc2[i];
}
2013-12-06 23:04:50 +08:00
2014-06-07 19:51:41 +08:00
if (normType == NORM_L2)
{
result = std::sqrt(result);
if (relative)
s2 = std::sqrt(s2);
}
2013-12-06 23:04:50 +08:00
if (relative)
2014-06-07 19:51:41 +08:00
result /= (s2 + DBL_EPSILON);
2013-12-06 23:04:50 +08:00
return true;
}
}
2011-12-22 20:51:01 +08:00
2014-01-25 01:03:31 +08:00
#endif
#ifdef HAVE_IPP
namespace cv
{
static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
{
CV_INSTRUMENT_REGION_IPP()
#if IPP_VERSION_X100 >= 700
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
if( normType & CV_RELATIVE )
{
normType &= NORM_TYPE_MASK;
size_t total_size = src1.total();
int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
&& cols > 0 && (size_t)rows*cols == total_size )
{
if( !mask.empty() )
{
IppiSize sz = { cols, rows };
int type = src1.type();
typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiMaskNormDiffFuncC1 ippiNormRel_C1MR =
normType == NORM_INF ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR :
0) :
normType == NORM_L1 ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR :
0) : 0;
if( ippiNormRel_C1MR )
{
Ipp64f norm;
if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
{
result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
return true;
}
}
}
else
{
IppiSize sz = { cols*src1.channels(), rows };
int type = src1.depth();
typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiNormRelFuncHint ippiNormRelHint =
normType == NORM_L1 ?
(type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
0) : 0;
ippiNormRelFuncNoHint ippiNormRel =
normType == NORM_INF ?
(type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
0) :
normType == NORM_L1 ?
(type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
0) : 0;
if( ippiNormRelHint || ippiNormRel )
{
Ipp64f norm;
IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
if( ret >= 0 )
{
result = (normType == NORM_L2SQR) ? norm * norm : norm;
return true;
}
}
}
}
return false;
}
normType &= NORM_TYPE_MASK;
size_t total_size = src1.total();
int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
&& cols > 0 && (size_t)rows*cols == total_size )
{
if( !mask.empty() )
{
IppiSize sz = { cols, rows };
int type = src1.type();
typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR =
normType == NORM_INF ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
0) :
normType == NORM_L1 ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
0) : 0;
if( ippiNormDiff_C1MR )
{
Ipp64f norm;
if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
{
result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
return true;
}
}
typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR =
normType == NORM_INF ?
(type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
0) :
normType == NORM_L1 ?
(type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
0) : 0;
if( ippiNormDiff_C3CMR )
{
Ipp64f norm1, norm2, norm3;
if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
{
Ipp64f norm =
normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
normType == NORM_L1 ? norm1 + norm2 + norm3 :
normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
0;
result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
return true;
}
}
}
else
{
IppiSize sz = { cols*src1.channels(), rows };
int type = src1.depth();
typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiNormDiffFuncHint ippiNormDiffHint =
normType == NORM_L1 ?
(type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
0) : 0;
ippiNormDiffFuncNoHint ippiNormDiff =
normType == NORM_INF ?
(type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
0) :
normType == NORM_L1 ?
(type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
0) :
normType == NORM_L2 || normType == NORM_L2SQR ?
(type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
0) : 0;
if( ippiNormDiffHint || ippiNormDiff )
{
Ipp64f norm;
IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
if( ret >= 0 )
{
result = (normType == NORM_L2SQR) ? norm * norm : norm;
return true;
}
}
}
}
#else
CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
#endif
return false;
}
}
#endif
double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
{
CV_INSTRUMENT_REGION()
CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
#if defined HAVE_OPENCL || defined HAVE_IPP
double _result = 0;
#endif
#ifdef HAVE_OPENCL
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
ocl_norm(_src1, _src2, normType, _mask, _result),
_result)
#endif
CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);
if( normType & CV_RELATIVE )
{
return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
}
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
int depth = src1.depth(), cn = src1.channels();
normType &= 7;
CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
2012-04-30 22:33:52 +08:00
if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
2010-10-12 20:31:40 +08:00
{
size_t len = src1.total()*src1.channels();
if( len == (size_t)(int)len )
2010-10-12 20:31:40 +08:00
{
2012-04-30 22:33:52 +08:00
if( src1.depth() == CV_32F )
{
2012-04-30 22:33:52 +08:00
const float* data1 = src1.ptr<float>();
const float* data2 = src2.ptr<float>();
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_L2 )
{
double result = 0;
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
return std::sqrt(result);
}
if( normType == NORM_L2SQR )
{
double result = 0;
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_L1 )
{
double result = 0;
GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
if( normType == NORM_INF )
{
float result = 0;
GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
return result;
}
}
2010-10-12 20:31:40 +08:00
}
}
2012-06-08 01:21:29 +08:00
CV_Assert( mask.empty() || mask.type() == CV_8U );
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
{
if( !mask.empty() )
{
Mat temp;
bitwise_xor(src1, src2, temp);
bitwise_and(temp, mask, temp);
return norm(temp, normType);
}
int cellSize = normType == NORM_HAMMING ? 1 : 2;
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
const Mat* arrays[] = {&src1, &src2, 0};
uchar* ptrs[2];
NAryMatIterator it(arrays, ptrs);
int total = (int)it.size;
int result = 0;
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
2015-03-26 20:25:42 +08:00
{
result += hal::normHamming(ptrs[0], ptrs[1], total, cellSize);
2015-03-26 20:25:42 +08:00
}
2012-06-08 01:21:29 +08:00
2012-04-30 22:33:52 +08:00
return result;
}
2012-06-08 01:21:29 +08:00
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
CV_Assert( func != 0 );
2012-06-08 01:21:29 +08:00
const Mat* arrays[] = {&src1, &src2, &mask, 0};
uchar* ptrs[3];
union
{
double d;
float f;
int i;
unsigned u;
}
result;
result.d = 0;
NAryMatIterator it(arrays, ptrs);
int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2012-04-30 22:33:52 +08:00
((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
unsigned isum = 0;
unsigned *ibuf = &result.u;
size_t esz = 0;
2012-06-08 01:21:29 +08:00
if( blockSum )
2010-10-12 20:31:40 +08:00
{
intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
blockSize = std::min(blockSize, intSumBlockSize);
ibuf = &isum;
esz = src1.elemSize();
2010-10-12 20:31:40 +08:00
}
2012-06-08 01:21:29 +08:00
for( size_t i = 0; i < it.nplanes; i++, ++it )
2010-10-12 20:31:40 +08:00
{
for( j = 0; j < total; j += blockSize )
2010-10-12 20:31:40 +08:00
{
int bsz = std::min(total - j, blockSize);
func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
count += bsz;
if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
{
result.d += isum;
isum = 0;
count = 0;
}
ptrs[0] += bsz*esz;
ptrs[1] += bsz*esz;
if( ptrs[2] )
ptrs[2] += bsz;
2010-10-12 20:31:40 +08:00
}
}
2012-06-08 01:21:29 +08:00
if( normType == NORM_INF )
2010-10-12 20:31:40 +08:00
{
if( depth == CV_64F )
;
else if( depth == CV_32F )
result.d = result.f;
else
result.d = result.u;
2010-10-12 20:31:40 +08:00
}
else if( normType == NORM_L2 )
result.d = std::sqrt(result.d);
2012-06-08 01:21:29 +08:00
return result.d;
}
///////////////////////////////////// batch distance ///////////////////////////////////////
namespace cv
{
template<typename _Tp, typename _Rt>
void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
int nvecs, int len, _Rt* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
}
else
{
_Rt val0 = std::numeric_limits<_Rt>::max();
for( int i = 0; i < nvecs; i++ )
dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
}
}
template<typename _Tp, typename _Rt>
void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
int nvecs, int len, _Rt* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
}
else
{
_Rt val0 = std::numeric_limits<_Rt>::max();
for( int i = 0; i < nvecs; i++ )
dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
}
}
template<typename _Tp, typename _Rt>
void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
int nvecs, int len, _Rt* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
}
else
{
_Rt val0 = std::numeric_limits<_Rt>::max();
for( int i = 0; i < nvecs; i++ )
dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
}
}
static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, int* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = hal::normHamming(src1, src2 + step2*i, len);
}
else
{
int val0 = INT_MAX;
for( int i = 0; i < nvecs; i++ )
2015-03-26 20:25:42 +08:00
{
if (mask[i])
dist[i] = hal::normHamming(src1, src2 + step2*i, len);
2015-03-26 20:25:42 +08:00
else
dist[i] = val0;
}
}
}
static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, int* dist, const uchar* mask)
{
step2 /= sizeof(src2[0]);
if( !mask )
{
for( int i = 0; i < nvecs; i++ )
dist[i] = hal::normHamming(src1, src2 + step2*i, len, 2);
}
else
{
int val0 = INT_MAX;
for( int i = 0; i < nvecs; i++ )
2015-03-26 20:25:42 +08:00
{
if (mask[i])
dist[i] = hal::normHamming(src1, src2 + step2*i, len, 2);
2015-03-26 20:25:42 +08:00
else
dist[i] = val0;
}
}
}
static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, int* dist, const uchar* mask)
{
batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, int* dist, const uchar* mask)
{
batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
}
static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
int nvecs, int len, float* dist, const uchar* mask)
{
batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
}
typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
int nvecs, int len, uchar* dist, const uchar* mask);
2012-06-08 01:21:29 +08:00
struct BatchDistInvoker : public ParallelLoopBody
{
BatchDistInvoker( const Mat& _src1, const Mat& _src2,
Mat& _dist, Mat& _nidx, int _K,
const Mat& _mask, int _update,
BatchDistFunc _func)
{
src1 = &_src1;
src2 = &_src2;
dist = &_dist;
nidx = &_nidx;
K = _K;
mask = &_mask;
update = _update;
func = _func;
}
2012-06-08 01:21:29 +08:00
void operator()(const Range& range) const
{
AutoBuffer<int> buf(src2->rows);
int* bufptr = buf;
2012-06-08 01:21:29 +08:00
for( int i = range.start; i < range.end; i++ )
{
func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
2012-06-08 01:21:29 +08:00
if( K > 0 )
{
int* nidxptr = nidx->ptr<int>(i);
// since positive float's can be compared just like int's,
// we handle both CV_32S and CV_32F cases with a single branch
int* distptr = (int*)dist->ptr(i);
2012-06-08 01:21:29 +08:00
int j, k;
2012-06-08 01:21:29 +08:00
for( j = 0; j < src2->rows; j++ )
{
int d = bufptr[j];
if( d < distptr[K-1] )
{
for( k = K-2; k >= 0 && distptr[k] > d; k-- )
{
nidxptr[k+1] = nidxptr[k];
distptr[k+1] = distptr[k];
}
nidxptr[k+1] = j + update;
distptr[k+1] = d;
}
}
}
}
}
2012-06-08 01:21:29 +08:00
const Mat *src1;
const Mat *src2;
Mat *dist;
Mat *nidx;
const Mat *mask;
int K;
int update;
BatchDistFunc func;
};
2012-06-08 01:21:29 +08:00
}
2012-06-08 01:21:29 +08:00
void cv::batchDistance( InputArray _src1, InputArray _src2,
OutputArray _dist, int dtype, OutputArray _nidx,
int normType, int K, InputArray _mask,
int update, bool crosscheck )
{
CV_INSTRUMENT_REGION()
Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
int type = src1.type();
CV_Assert( type == src2.type() && src1.cols == src2.cols &&
(type == CV_32F || type == CV_8U));
CV_Assert( _nidx.needed() == (K > 0) );
2012-06-08 01:21:29 +08:00
if( dtype == -1 )
{
dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
}
CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
K = std::min(K, src2.rows);
2012-06-08 01:21:29 +08:00
_dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
Mat dist = _dist.getMat(), nidx;
if( _nidx.needed() )
{
_nidx.create(dist.size(), CV_32S);
nidx = _nidx.getMat();
}
2012-06-08 01:21:29 +08:00
if( update == 0 && K > 0 )
{
dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
nidx = Scalar::all(-1);
}
2012-06-08 01:21:29 +08:00
if( crosscheck )
{
CV_Assert( K == 1 && update == 0 && mask.empty() );
Mat tdist, tidx;
batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
2012-06-08 01:21:29 +08:00
// if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
// we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
// As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
// to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
// If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
// This O(N) procedure is called cross-check and it helps to eliminate some false matches.
if( dtype == CV_32S )
{
for( int i = 0; i < tdist.rows; i++ )
{
int idx = tidx.at<int>(i);
int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
if( d < d0 )
{
dist.at<int>(idx) = d;
nidx.at<int>(idx) = i + update;
}
}
}
else
{
for( int i = 0; i < tdist.rows; i++ )
{
int idx = tidx.at<int>(i);
float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
if( d < d0 )
{
dist.at<float>(idx) = d;
nidx.at<int>(idx) = i + update;
}
}
}
return;
}
2012-06-08 01:21:29 +08:00
BatchDistFunc func = 0;
if( type == CV_8U )
{
if( normType == NORM_L1 && dtype == CV_32S )
func = (BatchDistFunc)batchDistL1_8u32s;
else if( normType == NORM_L1 && dtype == CV_32F )
func = (BatchDistFunc)batchDistL1_8u32f;
else if( normType == NORM_L2SQR && dtype == CV_32S )
func = (BatchDistFunc)batchDistL2Sqr_8u32s;
else if( normType == NORM_L2SQR && dtype == CV_32F )
func = (BatchDistFunc)batchDistL2Sqr_8u32f;
else if( normType == NORM_L2 && dtype == CV_32F )
func = (BatchDistFunc)batchDistL2_8u32f;
else if( normType == NORM_HAMMING && dtype == CV_32S )
func = (BatchDistFunc)batchDistHamming;
else if( normType == NORM_HAMMING2 && dtype == CV_32S )
func = (BatchDistFunc)batchDistHamming2;
}
else if( type == CV_32F && dtype == CV_32F )
{
if( normType == NORM_L1 )
func = (BatchDistFunc)batchDistL1_32f;
else if( normType == NORM_L2SQR )
func = (BatchDistFunc)batchDistL2Sqr_32f;
else if( normType == NORM_L2 )
func = (BatchDistFunc)batchDistL2_32f;
}
2012-06-08 01:21:29 +08:00
if( func == 0 )
CV_Error_(CV_StsUnsupportedFormat,
("The combination of type=%d, dtype=%d and normType=%d is not supported",
type, dtype, normType));
2012-06-08 01:21:29 +08:00
parallel_for_(Range(0, src1.rows),
BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
}
void cv::findNonZero( InputArray _src, OutputArray _idx )
{
CV_INSTRUMENT_REGION()
Mat src = _src.getMat();
CV_Assert( src.type() == CV_8UC1 );
int n = countNonZero(src);
if( n == 0 )
{
_idx.release();
return;
}
if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
_idx.release();
_idx.create(n, 1, CV_32SC2);
Mat idx = _idx.getMat();
CV_Assert(idx.isContinuous());
Point* idx_ptr = idx.ptr<Point>();
2012-10-17 07:18:30 +08:00
for( int i = 0; i < src.rows; i++ )
{
const uchar* bin_ptr = src.ptr(i);
for( int j = 0; j < src.cols; j++ )
if( bin_ptr[j] )
*idx_ptr++ = Point(j, i);
}
}
double cv::PSNR(InputArray _src1, InputArray _src2)
{
CV_INSTRUMENT_REGION()
2017-09-17 10:55:45 +08:00
//Input arrays must have depth CV_8U
CV_Assert( _src1.depth() == CV_8U && _src2.depth() == CV_8U );
double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
return 20*log10(255./(diff+DBL_EPSILON));
}
CV_IMPL CvScalar cvSum( const CvArr* srcarr )
{
cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
if( CV_IS_IMAGE(srcarr) )
{
int coi = cvGetImageCOI((IplImage*)srcarr);
if( coi )
{
CV_Assert( 0 < coi && coi <= 4 );
sum = cv::Scalar(sum[coi-1]);
}
}
return sum;
}
CV_IMPL int cvCountNonZero( const CvArr* imgarr )
{
cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
if( img.channels() > 1 )
cv::extractImageCOI(imgarr, img);
return countNonZero(img);
}
CV_IMPL CvScalar
cvAvg( const void* imgarr, const void* maskarr )
{
cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
if( CV_IS_IMAGE(imgarr) )
{
int coi = cvGetImageCOI((IplImage*)imgarr);
if( coi )
{
CV_Assert( 0 < coi && coi <= 4 );
mean = cv::Scalar(mean[coi-1]);
}
}
return mean;
}
CV_IMPL void
cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
{
cv::Scalar mean, sdv;
cv::Mat mask;
if( maskarr )
mask = cv::cvarrToMat(maskarr);
cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
if( CV_IS_IMAGE(imgarr) )
{
int coi = cvGetImageCOI((IplImage*)imgarr);
if( coi )
{
CV_Assert( 0 < coi && coi <= 4 );
mean = cv::Scalar(mean[coi-1]);
sdv = cv::Scalar(sdv[coi-1]);
}
}
if( _mean )
*(cv::Scalar*)_mean = mean;
if( _sdv )
*(cv::Scalar*)_sdv = sdv;
}
CV_IMPL void
cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
{
cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
if( maskarr )
mask = cv::cvarrToMat(maskarr);
if( img.channels() > 1 )
cv::extractImageCOI(imgarr, img);
cv::minMaxLoc( img, _minVal, _maxVal,
2011-12-22 20:51:01 +08:00
(cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
}
CV_IMPL double
cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
{
cv::Mat a, mask;
if( !imgA )
{
imgA = imgB;
imgB = 0;
}
a = cv::cvarrToMat(imgA, false, true, 1);
if( maskarr )
mask = cv::cvarrToMat(maskarr);
if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
cv::extractImageCOI(imgA, a);
if( !imgB )
return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
cv::extractImageCOI(imgB, b);
return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
}
2015-12-15 20:55:43 +08:00
namespace cv { namespace hal {
extern const uchar popCountTable[256] =
2015-12-15 20:55:43 +08:00
{
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
static const uchar popCountTable2[] =
{
0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
};
static const uchar popCountTable4[] =
{
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
int normHamming(const uchar* a, int n, int cellSize)
{
if( cellSize == 1 )
return normHamming(a, n);
const uchar* tab = 0;
if( cellSize == 2 )
tab = popCountTable2;
else if( cellSize == 4 )
tab = popCountTable4;
else
return -1;
int i = 0;
int result = 0;
#if CV_ENABLE_UNROLLED
for( ; i <= n - 4; i += 4 )
result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
#endif
for( ; i < n; i++ )
result += tab[a[i]];
return result;
}
int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
{
if( cellSize == 1 )
return normHamming(a, b, n);
const uchar* tab = 0;
if( cellSize == 2 )
tab = popCountTable2;
else if( cellSize == 4 )
tab = popCountTable4;
else
return -1;
int i = 0;
int result = 0;
#if CV_ENABLE_UNROLLED
2015-12-15 20:55:43 +08:00
for( ; i <= n - 4; i += 4 )
result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
#endif
2015-12-15 20:55:43 +08:00
for( ; i < n; i++ )
result += tab[a[i] ^ b[i]];
return result;
}
float normL2Sqr_(const float* a, const float* b, int n)
{
int j = 0; float d = 0.f;
2017-03-16 13:20:41 +08:00
#if CV_AVX2
float CV_DECL_ALIGNED(32) buf[8];
__m256 d0 = _mm256_setzero_ps();
for( ; j <= n - 8; j += 8 )
{
__m256 t0 = _mm256_sub_ps(_mm256_loadu_ps(a + j), _mm256_loadu_ps(b + j));
2017-09-26 22:23:54 +08:00
#if CV_FMA3
2017-03-16 13:20:41 +08:00
d0 = _mm256_fmadd_ps(t0, t0, d0);
#else
d0 = _mm256_add_ps(d0, _mm256_mul_ps(t0, t0));
#endif
}
_mm256_store_ps(buf, d0);
d = buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7];
#elif CV_SSE
2015-12-15 20:55:43 +08:00
float CV_DECL_ALIGNED(16) buf[4];
__m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
for( ; j <= n - 8; j += 8 )
{
__m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
__m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
}
_mm_store_ps(buf, _mm_add_ps(d0, d1));
d = buf[0] + buf[1] + buf[2] + buf[3];
#endif
{
for( ; j <= n - 4; j += 4 )
{
float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
}
}
for( ; j < n; j++ )
{
float t = a[j] - b[j];
d += t*t;
}
return d;
}
float normL1_(const float* a, const float* b, int n)
{
int j = 0; float d = 0.f;
#if CV_SSE
float CV_DECL_ALIGNED(16) buf[4];
static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
__m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
__m128 absmask = _mm_load_ps((const float*)absbuf);
for( ; j <= n - 8; j += 8 )
{
__m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
__m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
}
_mm_store_ps(buf, _mm_add_ps(d0, d1));
d = buf[0] + buf[1] + buf[2] + buf[3];
#elif CV_NEON
float32x4_t v_sum = vdupq_n_f32(0.0f);
for ( ; j <= n - 4; j += 4)
v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j)));
float CV_DECL_ALIGNED(16) buf[4];
vst1q_f32(buf, v_sum);
d = buf[0] + buf[1] + buf[2] + buf[3];
#endif
{
for( ; j <= n - 4; j += 4 )
{
d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
}
}
for( ; j < n; j++ )
d += std::abs(a[j] - b[j]);
return d;
}
int normL1_(const uchar* a, const uchar* b, int n)
{
int j = 0, d = 0;
#if CV_SSE
__m128i d0 = _mm_setzero_si128();
for( ; j <= n - 16; j += 16 )
{
__m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
__m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
}
for( ; j <= n - 4; j += 4 )
{
__m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
__m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
}
d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
#elif CV_NEON
uint32x4_t v_sum = vdupq_n_u32(0.0f);
for ( ; j <= n - 16; j += 16)
{
uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
}
uint CV_DECL_ALIGNED(16) buf[4];
vst1q_u32(buf, v_sum);
d = buf[0] + buf[1] + buf[2] + buf[3];
#endif
{
for( ; j <= n - 4; j += 4 )
{
d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
}
}
for( ; j < n; j++ )
d += std::abs(a[j] - b[j]);
return d;
}
}} //cv::hal