mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
core: rework code locality
- to reduce the binary size of the FFmpeg Windows wrapper - MinGW linker doesn't support -ffunction-sections (used for the FFmpeg Windows wrapper) - move code to improve locality with the dependencies it uses - move UMat::dot() to matmul.dispatch.cpp (Mat::dot() is already there) - move UMat::inv() to lapack.cpp - move UMat::mul() to arithm.cpp - move UMat::eye() to matrix_operations.cpp (near the setIdentity() implementation) - move normalize(): convert_scale.cpp => norm.cpp - move convertAndUnrollScalar(): arithm.cpp => copy.cpp - move scalarToRawData(): array.cpp => copy.cpp - move transpose(): matrix_operations.cpp => matrix_transform.cpp - move flip(), rotate(): copy.cpp => matrix_transform.cpp (rotate90 uses flip and transpose) - add 'OPENCV_CORE_EXCLUDE_C_API' CMake variable to exclude compilation of C-API functions from the core module - matrix_wrap.cpp: add compile-time checks for CUDA/OpenGL calls - the steps above reduce the FFmpeg wrapper size by ~1.5 MB (the initial size of the OpenCV part is about 3 MB)
This commit is contained in:
parent
7bcb51eded
commit
65eb946756
@ -145,6 +145,10 @@ ocv_target_link_libraries(${the_module} PRIVATE
|
||||
"${OPENCV_HAL_LINKER_LIBS}"
|
||||
)
|
||||
|
||||
# When OPENCV_CORE_EXCLUDE_C_API is set, define OPENCV_EXCLUDE_C_API=1 privately for this module's sources.
if(OPENCV_CORE_EXCLUDE_C_API)
|
||||
ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1")
|
||||
endif()
|
||||
|
||||
# When HAVE_HPX is set, link the libraries listed in HPX_LIBRARIES into this module (LINK_PRIVATE).
if(HAVE_HPX)
|
||||
ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}")
|
||||
endif()
|
||||
|
@ -57,26 +57,6 @@ namespace cv
|
||||
* logical operations *
|
||||
\****************************************************************************************/
|
||||
|
||||
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
|
||||
{
|
||||
int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
|
||||
size_t esz = CV_ELEM_SIZE(buftype);
|
||||
BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype);
|
||||
CV_Assert(cvtFn);
|
||||
cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
|
||||
// unroll the scalar
|
||||
if( scn < cn )
|
||||
{
|
||||
CV_Assert( scn == 1 );
|
||||
size_t esz1 = CV_ELEM_SIZE1(buftype);
|
||||
for( size_t i = esz1; i < esz; i++ )
|
||||
scbuf[i] = scbuf[i - esz1];
|
||||
}
|
||||
for( size_t i = esz; i < blocksize*esz; i++ )
|
||||
scbuf[i] = scbuf[i - esz];
|
||||
}
|
||||
|
||||
|
||||
enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
|
||||
OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
|
||||
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
|
||||
@ -1002,9 +982,7 @@ static BinaryFuncC* getRecipTab()
|
||||
return recipTab;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void cv::multiply(InputArray src1, InputArray src2,
|
||||
void multiply(InputArray src1, InputArray src2,
|
||||
OutputArray dst, double scale, int dtype)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
@ -1013,7 +991,7 @@ void cv::multiply(InputArray src1, InputArray src2,
|
||||
true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
|
||||
}
|
||||
|
||||
void cv::divide(InputArray src1, InputArray src2,
|
||||
void divide(InputArray src1, InputArray src2,
|
||||
OutputArray dst, double scale, int dtype)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
@ -1021,7 +999,7 @@ void cv::divide(InputArray src1, InputArray src2,
|
||||
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
|
||||
}
|
||||
|
||||
void cv::divide(double scale, InputArray src2,
|
||||
void divide(double scale, InputArray src2,
|
||||
OutputArray dst, int dtype)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
@ -1029,13 +1007,17 @@ void cv::divide(double scale, InputArray src2,
|
||||
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
|
||||
}
|
||||
|
||||
/**
 * Returns the result of multiply(*this, m, ..., scale) as a new UMat.
 *
 * @param m     second operand passed to multiply()
 * @param scale scale factor passed to multiply()
 */
UMat UMat::mul(InputArray m, double scale) const
{
    UMat product;
    multiply(*this, m, product, scale);
    return product;
}
|
||||
|
||||
/****************************************************************************************\
|
||||
* addWeighted *
|
||||
\****************************************************************************************/
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
static BinaryFuncC* getAddWeightedTab()
|
||||
{
|
||||
static BinaryFuncC addWeightedTab[] =
|
||||
@ -1849,6 +1831,9 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
/****************************************************************************************\
|
||||
* Earlier API: cvAdd etc. *
|
||||
\****************************************************************************************/
|
||||
@ -2111,4 +2096,5 @@ cvMaxS( const void* srcarr1, double value, void* dstarr )
|
||||
cv::max( src1, value, dst );
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
/* End of file. */
|
||||
|
@ -48,6 +48,8 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
#define CV_ORIGIN_TL 0
|
||||
#define CV_ORIGIN_BL 1
|
||||
|
||||
@ -3211,53 +3213,6 @@ void DefaultDeleter<CvMatND>::operator ()(CvMatND* obj) const { cvReleaseMatND(&
|
||||
void DefaultDeleter<CvSparseMat>::operator ()(CvSparseMat* obj) const { cvReleaseSparseMat(&obj); }
|
||||
void DefaultDeleter<CvMemStorage>::operator ()(CvMemStorage* obj) const { cvReleaseMemStorage(&obj); }
|
||||
|
||||
template <typename T> static inline
|
||||
void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to)
|
||||
{
|
||||
int i = 0;
|
||||
for(; i < cn; i++)
|
||||
buf[i] = saturate_cast<T>(s.val[i]);
|
||||
for(; i < unroll_to; i++)
|
||||
buf[i] = buf[i-cn];
|
||||
}
|
||||
|
||||
void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert(cn <= 4);
|
||||
switch(depth)
|
||||
{
|
||||
case CV_8U:
|
||||
scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_8S:
|
||||
scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16U:
|
||||
scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16S:
|
||||
scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_32S:
|
||||
scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_32F:
|
||||
scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_64F:
|
||||
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16F:
|
||||
scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsUnsupportedFormat,"");
|
||||
}
|
||||
}
|
||||
|
||||
} // cv::
|
||||
|
||||
|
||||
@ -3295,4 +3250,5 @@ void* cvClone( const void* struct_ptr )
|
||||
}
|
||||
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
/* End of file. */
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL void
|
||||
cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 )
|
||||
@ -132,3 +133,5 @@ CV_IMPL void cvNormalize( const CvArr* srcarr, CvArr* dstarr,
|
||||
CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() );
|
||||
cv::normalize( src, dst, a, b, norm_type, dst.type(), mask );
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "convert_scale.simd.hpp"
|
||||
#include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
|
||||
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
@ -117,143 +116,4 @@ void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double bet
|
||||
}
|
||||
}
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
/**
 * OpenCL path of normalize(): writes src*scale + delta into _dst, optionally
 * restricted to the non-zero elements of _mask.
 *
 * @param _src   input array
 * @param _dst   output array
 * @param _mask  optional mask; when empty a plain convertTo() is used
 * @param dtype  destination type passed to convertTo() / used to build the kernel
 * @param scale  multiplicative factor
 * @param delta  additive offset
 * @return true when the operation was performed here, false when the caller
 *         has to fall back (no double support for CV_64F data, or the kernel
 *         could not be created / run).
 */
static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
                           double scale, double delta )
{
    UMat src = _src.getUMat();

    if( _mask.empty() )
        src.convertTo( _dst, dtype, scale, delta );
    else if (src.channels() <= 4)
    {
        const ocl::Device & dev = ocl::Device::getDefault();

        int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
                ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
                rowsPerWI = dev.isIntel() ? 4 : 1;

        float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
        bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
                haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
                haveDelta = std::fabs(delta) > DBL_EPSILON,
                doubleSupport = dev.doubleFPConfig() > 0;

        // Identity transform with matching types: a masked copy is enough.
        if (!haveScale && !haveDelta && stype == dtype)
        {
            _src.copyTo(_dst, _mask);
            return true;
        }
        // Zero scale: every masked element becomes `delta`.
        // Scalar::all() is required here: Scalar(delta) would set only the
        // first channel to delta and the remaining channels to 0, which does
        // not match src*0 + delta for multi-channel data.
        if (haveZeroScale)
        {
            _dst.setTo(Scalar::all(delta), _mask);
            return true;
        }

        if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
            return false;

        char cvt[2][40];
        String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
                             " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
                             ocl::typeToStr(stype), ocl::typeToStr(dtype),
                             ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
                             rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                             ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                             haveScale ? " -D HAVE_SCALE" : "",
                             haveDelta ? " -D HAVE_DELTA" : "",
                             ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));

        ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
        if (k.empty())
            return false;

        UMat mask = _mask.getUMat(), dst = _dst.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                dstarg = ocl::KernelArg::ReadWrite(dst);

        // The argument list follows the HAVE_SCALE / HAVE_DELTA build options:
        // only the parameters enabled above are passed.
        if (haveScale)
        {
            if (haveDelta)
                k.args(srcarg, maskarg, dstarg, fscale, fdelta);
            else
                k.args(srcarg, maskarg, dstarg, fscale);
        }
        else
        {
            if (haveDelta)
                k.args(srcarg, maskarg, dstarg, fdelta);
            else
                k.args(srcarg, maskarg, dstarg);
        }

        // Each work item handles rowsPerWI rows, so the row dimension is rounded up.
        size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
        return k.run(2, globalsize, NULL, false);
    }
    else
    {
        // More than 4 channels: convert into a temporary, then copy through the mask.
        UMat temp;
        src.convertTo( temp, dtype, scale, delta );
        temp.copyTo( _dst, _mask );
    }

    return true;
}
|
||||
|
||||
#endif
|
||||
|
||||
/**
 * Normalizes _src into _dst by applying a linear transform
 * dst = src*gain + offset whose coefficients depend on norm_type:
 *  - CV_MINMAX: maps the [min, max] range of the (masked) source onto
 *    [min(a,b), max(a,b)];
 *  - CV_L1 / CV_L2 / CV_C: scales so that the chosen norm becomes `a`.
 * Any other norm_type raises CV_StsBadArg.
 *
 * @param rtype destination depth; when negative it is taken from _dst if its
 *              type is fixed, otherwise from the source depth
 * @param _mask optional mask; when given only masked elements of _dst are written
 */
void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
               int norm_type, int rtype, InputArray _mask)
{
    CV_INSTRUMENT_REGION();

    const int srcDepth = CV_MAT_DEPTH(_src.type());
    if( rtype < 0 )
    {
        if( _dst.fixedType() )
            rtype = _dst.depth();
        else
            rtype = srcDepth;
    }

    double gain = 1, offset = 0;
    const bool isNormBased = norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C;

    if( norm_type == CV_MINMAX )
    {
        double srcLo = 0, srcHi = 0;
        const double dstLo = MIN( a, b ), dstHi = MAX( a, b );
        minMaxIdx( _src, &srcLo, &srcHi, 0, 0, _mask );
        // A (near) constant source gets a zero gain instead of a division by ~0.
        gain = (dstHi - dstLo)*(srcHi - srcLo > DBL_EPSILON ? 1./(srcHi - srcLo) : 0);
        if( rtype == CV_32F )
        {
            // Round the coefficients to float precision for a float destination.
            gain = (float)gain;
            offset = (float)dstLo - (float)(srcLo*gain);
        }
        else
            offset = dstLo - srcLo*gain;
    }
    else if( isNormBased )
    {
        const double srcNorm = norm( _src, norm_type, _mask );
        gain = srcNorm > DBL_EPSILON ? a/srcNorm : 0.;
        offset = 0;
    }
    else
        CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );

    CV_OCL_RUN(_dst.isUMat(),
               ocl_normalize(_src, _dst, _mask, rtype, gain, offset))

    Mat src = _src.getMat();
    if( !_mask.empty() )
    {
        // Convert into a temporary and copy only the masked elements.
        Mat converted;
        src.convertTo( converted, rtype, gain, offset );
        converted.copyTo( _dst, _mask );
    }
    else
        src.convertTo( _dst, rtype, gain, offset );
}
|
||||
|
||||
} // namespace
|
||||
|
@ -53,6 +53,72 @@
|
||||
namespace cv
|
||||
{
|
||||
|
||||
template <typename T> static inline
|
||||
void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to)
|
||||
{
|
||||
int i = 0;
|
||||
for(; i < cn; i++)
|
||||
buf[i] = saturate_cast<T>(s.val[i]);
|
||||
for(; i < unroll_to; i++)
|
||||
buf[i] = buf[i-cn];
|
||||
}
|
||||
|
||||
void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert(cn <= 4);
|
||||
switch(depth)
|
||||
{
|
||||
case CV_8U:
|
||||
scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_8S:
|
||||
scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16U:
|
||||
scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16S:
|
||||
scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_32S:
|
||||
scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_32F:
|
||||
scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_64F:
|
||||
scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
|
||||
break;
|
||||
case CV_16F:
|
||||
scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsUnsupportedFormat,"");
|
||||
}
|
||||
}
|
||||
|
||||
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
|
||||
{
|
||||
int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
|
||||
size_t esz = CV_ELEM_SIZE(buftype);
|
||||
BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype);
|
||||
CV_Assert(cvtFn);
|
||||
cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
|
||||
// unroll the scalar
|
||||
if( scn < cn )
|
||||
{
|
||||
CV_Assert( scn == 1 );
|
||||
size_t esz1 = CV_ELEM_SIZE1(buftype);
|
||||
for( size_t i = esz1; i < esz; i++ )
|
||||
scbuf[i] = scbuf[i - esz1];
|
||||
}
|
||||
for( size_t i = esz; i < blocksize*esz; i++ )
|
||||
scbuf[i] = scbuf[i - esz];
|
||||
}
|
||||
|
||||
template<typename T> static void
|
||||
copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
|
||||
{
|
||||
@ -594,490 +660,6 @@ Mat& Mat::setTo(InputArray _value, InputArray _mask)
|
||||
return *this;
|
||||
}
|
||||
|
||||
#if CV_SIMD128
|
||||
/**
 * Mirrors every row left-to-right for elements whose size equals one lane of
 * vector type V (T = V::lane_type).
 *
 * Each row is processed from both ends towards the middle: first with
 * lane-reversed vector loads/stores, then element by element (as whole T
 * values when src/dst are aligned for T, otherwise byte by byte).
 * Both values of a pair are read before either is written.
 */
template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
    typedef typename V::lane_type T;
    const int rowBytes = (int)(size.width*esz);
    const int halfBytes = (rowBytes + 1)/2;
    // Part of the half row that can be covered by whole vectors.
    const int simdBytes = halfBytes & -v_uint8x16::nlanes;

#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(T)>(src, dst));
#endif

    while( size.height-- )
    {
        int left = 0, right = rowBytes;

        // Vector part: swap a block from the left with a block from the right,
        // reversing the lane order inside each block.
        for( ; left < simdBytes; left += v_uint8x16::nlanes, right -= v_uint8x16::nlanes )
        {
            V head = v_load((T*)((uchar*)src + left));
            V tail = v_load((T*)((uchar*)src + right - v_uint8x16::nlanes));
            head = v_reverse(head);
            tail = v_reverse(tail);
            v_store((T*)(dst + right - v_uint8x16::nlanes), head);
            v_store((T*)(dst + left), tail);
        }

        if (isAligned<sizeof(T)>(src, dst))
        {
            // Remaining elements, swapped as whole T values.
            for ( ; left < halfBytes; left += sizeof(T), right -= sizeof(T) )
            {
                T head = *((T*)((uchar*)src + left));
                T tail = *((T*)((uchar*)src + right - sizeof(T)));
                *((T*)(dst + right - sizeof(T))) = head;
                *((T*)(dst + left)) = tail;
            }
        }
        else
        {
            // Remaining elements, swapped one byte at a time.
            for ( ; left < halfBytes; left += sizeof(T), right -= sizeof(T) )
            {
                for (int b = 0; b < (int)sizeof(T); b++)
                {
                    uchar head = *((uchar*)src + left + b);
                    uchar tail = *((uchar*)src + right + b - sizeof(T));
                    *(dst + right + b - sizeof(T)) = head;
                    *(dst + left + b) = tail;
                }
            }
        }

        src += sstep;
        dst += dstep;
    }
}
|
||||
|
||||
/**
 * Mirrors every row left-to-right for elements that are handled as a T1
 * value immediately followed by a T2 value (sizeof(T1) + sizeof(T2) bytes
 * per step).
 *
 * Each row is processed from both ends towards the middle, and both
 * elements of a pair are read before either is written.
 */
template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
    const int rowBytes = (int)(size.width*esz);
    const int halfBytes = (rowBytes + 1)/2;

#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(T1)>(src, dst));
    CV_Assert(isAligned<sizeof(T2)>(src, dst));
#endif

    while( size.height-- )
    {
        int left = 0, right = rowBytes;
        for ( ; left < halfBytes; left += sizeof(T1) + sizeof(T2), right -= sizeof(T1) + sizeof(T2) )
        {
            // Read the left element (T1 part + T2 part) and the right element.
            T1 leftA = *((T1*)((uchar*)src + left));
            T2 leftB = *((T2*)((uchar*)src + left + sizeof(T1)));
            T1 rightA = *((T1*)((uchar*)src + right - sizeof(T1) - sizeof(T2)));
            T2 rightB = *((T2*)((uchar*)src + right - sizeof(T2)));

            // Write them back swapped.
            *((T1*)(dst + right - sizeof(T1) - sizeof(T2))) = leftA;
            *((T2*)(dst + right - sizeof(T2))) = leftB;
            *((T1*)(dst + left)) = rightA;
            *((T2*)(dst + left + sizeof(T1))) = rightB;
        }

        src += sstep;
        dst += dstep;
    }
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
|
||||
{
|
||||
#if CV_SIMD
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
|
||||
#endif
|
||||
if (esz == 2 * v_uint8x16::nlanes)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = end/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes )
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_uint8x32 t0, t1;
|
||||
|
||||
t0 = v256_load((uchar*)src + i);
|
||||
t1 = v256_load((uchar*)src + j);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + i, t1);
|
||||
#else
|
||||
v_uint8x16 t0, t1, t2, t3;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t1 = v_load((uchar*)src + i + v_uint8x16::nlanes);
|
||||
t2 = v_load((uchar*)src + j);
|
||||
t3 = v_load((uchar*)src + j + v_uint8x16::nlanes);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + j + v_uint8x16::nlanes, t1);
|
||||
v_store(dst + i, t2);
|
||||
v_store(dst + i + v_uint8x16::nlanes, t3);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (esz == v_uint8x16::nlanes)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = end/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
|
||||
{
|
||||
v_uint8x16 t0, t1;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t1 = v_load((uchar*)src + j);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + i, t1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (esz == 8
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(uint64)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 4
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(unsigned)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 2
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(ushort)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 1)
|
||||
{
|
||||
flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 24
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(uint64_t)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = (end + 1)/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) )
|
||||
{
|
||||
v_uint8x16 t0, t1;
|
||||
uint64_t t2, t3;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes));
|
||||
t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t));
|
||||
t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t)));
|
||||
v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0);
|
||||
*((uint64_t*)(dst + j - sizeof(uint64_t))) = t2;
|
||||
v_store(dst + i, t1);
|
||||
*((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if !CV_STRONG_ALIGNMENT
|
||||
else if (esz == 12)
|
||||
{
|
||||
flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 6)
|
||||
{
|
||||
flipHoriz_double<uint,ushort>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 3)
|
||||
{
|
||||
flipHoriz_double<ushort,uchar>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
#endif // CV_SIMD
|
||||
{
|
||||
int i, j, limit = (int)(((size.width + 1)/2)*esz);
|
||||
AutoBuffer<int> _tab(size.width*esz);
|
||||
int* tab = _tab.data();
|
||||
|
||||
for( i = 0; i < size.width; i++ )
|
||||
for( size_t k = 0; k < esz; k++ )
|
||||
tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( i = 0; i < limit; i++ )
|
||||
{
|
||||
j = tab[i];
|
||||
uchar t0 = src[i], t1 = src[j];
|
||||
dst[i] = t1; dst[j] = t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
|
||||
{
|
||||
const uchar* src1 = src0 + (size.height - 1)*sstep;
|
||||
uchar* dst1 = dst0 + (size.height - 1)*dstep;
|
||||
size.width *= (int)esz;
|
||||
|
||||
for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
|
||||
dst0 += dstep, dst1 -= dstep )
|
||||
{
|
||||
int i = 0;
|
||||
#if CV_SIMD
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
|
||||
#endif
|
||||
{
|
||||
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
|
||||
{
|
||||
v_int32 t0 = vx_load((int*)(src0 + i));
|
||||
v_int32 t1 = vx_load((int*)(src1 + i));
|
||||
vx_store((int*)(dst0 + i), t1);
|
||||
vx_store((int*)(dst1 + i), t0);
|
||||
}
|
||||
}
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
else
|
||||
{
|
||||
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
|
||||
{
|
||||
v_uint8 t0 = vx_load(src0 + i);
|
||||
v_uint8 t1 = vx_load(src1 + i);
|
||||
vx_store(dst0 + i, t1);
|
||||
vx_store(dst1 + i, t0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
|
||||
{
|
||||
for( ; i <= size.width - 16; i += 16 )
|
||||
{
|
||||
int t0 = ((int*)(src0 + i))[0];
|
||||
int t1 = ((int*)(src1 + i))[0];
|
||||
|
||||
((int*)(dst0 + i))[0] = t1;
|
||||
((int*)(dst1 + i))[0] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[1];
|
||||
t1 = ((int*)(src1 + i))[1];
|
||||
|
||||
((int*)(dst0 + i))[1] = t1;
|
||||
((int*)(dst1 + i))[1] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[2];
|
||||
t1 = ((int*)(src1 + i))[2];
|
||||
|
||||
((int*)(dst0 + i))[2] = t1;
|
||||
((int*)(dst1 + i))[2] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[3];
|
||||
t1 = ((int*)(src1 + i))[3];
|
||||
|
||||
((int*)(dst0 + i))[3] = t1;
|
||||
((int*)(dst1 + i))[3] = t0;
|
||||
}
|
||||
|
||||
for( ; i <= size.width - 4; i += 4 )
|
||||
{
|
||||
int t0 = ((int*)(src0 + i))[0];
|
||||
int t1 = ((int*)(src1 + i))[0];
|
||||
|
||||
((int*)(dst0 + i))[0] = t1;
|
||||
((int*)(dst1 + i))[0] = t0;
|
||||
}
|
||||
}
|
||||
|
||||
for( ; i < size.width; i++ )
|
||||
{
|
||||
uchar t0 = src0[i];
|
||||
uchar t1 = src1[i];
|
||||
|
||||
dst0[i] = t1;
|
||||
dst1[i] = t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
|
||||
|
||||
/**
 * OpenCL implementation of flip().
 *
 * @param flipCode 0 selects the "arithm_flip_rows" kernel, a positive value
 *                 "arithm_flip_cols", a negative value "arithm_flip_rows_cols";
 *                 must be in [-1, 1]
 * @return false when the source has more than 4 channels or the kernel could
 *         not be built / run, true otherwise.
 */
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
{
    CV_Assert(flipCode >= -1 && flipCode <= 1);

    const ocl::Device & dev = ocl::Device::getDefault();
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
            flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);

    const bool doubleSupport = dev.doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        kercn = cn;

    if (cn > 4)
        return false;

    // Select the kernel and the matching flip flags.
    const char * kernelName;
    if (flipCode == 0)
    {
        kernelName = "arithm_flip_rows";
        flipType = FLIP_ROWS;
    }
    else if (flipCode > 0)
    {
        kernelName = "arithm_flip_cols";
        flipType = FLIP_COLS;
    }
    else
    {
        kernelName = "arithm_flip_rows_cols";
        flipType = FLIP_BOTH;
    }

    const int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    // 3-channel data is vectorized only for the pure row flip.
    kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;

    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
        format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
                kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
                kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
    if (k.empty())
        return false;

    Size size = _src.size();
    _dst.create(size, type);
    UMat src = _src.getUMat(), dst = _dst.getUMat();

    // Only half of the flipped dimension needs work items.
    int cols = size.width * cn / kercn, rows = size.height;
    if (flipType == FLIP_COLS)
        cols = (cols + 1) >> 1;
    if (flipType & FLIP_ROWS)
        rows = (rows + 1) >> 1;

    k.args(ocl::KernelArg::ReadOnlyNoSize(src),
           ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols);

    const size_t maxWorkGroupSize = dev.maxWorkGroupSize();
    CV_Assert(maxWorkGroupSize % 4 == 0);

    size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy },
            localsize[2] = { maxWorkGroupSize / 4, 4 };
    // An explicit local size is passed only for the column flip on non-Intel devices.
    size_t* local = ((flipType == FLIP_COLS) && !dev.isIntel()) ? localsize : NULL;
    return k.run(2, globalsize, local, false);
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined HAVE_IPP
|
||||
/**
 * IPP implementation of flip() (available only with HAVE_IPP_IW).
 *
 * @param flip_mode negative -> ippAxsBoth, zero -> ippAxsHorizontal,
 *                  positive -> ippAxsVertical
 * @return true when iwiMirror completed; false when IPP IW is unavailable,
 *         the input is rejected by the size guard below, or IPP threw.
 */
static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
{
#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP();

    // Details: https://github.com/opencv/opencv/issues/12943
    // Row-swapping modes are declined for arrays of 2Gb and more unless the
    // top IPP feature set is exactly SSE4.2.
    if (flip_mode <= 0 /* swap rows */
        && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42
        && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/
    )
        return false;

    const IppiAxis ippMode = flip_mode < 0  ? ippAxsBoth
                           : flip_mode == 0 ? ippAxsHorizontal
                                            : ippAxsVertical;

    try
    {
        ::ipp::IwiImage iwSrc = ippiGetImage(src);
        ::ipp::IwiImage iwDst = ippiGetImage(dst);

        CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
    }
    catch(const ::ipp::IwException &)
    {
        return false;
    }

    return true;
#else
    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode);
    return false;
#endif
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Flips a 1D/2D array.
 *
 * @param flip_mode 0  -> reverse the row order (flipVert);
 *                  >0 -> reverse the element order inside each row (flipHoriz);
 *                  <0 -> both.
 *
 * Degenerate cases are simplified first: for a negative mode a single-column
 * input is treated as mode 0 and a single-row input as mode 1; a flip that
 * cannot change anything is replaced by a plain copy.
 */
void flip( InputArray _src, OutputArray _dst, int flip_mode )
{
    CV_INSTRUMENT_REGION();

    CV_Assert( _src.dims() <= 2 );
    Size size = _src.size();

    const bool singleCol = size.width == 1;
    const bool singleRow = size.height == 1;

    if (flip_mode < 0)
    {
        if (singleCol)
            flip_mode = 0;
        if (singleRow)
            flip_mode = 1;
    }

    // Flipping along an axis of length 1 is a no-op: just copy.
    if ((singleCol && flip_mode > 0) || (singleRow && flip_mode == 0))
    {
        _src.copyTo(_dst);
        return;
    }

    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))

    Mat src = _src.getMat();
    int type = src.type();
    _dst.create( size, type );
    Mat dst = _dst.getMat();

    CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode));

    const size_t esz = CV_ELEM_SIZE(type);

    if( flip_mode > 0 )
        flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
    else
        flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );

    // Negative mode: the rows were reversed above, now mirror each row in place.
    if( flip_mode < 0 )
        flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
}
|
||||
|
||||
/**
 * Rotates a 1D/2D array by a multiple of 90 degrees, built from transpose()
 * and flip():
 *  - ROTATE_90_CLOCKWISE:        transpose, then flip with code 1
 *  - ROTATE_180:                 flip with code -1
 *  - ROTATE_90_COUNTERCLOCKWISE: transpose, then flip with code 0
 * Any other rotateMode leaves _dst untouched.
 */
void rotate(InputArray _src, OutputArray _dst, int rotateMode)
{
    CV_Assert(_src.dims() <= 2);

    if (rotateMode == ROTATE_90_CLOCKWISE)
    {
        transpose(_src, _dst);
        flip(_dst, _dst, 1);
    }
    else if (rotateMode == ROTATE_180)
    {
        flip(_src, _dst, -1);
    }
    else if (rotateMode == ROTATE_90_COUNTERCLOCKWISE)
    {
        transpose(_src, _dst);
        flip(_dst, _dst, 0);
    }
}
|
||||
|
||||
#if defined HAVE_OPENCL && !defined __APPLE__
|
||||
|
||||
@ -1499,6 +1081,9 @@ void cv::copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
/* dst = src */
|
||||
CV_IMPL void
|
||||
cvCopy( const void* srcarr, void* dstarr, const void* maskarr )
|
||||
@ -1614,4 +1199,5 @@ cvRepeat( const CvArr* srcarr, CvArr* dstarr )
|
||||
cv::repeat(src, dst.rows/src.rows, dst.cols/src.cols, dst);
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
/* End of file. */
|
||||
|
@ -40,6 +40,8 @@
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
/* default alignment for dynamic data structures residing in storages. */
|
||||
#define CV_STRUCT_ALIGN ((int)sizeof(double))
|
||||
|
||||
@ -3585,4 +3587,5 @@ void seqInsertSlice( CvSeq* seq, int before_index, const CvArr* from_arr )
|
||||
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
/* End of file. */
|
||||
|
@ -4640,6 +4640,9 @@ int cv::getOptimalDFTSize( int size0 )
|
||||
return optimalDFTSizeTab[b];
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL void
|
||||
cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
|
||||
{
|
||||
@ -4695,4 +4698,5 @@ cvGetOptimalDFTSize( int size0 )
|
||||
return cv::getOptimalDFTSize(size0);
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
/* End of file. */
|
||||
|
@ -753,8 +753,6 @@ SVBkSb( int m, int n, const double* w, size_t wstep,
|
||||
(double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/****************************************************************************************\
|
||||
* Determinant of the matrix *
|
||||
\****************************************************************************************/
|
||||
@ -764,7 +762,7 @@ SVBkSb( int m, int n, const double* w, size_t wstep,
|
||||
m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \
|
||||
m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
|
||||
|
||||
double cv::determinant( InputArray _mat )
|
||||
double determinant( InputArray _mat )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -842,7 +840,7 @@ double cv::determinant( InputArray _mat )
|
||||
#define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
|
||||
#define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
|
||||
|
||||
double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
double invert( InputArray _src, OutputArray _dst, int method )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -1069,13 +1067,19 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns the inverse of this matrix as computed by invert() with the given
// method. The value returned by invert() itself is not inspected here.
UMat UMat::inv(int method) const
{
    UMat inverse;
    invert(*this, inverse, method);
    return inverse;
}
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* Solving a linear system *
|
||||
\****************************************************************************************/
|
||||
|
||||
bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
|
||||
bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -1374,7 +1378,7 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
|
||||
|
||||
/////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
|
||||
|
||||
bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
|
||||
bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -1396,7 +1400,7 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
|
||||
const bool evecNeeded = _evects.needed();
|
||||
const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly;
|
||||
_evals.create(n, 1, type);
|
||||
cv::Mat evals = _evals.getMat();
|
||||
Mat evals = _evals.getMat();
|
||||
if ( type == CV_64F )
|
||||
{
|
||||
Eigen::MatrixXd src_eig, zeros_eig;
|
||||
@ -1448,9 +1452,6 @@ bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
static void _SVDcompute( InputArray _aarr, OutputArray _w,
|
||||
OutputArray _u, OutputArray _vt, int flags )
|
||||
{
|
||||
@ -1598,6 +1599,9 @@ void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs,
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL double
|
||||
cvDet( const CvArr* arr )
|
||||
{
|
||||
@ -1789,3 +1793,4 @@ cvSVBkSb( const CvArr* warr, const CvArr* uarr,
|
||||
cv::SVD::backSubst(w, u, v, rhs, dst);
|
||||
CV_Assert( dst.data == dst0.data );
|
||||
}
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
@ -1638,6 +1638,9 @@ void patchNaNs( InputOutputArray _a, double _val )
|
||||
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
/* C API wrapper: forwards to cv::cubeRoot(). */
CV_IMPL float cvCbrt(float value)
{
    return cv::cubeRoot(value);
}
|
||||
/* C API wrapper: forwards to cv::fastAtan2() with the same (y, x) argument order. */
CV_IMPL float cvFastArctan(float y, float x)
{
    return cv::fastAtan2(y, x);
}
|
||||
|
||||
@ -1721,6 +1724,7 @@ CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
|
||||
return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
||||
/*
|
||||
Finds real roots of cubic, quadratic or linear equation.
|
||||
@ -2016,6 +2020,8 @@ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL int
|
||||
cvSolveCubic( const CvMat* coeffs, CvMat* roots )
|
||||
{
|
||||
@ -2035,6 +2041,7 @@ void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
|
||||
CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
||||
|
||||
// Common constants for dispatched code
|
||||
|
@ -999,8 +999,79 @@ double Mat::dot(InputArray _mat) const
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
// OpenCL path for the dot product: builds the "reduce" kernel with OP_DOT,
// lets it fill a 1 x dbsize buffer, and sums that buffer on the host into res.
// Returns false (res is left untouched) when the input depth is CV_64F and the
// default device reports no double support, when the kernel object is empty,
// or when the kernel run reports failure.
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    // Both inputs are viewed as single-channel arrays.
    UMat src1 = _src1.getUMat().reshape(1);
    UMat src2 = _src2.getUMat().reshape(1);

    const int type = src1.type();
    const int depth = CV_MAT_DEPTH(type);
    const int kercn = ocl::predictOptimalVectorWidth(src1, src2);

    const ocl::Device & dev = ocl::Device::getDefault();
    const bool doubleSupport = dev.doubleFPConfig() > 0;

    if ( depth == CV_64F && !doubleSupport )
        return false;

    const int dbsize = dev.maxComputeUnits();
    size_t wgs = dev.maxWorkGroupSize();
    const int ddepth = std::max(CV_32F, depth);

    // Grow to the first power of two that is not below wgs, then halve it.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    // Output buffer of the kernel: one row with dbsize elements of depth ddepth.
    UMat db(1, dbsize, ddepth);

    k.args(ocl::KernelArg::ReadOnlyNoSize(src1), src1.cols, (int)src1.total(), dbsize,
           ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    res = sum(db.getMat(ACCESS_READ))[0];
    return true;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Dot product of this matrix with m, which must have the same size and type.
// When built with OpenCL the ocl_dot() path is offered first (for dims <= 2);
// otherwise the computation is delegated to Mat::dot() on a read-only mapping.
double UMat::dot(InputArray m) const
{
    CV_INSTRUMENT_REGION();

    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    double oclResult = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, oclResult), oclResult)
#endif

    Mat hostView = getMat(ACCESS_READ);
    return hostView.dot(m);
}
|
||||
|
||||
} // namespace cv::
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
/****************************************************************************************\
|
||||
* Earlier API *
|
||||
\****************************************************************************************/
|
||||
@ -1225,4 +1296,6 @@ cvBackProjectPCA( const CvArr* proj_arr, const CvArr* avg_arr,
|
||||
CV_Assert(dst0.data == dst.data);
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
||||
/* End of file. */
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "opencv2/core/mat.hpp"
|
||||
#include "opencv2/core/types_c.h"
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
// glue
|
||||
|
||||
CvMatND cvMatND(const cv::Mat& m)
|
||||
@ -360,7 +361,6 @@ cvSort( const CvArr* _src, CvArr* _dst, CvArr* _idx, int flags )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CV_IMPL int
|
||||
cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels,
|
||||
CvTermCriteria termcrit, int attempts, CvRNG*,
|
||||
@ -389,3 +389,5 @@ cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels,
|
||||
*_compactness = compactness;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
@ -226,6 +226,23 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
namespace cv {
|
||||
|
||||
UMat UMat::eye(int rows, int cols, int type)
|
||||
{
|
||||
return UMat::eye(Size(cols, rows), type);
|
||||
}
|
||||
|
||||
UMat UMat::eye(Size size, int type)
|
||||
{
|
||||
UMat m(size, type);
|
||||
setIdentity(m);
|
||||
return m;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//////////////////////////////////////////// trace ///////////////////////////////////////////
|
||||
|
||||
cv::Scalar cv::trace( InputArray _m )
|
||||
@ -260,285 +277,6 @@ cv::Scalar cv::trace( InputArray _m )
|
||||
return cv::sum(m.diag());
|
||||
}
|
||||
|
||||
////////////////////////////////////// transpose /////////////////////////////////////////
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
template<typename T> static void
|
||||
transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
|
||||
{
|
||||
int i=0, j, m = sz.width, n = sz.height;
|
||||
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; i <= m - 4; i += 4 )
|
||||
{
|
||||
T* d0 = (T*)(dst + dstep*i);
|
||||
T* d1 = (T*)(dst + dstep*(i+1));
|
||||
T* d2 = (T*)(dst + dstep*(i+2));
|
||||
T* d3 = (T*)(dst + dstep*(i+3));
|
||||
|
||||
for( j = 0; j <= n - 4; j += 4 )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
|
||||
const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
|
||||
const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
|
||||
const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
|
||||
|
||||
d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
|
||||
d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
|
||||
d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
|
||||
d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
|
||||
}
|
||||
|
||||
for( ; j < n; j++ )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
|
||||
d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; i < m; i++ )
|
||||
{
|
||||
T* d0 = (T*)(dst + dstep*i);
|
||||
j = 0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; j <= n - 4; j += 4 )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
|
||||
const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
|
||||
const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
|
||||
const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
|
||||
|
||||
d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
|
||||
}
|
||||
#endif
|
||||
for( ; j < n; j++ )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
|
||||
d0[j] = s0[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T> static void
|
||||
transposeI_( uchar* data, size_t step, int n )
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
T* row = (T*)(data + step*i);
|
||||
uchar* data1 = data + i*sizeof(T);
|
||||
for( int j = i+1; j < n; j++ )
|
||||
std::swap( row[j], *(T*)(data1 + step*j) );
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
|
||||
typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
|
||||
|
||||
#define DEF_TRANSPOSE_FUNC(suffix, type) \
|
||||
static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
|
||||
{ transpose_<type>(src, sstep, dst, dstep, sz); } \
|
||||
\
|
||||
static void transposeI_##suffix( uchar* data, size_t step, int n ) \
|
||||
{ transposeI_<type>(data, step, n); }
|
||||
|
||||
DEF_TRANSPOSE_FUNC(8u, uchar)
|
||||
DEF_TRANSPOSE_FUNC(16u, ushort)
|
||||
DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
|
||||
DEF_TRANSPOSE_FUNC(32s, int)
|
||||
DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
|
||||
DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
|
||||
DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
|
||||
DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
|
||||
DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
|
||||
DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
|
||||
|
||||
static TransposeFunc transposeTab[] =
|
||||
{
|
||||
0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
|
||||
transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
|
||||
0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8
|
||||
};
|
||||
|
||||
static TransposeInplaceFunc transposeInplaceTab[] =
|
||||
{
|
||||
0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0,
|
||||
transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4,
|
||||
0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
// OpenCL transpose. Uses the "transpose_inplace" kernel when source and
// destination share the same UMat data (the matrix must then be square) and
// the tiled "transpose" kernel otherwise. Returns false when the tiled kernel
// would need more local memory than the device reports, when the kernel
// object is empty, or when the run reports failure.
static bool ocl_transpose( InputArray _src, OutputArray _dst )
{
    const ocl::Device & dev = ocl::Device::getDefault();
    const int TILE_DIM = 32, BLOCK_ROWS = 8;
    const int type = _src.type();
    const int cn = CV_MAT_CN(type);
    const int depth = CV_MAT_DEPTH(type);
    const int rowsPerWI = dev.isIntel() ? 4 : 1;

    UMat src = _src.getUMat();
    _dst.create(src.cols, src.rows, type);
    UMat dst = _dst.getUMat();

    const bool inplace = dst.u == src.u;

    String kernelName("transpose");
    if (inplace)
    {
        CV_Assert(dst.cols == dst.rows);
        kernelName += "_inplace";
    }
    else
    {
        // check required local memory size
        const size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type);
        if (required_local_memory > dev.localMemSize())
            return false;
    }

    ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc,
                  format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s",
                         ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth),
                         cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : ""));
    if (k.empty())
        return false;

    size_t localsize[2] = { TILE_DIM, BLOCK_ROWS };
    size_t globalsize[2] = { (size_t)src.cols, 0 };

    if (inplace)
    {
        k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows);
        globalsize[1] = ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI;

        if (dev.isIntel())
        {
            localsize[0] = 16;
            localsize[1] = dev.maxWorkGroupSize() / localsize[0];
        }
    }
    else
    {
        k.args(ocl::KernelArg::ReadOnly(src),
               ocl::KernelArg::WriteOnlyNoSize(dst));
        globalsize[1] = divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS;
    }

    return k.run(2, globalsize, localsize, false);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
// Attempts the transpose with an IPP routine chosen by matrix type.
// The in-place (IR) variants are considered when dst shares its data pointer
// with src and is square; otherwise the out-of-place (R) variants are used.
// Returns true only when a matching routine exists and its status is >= 0.
static bool ipp_transpose( Mat &src, Mat &dst )
{
    CV_INSTRUMENT_REGION_IPP();

    typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
    typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize);

    const int type = src.type();
    // NOTE: these two names are handed to CV_INSTRUMENT_FUN_IPP below, which
    // may use the token as a label, so they are intentionally left as-is.
    IppiTranspose ippiTranspose = 0;
    IppiTransposeI ippiTranspose_I = 0;

    if (dst.data == src.data && dst.cols == dst.rows)
    {
        CV_SUPPRESS_DEPRECATED_START
        switch (type)
        {
        case CV_8UC1:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C1IR;  break;
        case CV_8UC3:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C3IR;  break;
        case CV_8UC4:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C4IR;  break;
        case CV_16UC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C1IR; break;
        case CV_16UC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C3IR; break;
        case CV_16UC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C4IR; break;
        case CV_16SC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C1IR; break;
        case CV_16SC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C3IR; break;
        case CV_16SC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C4IR; break;
        case CV_32SC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C1IR; break;
        case CV_32SC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C3IR; break;
        case CV_32SC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C4IR; break;
        case CV_32FC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C1IR; break;
        case CV_32FC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C3IR; break;
        case CV_32FC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C4IR; break;
        default: break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
    else
    {
        switch (type)
        {
        case CV_8UC1:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C1R;  break;
        case CV_8UC3:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C3R;  break;
        case CV_8UC4:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C4R;  break;
        case CV_16UC1: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C1R; break;
        case CV_16UC3: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C3R; break;
        case CV_16UC4: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C4R; break;
        case CV_16SC1: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C1R; break;
        case CV_16SC3: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C3R; break;
        case CV_16SC4: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C4R; break;
        case CV_32SC1: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C1R; break;
        case CV_32SC3: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C3R; break;
        case CV_32SC4: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C4R; break;
        case CV_32FC1: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C1R; break;
        case CV_32FC3: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C3R; break;
        case CV_32FC4: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C4R; break;
        default: break;
        }
    }

    IppiSize roiSize = { src.cols, src.rows };
    if (ippiTranspose != 0)
        return CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0;
    if (ippiTranspose_I != 0)
        return CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0;
    return false;
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Transposes a matrix of at most two dimensions whose element size does not
// exceed 32 bytes. The OpenCL and IPP paths are offered first; otherwise a
// kernel is looked up by element size in transposeTab / transposeInplaceTab.
// An empty source releases the destination.
void cv::transpose( InputArray _src, OutputArray _dst )
{
    CV_INSTRUMENT_REGION();

    const int type = _src.type();
    const int esz = CV_ELEM_SIZE(type);
    CV_Assert( _src.dims() <= 2 && esz <= 32 );

    CV_OCL_RUN(_dst.isUMat(),
               ocl_transpose(_src, _dst))

    Mat src = _src.getMat();
    if( src.empty() )
    {
        _dst.release();
        return;
    }

    _dst.create(src.cols, src.rows, src.type());
    Mat dst = _dst.getMat();

    // handle the case of single-column/single-row matrices, stored in STL vectors.
    const bool shapeIsTransposed = src.rows == dst.cols && src.cols == dst.rows;
    if( !shapeIsTransposed )
    {
        CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) );
        src.copyTo(dst);
        return;
    }

    CV_IPP_RUN_FAST(ipp_transpose(src, dst))

    if( dst.data != src.data )
    {
        // Separate buffers: out-of-place kernel.
        TransposeFunc func = transposeTab[esz];
        CV_Assert( func != 0 );
        func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() );
        return;
    }

    // Shared buffer: in-place kernel, square matrices only.
    TransposeInplaceFunc func = transposeInplaceTab[esz];
    CV_Assert( func != 0 );
    CV_Assert( dst.cols == dst.rows );
    func( dst.ptr(), dst.step, dst.rows );
}
|
||||
|
||||
|
||||
////////////////////////////////////// completeSymm /////////////////////////////////////////
|
||||
|
||||
|
770
modules/core/src/matrix_transform.cpp
Normal file
770
modules/core/src/matrix_transform.cpp
Normal file
@ -0,0 +1,770 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv {
|
||||
|
||||
////////////////////////////////////// transpose /////////////////////////////////////////
|
||||
|
||||
template<typename T> static void
|
||||
transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
|
||||
{
|
||||
int i=0, j, m = sz.width, n = sz.height;
|
||||
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; i <= m - 4; i += 4 )
|
||||
{
|
||||
T* d0 = (T*)(dst + dstep*i);
|
||||
T* d1 = (T*)(dst + dstep*(i+1));
|
||||
T* d2 = (T*)(dst + dstep*(i+2));
|
||||
T* d3 = (T*)(dst + dstep*(i+3));
|
||||
|
||||
for( j = 0; j <= n - 4; j += 4 )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
|
||||
const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
|
||||
const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
|
||||
const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
|
||||
|
||||
d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
|
||||
d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
|
||||
d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
|
||||
d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
|
||||
}
|
||||
|
||||
for( ; j < n; j++ )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
|
||||
d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for( ; i < m; i++ )
|
||||
{
|
||||
T* d0 = (T*)(dst + dstep*i);
|
||||
j = 0;
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for(; j <= n - 4; j += 4 )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
|
||||
const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
|
||||
const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
|
||||
const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
|
||||
|
||||
d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
|
||||
}
|
||||
#endif
|
||||
for( ; j < n; j++ )
|
||||
{
|
||||
const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
|
||||
d0[j] = s0[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T> static void
|
||||
transposeI_( uchar* data, size_t step, int n )
|
||||
{
|
||||
for( int i = 0; i < n; i++ )
|
||||
{
|
||||
T* row = (T*)(data + step*i);
|
||||
uchar* data1 = data + i*sizeof(T);
|
||||
for( int j = i+1; j < n; j++ )
|
||||
std::swap( row[j], *(T*)(data1 + step*j) );
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
|
||||
typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
|
||||
|
||||
#define DEF_TRANSPOSE_FUNC(suffix, type) \
|
||||
static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
|
||||
{ transpose_<type>(src, sstep, dst, dstep, sz); } \
|
||||
\
|
||||
static void transposeI_##suffix( uchar* data, size_t step, int n ) \
|
||||
{ transposeI_<type>(data, step, n); }
|
||||
|
||||
DEF_TRANSPOSE_FUNC(8u, uchar)
|
||||
DEF_TRANSPOSE_FUNC(16u, ushort)
|
||||
DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
|
||||
DEF_TRANSPOSE_FUNC(32s, int)
|
||||
DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
|
||||
DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
|
||||
DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
|
||||
DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
|
||||
DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
|
||||
DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
|
||||
|
||||
static TransposeFunc transposeTab[] =
|
||||
{
|
||||
0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
|
||||
transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
|
||||
0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8
|
||||
};
|
||||
|
||||
static TransposeInplaceFunc transposeInplaceTab[] =
|
||||
{
|
||||
0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0,
|
||||
transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4,
|
||||
0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
// OpenCL transpose. Uses the "transpose_inplace" kernel when source and
// destination share the same UMat data (the matrix must then be square) and
// the tiled "transpose" kernel otherwise. Returns false when the tiled kernel
// would need more local memory than the device reports, when the kernel
// object is empty, or when the run reports failure.
static bool ocl_transpose( InputArray _src, OutputArray _dst )
{
    const ocl::Device & dev = ocl::Device::getDefault();
    const int TILE_DIM = 32, BLOCK_ROWS = 8;
    const int type = _src.type();
    const int cn = CV_MAT_CN(type);
    const int depth = CV_MAT_DEPTH(type);
    const int rowsPerWI = dev.isIntel() ? 4 : 1;

    UMat src = _src.getUMat();
    _dst.create(src.cols, src.rows, type);
    UMat dst = _dst.getUMat();

    const bool inplace = dst.u == src.u;

    String kernelName("transpose");
    if (inplace)
    {
        CV_Assert(dst.cols == dst.rows);
        kernelName += "_inplace";
    }
    else
    {
        // check required local memory size
        const size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type);
        if (required_local_memory > dev.localMemSize())
            return false;
    }

    ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc,
                  format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s",
                         ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth),
                         cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : ""));
    if (k.empty())
        return false;

    size_t localsize[2] = { TILE_DIM, BLOCK_ROWS };
    size_t globalsize[2] = { (size_t)src.cols, 0 };

    if (inplace)
    {
        k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows);
        globalsize[1] = ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI;

        if (dev.isIntel())
        {
            localsize[0] = 16;
            localsize[1] = dev.maxWorkGroupSize() / localsize[0];
        }
    }
    else
    {
        k.args(ocl::KernelArg::ReadOnly(src),
               ocl::KernelArg::WriteOnlyNoSize(dst));
        globalsize[1] = divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS;
    }

    return k.run(2, globalsize, localsize, false);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
// Attempts the transpose with an IPP routine chosen by matrix type.
// The in-place (IR) variants are considered when dst shares its data pointer
// with src and is square; otherwise the out-of-place (R) variants are used.
// Returns true only when a matching routine exists and its status is >= 0.
static bool ipp_transpose( Mat &src, Mat &dst )
{
    CV_INSTRUMENT_REGION_IPP();

    typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
    typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize);

    const int type = src.type();
    // NOTE: these two names are handed to CV_INSTRUMENT_FUN_IPP below, which
    // may use the token as a label, so they are intentionally left as-is.
    IppiTranspose ippiTranspose = 0;
    IppiTransposeI ippiTranspose_I = 0;

    if (dst.data == src.data && dst.cols == dst.rows)
    {
        CV_SUPPRESS_DEPRECATED_START
        switch (type)
        {
        case CV_8UC1:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C1IR;  break;
        case CV_8UC3:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C3IR;  break;
        case CV_8UC4:  ippiTranspose_I = (IppiTransposeI)ippiTranspose_8u_C4IR;  break;
        case CV_16UC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C1IR; break;
        case CV_16UC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C3IR; break;
        case CV_16UC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16u_C4IR; break;
        case CV_16SC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C1IR; break;
        case CV_16SC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C3IR; break;
        case CV_16SC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_16s_C4IR; break;
        case CV_32SC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C1IR; break;
        case CV_32SC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C3IR; break;
        case CV_32SC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32s_C4IR; break;
        case CV_32FC1: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C1IR; break;
        case CV_32FC3: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C3IR; break;
        case CV_32FC4: ippiTranspose_I = (IppiTransposeI)ippiTranspose_32f_C4IR; break;
        default: break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
    else
    {
        switch (type)
        {
        case CV_8UC1:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C1R;  break;
        case CV_8UC3:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C3R;  break;
        case CV_8UC4:  ippiTranspose = (IppiTranspose)ippiTranspose_8u_C4R;  break;
        case CV_16UC1: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C1R; break;
        case CV_16UC3: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C3R; break;
        case CV_16UC4: ippiTranspose = (IppiTranspose)ippiTranspose_16u_C4R; break;
        case CV_16SC1: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C1R; break;
        case CV_16SC3: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C3R; break;
        case CV_16SC4: ippiTranspose = (IppiTranspose)ippiTranspose_16s_C4R; break;
        case CV_32SC1: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C1R; break;
        case CV_32SC3: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C3R; break;
        case CV_32SC4: ippiTranspose = (IppiTranspose)ippiTranspose_32s_C4R; break;
        case CV_32FC1: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C1R; break;
        case CV_32FC3: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C3R; break;
        case CV_32FC4: ippiTranspose = (IppiTranspose)ippiTranspose_32f_C4R; break;
        default: break;
        }
    }

    IppiSize roiSize = { src.cols, src.rows };
    if (ippiTranspose != 0)
        return CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0;
    if (ippiTranspose_I != 0)
        return CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0;
    return false;
}
|
||||
#endif
|
||||
|
||||
|
||||
// Transposes a matrix of at most two dimensions whose element size does not
// exceed 32 bytes. The OpenCL and IPP paths are offered first; otherwise a
// kernel is looked up by element size in transposeTab / transposeInplaceTab.
// An empty source releases the destination.
void transpose( InputArray _src, OutputArray _dst )
{
    CV_INSTRUMENT_REGION();

    const int type = _src.type();
    const int esz = CV_ELEM_SIZE(type);
    CV_Assert( _src.dims() <= 2 && esz <= 32 );

    CV_OCL_RUN(_dst.isUMat(),
               ocl_transpose(_src, _dst))

    Mat src = _src.getMat();
    if( src.empty() )
    {
        _dst.release();
        return;
    }

    _dst.create(src.cols, src.rows, src.type());
    Mat dst = _dst.getMat();

    // handle the case of single-column/single-row matrices, stored in STL vectors.
    const bool shapeIsTransposed = src.rows == dst.cols && src.cols == dst.rows;
    if( !shapeIsTransposed )
    {
        CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) );
        src.copyTo(dst);
        return;
    }

    CV_IPP_RUN_FAST(ipp_transpose(src, dst))

    if( dst.data != src.data )
    {
        // Separate buffers: out-of-place kernel.
        TransposeFunc func = transposeTab[esz];
        CV_Assert( func != 0 );
        func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() );
        return;
    }

    // Shared buffer: in-place kernel, square matrices only.
    TransposeInplaceFunc func = transposeInplaceTab[esz];
    CV_Assert( func != 0 );
    CV_Assert( dst.cols == dst.rows );
    func( dst.ptr(), dst.step, dst.rows );
}
|
||||
|
||||
|
||||
#if CV_SIMD128
|
||||
// Mirrors every row left-to-right, working from both ends of the row towards
// the middle. The bulk is processed in whole vectors of type V (each vector is
// loaded, lane-reversed with v_reverse and stored at the mirrored position);
// the remainder is swapped one lane (T) at a time, or byte by byte when
// isAligned<sizeof(T)>(src, dst) is false for the current row.
// In every step both values are read before either is written.
template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
    typedef typename V::lane_type T;
    const int vecBytes = v_uint8x16::nlanes;   // bytes advanced per vector step
    const int laneBytes = (int)sizeof(T);      // bytes advanced per scalar step
    const int end = (int)(size.width*esz);     // row length in bytes
    const int width = (end + 1)/2;             // the low index stops here
    const int width_1 = width & -vecBytes;     // part of `width` covered by whole vectors

#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(T)>(src, dst));
#endif

    for( ; size.height--; src += sstep, dst += dstep )
    {
        int lo = 0, hi = end;

        // Vector part.
        for( ; lo < width_1; lo += vecBytes, hi -= vecBytes )
        {
            V head = v_load((T*)((uchar*)src + lo));
            V tail = v_load((T*)((uchar*)src + hi - vecBytes));
            head = v_reverse(head);
            tail = v_reverse(tail);
            v_store((T*)(dst + hi - vecBytes), head);
            v_store((T*)(dst + lo), tail);
        }

        if (isAligned<sizeof(T)>(src, dst))
        {
            // Lane-wise tail.
            for ( ; lo < width; lo += laneBytes, hi -= laneBytes )
            {
                const T head = *((T*)((uchar*)src + lo));
                const T tail = *((T*)((uchar*)src + hi - laneBytes));
                *((T*)(dst + hi - laneBytes)) = head;
                *((T*)(dst + lo)) = tail;
            }
        }
        else
        {
            // Byte-wise tail; the byte order inside each lane is kept.
            for ( ; lo < width; lo += laneBytes, hi -= laneBytes )
            {
                for (int k = 0; k < laneBytes; k++)
                {
                    const uchar head = *((uchar*)src + lo + k);
                    const uchar tail = *((uchar*)src + hi + k - laneBytes);
                    *(dst + hi + k - laneBytes) = head;
                    *(dst + lo + k) = tail;
                }
            }
        }
    }
}
|
||||
|
||||
// Mirrors every row left-to-right for elements that are moved as a T1 value
// immediately followed by a T2 value (step = sizeof(T1) + sizeof(T2) bytes).
// Each step reads one element from the low end and one from the high end of
// the row, then writes them to the mirrored positions; all four reads happen
// before any write.
template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
    const int pairBytes = (int)(sizeof(T1) + sizeof(T2));
    const int end = (int)(size.width*esz);   // row length in bytes
    const int width = (end + 1)/2;           // the low index stops here

#if CV_STRONG_ALIGNMENT
    CV_Assert(isAligned<sizeof(T1)>(src, dst));
    CV_Assert(isAligned<sizeof(T2)>(src, dst));
#endif

    for( ; size.height--; src += sstep, dst += dstep )
    {
        for ( int lo = 0, hi = end; lo < width; lo += pairBytes, hi -= pairBytes )
        {
            const uchar* lowSrc  = src + lo;
            const uchar* highSrc = src + hi - pairBytes;
            uchar* lowDst  = dst + lo;
            uchar* highDst = dst + hi - pairBytes;

            const T1 lowFirst   = *((const T1*)lowSrc);
            const T2 lowSecond  = *((const T2*)(lowSrc + sizeof(T1)));
            const T1 highFirst  = *((const T1*)highSrc);
            const T2 highSecond = *((const T2*)(highSrc + sizeof(T1)));

            *((T1*)highDst) = lowFirst;
            *((T2*)(highDst + sizeof(T1))) = lowSecond;
            *((T1*)lowDst) = highFirst;
            *((T2*)(lowDst + sizeof(T1))) = highSecond;
        }
    }
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
|
||||
{
|
||||
#if CV_SIMD
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
|
||||
#endif
|
||||
if (esz == 2 * v_uint8x16::nlanes)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = end/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes )
|
||||
{
|
||||
#if CV_SIMD256
|
||||
v_uint8x32 t0, t1;
|
||||
|
||||
t0 = v256_load((uchar*)src + i);
|
||||
t1 = v256_load((uchar*)src + j);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + i, t1);
|
||||
#else
|
||||
v_uint8x16 t0, t1, t2, t3;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t1 = v_load((uchar*)src + i + v_uint8x16::nlanes);
|
||||
t2 = v_load((uchar*)src + j);
|
||||
t3 = v_load((uchar*)src + j + v_uint8x16::nlanes);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + j + v_uint8x16::nlanes, t1);
|
||||
v_store(dst + i, t2);
|
||||
v_store(dst + i + v_uint8x16::nlanes, t3);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (esz == v_uint8x16::nlanes)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = end/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
|
||||
{
|
||||
v_uint8x16 t0, t1;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t1 = v_load((uchar*)src + j);
|
||||
v_store(dst + j, t0);
|
||||
v_store(dst + i, t1);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (esz == 8
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(uint64)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 4
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(unsigned)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 2
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(ushort)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 1)
|
||||
{
|
||||
flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 24
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
&& isAligned<sizeof(uint64_t)>(alignmentMark)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
int end = (int)(size.width*esz);
|
||||
int width = (end + 1)/2;
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) )
|
||||
{
|
||||
v_uint8x16 t0, t1;
|
||||
uint64_t t2, t3;
|
||||
|
||||
t0 = v_load((uchar*)src + i);
|
||||
t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes));
|
||||
t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t));
|
||||
t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t)));
|
||||
v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0);
|
||||
*((uint64_t*)(dst + j - sizeof(uint64_t))) = t2;
|
||||
v_store(dst + i, t1);
|
||||
*((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if !CV_STRONG_ALIGNMENT
|
||||
else if (esz == 12)
|
||||
{
|
||||
flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 6)
|
||||
{
|
||||
flipHoriz_double<uint,ushort>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
else if (esz == 3)
|
||||
{
|
||||
flipHoriz_double<ushort,uchar>(src, sstep, dst, dstep, size, esz);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
#endif // CV_SIMD
|
||||
{
|
||||
int i, j, limit = (int)(((size.width + 1)/2)*esz);
|
||||
AutoBuffer<int> _tab(size.width*esz);
|
||||
int* tab = _tab.data();
|
||||
|
||||
for( i = 0; i < size.width; i++ )
|
||||
for( size_t k = 0; k < esz; k++ )
|
||||
tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
|
||||
|
||||
for( ; size.height--; src += sstep, dst += dstep )
|
||||
{
|
||||
for( i = 0; i < limit; i++ )
|
||||
{
|
||||
j = tab[i];
|
||||
uchar t0 = src[i], t1 = src[j];
|
||||
dst[i] = t1; dst[j] = t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
|
||||
{
|
||||
const uchar* src1 = src0 + (size.height - 1)*sstep;
|
||||
uchar* dst1 = dst0 + (size.height - 1)*dstep;
|
||||
size.width *= (int)esz;
|
||||
|
||||
for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
|
||||
dst0 += dstep, dst1 -= dstep )
|
||||
{
|
||||
int i = 0;
|
||||
#if CV_SIMD
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
|
||||
#endif
|
||||
{
|
||||
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
|
||||
{
|
||||
v_int32 t0 = vx_load((int*)(src0 + i));
|
||||
v_int32 t1 = vx_load((int*)(src1 + i));
|
||||
vx_store((int*)(dst0 + i), t1);
|
||||
vx_store((int*)(dst1 + i), t0);
|
||||
}
|
||||
}
|
||||
#if CV_STRONG_ALIGNMENT
|
||||
else
|
||||
{
|
||||
for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
|
||||
{
|
||||
v_uint8 t0 = vx_load(src0 + i);
|
||||
v_uint8 t1 = vx_load(src1 + i);
|
||||
vx_store(dst0 + i, t1);
|
||||
vx_store(dst1 + i, t0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
|
||||
{
|
||||
for( ; i <= size.width - 16; i += 16 )
|
||||
{
|
||||
int t0 = ((int*)(src0 + i))[0];
|
||||
int t1 = ((int*)(src1 + i))[0];
|
||||
|
||||
((int*)(dst0 + i))[0] = t1;
|
||||
((int*)(dst1 + i))[0] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[1];
|
||||
t1 = ((int*)(src1 + i))[1];
|
||||
|
||||
((int*)(dst0 + i))[1] = t1;
|
||||
((int*)(dst1 + i))[1] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[2];
|
||||
t1 = ((int*)(src1 + i))[2];
|
||||
|
||||
((int*)(dst0 + i))[2] = t1;
|
||||
((int*)(dst1 + i))[2] = t0;
|
||||
|
||||
t0 = ((int*)(src0 + i))[3];
|
||||
t1 = ((int*)(src1 + i))[3];
|
||||
|
||||
((int*)(dst0 + i))[3] = t1;
|
||||
((int*)(dst1 + i))[3] = t0;
|
||||
}
|
||||
|
||||
for( ; i <= size.width - 4; i += 4 )
|
||||
{
|
||||
int t0 = ((int*)(src0 + i))[0];
|
||||
int t1 = ((int*)(src1 + i))[0];
|
||||
|
||||
((int*)(dst0 + i))[0] = t1;
|
||||
((int*)(dst1 + i))[0] = t0;
|
||||
}
|
||||
}
|
||||
|
||||
for( ; i < size.width; i++ )
|
||||
{
|
||||
uchar t0 = src0[i];
|
||||
uchar t1 = src1[i];
|
||||
|
||||
dst0[i] = t1;
|
||||
dst1[i] = t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
|
||||
|
||||
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
|
||||
{
|
||||
CV_Assert(flipCode >= -1 && flipCode <= 1);
|
||||
|
||||
const ocl::Device & dev = ocl::Device::getDefault();
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
|
||||
flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
|
||||
|
||||
bool doubleSupport = dev.doubleFPConfig() > 0;
|
||||
if (!doubleSupport && depth == CV_64F)
|
||||
kercn = cn;
|
||||
|
||||
if (cn > 4)
|
||||
return false;
|
||||
|
||||
const char * kernelName;
|
||||
if (flipCode == 0)
|
||||
kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
|
||||
else if (flipCode > 0)
|
||||
kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
|
||||
else
|
||||
kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
|
||||
|
||||
int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
|
||||
kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
|
||||
|
||||
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
|
||||
format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
|
||||
kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
|
||||
kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
Size size = _src.size();
|
||||
_dst.create(size, type);
|
||||
UMat src = _src.getUMat(), dst = _dst.getUMat();
|
||||
|
||||
int cols = size.width * cn / kercn, rows = size.height;
|
||||
cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
|
||||
rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(src),
|
||||
ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols);
|
||||
|
||||
size_t maxWorkGroupSize = dev.maxWorkGroupSize();
|
||||
CV_Assert(maxWorkGroupSize % 4 == 0);
|
||||
|
||||
size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy },
|
||||
localsize[2] = { maxWorkGroupSize / 4, 4 };
|
||||
return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined HAVE_IPP
|
||||
// IPP (Integration Wrappers) implementation of flip().
//
//  src, dst  - input and pre-allocated output matrices
//  flip_mode - negative maps to ippAxsBoth, zero to ippAxsHorizontal,
//              positive to ippAxsVertical
//
// Returns true when iwiMirror ran without throwing; false when the build has
// no HAVE_IPP_IW, when the big-image workaround below applies, or when IPP
// raised an IwException.
static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
{
#ifdef HAVE_IPP_IW
    CV_INSTRUMENT_REGION_IPP();

    // Details: https://github.com/opencv/opencv/issues/12943
    // Row-swapping modes are not handed to IPP for data of 2Gb or more
    // unless the top IPP feature set is exactly SSE4.2.
    const bool swapsRows = flip_mode <= 0;
    if (swapsRows
        && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42
        && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/
    )
        return false;

    const IppiAxis ippMode = flip_mode < 0  ? ippAxsBoth
                           : flip_mode == 0 ? ippAxsHorizontal
                                            : ippAxsVertical;

    try
    {
        ::ipp::IwiImage iwSrc = ippiGetImage(src);
        ::ipp::IwiImage iwDst = ippiGetImage(dst);

        CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
    }
    catch(const ::ipp::IwException &)
    {
        // Let the caller fall back to the generic implementation.
        return false;
    }

    return true;
#else
    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode);
    return false;
#endif
}
|
||||
#endif
|
||||
|
||||
|
||||
// Flips a 2D array.
//
//  _src      - input array with at most 2 dimensions
//  _dst      - output array, created with the size and type of _src
//  flip_mode - 0: rows are exchanged (flipVert);
//              positive: columns are exchanged (flipHoriz);
//              negative: both, rows first and then columns in place on _dst
//
// A flip along an axis of length 1 changes nothing, so a two-axis request on
// a single-row or single-column array is reduced to a one-axis flip, and a
// one-axis flip along a length-1 axis becomes a plain copy.
void flip( InputArray _src, OutputArray _dst, int flip_mode )
{
    CV_INSTRUMENT_REGION();

    CV_Assert( _src.dims() <= 2 );
    Size size = _src.size();

    if (flip_mode < 0)
    {
        // A single row wins over a single column, as in the sequential checks.
        if (size.height == 1)
            flip_mode = 1;
        else if (size.width == 1)
            flip_mode = 0;
    }

    const bool nothingToFlip = (size.width == 1 && flip_mode > 0) ||
                               (size.height == 1 && flip_mode == 0);
    if (nothingToFlip)
    {
        _src.copyTo(_dst);
        return;
    }

    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))

    Mat src = _src.getMat();
    const int type = src.type();
    _dst.create( size, type );
    Mat dst = _dst.getMat();

    CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode));

    const size_t esz = CV_ELEM_SIZE(type);

    if( flip_mode > 0 )
        flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
    else
        flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );

    // Two-axis flip: columns are mirrored in place on the row-flipped result.
    if( flip_mode < 0 )
        flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
}
|
||||
|
||||
// Rotates a 2D array by a multiple of 90 degrees using transpose() and flip().
//
//  _src       - input array with at most 2 dimensions
//  _dst       - output array
//  rotateMode - ROTATE_90_CLOCKWISE:        transpose, then flip with code 1
//               ROTATE_180:                 flip with code -1
//               ROTATE_90_COUNTERCLOCKWISE: transpose, then flip with code 0
//
// Any other rotateMode value is ignored and _dst is left untouched.
void rotate(InputArray _src, OutputArray _dst, int rotateMode)
{
    CV_Assert(_src.dims() <= 2);

    if (rotateMode == ROTATE_180)
    {
        flip(_src, _dst, -1);
        return;
    }

    const bool clockwise = rotateMode == ROTATE_90_CLOCKWISE;
    if (clockwise || rotateMode == ROTATE_90_COUNTERCLOCKWISE)
    {
        // Both quarter turns share the transpose and differ only in the
        // axis of the in-place flip that follows.
        transpose(_src, _dst);
        flip(_dst, _dst, clockwise ? 1 : 0);
    }
}
|
||||
|
||||
} // namespace
|
@ -316,6 +316,7 @@ void _InputArray::getUMatVector(std::vector<UMat>& umv) const
|
||||
|
||||
cuda::GpuMat _InputArray::getGpuMat() const
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
_InputArray::KindFlag k = kind();
|
||||
|
||||
if (k == CUDA_GPU_MAT)
|
||||
@ -339,14 +340,22 @@ cuda::GpuMat _InputArray::getGpuMat() const
|
||||
return cuda::GpuMat();
|
||||
|
||||
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem");
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
// Copies the wrapped std::vector<cuda::GpuMat> into gpumv.
//
// With HAVE_CUDA: gpumv is assigned only when this array's kind is
// STD_VECTOR_CUDA_GPU_MAT; for any other kind gpumv is left unchanged.
// Without HAVE_CUDA: raises Error::StsNotImplemented.
void _InputArray::getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const
{
#ifdef HAVE_CUDA
    if (kind() != STD_VECTOR_CUDA_GPU_MAT)
        return;

    const std::vector<cuda::GpuMat>* wrapped = (std::vector<cuda::GpuMat>*)obj;
    gpumv = *wrapped;
#else
    CV_UNUSED(gpumv);
    CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
#endif
}
|
||||
ogl::Buffer _InputArray::getOGlBuffer() const
|
||||
{
|
||||
@ -453,11 +462,15 @@ Size _InputArray::size(int i) const
|
||||
|
||||
if (k == STD_VECTOR_CUDA_GPU_MAT)
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
|
||||
if (i < 0)
|
||||
return vv.empty() ? Size() : Size((int)vv.size(), 1);
|
||||
CV_Assert(i < (int)vv.size());
|
||||
return vv[i].size();
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_UMAT )
|
||||
@ -792,6 +805,7 @@ int _InputArray::type(int i) const
|
||||
|
||||
if (k == STD_VECTOR_CUDA_GPU_MAT)
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
|
||||
if (vv.empty())
|
||||
{
|
||||
@ -800,6 +814,9 @@ int _InputArray::type(int i) const
|
||||
}
|
||||
CV_Assert(i < (int)vv.size());
|
||||
return vv[i >= 0 ? i : 0].type();
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
|
||||
if( k == OPENGL_BUFFER )
|
||||
@ -1161,22 +1178,34 @@ void _OutputArray::create(Size _sz, int mtype, int i, bool allowTransposed, _Out
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
|
||||
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::GpuMat*)obj)->create(_sz, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == _sz);
|
||||
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
|
||||
#ifdef HAVE_OPENGL
|
||||
((ogl::Buffer*)obj)->create(_sz, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
|
||||
#endif
|
||||
}
|
||||
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz);
|
||||
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::HostMem*)obj)->create(_sz, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
int sizes[] = {_sz.height, _sz.width};
|
||||
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
||||
@ -1203,22 +1232,34 @@ void _OutputArray::create(int _rows, int _cols, int mtype, int i, bool allowTran
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
|
||||
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::GpuMat*)obj)->create(_rows, _cols, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows));
|
||||
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
|
||||
#ifdef HAVE_OPENGL
|
||||
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
|
||||
#endif
|
||||
}
|
||||
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
|
||||
{
|
||||
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows));
|
||||
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::HostMem*)obj)->create(_rows, _cols, mtype);
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
int sizes[] = {_rows, _cols};
|
||||
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
|
||||
@ -1641,20 +1682,32 @@ void _OutputArray::release() const
|
||||
|
||||
if( k == CUDA_GPU_MAT )
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::GpuMat*)obj)->release();
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
|
||||
if( k == CUDA_HOST_MEM )
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
((cuda::HostMem*)obj)->release();
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
|
||||
if( k == OPENGL_BUFFER )
|
||||
{
|
||||
#ifdef HAVE_OPENGL
|
||||
((ogl::Buffer*)obj)->release();
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
|
||||
#endif
|
||||
}
|
||||
|
||||
if( k == NONE )
|
||||
@ -1685,8 +1738,12 @@ void _OutputArray::release() const
|
||||
}
|
||||
if (k == STD_VECTOR_CUDA_GPU_MAT)
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
((std::vector<cuda::GpuMat>*)obj)->clear();
|
||||
return;
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
|
||||
}
|
||||
@ -1794,9 +1851,13 @@ void _OutputArray::setTo(const _InputArray& arr, const _InputArray & mask) const
|
||||
((UMat*)obj)->setTo(arr, mask);
|
||||
else if( k == CUDA_GPU_MAT )
|
||||
{
|
||||
#ifdef HAVE_CUDA
|
||||
Mat value = arr.getMat();
|
||||
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) );
|
||||
((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>(value.ptr<double>())), mask);
|
||||
#else
|
||||
CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
|
||||
#endif
|
||||
}
|
||||
else
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
|
@ -205,13 +205,10 @@ int normL1_(const uchar* a, const uchar* b, int n)
|
||||
return d;
|
||||
}
|
||||
|
||||
}} //cv::hal
|
||||
} //cv::hal
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
template<typename T, typename ST> int
|
||||
normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
|
||||
{
|
||||
@ -594,12 +591,10 @@ static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
|
||||
CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
} // ipp_norm()
|
||||
#endif // HAVE_IPP
|
||||
|
||||
} // cv::
|
||||
|
||||
double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
double norm( InputArray _src, int normType, InputArray _mask )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -792,9 +787,6 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
//==================================================================================================
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cv {
|
||||
|
||||
static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
|
||||
{
|
||||
#ifdef __ANDROID__
|
||||
@ -849,15 +841,10 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
|
||||
result /= (s2 + DBL_EPSILON);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
} // ocl_norm()
|
||||
#endif // HAVE_OPENCL
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
namespace cv
|
||||
{
|
||||
static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
|
||||
{
|
||||
CV_INSTRUMENT_REGION_IPP();
|
||||
@ -1083,12 +1070,11 @@ static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArra
|
||||
CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // ipp_norm
|
||||
#endif // HAVE_IPP
|
||||
|
||||
|
||||
double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
|
||||
double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -1280,12 +1266,12 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
return result.d;
|
||||
}
|
||||
|
||||
cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
|
||||
cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
|
||||
{
|
||||
return cv::hal::normHamming(a, b, size);
|
||||
}
|
||||
|
||||
double cv::PSNR(InputArray _src1, InputArray _src2, double R)
|
||||
double PSNR(InputArray _src1, InputArray _src2, double R)
|
||||
{
|
||||
CV_INSTRUMENT_REGION();
|
||||
|
||||
@ -1295,3 +1281,141 @@ double cv::PSNR(InputArray _src1, InputArray _src2, double R)
|
||||
double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
|
||||
return 20*log10(R/(diff+DBL_EPSILON));
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
// OpenCL back end of normalize(): writes src*scale + delta, converted to
// dtype, into _dst, optionally restricted by _mask.
//
//  _src, _dst   - input and output arrays
//  _mask        - optional mask; when empty a plain convertTo() is used
//  dtype        - destination type passed to convertTo() / the kernel
//  scale, delta - linear transform coefficients
//
// Returns false when the masked kernel path needs doubles the device lacks,
// when the kernel cannot be built, or when its launch fails; true otherwise.
static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
                           double scale, double delta )
{
    UMat src = _src.getUMat();

    // No mask: a single conversion does the whole job.
    if( _mask.empty() )
    {
        src.convertTo( _dst, dtype, scale, delta );
        return true;
    }

    // More than 4 channels: convert into a temporary and copy under the mask.
    if (src.channels() > 4)
    {
        UMat temp;
        src.convertTo( temp, dtype, scale, delta );
        temp.copyTo( _dst, _mask );
        return true;
    }

    const ocl::Device & dev = ocl::Device::getDefault();

    const int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
              ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
              rowsPerWI = dev.isIntel() ? 4 : 1;

    float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
    const bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
               haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
               haveDelta = std::fabs(delta) > DBL_EPSILON,
               doubleSupport = dev.doubleFPConfig() > 0;

    // Identity transform without type change: masked copy.
    if (!haveScale && !haveDelta && stype == dtype)
    {
        _src.copyTo(_dst, _mask);
        return true;
    }

    // Zero scale: the result is the constant delta under the mask.
    // NOTE(review): Scalar(delta) presumably fills only the first channel -
    // confirm this is intended for multi-channel input.
    if (haveZeroScale)
    {
        _dst.setTo(Scalar(delta), _mask);
        return true;
    }

    if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
        return false;

    char cvt[2][40];
    String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
                         " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
                         ocl::typeToStr(stype), ocl::typeToStr(dtype),
                         ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
                         rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
                         ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         haveScale ? " -D HAVE_SCALE" : "",
                         haveDelta ? " -D HAVE_DELTA" : "",
                         ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));

    ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
    if (k.empty())
        return false;

    UMat mask = _mask.getUMat(), dst = _dst.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                   maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
                   dstarg = ocl::KernelArg::ReadWrite(dst);

    // The argument list mirrors the HAVE_SCALE / HAVE_DELTA build options.
    if (haveScale && haveDelta)
        k.args(srcarg, maskarg, dstarg, fscale, fdelta);
    else if (haveScale)
        k.args(srcarg, maskarg, dstarg, fscale);
    else if (haveDelta)
        k.args(srcarg, maskarg, dstarg, fdelta);
    else
        k.args(srcarg, maskarg, dstarg);

    size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
} // ocl_normalize
|
||||
#endif // HAVE_OPENCL
|
||||
|
||||
// Normalizes the value range or the norm of an array.
//
//  _src      - input array
//  _dst      - output array
//  a, b      - for CV_MINMAX the two bounds of the output range (in either
//              order); for CV_L2 / CV_L1 / CV_C, `a` is the target norm and
//              `b` is not used
//  norm_type - CV_MINMAX, CV_L2, CV_L1 or CV_C; anything else raises
//              CV_StsBadArg
//  rtype     - output depth; when negative, the depth of _dst is used if its
//              type is fixed, otherwise the depth of _src
//  _mask     - optional mask; when given, the converted values are copied
//              into _dst under the mask
//
// The function reduces the request to dst = src*scale + shift and performs it
// with convertTo() (or ocl_normalize() when _dst is a UMat).
void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
               int norm_type, int rtype, InputArray _mask)
{
    CV_INSTRUMENT_REGION();

    double scale = 1, shift = 0;
    const int type = _src.type(), depth = CV_MAT_DEPTH(type);

    if( rtype < 0 )
        rtype = _dst.fixedType() ? _dst.depth() : depth;

    if( norm_type == CV_MINMAX )
    {
        double smin = 0, smax = 0;
        const double dmin = MIN( a, b ), dmax = MAX( a, b );
        minMaxIdx( _src, &smin, &smax, 0, 0, _mask );

        // A (near-)constant source gets scale 0, i.e. every value maps to dmin.
        const double srange = smax - smin;
        scale = (dmax - dmin)*(srange > DBL_EPSILON ? 1./srange : 0);

        if( rtype == CV_32F )
        {
            // Round the coefficients to float precision for float output.
            scale = (float)scale;
            shift = (float)dmin - (float)(smin*scale);
        }
        else
            shift = dmin - smin*scale;
    }
    else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
    {
        const double srcNorm = norm( _src, norm_type, _mask );
        scale = srcNorm > DBL_EPSILON ? a/srcNorm : 0.;
        shift = 0;
    }
    else
        CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );

    CV_OCL_RUN(_dst.isUMat(),
               ocl_normalize(_src, _dst, _mask, rtype, scale, shift))

    Mat src = _src.getMat();
    if( !_mask.empty() )
    {
        // Convert everything, then copy only the masked elements into _dst.
        Mat temp;
        src.convertTo( temp, rtype, scale, shift );
        temp.copyTo( _dst, _mask );
    }
    else
        src.convertTo( _dst, rtype, scale, shift );
}
|
||||
|
||||
} // namespace
|
||||
|
@ -750,6 +750,9 @@ void cv::randShuffle( InputOutputArray _dst, double iterFactor, RNG* _rng )
|
||||
func( dst, rng, iterFactor );
|
||||
}
|
||||
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL void
|
||||
cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
|
||||
{
|
||||
@ -767,6 +770,9 @@ CV_IMPL void cvRandShuffle( CvArr* arr, CvRNG* _rng, double iter_factor )
|
||||
cv::randShuffle( dst, iter_factor, &rng );
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
||||
|
||||
// Mersenne Twister random number generator.
|
||||
// Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifndef OPENCV_EXCLUDE_C_API
|
||||
|
||||
CV_IMPL CvScalar cvSum( const CvArr* srcarr )
|
||||
{
|
||||
cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
|
||||
@ -117,3 +119,5 @@ cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
|
||||
|
||||
return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
|
||||
}
|
||||
|
||||
#endif // OPENCV_EXCLUDE_C_API
|
||||
|
@ -1318,88 +1318,6 @@ UMat UMat::t() const
|
||||
return m;
|
||||
}
|
||||
|
||||
// Returns the result of invert() applied to this matrix.
//  method - passed through to invert() unchanged
UMat UMat::inv(int method) const
{
    UMat inverted;
    invert(*this, inverted, method);
    return inverted;
}
|
||||
|
||||
// Returns the result of multiply() applied to this matrix and m.
//  m     - second operand
//  scale - passed through to multiply() unchanged
UMat UMat::mul(InputArray m, double scale) const
{
    UMat product;
    multiply(*this, m, product, scale);
    return product;
}
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
// OpenCL dot product built on the "reduce" kernel compiled with OP_DOT.
//
//  _src1, _src2 - operands; both are viewed as single-channel UMat
//  res          - receives the result on success
//
// Returns false (leaving res untouched) when the data is CV_64F and the
// device has no double support, when the kernel cannot be built, or when its
// launch fails.
static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
{
    UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);

    const int type = src1.type(), depth = CV_MAT_DEPTH(type),
              kercn = ocl::predictOptimalVectorWidth(src1, src2);
    const bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if ( !doubleSupport && depth == CV_64F )
        return false;

    // One partial sum per compute unit, accumulated in at least CV_32F.
    const int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
    const int ddepth = std::max(CV_32F, depth);

    // Half of the smallest power of two that is >= wgs.
    int wgs2_aligned = 1;
    for ( ; wgs2_aligned < (int)wgs; wgs2_aligned <<= 1 )
        ;
    wgs2_aligned >>= 1;

    char cvt[40];
    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
                         "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
                         ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
                         (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
    if (k.empty())
        return false;

    UMat db(1, dbsize, ddepth);

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
                   src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
                   dbarg = ocl::KernelArg::PtrWriteOnly(db);

    k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);

    size_t globalsize = dbsize * wgs;
    if (!k.run(1, &globalsize, &wgs, true))
        return false;

    // Add up the per-group partial sums on the host.
    res = sum(db.getMat(ACCESS_READ))[0];
    return true;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Dot product of this matrix with m.
//
//  m - second operand; must have the same size and type as this matrix
//
// With HAVE_OPENCL the OpenCL path (ocl_dot) is offered first for matrices
// with at most 2 dimensions; otherwise, or if it is not taken, the result
// comes from Mat::dot on a read-only mapping of this matrix.
double UMat::dot(InputArray m) const
{
    CV_INSTRUMENT_REGION();

    CV_Assert(m.sameSize(*this) && m.type() == type());

#ifdef HAVE_OPENCL
    double oclResult = 0;
    CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, oclResult), oclResult)
#endif

    Mat mapped = getMat(ACCESS_READ);
    return mapped.dot(m);
}
|
||||
|
||||
UMat UMat::zeros(int rows, int cols, int type)
|
||||
{
|
||||
return UMat(rows, cols, type, Scalar::all(0));
|
||||
@ -1430,18 +1348,6 @@ UMat UMat::ones(int ndims, const int* sz, int type)
|
||||
return UMat(ndims, sz, type, Scalar(1));
|
||||
}
|
||||
|
||||
// Convenience overload: forwards to eye(Size, int) with Size(cols, rows).
UMat UMat::eye(int rows, int cols, int type)
{
    const Size size(cols, rows);
    return UMat::eye(size, type);
}
|
||||
|
||||
// Creates a UMat of the given size and type and initialises it with
// setIdentity().
UMat UMat::eye(Size size, int type)
{
    UMat identity(size, type);
    setIdentity(identity);
    return identity;
}
|
||||
|
||||
}
|
||||
|
||||
/* End of file. */
|
||||
|
Loading…
Reference in New Issue
Block a user