/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #include "precomp.hpp" #include "opencl_kernels_core.hpp" namespace cv { typedef void (*MathFunc)(const void* src, void* dst, int len); static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI); static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI); static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI); static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI); #ifdef HAVE_OPENCL enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 }; static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 }; static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop) { int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int kercn = oclop == OCL_OP_PHASE_DEGREES || oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); const ocl::Device d = ocl::Device::getDefault(); bool double_support = d.doubleFPConfig() > 0; if (!double_support && depth == CV_64F) return false; int rowsPerWI = d.isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI, double_support ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); _dst.create(src1.size(), type); UMat dst = _dst.getUMat(); ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); if (src2.empty()) k.args(src1arg, dstarg); else k.args(src1arg, src2arg, dstarg); size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, 0, false); } #endif float fastAtan2( float y, float x ) { float ax = std::abs(x), ay = std::abs(y); float a, c, c2; if( ax >= ay ) { c = ay/(ax + (float)DBL_EPSILON); c2 = c*c; a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; } else { c = ax/(ay + (float)DBL_EPSILON); c2 = c*c; a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; } if( x < 0 ) a = 180.f - a; if( y < 0 ) a = 360.f - a; return a; } /* ************************************************************************** *\ Fast cube root by Ken Turkowski (http://www.worldserver.com/turk/computergraphics/papers.html) \* ************************************************************************** */ float cubeRoot( float value ) { float fr; Cv32suf v, m; int ix, s; int ex, shx; v.f = value; ix = v.i & 0x7fffffff; s = v.i & 0x80000000; ex = (ix >> 23) - 127; shx = ex % 3; shx -= shx >= 0 ? 3 : 0; ex = (ex - shx) / 3; /* exponent of cube root */ v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23); fr = v.f; /* 0.125 <= fr < 1.0 */ /* Use quartic rational polynomial with error < 2^(-24) */ fr = (float)(((((45.2548339756803022511987494 * fr + 192.2798368355061050458134625) * fr + 119.1654824285581628956914143) * fr + 13.43250139086239872172837314) * fr + 0.1636161226585754240958355063)/ ((((14.80884093219134573786480845 * fr + 151.9714051044435648658557668) * fr + 168.5254414101568283957668343) * fr + 33.9905941350215598754191872) * fr + 1.0)); /* fr *= 2^ex * sign */ m.f = value; v.f = fr; v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0); return v.f; } /****************************************************************************************\ * Cartezian -> Polar * \****************************************************************************************/ void magnitude( InputArray src1, InputArray src2, OutputArray dst ) { int type = src1.type(), depth = src1.depth(), cn = src1.channels(); CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F)); CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2, ocl_math_op(src1, src2, dst, OCL_OP_MAG)) Mat X = src1.getMat(), Y = src2.getMat(); dst.create(X.dims, X.size, X.type()); Mat Mag = dst.getMat(); const Mat* arrays[] = {&X, &Y, &Mag, 0}; uchar* ptrs[3]; NAryMatIterator it(arrays, ptrs); int len = (int)it.size*cn; for( size_t i = 0; i < it.nplanes; i++, ++it ) { if( depth == CV_32F ) { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *mag = (float*)ptrs[2]; hal::magnitude( x, y, mag, len ); } else { const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; double *mag = (double*)ptrs[2]; hal::magnitude( x, y, mag, len ); } } } void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees ) { int type = src1.type(), depth = src1.depth(), cn = src1.channels(); CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F)); CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2, ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS)) Mat X = src1.getMat(), Y = src2.getMat(); dst.create( X.dims, X.size, type ); Mat Angle = dst.getMat(); const Mat* arrays[] = {&X, &Y, &Angle, 0}; uchar* ptrs[3]; NAryMatIterator it(arrays, ptrs); cv::AutoBuffer _buf; float* buf[2] = {0, 0}; int j, k, total = (int)(it.size*cn), blockSize = total; size_t esz1 = X.elemSize1(); if( depth == CV_64F ) { blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn); _buf.allocate(blockSize*2); buf[0] = _buf; buf[1] = buf[0] + blockSize; } for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); if( depth == CV_32F ) { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *angle = (float*)ptrs[2]; hal::fastAtan2( y, x, angle, len, angleInDegrees ); } else { const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; double *angle = (double*)ptrs[2]; k = 0; #if CV_SSE2 if (USE_SSE2) { for ( ; k <= len - 4; k += 4) { __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)), _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2))); __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)), _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2))); _mm_storeu_ps(buf[0] + k, v_dst0); _mm_storeu_ps(buf[1] + k, v_dst1); } } #endif for( ; k < len; k++ ) { buf[0][k] = (float)x[k]; buf[1][k] = (float)y[k]; } hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees ); k = 0; #if CV_SSE2 if (USE_SSE2) { for ( ; k <= len - 4; k += 4) { __m128 v_src = _mm_loadu_ps(buf[0] + k); _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src)); _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8)))); } } #endif for( ; k < len; k++ ) angle[k] = buf[0][k]; } ptrs[0] += len*esz1; ptrs[1] += len*esz1; ptrs[2] += len*esz1; } } } #ifdef HAVE_OPENCL static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, OutputArray _dst1, OutputArray _dst2, bool angleInDegrees ) { const ocl::Device & d = ocl::Device::getDefault(); int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), rowsPerWI = d.isIntel() ? 4 : 1; bool doubleSupport = d.doubleFPConfig() > 0; if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 && (depth == CV_32F || depth == CV_64F) && type == _src2.type()) || (depth == CV_64F && !doubleSupport) ) return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), depth, rowsPerWI, angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); Size size = src1.size(); CV_Assert( size == src2.size() ); _dst1.create(size, type); _dst2.create(size, type); UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2), ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2)); size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } #endif void cartToPolar( InputArray src1, InputArray src2, OutputArray dst1, OutputArray dst2, bool angleInDegrees ) { CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(), ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees)) Mat X = src1.getMat(), Y = src2.getMat(); int type = X.type(), depth = X.depth(), cn = X.channels(); CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F)); dst1.create( X.dims, X.size, type ); dst2.create( X.dims, X.size, type ); Mat Mag = dst1.getMat(), Angle = dst2.getMat(); const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0}; uchar* ptrs[4]; NAryMatIterator it(arrays, ptrs); cv::AutoBuffer _buf; float* buf[2] = {0, 0}; int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); size_t esz1 = X.elemSize1(); if( depth == CV_64F ) { _buf.allocate(blockSize*2); buf[0] = _buf; buf[1] = buf[0] + blockSize; } for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); if( depth == CV_32F ) { const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3]; hal::magnitude( x, y, mag, len ); hal::fastAtan2( y, x, angle, len, angleInDegrees ); } else { const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; double *angle = (double*)ptrs[3]; hal::magnitude(x, y, (double*)ptrs[2], len); k = 0; #if CV_SSE2 if (USE_SSE2) { for ( ; k <= len - 4; k += 4) { __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)), _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2))); __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)), _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2))); _mm_storeu_ps(buf[0] + k, v_dst0); _mm_storeu_ps(buf[1] + k, v_dst1); } } #endif for( ; k < len; k++ ) { buf[0][k] = (float)x[k]; buf[1][k] = (float)y[k]; } hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees ); k = 0; #if CV_SSE2 if (USE_SSE2) { for ( ; k <= len - 4; k += 4) { __m128 v_src = _mm_loadu_ps(buf[0] + k); _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src)); _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8)))); } } #endif for( ; k < len; k++ ) angle[k] = buf[0][k]; } ptrs[0] += len*esz1; ptrs[1] += len*esz1; ptrs[2] += len*esz1; ptrs[3] += len*esz1; } } } /****************************************************************************************\ * Polar -> Cartezian * \****************************************************************************************/ static void SinCos_32f( const float *angle, float *sinval, float* cosval, int len, int angle_in_degrees ) { const int N = 64; static const double sin_table[] = { 0.00000000000000000000, 0.09801714032956060400, 0.19509032201612825000, 0.29028467725446233000, 0.38268343236508978000, 0.47139673682599764000, 0.55557023301960218000, 0.63439328416364549000, 0.70710678118654746000, 0.77301045336273699000, 0.83146961230254524000, 0.88192126434835494000, 0.92387953251128674000, 0.95694033573220894000, 0.98078528040323043000, 0.99518472667219682000, 1.00000000000000000000, 0.99518472667219693000, 0.98078528040323043000, 0.95694033573220894000, 0.92387953251128674000, 0.88192126434835505000, 0.83146961230254546000, 0.77301045336273710000, 0.70710678118654757000, 0.63439328416364549000, 0.55557023301960218000, 0.47139673682599786000, 0.38268343236508989000, 0.29028467725446239000, 0.19509032201612861000, 0.09801714032956082600, 0.00000000000000012246, -0.09801714032956059000, -0.19509032201612836000, -0.29028467725446211000, -0.38268343236508967000, -0.47139673682599764000, -0.55557023301960196000, -0.63439328416364527000, -0.70710678118654746000, -0.77301045336273666000, -0.83146961230254524000, -0.88192126434835494000, -0.92387953251128652000, -0.95694033573220882000, -0.98078528040323032000, -0.99518472667219693000, -1.00000000000000000000, -0.99518472667219693000, -0.98078528040323043000, -0.95694033573220894000, -0.92387953251128663000, -0.88192126434835505000, -0.83146961230254546000, -0.77301045336273688000, -0.70710678118654768000, -0.63439328416364593000, -0.55557023301960218000, -0.47139673682599792000, -0.38268343236509039000, -0.29028467725446250000, -0.19509032201612872000, -0.09801714032956050600, }; static const double k2 = (2*CV_PI)/N; static const double sin_a0 = -0.166630293345647*k2*k2*k2; static const double sin_a2 = k2; static const double cos_a0 = -0.499818138450326*k2*k2; /*static const double cos_a2 = 1;*/ double k1; int i = 0; if( !angle_in_degrees ) k1 = N/(2*CV_PI); else k1 = N/360.; #if CV_AVX2 if (USE_AVX2) { __m128d v_k1 = _mm_set1_pd(k1); __m128d v_1 = _mm_set1_pd(1); __m128i v_N1 = _mm_set1_epi32(N - 1); __m128i v_N4 = _mm_set1_epi32(N >> 2); __m128d v_sin_a0 = _mm_set1_pd(sin_a0); __m128d v_sin_a2 = _mm_set1_pd(sin_a2); __m128d v_cos_a0 = _mm_set1_pd(cos_a0); for ( ; i <= len - 4; i += 4) { __m128 v_angle = _mm_loadu_ps(angle + i); // 0-1 __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1); __m128i v_it = _mm_cvtpd_epi32(v_t); v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); __m128i v_sin_idx = _mm_and_si128(v_it, v_N1); __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); __m128d v_t2 = _mm_mul_pd(v_t, v_t); __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), _mm_mul_pd(v_cos_a, v_sin_b)); __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), _mm_mul_pd(v_sin_a, v_sin_b)); // 2-3 v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1); v_it = _mm_cvtpd_epi32(v_t); v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); v_sin_idx = _mm_and_si128(v_it, v_N1); v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); v_t2 = _mm_mul_pd(v_t, v_t); v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), _mm_mul_pd(v_cos_a, v_sin_b)); __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), _mm_mul_pd(v_sin_a, v_sin_b)); _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0), _mm_cvtpd_ps(v_sin_val_1))); _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0), _mm_cvtpd_ps(v_cos_val_1))); } } #endif for( ; i < len; i++ ) { double t = angle[i]*k1; int it = cvRound(t); t -= it; int sin_idx = it & (N - 1); int cos_idx = (N/4 - sin_idx) & (N - 1); double sin_b = (sin_a0*t*t + sin_a2)*t; double cos_b = cos_a0*t*t + 1; double sin_a = sin_table[sin_idx]; double cos_a = sin_table[cos_idx]; double sin_val = sin_a*cos_b + cos_a*sin_b; double cos_val = cos_a*cos_b - sin_a*sin_b; sinval[i] = (float)sin_val; cosval[i] = (float)cos_val; } } #ifdef HAVE_OPENCL static bool ocl_polarToCart( InputArray _mag, InputArray _angle, OutputArray _dst1, OutputArray _dst2, bool angleInDegrees ) { const ocl::Device & d = ocl::Device::getDefault(); int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), rowsPerWI = d.isIntel() ? 4 : 1; bool doubleSupport = d.doubleFPConfig() > 0; if ( !doubleSupport && depth == CV_64F ) return false; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI, depth, angleInDegrees ? "AD" : "AR", doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat mag = _mag.getUMat(), angle = _angle.getUMat(); Size size = angle.size(); CV_Assert(mag.size() == size); _dst1.create(size, type); _dst2.create(size, type); UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle), ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2)); size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } #endif void polarToCart( InputArray src1, InputArray src2, OutputArray dst1, OutputArray dst2, bool angleInDegrees ) { int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type)); CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(), ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees)) Mat Mag = src1.getMat(), Angle = src2.getMat(); CV_Assert( Mag.empty() || Angle.size == Mag.size); dst1.create( Angle.dims, Angle.size, type ); dst2.create( Angle.dims, Angle.size, type ); Mat X = dst1.getMat(), Y = dst2.getMat(); #if defined(HAVE_IPP) CV_IPP_CHECK() { if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees) { typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase, void * pDstRe, void * pDstIm, int len); ippsPolarToCart ippFunc = depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f : depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0; CV_Assert(ippFunc != 0); IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast(cn * X.total())); if (status >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } } #endif const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0}; uchar* ptrs[4]; NAryMatIterator it(arrays, ptrs); cv::AutoBuffer _buf; float* buf[2] = {0, 0}; int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); size_t esz1 = Angle.elemSize1(); if( depth == CV_64F ) { _buf.allocate(blockSize*2); buf[0] = _buf; buf[1] = buf[0] + blockSize; } for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < total; j += blockSize ) { int len = std::min(total - j, blockSize); if( depth == CV_32F ) { const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1]; float *x = (float*)ptrs[2], *y = (float*)ptrs[3]; SinCos_32f( angle, y, x, len, angleInDegrees ); if( mag ) { k = 0; #if CV_NEON for( ; k <= len - 4; k += 4 ) { float32x4_t v_m = vld1q_f32(mag + k); vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m)); vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m)); } #elif CV_SSE2 if (USE_SSE2) { for( ; k <= len - 4; k += 4 ) { __m128 v_m = _mm_loadu_ps(mag + k); _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m)); _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m)); } } #endif for( ; k < len; k++ ) { float m = mag[k]; x[k] *= m; y[k] *= m; } } } else { const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1]; double *x = (double*)ptrs[2], *y = (double*)ptrs[3]; for( k = 0; k < len; k++ ) buf[0][k] = (float)angle[k]; SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees ); if( mag ) for( k = 0; k < len; k++ ) { double m = mag[k]; x[k] = buf[0][k]*m; y[k] = buf[1][k]*m; } else { std::memcpy(x, buf[0], sizeof(float) * len); std::memcpy(y, buf[1], sizeof(float) * len); } } if( ptrs[0] ) ptrs[0] += len*esz1; ptrs[1] += len*esz1; ptrs[2] += len*esz1; ptrs[3] += len*esz1; } } } /****************************************************************************************\ * E X P * \****************************************************************************************/ #ifdef HAVE_IPP static void Exp_32f_ipp(const float *x, float *y, int n) { CV_IPP_CHECK() { if (0 <= ippsExp_32f_A21(x, y, n)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } hal::exp(x, y, n); } static void Exp_64f_ipp(const double *x, double *y, int n) { CV_IPP_CHECK() { if (0 <= ippsExp_64f_A50(x, y, n)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } hal::exp(x, y, n); } #define Exp_32f Exp_32f_ipp #define Exp_64f Exp_64f_ipp #else #define Exp_32f hal::exp #define Exp_64f hal::exp #endif void exp( InputArray _src, OutputArray _dst ) { int type = _src.type(), depth = _src.depth(), cn = _src.channels(); CV_Assert( depth == CV_32F || depth == CV_64F ); CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP)) Mat src = _src.getMat(); _dst.create( src.dims, src.size, type ); Mat dst = _dst.getMat(); const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs); int len = (int)(it.size*cn); for( size_t i = 0; i < it.nplanes; i++, ++it ) { if( depth == CV_32F ) Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len); else Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len); } } /****************************************************************************************\ * L O G * \****************************************************************************************/ #ifdef HAVE_IPP static void Log_32f_ipp(const float *x, float *y, int n) { CV_IPP_CHECK() { if (0 <= ippsLn_32f_A21(x, y, n)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } hal::log(x, y, n); } static void Log_64f_ipp(const double *x, double *y, int n) { CV_IPP_CHECK() { if (0 <= ippsLn_64f_A50(x, y, n)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } hal::log(x, y, n); } #define Log_32f Log_32f_ipp #define Log_64f Log_64f_ipp #else #define Log_32f hal::log #define Log_64f hal::log #endif void log( InputArray _src, OutputArray _dst ) { int type = _src.type(), depth = _src.depth(), cn = _src.channels(); CV_Assert( depth == CV_32F || depth == CV_64F ); CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2, ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG)) Mat src = _src.getMat(); _dst.create( src.dims, src.size, type ); Mat dst = _dst.getMat(); const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs); int len = (int)(it.size*cn); for( size_t i = 0; i < it.nplanes; i++, ++it ) { if( depth == CV_32F ) Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len ); else Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len ); } } /****************************************************************************************\ * P O W E R * \****************************************************************************************/ template struct iPow_SIMD { int operator() ( const T *, T *, int, int) { return 0; } }; #if CV_NEON template <> struct iPow_SIMD { int operator() ( const uchar * src, uchar * dst, int len, int power) { int i = 0; uint32x4_t v_1 = vdupq_n_u32(1u); for ( ; i <= len - 8; i += 8) { uint32x4_t v_a1 = v_1, v_a2 = v_1; uint16x8_t v_src = vmovl_u8(vld1_u8(src + i)); uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src)); int p = power; while( p > 1 ) { if (p & 1) { v_a1 = vmulq_u32(v_a1, v_b1); v_a2 = vmulq_u32(v_a2, v_b2); } v_b1 = vmulq_u32(v_b1, v_b1); v_b2 = vmulq_u32(v_b2, v_b2); p >>= 1; } v_a1 = vmulq_u32(v_a1, v_b1); v_a2 = vmulq_u32(v_a2, v_b2); vst1_u8(dst + i, vqmovn_u16(vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2)))); } return i; } }; template <> struct iPow_SIMD { int operator() ( const schar * src, schar * dst, int len, int power) { int i = 0; int32x4_t v_1 = vdupq_n_s32(1); for ( ; i <= len - 8; i += 8) { int32x4_t v_a1 = v_1, v_a2 = v_1; int16x8_t v_src = vmovl_s8(vld1_s8(src + i)); int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src)); int p = power; while( p > 1 ) { if (p & 1) { v_a1 = vmulq_s32(v_a1, v_b1); v_a2 = vmulq_s32(v_a2, v_b2); } v_b1 = vmulq_s32(v_b1, v_b1); v_b2 = vmulq_s32(v_b2, v_b2); p >>= 1; } v_a1 = vmulq_s32(v_a1, v_b1); v_a2 = vmulq_s32(v_a2, v_b2); vst1_s8(dst + i, vqmovn_s16(vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2)))); } return i; } }; template <> struct iPow_SIMD { int operator() ( const ushort * src, ushort * dst, int len, int power) { int i = 0; uint32x4_t v_1 = vdupq_n_u32(1u); for ( ; i <= len - 8; i += 8) { uint32x4_t v_a1 = v_1, v_a2 = v_1; uint16x8_t v_src = vld1q_u16(src + i); uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src)); int p = power; while( p > 1 ) { if (p & 1) { v_a1 = vmulq_u32(v_a1, v_b1); v_a2 = vmulq_u32(v_a2, v_b2); } v_b1 = vmulq_u32(v_b1, v_b1); v_b2 = vmulq_u32(v_b2, v_b2); p >>= 1; } v_a1 = vmulq_u32(v_a1, v_b1); v_a2 = vmulq_u32(v_a2, v_b2); vst1q_u16(dst + i, vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2))); } return i; } }; template <> struct iPow_SIMD { int operator() ( const short * src, short * dst, int len, int power) { int i = 0; int32x4_t v_1 = vdupq_n_s32(1); for ( ; i <= len - 8; i += 8) { int32x4_t v_a1 = v_1, v_a2 = v_1; int16x8_t v_src = vld1q_s16(src + i); int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src)); int p = power; while( p > 1 ) { if (p & 1) { v_a1 = vmulq_s32(v_a1, v_b1); v_a2 = vmulq_s32(v_a2, v_b2); } v_b1 = vmulq_s32(v_b1, v_b1); v_b2 = vmulq_s32(v_b2, v_b2); p >>= 1; } v_a1 = vmulq_s32(v_a1, v_b1); v_a2 = vmulq_s32(v_a2, v_b2); vst1q_s16(dst + i, vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2))); } return i; } }; template <> struct iPow_SIMD { int operator() ( const int * src, int * dst, int len, int power) { int i = 0; int32x4_t v_1 = vdupq_n_s32(1); for ( ; i <= len - 4; i += 4) { int32x4_t v_b = vld1q_s32(src + i), v_a = v_1; int p = power; while( p > 1 ) { if (p & 1) v_a = vmulq_s32(v_a, v_b); v_b = vmulq_s32(v_b, v_b); p >>= 1; } v_a = vmulq_s32(v_a, v_b); vst1q_s32(dst + i, v_a); } return i; } }; template <> struct iPow_SIMD { int operator() ( const float * src, float * dst, int len, int power) { int i = 0; float32x4_t v_1 = vdupq_n_f32(1.0f); for ( ; i <= len - 4; i += 4) { float32x4_t v_b = vld1q_f32(src + i), v_a = v_1; int p = power; while( p > 1 ) { if (p & 1) v_a = vmulq_f32(v_a, v_b); v_b = vmulq_f32(v_b, v_b); p >>= 1; } v_a = vmulq_f32(v_a, v_b); vst1q_f32(dst + i, v_a); } return i; } }; #endif template static void iPow_( const T* src, T* dst, int len, int power ) { iPow_SIMD vop; int i = vop(src, dst, len, power); for( ; i < len; i++ ) { WT a = 1, b = src[i]; int p = power; while( p > 1 ) { if( p & 1 ) a *= b; b *= b; p >>= 1; } a *= b; dst[i] = saturate_cast(a); } } static void iPow8u(const uchar* src, uchar* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow8s(const schar* src, schar* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow16u(const ushort* src, ushort* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow16s(const short* src, short* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow32s(const int* src, int* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow32f(const float* src, float* dst, int len, int power) { iPow_(src, dst, len, power); } static void iPow64f(const double* src, double* dst, int len, int power) { iPow_(src, dst, len, power); } typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power ); static IPowFunc ipowTab[] = { (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s, (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0 }; #ifdef HAVE_OPENCL static bool ocl_pow(InputArray _src, double power, OutputArray _dst, bool is_ipower, int ipower) { const ocl::Device & d = ocl::Device::getDefault(); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), rowsPerWI = d.isIntel() ? 4 : 1; bool doubleSupport = d.doubleFPConfig() > 0; _dst.createSameSize(_src, type); if (is_ipower && (ipower == 0 || ipower == 1)) { if (ipower == 0) _dst.setTo(Scalar::all(1)); else if (ipower == 1) _src.copyTo(_dst); return true; } if (depth == CV_64F && !doubleSupport) return false; bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", ocl::typeToStr(depth), depth, rowsPerWI, op, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); if (k.empty()) return false; UMat src = _src.getUMat(); _dst.create(src.size(), type); UMat dst = _dst.getUMat(); ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), dstarg = ocl::KernelArg::WriteOnly(dst, cn); if (issqrt) k.args(srcarg, dstarg); else if (is_ipower) k.args(srcarg, dstarg, ipower); else { if (depth == CV_32F) k.args(srcarg, dstarg, (float)power); else k.args(srcarg, dstarg, power); } size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } #endif static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt(src, dst, n); } static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt(src, dst, n); } static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt(src, dst, n); } static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt(src, dst, n); } void pow( InputArray _src, double power, OutputArray _dst ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), ipower = cvRound(power); bool is_ipower = fabs(ipower - power) < DBL_EPSILON, same = false, useOpenCL = _dst.isUMat() && _src.dims() <= 2; if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F)) { if( ipower < 0 ) { divide( Scalar::all(1), _src, _dst ); if( ipower == -1 ) return; ipower = -ipower; same = true; } switch( ipower ) { case 0: _dst.createSameSize(_src, type); _dst.setTo(Scalar::all(1)); return; case 1: _src.copyTo(_dst); return; case 2: #if defined(HAVE_IPP) CV_IPP_CHECK() { if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) )) { Mat src = _src.getMat(); _dst.create( src.dims, src.size, type ); Mat dst = _dst.getMat(); Size size = src.size(); int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type); if (src.isContinuous() && dst.isContinuous()) { size.width = (int)src.total(); size.height = 1; srcstep = dststep = (int)src.total() * esz; } size.width *= cn; IppStatus status = ippiSqr_32f_C1R(src.ptr(), srcstep, dst.ptr(), dststep, ippiSize(size.width, size.height)); if (status >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } } #endif if (same) multiply(_dst, _dst, _dst); else multiply(_src, _src, _dst); return; } } else CV_Assert( depth == CV_32F || depth == CV_64F ); CV_OCL_RUN(useOpenCL, ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower)) Mat src, dst; if (same) src = dst = _dst.getMat(); else { src = _src.getMat(); _dst.create( src.dims, src.size, type ); dst = _dst.getMat(); } const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2]; NAryMatIterator it(arrays, ptrs); int len = (int)(it.size*cn); if( is_ipower ) { IPowFunc func = ipowTab[depth]; CV_Assert( func != 0 ); for( size_t i = 0; i < it.nplanes; i++, ++it ) func( ptrs[0], ptrs[1], len, ipower ); } else if( fabs(fabs(power) - 0.5) < DBL_EPSILON ) { MathFunc func = power < 0 ? (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) : (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f); for( size_t i = 0; i < it.nplanes; i++, ++it ) func( ptrs[0], ptrs[1], len ); } else { #if defined(HAVE_IPP) CV_IPP_CHECK() { if (src.isContinuous() && dst.isContinuous()) { IppStatus status = depth == CV_32F ? ippsPowx_32f_A21(src.ptr(), (Ipp32f)power, dst.ptr(), (Ipp32s)(src.total() * cn)) : ippsPowx_64f_A50(src.ptr(), power, dst.ptr(), (Ipp32s)(src.total() * cn)); if (status >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } } #endif int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn); size_t esz1 = src.elemSize1(); for( size_t i = 0; i < it.nplanes; i++, ++it ) { for( j = 0; j < len; j += blockSize ) { int bsz = std::min(len - j, blockSize); if( depth == CV_32F ) { const float* x = (const float*)ptrs[0]; float* y = (float*)ptrs[1]; Log_32f(x, y, bsz); for( k = 0; k < bsz; k++ ) y[k] = (float)(y[k]*power); Exp_32f(y, y, bsz); } else { const double* x = (const double*)ptrs[0]; double* y = (double*)ptrs[1]; Log_64f(x, y, bsz); for( k = 0; k < bsz; k++ ) y[k] *= power; Exp_64f(y, y, bsz); } ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz1; } } } } void sqrt(InputArray a, OutputArray b) { cv::pow(a, 0.5, b); } /************************** CheckArray for NaN's, Inf's *********************************/ template struct mat_type_assotiations{}; template<> struct mat_type_assotiations { typedef unsigned char type; static const type min_allowable = 0x0; static const type max_allowable = 0xFF; }; template<> struct mat_type_assotiations { typedef signed char type; static const type min_allowable = SCHAR_MIN; static const type max_allowable = SCHAR_MAX; }; template<> struct mat_type_assotiations { typedef unsigned short type; static const type min_allowable = 0x0; static const type max_allowable = USHRT_MAX; }; template<> struct mat_type_assotiations { typedef signed short type; static const type min_allowable = SHRT_MIN; static const type max_allowable = SHRT_MAX; }; template<> struct mat_type_assotiations { typedef int type; static const type min_allowable = (-INT_MAX - 1); static const type max_allowable = INT_MAX; }; // inclusive maxVal !!! template bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value) { typedef mat_type_assotiations type_ass; if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable) { return true; } else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal) { bad_pt = cv::Point(0,0); return false; } cv::Mat as_one_channel = src.reshape(1,0); for (int j = 0; j < as_one_channel.rows; ++j) for (int i = 0; i < as_one_channel.cols; ++i) { if (as_one_channel.at(j ,i) < minVal || as_one_channel.at(j ,i) > maxVal) { bad_pt.y = j ; bad_pt.x = i % src.channels(); bad_value = as_one_channel.at(j ,i); return false; } } bad_value = 0.0; return true; } typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value); check_range_function check_range_functions[] = { &checkIntegerRange, &checkIntegerRange, &checkIntegerRange, &checkIntegerRange, &checkIntegerRange }; bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal) { Mat src = _src.getMat(); if ( src.dims > 2 ) { const Mat* arrays[] = {&src, 0}; Mat planes[1]; NAryMatIterator it(arrays, planes); for ( size_t i = 0; i < it.nplanes; i++, ++it ) { if (!checkRange( it.planes[0], quiet, pt, minVal, maxVal )) { // todo: set index properly return false; } } return true; } int depth = src.depth(); Point badPt(-1, -1); double badValue = 0; if (depth < CV_32F) { // see "Bug #1784" int minVali = minVal<(-INT_MAX - 1) ? (-INT_MAX - 1) : cvFloor(minVal); int maxVali = maxVal>INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; // checkIntegerRang() use inclusive maxVal (check_range_functions[depth])(src, badPt, minVali, maxVali, badValue); } else { int i, loc = 0; Size size = getContinuousSize( src, src.channels() ); if( depth == CV_32F ) { Cv32suf a, b; int ia, ib; const int* isrc = src.ptr(); size_t step = src.step/sizeof(isrc[0]); a.f = (float)std::max(minVal, (double)-FLT_MAX); b.f = (float)std::min(maxVal, (double)FLT_MAX); ia = CV_TOGGLE_FLT(a.i); ib = CV_TOGGLE_FLT(b.i); for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step ) { for( i = 0; i < size.width; i++ ) { int val = isrc[i]; val = CV_TOGGLE_FLT(val); if( val < ia || val >= ib ) { badPt = Point((loc + i) % src.cols, (loc + i) / src.cols); badValue = ((const float*)isrc)[i]; break; } } } } else { Cv64suf a, b; int64 ia, ib; const int64* isrc = src.ptr(); size_t step = src.step/sizeof(isrc[0]); a.f = minVal; b.f = maxVal; ia = CV_TOGGLE_DBL(a.i); ib = CV_TOGGLE_DBL(b.i); for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step ) { for( i = 0; i < size.width; i++ ) { int64 val = isrc[i]; val = CV_TOGGLE_DBL(val); if( val < ia || val >= ib ) { badPt = Point((loc + i) % src.cols, (loc + i) / src.cols); badValue = ((const double*)isrc)[i]; break; } } } } } if( badPt.x >= 0 ) { if( pt ) *pt = badPt; if( !quiet ) CV_Error_( CV_StsOutOfRange, ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue)); } return badPt.x < 0; } #ifdef HAVE_OPENCL static bool ocl_patchNaNs( InputOutputArray _a, float value ) { int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; ocl::Kernel k("KF", ocl::core::arithm_oclsrc, format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d", rowsPerWI)); if (k.empty()) return false; UMat a = _a.getUMat(); int cn = a.channels(); k.args(ocl::KernelArg::ReadOnlyNoSize(a), ocl::KernelArg::WriteOnly(a, cn), (float)value); size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } #endif void patchNaNs( InputOutputArray _a, double _val ) { CV_Assert( _a.depth() == CV_32F ); CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2, ocl_patchNaNs(_a, (float)_val)) Mat a = _a.getMat(); const Mat* arrays[] = {&a, 0}; int* ptrs[1]; NAryMatIterator it(arrays, (uchar**)ptrs); size_t len = it.size*a.channels(); Cv32suf val; val.f = (float)_val; #if CV_SSE2 __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000); __m128i v_val = _mm_set1_epi32(val.i); #elif CV_NEON int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000), v_val = vdupq_n_s32(val.i); #endif for( size_t i = 0; i < it.nplanes; i++, ++it ) { int* tptr = ptrs[0]; size_t j = 0; #if CV_SSE2 if (USE_SSE2) { for ( ; j + 4 <= len; j += 4) { __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j)); __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1)); __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val)); _mm_storeu_si128((__m128i *)(tptr + j), v_res); } } #elif CV_NEON for ( ; j + 4 <= len; j += 4) { int32x4_t v_src = vld1q_s32(tptr + j); uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1)); int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src); vst1q_s32(tptr + j, v_dst); } #endif for( ; j < len; j++ ) if( (tptr[j] & 0x7fffffff) > 0x7f800000 ) tptr[j] = val.i; } } } CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); } CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); } CV_IMPL void cvCartToPolar( const CvArr* xarr, const CvArr* yarr, CvArr* magarr, CvArr* anglearr, int angle_in_degrees ) { cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle; if( magarr ) { Mag = cv::cvarrToMat(magarr); CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() ); } if( anglearr ) { Angle = cv::cvarrToMat(anglearr); CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() ); } if( magarr ) { if( anglearr ) cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 ); else cv::magnitude( X, Y, Mag ); } else cv::phase( X, Y, Angle, angle_in_degrees != 0 ); } CV_IMPL void cvPolarToCart( const CvArr* magarr, const CvArr* anglearr, CvArr* xarr, CvArr* yarr, int angle_in_degrees ) { cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag; if( magarr ) { Mag = cv::cvarrToMat(magarr); CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() ); } if( xarr ) { X = cv::cvarrToMat(xarr); CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() ); } if( yarr ) { Y = cv::cvarrToMat(yarr); CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() ); } cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 ); } CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr ) { cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); CV_Assert( src.type() == dst.type() && src.size == dst.size ); cv::exp( src, dst ); } CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr ) { cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); CV_Assert( src.type() == dst.type() && src.size == dst.size ); cv::log( src, dst ); } CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power ) { cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); CV_Assert( src.type() == dst.type() && src.size == dst.size ); cv::pow( src, power, dst ); } CV_IMPL int cvCheckArr( const CvArr* arr, int flags, double minVal, double maxVal ) { if( (flags & CV_CHECK_RANGE) == 0 ) minVal = -DBL_MAX, maxVal = DBL_MAX; return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal ); } /* Finds real roots of cubic, quadratic or linear equation. The original code has been taken from Ken Turkowski web page (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV. Here is the copyright notice. ----------------------------------------------------------------------- Copyright (C) 1978-1999 Ken Turkowski. All rights reserved. Warranty Information Even though I have reviewed this software, I make no warranty or representation, either express or implied, with respect to this software, its quality, accuracy, merchantability, or fitness for a particular purpose. As a result, this software is provided "as is," and you, its user, are assuming the entire risk as to its quality and accuracy. This code may be used and freely distributed as long as it includes this copyright notice and the above warranty information. ----------------------------------------------------------------------- */ int cv::solveCubic( InputArray _coeffs, OutputArray _roots ) { const int n0 = 3; Mat coeffs = _coeffs.getMat(); int ctype = coeffs.type(); CV_Assert( ctype == CV_32F || ctype == CV_64F ); CV_Assert( (coeffs.size() == Size(n0, 1) || coeffs.size() == Size(n0+1, 1) || coeffs.size() == Size(1, n0) || coeffs.size() == Size(1, n0+1)) ); _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT); Mat roots = _roots.getMat(); int i = -1, n = 0; double a0 = 1., a1, a2, a3; double x0 = 0., x1 = 0., x2 = 0.; int ncoeffs = coeffs.rows + coeffs.cols - 1; if( ctype == CV_32FC1 ) { if( ncoeffs == 4 ) a0 = coeffs.at(++i); a1 = coeffs.at(i+1); a2 = coeffs.at(i+2); a3 = coeffs.at(i+3); } else { if( ncoeffs == 4 ) a0 = coeffs.at(++i); a1 = coeffs.at(i+1); a2 = coeffs.at(i+2); a3 = coeffs.at(i+3); } if( a0 == 0 ) { if( a1 == 0 ) { if( a2 == 0 ) n = a3 == 0 ? -1 : 0; else { // linear equation x0 = -a3/a2; n = 1; } } else { // quadratic equation double d = a2*a2 - 4*a1*a3; if( d >= 0 ) { d = std::sqrt(d); double q1 = (-a2 + d) * 0.5; double q2 = (a2 + d) * -0.5; if( fabs(q1) > fabs(q2) ) { x0 = q1 / a1; x1 = a3 / q1; } else { x0 = q2 / a1; x1 = a3 / q2; } n = d > 0 ? 2 : 1; } } } else { a0 = 1./a0; a1 *= a0; a2 *= a0; a3 *= a0; double Q = (a1 * a1 - 3 * a2) * (1./9); double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54); double Qcubed = Q * Q * Q; double d = Qcubed - R * R; if( d >= 0 ) { double theta = acos(R / std::sqrt(Qcubed)); double sqrtQ = std::sqrt(Q); double t0 = -2 * sqrtQ; double t1 = theta * (1./3); double t2 = a1 * (1./3); x0 = t0 * cos(t1) - t2; x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2; x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2; n = 3; } else { double e; d = std::sqrt(-d); e = std::pow(d + fabs(R), 0.333333333333); if( R > 0 ) e = -e; x0 = (e + Q / e) - a1 * (1./3); n = 1; } } if( roots.type() == CV_32FC1 ) { roots.at(0) = (float)x0; roots.at(1) = (float)x1; roots.at(2) = (float)x2; } else { roots.at(0) = x0; roots.at(1) = x1; roots.at(2) = x2; } return n; } /* finds complex roots of a polynomial using Durand-Kerner method: http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */ double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters ) { typedef Complex C; double maxDiff = 0; int iter, i, j; Mat coeffs0 = _coeffs0.getMat(); int ctype = _coeffs0.type(); int cdepth = CV_MAT_DEPTH(ctype); CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 ); CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 ); int n = coeffs0.cols + coeffs0.rows - 2; _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT); Mat roots0 = _roots0.getMat(); AutoBuffer buf(n*2+2); C *coeffs = buf, *roots = coeffs + n + 1; Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots); coeffs0.convertTo(coeffs1, coeffs1.type()); if( coeffs0.channels() == 1 ) { const double* rcoeffs = (const double*)roots; for( i = 0; i <= n; i++ ) coeffs[i] = C(rcoeffs[i], 0); } C p(1, 0), r(1, 1); for( i = 0; i < n; i++ ) { roots[i] = p; p = p * r; } maxIters = maxIters <= 0 ? 1000 : maxIters; for( iter = 0; iter < maxIters; iter++ ) { maxDiff = 0; for( i = 0; i < n; i++ ) { p = roots[i]; C num = coeffs[n], denom = coeffs[n]; for( j = 0; j < n; j++ ) { num = num*p + coeffs[n-j-1]; if( j != i ) denom = denom * (p - roots[j]); } num /= denom; roots[i] = p - num; maxDiff = std::max(maxDiff, cv::abs(num)); } if( maxDiff <= 0 ) break; } if( coeffs0.channels() == 1 ) { const double verySmallEps = 1e-100; for( i = 0; i < n; i++ ) if( fabs(roots[i].im) < verySmallEps ) roots[i].im = 0; } Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type()); return maxDiff; } CV_IMPL int cvSolveCubic( const CvMat* coeffs, CvMat* roots ) { cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots; int nroots = cv::solveCubic(_coeffs, _roots); CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated return nroots; } void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) { cv::Mat _a = cv::cvarrToMat(a); cv::Mat _r = cv::cvarrToMat(r); cv::Mat _r0 = _r; cv::solvePoly(_a, _r, maxiter); CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated } /* End of file. */