// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" #include "opencl_kernels_core.hpp" #include "stat.hpp" #include "opencv2/core/detail/dispatch_helper.impl.hpp" #include #include "minmax.simd.hpp" #include "minmax.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content namespace cv { static MinMaxIdxFunc getMinMaxIdxFunc(int depth) { CV_INSTRUMENT_REGION(); CV_CPU_DISPATCH(getMinMaxIdxFunc, (depth), CV_CPU_DISPATCH_MODES_ALL); } static void ofs2idx(const Mat& a, size_t ofs, int* idx) { int i, d = a.dims; if( ofs > 0 ) { ofs--; for( i = d-1; i >= 0; i-- ) { int sz = a.size[i]; idx[i] = (int)(ofs % sz); ofs /= sz; } } else { for( i = d-1; i >= 0; i-- ) idx[i] = -1; } } #ifdef HAVE_OPENCL #define MINMAX_STRUCT_ALIGNMENT 8 // sizeof double template void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, int* minLoc, int* maxLoc, int groupnum, int cols, double * maxVal2) { uint index_max = std::numeric_limits::max(); T minval = std::numeric_limits::max(); T maxval = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(), maxval2 = maxval; uint minloc = index_max, maxloc = index_max; size_t index = 0; const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL; const uint * minlocptr = NULL, * maxlocptr = NULL; if (minVal || minLoc) { minptr = db.ptr(); index += sizeof(T) * groupnum; index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxVal || maxLoc) { maxptr = (const T *)(db.ptr() + index); index += sizeof(T) * groupnum; index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (minLoc) { minlocptr = (const uint *)(db.ptr() + index); index += sizeof(uint) * groupnum; index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxLoc) { maxlocptr = (const uint *)(db.ptr() + index); index += sizeof(uint) * groupnum; index = alignSize(index, MINMAX_STRUCT_ALIGNMENT); } if (maxVal2) maxptr2 = (const T *)(db.ptr() + index); for (int i = 0; i < groupnum; i++) { if (minptr && minptr[i] <= minval) { if (minptr[i] == minval) { if (minlocptr) minloc = std::min(minlocptr[i], minloc); } else { if (minlocptr) minloc = minlocptr[i]; minval = minptr[i]; } } if (maxptr && maxptr[i] >= maxval) { if (maxptr[i] == maxval) { if (maxlocptr) maxloc = std::min(maxlocptr[i], maxloc); } else { if (maxlocptr) maxloc = maxlocptr[i]; maxval = maxptr[i]; } } if (maxptr2 && maxptr2[i] > maxval2) maxval2 = maxptr2[i]; } bool zero_mask = (minLoc && minloc == index_max) || (maxLoc && maxloc == index_max); if (minVal) *minVal = zero_mask ? 0 : (double)minval; if (maxVal) *maxVal = zero_mask ? 0 : (double)maxval; if (maxVal2) *maxVal2 = zero_mask ? 0 : (double)maxval2; if (minLoc) { minLoc[0] = zero_mask ? -1 : minloc / cols; minLoc[1] = zero_mask ? -1 : minloc % cols; } if (maxLoc) { maxLoc[0] = zero_mask ? -1 : maxloc / cols; maxLoc[1] = zero_mask ? -1 : maxloc % cols; } } typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal, int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2); bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask, int ddepth, bool absValues, InputArray _src2, double * maxVal2) { const ocl::Device & dev = ocl::Device::getDefault(); #ifdef __ANDROID__ if (dev.isNVidia()) return false; #endif if (dev.deviceVersionMajor() == 1 && dev.deviceVersionMinor() < 2) { // 'static' storage class specifier used by "minmaxloc" is available from OpenCL 1.2+ only return false; } bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), haveSrc2 = _src2.kind() != _InputArray::NONE; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); if (depth >= CV_16F) return false; // disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014) if ((haveMask || type == CV_32FC1) && dev.isAMD()) return false; CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) || (cn >= 1 && !minLoc && !maxLoc) ); if (ddepth < 0) ddepth = depth; CV_Assert(!haveSrc2 || _src2.type() == type); if (depth == CV_32S || depth == CV_8S || depth == CV_32U || depth == CV_64U || depth == CV_64S || depth == CV_16F || depth == CV_16BF) return false; if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport) return false; int groupnum = dev.maxComputeUnits(); size_t wgs = dev.maxWorkGroupSize(); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) wgs2_aligned <<= 1; wgs2_aligned >>= 1; bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL, needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL; // in case of mask we must know whether mask is filled with zeros or not // so let's calculate min or max location, if it's undefined, so mask is zeros if (!(needMaxLoc || needMinLoc) && haveMask) { if (needMinVal) needMinLoc = true; else needMaxLoc = true; } char cvt[2][50]; String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s" " -D MINMAX_STRUCT_ALIGNMENT=%d", depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", _src.isContinuous() ? " -D HAVE_SRC_CONT" : "", _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::convertTypeStr(depth, ddepth, kercn, cvt[0], sizeof(cvt[0])), absValues ? " -D OP_ABS" : "", haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth, depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1], sizeof(cvt[1])) : "noconvert", MINMAX_STRUCT_ALIGNMENT); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) return false; int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) + (maxVal2 ? esz : 0)) + 5 * MINMAX_STRUCT_ALIGNMENT; UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); if (cn > 1 && !haveMask) { src = src.reshape(1); src2 = src2.reshape(1); } if (haveSrc2) { if (!haveMask) k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2)); else k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask), ocl::KernelArg::ReadOnlyNoSize(src2)); } else { if (!haveMask) k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, ocl::KernelArg::PtrWriteOnly(db)); else k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask)); } size_t globalsize = groupnum * wgs; if (!k.run(1, &globalsize, &wgs, true)) return false; static const getMinMaxResFunc functab[7] = { getMinMaxRes, getMinMaxRes, getMinMaxRes, getMinMaxRes, getMinMaxRes, getMinMaxRes, getMinMaxRes }; CV_Assert(ddepth <= CV_64F); getMinMaxResFunc func = functab[ddepth]; int locTemp[2]; func(db.getMat(ACCESS_READ), minVal, maxVal, needMinLoc ? minLoc ? minLoc : locTemp : minLoc, needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, src.cols, maxVal2); return true; } #endif } void cv::minMaxIdx(InputArray _src, double* minVal, double* maxVal, int* minIdx, int* maxIdx, InputArray _mask) { CV_INSTRUMENT_REGION(); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) || (cn > 1 && _mask.empty() && !minIdx && !maxIdx) ); CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()), ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask)) Mat src = _src.getMat(), mask = _mask.getMat(); MinMaxIdxFunc func = getMinMaxIdxFunc(depth); CV_Assert( func != 0 ); const Mat* arrays[] = {&src, &mask, 0}; uchar* ptrs[2] = {}; NAryMatIterator it(arrays, ptrs); size_t minidx = 0, maxidx = 0; size_t startidx = 1; union { int i; float f; double d; int64 L; uint64 UL; } minval, maxval; int planeSize = (int)it.size*cn; minval.L = maxval.L = 0; for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize ) func( ptrs[0], ptrs[1], &minval.L, &maxval.L, &minidx, &maxidx, planeSize, startidx ); double dminval, dmaxval; if( depth <= CV_32S || depth == CV_Bool ) dminval = minval.i, dmaxval = maxval.i; else if( depth == CV_32F || depth == CV_16F || depth == CV_16BF ) dminval = minval.f, dmaxval = maxval.f; else if( depth == CV_64F ) dminval = minval.d, dmaxval = maxval.d; else if( depth == CV_64S || depth == CV_32U ) dminval = (double)minval.L, dmaxval = (double)maxval.L; else { CV_Assert(depth == CV_64U); dminval = (double)minval.UL, dmaxval = (double)maxval.UL; } if( minVal ) *minVal = dminval; if( maxVal ) *maxVal = dmaxval; if( minIdx ) ofs2idx(src, minidx, minIdx); if( maxIdx ) ofs2idx(src, maxidx, maxIdx); } void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray mask ) { CV_INSTRUMENT_REGION(); int dims = _img.dims(); CV_CheckLE(dims, 2, ""); minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask); if( minLoc) { if (dims == 2) std::swap(minLoc->x, minLoc->y); else { minLoc->y = 0; } } if( maxLoc) { if (dims == 2) std::swap(maxLoc->x, maxLoc->y); else { maxLoc->y = 0; } } } enum class ReduceMode { FIRST_MIN = 0, //!< get index of first min occurrence LAST_MIN = 1, //!< get index of last min occurrence FIRST_MAX = 2, //!< get index of first max occurrence LAST_MAX = 3, //!< get index of last max occurrence }; template struct reduceMinMaxImpl { void operator()(const cv::Mat& src, cv::Mat& dst, ReduceMode mode, const int axis) const { switch(mode) { case ReduceMode::FIRST_MIN: reduceMinMaxApply(src, dst, axis); break; case ReduceMode::LAST_MIN: reduceMinMaxApply(src, dst, axis); break; case ReduceMode::FIRST_MAX: reduceMinMaxApply(src, dst, axis); break; case ReduceMode::LAST_MAX: reduceMinMaxApply(src, dst, axis); break; } } template class Cmp> static void reduceMinMaxApply(const cv::Mat& src, cv::Mat& dst, const int axis) { Cmp cmp; const auto *src_ptr = src.ptr(); auto *dst_ptr = dst.ptr(); const size_t outer_size = src.total(0, axis); const auto mid_size = static_cast(src.size[axis]); const size_t outer_step = src.total(axis); const size_t dst_step = dst.total(axis); const size_t mid_step = src.total(axis + 1); for (size_t outer = 0; outer < outer_size; ++outer) { const size_t outer_offset = outer * outer_step; const size_t dst_offset = outer * dst_step; for (size_t mid = 0; mid != mid_size; ++mid) { const size_t src_offset = outer_offset + mid * mid_step; for (size_t inner = 0; inner < mid_step; inner++) { int32_t& index = dst_ptr[dst_offset + inner]; const size_t prev = outer_offset + index * mid_step + inner; const size_t curr = src_offset + inner; if (cmp(src_ptr[curr], src_ptr[prev])) { index = static_cast(mid); } } } } } }; static void reduceMinMax(cv::InputArray src, cv::OutputArray dst, ReduceMode mode, int axis) { CV_INSTRUMENT_REGION(); cv::Mat srcMat = src.getMat(); axis = (axis + srcMat.dims) % srcMat.dims; CV_Assert(srcMat.channels() == 1 && axis >= 0 && axis < srcMat.dims); std::vector sizes(srcMat.dims); std::copy(srcMat.size.p, srcMat.size.p + srcMat.dims, sizes.begin()); sizes[axis] = 1; dst.create(srcMat.dims, sizes.data(), CV_32SC1); // indices cv::Mat dstMat = dst.getMat(); dstMat.setTo(cv::Scalar::all(0)); if (!srcMat.isContinuous()) { srcMat = srcMat.clone(); } bool needs_copy = !dstMat.isContinuous(); if (needs_copy) { dstMat = dstMat.clone(); } cv::detail::depthDispatch(srcMat.depth(), srcMat, dstMat, mode, axis); if (needs_copy) { dstMat.copyTo(dst); } } void cv::reduceArgMin(InputArray src, OutputArray dst, int axis, bool lastIndex) { reduceMinMax(src, dst, lastIndex ? ReduceMode::LAST_MIN : ReduceMode::FIRST_MIN, axis); } void cv::reduceArgMax(InputArray src, OutputArray dst, int axis, bool lastIndex) { reduceMinMax(src, dst, lastIndex ? ReduceMode::LAST_MAX : ReduceMode::FIRST_MAX, axis); }