// This file is part of OpenCV project. // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html #include "precomp.hpp" #include "opencl_kernels_core.hpp" #include "convert_scale.simd.hpp" #include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content namespace cv { static BinaryFunc getCvtScaleAbsFunc(int depth) { CV_INSTRUMENT_REGION(); CV_CPU_DISPATCH(getCvtScaleAbsFunc, (depth), CV_CPU_DISPATCH_MODES_ALL); } BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) { CV_INSTRUMENT_REGION(); CV_CPU_DISPATCH(getConvertScaleFunc, (sdepth, ddepth), CV_CPU_DISPATCH_MODES_ALL); } #ifdef HAVE_OPENCL static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) { const ocl::Device & d = ocl::Device::getDefault(); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); bool doubleSupport = d.doubleFPConfig() > 0; if (!doubleSupport && depth == CV_64F) return false; _dst.create(_src.size(), CV_8UC(cn)); int kercn = 1; if (d.isIntel()) { static const int vectorWidths[] = {4, 4, 4, 4, 4, 4, 4, -1}; kercn = ocl::checkOptimalVectorWidth( vectorWidths, _src, _dst, noArray(), noArray(), noArray(), noArray(), noArray(), noArray(), noArray(), ocl::OCL_VECTOR_MAX); } else kercn = ocl::predictOptimalVectorWidthMax(_src, _dst); int rowsPerWI = d.isIntel() ? 4 : 1; char cvt[2][50]; int wdepth = std::max(depth, CV_32F); String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s" " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s" " -D workT1=%s -D rowsPerWI=%d%s", ocl::typeToStr(CV_8UC(kercn)), CV_8U, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), wdepth, ocl::convertTypeStr(depth, wdepth, kercn, cvt[0], sizeof(cvt[0])), ocl::convertTypeStr(wdepth, CV_8U, kercn, cvt[1], sizeof(cvt[1])), ocl::typeToStr(wdepth), rowsPerWI, doubleSupport ? " -D DOUBLE_SUPPORT" : ""); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, build_opt); if (k.empty()) return false; UMat src = _src.getUMat(); UMat dst = _dst.getUMat(); ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); if (wdepth == CV_32F) k.args(srcarg, dstarg, (float)alpha, (float)beta); else if (wdepth == CV_64F) k.args(srcarg, dstarg, alpha, beta); size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; return k.run(2, globalsize, NULL, false); } #endif void convertScaleAbs(InputArray _src, OutputArray _dst, double alpha, double beta) { CV_INSTRUMENT_REGION(); CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), ocl_convertScaleAbs(_src, _dst, alpha, beta)) Mat src = _src.getMat(); int cn = src.channels(); double scale[] = {alpha, beta}; _dst.create( src.dims, src.size, CV_8UC(cn) ); Mat dst = _dst.getMat(); BinaryFunc func = getCvtScaleAbsFunc(src.depth()); CV_Assert( func != 0 ); if( src.dims <= 2 ) { Size sz = getContinuousSize2D(src, dst, cn); func( src.ptr(), src.step, 0, 0, dst.ptr(), dst.step, sz, scale ); } else { const Mat* arrays[] = {&src, &dst, 0}; uchar* ptrs[2] = {}; NAryMatIterator it(arrays, ptrs); Size sz((int)it.size*cn, 1); for( size_t i = 0; i < it.nplanes; i++, ++it ) func( ptrs[0], 0, 0, 0, ptrs[1], 0, sz, scale ); } } } // namespace