From d81cdb8e1cae1eefd94a58485f81419b9c3d93ce Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Tue, 23 May 2017 20:00:21 +0900 Subject: [PATCH] add OpenCL version of convertFp16 and test * disable vector operation for now * brush up the implementation based on comment --- modules/core/src/convert.cpp | 37 +++++++++++-- modules/core/src/opencl/halfconvert.cl | 73 ++++++++++++++++++++++++++ modules/core/test/ocl/test_arithm.cpp | 55 +++++++++++++++++++ 3 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 modules/core/src/opencl/halfconvert.cl diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index f431033c09..8baef8a8d9 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -5371,6 +5371,34 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha return k.run(2, globalsize, NULL, false); } +static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int ddepth ) +{ + int type = _src.type(), cn = CV_MAT_CN(type); + + _dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) ); + int kercn = 1; + int rowsPerWI = 1; + String build_opt = format("-D HALF_SUPPORT -D dstT=%s -D srcT=%s -D rowsPerWI=%d%s", + ddepth == CV_16S ? "half" : "float", + ddepth == CV_16S ? "float" : "half", + rowsPerWI, + ddepth == CV_16S ? " -D FLOAT_TO_HALF " : ""); + ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt); + if (k.empty()) + return false; + + UMat src = _src.getUMat(); + UMat dst = _dst.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); + + k.args(srcarg, dstarg); + + size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI }; + return k.run(2, globalsize, NULL, false); +} + #endif } @@ -5411,10 +5439,8 @@ void cv::convertFp16( InputArray _src, OutputArray _dst) { CV_INSTRUMENT_REGION() - Mat src = _src.getMat(); int ddepth = 0; - - switch( src.depth() ) + switch( _src.depth() ) { case CV_32F: ddepth = CV_16S; @@ -5427,6 +5453,11 @@ void cv::convertFp16( InputArray _src, OutputArray _dst) return; } + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), + ocl_convertFp16(_src, _dst, ddepth)) + + Mat src = _src.getMat(); + int type = CV_MAKETYPE(ddepth, src.channels()); _dst.create( src.dims, src.size, type ); Mat dst = _dst.getMat(); diff --git a/modules/core/src/opencl/halfconvert.cl b/modules/core/src/opencl/halfconvert.cl new file mode 100644 index 0000000000..506df69faf --- /dev/null +++ b/modules/core/src/opencl/halfconvert.cl @@ -0,0 +1,73 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifdef HALF_SUPPORT +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16:enable +#endif +#endif + +__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols) +{ + int x = get_global_id(0); + int y0 = get_global_id(1) * rowsPerWI; + + if (x < dst_cols) + { + int src_index = mad24(y0, src_step, mad24(x, (int)sizeof(srcT), src_offset)); + int dst_index = mad24(y0, dst_step, mad24(x, (int)sizeof(dstT), dst_offset)); + + for (int y = y0, y1 = min(dst_rows, y0 + rowsPerWI); y < y1; ++y, src_index += src_step, dst_index += dst_step) + { + __global const srcT * src = (__global const srcT *)(srcptr + src_index); + __global dstT * dst = (__global dstT *)(dstptr + dst_index); + +#ifdef FLOAT_TO_HALF + vstore_half(src[0], 0, dst); +#else + dst[0] = vload_half(0, src); +#endif + } + } +} diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index b6b249865f..1164473e61 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1614,6 +1614,60 @@ OCL_TEST_P(ConvertScaleAbs, Mat) } } +//////////////////////////////// ConvertFp16 //////////////////////////////////////////////// + +PARAM_TEST_CASE(ConvertFp16, Channels, bool) +{ + int cn; + bool fromHalf; + cv::Scalar val; + + TEST_DECLARE_INPUT_PARAMETER(src); + TEST_DECLARE_OUTPUT_PARAMETER(dst); + + virtual void SetUp() + { + cn = GET_PARAM(0); + fromHalf = GET_PARAM(1); + } + + void generateTestData() + { + const int stype = CV_MAKE_TYPE(fromHalf ? CV_32F : CV_16S, cn); + const int dtype = CV_MAKE_TYPE(fromHalf ? CV_16S : CV_32F, cn); + + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, 0); + randomSubMat(src, src_roi, roiSize, srcBorder, stype, -11, 11); // FIXIT: Test with minV, maxV + + Border dstBorder = randomBorder(0, 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, 5, 16); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } + + void Near(double threshold = 0.) + { + OCL_EXPECT_MATS_NEAR(dst, threshold); + } + +}; + + +OCL_TEST_P(ConvertFp16, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::convertFp16(src_roi, dst_roi)); + OCL_ON(cv::convertFp16(usrc_roi, udst_roi)); + + Near(1); + } +} + //////////////////////////////// ScaleAdd //////////////////////////////////////////////// typedef ArithmTestBase ScaleAdd; @@ -1844,6 +1898,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));