add OpenCL version of convertFp16 and test

* disable vector operation for now
* brush up the implementation based on review comments
This commit is contained in:
Tomoaki Teshima 2017-05-23 20:00:21 +09:00
parent 04573615c5
commit d81cdb8e1c
3 changed files with 162 additions and 3 deletions

View File

@ -5371,6 +5371,34 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
return k.run(2, globalsize, NULL, false);
}
// OpenCL implementation of the float <-> half conversion.
//
// _src   : input array; its channel count is reused for the destination.
// _dst   : output array, (re)created with the size of _src and depth `ddepth`.
// ddepth : destination depth. CV_16S selects the float -> half kernel variant
//          (FLOAT_TO_HALF is defined); any other value selects half -> float.
//
// Returns false if the "convertFp16" kernel could not be obtained, otherwise
// the result of launching it.
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int ddepth )
{
    const int srcType = _src.type();
    const int cn = CV_MAT_CN(srcType);
    const bool toHalf = (ddepth == CV_16S);

    _dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );

    // One scalar element and one row per work-item (no vectorization).
    const int kercn = 1;
    const int rowsPerWI = 1;

    // Kernel-side element types and the conversion direction switch.
    const char* const dstTypeName = toHalf ? "half" : "float";
    const char* const srcTypeName = toHalf ? "float" : "half";
    const char* const directionDef = toHalf ? " -D FLOAT_TO_HALF " : "";

    String build_opt = format("-D HALF_SUPPORT -D dstT=%s -D srcT=%s -D rowsPerWI=%d%s",
                              dstTypeName, srcTypeName, rowsPerWI, directionDef);

    ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
    if (k.empty())
        return false;

    UMat src = _src.getUMat();
    UMat dst = _dst.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src);
    ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
    k.args(srcarg, dstarg);

    // Dimension 0 walks the scalar elements of a row (cols * channels),
    // dimension 1 walks the rows in groups of rowsPerWI.
    size_t globalsize[2];
    globalsize[0] = (size_t)src.cols * cn / kercn;
    globalsize[1] = ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI;

    return k.run(2, globalsize, NULL, false);
}
#endif
}
@ -5411,10 +5439,8 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
{
CV_INSTRUMENT_REGION()
Mat src = _src.getMat();
int ddepth = 0;
switch( src.depth() )
switch( _src.depth() )
{
case CV_32F:
ddepth = CV_16S;
@ -5427,6 +5453,11 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
return;
}
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
ocl_convertFp16(_src, _dst, ddepth))
Mat src = _src.getMat();
int type = CV_MAKETYPE(ddepth, src.channels());
_dst.create( src.dims, src.size, type );
Mat dst = _dst.getMat();

View File

@ -0,0 +1,73 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Enable the half-precision extension when the host asked for it
// (HALF_SUPPORT) and the device compiler advertises it.
#ifdef HALF_SUPPORT
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16:enable
#endif
#endif

// Element-wise conversion between float and half.
//
// Expected build-time definitions:
//   srcT, dstT    - element types of the source / destination buffers
//   rowsPerWI     - number of consecutive rows handled by one work-item
//   FLOAT_TO_HALF - when defined, values are written with vstore_half();
//                   otherwise they are read with vload_half()
//
// Work-item (x, y) handles scalar element x of rows
// [y * rowsPerWI, min(dst_rows, (y + 1) * rowsPerWI)).
__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset,
                          __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int firstRow = get_global_id(1) * rowsPerWI;

    // Work-items beyond the last column have nothing to do.
    if (col >= dst_cols)
        return;

    const int endRow = min(dst_rows, firstRow + rowsPerWI);

    // Byte positions of the first element processed by this work-item.
    int srcByte = mad24(firstRow, src_step, mad24(col, (int)sizeof(srcT), src_offset));
    int dstByte = mad24(firstRow, dst_step, mad24(col, (int)sizeof(dstT), dst_offset));

    for (int row = firstRow; row < endRow; ++row)
    {
        __global const srcT * in = (__global const srcT *)(srcptr + srcByte);
        __global dstT * out = (__global dstT *)(dstptr + dstByte);

#ifdef FLOAT_TO_HALF
        vstore_half(in[0], 0, out);
#else
        out[0] = vload_half(0, in);
#endif

        // Advance both positions to the same column of the next row.
        srcByte += src_step;
        dstByte += dst_step;
    }
}

View File

@ -1614,6 +1614,60 @@ OCL_TEST_P(ConvertScaleAbs, Mat)
}
}
//////////////////////////////// ConvertFp16 ////////////////////////////////////////////////
// Fixture for the OpenCL cv::convertFp16 test.
// Parameters: number of channels, and the conversion direction
// (true: half stored as CV_16S -> CV_32F, false: CV_32F -> half stored as CV_16S).
PARAM_TEST_CASE(ConvertFp16, Channels, bool)
{
    int cn;
    bool fromHalf;
    cv::Scalar val; // not referenced by the code visible here

    TEST_DECLARE_INPUT_PARAMETER(src);
    TEST_DECLARE_OUTPUT_PARAMETER(dst);

    virtual void SetUp()
    {
        cn = GET_PARAM(0);
        fromHalf = GET_PARAM(1);
    }

    // Creates a random source and a pre-filled destination of matching size,
    // then uploads both to their UMat counterparts.
    void generateTestData()
    {
        // The source is half (CV_16S storage) exactly when fromHalf is set.
        const int stype = CV_MAKE_TYPE(fromHalf ? CV_16S : CV_32F, cn);
        const int dtype = CV_MAKE_TYPE(fromHalf ? CV_32F : CV_16S, cn);

        // A CV_16S source holds raw binary16 bit patterns. Small negative
        // integers (0xfff5..0xffff) are NaNs and small positive ones are
        // subnormals, so draw from the non-negative finite patterns instead:
        // 0x7bff is the largest finite positive half.
        // FIXIT: negative finite halves (0x8000..0xfbff) are not covered.
        const double srcMin = fromHalf ? 0. : -11.;
        const double srcMax = fromHalf ? (double)0x7bff : 11.;

        Size roiSize = randomSize(1, MAX_VALUE);
        Border srcBorder = randomBorder(0, 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, stype, srcMin, srcMax); // FIXIT: Test with minV, maxV

        Border dstBorder = randomBorder(0, 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, 5, 16);

        UMAT_UPLOAD_INPUT_PARAMETER(src);
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
    }

    // Compares the CPU and OpenCL destinations within `threshold`.
    void Near(double threshold = 0.)
    {
        OCL_EXPECT_MATS_NEAR(dst, threshold);
    }
};
// Runs cv::convertFp16 on the Mat inputs with OpenCL off and on the UMat
// inputs with OpenCL on, then compares the two destinations.
OCL_TEST_P(ConvertFp16, Mat)
{
    int iteration = 0;
    while (iteration < test_loop_times)
    {
        generateTestData();

        OCL_OFF(cv::convertFp16(src_roi, dst_roi));
        OCL_ON(cv::convertFp16(usrc_roi, udst_roi));

        // Allow a difference of at most 1 between the two results.
        Near(1);

        ++iteration;
    }
}
//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
// ScaleAdd uses the ArithmTestBase fixture as-is.
typedef ArithmTestBase ScaleAdd;
@ -1844,6 +1898,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
// ConvertFp16 takes no depth parameter: only the channel count and one bool
// (read into the fixture's fromHalf member) are combined.
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));