mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
add OpenCL version of convertFp16 and test
* disable vector operation for now * brush up the implementation based on comment
This commit is contained in:
parent
04573615c5
commit
d81cdb8e1c
@ -5371,6 +5371,34 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
static bool ocl_convertFp16( InputArray _src, OutputArray _dst, int ddepth )
|
||||
{
|
||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
||||
|
||||
_dst.createSameSize( _src, CV_MAKETYPE(ddepth, cn) );
|
||||
int kercn = 1;
|
||||
int rowsPerWI = 1;
|
||||
String build_opt = format("-D HALF_SUPPORT -D dstT=%s -D srcT=%s -D rowsPerWI=%d%s",
|
||||
ddepth == CV_16S ? "half" : "float",
|
||||
ddepth == CV_16S ? "float" : "half",
|
||||
rowsPerWI,
|
||||
ddepth == CV_16S ? " -D FLOAT_TO_HALF " : "");
|
||||
ocl::Kernel k("convertFp16", ocl::core::halfconvert_oclsrc, build_opt);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
|
||||
dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
|
||||
|
||||
k.args(srcarg, dstarg);
|
||||
|
||||
size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
@ -5411,10 +5439,8 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
Mat src = _src.getMat();
|
||||
int ddepth = 0;
|
||||
|
||||
switch( src.depth() )
|
||||
switch( _src.depth() )
|
||||
{
|
||||
case CV_32F:
|
||||
ddepth = CV_16S;
|
||||
@ -5427,6 +5453,11 @@ void cv::convertFp16( InputArray _src, OutputArray _dst)
|
||||
return;
|
||||
}
|
||||
|
||||
CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
|
||||
ocl_convertFp16(_src, _dst, ddepth))
|
||||
|
||||
Mat src = _src.getMat();
|
||||
|
||||
int type = CV_MAKETYPE(ddepth, src.channels());
|
||||
_dst.create( src.dims, src.size, type );
|
||||
Mat dst = _dst.getMat();
|
||||
|
73
modules/core/src/opencl/halfconvert.cl
Normal file
73
modules/core/src/opencl/halfconvert.cl
Normal file
@ -0,0 +1,73 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef HALF_SUPPORT
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
__kernel void convertFp16(__global const uchar * srcptr, int src_step, int src_offset,
|
||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y0 = get_global_id(1) * rowsPerWI;
|
||||
|
||||
if (x < dst_cols)
|
||||
{
|
||||
int src_index = mad24(y0, src_step, mad24(x, (int)sizeof(srcT), src_offset));
|
||||
int dst_index = mad24(y0, dst_step, mad24(x, (int)sizeof(dstT), dst_offset));
|
||||
|
||||
for (int y = y0, y1 = min(dst_rows, y0 + rowsPerWI); y < y1; ++y, src_index += src_step, dst_index += dst_step)
|
||||
{
|
||||
__global const srcT * src = (__global const srcT *)(srcptr + src_index);
|
||||
__global dstT * dst = (__global dstT *)(dstptr + dst_index);
|
||||
|
||||
#ifdef FLOAT_TO_HALF
|
||||
vstore_half(src[0], 0, dst);
|
||||
#else
|
||||
dst[0] = vload_half(0, src);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
@ -1614,6 +1614,60 @@ OCL_TEST_P(ConvertScaleAbs, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// ConvertFp16 ////////////////////////////////////////////////
|
||||
|
||||
PARAM_TEST_CASE(ConvertFp16, Channels, bool)
|
||||
{
|
||||
int cn;
|
||||
bool fromHalf;
|
||||
cv::Scalar val;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
cn = GET_PARAM(0);
|
||||
fromHalf = GET_PARAM(1);
|
||||
}
|
||||
|
||||
void generateTestData()
|
||||
{
|
||||
const int stype = CV_MAKE_TYPE(fromHalf ? CV_32F : CV_16S, cn);
|
||||
const int dtype = CV_MAKE_TYPE(fromHalf ? CV_16S : CV_32F, cn);
|
||||
|
||||
Size roiSize = randomSize(1, MAX_VALUE);
|
||||
Border srcBorder = randomBorder(0, 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, stype, -11, 11); // FIXIT: Test with minV, maxV
|
||||
|
||||
Border dstBorder = randomBorder(0, 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, 5, 16);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src);
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
|
||||
}
|
||||
|
||||
void Near(double threshold = 0.)
|
||||
{
|
||||
OCL_EXPECT_MATS_NEAR(dst, threshold);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
OCL_TEST_P(ConvertFp16, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::convertFp16(src_roi, dst_roi));
|
||||
OCL_ON(cv::convertFp16(usrc_roi, udst_roi));
|
||||
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase ScaleAdd;
|
||||
@ -1844,6 +1898,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertFp16, Combine(OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Psnr, Combine(::testing::Values((MatDepth)CV_8U), OCL_ALL_CHANNELS, Bool()));
|
||||
|
Loading…
Reference in New Issue
Block a user