mirror of
https://github.com/opencv/opencv.git
synced 2024-11-24 11:10:21 +08:00
Merge pull request #10869 from savuor:color_cpp_split
color.cpp split (#10869) * initial split is done * files renamed (these names are excluded during compilation) * IPP code moved to corresponding files * splineBuild, splineInterpolate -> color_lab.cpp * Lab, Luv: little refactored * it compiles (didn't check work); Lab OCL code moved to color_lab.cpp * cvtcolor.cl: Lab/Luv part moved to color_lab.cl * cvtcolor.cl: color_rgb.cl extracted * cvtcolor.cl: color_yuv.cl separated * cvtcolor.cl: color_hsv.cl extracted * cvtcolor.cl: extracted to color_lab.cl and color_rgb.cl * helper functions moved to hpp file * Lab, Luv: moved to color_lab.cpp * CPU XYZ: to color_lab.cpp * OCL XYZ: to color_lab.cpp * warning fixed * CvtHelper added * CPU YUV: to color_yuv.cpp, helpers to color.hpp * CPU HLS/HSV: to color_hsv.cpp * CPU BGR2BGR: to color_rgb.cpp * CPU RGB: to color_rgb.cpp * extra arg removed * CPU YUV: to color_yuv.cpp * color code decoded * OclHelper added, some funcs rewritten * color_lab.cpp: refactored to use OclHelper * OCL RGB: to color_rgb.cpp * OCL HLS/HSV: to color_hsv.cpp * OCL YUV: to color_yuv.cpp * OCL YUV planes: to color_yuv.cpp * OCL: color code reduced * licence to demosaicing.cpp * IPP func tables to color_rgb.cpp * code cleanup * HAVE_OPENCL ifdefs added * helpers made more common * fixed two plane YUV with separate mats * fixed warning in gcc7.2.0 * precomp header fixed * color space classification functions fixed * helpers fixed * rename: isSRGB -> is_sRGB
This commit is contained in:
parent
c727e8a4d0
commit
64916d3d83
File diff suppressed because it is too large
Load Diff
668
modules/imgproc/src/color.hpp
Normal file
668
modules/imgproc/src/color.hpp
Normal file
@ -0,0 +1,668 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include <limits>
|
||||
#include "opencl_kernels_imgproc.hpp"
|
||||
#include "hal_replacement.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
#include "opencv2/core/softfloat.hpp"
|
||||
|
||||
// Fixed-point descale: adds half of 2^n, i.e. (1 << (n-1)), to x and then shifts right by n bits.
// `n` appears twice in the expansion, so it should be free of side effects;
// the expression assumes n >= 1 (n == 0 would shift by a negative amount).
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
|
||||
// Floating-point weights of the blue, green and red channels in the luma sum.
const float B2YF = 0.114f;
const float G2YF = 0.587f;
const float R2YF = 0.299f;

// Integer constants shared by the color conversion code.
enum
{
    // Number of fractional bits of the R2Y/G2Y/B2Y fixed-point weights (1 << 14 == 16384).
    yuv_shift = 14,

    // NOTE(review): presumably the fractional bit count of the XYZ fixed-point coefficients;
    // its users are not visible in this header.
    xyz_shift = 12,

    // Luma weights scaled by 16384 and rounded; the three values add up to exactly 16384.
    R2Y = 4899, // == R2YF*16384
    G2Y = 9617, // == G2YF*16384
    B2Y = 1868, // == B2YF*16384

    // NOTE(review): block length used elsewhere in the module; not referenced in this header.
    BLOCK_SIZE = 256
};
|
||||
|
||||
// Per-channel-type traits for the color conversion code.
// Primary template: the channel range comes from std::numeric_limits<_Tp>.
template<typename _Tp> struct ColorChannel
{
    // Floating-point type used for intermediate computations.
    typedef float worktype_f;

    // Largest value representable by a channel of type _Tp.
    static _Tp max()
    {
        return std::numeric_limits<_Tp>::max();
    }

    // max()/2 + 1 converted back to _Tp (128 for unsigned char, 32768 for unsigned short).
    static _Tp half()
    {
        const _Tp top = max();
        return (_Tp)(top/2 + 1);
    }
};

// float channels use the [0, 1] range instead of the numeric limits of the type.
template<> struct ColorChannel<float>
{
    typedef float worktype_f;

    static float max()
    {
        return 1.f;
    }

    static float half()
    {
        return 0.5f;
    }
};
|
||||
|
||||
/*template<> struct ColorChannel<double>
|
||||
{
|
||||
typedef double worktype_f;
|
||||
static double max() { return 1.; }
|
||||
static double half() { return 0.5; }
|
||||
};*/
|
||||
|
||||
//
|
||||
// Helper functions
|
||||
//
|
||||
|
||||
namespace {
|
||||
|
||||
inline bool isHSV(int code)
|
||||
{
|
||||
switch(code)
|
||||
{
|
||||
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
|
||||
case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool isLab(int code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case COLOR_Lab2BGR: case COLOR_Lab2RGB: case COLOR_Lab2LBGR: case COLOR_Lab2LRGB:
|
||||
case COLOR_BGR2Lab: case COLOR_RGB2Lab: case COLOR_LBGR2Lab: case COLOR_LRGB2Lab:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_sRGB(int code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case COLOR_BGR2Lab: case COLOR_RGB2Lab: case COLOR_BGR2Luv: case COLOR_RGB2Luv:
|
||||
case COLOR_Lab2BGR: case COLOR_Lab2RGB: case COLOR_Luv2BGR: case COLOR_Luv2RGB:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool swapBlue(int code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case COLOR_BGR2BGRA: case COLOR_BGRA2BGR:
|
||||
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555:
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA:
|
||||
case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY:
|
||||
case COLOR_BGR2YCrCb: case COLOR_BGR2YUV:
|
||||
case COLOR_YCrCb2BGR: case COLOR_YUV2BGR:
|
||||
case COLOR_BGR2XYZ: case COLOR_XYZ2BGR:
|
||||
case COLOR_BGR2HSV: case COLOR_BGR2HLS: case COLOR_BGR2HSV_FULL: case COLOR_BGR2HLS_FULL:
|
||||
case COLOR_YUV2BGR_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2BGR_IYUV: case COLOR_YUV2BGRA_IYUV:
|
||||
case COLOR_YUV2BGR_NV21: case COLOR_YUV2BGRA_NV21: case COLOR_YUV2BGR_NV12: case COLOR_YUV2BGRA_NV12:
|
||||
case COLOR_Lab2BGR: case COLOR_Luv2BGR: case COLOR_Lab2LBGR: case COLOR_Luv2LBGR:
|
||||
case COLOR_BGR2Lab: case COLOR_BGR2Luv: case COLOR_LBGR2Lab: case COLOR_LBGR2Luv:
|
||||
case COLOR_HSV2BGR: case COLOR_HLS2BGR: case COLOR_HSV2BGR_FULL: case COLOR_HLS2BGR_FULL:
|
||||
case COLOR_YUV2BGR_UYVY: case COLOR_YUV2BGRA_UYVY: case COLOR_YUV2BGR_YUY2:
|
||||
case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2BGR_YVYU: case COLOR_YUV2BGRA_YVYU:
|
||||
case COLOR_BGR2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: case COLOR_BGR2YUV_YV12: case COLOR_BGRA2YUV_YV12:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool isFullRangeHSV(int code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL:
|
||||
case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline int dstChannels(int code)
|
||||
{
|
||||
switch( code )
|
||||
{
|
||||
case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2RGBA:
|
||||
case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
|
||||
case COLOR_GRAY2BGRA:
|
||||
case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12:
|
||||
case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
|
||||
case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
|
||||
case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2:
|
||||
|
||||
return 4;
|
||||
|
||||
case COLOR_BGRA2BGR: case COLOR_RGBA2BGR: case COLOR_RGB2BGR:
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
|
||||
case COLOR_GRAY2BGR:
|
||||
case COLOR_YUV2BGR_NV21: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV12:
|
||||
case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV:
|
||||
case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU:
|
||||
case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2:
|
||||
|
||||
return 3;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline int greenBits(int code)
|
||||
{
|
||||
switch( code )
|
||||
{
|
||||
case COLOR_BGR2BGR565: case COLOR_RGB2BGR565: case COLOR_BGRA2BGR565: case COLOR_RGBA2BGR565:
|
||||
case COLOR_BGR5652BGR: case COLOR_BGR5652RGB: case COLOR_BGR5652BGRA: case COLOR_BGR5652RGBA:
|
||||
case COLOR_BGR5652GRAY: case COLOR_GRAY2BGR565:
|
||||
|
||||
return 6;
|
||||
|
||||
case COLOR_BGR2BGR555: case COLOR_RGB2BGR555: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR555:
|
||||
case COLOR_BGR5552BGR: case COLOR_BGR5552RGB: case COLOR_BGR5552BGRA: case COLOR_BGR5552RGBA:
|
||||
case COLOR_BGR5552GRAY: case COLOR_GRAY2BGR555:
|
||||
|
||||
return 5;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
inline int uIndex(int code)
|
||||
{
|
||||
switch( code )
|
||||
{
|
||||
case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
|
||||
|
||||
return 2;
|
||||
|
||||
case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
|
||||
case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
|
||||
case COLOR_YUV2BGR_NV21: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21:
|
||||
case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
|
||||
|
||||
return 1;
|
||||
|
||||
case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12:
|
||||
case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
|
||||
case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
|
||||
case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2:
|
||||
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace::
|
||||
|
||||
// Compile-time set of up to three integers with a runtime membership test.
// -1 marks an unused slot; the partial specializations below make sure that an
// unused slot is never compared, so contains(-1) is false for sets with fewer
// than three elements.
template<int i0, int i1 = -1, int i2 = -1>
struct Set
{
    // True when i equals one of the three template arguments.
    static bool contains(int i)
    {
        if (i == i0)
            return true;
        if (i == i1)
            return true;
        return i == i2;
    }
};

// Two-element set.
template<int i0, int i1>
struct Set<i0, i1, -1>
{
    static bool contains(int i)
    {
        if (i == i0)
            return true;
        return i == i1;
    }
};

// Single-element set.
template<int i0>
struct Set<i0, -1, -1>
{
    static bool contains(int i)
    {
        return i0 == i;
    }
};
|
||||
|
||||
// Selects how the conversion helpers derive the destination size from the source size.
enum SizePolicy
{
    TO_YUV,     // destination height = source height / 2 * 3
    FROM_YUV,   // destination height = source height * 2 / 3
    NONE        // destination has the same size as the source
};
|
||||
|
||||
template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
|
||||
struct CvtHelper
|
||||
{
|
||||
CvtHelper(InputArray _src, OutputArray _dst, int dcn)
|
||||
{
|
||||
int stype = _src.type();
|
||||
scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype);
|
||||
|
||||
CV_Assert( VScn::contains(scn) && VDcn::contains(dcn) && VDepth::contains(depth) );
|
||||
|
||||
if (_src.getObj() == _dst.getObj()) // inplace processing (#6653)
|
||||
_src.copyTo(src);
|
||||
else
|
||||
src = _src.getMat();
|
||||
Size sz = src.size();
|
||||
switch (sizePolicy)
|
||||
{
|
||||
case TO_YUV:
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0);
|
||||
dstSz = Size(sz.width, sz.height / 2 * 3);
|
||||
break;
|
||||
case FROM_YUV:
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0);
|
||||
dstSz = Size(sz.width, sz.height * 2 / 3);
|
||||
break;
|
||||
case NONE:
|
||||
default:
|
||||
dstSz = sz;
|
||||
break;
|
||||
}
|
||||
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
|
||||
dst = _dst.getMat();
|
||||
}
|
||||
Mat src, dst;
|
||||
int depth, scn;
|
||||
Size dstSz;
|
||||
};
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
|
||||
struct OclHelper
|
||||
{
|
||||
OclHelper( InputArray _src, OutputArray _dst, int dcn)
|
||||
{
|
||||
src = _src.getUMat();
|
||||
Size sz = src.size(), dstSz;
|
||||
int scn = src.channels();
|
||||
int depth = src.depth();
|
||||
|
||||
CV_Assert( VScn::contains(scn) && VDcn::contains(dcn) && VDepth::contains(depth) );
|
||||
switch (sizePolicy)
|
||||
{
|
||||
case TO_YUV:
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
|
||||
dstSz = Size(sz.width, sz.height / 2 * 3);
|
||||
break;
|
||||
case FROM_YUV:
|
||||
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 );
|
||||
dstSz = Size(sz.width, sz.height * 2 / 3);
|
||||
break;
|
||||
case NONE:
|
||||
default:
|
||||
dstSz = sz;
|
||||
break;
|
||||
}
|
||||
|
||||
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
|
||||
dst = _dst.getUMat();
|
||||
}
|
||||
|
||||
bool createKernel(cv::String name, ocl::ProgramSource& source, cv::String options)
|
||||
{
|
||||
ocl::Device dev = ocl::Device::getDefault();
|
||||
int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
|
||||
int pxPerWIx = 1;
|
||||
|
||||
cv::String baseOptions = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
|
||||
src.depth(), src.channels(), pxPerWIy);
|
||||
|
||||
switch (sizePolicy)
|
||||
{
|
||||
case TO_YUV:
|
||||
if (dev.isIntel() &&
|
||||
src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 &&
|
||||
dst.step % 4 == 0 && dst.offset % 4 == 0)
|
||||
{
|
||||
pxPerWIx = 2;
|
||||
}
|
||||
globalSize[0] = (size_t)dst.cols/(2*pxPerWIx);
|
||||
globalSize[1] = ((size_t)dst.rows/3 + pxPerWIy - 1) / pxPerWIy;
|
||||
baseOptions += format("-D PIX_PER_WI_X=%d ", pxPerWIx);
|
||||
break;
|
||||
case FROM_YUV:
|
||||
globalSize[0] = (size_t)dst.cols/2;
|
||||
globalSize[1] = ((size_t)dst.rows/2 + pxPerWIy - 1) / pxPerWIy;
|
||||
break;
|
||||
case NONE:
|
||||
default:
|
||||
globalSize[0] = (size_t)src.cols;
|
||||
globalSize[1] = ((size_t)src.rows + pxPerWIy - 1) / pxPerWIy;
|
||||
break;
|
||||
}
|
||||
|
||||
k.create(name.c_str(), source, baseOptions + options);
|
||||
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
nArgs = k.set(0, ocl::KernelArg::ReadOnlyNoSize(src));
|
||||
nArgs = k.set(nArgs, ocl::KernelArg::WriteOnly(dst));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool run()
|
||||
{
|
||||
return k.run(2, globalSize, NULL, false);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void setArg(const T& arg)
|
||||
{
|
||||
nArgs = k.set(nArgs, arg);
|
||||
}
|
||||
|
||||
UMat src, dst;
|
||||
ocl::Kernel k;
|
||||
size_t globalSize[2];
|
||||
int nArgs;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////// Top-level template function ////////////////////////////////
|
||||
|
||||
template <typename Cvt>
|
||||
class CvtColorLoop_Invoker : public ParallelLoopBody
|
||||
{
|
||||
typedef typename Cvt::channel_type _Tp;
|
||||
public:
|
||||
|
||||
CvtColorLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt) :
|
||||
ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_),
|
||||
width(width_), cvt(_cvt)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
CV_TRACE_FUNCTION();
|
||||
|
||||
const uchar* yS = src_data + static_cast<size_t>(range.start) * src_step;
|
||||
uchar* yD = dst_data + static_cast<size_t>(range.start) * dst_step;
|
||||
|
||||
for( int i = range.start; i < range.end; ++i, yS += src_step, yD += dst_step )
|
||||
cvt(reinterpret_cast<const _Tp*>(yS), reinterpret_cast<_Tp*>(yD), width);
|
||||
}
|
||||
|
||||
private:
|
||||
const uchar * src_data;
|
||||
const size_t src_step;
|
||||
uchar * dst_data;
|
||||
const size_t dst_step;
|
||||
const int width;
|
||||
const Cvt& cvt;
|
||||
|
||||
const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
|
||||
};
|
||||
|
||||
template <typename Cvt>
|
||||
void CvtColorLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
|
||||
{
|
||||
parallel_for_(Range(0, height),
|
||||
CvtColorLoop_Invoker<Cvt>(src_data, src_step, dst_data, dst_step, width, cvt),
|
||||
(width * height) / static_cast<double>(1<<16));
|
||||
}
|
||||
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
|
||||
# define NEED_IPP 1
|
||||
#else
|
||||
# define NEED_IPP 0
|
||||
#endif
|
||||
|
||||
#if NEED_IPP
|
||||
|
||||
#define MAX_IPP8u 255
|
||||
#define MAX_IPP16u 65535
|
||||
#define MAX_IPP32f 1.0
|
||||
|
||||
typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
|
||||
typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
|
||||
typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
|
||||
|
||||
template <typename Cvt>
|
||||
class CvtColorIPPLoop_Invoker :
|
||||
public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
|
||||
CvtColorIPPLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt, bool *_ok) :
|
||||
ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_), width(width_), cvt(_cvt), ok(_ok)
|
||||
{
|
||||
*ok = true;
|
||||
}
|
||||
|
||||
virtual void operator()(const Range& range) const
|
||||
{
|
||||
const void *yS = src_data + src_step * range.start;
|
||||
void *yD = dst_data + dst_step * range.start;
|
||||
if( !cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, range.end - range.start) )
|
||||
*ok = false;
|
||||
else
|
||||
{
|
||||
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const uchar * src_data;
|
||||
const size_t src_step;
|
||||
uchar * dst_data;
|
||||
const size_t dst_step;
|
||||
const int width;
|
||||
const Cvt& cvt;
|
||||
bool *ok;
|
||||
|
||||
const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
|
||||
};
|
||||
|
||||
|
||||
template <typename Cvt>
|
||||
bool CvtColorIPPLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
|
||||
{
|
||||
bool ok;
|
||||
parallel_for_(Range(0, height), CvtColorIPPLoop_Invoker<Cvt>(src_data, src_step, dst_data, dst_step, width, cvt, &ok), (width * height)/(double)(1<<16) );
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
||||
template <typename Cvt>
|
||||
bool CvtColorIPPLoopCopy(const uchar * src_data, size_t src_step, int src_type, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
|
||||
{
|
||||
Mat temp;
|
||||
Mat src(Size(width, height), src_type, const_cast<uchar*>(src_data), src_step);
|
||||
Mat source = src;
|
||||
if( src_data == dst_data )
|
||||
{
|
||||
src.copyTo(temp);
|
||||
source = temp;
|
||||
}
|
||||
bool ok;
|
||||
parallel_for_(Range(0, source.rows),
|
||||
CvtColorIPPLoop_Invoker<Cvt>(source.data, source.step, dst_data, dst_step,
|
||||
source.cols, cvt, &ok),
|
||||
source.total()/(double)(1<<16) );
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
||||
struct IPPGeneralFunctor
|
||||
{
|
||||
IPPGeneralFunctor(ippiGeneralFunc _func) : ippiColorConvertGeneral(_func){}
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
return ippiColorConvertGeneral ? CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
|
||||
}
|
||||
private:
|
||||
ippiGeneralFunc ippiColorConvertGeneral;
|
||||
};
|
||||
|
||||
|
||||
struct IPPReorderFunctor
|
||||
{
|
||||
IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : ippiColorConvertReorder(_func)
|
||||
{
|
||||
order[0] = _order0;
|
||||
order[1] = _order1;
|
||||
order[2] = _order2;
|
||||
order[3] = 3;
|
||||
}
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
return ippiColorConvertReorder ? CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
|
||||
}
|
||||
private:
|
||||
ippiReorderFunc ippiColorConvertReorder;
|
||||
int order[4];
|
||||
};
|
||||
|
||||
|
||||
struct IPPReorderGeneralFunctor
|
||||
{
|
||||
IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
|
||||
ippiColorConvertReorder(_func1), ippiColorConvertGeneral(_func2), depth(_depth)
|
||||
{
|
||||
order[0] = _order0;
|
||||
order[1] = _order1;
|
||||
order[2] = _order2;
|
||||
order[3] = 3;
|
||||
}
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
if (ippiColorConvertReorder == 0 || ippiColorConvertGeneral == 0)
|
||||
return false;
|
||||
|
||||
Mat temp;
|
||||
temp.create(rows, cols, CV_MAKETYPE(depth, 3));
|
||||
if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0)
|
||||
return false;
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
|
||||
}
|
||||
private:
|
||||
ippiReorderFunc ippiColorConvertReorder;
|
||||
ippiGeneralFunc ippiColorConvertGeneral;
|
||||
int order[4];
|
||||
int depth;
|
||||
};
|
||||
|
||||
|
||||
struct IPPGeneralReorderFunctor
|
||||
{
|
||||
IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
|
||||
ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
|
||||
{
|
||||
order[0] = _order0;
|
||||
order[1] = _order1;
|
||||
order[2] = _order2;
|
||||
order[3] = 3;
|
||||
}
|
||||
bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
|
||||
{
|
||||
if (ippiColorConvertGeneral == 0 || ippiColorConvertReorder == 0)
|
||||
return false;
|
||||
|
||||
Mat temp;
|
||||
temp.create(rows, cols, CV_MAKETYPE(depth, 3));
|
||||
if(CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
|
||||
return false;
|
||||
return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
|
||||
}
|
||||
private:
|
||||
ippiGeneralFunc ippiColorConvertGeneral;
|
||||
ippiReorderFunc ippiColorConvertReorder;
|
||||
int order[4];
|
||||
int depth;
|
||||
};
|
||||
|
||||
extern ippiReorderFunc ippiSwapChannelsC3C4RTab[8];
|
||||
extern ippiReorderFunc ippiSwapChannelsC4C3RTab[8];
|
||||
extern ippiReorderFunc ippiSwapChannelsC3RTab[8];
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
bool oclCvtColorBGR2Luv( InputArray _src, OutputArray _dst, int bidx, bool srgb );
|
||||
bool oclCvtColorBGR2Lab( InputArray _src, OutputArray _dst, int bidx, bool srgb );
|
||||
bool oclCvtColorLab2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool srgb);
|
||||
bool oclCvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool srgb);
|
||||
bool oclCvtColorBGR2XYZ( InputArray _src, OutputArray _dst, int bidx );
|
||||
bool oclCvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx );
|
||||
|
||||
bool oclCvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full );
|
||||
bool oclCvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full );
|
||||
bool oclCvtColorBGR2HLS( InputArray _src, OutputArray _dst, int bidx, bool full );
|
||||
bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full );
|
||||
|
||||
bool oclCvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool reverse );
|
||||
bool oclCvtColorBGR25x5( InputArray _src, OutputArray _dst, int bidx, int gbits );
|
||||
bool oclCvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int gbits );
|
||||
bool oclCvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits );
|
||||
bool oclCvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits );
|
||||
bool oclCvtColorBGR2Gray( InputArray _src, OutputArray _dst, int bidx );
|
||||
bool oclCvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn );
|
||||
bool oclCvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst );
|
||||
bool oclCvtColormRGBA2RGBA( InputArray _src, OutputArray _dst );
|
||||
|
||||
bool oclCvtColorBGR2YCrCb( InputArray _src, OutputArray _dst, int bidx);
|
||||
bool oclCvtcolorYCrCb2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx);
|
||||
bool oclCvtColorBGR2YUV( InputArray _src, OutputArray _dst, int bidx );
|
||||
bool oclCvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx );
|
||||
|
||||
bool oclCvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx, int yidx );
|
||||
bool oclCvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx );
|
||||
bool oclCvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx );
|
||||
bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, int uidx );
|
||||
bool oclCvtColorYUV2Gray_420( InputArray _src, OutputArray _dst );
|
||||
|
||||
#endif
|
||||
|
||||
void cvtColorBGR2Lab( InputArray _src, OutputArray _dst, bool swapb, bool srgb);
|
||||
void cvtColorBGR2Luv( InputArray _src, OutputArray _dst, bool swapb, bool srgb);
|
||||
void cvtColorLab2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool srgb );
|
||||
void cvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool srgb );
|
||||
void cvtColorBGR2XYZ( InputArray _src, OutputArray _dst, bool swapb );
|
||||
void cvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb );
|
||||
|
||||
void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, bool crcb);
|
||||
void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb);
|
||||
|
||||
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn);
|
||||
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
|
||||
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx );
|
||||
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
|
||||
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx);
|
||||
void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst );
|
||||
void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi );
|
||||
|
||||
void cvtColorBGR2HLS( InputArray _src, OutputArray _dst, bool swapb, bool fullRange );
|
||||
void cvtColorBGR2HSV( InputArray _src, OutputArray _dst, bool swapb, bool fullRange );
|
||||
void cvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange);
|
||||
void cvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange);
|
||||
|
||||
void cvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb);
|
||||
void cvtColorBGR25x5( InputArray _src, OutputArray _dst, bool swapb, int gbits);
|
||||
void cvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int gbits);
|
||||
void cvtColorBGR2Gray( InputArray _src, OutputArray _dst, bool swapb);
|
||||
void cvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn);
|
||||
void cvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits);
|
||||
void cvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits);
|
||||
void cvtColorRGBA2mRGBA(InputArray _src, OutputArray _dst);
|
||||
void cvtColormRGBA2RGBA(InputArray _src, OutputArray _dst);
|
||||
|
||||
} //namespace cv
|
1719
modules/imgproc/src/color_hsv.cpp
Normal file
1719
modules/imgproc/src/color_hsv.cpp
Normal file
File diff suppressed because it is too large
Load Diff
4818
modules/imgproc/src/color_lab.cpp
Normal file
4818
modules/imgproc/src/color_lab.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1802
modules/imgproc/src/color_rgb.cpp
Normal file
1802
modules/imgproc/src/color_rgb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2708
modules/imgproc/src/color_yuv.cpp
Normal file
2708
modules/imgproc/src/color_yuv.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -41,6 +41,50 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
/********************************* COPYRIGHT NOTICE *******************************\
|
||||
Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
|
||||
from MD-Mathematische Dienste GmbH. Below is the copyright notice:
|
||||
|
||||
IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
By downloading, copying, installing or using the software you agree
|
||||
to this license. If you do not agree to this license, do not download,
|
||||
install, copy or use the software.
|
||||
|
||||
Contributors License Agreement:
|
||||
|
||||
Copyright (c) 2002,
|
||||
MD-Mathematische Dienste GmbH
|
||||
Im Defdahl 5-10
|
||||
44141 Dortmund
|
||||
Germany
|
||||
www.md-it.de
|
||||
|
||||
Redistribution and use in source and binary forms,
|
||||
with or without modification, are permitted provided
|
||||
that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain
|
||||
the above copyright notice, this list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
The name of Contributor may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
\**********************************************************************************/
|
||||
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include <limits>
|
||||
|
621
modules/imgproc/src/opencl/color_hsv.cl
Normal file
621
modules/imgproc/src/opencl/color_hsv.cl
Normal file
@ -0,0 +1,621 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
// Element-type configuration selected by the `depth` macro.
// Per the #error text below, the accepted values are 0 (CV_8U), 2 (CV_16U) and 5 (CV_32F).
// `depth` itself is not defined in this part of the file - presumably it is passed
// as a build option by the host code; TODO confirm.
// For each depth the following are provided:
//   DATA_TYPE     - scalar channel type
//   MAX_NUM       - value written to an added 4th (alpha) channel by the kernels below
//   HALF_MAX_NUM  - mid-range value for the type
//   COEFF_TYPE    - type of conversion coefficients
//   SAT_CAST(num) - saturating conversion to DATA_TYPE (identity for float)
//   DEPTH_n       - marker macro used by the #ifdef DEPTH_0 / DEPTH_5 kernel variants
#if depth == 0
|
||||
#define DATA_TYPE uchar
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX_NUM 128
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define DEPTH_0
|
||||
#elif depth == 2
|
||||
#define DATA_TYPE ushort
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX_NUM 32768
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define DEPTH_2
|
||||
#elif depth == 5
|
||||
#define DATA_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX_NUM 0.5f
|
||||
#define COEFF_TYPE float
|
||||
#define SAT_CAST(num) (num)
|
||||
#define DEPTH_5
|
||||
#else
|
||||
// Any other depth is rejected at kernel build time.
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
|
||||
#endif
|
||||
|
||||
// Fixed-point helper: shifts x right by n bits after adding half of 2^n (round to nearest).
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
// Number of fractional bits used by the integer RGB2HSV kernel when applying
// the sdiv_table / hdiv_table entries.
enum
|
||||
{
|
||||
hsv_shift = 12
|
||||
};
|
||||
|
||||
// Size in bytes of one source / destination pixel (scn and dcn are channel-count macros
// that are not defined in this part of the file - presumably build options; TODO confirm).
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
|
||||
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
|
||||
|
||||
// Default for the hue scale factor when the build does not supply one.
#ifndef hscale
|
||||
#define hscale 0
|
||||
#endif
|
||||
|
||||
// Default for the hue range (added to negative hues in the 8-bit RGB2HSV kernel)
// when the build does not supply one.
#ifndef hrange
|
||||
#define hrange 0
|
||||
#endif
|
||||
|
||||
// Vector-component selectors for the red, green and blue channels of a loaded pixel.
// With bidx == 0 blue is the first component (x) and red the third (z); otherwise
// red is first and blue third. Green is always the second component.
#if bidx == 0
|
||||
#define R_COMP z
|
||||
#define G_COMP y
|
||||
#define B_COMP x
|
||||
#else
|
||||
#define R_COMP x
|
||||
#define G_COMP y
|
||||
#define B_COMP z
|
||||
#endif
|
||||
|
||||
//////////////////////////////////// RGB <-> HSV //////////////////////////////////////
|
||||
|
||||
// Lookup table used by the HSV2RGB / HLS2RGB kernels in this file.
// Row index: hue sector (0..5). Columns 0, 1, 2 give the index into the kernel's
// per-pixel tab[4] array from which b, g and r are taken, respectively.
__constant int sector_data[][3] = { { 1, 3, 0 },
|
||||
{ 1, 0, 2 },
|
||||
{ 3, 0, 1 },
|
||||
{ 0, 2, 1 },
|
||||
{ 0, 1, 3 },
|
||||
{ 2, 1, 0 } };
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGB/BGR -> HSV conversion using integer fixed-point arithmetic.
//
// src/dst       - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
// sdiv_table    - fixed-point factors indexed by V, used to derive S.
// hdiv_table    - fixed-point factors indexed by (max - min), used to derive H.
//                 (Table contents are produced by the caller; both are applied with
//                 hsv_shift fractional bits.)
//
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols,
                      __constant int * sdiv_table, __constant int * hdiv_table)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        const uchar4 px = vload4(0, src + src_pos);
        const int blue = px.B_COMP, green = px.G_COMP, red = px.R_COMP;

        // V is the largest channel; the spread (max - min) drives both S and H.
        const int val = max(max(blue, green), red);
        const int low = min(min(blue, green), red);
        const int spread = val - low;

        const int sat = mad24(spread, sdiv_table[val], (1 << (hsv_shift-1))) >> hsv_shift;

        // Hue numerator depends on which channel holds the maximum
        // (red is tested first, then green, otherwise blue).
        int hue;
        if (val == red)
            hue = green - blue;
        else if (val == green)
            hue = mad24(spread, 2, blue - red);
        else
            hue = mad24(4, spread, red - green);

        hue = mad24(hue, hdiv_table[spread], (1 << (hsv_shift-1))) >> hsv_shift;
        if (hue < 0)
            hue += hrange;   // wrap negative hues into range

        dst[dst_pos] = convert_uchar_sat_rte(hue);
        dst[dst_pos + 1] = (uchar)sat;
        dst[dst_pos + 2] = (uchar)val;
    }
}
|
||||
|
||||
// 8-bit HSV -> RGB/BGR conversion.
//
// src/dst    - image data addressed in bytes; *_step is the row stride and
//              *_offset the offset of the first pixel, both in bytes.
// rows, cols - image size in pixels.
//
// S and V are normalised by 1/255, H is multiplied by hscale so that one
// sector spans one unit (six sectors in total). Output channel order follows bidx;
// when dcn == 4 the fourth channel is set to MAX_NUM.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        const uchar4 px = vload4(0, src + src_pos);

        float hue = px.x;
        const float sat = px.y*(1/255.f), val = px.z*(1/255.f);

        // Zero saturation means grey: all channels equal V.
        float blue = val, green = val, red = val;

        if (sat != 0)
        {
            hue *= hscale;

            // Bring the hue into [0, 6); only one direction is corrected per pixel.
            if (hue < 0)
            {
                while (hue < 0)
                    hue += 6;
            }
            else
            {
                while (hue >= 6)
                    hue -= 6;
            }

            int sector = convert_int_sat_rtn(hue);
            hue -= sector;

            // Defensive clamp: an out-of-range sector falls back to sector 0.
            if ((unsigned)sector >= 6u)
            {
                sector = 0;
                hue = 0.f;
            }

            float tab[4];
            tab[0] = val;
            tab[1] = val*(1.f - sat);
            tab[2] = val*(1.f - sat*hue);
            tab[3] = val*(1.f - sat*(1.f - hue));

            blue  = tab[sector_data[sector][0]];
            green = tab[sector_data[sector][1]];
            red   = tab[sector_data[sector][2]];
        }

        dst[dst_pos + bidx] = convert_uchar_sat_rte(blue*255.f);
        dst[dst_pos + 1] = convert_uchar_sat_rte(green*255.f);
        dst[dst_pos + (bidx^2)] = convert_uchar_sat_rte(red*255.f);
#if dcn == 4
        dst[dst_pos + 3] = MAX_NUM;
#endif
    }
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Float RGB/BGR -> HSV conversion.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
//
// H is computed in degrees (0..360) and stored multiplied by hscale;
// S = (max - min) / (|max| + FLT_EPSILON); V = max.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const float * in = (__global const float *)(srcptr + src_pos);
        __global float * out = (__global float *)(dstptr + dst_pos);
        const float4 px = vload4(0, in);

        const float blue = px.B_COMP, green = px.G_COMP, red = px.R_COMP;

        // Channel maximum (V) and minimum, starting from red.
        float val = red, low = red;
        val = (val < green) ? green : val;
        val = (val < blue) ? blue : val;
        low = (low > green) ? green : low;
        low = (low > blue) ? blue : low;

        const float spread = val - low;
        const float sat = spread/(float)(fabs(val) + FLT_EPSILON);
        // 60 degrees per unit of normalised channel difference.
        const float deg = (float)(60.f/(spread + FLT_EPSILON));

        float hue;
        if (val == red)
            hue = (green - blue)*deg;
        else if (val == green)
            hue = fma(blue - red, deg, 120.f);
        else
            hue = fma(red - green, deg, 240.f);

        if (hue < 0)
            hue += 360.f;

        out[0] = hue*hscale;
        out[1] = sat;
        out[2] = val;
    }
}
|
||||
|
||||
// Float HSV -> RGB/BGR conversion.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
//
// H is multiplied by hscale so that one sector spans one unit (six sectors in total).
// Output channel order follows bidx; when dcn == 4 the fourth channel is MAX_NUM.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const float * in = (__global const float *)(srcptr + src_pos);
        __global float * out = (__global float *)(dstptr + dst_pos);
        const float4 px = vload4(0, in);

        float hue = px.x;
        const float sat = px.y, val = px.z;

        // Zero saturation means grey: all channels equal V.
        float blue = val, green = val, red = val;

        if (sat != 0)
        {
            hue *= hscale;

            // Bring the hue into [0, 6); only one direction is corrected per pixel.
            if (hue < 0)
            {
                while (hue < 0)
                    hue += 6;
            }
            else
            {
                while (hue >= 6)
                    hue -= 6;
            }

            int sector = convert_int_sat_rtn(hue);
            hue -= sector;

            // A slightly negative hue can round up to exactly 6 above;
            // fall back to sector 0 instead of indexing past sector_data.
            if ((unsigned)sector >= 6u)
            {
                sector = 0;
                hue = 0.f;
            }

            float tab[4];
            tab[0] = val;
            tab[1] = val*(1.f - sat);
            tab[2] = val*(1.f - sat*hue);
            tab[3] = val*(1.f - sat*(1.f - hue));

            blue  = tab[sector_data[sector][0]];
            green = tab[sector_data[sector][1]];
            red   = tab[sector_data[sector][2]];
        }

        out[bidx] = blue;
        out[1] = green;
        out[bidx^2] = red;
#if dcn == 4
        out[3] = MAX_NUM;
#endif
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGB/BGR -> HLS conversion (computed in float, stored as uchar).
//
// src/dst    - image data addressed in bytes; *_step is the row stride and
//              *_offset the offset of the first pixel, both in bytes.
// rows, cols - image size in pixels.
//
// Channels are normalised by 1/255. H is computed in degrees and stored multiplied
// by hscale; L and S are stored multiplied by 255. H and S stay 0 when the channel
// spread does not exceed FLT_EPSILON.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        const uchar4 px = vload4(0, src + src_pos);

        const float blue = px.B_COMP*(1/255.f), green = px.G_COMP*(1/255.f), red = px.R_COMP*(1/255.f);

        // Channel maximum and minimum, starting from red.
        float high = red, low = red;
        high = (high < green) ? green : high;
        high = (high < blue) ? blue : high;
        low = (low > green) ? green : low;
        low = (low > blue) ? blue : low;

        const float spread = high - low;
        const float light = (high + low)*0.5f;
        float hue = 0.f, sat = 0.f;

        if (spread > FLT_EPSILON)
        {
            sat = light < 0.5f ? spread/(high + low) : spread/(2 - high - low);
            const float deg = 60.f/spread;

            if (high == red)
                hue = (green - blue)*deg;
            else if (high == green)
                hue = fma(blue - red, deg, 120.f);
            else
                hue = fma(red - green, deg, 240.f);

            if (hue < 0.f)
                hue += 360.f;
        }

        dst[dst_pos] = convert_uchar_sat_rte(hue*hscale);
        dst[dst_pos + 1] = convert_uchar_sat_rte(light*255.f);
        dst[dst_pos + 2] = convert_uchar_sat_rte(sat*255.f);
    }
}
|
||||
|
||||
// 8-bit HLS -> RGB/BGR conversion.
//
// src/dst    - image data addressed in bytes; *_step is the row stride and
//              *_offset the offset of the first pixel, both in bytes.
// rows, cols - image size in pixels.
//
// L and S are normalised by 1/255, H is multiplied by hscale so that one sector
// spans one unit (six sectors in total). Output channel order follows bidx;
// when dcn == 4 the fourth channel is set to MAX_NUM.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        const uchar4 px = vload4(0, src + src_pos);

        float hue = px.x;
        const float light = px.y*(1.f/255.f), sat = px.z*(1.f/255.f);

        // Zero saturation means grey: all channels equal L.
        float blue = light, green = light, red = light;

        if (sat != 0)
        {
            // p2 / p1 are the upper and lower channel levels for this L and S.
            const float p2 = light <= 0.5f ? light*(1 + sat) : light + sat - light*sat;
            const float p1 = 2*light - p2;

            hue *= hscale;

            // Bring the hue into [0, 6); only one direction is corrected per pixel.
            if (hue < 0)
            {
                while (hue < 0)
                    hue += 6;
            }
            else
            {
                while (hue >= 6)
                    hue -= 6;
            }

            const int sector = convert_int_sat_rtn(hue);
            hue -= sector;

            float tab[4];
            tab[0] = p2;
            tab[1] = p1;
            tab[2] = fma(p2 - p1, 1-hue, p1);
            tab[3] = fma(p2 - p1, hue, p1);

            blue  = tab[sector_data[sector][0]];
            green = tab[sector_data[sector][1]];
            red   = tab[sector_data[sector][2]];
        }

        dst[dst_pos + bidx] = convert_uchar_sat_rte(blue*255.f);
        dst[dst_pos + 1] = convert_uchar_sat_rte(green*255.f);
        dst[dst_pos + (bidx^2)] = convert_uchar_sat_rte(red*255.f);
#if dcn == 4
        dst[dst_pos + 3] = MAX_NUM;
#endif
    }
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Float RGB/BGR -> HLS conversion.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
//
// H is computed in degrees and stored multiplied by hscale; L and S are stored
// as computed. H and S stay 0 when the channel spread does not exceed FLT_EPSILON.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const float * in = (__global const float *)(srcptr + src_pos);
        __global float * out = (__global float *)(dstptr + dst_pos);
        const float4 px = vload4(0, in);

        const float blue = px.B_COMP, green = px.G_COMP, red = px.R_COMP;

        // Channel maximum and minimum, starting from red.
        float high = red, low = red;
        high = (high < green) ? green : high;
        high = (high < blue) ? blue : high;
        low = (low > green) ? green : low;
        low = (low > blue) ? blue : low;

        const float spread = high - low;
        const float light = (high + low)*0.5f;
        float hue = 0.f, sat = 0.f;

        if (spread > FLT_EPSILON)
        {
            sat = light < 0.5f ? spread/(high + low) : spread/(2 - high - low);
            const float deg = 60.f/spread;

            if (high == red)
                hue = (green - blue)*deg;
            else if (high == green)
                hue = fma(blue - red, deg, 120.f);
            else
                hue = fma(red - green, deg, 240.f);

            if (hue < 0.f)
                hue += 360.f;
        }

        out[0] = hue*hscale;
        out[1] = light;
        out[2] = sat;
    }
}
|
||||
|
||||
// Float HLS -> RGB/BGR conversion.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
//
// H is multiplied by hscale so that one sector spans one unit (six sectors in total).
// Output channel order follows bidx; when dcn == 4 the fourth channel is MAX_NUM.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);
                float4 src_pix = vload4(0, src);

                float h = src_pix.x, l = src_pix.y, s = src_pix.z;
                float b, g, r;

                if (s != 0)
                {
                    float tab[4];
                    int sector;

                    // p2 / p1 are the upper and lower channel levels for this L and S.
                    float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
                    float p1 = 2*l - p2;

                    h *= hscale;
                    if( h < 0 )
                        do h += 6; while( h < 0 );
                    else if( h >= 6 )
                        do h -= 6; while( h >= 6 );

                    sector = convert_int_sat_rtn(h);
                    h -= sector;

                    // A tiny negative hue rounds to exactly 6.0f after "h += 6" (the ">= 6"
                    // branch is skipped on that path), which would make sector == 6 and read
                    // past the end of sector_data. Treat it as the start of sector 0, exactly
                    // as the HSV2RGB kernels do.
                    if ((unsigned)sector >= 6u)
                    {
                        sector = 0;
                        h = 0.f;
                    }

                    tab[0] = p2;
                    tab[1] = p1;
                    tab[2] = fma(p2 - p1, 1-h, p1);
                    tab[3] = fma(p2 - p1, h, p1);

                    b = tab[sector_data[sector][0]];
                    g = tab[sector_data[sector][1]];
                    r = tab[sector_data[sector][2]];
                }
                else
                    b = g = r = l;   // zero saturation: grey, all channels equal L

                dst[bidx] = b;
                dst[1] = g;
                dst[bidx^2] = r;
#if dcn == 4
                dst[3] = MAX_NUM;
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
||||
|
||||
#endif
|
735
modules/imgproc/src/opencl/color_lab.cl
Normal file
735
modules/imgproc/src/opencl/color_lab.cl
Normal file
@ -0,0 +1,735 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
// Element-type configuration selected by the `depth` macro.
// Per the #error text below, the accepted values are 0 (CV_8U), 2 (CV_16U) and 5 (CV_32F).
// `depth` itself is not defined in this part of the file - presumably it is passed
// as a build option by the host code; TODO confirm.
// For each depth the following are provided:
//   DATA_TYPE     - scalar channel type
//   MAX_NUM       - value written to an added 4th (alpha) channel by the kernels below
//   HALF_MAX_NUM  - mid-range value for the type
//   COEFF_TYPE    - type of the conversion coefficients passed to the XYZ kernels
//   SAT_CAST(num) - saturating conversion to DATA_TYPE (identity for float)
//   DEPTH_n       - marker macro used by the #ifdef DEPTH_0 / DEPTH_5 kernel variants
#if depth == 0
|
||||
#define DATA_TYPE uchar
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX_NUM 128
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define DEPTH_0
|
||||
#elif depth == 2
|
||||
#define DATA_TYPE ushort
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX_NUM 32768
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define DEPTH_2
|
||||
#elif depth == 5
|
||||
#define DATA_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX_NUM 0.5f
|
||||
#define COEFF_TYPE float
|
||||
#define SAT_CAST(num) (num)
|
||||
#define DEPTH_5
|
||||
#else
|
||||
// Any other depth is rejected at kernel build time.
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
|
||||
#endif
|
||||
|
||||
// Fixed-point helper: shifts x right by n bits after adding half of 2^n (round to nearest).
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
// Number of fractional bits removed by CV_DESCALE in the integer XYZ kernels
// (also reused as lab_shift further down).
enum
|
||||
{
|
||||
xyz_shift = 12,
|
||||
};
|
||||
|
||||
// Size in bytes of one source / destination pixel (scn and dcn are channel-count macros
// that are not defined in this part of the file - presumably build options; TODO confirm).
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
|
||||
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
|
||||
|
||||
// Two-level token pasting so that macro arguments are expanded before being joined.
#define __CAT(x, y) x##y
|
||||
#define CAT(x, y) __CAT(x, y)
|
||||
|
||||
// Vector types with 4 and 3 elements of DATA_TYPE (e.g. uchar4 / uchar3 for depth 0).
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
|
||||
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
|
||||
|
||||
///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
|
||||
|
||||
// RGB -> XYZ conversion: multiplies the first three channels of each pixel by a 3x3 matrix.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
// coeffs        - 9 matrix entries, row by row. For float data they are used directly;
//                 for integer data the products are rounded and shifted right by
//                 xyz_shift bits, i.e. the entries are fixed-point values.
//
// Results are stored through SAT_CAST.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset,
                      int rows, int cols, __constant COEFF_TYPE * coeffs)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const DATA_TYPE * in = (__global const DATA_TYPE *)(srcptr + src_pos);
        __global DATA_TYPE * out = (__global DATA_TYPE *)(dstptr + dst_pos);

        const DATA_TYPE_4 px = vload4(0, in);
        const DATA_TYPE c0 = px.x, c1 = px.y, c2 = px.z;

#ifdef DEPTH_5
        const float X = fma(c0, coeffs[0], fma(c1, coeffs[1], c2 * coeffs[2]));
        const float Y = fma(c0, coeffs[3], fma(c1, coeffs[4], c2 * coeffs[5]));
        const float Z = fma(c0, coeffs[6], fma(c1, coeffs[7], c2 * coeffs[8]));
#else
        const int X = CV_DESCALE(mad24(c0, coeffs[0], mad24(c1, coeffs[1], c2 * coeffs[2])), xyz_shift);
        const int Y = CV_DESCALE(mad24(c0, coeffs[3], mad24(c1, coeffs[4], c2 * coeffs[5])), xyz_shift);
        const int Z = CV_DESCALE(mad24(c0, coeffs[6], mad24(c1, coeffs[7], c2 * coeffs[8])), xyz_shift);
#endif
        out[0] = SAT_CAST(X);
        out[1] = SAT_CAST(Y);
        out[2] = SAT_CAST(Z);
    }
}
|
||||
|
||||
// XYZ -> RGB conversion: multiplies the first three channels of each pixel by a 3x3 matrix.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
// coeffs        - 9 matrix entries, row by row. For float data they are used directly;
//                 for integer data the products are rounded and shifted right by
//                 xyz_shift bits, i.e. the entries are fixed-point values.
//
// Results are stored through SAT_CAST. When dcn == 4 the fourth channel is MAX_NUM;
// for integer data with four channels the whole pixel is written as one vector store.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset,
                      int rows, int cols, __constant COEFF_TYPE * coeffs)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const DATA_TYPE * in = (__global const DATA_TYPE *)(srcptr + src_pos);
        __global DATA_TYPE * out = (__global DATA_TYPE *)(dstptr + dst_pos);

        const DATA_TYPE_4 px = vload4(0, in);
        const DATA_TYPE X = px.x, Y = px.y, Z = px.z;

#ifdef DEPTH_5
        const float v0 = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
        const float v1 = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
        const float v2 = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));
#else
        const int v0 = CV_DESCALE(mad24(X, coeffs[0], mad24(Y, coeffs[1], Z * coeffs[2])), xyz_shift);
        const int v1 = CV_DESCALE(mad24(X, coeffs[3], mad24(Y, coeffs[4], Z * coeffs[5])), xyz_shift);
        const int v2 = CV_DESCALE(mad24(X, coeffs[6], mad24(Y, coeffs[7], Z * coeffs[8])), xyz_shift);
#endif

        const DATA_TYPE q0 = SAT_CAST(v0);
        const DATA_TYPE q1 = SAT_CAST(v1);
        const DATA_TYPE q2 = SAT_CAST(v2);
#if dcn == 3 || defined DEPTH_5
        // Element-wise stores.
        out[0] = q0;
        out[1] = q1;
        out[2] = q2;
#if dcn == 4
        out[3] = MAX_NUM;
#endif
#else
        // Integer data with four channels: single vector store.
        *(__global DATA_TYPE_4 *)out = (DATA_TYPE_4)(q0, q1, q2, MAX_NUM);
#endif
    }
}
|
||||
|
||||
/////////////////////////////////// [l|s]RGB <-> Lab ///////////////////////////
|
||||
|
||||
// Fixed-point parameters for the Lab kernels.
// lab_shift reuses xyz_shift; lab_shift2 adds gamma_shift extra bits and is the shift
// applied to the final L/a/b values in the 8-bit BGR2Lab kernel.
#define lab_shift xyz_shift
|
||||
#define gamma_shift 3
|
||||
#define lab_shift2 (lab_shift + gamma_shift)
|
||||
// Number of segments passed to splineInterpolate for the float gamma table,
// and the factor that maps a [0, 1] channel value onto that table.
#define GAMMA_TAB_SIZE 1024
|
||||
#define GammaTabScale (float)GAMMA_TAB_SIZE
|
||||
|
||||
// Evaluates a piecewise cubic stored as 4 coefficients per segment.
//
// x   - position in table units; the segment index is floor(x) clamped to [0, n-1].
// tab - coefficient table, 4 floats per segment (constant term first).
// n   - number of segments.
// Returns c0 + t*(c1 + t*(c2 + t*c3)) where t = x - segment index.
inline float splineInterpolate(float x, __global const float * tab, int n)
{
    const int seg = clamp(convert_int_sat_rtn(x), 0, n-1);
    const float t = x - seg;
    __global const float * c = tab + (seg << 2);

    float acc = fma(c[3], t, c[2]);
    acc = fma(acc, t, c[1]);
    return fma(acc, t, c[0]);
}
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGB/BGR -> Lab conversion using lookup tables and fixed-point arithmetic.
//
// src/dst        - image data addressed in bytes; *_step is the row stride and
//                  *_offset the offset of the first pixel, both in bytes.
// rows, cols     - image size in pixels.
// gammaTab       - table indexed by the 8-bit channel value.
// LabCbrtTab_b   - table indexed by the descaled (lab_shift) linear combination of
//                  the gamma-mapped channels.
// coeffs         - 9 fixed-point matrix entries, row by row.
// Lscale, Lshift - fixed-point scale and offset applied to the Y table value to get L.
//
// a and b are offset by 128 before being stored; all three outputs go through SAT_CAST.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
                      __global const ushort * gammaTab, __global ushort * LabCbrtTab_b,
                      __constant int * coeffs, int Lscale, int Lshift)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global uchar * out = dst + dst_pos;
        const uchar4 px = vload4(0, src + src_pos);

        // Gamma-mapped channel values.
        const int R = gammaTab[px.x];
        const int G = gammaTab[px.y];
        const int B = gammaTab[px.z];

        // Matrix product, descaled and pushed through the cube-root table.
        const int fX = LabCbrtTab_b[CV_DESCALE(mad24(R, coeffs[0], mad24(G, coeffs[1], B*coeffs[2])), lab_shift)];
        const int fY = LabCbrtTab_b[CV_DESCALE(mad24(R, coeffs[3], mad24(G, coeffs[4], B*coeffs[5])), lab_shift)];
        const int fZ = LabCbrtTab_b[CV_DESCALE(mad24(R, coeffs[6], mad24(G, coeffs[7], B*coeffs[8])), lab_shift)];

        const int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
        const int a = CV_DESCALE( mad24(500, fX - fY, 128*(1 << lab_shift2)), lab_shift2 );
        const int b = CV_DESCALE( mad24(200, fY - fZ, 128*(1 << lab_shift2)), lab_shift2 );

        out[0] = SAT_CAST(L);
        out[1] = SAT_CAST(a);
        out[2] = SAT_CAST(b);
    }
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Float RGB/BGR -> Lab conversion.
//
// srcptr/dstptr - image data addressed in bytes; *_step is the row stride and
//                 *_offset the offset of the first pixel, both in bytes.
// rows, cols    - image size in pixels.
// gammaTab      - (only when SRGB is defined) spline table applied to each channel.
// coeffs        - 9 matrix entries, row by row.
// _1_3          - not used by this kernel body.
// _a            - offset of the linear branch of the f() function.
//
// Input channels are clamped to [0, 1] first.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _1_3, float _a)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global const float * in = (__global const float *)(srcptr + src_pos);
        __global float * out = (__global float *)(dstptr + dst_pos);
        const float4 px = vload4(0, in);

        float R = clamp(px.x, 0.0f, 1.0f);
        float G = clamp(px.y, 0.0f, 1.0f);
        float B = clamp(px.z, 0.0f, 1.0f);

#ifdef SRGB
        R = splineInterpolate(R * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        G = splineInterpolate(G * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const float X = fma(R, coeffs[0], fma(G, coeffs[1], B*coeffs[2]));
        const float Y = fma(R, coeffs[3], fma(G, coeffs[4], B*coeffs[5]));
        const float Z = fma(R, coeffs[6], fma(G, coeffs[7], B*coeffs[8]));

        // Constants: 0.008856 = (6/29)^3, 7.787 = (29/3)^3/(29*4), 903.3 = (29/3)^3.
        // Above the threshold f() is the cube root, below it is linear.
        const float FX = X > 0.008856f ? rootn(X, 3) : fma(7.787f, X, _a);
        const float FY = Y > 0.008856f ? rootn(Y, 3) : fma(7.787f, Y, _a);
        const float FZ = Z > 0.008856f ? rootn(Z, 3) : fma(7.787f, Z, _a);

        out[0] = Y > 0.008856f ? fma(116.f, FY, -16.f) : (903.3f * Y);
        out[1] = 500.f * (FX - FY);
        out[2] = 200.f * (FY - FZ);
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
// Converts one Lab pixel to three colour channels in [0, 1].
//
// srcbuf   - input L, a, b.
// dstbuf   - output: three channels, each clamped to [0, 1] (and passed through
//            the gamma spline when SRGB is defined).
// gammaTab - (only when SRGB is defined) spline table for the output channels.
// coeffs   - 9 matrix entries, row by row, applied to (x, y, z).
// lThresh  - L values at or below this use the linear branch for y.
// fThresh  - f values at or below this use the linear branch for x and z.
//
// Constants: 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4).
inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    const float L = srcbuf[0], a = srcbuf[1], b = srcbuf[2];

    // y and f(y) from L.
    float y, fy;
    if (L <= lThresh)
    {
        y = L / 903.3f;
        fy = fma(7.787f, y, 16.0f / 116.0f);
    }
    else
    {
        fy = (L + 16.0f) / 116.0f;
        y = fy * fy * fy;
    }

    // f(x) and f(z) from a and b, then invert f() for each.
    const float fx = a / 500.0f + fy;
    const float fz = fy - b / 200.0f;
    const float x = (fx <= fThresh) ? (fx - 16.0f / 116.0f) / 7.787f : fx * fx * fx;
    const float z = (fz <= fThresh) ? (fz - 16.0f / 116.0f) / 7.787f : fz * fz * fz;

    float c0 = clamp(fma(coeffs[0], x, fma(coeffs[1], y, coeffs[2] * z)), 0.0f, 1.0f);
    float c1 = clamp(fma(coeffs[3], x, fma(coeffs[4], y, coeffs[5] * z)), 0.0f, 1.0f);
    float c2 = clamp(fma(coeffs[6], x, fma(coeffs[7], y, coeffs[8] * z)), 0.0f, 1.0f);

#ifdef SRGB
    c0 = splineInterpolate(c0 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
    c1 = splineInterpolate(c1 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
    c2 = splineInterpolate(c2 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

    dstbuf[0] = c0;
    dstbuf[1] = c1;
    dstbuf[2] = c2;
}
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit Lab -> RGB/BGR conversion (computed in float through Lab2BGR_f).
//
// src/dst    - image data addressed in bytes; *_step is the row stride and
//              *_offset the offset of the first pixel, both in bytes.
// rows, cols - image size in pixels.
// gammaTab   - (only when SRGB is defined) forwarded to Lab2BGR_f.
// coeffs, lThresh, fThresh - forwarded to Lab2BGR_f.
//
// The stored L is rescaled by 100/255 and 128 is subtracted from a and b before
// conversion; results are multiplied by 255 and stored through SAT_CAST.
// With four destination channels the pixel is written as one uchar4 with
// MAX_NUM in the last channel.
// Each work-item handles one column and up to PIX_PER_WI_Y consecutive rows.
__kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    const int col = get_global_id(0);
    int row = get_global_id(1) * PIX_PER_WI_Y;

    if (col >= cols)
        return;

    int src_pos = mad24(row, src_step, mad24(col, scnbytes, src_offset));
    int dst_pos = mad24(row, dst_step, mad24(col, dcnbytes, dst_offset));

    #pragma unroll
    for (int k = 0; k < PIX_PER_WI_Y && row < rows;
         ++k, ++row, dst_pos += dst_step, src_pos += src_step)
    {
        __global uchar * out = dst + dst_pos;
        const uchar4 px = vload4(0, src + src_pos);

        float lab_px[3], rgb_px[3];
        lab_px[0] = px.x*(100.f/255.f);
        lab_px[1] = convert_float(px.y - 128);
        lab_px[2] = convert_float(px.z - 128);

        Lab2BGR_f(lab_px, rgb_px,
#ifdef SRGB
            gammaTab,
#endif
            coeffs, lThresh, fThresh);

#if dcn == 3
        out[0] = SAT_CAST(rgb_px[0] * 255.0f);
        out[1] = SAT_CAST(rgb_px[1] * 255.0f);
        out[2] = SAT_CAST(rgb_px[2] * 255.0f);
#else
        *(__global uchar4 *)out = (uchar4)(SAT_CAST(rgb_px[0] * 255.0f),
            SAT_CAST(rgb_px[1] * 255.0f), SAT_CAST(rgb_px[2] * 255.0f), MAX_NUM);
#endif
    }
}
|
||||
|
||||
#elif defined DEPTH_5
|
||||
|
||||
// Lab -> BGR/RGB conversion for 32-bit float images.
//
// Each work-item handles column get_global_id(0) and up to PIX_PER_WI_Y
// consecutive rows starting at get_global_id(1) * PIX_PER_WI_Y.
// The three source floats are passed unmodified to Lab2BGR_f() and its three
// results are stored as-is; when dcn == 4 a fourth channel equal to MAX_NUM
// is appended.
//
// srcptr/dstptr : image data, addressed in bytes via *_step and *_offset
// rows, cols    : image size in pixels
// gammaTab      : (SRGB builds only) table forwarded to Lab2BGR_f()
// coeffs, lThresh, fThresh : forwarded unchanged to Lab2BGR_f()
__kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed, so read exactly three floats.
                // A 4-wide vector load would touch one element beyond the pixel and
                // could run past the end of a 3-channel buffer on the last pixel.
                float srcbuf[3], dstbuf[3];
                srcbuf[0] = src[0], srcbuf[1] = src[1], srcbuf[2] = src[2];

                Lab2BGR_f(&srcbuf[0], &dstbuf[0],
#ifdef SRGB
                    gammaTab,
#endif
                    coeffs, lThresh, fThresh);

                dst[0] = dstbuf[0], dst[1] = dstbuf[1], dst[2] = dstbuf[2];
#if dcn == 4
                dst[3] = MAX_NUM;
#endif
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
/////////////////////////////////// [l|s]RGB <-> Luv ///////////////////////////
|
||||
|
||||
// Table size handed to splineInterpolate() together with LabCbrtTab by the
// BGR2Luv kernels below (presumably a cube-root table, judging by the name).
#define LAB_CBRT_TAB_SIZE 1024
|
||||
// NOTE(review): not referenced by the kernels visible in this part of the
// file; depends on gamma_shift, which is defined elsewhere.
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
|
||||
|
||||
// Factor applied to Y before it is used as the lookup position in LabCbrtTab.
__constant float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
|
||||
|
||||
#ifdef DEPTH_5
|
||||
|
||||
// BGR/RGB -> Luv conversion for 32-bit float images.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. The three input channels are clamped to [0, 1],
// optionally passed through gammaTab (SRGB builds), combined with the 3x3
// matrix in coeffs into X, Y, Z and then turned into L, u, v using the
// LabCbrtTab lookup and the _un/_vn constants supplied by the host.
__kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __global const float * LabCbrtTab, __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const float * in = (__global const float *)(srcptr + src_index);
        __global float * out = (__global float *)(dstptr + dst_index);

        float c0 = clamp(in[0], 0.f, 1.f);
        float c1 = clamp(in[1], 0.f, 1.f);
        float c2 = clamp(in[2], 0.f, 1.f);

#ifdef SRGB
        c0 = splineInterpolate(c0*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c1 = splineInterpolate(c1*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c2 = splineInterpolate(c2*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const float X = fma(c0, coeffs[0], fma(c1, coeffs[1], c2*coeffs[2]));
        const float Y = fma(c0, coeffs[3], fma(c1, coeffs[4], c2*coeffs[5]));
        const float Z = fma(c0, coeffs[6], fma(c1, coeffs[7], c2*coeffs[8]));

        const float lookup = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
        const float L = fma(116.f, lookup, -16.f);

        // FLT_EPSILON keeps the divisor away from zero for black pixels.
        const float d = 52.0f / fmax(fma(15.0f, Y, fma(3.0f, Z, X)), FLT_EPSILON);

        out[0] = L;
        out[1] = L*fma(X, d, -_un);
        out[2] = L*fma(2.25f, Y*d, -_vn);

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
#elif defined DEPTH_0
|
||||
|
||||
// BGR/RGB -> Luv conversion for 8-bit images.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. Input bytes are scaled by 1/255, optionally passed
// through gammaTab (SRGB builds), combined with the 3x3 matrix in coeffs into
// X, Y, Z and converted to L, u, v. The results are remapped to the 8-bit
// range (L * 2.55, u and v through the affine maps below) and saturated.
__kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __global const float * LabCbrtTab, __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    __global const uchar * in = src + mad24(y, src_step, mad24(x, scnbytes, src_offset));
    __global uchar * out = dst + mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const float scale = 1.0f / 255.0f;
        float c0 = in[0]*scale;
        float c1 = in[1]*scale;
        float c2 = in[2]*scale;

#ifdef SRGB
        c0 = splineInterpolate(c0*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c1 = splineInterpolate(c1*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c2 = splineInterpolate(c2*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const float X = fma(c0, coeffs[0], fma(c1, coeffs[1], c2*coeffs[2]));
        const float Y = fma(c0, coeffs[3], fma(c1, coeffs[4], c2*coeffs[5]));
        const float Z = fma(c0, coeffs[6], fma(c1, coeffs[7], c2*coeffs[8]));

        float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
        L = 116.f*L - 16.f;

        // 52 = 4*13; FLT_EPSILON keeps the divisor away from zero.
        const float d = 52.0f / fmax(fma(15.0f, Y, fma(3.0f, Z, X)), FLT_EPSILON);
        const float u = L*(X*d - _un);
        const float v = L*fma(2.25f, Y*d, -_vn);

        out[0] = SAT_CAST(L * 2.55f);
        // 0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
        out[1] = SAT_CAST(fma(u, 0.72033898305084743f, 96.525423728813564f));
        // 0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
        out[2] = SAT_CAST(fma(v, 0.9732824427480916f, 136.259541984732824f));

        ++y;
        out += dst_step;
        in += src_step;
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef DEPTH_5
|
||||
|
||||
// Luv -> BGR/RGB conversion for 32-bit float images.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. L, u, v are turned into X, Y, Z using the _un/_vn
// constants from the host, multiplied by the 3x3 matrix in coeffs, clamped
// to [0, 1] and optionally passed through gammaTab (SRGB builds). When
// dcn == 4 a fourth channel equal to MAX_NUM is written.
__kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const float * in = (__global const float *)(srcptr + src_index);
        __global float * out = (__global float *)(dstptr + dst_index);

        const float L = in[0], u = in[1], v = in[2];

        // Y from L: cubic branch for L >= 8, linear branch below.
        float Y;
        if (L >= 8)
        {
            const float t = fma(L, 1.f/116.f, 16.f/116.f);
            Y = t*t*t;
        }
        else
        {
            Y = L * (1.0f/903.3f); // L*(3./29.)^3
        }

        const float up = 3.f*fma(L, _un, u);
        // The clamp bounds the reciprocal when its denominator is near zero.
        const float vp = clamp(0.25f/fma(L, _vn, v), -0.25f, 0.25f);
        const float X = 3.f*Y*up*vp;
        const float Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);

        float c0 = clamp(fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2])), 0.f, 1.f);
        float c1 = clamp(fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5])), 0.f, 1.f);
        float c2 = clamp(fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8])), 0.f, 1.f);

#ifdef SRGB
        c0 = splineInterpolate(c0*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c1 = splineInterpolate(c1*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        c2 = splineInterpolate(c2*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        out[0] = c0;
        out[1] = c1;
        out[2] = c2;
#if dcn == 4
        out[3] = MAX_NUM;
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
#elif defined DEPTH_0
|
||||
|
||||
// Luv -> BGR/RGB conversion for 8-bit images.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. The three source bytes are first mapped back to float
// L, u, v (L * 100/255, u and v through the affine maps below), converted to
// X, Y, Z with the _un/_vn constants from the host, multiplied by the 3x3
// matrix in coeffs, clamped to [0, 1], optionally passed through gammaTab
// (SRGB builds) and finally scaled by 255 and saturated. When dcn == 4 the
// pixel is written as one uchar4 with alpha MAX_NUM.
__kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _un, float _vn)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        src += mad24(y, src_step, mad24(x, scnbytes, src_offset));
        dst += mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
            if (y < rows)
            {
                float X, Y, Z;
                float L = src[0]*(100.f/255.f);
                // 1.388235294117647 = (220+134)/255
                float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f);
                // 1.027450980392157 = (140+122)/255
                float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f);

                // Y from L: cubic branch for L >= 8, linear branch below.
                if(L >= 8)
                {
                    Y = fma(L, 1.f/116.f, 16.f/116.f);
                    Y = Y*Y*Y;
                }
                else
                {
                    Y = L * (1.0f/903.3f); // L*(3./29.)^3
                }
                float up = 3.f*fma(L, _un, u);
                float vp = 0.25f/fma(L, _vn, v);
                // The clamp bounds the reciprocal when its denominator is near zero.
                vp = clamp(vp, -0.25f, 0.25f);
                X = 3.f*Y*up*vp;
                Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);

                //limit X, Y, Z to [0, 2] to fit white point
                X = clamp(X, 0.f, 2.f); Z = clamp(Z, 0.f, 2.f);

                float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
                float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
                float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));

                R = clamp(R, 0.f, 1.f);
                G = clamp(G, 0.f, 1.f);
                B = clamp(B, 0.f, 1.f);

#ifdef SRGB
                R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
                G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
                B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

                uchar dst0 = SAT_CAST(R * 255.0f);
                uchar dst1 = SAT_CAST(G * 255.0f);
                uchar dst2 = SAT_CAST(B * 255.0f);

#if dcn == 4
                *(__global uchar4 *)dst = (uchar4)(dst0, dst1, dst2, MAX_NUM);
#else
                dst[0] = dst0;
                dst[1] = dst1;
                dst[2] = dst2;
#endif

                ++y;
                dst += dst_step;
                src += src_step;
            }
    }
}
|
||||
|
||||
#endif
|
454
modules/imgproc/src/opencl/color_rgb.cl
Normal file
454
modules/imgproc/src/opencl/color_rgb.cl
Normal file
@ -0,0 +1,454 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
#if depth == 0
|
||||
#define DATA_TYPE uchar
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX_NUM 128
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define DEPTH_0
|
||||
#elif depth == 2
|
||||
#define DATA_TYPE ushort
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX_NUM 32768
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define DEPTH_2
|
||||
#elif depth == 5
|
||||
#define DATA_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX_NUM 0.5f
|
||||
#define COEFF_TYPE float
|
||||
#define SAT_CAST(num) (num)
|
||||
#define DEPTH_5
|
||||
#else
|
||||
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
|
||||
#endif
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
enum
|
||||
{
|
||||
yuv_shift = 14,
|
||||
R2Y = 4899,
|
||||
G2Y = 9617,
|
||||
B2Y = 1868
|
||||
};
|
||||
|
||||
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
|
||||
#define B2YF 0.114f
|
||||
#define G2YF 0.587f
|
||||
#define R2YF 0.299f
|
||||
|
||||
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
|
||||
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
|
||||
|
||||
#if bidx == 0
|
||||
#define R_COMP z
|
||||
#define G_COMP y
|
||||
#define B_COMP x
|
||||
#else
|
||||
#define R_COMP x
|
||||
#define G_COMP y
|
||||
#define B_COMP z
|
||||
#endif
|
||||
|
||||
#define __CAT(x, y) x##y
|
||||
#define CAT(x, y) __CAT(x, y)
|
||||
|
||||
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
|
||||
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
|
||||
|
||||
///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
|
||||
|
||||
// Colour -> grayscale conversion.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. The gray value is the weighted sum of the B, G and R
// components (selected through the bidx-dependent *_COMP macros): float
// weights B2YF/G2YF/R2YF for DEPTH_5, otherwise fixed-point weights
// B2Y/G2Y/R2Y rounded and shifted down by yuv_shift.
__kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offset,
                       __global uchar * dstptr, int dst_step, int dst_offset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_index);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_index);
        const DATA_TYPE_3 pix = vload3(0, in);

#ifdef DEPTH_5
        out[0] = fma(pix.B_COMP, B2YF, fma(pix.G_COMP, G2YF, pix.R_COMP * R2YF));
#else
        out[0] = (DATA_TYPE)CV_DESCALE(mad24(pix.B_COMP, B2Y, mad24(pix.G_COMP, G2Y, mul24(pix.R_COMP, R2Y))), yuv_shift);
#endif

        ++y;
        src_index += src_step;
        dst_index += dst_step;
    }
}
|
||||
|
||||
// Grayscale -> colour conversion: the gray value is replicated into the
// three colour channels; a fourth channel, when present (dcn == 4), is set
// to MAX_NUM.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. Integer 4-channel output uses a single vector store;
// 3-channel and float output are written element by element.
__kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offset,
                       __global uchar * dstptr, int dst_step, int dst_offset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_index);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_index);
        const DATA_TYPE gray = in[0];

#if dcn == 3 || defined DEPTH_5
        out[2] = gray;
        out[1] = gray;
        out[0] = gray;
#if dcn == 4
        out[3] = MAX_NUM;
#endif
#else
        *(__global DATA_TYPE_4 *)out = (DATA_TYPE_4)(gray, gray, gray, MAX_NUM);
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
///////////////////////////////////// RGB[A] <-> BGR[A] //////////////////////////////////////
|
||||
|
||||
// Channel reordering / channel-count change between 3- and 4-channel images.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. With REVERSE defined the first and third channels are
// swapped, otherwise they are copied in order. For a 4-channel destination
// the fourth channel is MAX_NUM when the source has 3 channels and a copy of
// the source's fourth channel otherwise.
__kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
                  __global uchar* dstptr, int dst_step, int dst_offset,
                  int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const DATA_TYPE * in = (__global const DATA_TYPE *)(srcptr + src_index);
        __global DATA_TYPE * out = (__global DATA_TYPE *)(dstptr + dst_index);

        // Load width follows the source channel count.
#if scn == 3
        const DATA_TYPE_3 pix = vload3(0, in);
#else
        const DATA_TYPE_4 pix = vload4(0, in);
#endif

#ifdef REVERSE
        out[0] = pix.z;
        out[1] = pix.y;
        out[2] = pix.x;
#else
        out[0] = pix.x;
        out[1] = pix.y;
        out[2] = pix.z;
#endif

#if dcn == 4 && scn == 3
        out[3] = MAX_NUM;
#elif dcn == 4
        out[3] = in[3];
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
|
||||
|
||||
// Packed 16-bit (5-6-5 when greenbits == 6, otherwise 5-5-5 with a top flag
// bit) -> 8-bit 3/4-channel conversion.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. The low five bits go to channel bidx, the middle field
// to channel 1 and the high field to channel bidx^2, each shifted so that it
// occupies the top bits of the output byte. For dcn == 4 the alpha byte is
// 255 in 5-6-5 mode and 255/0 depending on bit 15 otherwise.
__kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const ushort t = *((__global const ushort*)(src + src_index));
        __global uchar * out = dst + dst_index;

        // The low 5-bit field is decoded identically in both layouts.
        out[bidx] = (uchar)(t << 3);
#if greenbits == 6
        out[1] = (uchar)((t >> 3) & ~3);
        out[(bidx^2)] = (uchar)((t >> 8) & ~7);
#else
        out[1] = (uchar)((t >> 2) & ~7);
        out[(bidx^2)] = (uchar)((t >> 7) & ~7);
#endif

#if dcn == 4
#if greenbits == 6
        out[3] = 255;
#else
        out[3] = t & 0x8000 ? 255 : 0;
#endif
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
// 8-bit 3/4-channel -> packed 16-bit conversion (5-6-5 when greenbits == 6,
// otherwise 5-5-5 with bit 15 set from a non-zero fourth source channel when
// the source has one).
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. The B, G and R components are selected through the
// bidx-dependent *_COMP macros; their top bits are packed as B | G | R from
// the least significant bit upwards.
__kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                // Load only as many bytes as the source pixel has: a 4-wide load
                // on a 3-channel image reads one byte past each pixel and may run
                // past the end of the buffer on the last pixel.
#if scn == 3
                uchar3 src_pix = vload3(0, src + src_index);
#else
                uchar4 src_pix = vload4(0, src + src_index);
#endif

#if greenbits == 6
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~3) << 3)|((src_pix.R_COMP&~7) << 8));
#elif scn == 3
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|((src_pix.R_COMP&~7) << 7));
#else
                // 4-channel source in 5-5-5 mode: the alpha byte drives bit 15.
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|
                    ((src_pix.R_COMP&~7) << 7)|(src_pix.w ? 0x8000 : 0));
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
||||
|
||||
///////////////////////////////////// RGB5x5 <-> Gray //////////////////////////////////////
|
||||
|
||||
// Packed 16-bit (5-6-5 when greenbits == 6, otherwise 5-5-5) -> 8-bit
// grayscale conversion.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. Each field is expanded into the top bits of a byte and
// the three bytes are combined with the fixed-point weights B2Y/G2Y/R2Y,
// rounded and shifted down by yuv_shift.
__kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offset,
                          __global uchar* dst, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    // Destination is one byte per pixel.
    int dst_index = mad24(y, dst_step, dst_offset + x);

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const int t = *((__global const ushort*)(src + src_index));

#if greenbits == 6
        dst[dst_index] = (uchar)CV_DESCALE(mad24((t << 3) & 0xf8, B2Y, mad24((t >> 3) & 0xfc, G2Y, ((t >> 8) & 0xf8) * R2Y)), yuv_shift);
#else
        dst[dst_index] = (uchar)CV_DESCALE(mad24((t << 3) & 0xf8, B2Y, mad24((t >> 2) & 0xf8, G2Y, ((t >> 7) & 0xf8) * R2Y)), yuv_shift);
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
// 8-bit grayscale -> packed 16-bit conversion (5-6-5 when greenbits == 6,
// otherwise 5-5-5): the gray value is truncated to each field's width and
// replicated into all three fields.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows.
__kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offset,
                          __global uchar* dst, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    // Source is one byte per pixel.
    int src_index = mad24(y, src_step, src_offset + x);
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const int gray = src[src_index];
        __global ushort * out = (__global ushort*)(dst + dst_index);

#if greenbits == 6
        *out = (ushort)((gray >> 3) | ((gray & ~3) << 3) | ((gray & ~7) << 8));
#else
        const int g5 = gray >> 3;
        *out = (ushort)(g5|(g5 << 5)|(g5 << 10));
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
|
||||
|
||||
#ifdef DEPTH_0
|
||||
|
||||
// 8-bit RGBA -> alpha-premultiplied RGBA.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. Each colour channel becomes
// (channel * alpha + HALF_MAX_NUM) / MAX_NUM; alpha is copied unchanged.
// Pixels are read and written as whole uchar4 values.
__kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    // Four bytes per pixel on both sides.
    int src_index = mad24(y, src_step, src_offset + (x << 2));
    int dst_index = mad24(y, dst_step, dst_offset + (x << 2));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const uchar4 pix = *(__global const uchar4 *)(src + src_index);

        *(__global uchar4 *)(dst + dst_index) =
            (uchar4)(mad24(pix.x, pix.w, HALF_MAX_NUM) / MAX_NUM,
                     mad24(pix.y, pix.w, HALF_MAX_NUM) / MAX_NUM,
                     mad24(pix.z, pix.w, HALF_MAX_NUM) / MAX_NUM, pix.w);

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
// 8-bit alpha-premultiplied RGBA -> plain RGBA.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. For non-zero alpha each colour channel becomes
// (channel * MAX_NUM + alpha/2) / alpha and alpha is copied; for zero alpha
// the whole pixel is written as zero.
// NOTE(review): the quotient is converted to uchar without saturation, so a
// colour channel larger than alpha yields a value above 255 before the
// conversion - confirm this matches the CPU path before changing it.
__kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    // Four bytes per pixel on both sides.
    int src_index = mad24(y, src_step, mad24(x, 4, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, 4, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        const uchar4 pix = *(__global const uchar4 *)(src + src_index);
        const uchar alpha = pix.w;
        const uchar alpha_half = alpha / 2;   // rounding term for the division

        if (alpha != 0)
        {
            *(__global uchar4 *)(dst + dst_index) =
                (uchar4)(mad24(pix.x, MAX_NUM, alpha_half) / alpha,
                         mad24(pix.y, MAX_NUM, alpha_half) / alpha,
                         mad24(pix.z, MAX_NUM, alpha_half) / alpha, alpha);
        }
        else
        {
            // Division by zero is avoided: fully transparent pixels become zero.
            *(__global uchar4 *)(dst + dst_index) = (uchar4)(0, 0, 0, 0);
        }

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
#endif
|
674
modules/imgproc/src/opencl/color_yuv.cl
Normal file
674
modules/imgproc/src/opencl/color_yuv.cl
Normal file
@ -0,0 +1,674 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/**************************************PUBLICFUNC*************************************/
|
||||
|
||||
#if depth == 0
|
||||
#define DATA_TYPE uchar
|
||||
#define MAX_NUM 255
|
||||
#define HALF_MAX_NUM 128
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_uchar_sat(num)
|
||||
#define DEPTH_0
|
||||
#elif depth == 2
|
||||
#define DATA_TYPE ushort
|
||||
#define MAX_NUM 65535
|
||||
#define HALF_MAX_NUM 32768
|
||||
#define COEFF_TYPE int
|
||||
#define SAT_CAST(num) convert_ushort_sat(num)
|
||||
#define DEPTH_2
|
||||
#elif depth == 5
|
||||
#define DATA_TYPE float
|
||||
#define MAX_NUM 1.0f
|
||||
#define HALF_MAX_NUM 0.5f
|
||||
#define COEFF_TYPE float
|
||||
#define SAT_CAST(num) (num)
|
||||
#define DEPTH_5
|
||||
#else
|
||||
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
|
||||
#endif
|
||||
|
||||
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
enum
|
||||
{
|
||||
yuv_shift = 14,
|
||||
R2Y = 4899,
|
||||
G2Y = 9617,
|
||||
B2Y = 1868,
|
||||
};
|
||||
|
||||
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
|
||||
#define B2YF 0.114f
|
||||
#define G2YF 0.587f
|
||||
#define R2YF 0.299f
|
||||
//to YCbCr
|
||||
#define YCBF 0.564f
|
||||
#define YCRF 0.713f
|
||||
#define YCBI 9241
|
||||
#define YCRI 11682
|
||||
//to YUV
|
||||
#define B2UF 0.492f
|
||||
#define R2VF 0.877f
|
||||
#define B2UI 8061
|
||||
#define R2VI 14369
|
||||
//from YUV
|
||||
#define U2BF 2.032f
|
||||
#define U2GF -0.395f
|
||||
#define V2GF -0.581f
|
||||
#define V2RF 1.140f
|
||||
#define U2BI 33292
|
||||
#define U2GI -6472
|
||||
#define V2GI -9519
|
||||
#define V2RI 18678
|
||||
//from YCrCb
|
||||
#define CR2RF 1.403f
|
||||
#define CB2GF -0.344f
|
||||
#define CR2GF -0.714f
|
||||
#define CB2BF 1.773f
|
||||
#define CR2RI 22987
|
||||
#define CB2GI -5636
|
||||
#define CR2GI -11698
|
||||
#define CB2BI 29049
|
||||
|
||||
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
|
||||
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
|
||||
|
||||
#if bidx == 0
|
||||
#define R_COMP z
|
||||
#define G_COMP y
|
||||
#define B_COMP x
|
||||
#else
|
||||
#define R_COMP x
|
||||
#define G_COMP y
|
||||
#define B_COMP z
|
||||
#endif
|
||||
|
||||
#ifndef uidx
|
||||
#define uidx 0
|
||||
#endif
|
||||
|
||||
#ifndef yidx
|
||||
#define yidx 0
|
||||
#endif
|
||||
|
||||
#ifndef PIX_PER_WI_X
|
||||
#define PIX_PER_WI_X 1
|
||||
#endif
|
||||
|
||||
#define __CAT(x, y) x##y
|
||||
#define CAT(x, y) __CAT(x, y)
|
||||
|
||||
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
|
||||
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
|
||||
|
||||
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
|
||||
|
||||
__constant float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF };
|
||||
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI };
|
||||
|
||||
// RGB/BGR -> YUV conversion.
//
// One work-item converts column get_global_id(0) of up to PIX_PER_WI_Y
// consecutive rows. Y is the weighted sum of B, G, R (selected through the
// bidx-dependent *_COMP macros); U and V are (B - Y) and (R - Y) scaled by
// the fourth and fifth coefficient and offset by HALF_MAX_NUM. DEPTH_5 uses
// the float table c_RGB2YUVCoeffs_f; other depths use the fixed-point table
// c_RGB2YUVCoeffs_i with rounding and a shift by yuv_shift. Results go
// through SAT_CAST before being stored.
__kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dt_offset,
                      int rows, int cols)
{
    const int x = get_global_id(0);
    if (x >= cols)
        return;

    int y = get_global_id(1) * PIX_PER_WI_Y;
    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // Rows past the image bottom: nothing left for this work-item.
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_index);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_index);

        const DATA_TYPE_3 pix = vload3(0, in);
        const DATA_TYPE b = pix.B_COMP;
        const DATA_TYPE g = pix.G_COMP;
        const DATA_TYPE r = pix.R_COMP;

#ifdef DEPTH_5
        __constant float * k = c_RGB2YUVCoeffs_f;
        const DATA_TYPE Y = fma(b, k[0], fma(g, k[1], r * k[2]));
        const DATA_TYPE U = fma(b - Y, k[3], HALF_MAX_NUM);
        const DATA_TYPE V = fma(r - Y, k[4], HALF_MAX_NUM);
#else
        __constant int * k = c_RGB2YUVCoeffs_i;
        // Mid-range offset expressed in the same fixed-point scale.
        const int delta = HALF_MAX_NUM * (1 << yuv_shift);
        const int Y = CV_DESCALE(mad24(b, k[0], mad24(g, k[1], mul24(r, k[2]))), yuv_shift);
        const int U = CV_DESCALE(mad24(b - Y, k[3], delta), yuv_shift);
        const int V = CV_DESCALE(mad24(r - Y, k[4], delta), yuv_shift);
#endif

        out[0] = SAT_CAST( Y );
        out[1] = SAT_CAST( U );
        out[2] = SAT_CAST( V );

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
|
||||
|
||||
// Inverse YUV -> RGB tables: [0] U->B, [1] U->G, [2] V->G, [3] V->R.
__constant float c_YUV2RGBCoeffs_f[4] = { U2BF, U2GF, V2GF, V2RF };
__constant int   c_YUV2RGBCoeffs_i[4] = { U2BI, U2GI, V2GI, V2RI };

// Converts interleaved YUV pixels to RGB/BGR (blue is written at index bidx, red at
// bidx^2); when dcn == 4 the fourth channel is set to MAX_NUM.
// Each work-item handles column get_global_id(0) for PIX_PER_WI_Y consecutive rows.
// Steps and offsets are used as byte quantities.
__kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dt_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + src_index);
                __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);

                // Only Y, U and V are consumed, so load exactly three elements.
                // A four-element load would touch one element past the pixel and,
                // for the last pixel of a 3-channel buffer, past the buffer itself.
                DATA_TYPE_3 src_pix = vload3(0, src);
                DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z;

#ifdef DEPTH_5
                // Floating-point path.
                __constant float * coeffs = c_YUV2RGBCoeffs_f;
                float r = fma(V - HALF_MAX_NUM, coeffs[3], Y);
                float g = fma(V - HALF_MAX_NUM, coeffs[2], fma(U - HALF_MAX_NUM, coeffs[1], Y));
                float b = fma(U - HALF_MAX_NUM, coeffs[0], Y);
#else
                // Fixed-point path: products are rounded back down by yuv_shift bits.
                __constant int * coeffs = c_YUV2RGBCoeffs_i;
                const int r = Y + CV_DESCALE(mul24(V - HALF_MAX_NUM, coeffs[3]), yuv_shift);
                const int g = Y + CV_DESCALE(mad24(V - HALF_MAX_NUM, coeffs[2], mul24(U - HALF_MAX_NUM, coeffs[1])), yuv_shift);
                const int b = Y + CV_DESCALE(mul24(U - HALF_MAX_NUM, coeffs[0]), yuv_shift);
#endif

                dst[bidx] = SAT_CAST( b );
                dst[1] = SAT_CAST( g );
                dst[bidx^2] = SAT_CAST( r );
#if dcn == 4
                dst[3] = MAX_NUM;
#endif
                // Advance to the same column of the next row.
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
||||
// Coefficients used by the subsampled-YUV decoders in this file:
// [0] luma gain, [1] U->B, [2] U->G, [3] V->G, [4] V->R.
__constant float c_YUV2RGBCoeffs_420[5] = { 1.163999557f, 2.017999649f, -0.390999794f,
                                            -0.812999725f, 1.5959997177f };

// Decodes a frame whose luma rows are followed (starting at row `rows`) by rows of
// interleaved chroma byte pairs; uidx selects which byte of a pair is U.
// Each work-item produces a 2x2 block of 8-bit output pixels that share one U/V
// pair, for PIX_PER_WI_Y consecutive block rows. Alpha is 255 when dcn == 4.
__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset,
                          __global uchar* dstptr, int dst_step, int dt_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    // x and y count 2x2 blocks, hence the halved limits.
    if (x >= cols / 2)
        return;

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy, ++y)
    {
        if (y >= rows / 2)
            continue;

        __global const uchar* luma = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset);
        __global const uchar* chroma = srcptr + mad24(rows + y, src_step, (x << 1) + src_offset);
        __global uchar* top = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset));
        __global uchar* bottom = top + dst_step;

        // Four luma samples of the block: top-left, top-right, bottom-left, bottom-right.
        float Y1 = luma[0];
        float Y2 = luma[1];
        float Y3 = luma[src_step];
        float Y4 = luma[src_step + 1];

        const float U = ((float)chroma[uidx]) - HALF_MAX_NUM;
        const float V = ((float)chroma[1-uidx]) - HALF_MAX_NUM;

        // Chroma contribution per channel; the 0.5f is added before the saturating conversion.
        const float ruv = fma(c_YUV2RGBCoeffs_420[4], V, 0.5f);
        const float guv = fma(c_YUV2RGBCoeffs_420[3], V, fma(c_YUV2RGBCoeffs_420[2], U, 0.5f));
        const float buv = fma(c_YUV2RGBCoeffs_420[1], U, 0.5f);

        // Remove the 16-level luma offset (clamped at zero) and apply the gain.
        Y1 = max(0.f, Y1 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y2 = max(0.f, Y2 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y3 = max(0.f, Y3 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y4 = max(0.f, Y4 - 16.f) * c_YUV2RGBCoeffs_420[0];

        // Top row, left pixel.
        top[2 - bidx] = convert_uchar_sat(Y1 + ruv);
        top[1]        = convert_uchar_sat(Y1 + guv);
        top[bidx]     = convert_uchar_sat(Y1 + buv);
#if dcn == 4
        top[3]        = 255;
#endif

        // Top row, right pixel.
        top[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv);
        top[dcn + 1]        = convert_uchar_sat(Y2 + guv);
        top[dcn + bidx]     = convert_uchar_sat(Y2 + buv);
#if dcn == 4
        top[7]              = 255;
#endif

        // Bottom row, left pixel.
        bottom[2 - bidx] = convert_uchar_sat(Y3 + ruv);
        bottom[1]        = convert_uchar_sat(Y3 + guv);
        bottom[bidx]     = convert_uchar_sat(Y3 + buv);
#if dcn == 4
        bottom[3]        = 255;
#endif

        // Bottom row, right pixel.
        bottom[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv);
        bottom[dcn + 1]        = convert_uchar_sat(Y4 + guv);
        bottom[dcn + bidx]     = convert_uchar_sat(Y4 + buv);
#if dcn == 4
        bottom[7]              = 255;
#endif
    }
}
|
||||
|
||||
#if uidx < 2

// Decodes a planar frame: luma rows first, then two separate chroma planes stored
// after row `rows`. uidx selects which of the two planes is treated as U.
// Each work-item produces a 2x2 block of 8-bit output pixels sharing one U/V pair,
// for PIX_PER_WI_Y consecutive block rows. Alpha is 255 when dcn == 4.
__kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
                                __global uchar* dstptr, int dst_step, int dt_offset,
                                int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    // x and y count 2x2 blocks, hence the halved limits.
    if (x >= cols / 2)
        return;

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy, ++y)
    {
        if (y >= rows / 2)
            continue;

        __global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset);
        __global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset);
        __global uchar* dst2 = dst1 + dst_step;

        // Four luma samples of the block: top-left, top-right, bottom-left, bottom-right.
        float Y1 = ysrc[0];
        float Y2 = ysrc[1];
        float Y3 = ysrc[src_step];
        float Y4 = ysrc[src_step + 1];

#ifdef SRC_CONT
        // SRC_CONT build: both chroma planes are indexed linearly from the end of
        // the luma plane, the second one (rows * cols) / 4 bytes after the first.
        __global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset);
        int u_ind = mad24(y, cols >> 1, x);
        float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX_NUM, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX_NUM };
#else
        // Otherwise two chroma rows of cols/2 bytes share one source row, so even
        // and odd y address the first and second half of that row. vsteps adjusts
        // the second-plane offset when rows is not a multiple of 4.
        int vsteps[2] = { cols >> 1, src_step - (cols >> 1)};
        __global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x);
        __global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0);
        float uv[2] = { ((float)usrc[0]) - HALF_MAX_NUM, ((float)vsrc[0]) - HALF_MAX_NUM };
#endif
        const float U = uv[uidx];
        const float V = uv[1-uidx];

        // Chroma contribution per channel; the 0.5f is added before the saturating conversion.
        const float ruv = fma(c_YUV2RGBCoeffs_420[4], V, 0.5f);
        const float guv = fma(c_YUV2RGBCoeffs_420[3], V, fma(c_YUV2RGBCoeffs_420[2], U, 0.5f));
        const float buv = fma(c_YUV2RGBCoeffs_420[1], U, 0.5f);

        // Remove the 16-level luma offset (clamped at zero) and apply the gain.
        Y1 = max(0.f, Y1 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y2 = max(0.f, Y2 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y3 = max(0.f, Y3 - 16.f) * c_YUV2RGBCoeffs_420[0];
        Y4 = max(0.f, Y4 - 16.f) * c_YUV2RGBCoeffs_420[0];

        // Top row, left pixel.
        dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv);
        dst1[1]        = convert_uchar_sat(Y1 + guv);
        dst1[bidx]     = convert_uchar_sat(Y1 + buv);
#if dcn == 4
        dst1[3]        = 255;
#endif

        // Top row, right pixel.
        dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv);
        dst1[dcn + 1]        = convert_uchar_sat(Y2 + guv);
        dst1[dcn + bidx]     = convert_uchar_sat(Y2 + buv);
#if dcn == 4
        dst1[7]              = 255;
#endif

        // Bottom row, left pixel.
        dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv);
        dst2[1]        = convert_uchar_sat(Y3 + guv);
        dst2[bidx]     = convert_uchar_sat(Y3 + buv);
#if dcn == 4
        dst2[3]        = 255;
#endif

        // Bottom row, right pixel.
        dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv);
        dst2[dcn + 1]        = convert_uchar_sat(Y4 + guv);
        dst2[dcn + bidx]     = convert_uchar_sat(Y4 + buv);
#if dcn == 4
        dst2[7]              = 255;
#endif
    }
}

#endif
|
||||
|
||||
#if uidx < 2

// Encoder coefficients: [0..2] weight R, G, B for luma; [3..5] weight R, G, B for
// the first chroma value; [5..7] weight R, G, B for the second chroma value
// (entry 5 is shared by both).
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f,
                                            0.438999176f, -0.3679990768f, -0.0709991455f };

// Encodes 8-bit RGB/BGR(A) pixels into a planar frame: a full-resolution luma plane
// followed by two chroma planes; uidx selects which plane receives the first chroma
// value. `rows` is used as the height of the whole destination (the luma plane is
// rows / 3 * 2 rows tall). Each work-item handles PIX_PER_WI_X horizontally adjacent
// 2x2 source blocks for PIX_PER_WI_Y consecutive block rows; chroma is taken from
// the top-left pixel of each block.
__kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
                                __global uchar* dstptr, int dst_step, int dst_offset,
                                int rows, int cols)
{
    int x = get_global_id(0) * PIX_PER_WI_X;
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols/2)
    {
        int src_index  = mad24(y << 1, src_step, mad24(x << 1, scn, src_offset));
        int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset);
        int y_rows = rows / 3 * 2;
        // Offset adjustment for the second chroma plane when y_rows is not a multiple of 4.
        int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)};
        __constant float* coeffs = c_RGB2YUVCoeffs_420;

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows / 3)
            {
                __global const uchar* src1 = srcptr + src_index;
                __global const uchar* src2 = src1 + src_step;
                __global uchar* ydst1 = dstptr + ydst_index;
                __global uchar* ydst2 = ydst1 + dst_step;

                // Two chroma rows of cols/2 bytes share one destination row, so even
                // and odd y address the first and second half of that row.
                __global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x);
                __global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0);

#if PIX_PER_WI_X == 2
                // Four pixels per source row are fetched as 32-bit words
                // (3 words when scn == 3, 4 words when scn == 4).
                int s11 = *((__global const int*) src1);
                int s12 = *((__global const int*) src1 + 1);
                int s13 = *((__global const int*) src1 + 2);
#if scn == 4
                int s14 = *((__global const int*) src1 + 3);
#endif
                int s21 = *((__global const int*) src2);
                int s22 = *((__global const int*) src2 + 1);
                int s23 = *((__global const int*) src2 + 2);
#if scn == 4
                int s24 = *((__global const int*) src2 + 3);
#endif
                float src_pix1[scn * 4], src_pix2[scn * 4];

                // Unpack the words into one float per channel byte.
                *((float4*) src_pix1)     = convert_float4(as_uchar4(s11));
                *((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12));
                *((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13));
#if scn == 4
                *((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14));
#endif
                *((float4*) src_pix2)     = convert_float4(as_uchar4(s21));
                *((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22));
                *((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23));
#if scn == 4
                *((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24));
#endif
                // Luma for the four pixels of each row (offset 16, +0.5 for rounding).
                uchar4 y1, y2;
                y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[      2-bidx], fma(coeffs[1], src_pix1[      1], fma(coeffs[2], src_pix1[      bidx], 16.5f))));
                y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[  scn+2-bidx], fma(coeffs[1], src_pix1[  scn+1], fma(coeffs[2], src_pix1[  scn+bidx], 16.5f))));
                y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f))));
                y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f))));
                y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[      2-bidx], fma(coeffs[1], src_pix2[      1], fma(coeffs[2], src_pix2[      bidx], 16.5f))));
                y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[  scn+2-bidx], fma(coeffs[1], src_pix2[  scn+1], fma(coeffs[2], src_pix2[  scn+bidx], 16.5f))));
                y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f))));
                y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f))));

                *((__global int*) ydst1) = as_int(y1);
                *((__global int*) ydst2) = as_int(y2);

                // Chroma from the top-left pixel of each of the two blocks (offset 128, +0.5).
                float uv[4] = { fma(coeffs[3], src_pix1[      2-bidx], fma(coeffs[4], src_pix1[      1], fma(coeffs[5], src_pix1[      bidx], 128.5f))),
                                fma(coeffs[5], src_pix1[      2-bidx], fma(coeffs[6], src_pix1[      1], fma(coeffs[7], src_pix1[      bidx], 128.5f))),
                                fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))),
                                fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) };

                udst[0] = convert_uchar_sat(uv[uidx]    );
                vdst[0] = convert_uchar_sat(uv[1 - uidx]);
                udst[1] = convert_uchar_sat(uv[2 + uidx]);
                vdst[1] = convert_uchar_sat(uv[3 - uidx]);
#else
                // Only three channels of each pixel are used, so load exactly three
                // bytes. A four-byte load reads one byte beyond the pixel when
                // scn == 3, which runs past the source data on the final pixel.
                float3 src_pix1 = convert_float3(vload3(0, src1));
                float3 src_pix2 = convert_float3(vload3(0, src1+scn));
                float3 src_pix3 = convert_float3(vload3(0, src2));
                float3 src_pix4 = convert_float3(vload3(0, src2+scn));

                // Luma for the 2x2 block (offset 16, +0.5 for rounding).
                ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f))));
                ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f))));
                ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f))));
                ydst2[1] = convert_uchar_sat(fma(coeffs[0], src_pix4.R_COMP, fma(coeffs[1], src_pix4.G_COMP, fma(coeffs[2], src_pix4.B_COMP, 16.5f))));

                // Chroma from the top-left pixel of the block (offset 128, +0.5).
                float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))),
                                fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) };

                udst[0] = convert_uchar_sat(uv[uidx]  );
                vdst[0] = convert_uchar_sat(uv[1-uidx]);
#endif
                // Advance by one block row, i.e. two pixel rows.
                ++y;
                src_index += 2*src_step;
                ydst_index += 2*dst_step;
            }
        }
    }
}

#endif
|
||||
|
||||
// Decodes packed 4-byte groups that each carry two luma samples and one U/V pair
// into two 8-bit RGB/BGR(A) pixels. yidx gives the byte position of the first luma
// sample (the second is two bytes later) and uidx the position of U (V is two bytes
// later, modulo 4). Each work-item handles one group per row for PIX_PER_WI_Y
// consecutive rows. Alpha is 255 when dcn == 4.
__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset,
                          __global uchar* dstptr, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    // x counts pixel pairs, hence the halved limit.
    if (x >= cols / 2)
        return;

    __global const uchar* src = srcptr + mad24(y, src_step, (x << 2) + src_offset);
    __global uchar*       dst = dstptr + mad24(y, dst_step, mad24(x << 1, dcn, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy, ++y, src += src_step, dst += dst_step)
    {
        if (y >= rows)
            continue;

#ifndef USE_OPTIMIZED_LOAD
        // Byte-wise reads of the four samples.
        const float U = ((float) src[uidx]) - HALF_MAX_NUM;
        const float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX_NUM;
        const float y00 = max(0.f, ((float) src[yidx]) - 16.f) * c_YUV2RGBCoeffs_420[0];
        const float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * c_YUV2RGBCoeffs_420[0];
#else
        // Single 32-bit read, then split into its four bytes (lowest byte first).
        const int load_src = *((__global const int*) src);
        float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff};
        const float U = vec_src[uidx] - HALF_MAX_NUM;
        const float V = vec_src[(2 + uidx) % 4] - HALF_MAX_NUM;
        const float y00 = max(0.f, vec_src[yidx] - 16.f) * c_YUV2RGBCoeffs_420[0];
        const float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * c_YUV2RGBCoeffs_420[0];
#endif

        // Chroma contribution per channel; the 0.5f is added before the saturating conversion.
        const float ruv = fma(c_YUV2RGBCoeffs_420[4], V, 0.5f);
        const float guv = fma(c_YUV2RGBCoeffs_420[3], V, fma(c_YUV2RGBCoeffs_420[2], U, 0.5f));
        const float buv = fma(c_YUV2RGBCoeffs_420[1], U, 0.5f);

        // First pixel of the pair.
        dst[2 - bidx] = convert_uchar_sat(y00 + ruv);
        dst[1]        = convert_uchar_sat(y00 + guv);
        dst[bidx]     = convert_uchar_sat(y00 + buv);
#if dcn == 4
        dst[3]        = 255;
#endif

        // Second pixel of the pair.
        dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
        dst[dcn + 1]        = convert_uchar_sat(y01 + guv);
        dst[dcn + bidx]     = convert_uchar_sat(y01 + buv);
#if dcn == 4
        dst[7]              = 255;
#endif
    }
}
|
||||
|
||||
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
|
||||
|
||||
// Forward RGB -> YCrCb tables: entries 0..2 weight R, G and B when forming luma,
// entries 3 and 4 scale (R - Y) and (B - Y) into Cr and Cb.
__constant float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF};
__constant int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI};

// Converts interleaved RGB/BGR pixels (channel order chosen by bidx) to 3-channel
// YCrCb, written in the order Y, Cr, Cb.
// Each work-item handles column get_global_id(0) for PIX_PER_WI_Y consecutive rows.
// Steps and offsets are used as byte quantities.
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offset,
                        __global uchar* dstptr, int dst_step, int dt_offset,
                        int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + src_index);
                __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);

                // Only the three colour channels are consumed, so load exactly three
                // elements. A four-element load on a 3-channel source would read one
                // element past the pixel, and past the buffer on the last pixel.
                DATA_TYPE_3 src_pix = vload3(0, src);
                DATA_TYPE b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;

#ifdef DEPTH_5
                // Floating-point path.
                __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
                DATA_TYPE Y  = fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]));
                DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX_NUM);
                DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX_NUM);
#else
                // Fixed-point path: delta is the chroma mid-point expressed at yuv_shift precision.
                __constant int * coeffs = c_RGB2YCrCbCoeffs_i;
                int delta = HALF_MAX_NUM * (1 << yuv_shift);
                int Y  = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift);
                int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift);
                int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift);
#endif

                dst[0] = SAT_CAST( Y );
                dst[1] = SAT_CAST( Cr );
                dst[2] = SAT_CAST( Cb );

                // Advance to the same column of the next row.
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
||||
|
||||
// Inverse YCrCb -> RGB tables: [0] Cr->R, [1] Cr->G, [2] Cb->G, [3] Cb->B.
__constant float c_YCrCb2RGBCoeffs_f[4] = { CR2RF, CR2GF, CB2GF, CB2BF };
__constant int   c_YCrCb2RGBCoeffs_i[4] = { CR2RI, CR2GI, CB2GI, CB2BI };

// Converts interleaved Y, Cr, Cb pixels to RGB/BGR (blue is written at index bidx,
// red at bidx^2); when dcn == 4 the fourth channel is set to MAX_NUM.
// Each work-item handles column get_global_id(0) for PIX_PER_WI_Y consecutive rows.
// Steps and offsets are used as byte quantities.
__kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
                        __global uchar* dst, int dst_step, int dst_offset,
                        int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE * srcptr = (__global const DATA_TYPE*)(src + src_index);
                __global DATA_TYPE * dstptr = (__global DATA_TYPE*)(dst + dst_index);

                // Only Y, Cr and Cb are consumed, so load exactly three elements.
                // A four-element load would touch one element past the pixel and,
                // for the last pixel of a 3-channel buffer, past the buffer itself.
                DATA_TYPE_3 src_pix = vload3(0, srcptr);
                DATA_TYPE yp = src_pix.x, cr = src_pix.y, cb = src_pix.z;

#ifdef DEPTH_5
                // Floating-point path.
                __constant float * coeff = c_YCrCb2RGBCoeffs_f;
                float r = fma(coeff[0], cr - HALF_MAX_NUM, yp);
                float g = fma(coeff[1], cr - HALF_MAX_NUM, fma(coeff[2], cb - HALF_MAX_NUM, yp));
                float b = fma(coeff[3], cb - HALF_MAX_NUM, yp);
#else
                // Fixed-point path: products are rounded back down by yuv_shift bits.
                __constant int * coeff = c_YCrCb2RGBCoeffs_i;
                int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX_NUM), yuv_shift);
                int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX_NUM, coeff[2] * (cb - HALF_MAX_NUM)), yuv_shift);
                int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX_NUM), yuv_shift);
#endif

                dstptr[(bidx^2)] = SAT_CAST(r);
                dstptr[1] = SAT_CAST(g);
                dstptr[bidx] = SAT_CAST(b);
#if dcn == 4
                dstptr[3] = MAX_NUM;
#endif

                // Advance to the same column of the next row.
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user