Merge pull request #10869 from savuor:color_cpp_split

color.cpp split (#10869)

* initial split is done

* files renamed (these names are excluded during compilation)

* IPP code moved to corresponding files

* splineBuild, splineInterpolate -> color_lab.cpp

* Lab, Luv: little refactored

* it compiles (didn't check work); Lab OCL code moved to color_lab.cpp

* cvtcolor.cl: Lab/Luv part moved to color_lab.cl

* cvtcolor.cl: color_rgb.cl extracted

* cvtcolor.cl: color_yuv.cl separated

* cvtcolor.cl: color_hsv.cl extracted

* cvtcolor.cl: extracted to color_lab.cl and color_rgb.cl

* helper functions moved to hpp file

* Lab, Luv: moved to color_lab.cpp

* CPU XYZ: to color_lab.cpp

* OCL XYZ: to color_lab.cpp

* warning fixed

* CvtHelper added

* CPU YUV: to color_yuv.cpp, helpers to color.hpp

* CPU HLS/HSV: to color_hsv.cpp

* CPU BGR2BGR: to color_rgb.cpp

* CPU RGB: to color_rgb.cpp

* extra arg removed

* CPU YUV: to color_yuv.cpp

* color code decoded

* OclHelper added, some funcs rewritten

* color_lab.cpp: refactored to use OclHelper

* OCL RGB: to color_rgb.cpp

* OCL HLS/HSV: to color_hsv.cpp

* OCL YUV: to color_yuv.cpp

* OCL YUV planes: to color_yuv.cpp

* OCL: color code reduced

* licence to demosaicing.cpp

* IPP func tables to color_rgb.cpp

* code cleanup

* HAVE_OPENCL ifdefs added

* helpers made more common

* fixed two plane YUV with separate mats

* fixed warning in gcc7.2.0

* precomp header fixed

* color space classification functions fixed

* helpers fixed

* rename: isSRGB -> is_sRGB
This commit is contained in:
Rostislav Vasilikhin 2018-03-15 14:10:40 +03:00 committed by Alexander Alekhin
parent c727e8a4d0
commit 64916d3d83
12 changed files with 14421 additions and 13392 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,668 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "opencv2/imgproc.hpp"
#include "opencv2/core/utility.hpp"
#include <limits>
#include "opencl_kernels_imgproc.hpp"
#include "hal_replacement.hpp"
#include "opencv2/core/hal/intrin.hpp"
#include "opencv2/core/softfloat.hpp"
// Rounding right shift: adds half of 2^n, i.e. (1 << (n-1)), to x before shifting right by n bits.
// Both arguments are expanded more than once or inside parentheses, so avoid side effects in n.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
namespace cv
{
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
// Floating-point luma weights of the blue, green and red channels (0.114 + 0.587 + 0.299 == 1).
const float B2YF = 0.114f;
const float G2YF = 0.587f;
const float R2YF = 0.299f;
// Fixed-point versions of the weights above plus shared integer constants.
enum
{
// Number of fractional bits of R2Y/G2Y/B2Y: 1 << 14 == 16384, the scale quoted below.
yuv_shift = 14,
// NOTE(review): presumably the fixed-point precision of the XYZ conversions; it is not used in this header.
xyz_shift = 12,
R2Y = 4899, // == R2YF*16384
G2Y = 9617, // == G2YF*16384
B2Y = 1868, // == B2YF*16384
// NOTE(review): presumably a per-row processing chunk size; not used in this header - confirm at the use sites.
BLOCK_SIZE = 256
};
// Per-channel-type traits: the largest channel value and its midpoint.
// Generic version: the maximum comes from std::numeric_limits<_Tp>.
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;

    // Largest value representable by the channel type.
    static _Tp max()
    {
        return std::numeric_limits<_Tp>::max();
    }

    // max()/2 + 1, e.g. 128 for an 8-bit unsigned channel.
    static _Tp half()
    {
        const _Tp top = max();
        return static_cast<_Tp>(top / 2 + 1);
    }
};

// Floating-point channels use the range with maximum 1 and midpoint 0.5.
template<> struct ColorChannel<float>
{
    typedef float worktype_f;

    static float max()
    {
        return 1.0f;
    }

    static float half()
    {
        return 0.5f;
    }
};
/*template<> struct ColorChannel<double>
{
typedef double worktype_f;
static double max() { return 1.; }
static double half() { return 0.5; }
};*/
//
// Helper functions
//
namespace {
// Returns true when `code` is one of the BGR/RGB <-> HSV conversion codes
// (regular or _FULL variants); every other code, HLS included, yields false.
inline bool isHSV(int code)
{
    const bool fromHSV = code == COLOR_HSV2BGR      || code == COLOR_HSV2RGB ||
                         code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL;
    const bool toHSV   = code == COLOR_BGR2HSV      || code == COLOR_RGB2HSV ||
                         code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
    return fromHSV || toHSV;
}
// Returns true when `code` converts to or from Lab, for both the BGR/RGB
// and the LBGR/LRGB code families; false for every other code.
inline bool isLab(int code)
{
    const bool fromLab = code == COLOR_Lab2BGR  || code == COLOR_Lab2RGB ||
                         code == COLOR_Lab2LBGR || code == COLOR_Lab2LRGB;
    const bool toLab   = code == COLOR_BGR2Lab  || code == COLOR_RGB2Lab ||
                         code == COLOR_LBGR2Lab || code == COLOR_LRGB2Lab;
    return fromLab || toLab;
}
// Returns true for the Lab/Luv conversion codes spelled with plain BGR/RGB
// (i.e. not the LBGR/LRGB variants); false for every other code.
// NOTE(review): presumably the plain codes imply sRGB gamma handling - confirm in the Lab/Luv implementation.
inline bool is_sRGB(int code)
{
    const bool toLabLuv   = code == COLOR_BGR2Lab || code == COLOR_RGB2Lab ||
                            code == COLOR_BGR2Luv || code == COLOR_RGB2Luv;
    const bool fromLabLuv = code == COLOR_Lab2BGR || code == COLOR_Lab2RGB ||
                            code == COLOR_Luv2BGR || code == COLOR_Luv2RGB;
    return toLabLuv || fromLabLuv;
}
inline bool swapBlue(int code)
{
switch (code)
{
case COLOR_BGR2BGRA: case COLOR_BGRA2BGR:
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555:
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA:
case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY:
case COLOR_BGR2YCrCb: case COLOR_BGR2YUV:
case COLOR_YCrCb2BGR: case COLOR_YUV2BGR:
case COLOR_BGR2XYZ: case COLOR_XYZ2BGR:
case COLOR_BGR2HSV: case COLOR_BGR2HLS: case COLOR_BGR2HSV_FULL: case COLOR_BGR2HLS_FULL:
case COLOR_YUV2BGR_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2BGR_IYUV: case COLOR_YUV2BGRA_IYUV:
case COLOR_YUV2BGR_NV21: case COLOR_YUV2BGRA_NV21: case COLOR_YUV2BGR_NV12: case COLOR_YUV2BGRA_NV12:
case COLOR_Lab2BGR: case COLOR_Luv2BGR: case COLOR_Lab2LBGR: case COLOR_Luv2LBGR:
case COLOR_BGR2Lab: case COLOR_BGR2Luv: case COLOR_LBGR2Lab: case COLOR_LBGR2Luv:
case COLOR_HSV2BGR: case COLOR_HLS2BGR: case COLOR_HSV2BGR_FULL: case COLOR_HLS2BGR_FULL:
case COLOR_YUV2BGR_UYVY: case COLOR_YUV2BGRA_UYVY: case COLOR_YUV2BGR_YUY2:
case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2BGR_YVYU: case COLOR_YUV2BGRA_YVYU:
case COLOR_BGR2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: case COLOR_BGR2YUV_YV12: case COLOR_BGRA2YUV_YV12:
return false;
default:
return true;
}
}
// Returns true when `code` is one of the _FULL HSV or HLS conversion codes
// (either direction, BGR or RGB); false for every other code.
inline bool isFullRangeHSV(int code)
{
    const bool toFull   = code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL ||
                          code == COLOR_BGR2HLS_FULL || code == COLOR_RGB2HLS_FULL;
    const bool fromFull = code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL ||
                          code == COLOR_HLS2BGR_FULL || code == COLOR_HLS2RGB_FULL;
    return toFull || fromFull;
}
// Destination channel count implied by a conversion code:
// 4 for the listed codes spelled with a BGRA/RGBA destination,
// 3 for the listed codes spelled with a BGR/RGB destination,
// 0 for any code that is not listed.
inline int dstChannels(int code)
{
    int channels = 0;
    switch( code )
    {
    case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2RGBA:
    case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA:
    case COLOR_GRAY2BGRA:
    case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12:
    case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
    case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
    case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2:
        channels = 4;
        break;

    case COLOR_BGRA2BGR: case COLOR_RGBA2BGR: case COLOR_RGB2BGR:
    case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
    case COLOR_GRAY2BGR:
    case COLOR_YUV2BGR_NV21: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV12:
    case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV:
    case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU:
    case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2:
        channels = 3;
        break;

    default:
        break;
    }
    return channels;
}
// Green-channel bit width of the packed 16-bit format named in the code:
// 6 for the listed BGR565 codes, 5 for the listed BGR555 codes, 0 otherwise.
inline int greenBits(int code)
{
    int bits = 0;
    switch( code )
    {
    case COLOR_BGR2BGR565: case COLOR_RGB2BGR565: case COLOR_BGRA2BGR565: case COLOR_RGBA2BGR565:
    case COLOR_BGR5652BGR: case COLOR_BGR5652RGB: case COLOR_BGR5652BGRA: case COLOR_BGR5652RGBA:
    case COLOR_BGR5652GRAY: case COLOR_GRAY2BGR565:
        bits = 6;
        break;

    case COLOR_BGR2BGR555: case COLOR_RGB2BGR555: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR555:
    case COLOR_BGR5552BGR: case COLOR_BGR5552RGB: case COLOR_BGR5552BGRA: case COLOR_BGR5552RGBA:
    case COLOR_BGR5552GRAY: case COLOR_GRAY2BGR555:
        bits = 5;
        break;

    default:
        break;
    }
    return bits;
}
inline int uIndex(int code)
{
switch( code )
{
case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12:
return 2;
case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU:
case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV:
case COLOR_YUV2BGR_NV21: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGRA_NV21: case COLOR_YUV2RGBA_NV21:
case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12:
return 1;
case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV12:
case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV:
case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY:
case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2:
return 0;
default:
return -1;
}
}
} // anonymous namespace
// Compile-time set of up to three integers; -1 marks an unused slot.
// The partial specializations drop the unused slots from the comparison,
// so a one- or two-element set does not report -1 as a member.

// Three-element form.
template<int i0, int i1 = -1, int i2 = -1>
struct Set
{
    static bool contains(int i)
    {
        if (i == i0)
            return true;
        if (i == i1)
            return true;
        return i == i2;
    }
};

// Two-element form.
template<int i0, int i1>
struct Set<i0, i1, -1>
{
    static bool contains(int i)
    {
        if (i == i0)
            return true;
        return i == i1;
    }
};

// Single-element form.
template<int i0>
struct Set<i0, -1, -1>
{
    static bool contains(int i)
    {
        const bool found = (i == i0);
        return found;
    }
};
// Selects how the helper structs in this header derive the destination size from the source size:
//   TO_YUV   - source width and height must be even; destination height is height / 2 * 3;
//   FROM_YUV - source width must be even and height a multiple of 3; destination height is height * 2 / 3;
//   NONE     - destination has the same size as the source.
enum SizePolicy
{
TO_YUV, FROM_YUV, NONE
};
// Common CPU-side preparation for a colour conversion.
// VScn, VDcn and VDepth are Set<> types listing the accepted source channel
// counts, destination channel counts and depths; sizePolicy selects how the
// destination size is derived from the source size.
// After construction `src` holds the input, `dst` the allocated output,
// and `scn`, `depth`, `dstSz` describe them.
template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
struct CvtHelper
{
    // Validates the arguments (CV_Assert) and allocates _dst with `dcn` channels
    // and the source depth.
    CvtHelper(InputArray _src, OutputArray _dst, int dcn)
    {
        const int srcType = _src.type();
        scn = CV_MAT_CN(srcType);
        depth = CV_MAT_DEPTH(srcType);

        CV_Assert( VScn::contains(scn) && VDcn::contains(dcn) && VDepth::contains(depth) );

        // inplace processing (#6653): work on a copy when source and
        // destination are the same object, because _dst.create() follows.
        const bool inplace = (_src.getObj() == _dst.getObj());
        if (!inplace)
            src = _src.getMat();
        else
            _src.copyTo(src);

        Size sz = src.size();
        switch (sizePolicy)
        {
        case TO_YUV:
            CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0);
            dstSz = Size(sz.width, sz.height / 2 * 3);
            break;

        case FROM_YUV:
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0);
            dstSz = Size(sz.width, sz.height * 2 / 3);
            break;

        case NONE:
        default:
            dstSz = sz;
            break;
        }

        const int dstType = CV_MAKETYPE(depth, dcn);
        _dst.create(dstSz, dstType);
        dst = _dst.getMat();
    }

    Mat src, dst;
    int depth, scn;
    Size dstSz;
};
#ifdef HAVE_OPENCL
// OpenCL-side counterpart of the CPU conversion helper: validates the input, allocates the
// output UMat and wraps creation, argument binding and launch of a 2D kernel.
// VScn, VDcn and VDepth are Set<> types listing the accepted source channel counts,
// destination channel counts and depths; sizePolicy selects how the destination size
// and the global work size are derived.
// NOTE(review): nArgs and globalSize are only assigned inside createKernel(); setArg() and
// run() therefore rely on a prior successful createKernel() call.
template< typename VScn, typename VDcn, typename VDepth, SizePolicy sizePolicy = NONE >
struct OclHelper
{
// Fetches the source as UMat, checks channels/depth with CV_Assert and creates _dst with
// `dcn` channels and the source depth, sized according to sizePolicy.
OclHelper( InputArray _src, OutputArray _dst, int dcn)
{
src = _src.getUMat();
Size sz = src.size(), dstSz;
int scn = src.channels();
int depth = src.depth();
CV_Assert( VScn::contains(scn) && VDcn::contains(dcn) && VDepth::contains(depth) );
switch (sizePolicy)
{
case TO_YUV:
CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );
dstSz = Size(sz.width, sz.height / 2 * 3);
break;
case FROM_YUV:
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 );
dstSz = Size(sz.width, sz.height * 2 / 3);
break;
case NONE:
default:
dstSz = sz;
break;
}
_dst.create(dstSz, CV_MAKETYPE(depth, dcn));
dst = _dst.getUMat();
}
// Builds kernel `name` from `source`. The build options are "-D depth", "-D scn" and
// "-D PIX_PER_WI_Y" (plus "-D PIX_PER_WI_X" for TO_YUV) followed by the caller's `options`.
// On success binds src (read-only, no size) and dst (write-only) as the first arguments
// and returns true; returns false when the kernel could not be created.
bool createKernel(cv::String name, ocl::ProgramSource& source, cv::String options)
{
ocl::Device dev = ocl::Device::getDefault();
// Each work-item handles 4 rows on Intel GPUs, 1 row elsewhere.
int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
int pxPerWIx = 1;
cv::String baseOptions = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ",
src.depth(), src.channels(), pxPerWIy);
switch (sizePolicy)
{
case TO_YUV:
// Two pixels per work-item along x only on Intel devices when columns, steps and
// offsets are all multiples of 4.
if (dev.isIntel() &&
src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 &&
dst.step % 4 == 0 && dst.offset % 4 == 0)
{
pxPerWIx = 2;
}
// dst.rows/3 rows (rounded up to whole work-items) and dst.cols/(2*pxPerWIx) columns of work-items.
globalSize[0] = (size_t)dst.cols/(2*pxPerWIx);
globalSize[1] = ((size_t)dst.rows/3 + pxPerWIy - 1) / pxPerWIy;
baseOptions += format("-D PIX_PER_WI_X=%d ", pxPerWIx);
break;
case FROM_YUV:
// Half the destination columns and half the destination rows (rounded up to whole work-items).
globalSize[0] = (size_t)dst.cols/2;
globalSize[1] = ((size_t)dst.rows/2 + pxPerWIy - 1) / pxPerWIy;
break;
case NONE:
default:
// One work-item per source column; rows are grouped by pxPerWIy, rounded up.
globalSize[0] = (size_t)src.cols;
globalSize[1] = ((size_t)src.rows + pxPerWIy - 1) / pxPerWIy;
break;
}
k.create(name.c_str(), source, baseOptions + options);
if(k.empty())
return false;
// k.set returns the index of the next free argument slot; it is kept in nArgs for setArg().
nArgs = k.set(0, ocl::KernelArg::ReadOnlyNoSize(src));
nArgs = k.set(nArgs, ocl::KernelArg::WriteOnly(dst));
return true;
}
// Launches the kernel over the 2D global size computed in createKernel(), with no explicit
// local size and the last argument (sync) set to false. Returns the result of Kernel::run.
bool run()
{
return k.run(2, globalSize, NULL, false);
}
// Appends one extra kernel argument after the ones already bound.
template<typename T>
void setArg(const T& arg)
{
nArgs = k.set(nArgs, arg);
}
UMat src, dst;
ocl::Kernel k;
size_t globalSize[2];
int nArgs;
};
#endif
///////////////////////////// Top-level template function ////////////////////////////////
template <typename Cvt>
class CvtColorLoop_Invoker : public ParallelLoopBody
{
typedef typename Cvt::channel_type _Tp;
public:
CvtColorLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt) :
ParallelLoopBody(), src_data(src_data_), src_step(src_step_), dst_data(dst_data_), dst_step(dst_step_),
width(width_), cvt(_cvt)
{
}
virtual void operator()(const Range& range) const
{
CV_TRACE_FUNCTION();
const uchar* yS = src_data + static_cast<size_t>(range.start) * src_step;
uchar* yD = dst_data + static_cast<size_t>(range.start) * dst_step;
for( int i = range.start; i < range.end; ++i, yS += src_step, yD += dst_step )
cvt(reinterpret_cast<const _Tp*>(yS), reinterpret_cast<_Tp*>(yD), width);
}
private:
const uchar * src_data;
const size_t src_step;
uchar * dst_data;
const size_t dst_step;
const int width;
const Cvt& cvt;
const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
};
// Runs the row converter `cvt` over all `height` rows of an image in parallel.
// src_step and dst_step are row strides in bytes; `width` is passed through to the converter.
// The third argument of parallel_for_ is a hint of one stripe per 65536 pixels.
template <typename Cvt>
void CvtColorLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
{
    // Compute the pixel count in double: multiplying two ints first overflows
    // (undefined behaviour) once width * height exceeds INT_MAX.
    const double nstripes = (static_cast<double>(width) * height) / static_cast<double>(1<<16);

    parallel_for_(Range(0, height),
                  CvtColorLoop_Invoker<Cvt>(src_data, src_step, dst_data, dst_step, width, cvt),
                  nstripes);
}
// NEED_IPP is 1 when the build has IPP (HAVE_IPP) with IPP_VERSION_X100 >= 700, otherwise 0.
// It guards the IPP helper code that follows.
#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
# define NEED_IPP 1
#else
# define NEED_IPP 0
#endif
#if NEED_IPP
// Maximum channel values for the 8-bit, 16-bit and 32-bit float IPP data types.
#define MAX_IPP8u 255
#define MAX_IPP16u 65535
#define MAX_IPP32f 1.0
// Type-erased signatures of the IPP primitives used by the functors below.
// Arguments are: source pointer, source step, destination pointer, destination step, ROI size.
// Reorder functions additionally take the destination channel order array.
typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
// General functions take no extra argument.
typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
// Color-to-gray functions additionally take an array of Ipp32f values.
// NOTE(review): presumably the per-channel weights - confirm against the IPP documentation.
typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);
// Parallel loop body that hands a band of rows to an IPP functor `Cvt`, callable as
// cvt(src, srcStep, dst, dstStep, cols, rows) -> bool.
// The shared flag `*ok` starts as true and is cleared when any band fails.
template <typename Cvt>
class CvtColorIPPLoop_Invoker : public ParallelLoopBody
{
public:

    CvtColorIPPLoop_Invoker(const uchar * src_data_, size_t src_step_, uchar * dst_data_, size_t dst_step_, int width_, const Cvt& _cvt, bool *_ok) :
        ParallelLoopBody(),
        src_data(src_data_), src_step(src_step_),
        dst_data(dst_data_), dst_step(dst_step_),
        width(width_), cvt(_cvt), ok(_ok)
    {
        *ok = true;
    }

    // Converts rows [range.start, range.end) with a single functor call.
    virtual void operator()(const Range& range) const
    {
        const void *yS = src_data + src_step * range.start;
        void *yD = dst_data + dst_step * range.start;
        const int bandRows = range.end - range.start;

        const bool converted = cvt(yS, static_cast<int>(src_step), yD, static_cast<int>(dst_step), width, bandRows);
        if( converted )
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
        else
            *ok = false;
    }

private:
    const uchar * src_data;
    const size_t src_step;
    uchar * dst_data;
    const size_t dst_step;
    const int width;
    const Cvt& cvt;
    bool *ok;

    // Declared but not defined: the invoker is not assignable.
    const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
};
// Runs the IPP functor `cvt` over all `height` rows in parallel bands.
// Returns true when every band was converted successfully, false if any call failed.
// src_step and dst_step are row strides in bytes.
template <typename Cvt>
bool CvtColorIPPLoop(const uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
{
    // The invoker's constructor also sets the flag; initialising here keeps it defined on every path.
    bool ok = true;

    // Compute the pixel count in double: multiplying two ints first overflows
    // (undefined behaviour) once width * height exceeds INT_MAX.
    const double nstripes = (static_cast<double>(width) * height) / static_cast<double>(1<<16);

    parallel_for_(Range(0, height),
                  CvtColorIPPLoop_Invoker<Cvt>(src_data, src_step, dst_data, dst_step, width, cvt, &ok),
                  nstripes);
    return ok;
}
template <typename Cvt>
bool CvtColorIPPLoopCopy(const uchar * src_data, size_t src_step, int src_type, uchar * dst_data, size_t dst_step, int width, int height, const Cvt& cvt)
{
Mat temp;
Mat src(Size(width, height), src_type, const_cast<uchar*>(src_data), src_step);
Mat source = src;
if( src_data == dst_data )
{
src.copyTo(temp);
source = temp;
}
bool ok;
parallel_for_(Range(0, source.rows),
CvtColorIPPLoop_Invoker<Cvt>(source.data, source.step, dst_data, dst_step,
source.cols, cvt, &ok),
source.total()/(double)(1<<16) );
return ok;
}
// Wraps a single "general" IPP conversion function.
// Calling the functor returns false when no function was supplied or the IPP status is negative.
struct IPPGeneralFunctor
{
    IPPGeneralFunctor(ippiGeneralFunc _func) : ippiColorConvertGeneral(_func){}

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (!ippiColorConvertGeneral)
            return false;
        return CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
    }

private:
    ippiGeneralFunc ippiColorConvertGeneral;
};
// Wraps a single IPP channel-reorder function together with its order array.
// The first three order entries come from the constructor; the fourth is fixed to 3.
// Calling the functor returns false when no function was supplied or the IPP status is negative.
struct IPPReorderFunctor
{
    IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : ippiColorConvertReorder(_func)
    {
        const int requested[4] = { _order0, _order1, _order2, 3 };
        for (int k = 0; k < 4; ++k)
            order[k] = requested[k];
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (!ippiColorConvertReorder)
            return false;
        return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0;
    }

private:
    ippiReorderFunc ippiColorConvertReorder;
    int order[4];
};
// Two-stage IPP conversion: reorder the channels into a temporary 3-channel image of the
// given depth, then run the general conversion from that temporary into the destination.
// Returns false when either function is missing or either stage reports a negative status.
struct IPPReorderGeneralFunctor
{
    IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) :
        ippiColorConvertReorder(_func1), ippiColorConvertGeneral(_func2), depth(_depth)
    {
        const int requested[4] = { _order0, _order1, _order2, 3 };
        for (int k = 0; k < 4; ++k)
            order[k] = requested[k];
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (!ippiColorConvertReorder || !ippiColorConvertGeneral)
            return false;

        // Intermediate buffer that receives the reordered channels.
        Mat temp(rows, cols, CV_MAKETYPE(depth, 3));

        const bool reordered =
            CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) >= 0;
        if (!reordered)
            return false;

        return CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0;
    }

private:
    ippiReorderFunc ippiColorConvertReorder;
    ippiGeneralFunc ippiColorConvertGeneral;
    int order[4];
    int depth;
};
// Two-stage IPP conversion: run the general conversion into a temporary 3-channel image of
// the given depth, then reorder the channels from that temporary into the destination.
// Returns false when either function is missing or either stage reports a negative status.
struct IPPGeneralReorderFunctor
{
    IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) :
        ippiColorConvertGeneral(_func1), ippiColorConvertReorder(_func2), depth(_depth)
    {
        const int requested[4] = { _order0, _order1, _order2, 3 };
        for (int k = 0; k < 4; ++k)
            order[k] = requested[k];
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (!ippiColorConvertGeneral || !ippiColorConvertReorder)
            return false;

        // Intermediate buffer that receives the converted, not yet reordered, pixels.
        Mat temp(rows, cols, CV_MAKETYPE(depth, 3));

        const bool converted =
            CV_INSTRUMENT_FUN_IPP(ippiColorConvertGeneral, src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) >= 0;
        if (!converted)
            return false;

        return CV_INSTRUMENT_FUN_IPP(ippiColorConvertReorder, temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
    }

private:
    ippiGeneralFunc ippiColorConvertGeneral;
    ippiReorderFunc ippiColorConvertReorder;
    int order[4];
    int depth;
};
// Tables of IPP channel-swap functions, defined in another translation unit.
// NOTE(review): the 8 entries are presumably indexed by matrix depth, with null entries for
// unsupported depths - confirm at the definition.
extern ippiReorderFunc ippiSwapChannelsC3C4RTab[8];
extern ippiReorderFunc ippiSwapChannelsC4C3RTab[8];
extern ippiReorderFunc ippiSwapChannelsC3RTab[8];
#endif
#ifdef HAVE_OPENCL
// OpenCL implementations of the colour conversions (declarations only; the definitions are
// not part of this header). Each function returns bool.
// NOTE(review): presumably false means the OpenCL path was not taken and the caller should
// fall back to the CPU implementation - confirm at the call sites.
// Common parameters: dcn - destination channel count; bidx - index of the blue channel;
// the remaining flags mirror the helper functions declared earlier in this header.
bool oclCvtColorBGR2Luv( InputArray _src, OutputArray _dst, int bidx, bool srgb );
bool oclCvtColorBGR2Lab( InputArray _src, OutputArray _dst, int bidx, bool srgb );
bool oclCvtColorLab2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool srgb);
bool oclCvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool srgb);
bool oclCvtColorBGR2XYZ( InputArray _src, OutputArray _dst, int bidx );
bool oclCvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx );
bool oclCvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full );
bool oclCvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, bool full );
bool oclCvtColorBGR2HLS( InputArray _src, OutputArray _dst, int bidx, bool full );
bool oclCvtColorBGR2HSV( InputArray _src, OutputArray _dst, int bidx, bool full );
bool oclCvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool reverse );
bool oclCvtColorBGR25x5( InputArray _src, OutputArray _dst, int bidx, int gbits );
bool oclCvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int gbits );
bool oclCvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits );
bool oclCvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits );
bool oclCvtColorBGR2Gray( InputArray _src, OutputArray _dst, int bidx );
bool oclCvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn );
bool oclCvtColorRGBA2mRGBA( InputArray _src, OutputArray _dst );
bool oclCvtColormRGBA2RGBA( InputArray _src, OutputArray _dst );
bool oclCvtColorBGR2YCrCb( InputArray _src, OutputArray _dst, int bidx);
// NOTE(review): this name is spelled "oclCvtcolor" with a lowercase 'c', unlike its siblings;
// it must stay in sync with the definition, so rename both together if it is ever fixed.
bool oclCvtcolorYCrCb2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx);
bool oclCvtColorBGR2YUV( InputArray _src, OutputArray _dst, int bidx );
bool oclCvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx );
bool oclCvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx, int yidx );
bool oclCvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx );
bool oclCvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, int bidx, int uidx );
bool oclCvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, int bidx, int uidx );
bool oclCvtColorYUV2Gray_420( InputArray _src, OutputArray _dst );
#endif
// CPU implementations of the colour conversions (declarations only; the definitions are
// not part of this header).
// Common parameters: dcn - destination channel count; swapb - whether the blue and red
// channels are swapped; srgb, fullRange, gbits and uidx correspond to the values produced
// by is_sRGB(), isFullRangeHSV(), greenBits() and uIndex() declared earlier in this header.
// NOTE(review): the exact meaning of crcb, ycn and coi is not visible here - confirm at the definitions.
void cvtColorBGR2Lab( InputArray _src, OutputArray _dst, bool swapb, bool srgb);
void cvtColorBGR2Luv( InputArray _src, OutputArray _dst, bool swapb, bool srgb);
void cvtColorLab2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool srgb );
void cvtColorLuv2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool srgb );
void cvtColorBGR2XYZ( InputArray _src, OutputArray _dst, bool swapb );
void cvtColorXYZ2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb );
void cvtColorBGR2YUV( InputArray _src, OutputArray _dst, bool swapb, bool crcb);
void cvtColorYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool crcb);
void cvtColorOnePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx, int ycn);
void cvtColorTwoPlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorTwoPlaneYUV2BGRpair( InputArray _ysrc, InputArray _uvsrc, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorThreePlaneYUV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int uidx );
void cvtColorBGR2ThreePlaneYUV( InputArray _src, OutputArray _dst, bool swapb, int uidx);
void cvtColorYUV2Gray_420( InputArray _src, OutputArray _dst );
void cvtColorYUV2Gray_ch( InputArray _src, OutputArray _dst, int coi );
void cvtColorBGR2HLS( InputArray _src, OutputArray _dst, bool swapb, bool fullRange );
void cvtColorBGR2HSV( InputArray _src, OutputArray _dst, bool swapb, bool fullRange );
void cvtColorHLS2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange);
void cvtColorHSV2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, bool fullRange);
void cvtColorBGR2BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb);
void cvtColorBGR25x5( InputArray _src, OutputArray _dst, bool swapb, int gbits);
void cvtColor5x52BGR( InputArray _src, OutputArray _dst, int dcn, bool swapb, int gbits);
void cvtColorBGR2Gray( InputArray _src, OutputArray _dst, bool swapb);
void cvtColorGray2BGR( InputArray _src, OutputArray _dst, int dcn);
void cvtColor5x52Gray( InputArray _src, OutputArray _dst, int gbits);
void cvtColorGray25x5( InputArray _src, OutputArray _dst, int gbits);
void cvtColorRGBA2mRGBA(InputArray _src, OutputArray _dst);
void cvtColormRGBA2RGBA(InputArray _src, OutputArray _dst);
} //namespace cv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,50 @@
//
//M*/
/********************************* COPYRIGHT NOTICE *******************************\
Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer
from MD-Mathematische Dienste GmbH. Below is the copyright notice:
IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
By downloading, copying, installing or using the software you agree
to this license. If you do not agree to this license, do not download,
install, copy or use the software.
Contributors License Agreement:
Copyright (c) 2002,
MD-Mathematische Dienste GmbH
Im Defdahl 5-10
44141 Dortmund
Germany
www.md-it.de
Redistribution and use in source and binary forms,
with or without modification, are permitted provided
that the following conditions are met:
Redistributions of source code must retain
the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
The name of Contributor may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
\**********************************************************************************/
#include "precomp.hpp"
#include <limits>

View File

@ -0,0 +1,621 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
// Per-depth configuration selected by the `depth` build option.
// For each supported depth this defines the element type (DATA_TYPE), the channel maximum
// (MAX_NUM) and midpoint (HALF_MAX_NUM), the coefficient type (COEFF_TYPE), a saturating
// cast (SAT_CAST) and a DEPTH_<n> marker used to select kernels with #ifdef.
// Any other depth value stops the build with an error.
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX_NUM 128
#define COEFF_TYPE int
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX_NUM 32768
#define COEFF_TYPE int
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX_NUM 0.5f
#define COEFF_TYPE float
// No saturation is applied to float data.
#define SAT_CAST(num) (num)
#define DEPTH_5
#else
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
// Rounding right shift: adds half of 2^n, i.e. (1 << (n-1)), to x before shifting right by n bits.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
enum
{
// Number of fractional bits used by the 8-bit RGB2HSV kernel when it scales by the
// sdiv_table / hdiv_table entries.
hsv_shift = 12
};
// Size in bytes of one source pixel and one destination pixel.
// scn and dcn are not defined in this file.
// NOTE(review): presumably both are supplied as build options - confirm on the host side.
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
// hscale and hrange default to 0 when the build options do not define them.
#ifndef hscale
#define hscale 0
#endif
#ifndef hrange
#define hrange 0
#endif
// Maps the R/G/B components to vector fields: with bidx == 0 blue is .x and red is .z,
// otherwise red is .x and blue is .z. Green is always .y.
// NOTE(review): bidx is not defined in this file; presumably a build option - confirm on the host side.
#if bidx == 0
#define R_COMP z
#define G_COMP y
#define B_COMP x
#else
#define R_COMP x
#define G_COMP y
#define B_COMP z
#endif
//////////////////////////////////// RGB <-> HSV //////////////////////////////////////
// Lookup used by the HSV2RGB kernels: row = hue sector (0..5); the three columns are the
// indices into the per-pixel `tab` array from which b, g and r are taken, in that order.
__constant int sector_data[][3] = { { 1, 3, 0 },
{ 1, 0, 2 },
{ 3, 0, 1 },
{ 0, 2, 1 },
{ 0, 1, 3 },
{ 2, 1, 0 } };
#ifdef DEPTH_0
// 8-bit RGB/BGR -> HSV conversion using integer fixed-point arithmetic.
// Each work-item handles column get_global_id(0) and up to PIX_PER_WI_Y consecutive rows
// starting at get_global_id(1) * PIX_PER_WI_Y.
// src/dst steps and offsets are in bytes; sdiv_table is indexed by v and hdiv_table by diff.
// NOTE(review): the tables presumably hold fixed-point reciprocals scaled by 1 << hsv_shift -
// confirm on the host side where they are built.
__kernel void RGB2HSV(__global const uchar* src, int src_step, int src_offset,
__global uchar* dst, int dst_step, int dst_offset,
int rows, int cols,
__constant int * sdiv_table, __constant int * hdiv_table)
{
int x = get_global_id(0);
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols)
{
// Byte offsets of the first pixel handled by this work-item.
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
// NOTE(review): four bytes are loaded regardless of scn; with scn == 3 the fourth byte
// belongs to the next pixel (or lies past the row end for the last pixel) - confirm this is safe.
uchar4 src_pix = vload4(0, src + src_index);
int b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
int h, s, v = b;
int vmin = b, diff;
int vr, vg;
// v = max(b, g, r), vmin = min(b, g, r).
v = max(v, g);
v = max(v, r);
vmin = min(vmin, g);
vmin = min(vmin, r);
diff = v - vmin;
// All-ones masks (-1) when the maximum equals r / g, zero otherwise.
vr = v == r ? -1 : 0;
vg = v == g ? -1 : 0;
// s = diff * sdiv_table[v], rounded and shifted down by hsv_shift bits.
s = mad24(diff, sdiv_table[v], (1 << (hsv_shift-1))) >> hsv_shift;
// Branch-free selection of the hue numerator:
//   v == r : g - b
//   v == g : (b - r) + 2*diff
//   else   : (r - g) + 4*diff
h = (vr & (g - b)) +
(~vr & ((vg & mad24(diff, 2, b - r)) + ((~vg) & mad24(4, diff, r - g))));
// Scale by hdiv_table[diff], round, shift down; negative hues wrap by adding hrange.
h = mad24(h, hdiv_table[diff], (1 << (hsv_shift-1))) >> hsv_shift;
h += h < 0 ? hrange : 0;
dst[dst_index] = convert_uchar_sat_rte(h);
dst[dst_index + 1] = (uchar)s;
dst[dst_index + 2] = (uchar)v;
// Advance to the next row handled by this work-item.
++y;
dst_index += dst_step;
src_index += src_step;
}
}
}
}
// 8-bit HSV -> RGB/BGR conversion computed in float.
// Each work-item handles column get_global_id(0) and up to PIX_PER_WI_Y consecutive rows
// starting at get_global_id(1) * PIX_PER_WI_Y. Steps and offsets are in bytes.
// Output channel order is selected by bidx: blue goes to index bidx, red to bidx^2.
__kernel void HSV2RGB(__global const uchar* src, int src_step, int src_offset,
__global uchar* dst, int dst_step, int dst_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols)
{
// Byte offsets of the first pixel handled by this work-item.
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
// NOTE(review): four bytes are loaded regardless of scn - confirm this is safe for 3-channel input.
uchar4 src_pix = vload4(0, src + src_index);
// h stays in stored units; s and v are normalised from 0..255 to 0..1.
float h = src_pix.x, s = src_pix.y*(1/255.f), v = src_pix.z*(1/255.f);
float b, g, r;
if (s != 0)
{
float tab[4];
int sector;
// Scale the hue by hscale, then wrap it into [0, 6).
// NOTE(review): hscale presumably maps the stored hue range onto six sectors - confirm on the host side.
h *= hscale;
if( h < 0 )
do h += 6; while( h < 0 );
else if( h >= 6 )
do h -= 6; while( h >= 6 );
// sector = integer part (rounded toward negative infinity), h = fractional part.
sector = convert_int_sat_rtn(h);
h -= sector;
// Guard against a sector outside 0..5.
if( (unsigned)sector >= 6u )
{
sector = 0;
h = 0.f;
}
// Candidate channel values; sector_data picks which one is b, g and r.
tab[0] = v;
tab[1] = v*(1.f - s);
tab[2] = v*(1.f - s*h);
tab[3] = v*(1.f - s*(1.f - h));
b = tab[sector_data[sector][0]];
g = tab[sector_data[sector][1]];
r = tab[sector_data[sector][2]];
}
else
// Zero saturation: all three channels equal v.
b = g = r = v;
// Scale back to 0..255 with round-to-nearest-even and saturation.
dst[dst_index + bidx] = convert_uchar_sat_rte(b*255.f);
dst[dst_index + 1] = convert_uchar_sat_rte(g*255.f);
dst[dst_index + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
// Fourth (alpha) channel is set to the maximum value.
dst[dst_index + 3] = MAX_NUM;
#endif
// Advance to the next row handled by this work-item.
++y;
dst_index += dst_step;
src_index += src_step;
}
}
}
}
#elif defined DEPTH_5
// 32-bit float RGB/BGR -> HSV conversion.
// Each work-item handles column get_global_id(0) and up to PIX_PER_WI_Y consecutive rows
// starting at get_global_id(1) * PIX_PER_WI_Y. Steps and offsets are in bytes.
// Output: dst[0] = hue in degrees (0..360) multiplied by hscale, dst[1] = saturation, dst[2] = value.
__kernel void RGB2HSV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dst_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols)
{
// Byte offsets of the first pixel handled by this work-item.
int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows)
{
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
// NOTE(review): four floats are loaded regardless of scn - confirm this is safe for 3-channel input.
float4 src_pix = vload4(0, src);
float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
float h, s, v;
float vmin, diff;
// v = max(r, g, b), vmin = min(r, g, b).
v = vmin = r;
if( v < g ) v = g;
if( v < b ) v = b;
if( vmin > g ) vmin = g;
if( vmin > b ) vmin = b;
diff = v - vmin;
// FLT_EPSILON keeps both divisions defined when v or diff is zero.
s = diff/(float)(fabs(v) + FLT_EPSILON);
// From here on diff holds 60 / (max - min), the per-unit hue scale in degrees.
diff = (float)(60.f/(diff + FLT_EPSILON));
// Hue depends on which channel is the maximum; offsets are 0, 120 and 240 degrees.
if( v == r )
h = (g - b)*diff;
else if( v == g )
h = fma(b - r, diff, 120.f);
else
h = fma(r - g, diff, 240.f);
// Wrap negative hues into the 0..360 range.
if( h < 0 )
h += 360.f;
dst[0] = h*hscale;
dst[1] = s;
dst[2] = v;
// Advance to the next row handled by this work-item.
++y;
dst_index += dst_step;
src_index += src_step;
}
}
}
}
// HSV -> RGB/BGR conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//
// H is multiplied by hscale and wrapped into [0, 6); S and V are used as stored.
// When dcn == 4 the fourth channel is set to MAX_NUM.
// hscale, bidx, dcn, scnbytes, dcnbytes, MAX_NUM, PIX_PER_WI_Y and sector_data are
// defined outside this kernel.
__kernel void HSV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                float3 src_pix = vload3(0, src);

                float h = src_pix.x, s = src_pix.y, v = src_pix.z;
                float b, g, r;

                if (s != 0)
                {
                    float tab[4];
                    int sector;

                    // wrap the scaled hue into [0, 6)
                    h *= hscale;
                    if(h < 0)
                        do h += 6; while (h < 0);
                    else if (h >= 6)
                        do h -= 6; while (h >= 6);

                    sector = convert_int_sat_rtn(h);
                    h -= sector; // fractional part inside the sector
                    if ((unsigned)sector >= 6u)
                    {
                        // e.g. a tiny negative hue rounds to exactly 6 after "+= 6";
                        // keep the sector_data lookup inside its six rows
                        sector = 0;
                        h = 0.f;
                    }

                    tab[0] = v;
                    tab[1] = v*(1.f - s);
                    tab[2] = v*(1.f - s*h);
                    tab[3] = v*(1.f - s*(1.f - h));

                    b = tab[sector_data[sector][0]];
                    g = tab[sector_data[sector][1]];
                    r = tab[sector_data[sector][2]];
                }
                else
                    b = g = r = v; // zero saturation: grey

                dst[bidx] = b;
                dst[1] = g;
                dst[bidx^2] = r;
#if dcn == 4
                dst[3] = MAX_NUM;
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#endif
///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
#ifdef DEPTH_0
// RGB/BGR -> HLS conversion for 8-bit data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset : same triple for the destination
//   rows, cols                : image size in pixels
//
// Channels are scaled by 1/255 before the conversion. The hue is computed on a
// 0..360 scale and multiplied by hscale; L and S are multiplied by 255. All three
// outputs are rounded with saturation. Pixels whose max-min spread does not exceed
// FLT_EPSILON get H = S = 0.
// B_COMP/G_COMP/R_COMP, hscale, scnbytes, dcnbytes and PIX_PER_WI_Y are defined
// outside this kernel.
__kernel void RGB2HLS(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                // Only three channels are consumed: load exactly three bytes so that the
                // last pixel of a tightly packed 3-channel buffer is not over-read.
                uchar3 src_pix = vload3(0, src + src_index);

                float b = src_pix.B_COMP*(1/255.f), g = src_pix.G_COMP*(1/255.f), r = src_pix.R_COMP*(1/255.f);
                float h = 0.f, s = 0.f, l;
                float vmin, vmax, diff;

                // vmax = max(r, g, b), vmin = min(r, g, b)
                vmax = vmin = r;
                if (vmax < g) vmax = g;
                if (vmax < b) vmax = b;
                if (vmin > g) vmin = g;
                if (vmin > b) vmin = b;

                diff = vmax - vmin;
                l = (vmax + vmin)*0.5f;

                if (diff > FLT_EPSILON)
                {
                    s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
                    diff = 60.f/diff;

                    if( vmax == r )
                        h = (g - b)*diff;
                    else if( vmax == g )
                        h = fma(b - r, diff, 120.f);
                    else
                        h = fma(r - g, diff, 240.f);

                    if( h < 0.f )
                        h += 360.f;
                }

                dst[dst_index] = convert_uchar_sat_rte(h*hscale);
                dst[dst_index + 1] = convert_uchar_sat_rte(l*255.f);
                dst[dst_index + 2] = convert_uchar_sat_rte(s*255.f);

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
// HLS -> RGB/BGR conversion for 8-bit data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset : same triple for the destination
//   rows, cols                : image size in pixels
//
// H is multiplied by hscale and wrapped into [0, 6); L and S are scaled by 1/255.
// Output channels are multiplied by 255 and rounded with saturation; when dcn == 4
// the fourth channel is set to MAX_NUM.
// hscale, bidx, dcn, scnbytes, dcnbytes, MAX_NUM, PIX_PER_WI_Y and sector_data are
// defined outside this kernel.
__kernel void HLS2RGB(__global const uchar* src, int src_step, int src_offset,
                      __global uchar* dst, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                // Only three channels are consumed: load exactly three bytes so that the
                // last pixel of a tightly packed buffer is not over-read by a 4-wide load.
                uchar3 src_pix = vload3(0, src + src_index);

                float h = src_pix.x, l = src_pix.y*(1.f/255.f), s = src_pix.z*(1.f/255.f);
                float b, g, r;

                if (s != 0)
                {
                    float tab[4];

                    float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
                    float p1 = 2*l - p2;

                    // wrap the scaled hue into [0, 6)
                    h *= hscale;
                    if( h < 0 )
                        do h += 6; while( h < 0 );
                    else if( h >= 6 )
                        do h -= 6; while( h >= 6 );

                    int sector = convert_int_sat_rtn(h);
                    h -= sector; // fractional part inside the sector

                    tab[0] = p2;
                    tab[1] = p1;
                    tab[2] = fma(p2 - p1, 1-h, p1);
                    tab[3] = fma(p2 - p1, h, p1);

                    b = tab[sector_data[sector][0]];
                    g = tab[sector_data[sector][1]];
                    r = tab[sector_data[sector][2]];
                }
                else
                    b = g = r = l; // zero saturation: grey

                dst[dst_index + bidx] = convert_uchar_sat_rte(b*255.f);
                dst[dst_index + 1] = convert_uchar_sat_rte(g*255.f);
                dst[dst_index + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
#if dcn == 4
                dst[dst_index + 3] = MAX_NUM;
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#elif defined DEPTH_5
// RGB/BGR -> HLS conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//
// The hue is computed on a 0..360 scale and multiplied by hscale; L and S are
// stored unscaled. Pixels whose max-min spread does not exceed FLT_EPSILON get
// H = S = 0.
// B_COMP/G_COMP/R_COMP, hscale, scnbytes, dcnbytes and PIX_PER_WI_Y are defined
// outside this kernel.
__kernel void RGB2HLS(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                float3 src_pix = vload3(0, src);

                float b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
                float h = 0.f, s = 0.f, l;
                float vmin, vmax, diff;

                // vmax = max(r, g, b), vmin = min(r, g, b)
                vmax = vmin = r;
                if (vmax < g) vmax = g;
                if (vmax < b) vmax = b;
                if (vmin > g) vmin = g;
                if (vmin > b) vmin = b;

                diff = vmax - vmin;
                l = (vmax + vmin)*0.5f;

                if (diff > FLT_EPSILON)
                {
                    s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
                    diff = 60.f/diff;

                    if( vmax == r )
                        h = (g - b)*diff;
                    else if( vmax == g )
                        h = fma(b - r, diff, 120.f);
                    else
                        h = fma(r - g, diff, 240.f);

                    if( h < 0.f ) h += 360.f;
                }

                dst[0] = h*hscale;
                dst[1] = l;
                dst[2] = s;

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
// HLS -> RGB/BGR conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//
// H is multiplied by hscale and wrapped into [0, 6); L and S are used as stored.
// When dcn == 4 the fourth channel is set to MAX_NUM.
// hscale, bidx, dcn, scnbytes, dcnbytes, MAX_NUM, PIX_PER_WI_Y and sector_data are
// defined outside this kernel.
__kernel void HLS2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dst_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                float3 src_pix = vload3(0, src);

                float h = src_pix.x, l = src_pix.y, s = src_pix.z;
                float b, g, r;

                if (s != 0)
                {
                    float tab[4];
                    int sector;

                    float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
                    float p1 = 2*l - p2;

                    // wrap the scaled hue into [0, 6)
                    h *= hscale;
                    if( h < 0 )
                        do h += 6; while( h < 0 );
                    else if( h >= 6 )
                        do h -= 6; while( h >= 6 );

                    sector = convert_int_sat_rtn(h);
                    h -= sector; // fractional part inside the sector
                    if( (unsigned)sector >= 6u )
                    {
                        // A tiny negative hue rounds to exactly 6 after "+= 6", which
                        // would index past the six rows of sector_data. Treat it as the
                        // start of sector 0, exactly as HSV2RGB does.
                        sector = 0;
                        h = 0.f;
                    }

                    tab[0] = p2;
                    tab[1] = p1;
                    tab[2] = fma(p2 - p1, 1-h, p1);
                    tab[3] = fma(p2 - p1, h, p1);

                    b = tab[sector_data[sector][0]];
                    g = tab[sector_data[sector][1]];
                    r = tab[sector_data[sector][2]];
                }
                else
                    b = g = r = l; // zero saturation: grey

                dst[bidx] = b;
                dst[1] = g;
                dst[bidx^2] = r;
#if dcn == 4
                dst[3] = MAX_NUM;
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#endif

View File

@ -0,0 +1,735 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Per-depth configuration selected by the `depth` build option:
//   DATA_TYPE    - element type of a single channel
//   MAX_NUM      - largest channel value (written to generated alpha channels)
//   HALF_MAX_NUM - mid-range value for the element type (128 / 32768 / 0.5f)
//   COEFF_TYPE   - element type of the coefficient arrays passed to the XYZ kernels
//   SAT_CAST     - saturating conversion to DATA_TYPE (identity for float)
//   DEPTH_n      - marker used by the #ifdef blocks below
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX_NUM 128
#define COEFF_TYPE int
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX_NUM 32768
#define COEFF_TYPE int
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX_NUM 0.5f
#define COEFF_TYPE float
#define SAT_CAST(num) (num)
#define DEPTH_5
#else
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
// Rounding right shift: adds half of (1 << n) before shifting x right by n bits.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// Number of fractional bits removed by CV_DESCALE in the integer XYZ kernels.
enum
{
xyz_shift = 12,
};
// Size in bytes of one source / destination pixel; scn and dcn are channel counts
// that are not defined in this file section (presumably build options).
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
// Two-level paste so that DATA_TYPE is expanded before ## is applied.
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
// Vector types built from DATA_TYPE, e.g. uchar4 / uchar3.
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
// RGB -> XYZ conversion: multiplies each pixel by the 3x3 matrix stored row-major in coeffs.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   coeffs                       : nine matrix coefficients; for integer depths the products
//                                  are descaled by xyz_shift bits with rounding
//
// Results go through SAT_CAST before being stored.
__kernel void RGB2XYZ(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset,
                      int rows, int cols, __constant COEFF_TYPE * coeffs)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1) * PIX_PER_WI_Y;

    if (dx < cols)
    {
        int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
        int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (dy < rows)
            {
                __global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
                __global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                DATA_TYPE_3 src_pix = vload3(0, src);

                DATA_TYPE r = src_pix.x, g = src_pix.y, b = src_pix.z;

#ifdef DEPTH_5
                float x = fma(r, coeffs[0], fma(g, coeffs[1], b * coeffs[2]));
                float y = fma(r, coeffs[3], fma(g, coeffs[4], b * coeffs[5]));
                float z = fma(r, coeffs[6], fma(g, coeffs[7], b * coeffs[8]));
#else
                int x = CV_DESCALE(mad24(r, coeffs[0], mad24(g, coeffs[1], b * coeffs[2])), xyz_shift);
                int y = CV_DESCALE(mad24(r, coeffs[3], mad24(g, coeffs[4], b * coeffs[5])), xyz_shift);
                int z = CV_DESCALE(mad24(r, coeffs[6], mad24(g, coeffs[7], b * coeffs[8])), xyz_shift);
#endif
                dst[0] = SAT_CAST(x);
                dst[1] = SAT_CAST(y);
                dst[2] = SAT_CAST(z);

                ++dy;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
// XYZ -> RGB conversion: multiplies each pixel by the 3x3 matrix stored row-major in coeffs.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   coeffs                       : nine matrix coefficients; for integer depths the products
//                                  are descaled by xyz_shift bits with rounding
//
// Results go through SAT_CAST. With dcn == 4 the fourth channel is set to MAX_NUM;
// for integer depths the four channels are then written with a single vector store.
__kernel void XYZ2RGB(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset,
                      int rows, int cols, __constant COEFF_TYPE * coeffs)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1) * PIX_PER_WI_Y;

    if (dx < cols)
    {
        int src_index = mad24(dy, src_step, mad24(dx, scnbytes, src_offset));
        int dst_index = mad24(dy, dst_step, mad24(dx, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (dy < rows)
            {
                __global const DATA_TYPE * src = (__global const DATA_TYPE *)(srcptr + src_index);
                __global DATA_TYPE * dst = (__global DATA_TYPE *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                DATA_TYPE_3 src_pix = vload3(0, src);

                DATA_TYPE x = src_pix.x, y = src_pix.y, z = src_pix.z;

#ifdef DEPTH_5
                float b = fma(x, coeffs[0], fma(y, coeffs[1], z * coeffs[2]));
                float g = fma(x, coeffs[3], fma(y, coeffs[4], z * coeffs[5]));
                float r = fma(x, coeffs[6], fma(y, coeffs[7], z * coeffs[8]));
#else
                int b = CV_DESCALE(mad24(x, coeffs[0], mad24(y, coeffs[1], z * coeffs[2])), xyz_shift);
                int g = CV_DESCALE(mad24(x, coeffs[3], mad24(y, coeffs[4], z * coeffs[5])), xyz_shift);
                int r = CV_DESCALE(mad24(x, coeffs[6], mad24(y, coeffs[7], z * coeffs[8])), xyz_shift);
#endif

                DATA_TYPE dst0 = SAT_CAST(b);
                DATA_TYPE dst1 = SAT_CAST(g);
                DATA_TYPE dst2 = SAT_CAST(r);
#if dcn == 3 || defined DEPTH_5
                dst[0] = dst0;
                dst[1] = dst1;
                dst[2] = dst2;
#if dcn == 4
                dst[3] = MAX_NUM;
#endif
#else
                *(__global DATA_TYPE_4 *)dst = (DATA_TYPE_4)(dst0, dst1, dst2, MAX_NUM);
#endif

                ++dy;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
/////////////////////////////////// [l|s]RGB <-> Lab ///////////////////////////
// Fixed-point shifts used by the 8-bit BGR2Lab kernel: lab_shift descales the
// matrix products, lab_shift2 (= lab_shift + gamma_shift) descales L, a and b.
#define lab_shift xyz_shift
#define gamma_shift 3
#define lab_shift2 (lab_shift + gamma_shift)
// Segment count passed to splineInterpolate for gammaTab, and the factor that maps
// a [0, 1] channel value onto that segment range.
#define GAMMA_TAB_SIZE 1024
#define GammaTabScale (float)GAMMA_TAB_SIZE
// Evaluates a piecewise cubic stored as four consecutive coefficients per segment.
//   x   : position in segment units; its floor, clamped to [0, n-1], picks the segment
//   tab : coefficient table, tab[4*i .. 4*i+3] belonging to segment i
//   n   : number of segments
// Returns c0 + t*(c1 + t*(c2 + t*c3)) with t = x - segment index, evaluated with fma.
inline float splineInterpolate(float x, __global const float * tab, int n)
{
    const int seg = clamp(convert_int_sat_rtn(x), 0, n-1);
    const float t = x - seg;
    __global const float * c = tab + (seg << 2);

    float acc = fma(c[3], t, c[2]);
    acc = fma(acc, t, c[1]);
    return fma(acc, t, c[0]);
}
#ifdef DEPTH_0
// [l|s]RGB -> Lab conversion for 8-bit data using integer lookup tables.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset : same triple for the destination
//   rows, cols                : image size in pixels
//   gammaTab                  : table indexed by the raw 8-bit channel value
//   LabCbrtTab_b              : table indexed by the descaled matrix products
//   coeffs                    : nine fixed-point matrix coefficients (lab_shift fractional bits)
//   Lscale, Lshift            : fixed-point scale and offset applied to fY to obtain L
//
// a and b are stored with an offset of 128; all outputs go through SAT_CAST.
__kernel void BGR2Lab(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
                      __global const ushort * gammaTab, __global ushort * LabCbrtTab_b,
                      __constant int * coeffs, int Lscale, int Lshift)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const uchar* src_ptr = src + src_index;
                __global uchar* dst_ptr = dst + dst_index;

                // Only three channels are consumed: load exactly three bytes so that the
                // last pixel of a tightly packed 3-channel buffer is not over-read.
                uchar3 src_pix = vload3(0, src_ptr);

                int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
                    C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
                    C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

                int R = gammaTab[src_pix.x], G = gammaTab[src_pix.y], B = gammaTab[src_pix.z];
                int fX = LabCbrtTab_b[CV_DESCALE(mad24(R, C0, mad24(G, C1, B*C2)), lab_shift)];
                int fY = LabCbrtTab_b[CV_DESCALE(mad24(R, C3, mad24(G, C4, B*C5)), lab_shift)];
                int fZ = LabCbrtTab_b[CV_DESCALE(mad24(R, C6, mad24(G, C7, B*C8)), lab_shift)];

                int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 );
                // 128*(1 << lab_shift2) is the fixed-point form of the +128 storage offset
                int a = CV_DESCALE( mad24(500, fX - fY, 128*(1 << lab_shift2)), lab_shift2 );
                int b = CV_DESCALE( mad24(200, fY - fZ, 128*(1 << lab_shift2)), lab_shift2 );

                dst_ptr[0] = SAT_CAST(L);
                dst_ptr[1] = SAT_CAST(a);
                dst_ptr[2] = SAT_CAST(b);

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#elif defined DEPTH_5
// [l|s]RGB -> Lab conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   gammaTab (SRGB builds only)  : spline table applied to each clamped input channel
//   coeffs                       : nine matrix coefficients, row-major
//   _1_3                         : not used by this kernel
//   _a                           : offset of the linear branch 7.787*t + _a
//
// Input channels are clamped to [0, 1] before the conversion.
__kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _1_3, float _a)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                float3 src_pix = vload3(0, src);

                float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
                      C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
                      C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];

                float R = clamp(src_pix.x, 0.0f, 1.0f);
                float G = clamp(src_pix.y, 0.0f, 1.0f);
                float B = clamp(src_pix.z, 0.0f, 1.0f);

#ifdef SRGB
                R = splineInterpolate(R * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
                G = splineInterpolate(G * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
                B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

                // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3
                float X = fma(R, C0, fma(G, C1, B*C2));
                float Y = fma(R, C3, fma(G, C4, B*C5));
                float Z = fma(R, C6, fma(G, C7, B*C8));

                // cube root above the threshold, linear segment below it
                float FX = X > 0.008856f ? rootn(X, 3) : fma(7.787f, X, _a);
                float FY = Y > 0.008856f ? rootn(Y, 3) : fma(7.787f, Y, _a);
                float FZ = Z > 0.008856f ? rootn(Z, 3) : fma(7.787f, Z, _a);

                float L = Y > 0.008856f ? fma(116.f, FY, -16.f) : (903.3f * Y);
                float a = 500.f * (FX - FY);
                float b = 200.f * (FY - FZ);

                dst[0] = L;
                dst[1] = a;
                dst[2] = b;

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#endif
// Converts one Lab triple into three output channels clamped to [0, 1].
//   srcbuf                      : L, a, b
//   dstbuf                      : receives the three converted channels
//   gammaTab (SRGB builds only) : spline table applied to each clamped channel
//   coeffs                      : nine matrix coefficients, row-major
//   lThresh                     : L values up to this use the linear branch for Y
//   fThresh                     : fx/fz values up to this use the linear inverse branch
inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    const float lum = srcbuf[0];
    const float chromaA = srcbuf[1];
    const float chromaB = srcbuf[2];

    // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4)
    float y, fy;
    if (lum <= lThresh)
    {
        y = lum / 903.3f;
        fy = fma(7.787f, y, 16.0f / 116.0f);
    }
    else
    {
        fy = (lum + 16.0f) / 116.0f;
        y = fy * fy * fy;
    }

    const float fx = chromaA / 500.0f + fy;
    const float fz = fy - chromaB / 200.0f;

    // invert f(): linear segment up to fThresh, cube above it
    const float x = fx <= fThresh ? (fx - 16.0f / 116.0f) / 7.787f : fx * fx * fx;
    const float z = fz <= fThresh ? (fz - 16.0f / 116.0f) / 7.787f : fz * fz * fz;

    float c0 = clamp(fma(coeffs[0], x, fma(coeffs[1], y, coeffs[2] * z)), 0.0f, 1.0f);
    float c1 = clamp(fma(coeffs[3], x, fma(coeffs[4], y, coeffs[5] * z)), 0.0f, 1.0f);
    float c2 = clamp(fma(coeffs[6], x, fma(coeffs[7], y, coeffs[8] * z)), 0.0f, 1.0f);

#ifdef SRGB
    c0 = splineInterpolate(c0 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
    c1 = splineInterpolate(c1 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
    c2 = splineInterpolate(c2 * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

    dstbuf[0] = c0;
    dstbuf[1] = c1;
    dstbuf[2] = c2;
}
#ifdef DEPTH_0
// Lab -> [l|s]RGB conversion for 8-bit data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset   : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset   : same triple for the destination
//   rows, cols                  : image size in pixels
//   gammaTab (SRGB builds only), coeffs, lThresh, fThresh : forwarded to Lab2BGR_f
//
// The stored L is multiplied by 100/255 and 128 is subtracted from the stored a and b
// before calling Lab2BGR_f; the results are multiplied by 255 and passed through
// SAT_CAST. With dcn != 3 a fourth channel equal to MAX_NUM is written as well.
__kernel void Lab2BGR(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const uchar* src_ptr = src + src_index;
                __global uchar * dst_ptr = dst + dst_index;

                // Only three channels are consumed: load exactly three bytes so that the
                // last pixel of a tightly packed buffer is not over-read by a 4-wide load.
                uchar3 src_pix = vload3(0, src_ptr);

                float srcbuf[3], dstbuf[3];
                srcbuf[0] = src_pix.x*(100.f/255.f);
                srcbuf[1] = convert_float(src_pix.y - 128);
                srcbuf[2] = convert_float(src_pix.z - 128);

                Lab2BGR_f(&srcbuf[0], &dstbuf[0],
#ifdef SRGB
                    gammaTab,
#endif
                    coeffs, lThresh, fThresh);

#if dcn == 3
                dst_ptr[0] = SAT_CAST(dstbuf[0] * 255.0f);
                dst_ptr[1] = SAT_CAST(dstbuf[1] * 255.0f);
                dst_ptr[2] = SAT_CAST(dstbuf[2] * 255.0f);
#else
                *(__global uchar4 *)dst_ptr = (uchar4)(SAT_CAST(dstbuf[0] * 255.0f),
                    SAT_CAST(dstbuf[1] * 255.0f), SAT_CAST(dstbuf[2] * 255.0f), MAX_NUM);
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#elif defined DEPTH_5
// Lab -> [l|s]RGB conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   gammaTab (SRGB builds only), coeffs, lThresh, fThresh : forwarded to Lab2BGR_f
//
// The three channels produced by Lab2BGR_f are stored as-is; when dcn == 4 the
// fourth channel is set to MAX_NUM.
__kernel void Lab2BGR(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float lThresh, float fThresh)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const float * src = (__global const float *)(srcptr + src_index);
                __global float * dst = (__global float *)(dstptr + dst_index);

                // Only three channels are consumed: a 3-wide load never reads past the
                // last pixel of a tightly packed 3-channel buffer.
                float3 src_pix = vload3(0, src);

                float srcbuf[3], dstbuf[3];
                srcbuf[0] = src_pix.x, srcbuf[1] = src_pix.y, srcbuf[2] = src_pix.z;

                Lab2BGR_f(&srcbuf[0], &dstbuf[0],
#ifdef SRGB
                    gammaTab,
#endif
                    coeffs, lThresh, fThresh);

                dst[0] = dstbuf[0], dst[1] = dstbuf[1], dst[2] = dstbuf[2];
#if dcn == 4
                dst[3] = MAX_NUM;
#endif

                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
#endif
/////////////////////////////////// [l|s]RGB <-> Luv ///////////////////////////
// Segment count passed to splineInterpolate for the LabCbrtTab argument of BGR2Luv.
#define LAB_CBRT_TAB_SIZE 1024
// Evaluates to 3072 with gamma_shift == 3; not referenced by the kernels below.
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
// Multiplier applied to Y before the LabCbrtTab lookup (Y == 1.5 maps to LAB_CBRT_TAB_SIZE).
__constant float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
#ifdef DEPTH_5
// [l|s]RGB -> Luv conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   gammaTab (SRGB builds only)  : spline table applied to each clamped input channel
//   LabCbrtTab                   : spline table evaluated at Y*LabCbrtTabScale
//   coeffs                       : nine matrix coefficients, row-major
//   _un, _vn                     : values subtracted from X*d and 2.25*Y*d to form u and v
//
// Input channels are clamped to [0, 1] before the conversion.
__kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __global const float * LabCbrtTab, __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // y only grows, so the first row outside the image ends this work-item
        if (y >= rows)
            break;

        __global const float * in = (__global const float *)(srcptr + src_index);
        __global float * out = (__global float *)(dstptr + dst_index);

        float R = clamp(in[0], 0.f, 1.f);
        float G = clamp(in[1], 0.f, 1.f);
        float B = clamp(in[2], 0.f, 1.f);

#ifdef SRGB
        R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const float X = fma(R, coeffs[0], fma(G, coeffs[1], B*coeffs[2]));
        const float Y = fma(R, coeffs[3], fma(G, coeffs[4], B*coeffs[5]));
        const float Z = fma(R, coeffs[6], fma(G, coeffs[7], B*coeffs[8]));

        const float cbrtY = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
        const float lum = fma(116.f, cbrtY, -16.f);

        // FLT_EPSILON keeps the denominator away from zero
        const float d = 52.0f / fmax(fma(15.0f, Y, fma(3.0f, Z, X)), FLT_EPSILON);

        out[0] = lum;
        out[1] = lum*fma(X, d, -_un);
        out[2] = lum*fma(2.25f, Y*d, -_vn);

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
#elif defined DEPTH_0
// [l|s]RGB -> Luv conversion for 8-bit data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset   : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset   : same triple for the destination
//   rows, cols                  : image size in pixels
//   gammaTab (SRGB builds only) : spline table applied to each input channel
//   LabCbrtTab                  : spline table evaluated at Y*LabCbrtTabScale
//   coeffs                      : nine matrix coefficients, row-major
//   _un, _vn                    : values subtracted from X*d and 2.25*Y*d to form u and v
//
// Input bytes are scaled by 1/255. L is stored as L*2.55; u and v are remapped
// linearly (see the constants below); all outputs go through SAT_CAST.
__kernel void BGR2Luv(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __global const float * LabCbrtTab, __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    __global const uchar * in = src + mad24(y, src_step, mad24(x, scnbytes, src_offset));
    __global uchar * out = dst + mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // y only grows, so the first row outside the image ends this work-item
        if (y >= rows)
            break;

        float scale = 1.0f / 255.0f;
        float R = in[0]*scale, G = in[1]*scale, B = in[2]*scale;

#ifdef SRGB
        R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const float X = fma(R, coeffs[0], fma(G, coeffs[1], B*coeffs[2]));
        const float Y = fma(R, coeffs[3], fma(G, coeffs[4], B*coeffs[5]));
        const float Z = fma(R, coeffs[6], fma(G, coeffs[7], B*coeffs[8]));

        float lum = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE);
        lum = 116.f*lum - 16.f;

        // FLT_EPSILON keeps the denominator away from zero
        const float d = (4*13) / fmax(fma(15.0f, Y, fma(3.0f, Z, X)), FLT_EPSILON);
        const float u = lum*(X*d - _un);
        const float v = lum*fma(2.25f, Y*d, -_vn);

        out[0] = SAT_CAST(lum * 2.55f);
        //0.72033 = 255/(220+134), 96.525 = 134*255/(220+134)
        out[1] = SAT_CAST(fma(u, 0.72033898305084743f, 96.525423728813564f));
        //0.9732 = 255/(140+122), 136.259 = 140*255/(140+122)
        out[2] = SAT_CAST(fma(v, 0.9732824427480916f, 136.259541984732824f));

        ++y;
        out += dst_step;
        in += src_step;
    }
}
#endif
#ifdef DEPTH_5
// Luv -> [l|s]RGB conversion for 32-bit float data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//   gammaTab (SRGB builds only)  : spline table applied to each clamped output channel
//   coeffs                       : nine matrix coefficients, row-major
//   _un, _vn                     : multiplied by L and added to u and v
//
// Output channels are clamped to [0, 1]; when dcn == 4 the fourth channel is set
// to MAX_NUM.
__kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // y only grows, so the first row outside the image ends this work-item
        if (y >= rows)
            break;

        __global const float * in = (__global const float *)(srcptr + src_index);
        __global float * out = (__global float *)(dstptr + dst_index);

        const float L = in[0], u = in[1], v = in[2];

        // cubic branch for L >= 8, linear branch otherwise
        const float t = fma(L, 1.f/116.f, 16.f/116.f);
        const float Y = L >= 8 ? t*t*t
                               : L * (1.0f/903.3f); // L*(3./29.)^3

        const float up = 3.f*fma(L, _un, u);
        // the clamp bounds vp even when the divisor is zero
        const float vp = clamp(0.25f/fma(L, _vn, v), -0.25f, 0.25f);

        const float X = 3.f*Y*up*vp;
        const float Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);

        float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
        float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
        float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));

        R = clamp(R, 0.f, 1.f);
        G = clamp(G, 0.f, 1.f);
        B = clamp(B, 0.f, 1.f);

#ifdef SRGB
        R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        out[0] = R;
        out[1] = G;
        out[2] = B;
#if dcn == 4
        out[3] = MAX_NUM;
#endif

        ++y;
        dst_index += dst_step;
        src_index += src_step;
    }
}
#elif defined DEPTH_0
// Luv -> [l|s]RGB conversion for 8-bit data.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   src, src_step, src_offset   : source bytes, row stride in bytes, byte offset of the first pixel
//   dst, dst_step, dst_offset   : same triple for the destination
//   rows, cols                  : image size in pixels
//   gammaTab (SRGB builds only) : spline table applied to each clamped output channel
//   coeffs                      : nine matrix coefficients, row-major
//   _un, _vn                    : multiplied by L and added to u and v
//
// The stored L is multiplied by 100/255 and the stored u, v are remapped linearly
// (see the constants below). Output channels are clamped to [0, 1], multiplied by
// 255 and passed through SAT_CAST; with dcn == 4 the fourth channel is MAX_NUM.
__kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
                      __global uchar * dst, int dst_step, int dst_offset, int rows, int cols,
#ifdef SRGB
                      __global const float * gammaTab,
#endif
                      __constant float * coeffs, float _un, float _vn)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    __global const uchar * in = src + mad24(y, src_step, mad24(x, scnbytes, src_offset));
    __global uchar * out = dst + mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // y only grows, so the first row outside the image ends this work-item
        if (y >= rows)
            break;

        const float L = in[0]*(100.f/255.f);
        // 1.388235294117647 = (220+134)/255
        const float u = fma(convert_float(in[1]), 1.388235294117647f, -134.f);
        // 1.027450980392157 = (140+122)/255
        const float v = fma(convert_float(in[2]), 1.027450980392157f, - 140.f);

        // cubic branch for L >= 8, linear branch otherwise
        const float t = fma(L, 1.f/116.f, 16.f/116.f);
        const float Y = L >= 8 ? t*t*t
                               : L * (1.0f/903.3f); // L*(3./29.)^3

        const float up = 3.f*fma(L, _un, u);
        // the clamp bounds vp even when the divisor is zero
        const float vp = clamp(0.25f/fma(L, _vn, v), -0.25f, 0.25f);

        //limit X, Y, Z to [0, 2] to fit white point
        const float X = clamp(3.f*Y*up*vp, 0.f, 2.f);
        const float Z = clamp(Y*fma(fma(12.f*13.f, L, -up), vp, -5.f), 0.f, 2.f);

        float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
        float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
        float B = fma(X, coeffs[6], fma(Y, coeffs[7], Z * coeffs[8]));

        R = clamp(R, 0.f, 1.f);
        G = clamp(G, 0.f, 1.f);
        B = clamp(B, 0.f, 1.f);

#ifdef SRGB
        R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
        B = splineInterpolate(B*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif

        const uchar c0 = SAT_CAST(R * 255.0f);
        const uchar c1 = SAT_CAST(G * 255.0f);
        const uchar c2 = SAT_CAST(B * 255.0f);
#if dcn == 4
        *(__global uchar4 *)out = (uchar4)(c0, c1, c2, MAX_NUM);
#else
        out[0] = c0;
        out[1] = c1;
        out[2] = c2;
#endif

        ++y;
        out += dst_step;
        in += src_step;
    }
}
#endif

View File

@ -0,0 +1,454 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
// Per-depth configuration selected by the `depth` build option:
//   DATA_TYPE    - element type of a single channel
//   MAX_NUM      - largest channel value (written to generated alpha channels)
//   HALF_MAX_NUM - mid-range value for the element type (128 / 32768 / 0.5f)
//   COEFF_TYPE   - int for the integer depths, float for the float depth
//   SAT_CAST     - saturating conversion to DATA_TYPE (identity for float)
//   DEPTH_n      - marker used by the #ifdef blocks below
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX_NUM 128
#define COEFF_TYPE int
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX_NUM 32768
#define COEFF_TYPE int
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX_NUM 0.5f
#define COEFF_TYPE float
#define SAT_CAST(num) (num)
#define DEPTH_5
#else
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
// Rounding right shift: adds half of (1 << n) before shifting x right by n bits.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// Fixed-point luma weights with yuv_shift fractional bits;
// R2Y + G2Y + B2Y == 1 << yuv_shift (16384).
enum
{
yuv_shift = 14,
R2Y = 4899,
G2Y = 9617,
B2Y = 1868
};
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
#define B2YF 0.114f
#define G2YF 0.587f
#define R2YF 0.299f
// Size in bytes of one source / destination pixel; scn and dcn are channel counts
// that are not defined in this file section (presumably build options).
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
// Vector component holding each colour: with bidx == 0 blue is .x and red is .z,
// otherwise red is .x and blue is .z; green is always .y.
#if bidx == 0
#define R_COMP z
#define G_COMP y
#define B_COMP x
#else
#define R_COMP x
#define G_COMP y
#define B_COMP z
#endif
// Two-level paste so that DATA_TYPE is expanded before ## is applied.
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
// Vector types built from DATA_TYPE, e.g. uchar4 / uchar3.
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
// RGB/BGR -> single-channel grey conversion.
//
// Each work-item handles column get_global_id(0) for up to PIX_PER_WI_Y consecutive
// rows starting at row get_global_id(1) * PIX_PER_WI_Y.
//
//   srcptr, src_step, src_offset : source bytes, row stride in bytes, byte offset of the first pixel
//   dstptr, dst_step, dst_offset : same triple for the destination
//   rows, cols                   : image size in pixels
//
// Float data uses the B2YF/G2YF/R2YF weights; integer data uses the fixed-point
// B2Y/G2Y/R2Y weights followed by a rounding shift of yuv_shift bits.
__kernel void RGB2Gray(__global const uchar * srcptr, int src_step, int src_offset,
                       __global uchar * dstptr, int dst_step, int dst_offset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
    {
        // y only grows, so the first row outside the image ends this work-item
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_index);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_index);

        const DATA_TYPE_3 pix = vload3(0, in);

#ifdef DEPTH_5
        out[0] = fma(pix.B_COMP, B2YF, fma(pix.G_COMP, G2YF, pix.R_COMP * R2YF));
#else
        out[0] = (DATA_TYPE)CV_DESCALE(mad24(pix.B_COMP, B2Y, mad24(pix.G_COMP, G2Y, mul24(pix.R_COMP, R2Y))), yuv_shift);
#endif

        ++y;
        src_index += src_step;
        dst_index += dst_step;
    }
}
// Gray2RGB: replicates a single-channel value into the three colour channels of the
// destination pixel; when dcn == 4 the fourth channel is set to MAX_NUM.
// A work-item handles column get_global_id(0) and up to PIX_PER_WI_Y consecutive rows.
__kernel void Gray2RGB(__global const uchar * srcptr, int src_step, int src_offset,
                       __global uchar * dstptr, int dst_step, int dst_offset,
                       int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_pos = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_pos);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_pos);
        const DATA_TYPE gray = in[0];

#if dcn == 3 || defined DEPTH_5
        // Scalar stores: used for 3-channel output and for all float output.
        out[2] = gray;
        out[1] = gray;
        out[0] = gray;
#if dcn == 4
        out[3] = MAX_NUM;
#endif
#else
        // 4-channel integer output: write the whole pixel with one vector store.
        *(__global DATA_TYPE_4 *)out = (DATA_TYPE_4)(gray, gray, gray, MAX_NUM);
#endif

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
///////////////////////////////////// RGB[A] <-> BGR[A] //////////////////////////////////////
// RGB: copies pixels between 3- and 4-channel layouts, optionally swapping the first
// and third channels (when REVERSE is defined). A 4-channel destination receives
// MAX_NUM as its fourth channel for a 3-channel source, or the source's fourth
// channel otherwise.
__kernel void RGB(__global const uchar* srcptr, int src_step, int src_offset,
                  __global uchar* dstptr, int dst_step, int dst_offset,
                  int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_pos = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        __global const DATA_TYPE * in = (__global const DATA_TYPE *)(srcptr + src_pos);
        __global DATA_TYPE * out = (__global DATA_TYPE *)(dstptr + dst_pos);

#if scn == 3
        const DATA_TYPE_3 pix = vload3(0, in);
#else
        const DATA_TYPE_4 pix = vload4(0, in);
#endif

#ifdef REVERSE
        out[0] = pix.z;
        out[1] = pix.y;
        out[2] = pix.x;
#else
        out[0] = pix.x;
        out[1] = pix.y;
        out[2] = pix.z;
#endif

#if dcn == 4 && scn == 3
        out[3] = MAX_NUM;
#elif dcn == 4
        out[3] = in[3];
#endif

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
// RGB5x52RGB: unpacks a 16-bit packed pixel into 8-bit channels.
// greenbits == 6 selects a 5-6-5 layout; otherwise 5-5-5 with bit 15 used as alpha.
// The low five bits go to channel bidx, the high colour bits to channel bidx^2.
// With dcn == 4 the alpha channel is 255 (5-6-5) or 255/0 according to bit 15.
__kernel void RGB5x52RGB(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_pos = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        const ushort packed = *((__global const ushort*)(src + src_pos));

        // The lowest five bits are decoded the same way in both layouts.
        dst[dst_pos + bidx] = (uchar)(packed << 3);
#if greenbits == 6
        dst[dst_pos + 1] = (uchar)((packed >> 3) & ~3);
        dst[dst_pos + (bidx^2)] = (uchar)((packed >> 8) & ~7);
#else
        dst[dst_pos + 1] = (uchar)((packed >> 2) & ~7);
        dst[dst_pos + (bidx^2)] = (uchar)((packed >> 7) & ~7);
#endif

#if dcn == 4
#if greenbits == 6
        dst[dst_pos + 3] = 255;
#else
        dst[dst_pos + 3] = packed & 0x8000 ? 255 : 0;
#endif
#endif

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
// RGB2RGB5x5: packs an 8-bit 3- or 4-channel pixel into a 16-bit value.
// greenbits == 6 produces a 5-6-5 layout. Otherwise the layout is 5-5-5, and for a
// 4-channel source bit 15 is set when the source alpha is non-zero.
// A work-item handles column get_global_id(0) and up to PIX_PER_WI_Y consecutive rows.
__kernel void RGB2RGB5x5(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;
    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                // Load exactly as many bytes as the pixel has. A 4-byte load on a
                // 3-channel pixel would touch the byte after it, which is outside the
                // buffer for the last pixel of the image.
#if scn == 3
                uchar3 src_pix = vload3(0, src + src_index);
#else
                uchar4 src_pix = vload4(0, src + src_index);
#endif
#if greenbits == 6
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~3) << 3)|((src_pix.R_COMP&~7) << 8));
#elif scn == 3
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|((src_pix.R_COMP&~7) << 7));
#else
                // 5-5-5 with alpha: only reached when the source has a fourth channel.
                *((__global ushort*)(dst + dst_index)) = (ushort)((src_pix.B_COMP >> 3)|((src_pix.G_COMP&~7) << 2)|
                    ((src_pix.R_COMP&~7) << 7)|(src_pix.w ? 0x8000 : 0));
#endif
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
///////////////////////////////////// RGB5x5 <-> Gray //////////////////////////////////////
// BGR5x52Gray: converts a 16-bit packed pixel (5-6-5 when greenbits == 6, else 5-5-5)
// to one 8-bit luma value using the fixed-point B2Y/G2Y/R2Y weights.
// The destination is addressed with one byte per pixel.
__kernel void BGR5x52Gray(__global const uchar* src, int src_step, int src_offset,
                          __global uchar* dst, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_pos = mad24(y, dst_step, dst_offset + x);

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        const int t = *((__global const ushort*)(src + src_pos));

        // Expand each packed field to the top bits of an 8-bit value.
        const int blue = (t << 3) & 0xf8;
#if greenbits == 6
        const int green = (t >> 3) & 0xfc;
        const int red = (t >> 8) & 0xf8;
#else
        const int green = (t >> 2) & 0xf8;
        const int red = (t >> 7) & 0xf8;
#endif
        dst[dst_pos] = (uchar)CV_DESCALE(mad24(blue, B2Y, mad24(green, G2Y, red * R2Y)), yuv_shift);

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
// Gray2BGR5x5: packs one 8-bit gray value into a 16-bit pixel by writing the
// truncated value into all three colour fields (5-6-5 when greenbits == 6, else 5-5-5).
// The source is addressed with one byte per pixel.
__kernel void Gray2BGR5x5(__global const uchar* src, int src_step, int src_offset,
                          __global uchar* dst, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, src_offset + x);
    int dst_pos = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        const int gray = src[src_pos];
#if greenbits == 6
        const ushort packed = (ushort)((gray >> 3) | ((gray & ~3) << 3) | ((gray & ~7) << 8));
#else
        const int gray5 = gray >> 3;
        const ushort packed = (ushort)(gray5|(gray5 << 5)|(gray5 << 10));
#endif
        *((__global ushort*)(dst + dst_pos)) = packed;

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
#ifdef DEPTH_0
// RGBA2mRGBA: premultiplies the three colour channels of an 8-bit, 4-channel pixel by
// its alpha: c' = (c * alpha + HALF_MAX_NUM) / MAX_NUM. Alpha is copied unchanged.
__kernel void RGBA2mRGBA(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    // Four bytes per pixel on both sides.
    int src_pos = mad24(y, src_step, src_offset + (x << 2));
    int dst_pos = mad24(y, dst_step, dst_offset + (x << 2));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        const uchar4 pix = *(__global const uchar4 *)(src + src_pos);
        const uchar4 premul =
            (uchar4)(mad24(pix.x, pix.w, HALF_MAX_NUM) / MAX_NUM,
                     mad24(pix.y, pix.w, HALF_MAX_NUM) / MAX_NUM,
                     mad24(pix.z, pix.w, HALF_MAX_NUM) / MAX_NUM, pix.w);
        *(__global uchar4 *)(dst + dst_pos) = premul;

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
// mRGBA2RGBA: undoes alpha premultiplication on an 8-bit, 4-channel pixel:
// c' = (c * MAX_NUM + alpha / 2) / alpha. A pixel with zero alpha becomes all zeros.
__kernel void mRGBA2RGBA(__global const uchar* src, int src_step, int src_offset,
                         __global uchar* dst, int dst_step, int dst_offset,
                         int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, 4, src_offset));
    int dst_pos = mad24(y, dst_step, mad24(x, 4, dst_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        const uchar4 pix = *(__global const uchar4 *)(src + src_pos);
        const uchar alpha = pix.w, alpha_half = alpha / 2;
        __global uchar4 *out = (__global uchar4 *)(dst + dst_pos);

        if (alpha != 0)
            *out = (uchar4)(mad24(pix.x, MAX_NUM, alpha_half) / alpha,
                            mad24(pix.y, MAX_NUM, alpha_half) / alpha,
                            mad24(pix.z, MAX_NUM, alpha_half) / alpha, alpha);
        else
            *out = (uchar4)(0, 0, 0, 0);   // avoids dividing by a zero alpha

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
#endif

View File

@ -0,0 +1,674 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/**************************************PUBLICFUNC*************************************/
// Per-depth configuration, selected by the build-time value of `depth`:
//   DATA_TYPE    - element type of one channel
//   MAX_NUM      - value used for a fully opaque alpha channel
//   HALF_MAX_NUM - offset added to / subtracted from the chroma channels
//   COEFF_TYPE   - arithmetic type of the conversion coefficients
//   SAT_CAST     - conversion of a computed value back to DATA_TYPE (saturating for
//                  the integer depths, identity for float)
//   DEPTH_n      - marker macro tested with #ifdef in the kernels
#if depth == 0
#define DATA_TYPE uchar
#define MAX_NUM 255
#define HALF_MAX_NUM 128
#define COEFF_TYPE int
#define SAT_CAST(num) convert_uchar_sat(num)
#define DEPTH_0
#elif depth == 2
#define DATA_TYPE ushort
#define MAX_NUM 65535
#define HALF_MAX_NUM 32768
#define COEFF_TYPE int
#define SAT_CAST(num) convert_ushort_sat(num)
#define DEPTH_2
#elif depth == 5
#define DATA_TYPE float
#define MAX_NUM 1.0f
#define HALF_MAX_NUM 0.5f
#define COEFF_TYPE float
#define SAT_CAST(num) (num)
#define DEPTH_5
#else
#error "invalid depth: should be 0 (CV_8U), 2 (CV_16U) or 5 (CV_32F)"
#endif
// Rounding right shift: adds half of 2^n to x before shifting right by n bits.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
// Fixed-point luma weights. R2Y + G2Y + B2Y == 1 << yuv_shift (16384), i.e. they are
// the float weights R2YF/G2YF/B2YF below scaled by 2^yuv_shift and rounded.
enum
{
yuv_shift = 14,
R2Y = 4899,
G2Y = 9617,
B2Y = 1868,
};
//constants for conversion from/to RGB and Gray, YUV, YCrCb according to BT.601
#define B2YF 0.114f
#define G2YF 0.587f
#define R2YF 0.299f
// In each group below, the *I constants are the corresponding *F constants scaled by
// 2^yuv_shift and rounded (e.g. 0.564 * 16384 ~= 9241).
//to YCbCr
#define YCBF 0.564f
#define YCRF 0.713f
#define YCBI 9241
#define YCRI 11682
//to YUV
#define B2UF 0.492f
#define R2VF 0.877f
#define B2UI 8061
#define R2VI 14369
//from YUV
#define U2BF 2.032f
#define U2GF -0.395f
#define V2GF -0.581f
#define V2RF 1.140f
#define U2BI 33292
#define U2GI -6472
#define V2GI -9519
#define V2RI 18678
//from YCrCb
#define CR2RF 1.403f
#define CB2GF -0.344f
#define CR2GF -0.714f
#define CB2BF 1.773f
#define CR2RI 22987
#define CB2GI -5636
#define CR2GI -11698
#define CB2BI 29049
// Size in bytes of one source / destination pixel. scn and dcn are not defined in
// the visible part of this file; presumably they are channel counts passed as build options.
#define scnbytes ((int)sizeof(DATA_TYPE)*scn)
#define dcnbytes ((int)sizeof(DATA_TYPE)*dcn)
// Map colour names to vector components: with bidx == 0 blue is component x and red
// is component z; otherwise red is x and blue is z. Green is always y.
#if bidx == 0
#define R_COMP z
#define G_COMP y
#define B_COMP x
#else
#define R_COMP x
#define G_COMP y
#define B_COMP z
#endif
// Defaults for options that only some kernels use, so the file still compiles when
// they are not supplied at build time.
#ifndef uidx
#define uidx 0
#endif
#ifndef yidx
#define yidx 0
#endif
#ifndef PIX_PER_WI_X
#define PIX_PER_WI_X 1
#endif
// Two-level token pasting so that DATA_TYPE is expanded before concatenation,
// giving e.g. uchar4 / uchar3 when DATA_TYPE is uchar.
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define DATA_TYPE_4 CAT(DATA_TYPE, 4)
#define DATA_TYPE_3 CAT(DATA_TYPE, 3)
///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
// Coefficients for RGB2YUV: three luma weights (for B, G, R) followed by the
// scale factors applied to (B - Y) and (R - Y).
__constant float c_RGB2YUVCoeffs_f[5] = { B2YF, G2YF, R2YF, B2UF, R2VF };
__constant int c_RGB2YUVCoeffs_i[5] = { B2Y, G2Y, R2Y, B2UI, R2VI };

// RGB2YUV: converts the first three channels of each source pixel to Y, U, V.
// Y is the weighted sum of B, G, R; U and V are the scaled differences (B - Y) and
// (R - Y) offset by HALF_MAX_NUM. Integer depths use fixed-point arithmetic with a
// rounding shift of yuv_shift bits and a saturating store.
__kernel void RGB2YUV(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dt_offset,
                      int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols)
        return;

    int src_pos = mad24(y, src_step, mad24(x, scnbytes, src_offset));
    int dst_pos = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row)
    {
        if (y >= rows)
            break;

        __global const DATA_TYPE* in = (__global const DATA_TYPE*)(srcptr + src_pos);
        __global DATA_TYPE* out = (__global DATA_TYPE*)(dstptr + dst_pos);
        const DATA_TYPE_3 pix = vload3(0, in);
        const DATA_TYPE b = pix.B_COMP, g = pix.G_COMP, r = pix.R_COMP;

#ifdef DEPTH_5
        __constant float * k = c_RGB2YUVCoeffs_f;
        const DATA_TYPE Y = fma(b, k[0], fma(g, k[1], r * k[2]));
        const DATA_TYPE U = fma(b - Y, k[3], HALF_MAX_NUM);
        const DATA_TYPE V = fma(r - Y, k[4], HALF_MAX_NUM);
#else
        __constant int * k = c_RGB2YUVCoeffs_i;
        // Chroma offset expressed in the same fixed-point scale as the products.
        const int delta = HALF_MAX_NUM * (1 << yuv_shift);
        const int Y = CV_DESCALE(mad24(b, k[0], mad24(g, k[1], mul24(r, k[2]))), yuv_shift);
        const int U = CV_DESCALE(mad24(b - Y, k[3], delta), yuv_shift);
        const int V = CV_DESCALE(mad24(r - Y, k[4], delta), yuv_shift);
#endif

        out[0] = SAT_CAST( Y );
        out[1] = SAT_CAST( U );
        out[2] = SAT_CAST( V );

        ++y;
        dst_pos += dst_step;
        src_pos += src_step;
    }
}
// Coefficients for YUV2RGB, in the order: U->B, U->G, V->G, V->R.
__constant float c_YUV2RGBCoeffs_f[4] = { U2BF, U2GF, V2GF, V2RF };
__constant int c_YUV2RGBCoeffs_i[4] = { U2BI, U2GI, V2GI, V2RI };

// YUV2RGB: converts Y, U, V (the first three source channels) to B, G, R.
// HALF_MAX_NUM is subtracted from U and V before they are scaled and added to Y.
// Blue is written to channel bidx, red to channel bidx^2; with dcn == 4 the fourth
// channel is set to MAX_NUM. Integer depths use fixed-point arithmetic with a
// rounding shift of yuv_shift bits and a saturating store.
__kernel void YUV2RGB(__global const uchar* srcptr, int src_step, int src_offset,
                      __global uchar* dstptr, int dst_step, int dt_offset,
                      int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;
    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + src_index);
                __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);
                // Only three components are used, so load exactly three. A 4-element
                // load on a 3-channel pixel would also read the element after it,
                // which is outside the buffer for the last pixel of the image.
                DATA_TYPE_3 src_pix = vload3(0, src);
                DATA_TYPE Y = src_pix.x, U = src_pix.y, V = src_pix.z;
#ifdef DEPTH_5
                __constant float * coeffs = c_YUV2RGBCoeffs_f;
                float r = fma(V - HALF_MAX_NUM, coeffs[3], Y);
                float g = fma(V - HALF_MAX_NUM, coeffs[2], fma(U - HALF_MAX_NUM, coeffs[1], Y));
                float b = fma(U - HALF_MAX_NUM, coeffs[0], Y);
#else
                __constant int * coeffs = c_YUV2RGBCoeffs_i;
                const int r = Y + CV_DESCALE(mul24(V - HALF_MAX_NUM, coeffs[3]), yuv_shift);
                const int g = Y + CV_DESCALE(mad24(V - HALF_MAX_NUM, coeffs[2], mul24(U - HALF_MAX_NUM, coeffs[1])), yuv_shift);
                const int b = Y + CV_DESCALE(mul24(U - HALF_MAX_NUM, coeffs[0]), yuv_shift);
#endif
                dst[bidx] = SAT_CAST( b );
                dst[1] = SAT_CAST( g );
                dst[bidx^2] = SAT_CAST( r );
#if dcn == 4
                dst[3] = MAX_NUM;
#endif
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
// Coefficients shared by the subsampled YUV -> RGB kernels:
// [0] luma scale, [1] U->B, [2] U->G, [3] V->G, [4] V->R.
__constant float c_YUV2RGBCoeffs_420[5] = { 1.163999557f, 2.017999649f, -0.390999794f,
                                            -0.812999725f, 1.5959997177f };

// YUV2RGB_NVx: converts a luma plane plus an interleaved chroma plane to 8-bit
// RGB/BGR. Each work-item iteration handles a 2x2 block of luma samples sharing one
// chroma pair, read from row (rows + y) of the source at the same column offset as
// the luma. uidx selects which byte of the pair is U. Luma has 16 subtracted and is
// clamped at 0 before scaling; 0.5f is folded into the chroma terms before the
// saturating conversion to uchar.
__kernel void YUV2RGB_NVx(__global const uchar* srcptr, int src_step, int src_offset,
                          __global uchar* dstptr, int dst_step, int dt_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols / 2)
        return;

    __constant float* k = c_YUV2RGBCoeffs_420;

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row, ++y)
    {
        // y only grows, so once it leaves the image no later row can be valid.
        if (y >= rows / 2)
            break;

        __global const uchar* luma = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset);
        __global const uchar* chroma = srcptr + mad24(rows + y, src_step, (x << 1) + src_offset);
        __global uchar* top = dstptr + mad24(y << 1, dst_step, mad24(x, dcn<<1, dt_offset));
        __global uchar* bottom = top + dst_step;

        // Read the whole 2x2 luma block before any output is written.
        float y00 = luma[0];
        float y01 = luma[1];
        float y10 = luma[src_step];
        float y11 = luma[src_step + 1];

        const float U = ((float)chroma[uidx]) - HALF_MAX_NUM;
        const float V = ((float)chroma[1-uidx]) - HALF_MAX_NUM;

        // Chroma contributions are identical for all four pixels of the block.
        const float ruv = fma(k[4], V, 0.5f);
        const float guv = fma(k[3], V, fma(k[2], U, 0.5f));
        const float buv = fma(k[1], U, 0.5f);

        // Top-left pixel.
        y00 = max(0.f, y00 - 16.f) * k[0];
        top[2 - bidx] = convert_uchar_sat(y00 + ruv);
        top[1] = convert_uchar_sat(y00 + guv);
        top[bidx] = convert_uchar_sat(y00 + buv);
#if dcn == 4
        top[3] = 255;
#endif

        // Top-right pixel.
        y01 = max(0.f, y01 - 16.f) * k[0];
        top[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
        top[dcn + 1] = convert_uchar_sat(y01 + guv);
        top[dcn + bidx] = convert_uchar_sat(y01 + buv);
#if dcn == 4
        top[7] = 255;
#endif

        // Bottom-left pixel.
        y10 = max(0.f, y10 - 16.f) * k[0];
        bottom[2 - bidx] = convert_uchar_sat(y10 + ruv);
        bottom[1] = convert_uchar_sat(y10 + guv);
        bottom[bidx] = convert_uchar_sat(y10 + buv);
#if dcn == 4
        bottom[3] = 255;
#endif

        // Bottom-right pixel.
        y11 = max(0.f, y11 - 16.f) * k[0];
        bottom[dcn + 2 - bidx] = convert_uchar_sat(y11 + ruv);
        bottom[dcn + 1] = convert_uchar_sat(y11 + guv);
        bottom[dcn + bidx] = convert_uchar_sat(y11 + buv);
#if dcn == 4
        bottom[7] = 255;
#endif
    }
}
#if uidx < 2
// YUV2RGB_YV12_IYUV: converts a luma plane followed by two separate chroma planes to
// 8-bit RGB/BGR. Each loop iteration handles one 2x2 block of luma samples that
// shares a single chroma pair. uidx selects which of the two chroma planes holds U.
// Both chroma planes are addressed starting at source row `rows`, so `rows` here
// appears to be the height of the luma plane / output image - TODO confirm with the host code.
__kernel void YUV2RGB_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dt_offset,
int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols / 2)
{
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows / 2 )
{
// Top-left luma sample of the 2x2 block and the two destination rows it maps to.
__global const uchar* ysrc = srcptr + mad24(y << 1, src_step, (x << 1) + src_offset);
__global uchar* dst1 = dstptr + mad24(y << 1, dst_step, x * (dcn<<1) + dt_offset);
__global uchar* dst2 = dst1 + dst_step;
float Y1 = ysrc[0];
float Y2 = ysrc[1];
float Y3 = ysrc[src_step];
float Y4 = ysrc[src_step + 1];
#ifdef SRC_CONT
// Contiguous source: the chroma data is indexed as a flat array that starts right
// after `rows` luma rows; the second plane lies (rows * cols) / 4 bytes after the first.
__global const uchar* uvsrc = srcptr + mad24(rows, src_step, src_offset);
int u_ind = mad24(y, cols >> 1, x);
float uv[2] = { ((float)uvsrc[u_ind]) - HALF_MAX_NUM, ((float)uvsrc[u_ind + ((rows * cols) >> 2)]) - HALF_MAX_NUM };
#else
// Strided source: two chroma rows of cols/2 bytes are packed into each source row,
// so chroma row y lives in source row rows + y/2, in the half selected by y % 2.
// The second plane starts rows/4 source rows later; when rows is not a multiple of 4
// it starts mid-row, which the vsteps[] adjustment accounts for.
int vsteps[2] = { cols >> 1, src_step - (cols >> 1)};
__global const uchar* usrc = srcptr + mad24(rows + (y>>1), src_step, src_offset + (y%2)*(cols >> 1) + x);
__global const uchar* vsrc = usrc + mad24(rows >> 2, src_step, rows % 4 ? vsteps[y%2] : 0);
float uv[2] = { ((float)usrc[0]) - HALF_MAX_NUM, ((float)vsrc[0]) - HALF_MAX_NUM };
#endif
float U = uv[uidx];
float V = uv[1-uidx];
__constant float* coeffs = c_YUV2RGBCoeffs_420;
// Chroma contributions are shared by all four pixels; the 0.5f term is presumably a
// rounding bias for the saturating conversion below.
float ruv = fma(coeffs[4], V, 0.5f);
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
float buv = fma(coeffs[1], U, 0.5f);
// For each pixel: subtract 16 from luma, clamp at 0, scale, then add the chroma terms.
Y1 = max(0.f, Y1 - 16.f) * coeffs[0];
dst1[2 - bidx] = convert_uchar_sat(Y1 + ruv);
dst1[1] = convert_uchar_sat(Y1 + guv);
dst1[bidx] = convert_uchar_sat(Y1 + buv);
#if dcn == 4
dst1[3] = 255;
#endif
Y2 = max(0.f, Y2 - 16.f) * coeffs[0];
dst1[dcn + 2 - bidx] = convert_uchar_sat(Y2 + ruv);
dst1[dcn + 1] = convert_uchar_sat(Y2 + guv);
dst1[dcn + bidx] = convert_uchar_sat(Y2 + buv);
#if dcn == 4
dst1[7] = 255;
#endif
Y3 = max(0.f, Y3 - 16.f) * coeffs[0];
dst2[2 - bidx] = convert_uchar_sat(Y3 + ruv);
dst2[1] = convert_uchar_sat(Y3 + guv);
dst2[bidx] = convert_uchar_sat(Y3 + buv);
#if dcn == 4
dst2[3] = 255;
#endif
Y4 = max(0.f, Y4 - 16.f) * coeffs[0];
dst2[dcn + 2 - bidx] = convert_uchar_sat(Y4 + ruv);
dst2[dcn + 1] = convert_uchar_sat(Y4 + guv);
dst2[dcn + bidx] = convert_uchar_sat(Y4 + buv);
#if dcn == 4
dst2[7] = 255;
#endif
}
// y advances even when the row pair was skipped; later iterations then skip as well.
++y;
}
}
}
#endif
#if uidx < 2
// Coefficients for RGB -> planar YUV 4:2:0, applied to (R, G, B):
// [0..2] luma, [3..5] first chroma value, [5..7] second chroma value
// (index 5 is shared by both chroma formulas).
__constant float c_RGB2YUVCoeffs_420[8] = { 0.256999969f, 0.50399971f, 0.09799957f, -0.1479988098f, -0.2909994125f,
0.438999176f, -0.3679990768f, -0.0709991455f };
// RGB2YUV_YV12_IYUV: converts 8-bit RGB/BGR pixels to a luma plane followed by two
// subsampled chroma planes. Each loop iteration handles two source rows; chroma is
// taken from the top row only, one sample per 2x2 block. uidx selects which computed
// chroma value goes to the first chroma plane.
// The loop bound rows / 3 and y_rows = rows / 3 * 2 suggest that `rows` is the row
// count of the planar destination (1.5x the image height) - TODO confirm with the host code.
__kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int src_offset,
__global uchar* dstptr, int dst_step, int dst_offset,
int rows, int cols)
{
int x = get_global_id(0) * PIX_PER_WI_X;
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols/2)
{
// x and y index 2x2 blocks, hence the doubling for pixel coordinates.
int src_index = mad24(y << 1, src_step, mad24(x << 1, scn, src_offset));
int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset);
int y_rows = rows / 3 * 2;
int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)};
__constant float* coeffs = c_RGB2YUVCoeffs_420;
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
{
if (y < rows / 3)
{
__global const uchar* src1 = srcptr + src_index;
__global const uchar* src2 = src1 + src_step;
__global uchar* ydst1 = dstptr + ydst_index;
__global uchar* ydst2 = ydst1 + dst_step;
// Two chroma rows of cols/2 bytes are packed into each destination row after the
// y_rows luma rows; the second chroma plane starts y_rows/4 rows later, mid-row when
// y_rows is not a multiple of 4 (handled through vsteps[]).
__global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x);
__global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0);
#if PIX_PER_WI_X == 2
// Wide path: four pixels per row are fetched as 32-bit words (3 words for scn == 3,
// 4 words for scn == 4) and expanded to floats.
// NOTE(review): the int loads assume src1/src2 are 4-byte aligned - confirm against the host code.
int s11 = *((__global const int*) src1);
int s12 = *((__global const int*) src1 + 1);
int s13 = *((__global const int*) src1 + 2);
#if scn == 4
int s14 = *((__global const int*) src1 + 3);
#endif
int s21 = *((__global const int*) src2);
int s22 = *((__global const int*) src2 + 1);
int s23 = *((__global const int*) src2 + 2);
#if scn == 4
int s24 = *((__global const int*) src2 + 3);
#endif
float src_pix1[scn * 4], src_pix2[scn * 4];
*((float4*) src_pix1) = convert_float4(as_uchar4(s11));
*((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12));
*((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13));
#if scn == 4
*((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14));
#endif
*((float4*) src_pix2) = convert_float4(as_uchar4(s21));
*((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22));
*((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23));
#if scn == 4
*((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24));
#endif
// Luma for the four pixels of each row; 16.5f is the +16 offset plus a 0.5 term.
uchar4 y1, y2;
y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[ 2-bidx], fma(coeffs[1], src_pix1[ 1], fma(coeffs[2], src_pix1[ bidx], 16.5f))));
y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[ scn+2-bidx], fma(coeffs[1], src_pix1[ scn+1], fma(coeffs[2], src_pix1[ scn+bidx], 16.5f))));
y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f))));
y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f))));
y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[ 2-bidx], fma(coeffs[1], src_pix2[ 1], fma(coeffs[2], src_pix2[ bidx], 16.5f))));
y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[ scn+2-bidx], fma(coeffs[1], src_pix2[ scn+1], fma(coeffs[2], src_pix2[ scn+bidx], 16.5f))));
y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f))));
y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f))));
*((__global int*) ydst1) = as_int(y1);
*((__global int*) ydst2) = as_int(y2);
// Chroma from pixels 0 and 2 of the top row (one sample per 2x2 block);
// 128.5f is the +128 offset plus a 0.5 term.
float uv[4] = { fma(coeffs[3], src_pix1[ 2-bidx], fma(coeffs[4], src_pix1[ 1], fma(coeffs[5], src_pix1[ bidx], 128.5f))),
fma(coeffs[5], src_pix1[ 2-bidx], fma(coeffs[6], src_pix1[ 1], fma(coeffs[7], src_pix1[ bidx], 128.5f))),
fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))),
fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) };
udst[0] = convert_uchar_sat(uv[uidx] );
vdst[0] = convert_uchar_sat(uv[1 - uidx]);
udst[1] = convert_uchar_sat(uv[2 + uidx]);
vdst[1] = convert_uchar_sat(uv[3 - uidx]);
#else
// Narrow path: one 2x2 block, i.e. two pixels from each of the two rows.
// NOTE(review): vload4 reads four bytes per pixel even when scn == 3, so the load of
// the last pixel in a row touches one byte past it - confirm the buffer allows this.
float4 src_pix1 = convert_float4(vload4(0, src1));
float4 src_pix2 = convert_float4(vload4(0, src1+scn));
float4 src_pix3 = convert_float4(vload4(0, src2));
float4 src_pix4 = convert_float4(vload4(0, src2+scn));
ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f))));
ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f))));
ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f))));
ydst2[1] = convert_uchar_sat(fma(coeffs[0], src_pix4.R_COMP, fma(coeffs[1], src_pix4.G_COMP, fma(coeffs[2], src_pix4.B_COMP, 16.5f))));
// Chroma is computed from the top-left pixel of the block only.
float uv[2] = { fma(coeffs[3], src_pix1.R_COMP, fma(coeffs[4], src_pix1.G_COMP, fma(coeffs[5], src_pix1.B_COMP, 128.5f))),
fma(coeffs[5], src_pix1.R_COMP, fma(coeffs[6], src_pix1.G_COMP, fma(coeffs[7], src_pix1.B_COMP, 128.5f))) };
udst[0] = convert_uchar_sat(uv[uidx] );
vdst[0] = convert_uchar_sat(uv[1-uidx]);
#endif
// Advance by one block row, i.e. two pixel rows in the source and the luma plane.
++y;
src_index += 2*src_step;
ydst_index += 2*dst_step;
}
}
}
}
#endif
// YUV2RGB_422: converts packed 4:2:2 data (four source bytes describing two pixels)
// to 8-bit RGB/BGR. uidx gives the byte position of U within the group, V is two
// bytes further (modulo 4); yidx gives the position of the first luma byte, the
// second luma byte being two bytes further. Luma has 16 subtracted and is clamped at
// 0 before scaling; 0.5f is folded into the chroma terms before the saturating
// conversion to uchar.
__kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_offset,
                          __global uchar* dstptr, int dst_step, int dst_offset,
                          int rows, int cols)
{
    const int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;

    if (x >= cols / 2)
        return;

    __global const uchar* in = srcptr + mad24(y, src_step, (x << 2) + src_offset);
    __global uchar* out = dstptr + mad24(y, dst_step, mad24(x << 1, dcn, dst_offset));
    __constant float* k = c_YUV2RGBCoeffs_420;

    #pragma unroll
    for (int row = 0; row < PIX_PER_WI_Y; ++row, ++y, in += src_step, out += dst_step)
    {
        // y only grows, so once it leaves the image no later row can be valid.
        if (y >= rows)
            break;

#ifndef USE_OPTIMIZED_LOAD
        const float U = ((float) in[uidx]) - HALF_MAX_NUM;
        const float V = ((float) in[(2 + uidx) % 4]) - HALF_MAX_NUM;
        const float luma0 = max(0.f, ((float) in[yidx]) - 16.f) * k[0];
        const float luma1 = max(0.f, ((float) in[yidx + 2]) - 16.f) * k[0];
#else
        // Fetch the four bytes with a single 32-bit load and split them afterwards.
        const int word = *((__global int*) in);
        float quad[4] = { word & 0xff, (word >> 8) & 0xff, (word >> 16) & 0xff, (word >> 24) & 0xff};
        const float U = quad[uidx] - HALF_MAX_NUM;
        const float V = quad[(2 + uidx) % 4] - HALF_MAX_NUM;
        const float luma0 = max(0.f, quad[yidx] - 16.f) * k[0];
        const float luma1 = max(0.f, quad[yidx + 2] - 16.f) * k[0];
#endif

        // Both pixels share the same chroma contributions.
        const float ruv = fma(k[4], V, 0.5f);
        const float guv = fma(k[3], V, fma(k[2], U, 0.5f));
        const float buv = fma(k[1], U, 0.5f);

        out[2 - bidx] = convert_uchar_sat(luma0 + ruv);
        out[1] = convert_uchar_sat(luma0 + guv);
        out[bidx] = convert_uchar_sat(luma0 + buv);
#if dcn == 4
        out[3] = 255;
#endif

        out[dcn + 2 - bidx] = convert_uchar_sat(luma1 + ruv);
        out[dcn + 1] = convert_uchar_sat(luma1 + guv);
        out[dcn + bidx] = convert_uchar_sat(luma1 + buv);
#if dcn == 4
        out[7] = 255;
#endif
    }
}
///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
// Coefficients for RGB2YCrCb: three luma weights (for R, G, B) followed by the
// scale factors applied to (R - Y) and (B - Y).
__constant float c_RGB2YCrCbCoeffs_f[5] = {R2YF, G2YF, B2YF, YCRF, YCBF};
__constant int c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, YCRI, YCBI};

// RGB2YCrCb: converts the first three channels of each source pixel to Y, Cr, Cb.
// Y is the weighted sum of R, G, B; Cr and Cb are the scaled differences (R - Y) and
// (B - Y) offset by HALF_MAX_NUM. Integer depths use fixed-point arithmetic with a
// rounding shift of yuv_shift bits and a saturating store.
__kernel void RGB2YCrCb(__global const uchar* srcptr, int src_step, int src_offset,
                        __global uchar* dstptr, int dst_step, int dt_offset,
                        int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;
    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dt_offset));
        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE* src = (__global const DATA_TYPE*)(srcptr + src_index);
                __global DATA_TYPE* dst = (__global DATA_TYPE*)(dstptr + dst_index);
                // Only B, G and R are used, so load exactly three components (as
                // RGB2YUV does). A 4-element load on a 3-channel pixel would also read
                // the element after it, which is outside the buffer for the last pixel.
                DATA_TYPE_3 src_pix = vload3(0, src);
                DATA_TYPE b = src_pix.B_COMP, g = src_pix.G_COMP, r = src_pix.R_COMP;
#ifdef DEPTH_5
                __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
                DATA_TYPE Y = fma(b, coeffs[2], fma(g, coeffs[1], r * coeffs[0]));
                DATA_TYPE Cr = fma(r - Y, coeffs[3], HALF_MAX_NUM);
                DATA_TYPE Cb = fma(b - Y, coeffs[4], HALF_MAX_NUM);
#else
                __constant int * coeffs = c_RGB2YCrCbCoeffs_i;
                // Chroma offset expressed in the same fixed-point scale as the products.
                int delta = HALF_MAX_NUM * (1 << yuv_shift);
                int Y = CV_DESCALE(mad24(b, coeffs[2], mad24(g, coeffs[1], mul24(r, coeffs[0]))), yuv_shift);
                int Cr = CV_DESCALE(mad24(r - Y, coeffs[3], delta), yuv_shift);
                int Cb = CV_DESCALE(mad24(b - Y, coeffs[4], delta), yuv_shift);
#endif
                dst[0] = SAT_CAST( Y );
                dst[1] = SAT_CAST( Cr );
                dst[2] = SAT_CAST( Cb );
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}
// Coefficients for YCrCb2RGB, in the order: Cr->R, Cr->G, Cb->G, Cb->B.
__constant float c_YCrCb2RGBCoeffs_f[4] = { CR2RF, CR2GF, CB2GF, CB2BF };
__constant int c_YCrCb2RGBCoeffs_i[4] = { CR2RI, CR2GI, CB2GI, CB2BI };

// YCrCb2RGB: converts Y, Cr, Cb (the first three source channels) to R, G, B.
// HALF_MAX_NUM is subtracted from Cr and Cb before they are scaled and added to Y.
// Blue is written to channel bidx, red to channel bidx^2; with dcn == 4 the fourth
// channel is set to MAX_NUM. Integer depths use fixed-point arithmetic with a
// rounding shift of yuv_shift bits and a saturating store.
__kernel void YCrCb2RGB(__global const uchar* src, int src_step, int src_offset,
                        __global uchar* dst, int dst_step, int dst_offset,
                        int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1) * PIX_PER_WI_Y;
    if (x < cols)
    {
        int src_index = mad24(y, src_step, mad24(x, scnbytes, src_offset));
        int dst_index = mad24(y, dst_step, mad24(x, dcnbytes, dst_offset));
        #pragma unroll
        for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
        {
            if (y < rows)
            {
                __global const DATA_TYPE * srcptr = (__global const DATA_TYPE*)(src + src_index);
                __global DATA_TYPE * dstptr = (__global DATA_TYPE*)(dst + dst_index);
                // Only three components are used, so load exactly three. A 4-element
                // load on a 3-channel pixel would also read the element after it,
                // which is outside the buffer for the last pixel of the image.
                DATA_TYPE_3 src_pix = vload3(0, srcptr);
                DATA_TYPE yp = src_pix.x, cr = src_pix.y, cb = src_pix.z;
#ifdef DEPTH_5
                __constant float * coeff = c_YCrCb2RGBCoeffs_f;
                float r = fma(coeff[0], cr - HALF_MAX_NUM, yp);
                float g = fma(coeff[1], cr - HALF_MAX_NUM, fma(coeff[2], cb - HALF_MAX_NUM, yp));
                float b = fma(coeff[3], cb - HALF_MAX_NUM, yp);
#else
                __constant int * coeff = c_YCrCb2RGBCoeffs_i;
                int r = yp + CV_DESCALE(coeff[0] * (cr - HALF_MAX_NUM), yuv_shift);
                int g = yp + CV_DESCALE(mad24(coeff[1], cr - HALF_MAX_NUM, coeff[2] * (cb - HALF_MAX_NUM)), yuv_shift);
                int b = yp + CV_DESCALE(coeff[3] * (cb - HALF_MAX_NUM), yuv_shift);
#endif
                dstptr[(bidx^2)] = SAT_CAST(r);
                dstptr[1] = SAT_CAST(g);
                dstptr[bidx] = SAT_CAST(b);
#if dcn == 4
                dstptr[3] = MAX_NUM;
#endif
                ++y;
                dst_index += dst_step;
                src_index += src_step;
            }
        }
    }
}

File diff suppressed because it is too large Load Diff