Merge branch 'master' of https://github.com/Itseez/opencv into brief_cl

This commit is contained in:
Matthias Bady 2013-12-23 19:47:06 +01:00
commit 928924d248
62 changed files with 494145 additions and 575527 deletions

33
LICENSE Normal file
View File

@ -0,0 +1,33 @@
By downloading, copying, installing or using the software you agree to this license.
If you do not agree to this license, do not download, install,
copy or use the software.
License Agreement
For Open Source Computer Vision Library
(3-clause BSD License)
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the names of the copyright holders nor the names of the contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as is" and
any express or implied warranties, including, but not limited to, the implied
warranties of merchantability and fitness for a particular purpose are disclaimed.
In no event shall copyright holders or contributors be liable for any direct,
indirect, incidental, special, exemplary, or consequential damages
(including, but not limited to, procurement of substitute goods or services;
loss of use, data, or profits; or business interruption) however caused
and on any theory of liability, whether in contract, strict liability,
or tort (including negligence or otherwise) arising in any way out of
the use of this software, even if advised of the possibility of such damage.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -617,9 +617,9 @@ inline void Mat::release()
{
if( u && CV_XADD(&u->refcount, -1) == 1 )
deallocate();
u = NULL;
data = datastart = dataend = datalimit = 0;
size.p[0] = 0;
u = 0;
}
inline

View File

@ -49,6 +49,7 @@ namespace cv { namespace ocl {
CV_EXPORTS bool haveOpenCL();
CV_EXPORTS bool useOpenCL();
CV_EXPORTS bool haveAmdBlas();
CV_EXPORTS bool haveAmdFft();
CV_EXPORTS void setUseOpenCL(bool flag);
CV_EXPORTS void finish2();
@ -427,6 +428,61 @@ public:
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
set(i, a12); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); set(i, a13); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14, typename _Tp15>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
}
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
bool runTask(bool sync, const Queue& q=Queue());

View File

@ -95,7 +95,7 @@
#undef clAmdFftSetPlanOutStride
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
#undef clAmdFftSetPlanPrecision
//#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#undef clAmdFftSetPlanScale
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
#undef clAmdFftSetPlanTransposeResult
@ -134,7 +134,7 @@ extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanH
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);

View File

@ -915,11 +915,12 @@ void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t bl
enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14 };
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
OCL_OP_RDIV_SCALE=15 };
static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
"OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
"OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", 0 };
"OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };
static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
InputArray _mask, bool bitwise, int oclop, bool haveScalar )
@ -1301,25 +1302,27 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int kercn = haveMask || haveScalar ? cn : 1;
char cvtstr[3][32], opts[1024];
char cvtstr[4][32], opts[1024];
sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT2=%s "
"-D dstT=%s -D workT=%s -D convertToWT1=%s "
"-D dstT=%s -D workT=%s -D scaleT=%s -D convertToWT1=%s "
"-D convertToWT2=%s -D convertToDT=%s%s",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, 1)),
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
const uchar* usrdata_p = (const uchar*)usrdata;
const double* usrdata_d = (const double*)usrdata;
float usrdata_f[3];
int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
if( n > 0 && wdepth == CV_32F )
{
for( i = 0; i < n; i++ )
@ -1352,13 +1355,20 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, buf, esz);
if( !haveMask )
k.args(src1arg, dstarg, scalararg);
{
if(n == 0)
k.args(src1arg, dstarg, scalararg);
else if(n == 1)
k.args(src1arg, dstarg, scalararg,
ocl::KernelArg(0, 0, 0, usrdata_p, usrdata_esz));
else
CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
}
else
k.args(src1arg, maskarg, dstarg, scalararg);
}
else
{
size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
src2 = _src2.getUMat();
ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cscale);
@ -1439,6 +1449,8 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
swapped12 = true;
if( oclop == OCL_OP_SUB )
oclop = OCL_OP_RSUB;
if ( oclop == OCL_OP_DIV_SCALE )
oclop = OCL_OP_RDIV_SCALE;
}
else if( !checkScalar(*psrc2, type1, kind2, kind1) )
CV_Error( CV_StsUnmatchedSizes,

View File

@ -1357,43 +1357,65 @@ void cv::LUT( InputArray _src, InputArray _lut, OutputArray _dst )
func(ptrs[0], lut.data, ptrs[1], len, cn, lutcn);
}
namespace cv {
static bool ocl_normalize( InputArray _src, OutputArray _dst, InputArray _mask, int rtype,
double scale, double shift )
{
UMat src = _src.getUMat(), dst = _dst.getUMat();
if( _mask.empty() )
src.convertTo( dst, rtype, scale, shift );
else
{
UMat temp;
src.convertTo( temp, rtype, scale, shift );
temp.copyTo( dst, _mask );
}
return true;
}
}
void cv::normalize( InputArray _src, OutputArray _dst, double a, double b,
int norm_type, int rtype, InputArray _mask )
{
Mat src = _src.getMat(), mask = _mask.getMat();
double scale = 1, shift = 0;
if( norm_type == CV_MINMAX )
{
double smin = 0, smax = 0;
double dmin = MIN( a, b ), dmax = MAX( a, b );
minMaxLoc( _src, &smin, &smax, 0, 0, mask );
minMaxLoc( _src, &smin, &smax, 0, 0, _mask );
scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
shift = dmin - smin*scale;
}
else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
{
scale = norm( src, norm_type, mask );
scale = norm( _src, norm_type, _mask );
scale = scale > DBL_EPSILON ? a/scale : 0.;
shift = 0;
}
else
CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if( rtype < 0 )
rtype = _dst.fixedType() ? _dst.depth() : src.depth();
rtype = _dst.fixedType() ? _dst.depth() : depth;
_dst.createSameSize(_src, CV_MAKETYPE(rtype, cn));
_dst.create(src.dims, src.size, CV_MAKETYPE(rtype, src.channels()));
Mat dst = _dst.getMat();
if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_normalize(_src, _dst, _mask, rtype, scale, shift))
return;
if( !mask.data )
Mat src = _src.getMat(), dst = _dst.getMat();
if( _mask.empty() )
src.convertTo( dst, rtype, scale, shift );
else
{
Mat temp;
src.convertTo( temp, rtype, scale, shift );
temp.copyTo( dst, mask );
temp.copyTo( dst, _mask );
}
}

View File

@ -544,15 +544,36 @@ void flip( InputArray _src, OutputArray _dst, int flip_mode )
}
static bool ocl_repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
UMat src = _src.getUMat(), dst = _dst.getUMat();
for (int y = 0; y < ny; ++y)
for (int x = 0; x < nx; ++x)
{
Rect roi(x * src.cols, y * src.rows, src.cols, src.rows);
UMat hdr(dst, roi);
src.copyTo(hdr);
}
return true;
}
void repeat(InputArray _src, int ny, int nx, OutputArray _dst)
{
Mat src = _src.getMat();
CV_Assert( src.dims <= 2 );
CV_Assert( _src.dims() <= 2 );
CV_Assert( ny > 0 && nx > 0 );
_dst.create(src.rows*ny, src.cols*nx, src.type());
Mat dst = _dst.getMat();
Size ssize = src.size(), dsize = dst.size();
Size ssize = _src.size();
_dst.create(ssize.height*ny, ssize.width*nx, _src.type());
if (ocl::useOpenCL() && _src.isUMat())
{
CV_Assert(ocl_repeat(_src, ny, nx, _dst));
return;
}
Mat src = _src.getMat(), dst = _dst.getMat();
Size dsize = dst.size();
int esz = (int)src.elemSize();
int x, y;
ssize.width *= esz; dsize.width *= esz;

View File

@ -40,6 +40,9 @@
//M*/
#include "precomp.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencl_kernels.hpp"
namespace cv
{
@ -1473,8 +1476,261 @@ typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, in
typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
#endif
#ifdef HAVE_CLAMDFFT
namespace cv {
#define CLAMDDFT_Assert(func) \
{ \
clAmdFftStatus s = (func); \
CV_Assert(s == CLFFT_SUCCESS); \
}
enum FftType
{
R2R = 0, // real to real
C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // complex to complex
};
class PlanCache
{
struct FftPlan
{
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), plHandle(0)
{
bool dft_inverse = (flags & DFT_INVERSE) != 0;
bool dft_scale = (flags & DFT_SCALE) != 0;
bool dft_rows = (flags & DFT_ROWS) != 0;
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
size_t batchSize = dft_rows ? dft_size.height : 1;
size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 };
size_t clStridesIn[3] = { 1, 1, 1 };
size_t clStridesOut[3] = { 1, 1, 1 };
int elemSize = doubleFP ? sizeof(double) : sizeof(float);
switch (fftType)
{
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case R2C:
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / elemSize;
clStridesOut[1] = dst_step / (elemSize << 1);
break;
case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (elemSize << 1);
clStridesOut[1] = dst_step / elemSize;
break;
case R2R:
default:
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
break;
}
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
// TODO remove all plans if context changed
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context2::getDefault().ptr(), dim, clLengthsIn))
// setting plan properties
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
// ready to bake
cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &commandQueue, NULL, NULL))
}
~FftPlan()
{
// clAmdFftDestroyPlan(&plHandle);
}
friend class PlanCache;
private:
Size dft_size;
int src_step, dst_step;
bool doubleFP;
bool inplace;
int flags;
FftType fftType;
cl_context context;
clAmdFftPlanHandle plHandle;
};
public:
static PlanCache & getInstance()
{
static PlanCache planCache;
return planCache;
}
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
bool inplace, int flags, FftType fftType)
{
cl_context currentContext = (cl_context)ocl::Context2::getDefault().ptr();
for (size_t i = 0, size = planStorage.size(); i < size; i ++)
{
const FftPlan * const plan = planStorage[i];
if (plan->dft_size == dft_size &&
plan->flags == flags &&
plan->src_step == src_step &&
plan->dst_step == dst_step &&
plan->doubleFP == doubleFP &&
plan->fftType == fftType &&
plan->inplace == inplace)
{
if (plan->context != currentContext)
{
planStorage.erase(planStorage.begin() + i);
break;
}
return plan->plHandle;
}
}
// no baked plan is found, so let's create a new one
FftPlan * newPlan = new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType);
planStorage.push_back(newPlan);
return newPlan->plHandle;
}
~PlanCache()
{
for (std::vector<FftPlan *>::iterator i = planStorage.begin(), end = planStorage.end(); i != end; ++i)
delete (*i);
planStorage.clear();
}
protected:
PlanCache() :
planStorage()
{
}
std::vector<FftPlan *> planStorage;
};
extern "C" {
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
{
UMatData * u = (UMatData *)p;
if( u && CV_XADD(&u->urefcount, -1) == 1 )
u->currAllocator->deallocate(u);
u = 0;
clReleaseEvent(e), e = 0;
}
}
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
Size ssize = _src.size();
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ( (!doubleSupport && depth == CV_64F) ||
!(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
_src.offset() != 0)
return false;
// if is not a multiplication of prime numbers { 2, 3, 5 }
if (ssize.area() != getOptimalDFTSize(ssize.area()))
return false;
int dst_complex_input = cn == 2 ? 1 : 0;
bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
CV_Assert(dft_complex_output + dft_real_output < 2);
FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
switch (fftType)
{
case C2C:
_dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
break;
case R2C: // TODO implement it if possible
case C2R: // TODO implement it if possible
case R2R: // AMD Fft does not support this type
default:
return false;
}
UMat src = _src.getUMat(), dst = _dst.getUMat();
bool inplace = src.u == dst.u;
clAmdFftPlanHandle plHandle = PlanCache::getInstance().
getPlanHandle(ssize, (int)src.step, (int)dst.step,
depth == CV_64F, inplace, flags, fftType);
// get the bufferSize
size_t bufferSize = 0;
CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
cl_command_queue commandQueue = (cl_command_queue)ocl::Queue::getDefault().ptr();
cl_event e = 0;
CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
1, &commandQueue, 0, NULL, &e,
&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
tmpBuffer.addref();
clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
return true;
}
#undef DFT_ASSERT
}
#endif // HAVE_CLAMDFFT
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2
&& nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
return;
#endif
static DFTFunc dft_tbl[6] =
{
(DFTFunc)DFT_32f,
@ -1879,9 +2135,46 @@ void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
}
namespace cv {
static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
OutputArray _dst, int flags, bool conjB )
{
int atype = _srcA.type(), btype = _srcB.type();
Size asize = _srcA.size(), bsize = _srcB.size();
CV_Assert(asize == bsize);
if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
return false;
UMat A = _srcA.getUMat(), B = _srcB.getUMat();
CV_Assert(A.size() == B.size());
_dst.create(A.size(), atype);
UMat dst = _dst.getUMat();
ocl::Kernel k("mulAndScaleSpectrums",
ocl::core::mulspectrums_oclsrc,
format("%s", conjB ? "-D CONJ" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = { asize.width, asize.height };
return k.run(2, globalsize, NULL, false);
}
}
void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
OutputArray _dst, int flags, bool conjB )
{
if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
return;
Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
int rows = srcA.rows, cols = srcA.cols;

View File

@ -56,7 +56,10 @@ void MatAllocator::map(UMatData*, int) const
void MatAllocator::unmap(UMatData* u) const
{
if(u->urefcount == 0 && u->refcount == 0)
{
deallocate(u);
u = NULL;
}
}
void MatAllocator::download(UMatData* u, void* dstptr,
@ -179,7 +182,6 @@ public:
UMatData* u = new UMatData(this);
u->data = u->origdata = data;
u->size = total;
u->refcount = data0 == 0;
if(data0)
u->flags |= UMatData::USER_ALLOCATED;
@ -195,6 +197,8 @@ public:
void deallocate(UMatData* u) const
{
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
if(u && u->refcount == 0)
{
if( !(u->flags & UMatData::USER_ALLOCATED) )
@ -392,6 +396,7 @@ void Mat::create(int d, const int* _sizes, int _type)
CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
}
addref();
finalizeHdr(*this);
}
@ -409,6 +414,7 @@ void Mat::deallocate()
{
if(u)
(u->currAllocator ? u->currAllocator : allocator ? allocator : getStdAllocator())->unmap(u);
u = NULL;
}
Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)

View File

@ -43,6 +43,7 @@
#include <map>
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#ifdef HAVE_OPENCL
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
@ -1423,6 +1424,83 @@ bool haveAmdBlas()
#endif
#ifdef HAVE_CLAMDFFT
class AmdFftHelper
{
public:
static AmdFftHelper & getInstance()
{
static AmdFftHelper amdFft;
return amdFft;
}
bool isAvailable() const
{
return g_isAmdFftAvailable;
}
~AmdFftHelper()
{
try
{
// clAmdFftTeardown();
}
catch (...) { }
}
protected:
AmdFftHelper()
{
if (!g_isAmdFftInitialized)
{
AutoLock lock(m);
if (!g_isAmdFftInitialized && haveOpenCL())
{
try
{
CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
g_isAmdFftAvailable = true;
}
catch (const Exception &)
{
g_isAmdFftAvailable = false;
}
}
else
g_isAmdFftAvailable = false;
g_isAmdFftInitialized = true;
}
}
private:
static clAmdFftSetupData setupData;
static Mutex m;
static bool g_isAmdFftInitialized;
static bool g_isAmdFftAvailable;
};
clAmdFftSetupData AmdFftHelper::setupData;
bool AmdFftHelper::g_isAmdFftAvailable = false;
bool AmdFftHelper::g_isAmdFftInitialized = false;
Mutex AmdFftHelper::m;
bool haveAmdFft()
{
return AmdFftHelper::getInstance().isAvailable();
}
#else
bool haveAmdFft()
{
return false;
}
#endif
void finish2()
{
Queue::getDefault().finish();
@ -1524,6 +1602,7 @@ struct Device::Impl
Impl(void* d)
{
handle = (cl_device_id)d;
refcount = 1;
}
template<typename _TpCL, typename _TpOut>
@ -2784,8 +2863,6 @@ public:
UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step, int flags) const
{
UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags);
u->urefcount = 1;
u->refcount = 0;
return u;
}
@ -2827,7 +2904,6 @@ public:
u->data = 0;
u->size = total;
u->handle = handle;
u->urefcount = 1;
u->flags = flags0;
return u;
@ -2866,7 +2942,6 @@ public:
}
if(accessFlags & ACCESS_WRITE)
u->markHostCopyObsolete(true);
CV_XADD(&u->urefcount, 1);
return true;
}
@ -2905,6 +2980,9 @@ public:
if(!u)
return;
CV_Assert(u->urefcount >= 0);
CV_Assert(u->refcount >= 0);
// TODO: !!! when we add Shared Virtual Memory Support,
// this function (as well as the others) should be corrected
CV_Assert(u->handle != 0 && u->urefcount == 0);

View File

@ -136,8 +136,12 @@
#elif defined OP_MUL_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * srcelem2 * scale)
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * scale * srcelem2)
#elif defined OP_DIV
#define PROCESS_ELEM \
@ -146,21 +150,36 @@
#elif defined OP_DIV_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM \
workT e2 = srcelem2, zero = (workT)(0); \
dstelem = convertToDT(e2 != zero ? srcelem1 * scale / e2 : zero)
dstelem = convertToDT(e2 == zero ? zero : (srcelem1 * (workT)(scale) / e2))
#elif defined OP_RDIV_SCALE
#undef EXTRA_PARAMS
#ifdef UNARY_OP
#define EXTRA_PARAMS , workT srcelem2, scaleT scale
#else
#define EXTRA_PARAMS , scaleT scale
#endif
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 == zero ? zero : (srcelem2 * (workT)(scale) / e1))
#elif defined OP_RECIP_SCALE
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT scale
#define EXTRA_PARAMS , scaleT scale
#define PROCESS_ELEM \
workT e1 = srcelem1, zero = (workT)(0); \
dstelem = convertToDT(e1 != zero ? scale / e1 : zero)
#elif defined OP_ADDW
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta, workT gamma
#define EXTRA_PARAMS , scaleT alpha, scaleT beta, scaleT gamma
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + srcelem2*beta + gamma)
#elif defined OP_MAG
@ -260,7 +279,8 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#undef srcelem2
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
defined OP_MUL || defined OP_DIV
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT srcelem2
#endif

View File

@ -41,6 +41,52 @@
//
//M*/
#ifdef COPY_TO_MASK
#define DEFINE_DATA \
int src_index = mad24(y, src_step, x*(int)sizeof(T)*scn + src_offset); \
int dst_index = mad24(y, dst_step, x*(int)sizeof(T)*scn + dst_offset); \
\
__global const T * src = (__global const T *)(srcptr + src_index); \
__global T * dst = (__global T *)(dstptr + dst_index)
__kernel void copyToMask(__global const uchar * srcptr, int src_step, int src_offset,
__global const uchar * maskptr, int mask_step, int mask_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
int mask_index = mad24(y, mask_step, x * mcn + mask_offset);
__global const uchar * mask = (__global const uchar *)(maskptr + mask_index);
#if mcn == 1
if (mask[0])
{
DEFINE_DATA;
#pragma unroll
for (int c = 0; c < scn; ++c)
dst[c] = src[c];
}
#elif scn == mcn
DEFINE_DATA;
#pragma unroll
for (int c = 0; c < scn; ++c)
if (mask[c])
dst[c] = src[c];
#else
#error "(mcn == 1 || mcn == scn) should be true"
#endif
}
}
#else
__kernel void setMask(__global const uchar* mask, int maskstep, int maskoffset,
__global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols, dstT value )
@ -71,3 +117,5 @@ __kernel void set(__global uchar* dstptr, int dststep, int dstoffset,
*(__global dstT*)(dstptr + dst_index) = value;
}
}
#endif

View File

@ -0,0 +1,81 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the uintel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business uinterruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
inline float2 cmulf(float2 a, float2 b)
{
return (float2)(a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x);
}
inline float2 conjf(float2 a)
{
return (float2)(a.x, - a.y);
}
__kernel void mulAndScaleSpectrums(__global const uchar * src1ptr, int src1_step, int src1_offset,
__global const uchar * src2ptr, int src2_step, int src2_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int dst_rows, int dst_cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
int src1_index = mad24(y, src1_step, x * (int)sizeof(float2) + src1_offset);
int src2_index = mad24(y, src2_step, x * (int)sizeof(float2) + src2_offset);
int dst_index = mad24(y, dst_step, x * (int)sizeof(float2) + dst_offset);
float2 src0 = *(__global const float2 *)(src1ptr + src1_index);
float2 src1 = *(__global const float2 *)(src2ptr + src2_index);
__global float2 * dst = (__global float2 *)(dstptr + dst_index);
#ifdef CONJ
float2 v = cmulf(src0, conjf(src1));
#else
float2 v = cmulf(src0, src1);
#endif
dst[0] = v;
}
}

View File

@ -33,7 +33,7 @@ enum OPENCLAMDFFT_FN_ID {
OPENCLAMDFFT_FN_clAmdFftSetPlanInStride = 23,
// OPENCLAMDFFT_FN_clAmdFftSetPlanLength = 24,
OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride = 25,
// OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26,
OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision = 26,
OPENCLAMDFFT_FN_clAmdFftSetPlanScale = 27,
// OPENCLAMDFFT_FN_clAmdFftSetPlanTransposeResult = 28,
OPENCLAMDFFT_FN_clAmdFftSetResultLocation = 29,
@ -334,9 +334,9 @@ clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle, const clAmdFftDim
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanOutStride, clAmdFftStatus, clAmdFftPlanHandle, const clAmdFftDim, size_t*>::switch_fn;
static const struct DynamicFnEntry clAmdFftSetPlanOutStride_definition = { "clAmdFftSetPlanOutStride", (void**)&clAmdFftSetPlanOutStride};
//clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) =
// openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn;
//static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision};
clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle, clAmdFftPrecision) =
openclamdfft_fn2<OPENCLAMDFFT_FN_clAmdFftSetPlanPrecision, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftPrecision>::switch_fn;
static const struct DynamicFnEntry clAmdFftSetPlanPrecision_definition = { "clAmdFftSetPlanPrecision", (void**)&clAmdFftSetPlanPrecision};
clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle, clAmdFftDirection, cl_float) =
openclamdfft_fn3<OPENCLAMDFFT_FN_clAmdFftSetPlanScale, clAmdFftStatus, clAmdFftPlanHandle, clAmdFftDirection, cl_float>::switch_fn;
@ -387,7 +387,7 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
&clAmdFftSetPlanInStride_definition,
NULL/*&clAmdFftSetPlanLength_definition*/,
&clAmdFftSetPlanOutStride_definition,
NULL/*&clAmdFftSetPlanPrecision_definition*/,
&clAmdFftSetPlanPrecision_definition,
&clAmdFftSetPlanScale_definition,
NULL/*&clAmdFftSetPlanTransposeResult_definition*/,
&clAmdFftSetResultLocation_definition,
@ -396,4 +396,4 @@ static const struct DynamicFnEntry* openclamdfft_fn[] = {
ADDITIONAL_FN_DEFINITIONS // macro for custom functions
};
// number of enabled functions: 14
// number of enabled functions: 15

View File

@ -5,7 +5,7 @@ clAmdFftDestroyPlan
clAmdFftEnqueueTransform
//clAmdFftGetLayout
//clAmdFftGetPlanBatchSize
//clAmdFftGetPlanContext
clAmdFftGetPlanContext
//clAmdFftGetPlanDim
//clAmdFftGetPlanDistance
//clAmdFftGetPlanInStride
@ -22,9 +22,9 @@ clAmdFftSetPlanBatchSize
//clAmdFftSetPlanDim
clAmdFftSetPlanDistance
clAmdFftSetPlanInStride
//clAmdFftSetPlanLength
clAmdFftSetPlanLength
clAmdFftSetPlanOutStride
//clAmdFftSetPlanPrecision
clAmdFftSetPlanPrecision
clAmdFftSetPlanScale
//clAmdFftSetPlanTransposeResult
clAmdFftSetResultLocation

View File

@ -107,7 +107,7 @@ std::wstring GetTempPathWinRT()
if (FAILED(WindowsCreateStringReference(RuntimeClass_Windows_Storage_ApplicationData,
(UINT32)wcslen(RuntimeClass_Windows_Storage_ApplicationData), &hstrHead, &str)))
return wstr;
if (FAILED(RoGetActivationFactory(str, IID_PPV_ARGS(appdataFactory.ReleaseAndGetAddressOf()))))
if (FAILED(Windows::Foundation::GetActivationFactory(str, appdataFactory.ReleaseAndGetAddressOf())))
return wstr;
if (FAILED(appdataFactory->get_Current(appdataRef.ReleaseAndGetAddressOf())))
return wstr;

View File

@ -217,6 +217,7 @@ UMat Mat::getUMat(int accessFlags) const
if(!a)
a = a0;
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags);
temp_u->refcount = 1;
}
UMat::getStdAllocator()->allocate(temp_u, accessFlags);
hdr.flags = flags;
@ -224,6 +225,7 @@ UMat Mat::getUMat(int accessFlags) const
finalizeHdr(hdr);
hdr.u = temp_u;
hdr.offset = data - datastart;
hdr.addref();
return hdr;
}
@ -271,6 +273,7 @@ void UMat::create(int d, const int* _sizes, int _type)
}
finalizeHdr(*this);
addref();
}
void UMat::copySize(const UMat& m)
@ -294,6 +297,7 @@ UMat::~UMat()
void UMat::deallocate()
{
u->currAllocator->deallocate(u);
u = NULL;
}
@ -657,6 +661,45 @@ void UMat::copyTo(OutputArray _dst) const
}
}
void UMat::copyTo(OutputArray _dst, InputArray _mask) const
{
if( _mask.empty() )
{
copyTo(_dst);
return;
}
int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
{
UMatData * prevu = _dst.getUMat().u;
_dst.create( dims, size, type() );
UMat dst = _dst.getUMat();
if( prevu != dst.u ) // do not leave dst uninitialized
dst = Scalar(0);
ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc,
format("-D COPY_TO_MASK -D T=%s -D scn=%d -D mcn=%d",
ocl::memopTypeToStr(depth()), cn, mcn));
if (!k.empty())
{
k.args(ocl::KernelArg::ReadOnlyNoSize(*this), ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = { cols, rows };
if (k.run(2, globalsize, NULL, false))
return;
}
}
Mat src = getMat(ACCESS_READ);
src.copyTo(_dst, _mask);
}
void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
{
bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;

View File

@ -293,7 +293,7 @@ OCL_TEST_P(Mul, Mat)
}
}
OCL_TEST_P(Mul, DISABLED_Scalar)
OCL_TEST_P(Mul, Scalar)
{
for (int j = 0; j < test_loop_times; j++)
{
@ -306,7 +306,7 @@ OCL_TEST_P(Mul, DISABLED_Scalar)
}
}
OCL_TEST_P(Mul, DISABLED_Mat_Scale)
OCL_TEST_P(Mul, Mat_Scale)
{
for (int j = 0; j < test_loop_times; j++)
{
@ -319,6 +319,20 @@ OCL_TEST_P(Mul, DISABLED_Mat_Scale)
}
}
OCL_TEST_P(Mul, Mat_Scalar_Scale)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::multiply(src1_roi, val, dst1_roi, val[0]));
OCL_ON(cv::multiply(usrc1_roi, val, udst1_roi, val[0]));
Near(udst1_roi.depth() >= CV_32F ? 1e-2 : 1);
}
}
//////////////////////////////// Div /////////////////////////////////////////////////
typedef ArithmTestBase Div;
@ -335,7 +349,7 @@ OCL_TEST_P(Div, Mat)
}
}
OCL_TEST_P(Div, DISABLED_Scalar)
OCL_TEST_P(Div, Scalar)
{
for (int j = 0; j < test_loop_times; j++)
{
@ -348,6 +362,19 @@ OCL_TEST_P(Div, DISABLED_Scalar)
}
}
OCL_TEST_P(Div, Scalar2)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::divide(src1_roi, val, dst1_roi));
OCL_ON(cv::divide(usrc1_roi, val, udst1_roi));
Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1);
}
}
OCL_TEST_P(Div, Mat_Scale)
{
for (int j = 0; j < test_loop_times; j++)
@ -361,8 +388,7 @@ OCL_TEST_P(Div, Mat_Scale)
}
}
OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale)
OCL_TEST_P(Div, Mat_Scalar_Scale)
{
for (int j = 0; j < test_loop_times; j++)
{
@ -375,6 +401,19 @@ OCL_TEST_P(Div, DISABLED_Mat_Scalar_Scale)
}
}
OCL_TEST_P(Div, Recip)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::divide(val[0], src1_roi, dst1_roi));
OCL_ON(cv::divide(val[0], usrc1_roi, udst1_roi));
Near(udst1_roi.depth() >= CV_32F ? 1e-3 : 1);
}
}
//////////////////////////////// Min/Max /////////////////////////////////////////////////
typedef ArithmTestBase Min;
@ -1180,6 +1219,28 @@ OCL_TEST_P(Sqrt, Mat)
}
}
//////////////////////////////// Normalize ////////////////////////////////////////////////
typedef ArithmTestBase Normalize;
OCL_TEST_P(Normalize, Mat)
{
static int modes[] = { CV_MINMAX, CV_L2, CV_L1, CV_C };
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
for (int i = 0, size = sizeof(modes) / sizeof(modes[0]); i < size; ++i)
{
OCL_OFF(cv::normalize(src1_roi, dst1_roi, 10, 110, modes[i], src1_roi.type(), mask_roi));
OCL_ON(cv::normalize(usrc1_roi, udst1_roi, 10, 110, modes[i], src1_roi.type(), umask_roi));
Near(1);
}
}
}
//////////////////////////////////////// Instantiation /////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@ -1214,6 +1275,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx, Combine(OCL_ALL_DEPTHS, OCL_ALL_C
OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::testing::Values(Channels(1)), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
} } // namespace cvtest::ocl

View File

@ -0,0 +1,164 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, MatDepth, bool, bool, bool, bool)
{
cv::Size dft_size;
int dft_flags, depth;
bool inplace;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
dft_size = GET_PARAM(0);
depth = GET_PARAM(1);
inplace = GET_PARAM(2);
dft_flags = 0;
if (GET_PARAM(3))
dft_flags |= cv::DFT_ROWS;
if (GET_PARAM(4))
dft_flags |= cv::DFT_SCALE;
if (GET_PARAM(5))
dft_flags |= cv::DFT_INVERSE;
}
void generateTestData(int cn = 2)
{
src = randomMat(dft_size, CV_MAKE_TYPE(depth, cn), 0.0, 100.0);
usrc = src.getUMat(ACCESS_READ);
if (inplace)
dst = src, udst = usrc;
}
};
OCL_TEST_P(Dft, C2C)
{
generateTestData();
OCL_OFF(cv::dft(src, dst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
OCL_ON(cv::dft(usrc, udst, dft_flags | cv::DFT_COMPLEX_OUTPUT));
double eps = src.size().area() * 1e-4;
EXPECT_MAT_NEAR(dst, udst, eps);
}
////////////////////////////////////////////////////////////////////////////
// MulSpectrums
PARAM_TEST_CASE(MulSpectrums, bool, bool)
{
bool ccorr, useRoi;
TEST_DECLARE_INPUT_PARAMETER(src1)
TEST_DECLARE_INPUT_PARAMETER(src2)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
ccorr = GET_PARAM(0);
useRoi = GET_PARAM(1);
}
void generateTestData()
{
Size srcRoiSize = randomSize(1, MAX_VALUE);
Border src1Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src1, src1_roi, srcRoiSize, src1Border, CV_32FC2, -11, 11);
Border src2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src2, src2_roi, srcRoiSize, src2Border, CV_32FC2, -11, 11);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, srcRoiSize, dstBorder, CV_32FC2, 5, 16);
UMAT_UPLOAD_INPUT_PARAMETER(src1)
UMAT_UPLOAD_INPUT_PARAMETER(src2)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
};
OCL_TEST_P(MulSpectrums, Mat)
{
for (int i = 0; i < test_loop_times; ++i)
{
generateTestData();
OCL_OFF(cv::mulSpectrums(src1_roi, src2_roi, dst_roi, 0, ccorr));
OCL_ON(cv::mulSpectrums(usrc1_roi, usrc2_roi, udst_roi, 0, ccorr));
OCL_EXPECT_MATS_NEAR_RELATIVE(dst, 1e-6);
}
}
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20),
cv::Size(512, 1), cv::Size(1024, 768)),
Values(CV_32F, CV_64F),
Bool(), // inplace
Bool(), // DFT_ROWS
Bool(), // DFT_SCALE
Bool()) // DFT_INVERSE
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -54,7 +54,7 @@ namespace ocl {
////////////////////////////////converto/////////////////////////////////////////////////
PARAM_TEST_CASE(MatrixTestBase, MatDepth, MatDepth, Channels, bool)
PARAM_TEST_CASE(ConvertTo, MatDepth, MatDepth, Channels, bool)
{
int src_depth, cn, dstType;
bool use_roi;
@ -85,8 +85,6 @@ PARAM_TEST_CASE(MatrixTestBase, MatDepth, MatDepth, Channels, bool)
}
};
typedef MatrixTestBase ConvertTo;
OCL_TEST_P(ConvertTo, Accuracy)
{
for (int j = 0; j < test_loop_times; j++)
@ -103,7 +101,51 @@ OCL_TEST_P(ConvertTo, Accuracy)
}
}
typedef MatrixTestBase CopyTo;
//////////////////////////////// CopyTo /////////////////////////////////////////////////
PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
{
int depth, cn;
bool use_roi, use_mask;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_INPUT_PARAMETER(mask)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
depth = GET_PARAM(0);
cn = GET_PARAM(1);
use_roi = GET_PARAM(2);
use_mask = GET_PARAM(3);
}
void generateTestData()
{
const int type = CV_MAKE_TYPE(depth, cn);
Size roiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
if (use_mask)
{
Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
int mask_cn = randomDouble(0.0, 2.0) > 1.0 ? cn : 1;
randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC(mask_cn), 0, 2);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
}
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 5, 16);
UMAT_UPLOAD_INPUT_PARAMETER(src)
if (use_mask)
UMAT_UPLOAD_INPUT_PARAMETER(mask)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
};
OCL_TEST_P(CopyTo, Accuracy)
{
@ -111,8 +153,16 @@ OCL_TEST_P(CopyTo, Accuracy)
{
generateTestData();
OCL_OFF(src_roi.copyTo(dst_roi));
OCL_ON(usrc_roi.copyTo(udst_roi));
if (use_mask)
{
OCL_OFF(src_roi.copyTo(dst_roi, mask_roi));
OCL_ON(usrc_roi.copyTo(udst_roi, umask_roi));
}
else
{
OCL_OFF(src_roi.copyTo(dst_roi));
OCL_ON(usrc_roi.copyTo(udst_roi));
}
OCL_EXPECT_MATS_NEAR(dst, 0);
}
@ -122,7 +172,7 @@ OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
OCL_ALL_DEPTHS, OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
OCL_ALL_DEPTHS, Values((MatDepth)0), OCL_ALL_CHANNELS, Bool()));
OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
} } // namespace cvtest::ocl

View File

@ -1076,6 +1076,11 @@ CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
bool normalize = true,
int borderType = BORDER_DEFAULT );
CV_EXPORTS_W void sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor = Point(-1, -1),
bool normalize = true,
int borderType = BORDER_DEFAULT );
//! a synonym for normalized box filter
CV_EXPORTS_W void blur( InputArray src, OutputArray dst,
Size ksize, Point anchor = Point(-1,-1),

View File

@ -472,7 +472,7 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
#endif
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
if(dx < 2 && dy < 2 && src.channels() == 1 && borderType == 1)
if(dx < 2 && dy < 2 && _src.channels() == 1 && borderType == 1)
{
Mat src = _src.getMat(), dst = _dst.getMat();
if(IPPDerivScharr(src, dst, ddepth, dx, dy, scale))

View File

@ -0,0 +1,202 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define noconvert
inline int idx_row_low(int y, int last_row)
{
return abs(y) % (last_row + 1);
}
inline int idx_row_high(int y, int last_row)
{
return abs(last_row - (int)abs(last_row - y)) % (last_row + 1);
}
inline int idx_row(int y, int last_row)
{
return idx_row_low(idx_row_high(y, last_row), last_row);
}
inline int idx_col_low(int x, int last_col)
{
return abs(x) % (last_col + 1);
}
inline int idx_col_high(int x, int last_col)
{
return abs(last_col - (int)abs(last_col - x)) % (last_col + 1);
}
inline int idx_col(int x, int last_col)
{
return idx_col_low(idx_col_high(x, last_col), last_col);
}
__kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
const int x = get_global_id(0);
const int y = get_group_id(1);
__local FT smem[256 + 4];
__global T * dstData = (__global T *)(dst + dst_offset);
__global const uchar * srcData = (__global const uchar*)(src + src_offset);
FT sum;
FT co1 = 0.375f;
FT co2 = 0.25f;
FT co3 = 0.0625f;
const int src_y = 2*y;
const int last_row = src_rows - 1;
const int last_col = src_cols - 1;
if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2)
{
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[x]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[left_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[left_x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[left_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[left_x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[left_x]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[right_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[right_x]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[right_x]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[right_x]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[right_x]);
smem[4 + get_local_id(0)] = sum;
}
}
else
{
int col = idx_col(x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
smem[2 + get_local_id(0)] = sum;
if (get_local_id(0) < 2)
{
const int left_x = x - 2;
col = idx_col(left_x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
smem[get_local_id(0)] = sum;
}
if (get_local_id(0) > 253)
{
const int right_x = x + 2;
col = idx_col(right_x, last_col);
sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]);
sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]);
sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]);
sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]);
smem[4 + get_local_id(0)] = sum;
}
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < 128)
{
const int tid2 = get_local_id(0) * 2;
sum = co3 * smem[2 + tid2 - 2];
sum = sum + co2 * smem[2 + tid2 - 1];
sum = sum + co1 * smem[2 + tid2 ];
sum = sum + co2 * smem[2 + tid2 + 1];
sum = sum + co3 * smem[2 + tid2 + 2];
const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2;
if (dst_x < dst_cols)
dstData[y * dst_step / ((int)sizeof(T)) + dst_x] = convertToT(sum);
}
}

View File

@ -0,0 +1,159 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////
//////////////////////// Generic PyrUp //////////////////////////////
///////////////////////////////////////////////////////////////////////
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#define noconvert
__kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int lsizex = get_local_size(0);
const int lsizey = get_local_size(1);
const int tidx = get_local_id(0);
const int tidy = get_local_id(1);
__local FT s_srcPatch[10][10];
__local FT s_dstPatch[20][16];
__global T * dstData = (__global T *)(dst + dst_offset);
__global const T * srcData = (__global const T *)(src + src_offset);
if( tidx < 10 && tidy < 10 )
{
int srcx = mad24((int)get_group_id(0), lsizex>>1, tidx) - 1;
int srcy = mad24((int)get_group_id(1), lsizey>>1, tidy) - 1;
srcx = abs(srcx);
srcx = min(src_cols - 1, srcx);
srcy = abs(srcy);
srcy = min(src_rows - 1, srcy);
s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]);
}
barrier(CLK_LOCAL_MEM_FENCE);
FT sum = 0.f;
const FT evenFlag = (FT)((tidx & 1) == 0);
const FT oddFlag = (FT)((tidx & 1) != 0);
const bool eveny = ((tidy & 1) == 0);
const FT co1 = 0.375f;
const FT co2 = 0.25f;
const FT co3 = 0.0625f;
if(eveny)
{
sum = ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 1) >> 1)];
sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 1) >> 1)];
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[2 + tidy][tidx] = sum;
if (tidy < 2)
{
sum = 0;
if (eveny)
{
sum = (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[tidy][tidx] = sum;
}
if (tidy > 13)
{
sum = 0;
if (eveny)
{
sum = (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx - 2) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx - 1) >> 1)];
sum = sum + (evenFlag * co1) * s_srcPatch[lsizey-7][1 + ((tidx ) >> 1)];
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx + 1) >> 1)];
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx + 2) >> 1)];
}
s_dstPatch[4 + tidy][tidx] = sum;
}
barrier(CLK_LOCAL_MEM_FENCE);
sum = co3 * s_dstPatch[2 + tidy - 2][tidx];
sum = sum + co2 * s_dstPatch[2 + tidy - 1][tidx];
sum = sum + co1 * s_dstPatch[2 + tidy ][tidx];
sum = sum + co2 * s_dstPatch[2 + tidy + 1][tidx];
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
if ((x < dst_cols) && (y < dst_rows))
dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum);
}

View File

@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
namespace cv
{
@ -400,12 +401,99 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
typedef void (*PyrFunc)(const Mat&, Mat&, int);
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if (((channels != 1) && (channels != 2) && (channels != 4))
|| (borderType != BORDER_DEFAULT))
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ((depth == CV_64F) && !(doubleSupport))
return false;
Size ssize = _src.size();
Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
CV_Assert( ssize.width > 0 && ssize.height > 0 &&
std::abs(dsize.width*2 - ssize.width) <= 2 &&
std::abs(dsize.height*2 - ssize.height) <= 2 );
UMat src = _src.getUMat();
_dst.create( dsize, src.type() );
UMat dst = _dst.getUMat();
const char * const kernelName = "pyrDown";
ocl::ProgramSource2 program = ocl::imgproc::pyr_down_oclsrc;
ocl::Kernel k;
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50];
k.create(kernelName, program,
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t localThreads[2] = { 256, 1 };
size_t globalThreads[2] = { src.cols, dst.rows };
return k.run(2, globalThreads, localThreads, false);
}
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
if (((channels != 1) && (channels != 2) && (channels != 4))
|| (borderType != BORDER_DEFAULT))
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if ((depth == CV_64F) && !(doubleSupport))
return false;
Size ssize = _src.size();
if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
return false;
UMat src = _src.getUMat();
Size dsize = Size(ssize.width * 2, ssize.height * 2);
_dst.create( dsize, src.type() );
UMat dst = _dst.getUMat();
const char * const kernelName = "pyrUp";
ocl::ProgramSource2 program = ocl::imgproc::pyr_up_oclsrc;
ocl::Kernel k;
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
char cvt[2][50];
k.create(kernelName, program,
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
size_t globalThreads[2] = {dst.cols, dst.rows};
size_t localThreads[2] = {16, 16};
return k.run(2, globalThreads, localThreads, false);
}
}
void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_pyrDown(_src, _dst, _dsz, borderType))
return;
Mat src = _src.getMat();
Size dsz = _dsz == Size() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
_dst.create( dsz, src.type() );
Mat dst = _dst.getMat();
@ -434,8 +522,12 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde
void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
if (ocl::useOpenCL() && _dst.isUMat() &&
ocl_pyrUp(_src, _dst, _dsz, borderType))
return;
Mat src = _src.getMat();
Size dsz = _dsz == Size() ? Size(src.cols*2, src.rows*2) : _dsz;
Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
_dst.create( dsz, src.type() );
Mat dst = _dst.getMat();

View File

@ -895,6 +895,114 @@ void cv::blur( InputArray src, OutputArray dst,
boxFilter( src, dst, -1, ksize, anchor, true, borderType );
}
/****************************************************************************************\
Squared Box Filter
\****************************************************************************************/
namespace cv
{
template<typename T, typename ST> struct SqrRowSum : public BaseRowFilter
{
SqrRowSum( int _ksize, int _anchor )
{
ksize = _ksize;
anchor = _anchor;
}
void operator()(const uchar* src, uchar* dst, int width, int cn)
{
const T* S = (const T*)src;
ST* D = (ST*)dst;
int i = 0, k, ksz_cn = ksize*cn;
width = (width - 1)*cn;
for( k = 0; k < cn; k++, S++, D++ )
{
ST s = 0;
for( i = 0; i < ksz_cn; i += cn )
{
ST val = (ST)S[i];
s += val*val;
}
D[0] = s;
for( i = 0; i < width; i += cn )
{
ST val0 = (ST)S[i], val1 = (ST)S[i + ksz_cn];
s += val1*val1 - val0*val0;
D[i+cn] = s;
}
}
}
};
static Ptr<BaseRowFilter> getSqrRowSumFilter(int srcType, int sumType, int ksize, int anchor)
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(sumType);
CV_Assert( CV_MAT_CN(sumType) == CV_MAT_CN(srcType) );
if( anchor < 0 )
anchor = ksize/2;
if( sdepth == CV_8U && ddepth == CV_32S )
return makePtr<SqrRowSum<uchar, int> >(ksize, anchor);
if( sdepth == CV_8U && ddepth == CV_64F )
return makePtr<SqrRowSum<uchar, double> >(ksize, anchor);
if( sdepth == CV_16U && ddepth == CV_64F )
return makePtr<SqrRowSum<ushort, double> >(ksize, anchor);
if( sdepth == CV_16S && ddepth == CV_64F )
return makePtr<SqrRowSum<short, double> >(ksize, anchor);
if( sdepth == CV_32F && ddepth == CV_64F )
return makePtr<SqrRowSum<float, double> >(ksize, anchor);
if( sdepth == CV_64F && ddepth == CV_64F )
return makePtr<SqrRowSum<double, double> >(ksize, anchor);
CV_Error_( CV_StsNotImplemented,
("Unsupported combination of source format (=%d), and buffer format (=%d)",
srcType, sumType));
return Ptr<BaseRowFilter>();
}
}
void cv::sqrBoxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size ksize, Point anchor,
bool normalize, int borderType )
{
Mat src = _src.getMat();
int sdepth = src.depth(), cn = src.channels();
if( ddepth < 0 )
ddepth = sdepth < CV_32F ? CV_32F : CV_64F;
_dst.create( src.size(), CV_MAKETYPE(ddepth, cn) );
Mat dst = _dst.getMat();
if( borderType != BORDER_CONSTANT && normalize )
{
if( src.rows == 1 )
ksize.height = 1;
if( src.cols == 1 )
ksize.width = 1;
}
int sumType = CV_64F;
if( sdepth == CV_8U )
sumType = CV_32S;
sumType = CV_MAKETYPE( sumType, cn );
int srcType = CV_MAKETYPE(sdepth, cn);
int dstType = CV_MAKETYPE(ddepth, cn);
Ptr<BaseRowFilter> rowFilter = getSqrRowSumFilter(srcType, sumType, ksize.width, anchor.x );
Ptr<BaseColumnFilter> columnFilter = getColumnSumFilter(sumType,
dstType, ksize.height, anchor.y,
normalize ? 1./(ksize.width*ksize.height) : 1);
Ptr<FilterEngine> f = makePtr<FilterEngine>(Ptr<BaseFilter>(), rowFilter, columnFilter,
srcType, dstType, sumType, borderType );
f->apply( src, dst );
}
/****************************************************************************************\
Gaussian Blur
\****************************************************************************************/

View File

@ -350,52 +350,6 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
}
}
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
if( ( depth == CV_8U ) && ( !_tilted.needed() ) )
{
if( sdepth == CV_32F )
{
if( cn == 1 )
{
IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
_sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
sum = _sum.getMat();
if( _sqsum.needed() && sqdepth == CV_64F )
{
_sqsum.create( isize, CV_MAKETYPE( sqdepth, cn ) );
sqsum = _sqsum.getMat();
ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
}
else
{
ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 );
}
return;
}
}
if( sdepth == CV_32S )
{
if( cn == 1 )
{
IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
_sum.create( isize, CV_MAKETYPE( sdepth, cn ) );
sum = _sum.getMat();
if( _sqsum.needed() && sqdepth == CV_64F )
{
_sqsum.create( isize, CV_MAKETYPE( sqdepth, cn ) );
sqsum = _sqsum.getMat();
ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
}
else
{
ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 );
}
return;
}
}
}
#endif
Size ssize = _src.size(), isize(ssize.width + 1, ssize.height + 1);
_sum.create( isize, CV_MAKETYPE(sdepth, cn) );
Mat src = _src.getMat(), sum =_sum.getMat(), sqsum, tilted;
@ -404,7 +358,37 @@ void cv::integral( InputArray _src, OutputArray _sum, OutputArray _sqsum, Output
{
_sqsum.create( isize, CV_MAKETYPE(sqdepth, cn) );
sqsum = _sqsum.getMat();
};
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
if( ( depth == CV_8U ) && ( sdepth == CV_32F || sdepth == CV_32S ) && ( !_tilted.needed() ) && ( !_sqsum.needed() || sqdepth == CV_64F ) && ( cn == 1 ) )
{
IppiSize srcRoiSize = ippiSize( src.cols, src.rows );
if( sdepth == CV_32F )
{
if( _sqsum.needed() )
{
ippiSqrIntegral_8u32f64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
}
else
{
ippiIntegral_8u32f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32f*)sum.data, (int)sum.step, srcRoiSize, 0 );
}
}
else if( sdepth == CV_32S )
{
if( _sqsum.needed() )
{
ippiSqrIntegral_8u32s64f_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, (Ipp64f*)sqsum.data, (int)sqsum.step, srcRoiSize, 0, 0 );
}
else
{
ippiIntegral_8u32s_C1R( (const Ipp8u*)src.data, (int)src.step, (Ipp32s*)sum.data, (int)sum.step, srcRoiSize, 0 );
}
}
return;
}
#endif
if( _tilted.needed() )
{

View File

@ -0,0 +1,142 @@
///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Yao Wang yao@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
{
int depth, channels;
bool use_roi;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
depth = GET_PARAM(0);
channels = GET_PARAM(1);
use_roi = GET_PARAM(2);
}
void generateTestData(Size src_roiSize, Size dst_roiSize)
{
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, src_roiSize, srcBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dst_roiSize, dstBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
void Near(double threshold = 0.0)
{
OCL_EXPECT_MATS_NEAR(dst, threshold);
}
};
/////////////////////// PyrDown //////////////////////////
typedef PyrTestBase PyrDown;
OCL_TEST_P(PyrDown, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
Size src_roiSize = randomSize(1, MAX_VALUE);
Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2),
randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2));
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
generateTestData(src_roiSize, dst_roiSize);
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
Near(depth == CV_32F ? 1e-4f : 1.0f);
}
}
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 4),
Bool()
));
/////////////////////// PyrUp //////////////////////////
typedef PyrTestBase PyrUp;
OCL_TEST_P(PyrUp, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
Size src_roiSize = randomSize(1, MAX_VALUE);
Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
generateTestData(src_roiSize, dst_roiSize);
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
Near(depth == CV_32F ? 1e-4f : 1.0f);
}
}
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
Values(1, 2, 4),
Bool()
));
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -86,12 +86,14 @@ FeatureEvaluator::setImage
------------------------------
Assigns an image to feature evaluator.
.. ocv:function:: bool FeatureEvaluator::setImage(const Mat& img, Size origWinSize)
.. ocv:function:: bool FeatureEvaluator::setImage(InputArray img, Size origWinSize, Size sumSize)
:param img: Matrix of the type ``CV_8UC1`` containing an image where the features are computed.
:param img: Matrix of the type ``CV_8UC1`` containing an image where the features are computed.
:param origWinSize: Size of training images.
:param sumSize: The requested size of integral images (so if the integral image is smaller, it resides in the top-left corner of the larger image of requested size). Because the features are represented using offsets from the image origin, using the same sumSize for all scales helps to avoid constant readjustments of the features to different scales.
The method assigns an image, where the features will be computed, to the feature evaluator.

View File

@ -111,12 +111,15 @@ public:
};
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps = 0.2);
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS_W void groupRectangles(CV_IN_OUT std::vector<Rect>& rectList, CV_OUT std::vector<int>& weights,
int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, int groupThreshold,
double eps, std::vector<int>* weights, std::vector<double>* levelWeights );
CV_EXPORTS void groupRectangles(std::vector<Rect>& rectList, std::vector<int>& rejectLevels,
std::vector<double>& levelWeights, int groupThreshold, double eps = 0.2);
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights, std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
CV_EXPORTS void groupRectangles_meanshift(std::vector<Rect>& rectList, std::vector<double>& foundWeights,
std::vector<double>& foundScales,
double detectThreshold = 0.0, Size winDetSize = Size(64, 128));
class CV_EXPORTS FeatureEvaluator
{
@ -132,7 +135,7 @@ public:
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const;
virtual bool setImage(const Mat& img, Size origWinSize);
virtual bool setImage(InputArray img, Size origWinSize, Size sumSize);
virtual bool setWindow(Point p);
virtual double calcOrd(int featureIdx) const;
@ -232,6 +235,8 @@ public:
CV_WRAP int getFeatureType() const;
void* getOldCascade();
CV_WRAP static bool convert(const String& oldcascade, const String& newcascade);
void setMaskGenerator(const Ptr<BaseCascadeClassifier::MaskGenerator>& maskGenerator);
Ptr<BaseCascadeClassifier::MaskGenerator> getMaskGenerator();

View File

@ -7,10 +7,10 @@
// copy or use the software.
//
//
// Intel License Agreement
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2008-2013, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
@ -23,13 +23,13 @@
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// * The name of Itseez Inc. may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
@ -44,6 +44,7 @@
#include "cascadedetect.hpp"
#include "opencv2/objdetect/objdetect_c.h"
#include "opencl_kernels.hpp"
#if defined (LOG_CASCADE_STATISTIC)
struct Logger
@ -113,6 +114,13 @@ struct Logger
namespace cv
{
template<typename _Tp> void copyVectorToUMat(const std::vector<_Tp>& v, UMat& um)
{
if(v.empty())
um.release();
Mat(1, (int)(v.size()*sizeof(v[0])), CV_8U, (void*)&v[0]).copyTo(um);
}
void groupRectangles(std::vector<Rect>& rectList, int groupThreshold, double eps, std::vector<int>* weights, std::vector<double>* levelWeights)
{
if( groupThreshold <= 0 || rectList.empty() )
@ -434,7 +442,7 @@ FeatureEvaluator::~FeatureEvaluator() {}
bool FeatureEvaluator::read(const FileNode&) {return true;}
Ptr<FeatureEvaluator> FeatureEvaluator::clone() const { return Ptr<FeatureEvaluator>(); }
int FeatureEvaluator::getFeatureType() const {return -1;}
bool FeatureEvaluator::setImage(const Mat&, Size) {return true;}
bool FeatureEvaluator::setImage(InputArray, Size, Size) {return true;}
bool FeatureEvaluator::setWindow(Point) { return true; }
double FeatureEvaluator::calcOrd(int) const { return 0.; }
int FeatureEvaluator::calcCat(int) const { return 0; }
@ -466,7 +474,8 @@ bool HaarEvaluator::Feature :: read( const FileNode& node )
HaarEvaluator::HaarEvaluator()
{
features = makePtr<std::vector<Feature> >();
optfeaturesPtr = 0;
pwin = 0;
}
HaarEvaluator::~HaarEvaluator()
{
@ -474,16 +483,24 @@ HaarEvaluator::~HaarEvaluator()
bool HaarEvaluator::read(const FileNode& node)
{
features->resize(node.size());
featuresPtr = &(*features)[0];
FileNodeIterator it = node.begin(), it_end = node.end();
size_t i, n = node.size();
CV_Assert(n > 0);
if(features.empty())
features = makePtr<std::vector<Feature> >();
if(optfeatures.empty())
optfeatures = makePtr<std::vector<OptFeature> >();
features->resize(n);
FileNodeIterator it = node.begin();
hasTiltedFeatures = false;
std::vector<Feature>& ff = *features;
sumSize0 = Size();
ufbuf.release();
for(int i = 0; it != it_end; ++it, i++)
for(i = 0; i < n; i++, ++it)
{
if(!featuresPtr[i].read(*it))
if(!ff[i].read(*it))
return false;
if( featuresPtr[i].tilted )
if( ff[i].tilted )
hasTiltedFeatures = true;
}
return true;
@ -494,59 +511,102 @@ Ptr<FeatureEvaluator> HaarEvaluator::clone() const
Ptr<HaarEvaluator> ret = makePtr<HaarEvaluator>();
ret->origWinSize = origWinSize;
ret->features = features;
ret->featuresPtr = &(*ret->features)[0];
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = optfeatures->empty() ? 0 : &(*(ret->optfeatures))[0];
ret->hasTiltedFeatures = hasTiltedFeatures;
ret->sum0 = sum0, ret->sqsum0 = sqsum0, ret->tilted0 = tilted0;
ret->sum = sum, ret->sqsum = sqsum, ret->tilted = tilted;
ret->sum0 = sum0; ret->sqsum0 = sqsum0;
ret->sum = sum; ret->sqsum = sqsum;
ret->usum0 = usum0; ret->usqsum0 = usqsum0; ret->ufbuf = ufbuf;
ret->normrect = normrect;
memcpy( ret->p, p, 4*sizeof(p[0]) );
memcpy( ret->pq, pq, 4*sizeof(pq[0]) );
ret->offset = offset;
memcpy( ret->nofs, nofs, 4*sizeof(nofs[0]) );
ret->pwin = pwin;
ret->varianceNormFactor = varianceNormFactor;
return ret;
}
bool HaarEvaluator::setImage( const Mat &image, Size _origWinSize )
bool HaarEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
{
int rn = image.rows+1, cn = image.cols+1;
Size imgsz = _image.size();
int cols = imgsz.width, rows = imgsz.height;
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
origWinSize = _origWinSize;
normrect = Rect(1, 1, origWinSize.width-2, origWinSize.height-2);
if (image.cols < origWinSize.width || image.rows < origWinSize.height)
return false;
int rn = _sumSize.height, cn = _sumSize.width, rn_scale = hasTiltedFeatures ? 2 : 1;
int sumStep, tofs = 0;
CV_Assert(rn >= rows+1 && cn >= cols+1);
if( sum0.rows < rn || sum0.cols < cn )
if( _image.isUMat() )
{
sum0.create(rn, cn, CV_32S);
sqsum0.create(rn, cn, CV_64F);
if (hasTiltedFeatures)
tilted0.create( rn, cn, CV_32S);
}
sum = Mat(rn, cn, CV_32S, sum0.data);
sqsum = Mat(rn, cn, CV_64F, sqsum0.data);
usum0.create(rn*rn_scale, cn, CV_32S);
usqsum0.create(rn, cn, CV_32S);
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
usqsum = UMat(usqsum0, Rect(0, 0, cols, rows));
if( hasTiltedFeatures )
{
tilted = Mat(rn, cn, CV_32S, tilted0.data);
integral(image, sum, sqsum, tilted);
if( hasTiltedFeatures )
{
UMat utilted(usum0, Rect(0, _sumSize.height, cols+1, rows+1));
integral(_image, usum, noArray(), utilted, CV_32S);
tofs = (int)((utilted.offset - usum.offset)/sizeof(int));
}
else
{
integral(_image, usum, noArray(), noArray(), CV_32S);
}
sqrBoxFilter(_image, usqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
/*sqrBoxFilter(_image.getMat(), sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sqsum.copyTo(usqsum);*/
sumStep = (int)(usum.step/usum.elemSize());
}
else
integral(image, sum, sqsum);
const int* sdata = (const int*)sum.data;
const double* sqdata = (const double*)sqsum.data;
size_t sumStep = sum.step/sizeof(sdata[0]);
size_t sqsumStep = sqsum.step/sizeof(sqdata[0]);
{
sum0.create(rn*rn_scale, cn, CV_32S);
sqsum0.create(rn, cn, CV_32S);
sum = sum0(Rect(0, 0, cols+1, rows+1));
sqsum = sqsum0(Rect(0, 0, cols, rows));
CV_SUM_PTRS( p[0], p[1], p[2], p[3], sdata, normrect, sumStep );
CV_SUM_PTRS( pq[0], pq[1], pq[2], pq[3], sqdata, normrect, sqsumStep );
if( hasTiltedFeatures )
{
Mat tilted = sum0(Rect(0, _sumSize.height, cols+1, rows+1));
integral(_image, sum, noArray(), tilted, CV_32S);
tofs = (int)((tilted.data - sum.data)/sizeof(int));
}
else
integral(_image, sum, noArray(), noArray(), CV_32S);
sqrBoxFilter(_image, sqsum, CV_32S,
Size(normrect.width, normrect.height),
Point(0, 0), false);
sumStep = (int)(sum.step/sum.elemSize());
}
CV_SUM_OFS( nofs[0], nofs[1], nofs[2], nofs[3], 0, normrect, sumStep );
size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep, tofs );
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
for( fi = 0; fi < nfeatures; fi++ )
featuresPtr[fi].updatePtrs( !featuresPtr[fi].tilted ? sum : tilted );
return true;
}
bool HaarEvaluator::setWindow( Point pt )
{
if( pt.x < 0 || pt.y < 0 ||
@ -554,10 +614,9 @@ bool HaarEvaluator::setWindow( Point pt )
pt.y + origWinSize.height >= sum.rows )
return false;
size_t pOffset = pt.y * (sum.step/sizeof(int)) + pt.x;
size_t pqOffset = pt.y * (sqsum.step/sizeof(double)) + pt.x;
int valsum = CALC_SUM(p, pOffset);
double valsqsum = CALC_SUM(pq, pqOffset);
const int* p = &sum.at<int>(pt);
int valsum = CALC_SUM_OFS(nofs, p);
double valsqsum = sqsum.at<int>(pt.y + normrect.y, pt.x + normrect.x);
double nf = (double)normrect.area() * valsqsum - (double)valsum * valsum;
if( nf > 0. )
@ -565,11 +624,24 @@ bool HaarEvaluator::setWindow( Point pt )
else
nf = 1.;
varianceNormFactor = 1./nf;
offset = (int)pOffset;
pwin = p;
return true;
}
Rect HaarEvaluator::getNormRect() const
{
return normrect;
}
void HaarEvaluator::getUMats(std::vector<UMat>& bufs)
{
bufs.clear();
bufs.push_back(usum);
bufs.push_back(usqsum);
bufs.push_back(ufbuf);
}
//---------------------------------------------- LBPEvaluator -------------------------------------
bool LBPEvaluator::Feature :: read(const FileNode& node )
{
@ -612,8 +684,9 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const
return ret;
}
bool LBPEvaluator::setImage( const Mat& image, Size _origWinSize )
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size )
{
Mat image = _image.getMat();
int rn = image.rows+1, cn = image.cols+1;
origWinSize = _origWinSize;
@ -693,8 +766,9 @@ Ptr<FeatureEvaluator> HOGEvaluator::clone() const
return ret;
}
bool HOGEvaluator::setImage( const Mat& image, Size winSize )
bool HOGEvaluator::setImage( InputArray _image, Size winSize, Size )
{
Mat image = _image.getMat();
int rows = image.rows + 1;
int cols = image.cols + 1;
origWinSize = winSize;
@ -880,7 +954,7 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do
if( !evaluator->setWindow(pt) )
return -1;
if( data.isStumpBased )
if( data.isStumpBased() )
{
if( data.featureType == FeatureEvaluator::HAAR )
return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight );
@ -904,11 +978,6 @@ int CascadeClassifierImpl::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, do
}
}
bool CascadeClassifierImpl::setImage( Ptr<FeatureEvaluator>& evaluator, const Mat& image )
{
return empty() ? false : evaluator->setImage(image, data.origWinSize);
}
void CascadeClassifierImpl::setMaskGenerator(const Ptr<MaskGenerator>& _maskGenerator)
{
maskGenerator=_maskGenerator;
@ -1010,11 +1079,12 @@ struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }
struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } };
bool CascadeClassifierImpl::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
int stripSize, int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& levels, std::vector<double>& weights, bool outputRejectLevels )
bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& levels, std::vector<double>& weights,
Size sumSize0, bool outputRejectLevels )
{
if( !featureEvaluator->setImage( image, data.origWinSize ) )
if( !featureEvaluator->setImage(_image, data.origWinSize, sumSize0) )
return false;
#if defined (LOG_CASCADE_STATISTIC)
@ -1023,13 +1093,21 @@ bool CascadeClassifierImpl::detectSingleScale( const Mat& image, int stripCount,
Mat currentMask;
if (maskGenerator) {
Mat image = _image.getMat();
currentMask=maskGenerator->generateMask(image);
}
std::vector<Rect> candidatesVector;
std::vector<int> rejectLevels;
std::vector<double> levelWeights;
Mutex mtx;
int stripCount, stripSize;
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
if( outputRejectLevels )
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
@ -1051,12 +1129,63 @@ bool CascadeClassifierImpl::detectSingleScale( const Mat& image, int stripCount,
return true;
}
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
{
const int VECTOR_SIZE = 1;
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() )
return false;
haar->setImage(_image, data.origWinSize, sumSize0);
if( cascadeKernel.empty() )
{
cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc,
format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
if( cascadeKernel.empty() )
return false;
}
if( ustages.empty() )
{
copyVectorToUMat(data.stages, ustages);
copyVectorToUMat(data.stumps, ustumps);
}
std::vector<UMat> bufs;
haar->getUMats(bufs);
CV_Assert(bufs.size() == 3);
Rect normrect = haar->getNormRect();
//processingRectSize = Size(yStep, yStep);
size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
normrect, data.origWinSize, MAX_FACES);
bool ok = cascadeKernel.run(2, globalsize, 0, true);
//CV_Assert(ok);
return ok;
}
bool CascadeClassifierImpl::isOldFormatCascade() const
{
return !oldCascade.empty();
}
int CascadeClassifierImpl::getFeatureType() const
{
return featureEvaluator->getFeatureType();
@ -1067,12 +1196,6 @@ Size CascadeClassifierImpl::getOriginalWindowSize() const
return data.origWinSize;
}
bool CascadeClassifierImpl::setImage(InputArray _image)
{
Mat image = _image.getMat();
return featureEvaluator->setImage(image, data.origWinSize);
}
void* CascadeClassifierImpl::getOldCascade()
{
return oldCascade;
@ -1096,36 +1219,75 @@ static void detectMultiScaleOldFormat( const Mat& image, Ptr<CvHaarClassifierCas
std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect());
}
void CascadeClassifierImpl::detectMultiScaleNoGrouping( const Mat& image, std::vector<Rect>& candidates,
void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
candidates.clear();
Size imgsz = _image.size();
int imgtype = _image.type();
if (maskGenerator)
maskGenerator->initializeMask(image);
Mat grayImage, imageBuffer;
candidates.clear();
rejectLevels.clear();
levelWeights.clear();
if( maxObjectSize.height == 0 || maxObjectSize.width == 0 )
maxObjectSize = image.size();
maxObjectSize = imgsz;
Mat grayImage = image;
if( grayImage.channels() > 1 )
bool use_ocl = ocl::useOpenCL() &&
getFeatureType() == FeatureEvaluator::HAAR &&
!isOldFormatCascade() &&
data.isStumpBased() &&
maskGenerator.empty() &&
!outputRejectLevels &&
tryOpenCL;
if( !use_ocl )
{
Mat temp;
cvtColor(grayImage, temp, COLOR_BGR2GRAY);
grayImage = temp;
Mat image = _image.getMat();
if (maskGenerator)
maskGenerator->initializeMask(image);
grayImage = image;
if( CV_MAT_CN(imgtype) > 1 )
{
Mat temp;
cvtColor(grayImage, temp, COLOR_BGR2GRAY);
grayImage = temp;
}
imageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U);
}
else
{
UMat uimage = _image.getUMat();
if( CV_MAT_CN(imgtype) > 1 )
cvtColor(uimage, ugrayImage, COLOR_BGR2GRAY);
else
uimage.copyTo(ugrayImage);
uimageBuffer.create(imgsz.height + 1, imgsz.width + 1, CV_8U);
}
Mat imageBuffer(image.rows + 1, image.cols + 1, CV_8U);
Size sumSize0((imgsz.width + SUM_ALIGN) & -SUM_ALIGN, imgsz.height+1);
if( use_ocl )
{
ufacepos.create(1, MAX_FACES*4 + 1, CV_32S);
UMat ufacecount(ufacepos, Rect(0,0,1,1));
ufacecount.setTo(Scalar::all(0));
}
for( double factor = 1; ; factor *= scaleFactor )
{
Size originalWindowSize = getOriginalWindowSize();
Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) );
Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height );
Size scaledImageSize( cvRound( imgsz.width/factor ), cvRound( imgsz.height/factor ) );
Size processingRectSize( scaledImageSize.width - originalWindowSize.width,
scaledImageSize.height - originalWindowSize.height );
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break;
@ -1134,9 +1296,6 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( const Mat& image, std::v
if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
continue;
Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data );
resize( grayImage, scaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
int yStep;
if( getFeatureType() == cv::FeatureEvaluator::HOG )
{
@ -1147,16 +1306,46 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( const Mat& image, std::v
yStep = factor > 2. ? 1 : 2;
}
int stripCount, stripSize;
if( use_ocl )
{
UMat uscaledImage(uimageBuffer, Rect(0, 0, scaledImageSize.width, scaledImageSize.height));
resize( ugrayImage, uscaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
if( ocl_detectSingleScale( uscaledImage, processingRectSize, yStep, factor, sumSize0 ) )
continue;
if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
rejectLevels, levelWeights, outputRejectLevels ) )
break;
/////// if the OpenCL branch has been executed but failed, fall back to CPU: /////
tryOpenCL = false; // for this cascade do not try OpenCL anymore
// since we may already have some partial results from OpenCL code (unlikely, but still),
// we just recursively call the function again, but with tryOpenCL==false it will
// go with CPU route, so there is no infinite recursion
detectMultiScaleNoGrouping( _image, candidates, rejectLevels, levelWeights,
scaleFactor, minObjectSize, maxObjectSize,
outputRejectLevels);
return;
}
else
{
Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data );
resize( grayImage, scaledImage, scaledImageSize, 0, 0, INTER_LINEAR );
if( !detectSingleScale( scaledImage, processingRectSize, yStep, factor, candidates,
rejectLevels, levelWeights, sumSize0, outputRejectLevels ) )
break;
}
}
if( use_ocl && tryOpenCL )
{
Mat facepos = ufacepos.getMat(ACCESS_READ);
const int* fptr = facepos.ptr<int>();
int i, nfaces = fptr[0];
for( i = 0; i < nfaces; i++ )
{
candidates.push_back(Rect(fptr[i*4+1], fptr[i*4+2], fptr[i*4+3], fptr[i*4+4]));
}
}
}
@ -1167,21 +1356,21 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec
int flags, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
Mat image = _image.getMat();
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
CV_Assert( scaleFactor > 1 && _image.depth() == CV_8U );
if( empty() )
return;
if( isOldFormatCascade() )
{
Mat image = _image.getMat();
std::vector<CvAvgComp> fakeVecAvgComp;
detectMultiScaleOldFormat( image, oldCascade, objects, rejectLevels, levelWeights, fakeVecAvgComp, scaleFactor,
minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels );
}
else
{
detectMultiScaleNoGrouping( image, objects, rejectLevels, levelWeights, scaleFactor, minObjectSize, maxObjectSize,
detectMultiScaleNoGrouping( _image, objects, rejectLevels, levelWeights, scaleFactor, minObjectSize, maxObjectSize,
outputRejectLevels );
const double GROUP_EPS = 0.2;
if( outputRejectLevels )
@ -1235,6 +1424,12 @@ void CascadeClassifierImpl::detectMultiScale( InputArray _image, std::vector<Rec
}
}
CascadeClassifierImpl::Data::Data()
{
stageType = featureType = ncategories = maxNodesPerTree = 0;
}
bool CascadeClassifierImpl::Data::read(const FileNode &root)
{
static const float THRESHOLD_EPS = 1e-5f;
@ -1261,8 +1456,6 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
origWinSize.height = (int)root[CC_HEIGHT];
CV_Assert( origWinSize.height > 0 && origWinSize.width > 0 );
isStumpBased = (int)(root[CC_STAGE_PARAMS][CC_MAX_DEPTH]) == 1 ? true : false;
// load feature params
FileNode fn = root[CC_FEATURE_PARAMS];
if( fn.empty() )
@ -1280,8 +1473,10 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
stages.reserve(fn.size());
classifiers.clear();
nodes.clear();
stumps.clear();
FileNodeIterator it = fn.begin(), it_end = fn.end();
maxNodesPerTree = 0;
for( int si = 0; it != it_end; si++, ++it )
{
@ -1307,6 +1502,8 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
DTree tree;
tree.nodeCount = (int)internalNodes.size()/nodeStep;
maxNodesPerTree = std::max(maxNodesPerTree, tree.nodeCount);
classifiers.push_back(tree);
nodes.reserve(nodes.size() + tree.nodeCount);
@ -1342,11 +1539,34 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
}
}
if( isStumpBased() )
{
int nodeOfs = 0, leafOfs = 0;
size_t nstages = stages.size();
for( size_t stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
const Stage& stage = stages[stageIdx];
int ntrees = stage.ntrees;
for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 )
{
const DTreeNode& node = nodes[nodeOfs];
stumps.push_back(Stump(node.featureIdx, node.threshold,
leaves[leafOfs], leaves[leafOfs+1]));
}
}
}
return true;
}
bool CascadeClassifierImpl::read_(const FileNode& root)
{
tryOpenCL = true;
cascadeKernel = ocl::Kernel();
ustages.release();
ustumps.release();
if( !data.read(root) )
return false;

View File

@ -42,24 +42,29 @@ public:
bool isOldFormatCascade() const;
Size getOriginalWindowSize() const;
int getFeatureType() const;
bool setImage( InputArray );
void* getOldCascade();
void setMaskGenerator(const Ptr<MaskGenerator>& maskGenerator);
Ptr<MaskGenerator> getMaskGenerator();
protected:
bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
int stripSize, int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights, bool outputRejectLevels = false );
enum { SUM_ALIGN = 64 };
void detectMultiScaleNoGrouping( const Mat& image, std::vector<Rect>& candidates,
bool detectSingleScale( InputArray image, Size processingRectSize,
int yStep, double factor, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
Size sumSize0, bool outputRejectLevels = false );
bool ocl_detectSingleScale( InputArray image, Size processingRectSize,
int yStep, double factor, Size sumSize0 );
void detectMultiScaleNoGrouping( InputArray image, std::vector<Rect>& candidates,
std::vector<int>& rejectLevels, std::vector<double>& levelWeights,
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels = false );
enum { BOOST = 0
};
enum { MAX_FACES = 10000 };
enum { BOOST = 0 };
enum { DO_CANNY_PRUNING = CASCADE_DO_CANNY_PRUNING,
SCALE_IMAGE = CASCADE_SCALE_IMAGE,
FIND_BIGGEST_OBJECT = CASCADE_FIND_BIGGEST_OBJECT,
@ -80,7 +85,6 @@ protected:
template<class FEval>
friend int predictCategoricalStump( CascadeClassifierImpl& cascade, Ptr<FeatureEvaluator> &featureEvaluator, double& weight);
bool setImage( Ptr<FeatureEvaluator>& feval, const Mat& image);
int runAt( Ptr<FeatureEvaluator>& feval, Point pt, double& weight );
class Data
@ -106,13 +110,28 @@ protected:
float threshold;
};
struct Stump
{
Stump() {};
Stump(int _featureIdx, float _threshold, float _left, float _right)
: featureIdx(_featureIdx), threshold(_threshold), left(_left), right(_right) {}
int featureIdx;
float threshold;
float left;
float right;
};
Data();
bool read(const FileNode &node);
bool isStumpBased;
bool isStumpBased() const { return maxNodesPerTree == 1; }
int stageType;
int featureType;
int ncategories;
int maxNodesPerTree;
Size origWinSize;
std::vector<Stage> stages;
@ -120,6 +139,7 @@ protected:
std::vector<DTreeNode> nodes;
std::vector<float> leaves;
std::vector<int> subsets;
std::vector<Stump> stumps;
};
Data data;
@ -127,6 +147,12 @@ protected:
Ptr<CvHaarClassifierCascade> oldCascade;
Ptr<MaskGenerator> maskGenerator;
UMat ugrayImage, uimageBuffer;
UMat ufacepos, ustages, ustumps, usubsets;
ocl::Kernel cascadeKernel;
bool tryOpenCL;
Mutex mtx;
};
#define CC_CASCADE_PARAMS "cascadeParams"
@ -186,6 +212,36 @@ protected:
#define CALC_SUM(rect,offset) CALC_SUM_((rect)[0], (rect)[1], (rect)[2], (rect)[3], offset)
#define CV_SUM_OFS( p0, p1, p2, p3, sum, rect, step ) \
/* (x, y) */ \
(p0) = sum + (rect).x + (step) * (rect).y, \
/* (x + w, y) */ \
(p1) = sum + (rect).x + (rect).width + (step) * (rect).y, \
/* (x + w, y) */ \
(p2) = sum + (rect).x + (step) * ((rect).y + (rect).height), \
/* (x + w, y + h) */ \
(p3) = sum + (rect).x + (rect).width + (step) * ((rect).y + (rect).height)
#define CV_TILTED_OFS( p0, p1, p2, p3, tilted, rect, step ) \
/* (x, y) */ \
(p0) = tilted + (rect).x + (step) * (rect).y, \
/* (x - h, y + h) */ \
(p1) = tilted + (rect).x - (rect).height + (step) * ((rect).y + (rect).height), \
/* (x + w, y + w) */ \
(p2) = tilted + (rect).x + (rect).width + (step) * ((rect).y + (rect).width), \
/* (x + w - h, y + w + h) */ \
(p3) = tilted + (rect).x + (rect).width - (rect).height \
+ (step) * ((rect).y + (rect).width + (rect).height)
#define CALC_SUM_(p0, p1, p2, p3, offset) \
((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
#define CALC_SUM(rect,offset) CALC_SUM_((rect)[0], (rect)[1], (rect)[2], (rect)[3], offset)
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
#define CALC_SUM_OFS(rect, ptr) CALC_SUM_OFS_((rect)[0], (rect)[1], (rect)[2], (rect)[3], ptr)
//---------------------------------------------- HaarEvaluator ---------------------------------------
class HaarEvaluator : public FeatureEvaluator
@ -195,8 +251,6 @@ public:
{
Feature();
float calc( int offset ) const;
void updatePtrs( const Mat& sum );
bool read( const FileNode& node );
bool tilted;
@ -208,8 +262,19 @@ public:
Rect r;
float weight;
} rect[RECT_NUM];
};
const int* p[RECT_NUM][4];
struct OptFeature
{
OptFeature();
enum { RECT_NUM = Feature::RECT_NUM };
float calc( const int* pwin ) const;
void setOffsets( const Feature& _f, int step, int tofs );
int ofs[RECT_NUM][4];
float weight[4];
};
HaarEvaluator();
@ -219,28 +284,30 @@ public:
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HAAR; }
virtual bool setImage(const Mat&, Size origWinSize);
virtual bool setImage(InputArray, Size origWinSize, Size sumSize);
virtual bool setWindow(Point pt);
virtual Rect getNormRect() const;
virtual void getUMats(std::vector<UMat>& bufs);
double operator()(int featureIdx) const
{ return featuresPtr[featureIdx].calc(offset) * varianceNormFactor; }
{ return optfeaturesPtr[featureIdx].calc(pwin) * varianceNormFactor; }
virtual double calcOrd(int featureIdx) const
{ return (*this)(featureIdx); }
protected:
Size origWinSize;
Size origWinSize, sumSize0;
Ptr<std::vector<Feature> > features;
Feature* featuresPtr; // optimization
Ptr<std::vector<OptFeature> > optfeatures;
OptFeature* optfeaturesPtr; // optimization
bool hasTiltedFeatures;
Mat sum0, sqsum0, tilted0;
Mat sum, sqsum, tilted;
Mat sum0, sum, sqsum0, sqsum;
UMat usum0, usum, usqsum0, usqsum, ufbuf;
Rect normrect;
const int *p[4];
const double *pq[4];
int nofs[4];
int offset;
const int* pwin;
double varianceNormFactor;
};
@ -249,38 +316,46 @@ inline HaarEvaluator::Feature :: Feature()
tilted = false;
rect[0].r = rect[1].r = rect[2].r = Rect();
rect[0].weight = rect[1].weight = rect[2].weight = 0;
p[0][0] = p[0][1] = p[0][2] = p[0][3] =
p[1][0] = p[1][1] = p[1][2] = p[1][3] =
p[2][0] = p[2][1] = p[2][2] = p[2][3] = 0;
}
inline float HaarEvaluator::Feature :: calc( int _offset ) const
inline HaarEvaluator::OptFeature :: OptFeature()
{
float ret = rect[0].weight * CALC_SUM(p[0], _offset) + rect[1].weight * CALC_SUM(p[1], _offset);
weight[0] = weight[1] = weight[2] = 0.f;
if( rect[2].weight != 0.0f )
ret += rect[2].weight * CALC_SUM(p[2], _offset);
ofs[0][0] = ofs[0][1] = ofs[0][2] = ofs[0][3] =
ofs[1][0] = ofs[1][1] = ofs[1][2] = ofs[1][3] =
ofs[2][0] = ofs[2][1] = ofs[2][2] = ofs[2][3] = 0;
}
inline float HaarEvaluator::OptFeature :: calc( const int* ptr ) const
{
float ret = weight[0] * CALC_SUM_OFS(ofs[0], ptr) +
weight[1] * CALC_SUM_OFS(ofs[1], ptr);
if( weight[2] != 0.0f )
ret += weight[2] * CALC_SUM_OFS(ofs[2], ptr);
return ret;
}
inline void HaarEvaluator::Feature :: updatePtrs( const Mat& _sum )
inline void HaarEvaluator::OptFeature :: setOffsets( const Feature& _f, int step, int tofs )
{
const int* ptr = (const int*)_sum.data;
size_t step = _sum.step/sizeof(ptr[0]);
if (tilted)
weight[0] = _f.rect[0].weight;
weight[1] = _f.rect[1].weight;
weight[2] = _f.rect[2].weight;
Rect r2 = weight[2] > 0 ? _f.rect[2].r : Rect(0,0,0,0);
if (_f.tilted)
{
CV_TILTED_PTRS( p[0][0], p[0][1], p[0][2], p[0][3], ptr, rect[0].r, step );
CV_TILTED_PTRS( p[1][0], p[1][1], p[1][2], p[1][3], ptr, rect[1].r, step );
if (rect[2].weight)
CV_TILTED_PTRS( p[2][0], p[2][1], p[2][2], p[2][3], ptr, rect[2].r, step );
CV_TILTED_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], tofs, _f.rect[0].r, step );
CV_TILTED_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], tofs, _f.rect[1].r, step );
CV_TILTED_PTRS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], tofs, r2, step );
}
else
{
CV_SUM_PTRS( p[0][0], p[0][1], p[0][2], p[0][3], ptr, rect[0].r, step );
CV_SUM_PTRS( p[1][0], p[1][1], p[1][2], p[1][3], ptr, rect[1].r, step );
if (rect[2].weight)
CV_SUM_PTRS( p[2][0], p[2][1], p[2][2], p[2][3], ptr, rect[2].r, step );
CV_SUM_OFS( ofs[0][0], ofs[0][1], ofs[0][2], ofs[0][3], 0, _f.rect[0].r, step );
CV_SUM_OFS( ofs[1][0], ofs[1][1], ofs[1][2], ofs[1][3], 0, _f.rect[1].r, step );
CV_SUM_OFS( ofs[2][0], ofs[2][1], ofs[2][2], ofs[2][3], 0, r2, step );
}
}
@ -311,7 +386,7 @@ public:
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::LBP; }
virtual bool setImage(const Mat& image, Size _origWinSize);
virtual bool setImage(InputArray image, Size _origWinSize, Size);
virtual bool setWindow(Point pt);
int operator()(int featureIdx) const
@ -388,7 +463,7 @@ public:
virtual bool read( const FileNode& node );
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HOG; }
virtual bool setImage( const Mat& image, Size winSize );
virtual bool setImage( InputArray image, Size winSize, Size );
virtual bool setWindow( Point pt );
double operator()(int featureIdx) const
{
@ -533,30 +608,36 @@ template<class FEval>
inline int predictOrderedStump( CascadeClassifierImpl& cascade,
Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
{
int nodeOfs = 0, leafOfs = 0;
CV_Assert(!cascade.data.stumps.empty());
FEval& featureEvaluator = (FEval&)*_featureEvaluator;
float* cascadeLeaves = &cascade.data.leaves[0];
CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
int nstages = (int)cascade.data.stages.size();
double tmp = 0;
for( int stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx];
sum = 0.0;
const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[stageIdx];
tmp = 0;
int ntrees = stage.ntrees;
for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 )
for( int i = 0; i < ntrees; i++ )
{
CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs];
double value = featureEvaluator(node.featureIdx);
sum += cascadeLeaves[ value < node.threshold ? leafOfs : leafOfs + 1 ];
const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[i];
double value = featureEvaluator(stump.featureIdx);
tmp += value < stump.threshold ? stump.left : stump.right;
}
if( sum < stage.threshold )
if( tmp < stage.threshold )
{
sum = (double)tmp;
return -stageIdx;
}
cascadeStumps += ntrees;
}
sum = (double)tmp;
return 1;
}
@ -564,56 +645,44 @@ template<class FEval>
inline int predictCategoricalStump( CascadeClassifierImpl& cascade,
Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
{
CV_Assert(!cascade.data.stumps.empty());
int nstages = (int)cascade.data.stages.size();
int nodeOfs = 0, leafOfs = 0;
FEval& featureEvaluator = (FEval&)*_featureEvaluator;
size_t subsetSize = (cascade.data.ncategories + 31)/32;
int* cascadeSubsets = &cascade.data.subsets[0];
float* cascadeLeaves = &cascade.data.leaves[0];
CascadeClassifierImpl::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
const int* cascadeSubsets = &cascade.data.subsets[0];
const CascadeClassifierImpl::Data::Stump* cascadeStumps = &cascade.data.stumps[0];
const CascadeClassifierImpl::Data::Stage* cascadeStages = &cascade.data.stages[0];
#ifdef HAVE_TEGRA_OPTIMIZATION
float tmp = 0; // float accumulator -- float operations are quicker
#else
double tmp = 0;
#endif
for( int si = 0; si < nstages; si++ )
{
CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
const CascadeClassifierImpl::Data::Stage& stage = cascadeStages[si];
int wi, ntrees = stage.ntrees;
#ifdef HAVE_TEGRA_OPTIMIZATION
tmp = 0;
#else
sum = 0;
#endif
for( wi = 0; wi < ntrees; wi++ )
{
CascadeClassifierImpl::Data::DTreeNode& node = cascadeNodes[nodeOfs];
int c = featureEvaluator(node.featureIdx);
const int* subset = &cascadeSubsets[nodeOfs*subsetSize];
#ifdef HAVE_TEGRA_OPTIMIZATION
tmp += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
#else
sum += cascadeLeaves[ subset[c>>5] & (1 << (c & 31)) ? leafOfs : leafOfs+1];
#endif
nodeOfs++;
leafOfs += 2;
const CascadeClassifierImpl::Data::Stump& stump = cascadeStumps[wi];
int c = featureEvaluator(stump.featureIdx);
const int* subset = &cascadeSubsets[wi*subsetSize];
tmp += (subset[c>>5] & (1 << (c & 31))) ? stump.left : stump.right;
}
#ifdef HAVE_TEGRA_OPTIMIZATION
if( tmp < stage.threshold ) {
if( tmp < stage.threshold )
{
sum = (double)tmp;
return -si;
}
#else
if( sum < stage.threshold )
return -si;
#endif
cascadeStumps += ntrees;
cascadeSubsets += ntrees*subsetSize;
}
#ifdef HAVE_TEGRA_OPTIMIZATION
sum = (double)tmp;
#endif
return 1;
}
}

View File

@ -0,0 +1,275 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/* Haar features calculation */
#include "precomp.hpp"
#include <stdio.h>
namespace cv
{
/* field names */
#define ICV_HAAR_SIZE_NAME "size"
#define ICV_HAAR_STAGES_NAME "stages"
#define ICV_HAAR_TREES_NAME "trees"
#define ICV_HAAR_FEATURE_NAME "feature"
#define ICV_HAAR_RECTS_NAME "rects"
#define ICV_HAAR_TILTED_NAME "tilted"
#define ICV_HAAR_THRESHOLD_NAME "threshold"
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
#define ICV_HAAR_PARENT_NAME "parent"
#define ICV_HAAR_NEXT_NAME "next"
namespace haar_cvt
{
struct HaarFeature
{
enum { RECT_NUM = 3 };
HaarFeature()
{
tilted = false;
for( int i = 0; i < RECT_NUM; i++ )
{
rect[i].r = Rect(0,0,0,0);
rect[i].weight = 0.f;
}
}
bool tilted;
struct
{
Rect r;
float weight;
} rect[RECT_NUM];
};
struct HaarClassifierNode
{
HaarClassifierNode()
{
f = left = right = 0;
threshold = 0.f;
}
int f, left, right;
float threshold;
};
struct HaarClassifier
{
std::vector<HaarClassifierNode> nodes;
std::vector<float> leaves;
};
struct HaarStageClassifier
{
double threshold;
std::vector<HaarClassifier> weaks;
};
static bool convert(const String& oldcascade, const String& newcascade)
{
FileStorage oldfs(oldcascade, FileStorage::READ);
if( !oldfs.isOpened() )
return false;
FileNode oldroot = oldfs.getFirstTopLevelNode();
FileNode sznode = oldroot[ICV_HAAR_SIZE_NAME];
if( sznode.empty() )
return false;
Size cascadesize;
cascadesize.width = (int)sznode[0];
cascadesize.height = (int)sznode[1];
std::vector<HaarFeature> features;
int i, j, k, n;
FileNode stages_seq = oldroot[ICV_HAAR_STAGES_NAME];
int nstages = (int)stages_seq.size();
std::vector<HaarStageClassifier> stages(nstages);
for( i = 0; i < nstages; i++ )
{
FileNode stagenode = stages_seq[i];
HaarStageClassifier& stage = stages[i];
stage.threshold = (double)stagenode[ICV_HAAR_STAGE_THRESHOLD_NAME];
FileNode weaks_seq = stagenode[ICV_HAAR_TREES_NAME];
int nweaks = (int)weaks_seq.size();
stage.weaks.resize(nweaks);
for( j = 0; j < nweaks; j++ )
{
HaarClassifier& weak = stage.weaks[j];
FileNode weaknode = weaks_seq[j];
int nnodes = (int)weaknode.size();
for( n = 0; n < nnodes; n++ )
{
FileNode nnode = weaknode[n];
FileNode fnode = nnode[ICV_HAAR_FEATURE_NAME];
HaarFeature f;
HaarClassifierNode node;
node.f = (int)features.size();
f.tilted = (int)fnode[ICV_HAAR_TILTED_NAME] != 0;
FileNode rects_seq = fnode[ICV_HAAR_RECTS_NAME];
int nrects = (int)rects_seq.size();
for( k = 0; k < nrects; k++ )
{
FileNode rnode = rects_seq[k];
f.rect[k].r.x = (int)rnode[0];
f.rect[k].r.y = (int)rnode[1];
f.rect[k].r.width = (int)rnode[2];
f.rect[k].r.height = (int)rnode[3];
f.rect[k].weight = (float)rnode[4];
}
features.push_back(f);
node.threshold = nnode[ICV_HAAR_THRESHOLD_NAME];
FileNode leftValNode = nnode[ICV_HAAR_LEFT_VAL_NAME];
if( !leftValNode.empty() )
{
node.left = -(int)weak.leaves.size();
weak.leaves.push_back((float)leftValNode);
}
else
{
node.left = (int)nnode[ICV_HAAR_LEFT_NODE_NAME];
}
FileNode rightValNode = nnode[ICV_HAAR_RIGHT_VAL_NAME];
if( !rightValNode.empty() )
{
node.right = -(int)weak.leaves.size();
weak.leaves.push_back((float)rightValNode);
}
else
{
node.right = (int)nnode[ICV_HAAR_RIGHT_NODE_NAME];
}
weak.nodes.push_back(node);
}
}
}
FileStorage newfs(newcascade, FileStorage::WRITE);
if( !newfs.isOpened() )
return false;
int maxWeakCount = 0, nfeatures = (int)features.size();
for( i = 0; i < nstages; i++ )
maxWeakCount = std::max(maxWeakCount, (int)stages[i].weaks.size());
newfs << "cascade" << "{:opencv-cascade-classifier"
<< "stageType" << "BOOST"
<< "featureType" << "HAAR"
<< "height" << cascadesize.width
<< "width" << cascadesize.height
<< "stageParams" << "{"
<< "maxWeakCount" << (int)maxWeakCount
<< "}"
<< "featureParams" << "{"
<< "maxCatCount" << 0
<< "}"
<< "stageNum" << (int)nstages
<< "stages" << "[";
for( i = 0; i < nstages; i++ )
{
int nweaks = (int)stages[i].weaks.size();
newfs << "{" << "maxWeakCount" << (int)nweaks
<< "stageThreshold" << stages[i].threshold
<< "weakClassifiers" << "[";
for( j = 0; j < nweaks; j++ )
{
const HaarClassifier& c = stages[i].weaks[j];
newfs << "{" << "internalNodes" << "[";
int nnodes = (int)c.nodes.size(), nleaves = (int)c.leaves.size();
for( k = 0; k < nnodes; k++ )
newfs << c.nodes[k].left << c.nodes[k].right
<< c.nodes[k].f << c.nodes[k].threshold;
newfs << "]" << "leafValues" << "[";
for( k = 0; k < nleaves; k++ )
newfs << c.leaves[k];
newfs << "]" << "}";
}
newfs << "]" << "}";
}
newfs << "]"
<< "features" << "[";
for( i = 0; i < nfeatures; i++ )
{
const HaarFeature& f = features[i];
newfs << "{" << "rects" << "[";
for( j = 0; j < HaarFeature::RECT_NUM; j++ )
{
if( j >= 2 && fabs(f.rect[j].weight) < FLT_EPSILON )
break;
newfs << "[" << f.rect[j].r.x << f.rect[j].r.y <<
f.rect[j].r.width << f.rect[j].r.height << f.rect[j].weight << "]";
}
newfs << "]";
if( f.tilted )
newfs << "tilted" << 1;
newfs << "}";
}
newfs << "]" << "}";
return true;
}
}
bool CascadeClassifier::convert(const String& oldcascade, const String& newcascade)
{
bool ok = haar_cvt::convert(oldcascade, newcascade);
if( !ok && newcascade.size() > 0 )
remove(newcascade.c_str());
return ok;
}
}

View File

@ -0,0 +1,185 @@
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
typedef struct __attribute__((aligned(4))) OptFeature
{
int4 ofs[3] __attribute__((aligned (4)));
float4 weight __attribute__((aligned (4)));
}
OptFeature;
typedef struct __attribute__((aligned(4))) Stump
{
int featureIdx __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
float left __attribute__((aligned (4)));
float right __attribute__((aligned (4)));
}
Stump;
typedef struct __attribute__((aligned (4))) Stage
{
int first __attribute__((aligned (4)));
int ntrees __attribute__((aligned (4)));
float threshold __attribute__((aligned (4)));
}
Stage;
__kernel void runHaarClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight, vsval;
int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++ )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
if( weight.z > 0 )
{
ofs = f->ofs[2];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < stump->threshold*nf) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#if 0
__kernel void runLBPClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
int nstages,
__global const Stage* stages,
__global const Stump* stumps,
__global const int* bitsets,
int bitsetSize,
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight;
int4 ofs;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
// compute LBP feature to val
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
}
if( s < stages[stageIdx].threshold )
break;
}
if( stageIdx == nstages )
{
int nfaces = atomic_inc(facepos);
if( nfaces < maxFaces )
{
volatile __global int* face = facepos + 1 + nfaces*4;
face[0] = convert_int_rte(ix*factor);
face[1] = convert_int_rte(iy*factor);
face[2] = convert_int_rte(windowsize.x*factor);
face[3] = convert_int_rte(windowsize.y*factor);
}
}
}
}
#endif

View File

@ -1,45 +1,11 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_OCL_HPP__
#define __OPENCV_OCL_HPP__

View File

@ -48,43 +48,53 @@ using namespace cv;
using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
}
namespace cv { namespace ocl {
void fft_teardown();
}}
void cv::ocl::fft_teardown(){}
} }
void cv::ocl::fft_teardown() { }
#else
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
namespace cv
{
namespace ocl
{
void fft_setup();
void fft_teardown();
enum FftType
{
C2R = 1, // complex to complex
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
};
struct FftPlan
{
protected:
clAmdFftPlanHandle plHandle;
FftPlan& operator=(const FftPlan&);
public:
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
~FftPlan();
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
const Size dft_size;
const int src_step, dst_step;
const int depth;
const int flags;
const FftType type;
};
class PlanCache
{
protected:
@ -105,10 +115,11 @@ namespace cv
planCache = new PlanCache();
return planCache;
}
// return a baked plan->
// if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it.
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
// remove a single plan from the store
// return true if the plan is successfully removed
@ -117,6 +128,7 @@ namespace cv
};
}
}
PlanCache* PlanCache::planCache = NULL;
void cv::ocl::fft_setup()
@ -128,9 +140,11 @@ void cv::ocl::fft_setup()
}
if (pCache.setupData == NULL)
pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
pCache.started = true;
}
void cv::ocl::fft_teardown()
{
PlanCache& pCache = *PlanCache::getPlanCache();
@ -154,8 +168,8 @@ void cv::ocl::fft_teardown()
}
// bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type)
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type)
{
fft_setup();
@ -184,20 +198,20 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
case C2C:
inLayout = CLFFT_COMPLEX_INTERLEAVED;
outLayout = CLFFT_COMPLEX_INTERLEAVED;
clStridesIn[1] = src_step / (2*sizeof(float));
clStridesOut[1] = clStridesIn[1];
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break;
case R2C:
inLayout = CLFFT_REAL;
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
clStridesIn[1] = src_step / sizeof(float);
clStridesOut[1] = dst_step / (2*sizeof(float));
clStridesIn[1] = src_step / CV_ELEM_SIZE(_depth);
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
break;
case C2R:
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (2*sizeof(float));
clStridesOut[1] = dst_step / sizeof(float);
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth);
break;
default:
//std::runtime_error("does not support this convertion!");
@ -211,6 +225,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
@ -225,6 +240,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
//ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
}
cv::ocl::FftPlan::~FftPlan()
{
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
@ -242,7 +258,7 @@ cv::ocl::PlanCache::~PlanCache()
fft_teardown();
}
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type)
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
{
PlanCache& pCache = *PlanCache::getPlanCache();
std::vector<FftPlan *>& pStore = pCache.planStore;
@ -256,6 +272,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
plan->flags == _flags &&
plan->src_step == _src_step &&
plan->dst_step == _dst_step &&
plan->depth == _depth &&
plan->type == _type
)
{
@ -263,7 +280,7 @@ FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_ste
}
}
// no baked plan is found
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type);
FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type);
pStore.push_back(newPlan);
return newPlan;
}
@ -286,6 +303,8 @@ bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
CV_Assert(cv::ocl::haveAmdFft());
if(dft_size == Size(0, 0))
{
dft_size = src.size();
@ -296,9 +315,6 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
// the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
// similar assertions with cuda module
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
//bool is_1d_input = (src.rows == 1);
//int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE;
@ -306,6 +322,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
int depth = src.depth();
// We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output);
@ -314,14 +331,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
switch(type)
{
case C2C:
dst.create(src.rows, src.cols, CV_32FC2);
dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2));
printf("C2C\n");
break;
case R2C:
dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2));
printf("R2C\n");
break;
case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
dst.create(src.rows, dft_size.width, CV_32FC1);
dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1));
printf("C2R\n");
break;
default:
//std::runtime_error("does not support this convertion!");
@ -329,7 +349,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
throw std::exception();
break;
}
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle();
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle();
//get the buffersize
size_t buffersize = 0;
@ -356,7 +376,7 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
openCLFree(clMedBuffer);
}
//fft_teardown();
fft_teardown();
}
#endif

View File

@ -75,6 +75,7 @@
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/ocl.hpp"
#define __ATI__

View File

@ -50,32 +50,36 @@ using namespace std;
////////////////////////////////////////////////////////////////////////////
// Dft
PARAM_TEST_CASE(Dft, cv::Size, int)
PARAM_TEST_CASE(Dft, cv::Size, int, bool)
{
cv::Size dft_size;
int dft_flags;
bool doubleFP;
virtual void SetUp()
{
dft_size = GET_PARAM(0);
dft_flags = GET_PARAM(1);
doubleFP = GET_PARAM(2);
}
};
OCL_TEST_P(Dft, C2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC2 : CV_32FC2, 0.0, 100.0);
cv::Mat b_gold;
cv::ocl::oclMat d_b;
cv::dft(a, b_gold, dft_flags);
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags);
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4);
}
OCL_TEST_P(Dft, R2C)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 100.0);
cv::Mat b_gold, b_gold_roi;
cv::ocl::oclMat d_b, d_c;
@ -92,7 +96,7 @@ OCL_TEST_P(Dft, R2C)
OCL_TEST_P(Dft, R2CthenC2R)
{
cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
cv::Mat a = randomMat(dft_size, doubleFP ? CV_64FC1 : CV_32FC1, 0.0, 10.0);
cv::ocl::oclMat d_b, d_c;
cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0);
@ -102,7 +106,7 @@ OCL_TEST_P(Dft, R2CthenC2R)
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE), testing::Bool()));
////////////////////////////////////////////////////////////////////////////
// MulSpectrums

View File

@ -43,15 +43,25 @@
int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
{
Mat mat = _probImage.getMat();
Size size;
int cn;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
if (isUMat)
umat = _probImage.getUMat(), cn = umat.channels(), size = umat.size();
else
mat = _probImage.getMat(), cn = mat.channels(), size = mat.size();
Rect cur_rect = window;
CV_Assert( mat.channels() == 1 );
CV_Assert( cn == 1 );
if( window.height <= 0 || window.width <= 0 )
CV_Error( Error::StsBadArg, "Input window has non-positive sizes" );
window = window & Rect(0, 0, mat.cols, mat.rows);
window = window & Rect(0, 0, size.width, size.height);
double eps = (criteria.type & TermCriteria::EPS) ? std::max(criteria.epsilon, 0.) : 1.;
eps = cvRound(eps*eps);
@ -59,16 +69,16 @@ int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
for( i = 0; i < niters; i++ )
{
cur_rect = cur_rect & Rect(0, 0, mat.cols, mat.rows);
cur_rect = cur_rect & Rect(0, 0, size.width, size.height);
if( cur_rect == Rect() )
{
cur_rect.x = mat.cols/2;
cur_rect.y = mat.rows/2;
cur_rect.x = size.width/2;
cur_rect.y = size.height/2;
}
cur_rect.width = std::max(cur_rect.width, 1);
cur_rect.height = std::max(cur_rect.height, 1);
Moments m = moments(mat(cur_rect));
Moments m = isUMat ? moments(umat(cur_rect)) : moments(mat(cur_rect));
// Calculating center of mass
if( fabs(m.m00) < DBL_EPSILON )
@ -77,8 +87,8 @@ int cv::meanShift( InputArray _probImage, Rect& window, TermCriteria criteria )
int dx = cvRound( m.m10/m.m00 - window.width*0.5 );
int dy = cvRound( m.m01/m.m00 - window.height*0.5 );
int nx = std::min(std::max(cur_rect.x + dx, 0), mat.cols - cur_rect.width);
int ny = std::min(std::max(cur_rect.y + dy, 0), mat.rows - cur_rect.height);
int nx = std::min(std::max(cur_rect.x + dx, 0), size.width - cur_rect.width);
int ny = std::min(std::max(cur_rect.y + dy, 0), size.height - cur_rect.height);
dx = nx - cur_rect.x;
dy = ny - cur_rect.y;
@ -99,9 +109,17 @@ cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
TermCriteria criteria )
{
const int TOLERANCE = 10;
Mat mat = _probImage.getMat();
Size size;
Mat mat;
UMat umat;
bool isUMat = _probImage.isUMat();
meanShift( mat, window, criteria );
if (isUMat)
umat = _probImage.getUMat(), size = umat.size();
else
mat = _probImage.getMat(), size = mat.size();
meanShift( _probImage, window, criteria );
window.x -= TOLERANCE;
if( window.x < 0 )
@ -112,15 +130,15 @@ cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
window.y = 0;
window.width += 2 * TOLERANCE;
if( window.x + window.width > mat.cols )
window.width = mat.cols - window.x;
if( window.x + window.width > size.width )
window.width = size.width - window.x;
window.height += 2 * TOLERANCE;
if( window.y + window.height > mat.rows )
window.height = mat.rows - window.y;
if( window.y + window.height > size.height )
window.height = size.height - window.y;
// Calculating moments in new center mass
Moments m = moments( mat(window) );
Moments m = isUMat ? moments(umat(window)) : moments(mat(window));
double m00 = m.m00, m10 = m.m10, m01 = m.m01;
double mu11 = m.mu11, mu20 = m.mu20, mu02 = m.mu02;
@ -164,19 +182,19 @@ cv::RotatedRect cv::CamShift( InputArray _probImage, Rect& window,
int t1 = cvRound( fabs( width * sn ));
t0 = MAX( t0, t1 ) + 2;
window.width = MIN( t0, (mat.cols - _xc) * 2 );
window.width = MIN( t0, (size.width - _xc) * 2 );
t0 = cvRound( fabs( length * sn ));
t1 = cvRound( fabs( width * cs ));
t0 = MAX( t0, t1 ) + 2;
window.height = MIN( t0, (mat.rows - _yc) * 2 );
window.height = MIN( t0, (size.height - _yc) * 2 );
window.x = MAX( 0, _xc - window.width / 2 );
window.y = MAX( 0, _yc - window.height / 2 );
window.width = MIN( mat.cols - window.x, window.width );
window.height = MIN( mat.rows - window.y, window.height );
window.width = MIN( size.width - window.x, window.width );
window.height = MIN( size.height - window.y, window.height );
RotatedRect box;
box.size.height = (float)length;

View File

@ -98,6 +98,8 @@ int main( int argc, const char** argv )
return -1;
}
cout << "old cascade: " << (cascade.isOldFormatCascade() ? "TRUE" : "FALSE") << endl;
if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') )
{
int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0';
@ -199,13 +201,12 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
t = (double)getTickCount();
cvtColor( img, gray, COLOR_BGR2GRAY );
resize( gray, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
equalizeHist( smallImg, smallImg );
resize( img, smallImg, Size(), scale0, scale0, INTER_LINEAR );
cvtColor( smallImg, gray, COLOR_BGR2GRAY );
equalizeHist( gray, gray );
cascade.detectMultiScale( smallImg, faces,
1.1, 2, 0
cascade.detectMultiScale( gray, faces,
1.1, 3, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
|CASCADE_SCALE_IMAGE
@ -213,8 +214,8 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
Size(30, 30) );
if( tryflip )
{
flip(smallImg, smallImg, 1);
cascade.detectMultiScale( smallImg, faces2,
flip(gray, gray, 1);
cascade.detectMultiScale( gray, faces2,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT
//|CASCADE_DO_ROUGH_SEARCH
@ -227,7 +228,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
}
}
t = (double)getTickCount() - t;
cvtColor(smallImg, canvas, COLOR_GRAY2BGR);
smallImg.copyTo(canvas);
double fps = getTickFrequency()/t;
@ -255,7 +256,7 @@ void detectAndDraw( UMat& img, Mat& canvas, CascadeClassifier& cascade,
color, 3, 8, 0);
if( nestedCascade.empty() )
continue;
UMat smallImgROI = smallImg(*r);
UMat smallImgROI = gray(*r);
nestedCascade.detectMultiScale( smallImgROI, nestedObjects,
1.1, 2, 0
//|CASCADE_FIND_BIGGEST_OBJECT