2012-07-17 01:08:14 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
|
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
2013-02-08 11:41:46 +08:00
|
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
2012-07-17 01:08:14 +08:00
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// @Authors
|
|
|
|
// Niko Li, newlife20080214@gmail.com
|
2013-02-08 11:41:46 +08:00
|
|
|
// Yao Wang, bitwangyaoyao@gmail.com
|
2012-07-17 01:08:14 +08:00
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#include "precomp.hpp"
|
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
// Alignment granularity used when rounding up the row step of a device matrix.
#define ALIGN 32
// Rounds `step` up to the next multiple of ALIGN.
// The whole expansion is parenthesized so the macro evaluates as a single value
// when it is embedded in a larger expression (e.g. `x / GPU_MATRIX_MALLOC_STEP(y)`).
#define GPU_MATRIX_MALLOC_STEP(step) ((((step) + ALIGN - 1) / ALIGN) * ALIGN)
|
|
|
|
|
|
|
|
using namespace cv;
|
|
|
|
using namespace cv::ocl;
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////// oclMat ////////////////////////////////
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
//helper routines
|
|
|
|
namespace cv
|
|
|
|
{
|
|
|
|
namespace ocl
|
|
|
|
{
|
|
|
|
///////////////////////////OpenCL kernel strings///////////////////////////
|
|
|
|
extern const char *operator_copyToM;
|
|
|
|
extern const char *operator_convertTo;
|
|
|
|
extern const char *operator_setTo;
|
|
|
|
extern const char *operator_setToM;
|
2012-10-11 16:22:47 +08:00
|
|
|
extern const char *convertC3C4;
|
2013-02-27 17:32:32 +08:00
|
|
|
extern DevMemType gDeviceMemType;
|
|
|
|
extern DevMemRW gDeviceMemRW;
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-21 14:05:29 +08:00
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
// convert_C3C4
|
2013-01-25 18:31:34 +08:00
|
|
|
static void convert_C3C4(const cl_mem &src, oclMat &dst)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
2012-10-11 16:22:47 +08:00
|
|
|
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
|
|
|
|
int pixel_end = dst.wholecols * dst.wholerows - 1;
|
2012-07-17 01:08:14 +08:00
|
|
|
Context *clCxt = dst.clCxt;
|
|
|
|
string kernelName = "convertC3C4";
|
2012-10-11 16:22:47 +08:00
|
|
|
char compile_option[32];
|
2012-08-03 14:08:36 +08:00
|
|
|
switch(dst.depth())
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=uchar4");
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=char4");
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=ushort4");
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=short4");
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=int4");
|
|
|
|
break;
|
|
|
|
case 5:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=float4");
|
|
|
|
break;
|
|
|
|
case 6:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=double4");
|
|
|
|
break;
|
2012-10-11 16:22:47 +08:00
|
|
|
default:
|
|
|
|
CV_Error(CV_StsUnsupportedFormat, "unknown depth");
|
2012-08-03 14:08:36 +08:00
|
|
|
}
|
2012-07-17 01:08:14 +08:00
|
|
|
vector< pair<size_t, const void *> > args;
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.wholecols));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.wholerows));
|
2012-08-03 14:08:36 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));
|
2012-07-17 01:08:14 +08:00
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
size_t globalThreads[3] = {((dst.wholecols * dst.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};
|
2012-07-17 01:08:14 +08:00
|
|
|
size_t localThreads[3] = {256, 1, 1};
|
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
// convert_C4C3
|
2013-01-25 18:31:34 +08:00
|
|
|
static void convert_C4C3(const oclMat &src, cl_mem &dst)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
2012-10-11 16:22:47 +08:00
|
|
|
int srcStep_in_pixel = src.step1() / src.oclchannels();
|
|
|
|
int pixel_end = src.wholecols * src.wholerows - 1;
|
2012-07-17 01:08:14 +08:00
|
|
|
Context *clCxt = src.clCxt;
|
|
|
|
string kernelName = "convertC4C3";
|
2012-10-11 16:22:47 +08:00
|
|
|
char compile_option[32];
|
2012-08-03 14:08:36 +08:00
|
|
|
switch(src.depth())
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=uchar4");
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=char4");
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=ushort4");
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=short4");
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=int4");
|
|
|
|
break;
|
|
|
|
case 5:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=float4");
|
|
|
|
break;
|
|
|
|
case 6:
|
|
|
|
sprintf(compile_option, "-D GENTYPE4=double4");
|
|
|
|
break;
|
2012-10-11 16:22:47 +08:00
|
|
|
default:
|
|
|
|
CV_Error(CV_StsUnsupportedFormat, "unknown depth");
|
2012-08-03 14:08:36 +08:00
|
|
|
}
|
2012-07-17 01:08:14 +08:00
|
|
|
|
|
|
|
vector< pair<size_t, const void *> > args;
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
|
2012-08-03 14:08:36 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));
|
2012-07-17 01:08:14 +08:00
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};
|
2012-07-17 01:08:14 +08:00
|
|
|
size_t localThreads[3] = {256, 1, 1};
|
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void cv::ocl::oclMat::upload(const Mat &m)
|
|
|
|
{
|
|
|
|
CV_DbgAssert(!m.empty());
|
|
|
|
Size wholeSize;
|
|
|
|
Point ofs;
|
|
|
|
m.locateROI(wholeSize, ofs);
|
2012-10-11 16:22:47 +08:00
|
|
|
// int type = m.type();
|
|
|
|
// if(m.oclchannels() == 3)
|
|
|
|
//{
|
|
|
|
// type = CV_MAKETYPE(m.depth(), 4);
|
|
|
|
//}
|
|
|
|
create(wholeSize, m.type());
|
2012-07-17 01:08:14 +08:00
|
|
|
|
2012-08-03 14:08:36 +08:00
|
|
|
if(m.channels() == 3)
|
|
|
|
{
|
2012-10-11 16:22:47 +08:00
|
|
|
int pitch = wholeSize.width * 3 * m.elemSize1();
|
|
|
|
int tail_padding = m.elemSize1() * 3072;
|
|
|
|
int err;
|
2013-03-18 05:59:24 +08:00
|
|
|
cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
|
2012-10-11 16:22:47 +08:00
|
|
|
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
|
|
|
openCLVerifyCall(err);
|
|
|
|
|
|
|
|
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
|
2012-10-22 15:14:22 +08:00
|
|
|
convert_C3C4(temp, *this);
|
2012-10-11 16:22:47 +08:00
|
|
|
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
|
|
|
|
//int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
|
|
|
|
// 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
|
|
|
|
//openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
|
|
|
|
// 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
|
|
|
|
//for(int i=0;i<wholeSize.height;i++)
|
|
|
|
//{
|
|
|
|
// int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
|
|
|
|
// for(int j=0;j<wholeSize.width;j++)
|
|
|
|
// {
|
|
|
|
// if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
|
|
|
|
// printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
|
|
|
|
// i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
|
|
|
|
// }
|
|
|
|
//}
|
|
|
|
//delete []cputemp;
|
|
|
|
//delete []cpudata;
|
|
|
|
openCLSafeCall(clReleaseMemObject(temp));
|
2012-08-03 14:08:36 +08:00
|
|
|
}
|
|
|
|
else
|
2012-10-11 16:22:47 +08:00
|
|
|
{
|
|
|
|
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
|
|
|
|
}
|
2012-07-17 01:08:14 +08:00
|
|
|
|
|
|
|
rows = m.rows;
|
|
|
|
cols = m.cols;
|
|
|
|
offset = ofs.y * step + ofs.x * elemSize();
|
2012-10-11 16:22:47 +08:00
|
|
|
//download_channels = m.channels();
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
|
2013-06-21 14:05:29 +08:00
|
|
|
// Wraps this oclMat in an _InputArray: the wrapper is tagged with OCL_MAT and
// carries a pointer to this object.
cv::ocl::oclMat::operator cv::_InputArray()
{
    _InputArray wrapper;
    wrapper.obj = reinterpret_cast<void *>(this);
    wrapper.flags = cv::_InputArray::OCL_MAT;
    return wrapper;
}
|
|
|
|
|
|
|
|
// Wraps this oclMat in an _OutputArray: the wrapper is tagged with OCL_MAT and
// carries a pointer to this object.
cv::ocl::oclMat::operator cv::_OutputArray()
{
    _OutputArray wrapper;
    wrapper.obj = reinterpret_cast<void *>(this);
    wrapper.flags = cv::_InputArray::OCL_MAT;
    return wrapper;
}
|
|
|
|
|
2013-06-21 14:51:23 +08:00
|
|
|
// Returns the oclMat wrapped by `src`.
// Asserts that the array carries the OCL_MAT flag before reinterpreting its object pointer.
cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat *wrapped = reinterpret_cast<oclMat*>(src.obj);
    return *wrapped;
}
|
|
|
|
|
2013-06-21 14:51:23 +08:00
|
|
|
// Returns the oclMat wrapped by `src`.
// Asserts that the array carries the OCL_MAT flag before reinterpreting its object pointer.
cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
{
    CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
    oclMat *wrapped = reinterpret_cast<oclMat*>(src.obj);
    return *wrapped;
}
|
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
// Downloads this device matrix into the host matrix `m`.
//
// `m` is (re)created with the whole size (wholerows x wholecols) and type of this
// matrix, the whole device buffer is copied, and `m` is then adjusted to the same
// region of interest as this matrix.
//
// 3-channel data takes a detour: convert_C4C3() writes into a temporary device buffer
// with a 3-element pixel pitch, which is then copied to the host. The temporary buffer
// is released on every path, including when the conversion or the copy throws.
void cv::ocl::oclMat::download(cv::Mat &m) const
{
    CV_DbgAssert(!this->empty());

    m.create(wholerows, wholecols, type());

    if (m.channels() == 3)
    {
        int pitch = wholecols * 3 * m.elemSize1();
        int tail_padding = m.elemSize1() * 3072;

        // Buffer size: pitch * wholerows rounded up to a multiple of tail_padding.
        // Computed in size_t so that large images do not overflow int arithmetic.
        const size_t tempBytes =
            ((size_t)pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding;

        int err;
        cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
                                     tempBytes, 0, &err);
        openCLVerifyCall(err);

        try
        {
            convert_C4C3(*this, temp);
            openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows,
                           clMemcpyDeviceToHost, 3);
        }
        catch (...)
        {
            // Do not leak the temporary device buffer when the conversion/copy fails.
            clReleaseMemObject(temp);
            throw;
        }
        openCLSafeCall(clReleaseMemObject(temp));
    }
    else
    {
        openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows,
                       clMemcpyDeviceToHost);
    }

    // Shrink the host matrix from the whole buffer down to this matrix's ROI.
    Size wholesize;
    Point ofs;
    locateROI(wholesize, ofs);
    m.adjustROI(-ofs.y, ofs.y + rows - wholerows, -ofs.x, ofs.x + cols - wholecols);
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
////////////////////////////////// CopyTo /////////////////////////////////
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
2013-01-25 18:31:34 +08:00
|
|
|
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, string kernelName)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
|
|
|
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
|
2012-09-12 10:20:04 +08:00
|
|
|
src.rows == dst.rows && src.cols == dst.cols
|
2012-10-11 16:22:47 +08:00
|
|
|
&& mask.type() == CV_8UC1);
|
2012-07-17 01:08:14 +08:00
|
|
|
|
|
|
|
vector<pair<size_t , const void *> > args;
|
|
|
|
|
2012-09-12 10:20:04 +08:00
|
|
|
std::string string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
|
|
|
|
{"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
|
|
|
|
{"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
|
|
|
|
{"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
|
2012-07-17 01:08:14 +08:00
|
|
|
};
|
2012-10-11 16:22:47 +08:00
|
|
|
char compile_option[32];
|
|
|
|
sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
|
2012-07-17 01:08:14 +08:00
|
|
|
size_t localThreads[3] = {16, 16, 1};
|
2013-09-16 19:11:56 +08:00
|
|
|
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
2012-07-17 01:08:14 +08:00
|
|
|
|
|
|
|
int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
|
|
|
|
int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
|
|
|
|
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
|
|
|
|
|
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
|
2012-10-11 16:22:47 +08:00
|
|
|
localThreads, args, -1, -1, compile_option);
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Copies this matrix into `mat`, which is (re)created with this matrix's size and type.
// With an empty mask the data is copied with openCLCopyBuffer2D; otherwise the copy
// is delegated to the "copy_to_with_mask" kernel.
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
{
    const bool useMask = !mask.empty();

    if (!useMask)
    {
        CV_DbgAssert(!this->empty());
    }

    mat.create(size(), type());

    if (useMask)
    {
        copy_to_with_mask(*this, mat, mask, "copy_to_with_mask");
        return;
    }

    openCLCopyBuffer2D(clCxt, mat.data, mat.step, mat.offset,
                       data, step, cols * elemSize(), rows, offset);
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////// ConvertTo ////////////////////////////////
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
2013-01-25 18:31:34 +08:00
|
|
|
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
2013-09-11 17:35:39 +08:00
|
|
|
string kernelName = "convert_to";
|
2012-07-17 01:08:14 +08:00
|
|
|
float alpha_f = alpha, beta_f = beta;
|
2013-09-11 17:35:39 +08:00
|
|
|
int sdepth = src.depth(), ddepth = dst.depth();
|
|
|
|
int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1();
|
|
|
|
int cols1 = src.cols * src.oclchannels();
|
|
|
|
|
|
|
|
char buildOptions[150], convertString[50];
|
|
|
|
const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
|
|
|
sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]);
|
|
|
|
sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth],
|
|
|
|
typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString);
|
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
|
|
|
|
vector<pair<size_t , const void *> > args;
|
2013-09-11 17:35:39 +08:00
|
|
|
|
|
|
|
size_t localThreads[3] = { 16, 16, 1 };
|
|
|
|
size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0],
|
|
|
|
divUp(dst.rows, localThreads[1]) * localThreads[1], 1 };
|
|
|
|
|
|
|
|
int doffset1 = dst.offset / dst.elemSize1();
|
|
|
|
int soffset1 = src.offset / src.elemSize1();
|
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
2013-09-11 17:35:39 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols1 ));
|
2012-07-17 01:08:14 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
2013-09-11 17:35:39 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&sstep1 ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&soffset1 ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dstep1 ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&doffset1 ));
|
2012-07-17 01:08:14 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
|
2013-09-11 17:35:39 +08:00
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
|
2013-09-11 17:35:39 +08:00
|
|
|
localThreads, args, -1, -1, buildOptions);
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
// Converts this matrix to depth `rtype` (channel count is always kept) with optional
// scaling, writing the result to `dst`.
//   dst   - destination; (re)created with this matrix's size and the resulting type
//   rtype - requested type; negative means "same type as this matrix"
//   alpha, beta - scale and shift handed to the conversion kernel
// Raises CV_GpuNotSupported when the source depth or the *requested* destination depth
// is CV_64F and the device lacks double support. (The check previously looked at the
// depth of the not-yet-recreated `dst`, which says nothing about the requested output.)
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
    if( rtype < 0 )
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);

    if (!clCxt->supportsFeature(Context::CL_DOUBLE) &&
            (sdepth == CV_64F || ddepth == CV_64F))
    {
        CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
        return;
    }

    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
                   && fabs(beta) < std::numeric_limits<double>::epsilon();

    // Same depth and identity scaling: a plain copy is sufficient.
    if( sdepth == ddepth && noScale )
    {
        copyTo(dst);
        return;
    }

    // In-place conversion to a different depth: keep a header copy of the source,
    // because dst.create() below replaces the storage of *this.
    oclMat temp;
    const oclMat *psrc = this;
    if( sdepth != ddepth && psrc == &dst )
        psrc = &(temp = *this);

    dst.create( size(), rtype );
    convert_run(*psrc, dst, alpha, beta);
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
//////////////////////////////// setTo ////////////////////////////////////
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
2013-09-20 18:22:18 +08:00
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
// Assigns the scalar `s` to every element by forwarding to setTo(); returns *this.
oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
{
    oclMat &self = *this;
    self.setTo(s);
    return self;
}
|
2013-09-20 18:22:18 +08:00
|
|
|
|
2013-01-25 18:31:34 +08:00
|
|
|
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kernelName)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
|
|
|
vector<pair<size_t , const void *> > args;
|
2012-08-03 14:08:36 +08:00
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
size_t localThreads[3] = {16, 16, 1};
|
2013-09-20 18:22:18 +08:00
|
|
|
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
2012-07-17 01:08:14 +08:00
|
|
|
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
|
2013-09-20 18:22:18 +08:00
|
|
|
|
|
|
|
if (dst.type() == CV_8UC1)
|
2012-07-17 01:08:14 +08:00
|
|
|
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
2013-09-20 18:22:18 +08:00
|
|
|
|
|
|
|
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
|
|
|
const char channelMap[] = { ' ', ' ', '2', '4', '4' };
|
|
|
|
std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
|
|
|
|
|
|
|
|
Mat mat(1, 1, dst.type(), scalar);
|
|
|
|
|
2013-01-25 18:31:34 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-09-20 18:22:18 +08:00
|
|
|
// this enables backwards portability to
|
|
|
|
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
|
|
|
if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
|
2013-05-03 09:45:56 +08:00
|
|
|
dst.offset == 0 && dst.cols == dst.wholecols)
|
2012-10-11 16:22:47 +08:00
|
|
|
{
|
2013-09-20 18:22:18 +08:00
|
|
|
const int sizeofMap[][7] =
|
|
|
|
{
|
|
|
|
{ sizeof(cl_uchar) , sizeof(cl_char) , sizeof(cl_ushort) , sizeof(cl_short) , sizeof(cl_int) , sizeof(cl_float) , sizeof(cl_double) },
|
|
|
|
{ sizeof(cl_uchar2), sizeof(cl_char2), sizeof(cl_ushort2), sizeof(cl_short2), sizeof(cl_int2), sizeof(cl_float2), sizeof(cl_double2) },
|
|
|
|
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 },
|
|
|
|
{ sizeof(cl_uchar4), sizeof(cl_char4), sizeof(cl_ushort4), sizeof(cl_short4), sizeof(cl_int4), sizeof(cl_float4), sizeof(cl_double4) },
|
|
|
|
};
|
|
|
|
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
|
|
|
|
|
2013-08-21 20:44:09 +08:00
|
|
|
clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
|
2013-09-20 18:22:18 +08:00
|
|
|
(cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
|
|
|
|
0, dst.step * dst.rows, 0, NULL, NULL);
|
2012-10-11 16:22:47 +08:00
|
|
|
}
|
|
|
|
else
|
2013-05-03 09:45:56 +08:00
|
|
|
#endif
|
2012-10-11 16:22:47 +08:00
|
|
|
{
|
2013-09-20 18:22:18 +08:00
|
|
|
oclMat m(mat);
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void*)&m.data ));
|
2012-10-11 16:22:47 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
2013-09-20 18:22:18 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
|
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
|
2013-09-20 18:22:18 +08:00
|
|
|
localThreads, args, -1, -1, buildOptions.c_str());
|
2012-10-11 16:22:47 +08:00
|
|
|
}
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
|
2013-01-25 18:31:34 +08:00
|
|
|
static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, string kernelName)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
|
|
|
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
|
|
|
|
vector<pair<size_t , const void *> > args;
|
2013-09-20 18:22:18 +08:00
|
|
|
size_t localThreads[3] = { 16, 16, 1 };
|
|
|
|
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
2012-07-17 01:08:14 +08:00
|
|
|
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
|
2013-09-20 18:22:18 +08:00
|
|
|
|
|
|
|
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
|
|
|
const char channelMap[] = { ' ', ' ', '2', '4', '4' };
|
|
|
|
std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
|
|
|
|
|
|
|
|
oclMat m(Mat(1, 1, dst.type(), scalar));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&m.data ));
|
2012-07-17 01:08:14 +08:00
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_mem) , (void *)&mask.data ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
|
|
|
|
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
|
|
|
|
openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
|
2013-09-20 18:22:18 +08:00
|
|
|
localThreads, args, -1, -1, buildOptions.c_str());
|
2012-07-17 01:08:14 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sets the elements of this matrix to `scalar`, optionally restricted by `mask`.
// The mask type must be CV_8UC1 and the depth of this matrix must be in 0..6.
// Returns *this.
oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
{
    CV_Assert(mask.type() == CV_8UC1);
    CV_Assert( this->depth() >= 0 && this->depth() <= 6 );
    CV_DbgAssert( !this->empty());

    if (!mask.empty())
    {
        set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
        return *this;
    }

    // Single-channel 8-bit matrices use a dedicated kernel.
    const char *unmaskedKernel = "set_to_without_mask";
    if (type() == CV_8UC1)
        unmaskedKernel = "set_to_without_mask_C1_D0";

    set_to_withoutmask_run(*this, scalar, unmaskedKernel);
    return *this;
}
|
|
|
|
|
|
|
|
// Returns a new header over the same data with `new_cn` channels (0 keeps the current
// channel count). An explicit request to change the number of rows is rejected with
// CV_StsBadFunc; a row count is only derived internally when the current row width
// cannot be split into `new_cn` channels.
oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
{
    const bool explicitRowChange = new_rows != 0 && new_rows != rows;
    if (explicitRowChange)
        CV_Error( CV_StsBadFunc, "oclMat's number of rows can not be changed for current version" );

    oclMat result = *this;

    const int curChannels = oclchannels();
    if (new_cn == 0)
        new_cn = curChannels;

    // Row width measured in single-channel elements.
    int rowElems = cols * curChannels;

    if (new_rows == 0 && (new_cn > rowElems || rowElems % new_cn != 0))
        new_rows = rows * rowElems / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        const int allElems = rowElems * rows;

        if (!isContinuous())
            CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)allElems)
            CV_Error(CV_StsOutOfRange, "Bad new number of rows");

        rowElems = allElems / new_rows;
        if (rowElems * new_rows != allElems)
            CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        result.rows = new_rows;
        result.step = rowElems * elemSize1();
    }

    const int reshapedCols = rowElems / new_cn;
    if (reshapedCols * new_cn != rowElems)
        CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");

    result.cols = reshapedCols;
    result.wholecols = reshapedCols;
    // Replace the channel bits of the type flags with the new channel count.
    result.flags = (result.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
    return result;
}
|
|
|
|
|
2013-02-27 17:32:32 +08:00
|
|
|
// Size-based overload: unpacks `size` and forwards to createEx(rows, cols, ...).
void cv::ocl::oclMat::createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type)
{
    const int nRows = size.height;
    const int nCols = size.width;
    createEx(nRows, nCols, type, rw_type, mem_type);
}
|
|
|
|
|
2012-07-17 01:08:14 +08:00
|
|
|
void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
|
2013-02-27 17:32:32 +08:00
|
|
|
{
|
|
|
|
createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
|
|
|
|
}
|
|
|
|
|
|
|
|
void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type, DevMemRW rw_type, DevMemType mem_type)
|
2012-07-17 01:08:14 +08:00
|
|
|
{
|
|
|
|
clCxt = Context::getContext();
|
|
|
|
/* core logic */
|
|
|
|
_type &= TYPE_MASK;
|
|
|
|
if( rows == _rows && cols == _cols && type() == _type && data )
|
|
|
|
return;
|
|
|
|
if( data )
|
|
|
|
release();
|
|
|
|
CV_DbgAssert( _rows >= 0 && _cols >= 0 );
|
|
|
|
if( _rows > 0 && _cols > 0 )
|
|
|
|
{
|
|
|
|
flags = Mat::MAGIC_VAL + _type;
|
|
|
|
rows = _rows;
|
|
|
|
cols = _cols;
|
|
|
|
wholerows = _rows;
|
|
|
|
wholecols = _cols;
|
|
|
|
size_t esz = elemSize();
|
|
|
|
|
|
|
|
void *dev_ptr;
|
2013-02-27 17:32:32 +08:00
|
|
|
openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
|
2012-07-17 01:08:14 +08:00
|
|
|
|
2012-10-11 16:22:47 +08:00
|
|
|
if (esz * cols == step)
|
2012-07-17 01:08:14 +08:00
|
|
|
flags |= Mat::CONTINUOUS_FLAG;
|
|
|
|
|
|
|
|
int64 _nettosize = (int64)step * rows;
|
|
|
|
size_t nettosize = (size_t)_nettosize;
|
|
|
|
|
|
|
|
datastart = data = (uchar *)dev_ptr;
|
|
|
|
dataend = data + nettosize;
|
|
|
|
|
|
|
|
refcount = (int *)fastMalloc(sizeof(*refcount));
|
|
|
|
*refcount = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void cv::ocl::oclMat::release()
|
|
|
|
{
|
|
|
|
if( refcount && CV_XADD(refcount, -1) == 1 )
|
|
|
|
{
|
|
|
|
fastFree(refcount);
|
|
|
|
openCLFree(datastart);
|
|
|
|
}
|
|
|
|
data = datastart = dataend = 0;
|
|
|
|
step = rows = cols = 0;
|
|
|
|
offset = wholerows = wholecols = 0;
|
|
|
|
refcount = 0;
|
|
|
|
}
|
|
|
|
|
2013-02-08 11:41:46 +08:00
|
|
|
// In-place addition: calls add() with this matrix as first source and as destination.
oclMat& cv::ocl::oclMat::operator+=( const oclMat& m )
{
    oclMat &self = *this;
    add(self, m, self);
    return self;
}
|
|
|
|
|
|
|
|
// In-place subtraction: calls subtract() with this matrix as first source and as destination.
oclMat& cv::ocl::oclMat::operator-=( const oclMat& m )
{
    oclMat &self = *this;
    subtract(self, m, self);
    return self;
}
|
|
|
|
|
|
|
|
// In-place multiplication: calls multiply() with this matrix as first source and as destination.
oclMat& cv::ocl::oclMat::operator*=( const oclMat& m )
{
    oclMat &self = *this;
    multiply(self, m, self);
    return self;
}
|
|
|
|
|
|
|
|
// In-place division: calls divide() with this matrix as first source and as destination.
oclMat& cv::ocl::oclMat::operator/=( const oclMat& m )
{
    oclMat &self = *this;
    divide(self, m, self);
    return self;
}
|