Merge pull request #2028 from KonstantinMatskevich:ocl_tapi_morphology

This commit is contained in:
Andrey Pavlenko 2013-12-26 19:20:12 +04:00 committed by OpenCV Buildbot
commit a981295524
3 changed files with 450 additions and 28 deletions

View File

@ -43,6 +43,7 @@
#include "precomp.hpp"
#include <limits.h>
#include <stdio.h>
#include "opencl_kernels.hpp"
/****************************************************************************************\
Basic Morphological Operations: Erosion & Dilation
@ -1283,11 +1284,132 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
}
#endif
static const char* op2str[] = {"ERODE", "DILATE"};
static bool ocl_morphology_op(InputArray _src, OutputArray _dst, InputArray _kernel, Size &ksize, const Point anchor, int iterations, int op)
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (_src.depth() == CV_64F && !doubleSupport)
return false;
UMat kernel8U;
_kernel.getUMat().convertTo(kernel8U, CV_8U);
UMat kernel = kernel8U.reshape(1, 1);
bool rectKernel = true;
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
if(kernel.getMat(ACCESS_READ).at<uchar>(i) != 1)
rectKernel = false;
UMat src = _src.getUMat();
#ifdef ANDROID
size_t localThreads[3] = {16, 8, 1};
#else
size_t localThreads[3] = {16, 16, 1};
#endif
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
if(localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
return false;
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s %s %s -D GENTYPE=%s -D DEPTH_%d",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op], doubleSupport?"-D DOUBLE_SUPPORT" :"", rectKernel?"-D RECTKERNEL":"",
ocl::typeToStr(_src.type()), _src.depth() );
std::vector<ocl::Kernel> kernels;
for(int i = 0; i<iterations; i++)
{
ocl::Kernel k( "morph", ocl::imgproc::morph_oclsrc, compile_option);
if (k.empty())
return false;
kernels.push_back(k);
}
_dst.create(src.size(), src.type());
UMat dst = _dst.getUMat();
if( iterations== 1 && src.u != dst.u)
{
Size wholesize;
Point ofs;
src.locateROI(wholesize, ofs);
int wholecols = wholesize.width, wholerows = wholesize.height;
int idxArg = 0;
idxArg = kernels[0].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src));
idxArg = kernels[0].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
idxArg = kernels[0].set(idxArg, ofs.x);
idxArg = kernels[0].set(idxArg, ofs.y);
idxArg = kernels[0].set(idxArg, src.cols);
idxArg = kernels[0].set(idxArg, src.rows);
idxArg = kernels[0].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
idxArg = kernels[0].set(idxArg, wholecols);
idxArg = kernels[0].set(idxArg, wholerows);
return kernels[0].run(2, globalThreads, localThreads, false);
}
for(int i = 0; i< iterations; i++)
{
UMat source;
Size wholesize;
Point ofs;
if( i == 0)
{
int cols = src.cols, rows = src.rows;
src.locateROI(wholesize,ofs);
src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
src.copyTo(source);
src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
}
else
{
int cols = dst.cols, rows = dst.rows;
dst.locateROI(wholesize,ofs);
dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
dst.copyTo(source);
dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
}
source.locateROI(wholesize, ofs);
int wholecols = wholesize.width, wholerows = wholesize.height;
int idxArg = 0;
idxArg = kernels[i].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(source));
idxArg = kernels[i].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
idxArg = kernels[i].set(idxArg, ofs.x);
idxArg = kernels[i].set(idxArg, ofs.y);
idxArg = kernels[i].set(idxArg, source.cols);
idxArg = kernels[i].set(idxArg, source.rows);
idxArg = kernels[i].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
idxArg = kernels[i].set(idxArg, wholecols);
idxArg = kernels[i].set(idxArg, wholerows);
if (!kernels[i].run(2, globalThreads, localThreads, false))
return false;
}
return true;
}
static void morphOp( int op, InputArray _src, OutputArray _dst,
InputArray _kernel,
Point anchor, int iterations,
int borderType, const Scalar& borderValue )
{
int src_type = _src.type(), dst_type = _dst.type(),
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) &&
(op == MORPH_ERODE || op == MORPH_DILATE);
Mat kernel = _kernel.getMat();
Size ksize = kernel.data ? kernel.size() : Size(3,3);
anchor = normalizeAnchor(anchor, ksize);
@ -1299,13 +1421,11 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
return;
#endif
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
if( iterations == 0 || kernel.rows*kernel.cols == 1 )
{
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
src.copyTo(dst);
return;
}
@ -1326,6 +1446,14 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
iterations = 1;
}
if (useOpenCL && ocl_morphology_op(_src, _dst, kernel, ksize, anchor, iterations, op) )
return;
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
int nStripes = 1;
#if defined HAVE_TEGRA_OPTIMIZATION
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
@ -1362,49 +1490,97 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue );
}
void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
InputArray kernel, Point anchor, int iterations,
int borderType, const Scalar& borderValue )
{
Mat src = _src.getMat(), temp;
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
int src_type = _src.type(), dst_type = _dst.type(),
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
bool use_opencl = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue());
_dst.create(_src.size(), _src.type());
Mat src, dst, temp;
UMat usrc, udst, utemp;
switch( op )
{
case MORPH_ERODE:
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case MORPH_DILATE:
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case MORPH_OPEN:
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue );
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case CV_MOP_CLOSE:
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
erode( dst, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
erode( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case CV_MOP_GRADIENT:
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
dst -= temp;
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
if(use_opencl)
{
udst = _dst.getUMat();
subtract(udst, utemp, udst);
}
else
{
dst = _dst.getMat();
dst -= temp;
}
break;
case CV_MOP_TOPHAT:
if( src.data != dst.data )
temp = dst;
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue );
dst = src - temp;
if(use_opencl)
{
usrc = _src.getUMat();
udst = _dst.getUMat();
if( usrc.u != udst.u )
utemp = udst;
}
else
{
src = _src.getMat();
dst = _dst.getMat();
if( src.data != dst.data )
temp = dst;
}
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
dilate( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
anchor, iterations, borderType, borderValue );
if(use_opencl)
subtract(usrc, utemp, udst);
else
dst = src - temp;
break;
case CV_MOP_BLACKHAT:
if( src.data != dst.data )
temp = dst;
dilate( src, temp, kernel, anchor, iterations, borderType, borderValue );
erode( temp, temp, kernel, anchor, iterations, borderType, borderValue );
dst = temp - src;
if(use_opencl)
{
usrc = _src.getUMat();
udst = _dst.getUMat();
if( usrc.u != udst.u )
utemp = udst;
}
else
{
src = _src.getMat();
dst = _dst.getMat();
if( src.data != dst.data )
temp = dst;
}
dilate( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
erode( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
anchor, iterations, borderType, borderValue );
if(use_opencl)
subtract(utemp, usrc, udst);
else
dst = temp - src;
break;
default:
CV_Error( CV_StsBadArg, "unknown morphological operation" );

View File

@ -0,0 +1,152 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Zero Lin, zero.lin@amd.com
// Yao Wang, bitwangyaoyao@gmail.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#ifdef DEPTH_0
#ifdef ERODE
#define VAL 255
#endif
#ifdef DILATE
#define VAL 0
#endif
#endif
#ifdef DEPTH_5
#ifdef ERODE
#define VAL FLT_MAX
#endif
#ifdef DILATE
#define VAL -FLT_MAX
#endif
#endif
#ifdef DEPTH_6
#ifdef ERODE
#define VAL DBL_MAX
#endif
#ifdef DILATE
#define VAL -DBL_MAX
#endif
#endif
#ifdef ERODE
#define MORPH_OP(A,B) min((A),(B))
#endif
#ifdef DILATE
#define MORPH_OP(A,B) max((A),(B))
#endif
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
__kernel void morph(__global const uchar * restrict srcptr, int src_step, int src_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int src_offset_x, int src_offset_y,
int cols, int rows,
__constant uchar * mat_kernel,
int src_whole_cols, int src_whole_rows)
{
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int x = get_group_id(0)*LSIZE0;
int y = get_group_id(1)*LSIZE1;
int start_x = x+src_offset_x-RADIUSX;
int end_x = x + src_offset_x+LSIZE0+RADIUSX;
int width = end_x -(x+src_offset_x-RADIUSX)+1;
int start_y = y+src_offset_y-RADIUSY;
int point1 = mad24(l_y,LSIZE0,l_x);
int point2 = point1 + LSIZE0*LSIZE1;
int tl_x = point1 % width;
int tl_y = point1 / width;
int tl_x2 = point2 % width;
int tl_y2 = point2 / width;
int cur_x = start_x + tl_x;
int cur_y = start_y + tl_y;
int cur_x2 = start_x + tl_x2;
int cur_y2 = start_y + tl_y2;
int start_addr = mad24(cur_y,src_step, cur_x*(int)sizeof(GENTYPE));
int start_addr2 = mad24(cur_y2,src_step, cur_x2*(int)sizeof(GENTYPE));
GENTYPE temp0,temp1;
__local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];
int end_addr = mad24(src_whole_rows - 1,src_step,src_whole_cols*(int)sizeof(GENTYPE));
//read pixels from src
start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
__global const GENTYPE * src;
src = (__global const GENTYPE *)(srcptr+start_addr);
temp0 = src[0];
src = (__global const GENTYPE *)(srcptr+start_addr2);
temp1 = src[0];
//judge if read out of boundary
temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0);
temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0);
temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1);
temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1);
LDS_DAT[point1] = temp0;
LDS_DAT[point2] = temp1;
barrier(CLK_LOCAL_MEM_FENCE);
GENTYPE res = (GENTYPE)VAL;
for(int i=0; i<2*RADIUSY+1; i++)
for(int j=0; j<2*RADIUSX+1; j++)
{
res =
#ifndef RECTKERNEL
mat_kernel[i*(2*RADIUSX+1)+j] ?
#endif
MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)])
#ifndef RECTKERNEL
:res
#endif
;
}
int gidx = get_global_id(0);
int gidy = get_global_id(1);
if(gidx<cols && gidy<rows)
{
int dst_index = mad24(gidy, dst_step, dst_offset + gidx * (int)sizeof(GENTYPE));
__global GENTYPE * dst = (__global GENTYPE *)(dstptr + dst_index);
dst[0] = res;
}
}

View File

@ -229,6 +229,75 @@ OCL_TEST_P(GaussianBlurTest, Mat)
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Erode
typedef FilterTestBase Erode;
OCL_TEST_P(Erode, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::erode(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::erode(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
Near();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Dilate
typedef FilterTestBase Dilate;
OCL_TEST_P(Dilate, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
Near();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// MorphologyEx
typedef FilterTestBase MorphologyEx;
OCL_TEST_P(MorphologyEx, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
int op = size.height;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::morphologyEx(src_roi, dst_roi, op, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::morphologyEx(usrc_roi, udst_roi, op, kernel, Point(-1,-1), iterations) );
Near();
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define FILTER_BORDER_SET_NO_ISOLATED \
@ -285,6 +354,31 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
Values(0.0), // not used
Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0), Size(0,1), Size(0,2), Size(0,3), Size(0,4), Size(0,5),Size(0,6)),//uses as generator of operations
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL