mirror of
https://github.com/opencv/opencv.git
synced 2024-12-01 23:30:06 +08:00
Merge pull request #2496 from ilya-lavrenov:tapi_imgproc
This commit is contained in:
commit
bdfd29a0b8
@ -42,7 +42,6 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include "opencl_kernels.hpp"
|
#include "opencl_kernels.hpp"
|
||||||
|
|
||||||
/****************************************************************************************\
|
/****************************************************************************************\
|
||||||
@ -1291,9 +1290,10 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
|
|||||||
{
|
{
|
||||||
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
|
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
|
||||||
|
|
||||||
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||||
|
|
||||||
if (_src.depth() == CV_64F && !doubleSupport)
|
if (depth == CV_64F && !doubleSupport)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
UMat kernel8U;
|
UMat kernel8U;
|
||||||
@ -1324,10 +1324,11 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
static const char * const op2str[] = { "ERODE", "DILATE" };
|
static const char * const op2str[] = { "ERODE", "DILATE" };
|
||||||
String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s%s -D GENTYPE=%s -D DEPTH_%d",
|
String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s%s"
|
||||||
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op],
|
" -D T=%s -D DEPTH_%d -D cn=%d -D T1=%s", anchor.x, anchor.y,
|
||||||
|
(int)localThreads[0], (int)localThreads[1], op2str[op],
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "", rectKernel ? " -D RECTKERNEL" : "",
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "", rectKernel ? " -D RECTKERNEL" : "",
|
||||||
ocl::typeToStr(_src.type()), _src.depth() );
|
ocl::typeToStr(_src.type()), _src.depth(), cn, ocl::typeToStr(depth));
|
||||||
|
|
||||||
std::vector<ocl::Kernel> kernels;
|
std::vector<ocl::Kernel> kernels;
|
||||||
for (int i = 0; i < iterations; i++)
|
for (int i = 0; i < iterations; i++)
|
||||||
@ -1348,16 +1349,9 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
|
|||||||
src.locateROI(wholesize, ofs);
|
src.locateROI(wholesize, ofs);
|
||||||
int wholecols = wholesize.width, wholerows = wholesize.height;
|
int wholecols = wholesize.width, wholerows = wholesize.height;
|
||||||
|
|
||||||
int idxArg = 0;
|
kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
|
||||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src));
|
ofs.x, ofs.y, src.cols, src.rows, ocl::KernelArg::PtrReadOnly(kernel8U),
|
||||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
|
wholecols, wholerows);
|
||||||
idxArg = kernels[0].set(idxArg, ofs.x);
|
|
||||||
idxArg = kernels[0].set(idxArg, ofs.y);
|
|
||||||
idxArg = kernels[0].set(idxArg, src.cols);
|
|
||||||
idxArg = kernels[0].set(idxArg, src.rows);
|
|
||||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel8U));
|
|
||||||
idxArg = kernels[0].set(idxArg, wholecols);
|
|
||||||
idxArg = kernels[0].set(idxArg, wholerows);
|
|
||||||
|
|
||||||
return kernels[0].run(2, globalThreads, localThreads, false);
|
return kernels[0].run(2, globalThreads, localThreads, false);
|
||||||
}
|
}
|
||||||
@ -1367,6 +1361,7 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
|
|||||||
UMat source;
|
UMat source;
|
||||||
Size wholesize;
|
Size wholesize;
|
||||||
Point ofs;
|
Point ofs;
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
{
|
{
|
||||||
int cols = src.cols, rows = src.rows;
|
int cols = src.cols, rows = src.rows;
|
||||||
@ -1385,20 +1380,11 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
|
|||||||
dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||||
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
source.locateROI(wholesize, ofs);
|
source.locateROI(wholesize, ofs);
|
||||||
int wholecols = wholesize.width, wholerows = wholesize.height;
|
|
||||||
|
|
||||||
int idxArg = 0;
|
kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
|
||||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(source));
|
ofs.x, ofs.y, source.cols, source.rows, ocl::KernelArg::PtrReadOnly(kernel8U),
|
||||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
|
wholesize.width, wholesize.height);
|
||||||
idxArg = kernels[i].set(idxArg, ofs.x);
|
|
||||||
idxArg = kernels[i].set(idxArg, ofs.y);
|
|
||||||
idxArg = kernels[i].set(idxArg, source.cols);
|
|
||||||
idxArg = kernels[i].set(idxArg, source.rows);
|
|
||||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel8U));
|
|
||||||
idxArg = kernels[i].set(idxArg, wholecols);
|
|
||||||
idxArg = kernels[i].set(idxArg, wholerows);
|
|
||||||
|
|
||||||
if (!kernels[i].run(2, globalThreads, localThreads, false))
|
if (!kernels[i].run(2, globalThreads, localThreads, false))
|
||||||
return false;
|
return false;
|
||||||
@ -1450,7 +1436,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
CV_OCL_RUN(_dst.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
|
CV_OCL_RUN(_dst.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
|
||||||
_src.dims() <= 2 && (src_cn == 1 || src_cn == 4) &&
|
_src.dims() <= 2 && src_cn <= 4 &&
|
||||||
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
|
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
|
||||||
borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
|
borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
|
||||||
(op == MORPH_ERODE || op == MORPH_DILATE),
|
(op == MORPH_ERODE || op == MORPH_DILATE),
|
||||||
|
@ -43,6 +43,16 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if cn != 3
|
||||||
|
#define loadpix(addr) *(__global const T *)(addr)
|
||||||
|
#define storepix(val, addr) *(__global T *)(addr) = val
|
||||||
|
#define TSIZE (int)sizeof(T)
|
||||||
|
#else
|
||||||
|
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
|
||||||
|
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
|
||||||
|
#define TSIZE ((int)sizeof(T1)*3)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef DEPTH_0
|
#ifdef DEPTH_0
|
||||||
#ifdef ERODE
|
#ifdef ERODE
|
||||||
#define VAL 255
|
#define VAL 255
|
||||||
@ -50,16 +60,14 @@
|
|||||||
#ifdef DILATE
|
#ifdef DILATE
|
||||||
#define VAL 0
|
#define VAL 0
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#elif defined DEPTH_5
|
||||||
#ifdef DEPTH_5
|
|
||||||
#ifdef ERODE
|
#ifdef ERODE
|
||||||
#define VAL FLT_MAX
|
#define VAL FLT_MAX
|
||||||
#endif
|
#endif
|
||||||
#ifdef DILATE
|
#ifdef DILATE
|
||||||
#define VAL -FLT_MAX
|
#define VAL -FLT_MAX
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#elif defined DEPTH_6
|
||||||
#ifdef DEPTH_6
|
|
||||||
#ifdef ERODE
|
#ifdef ERODE
|
||||||
#define VAL DBL_MAX
|
#define VAL DBL_MAX
|
||||||
#endif
|
#endif
|
||||||
@ -74,61 +82,55 @@
|
|||||||
#ifdef DILATE
|
#ifdef DILATE
|
||||||
#define MORPH_OP(A,B) max((A),(B))
|
#define MORPH_OP(A,B) max((A),(B))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
|
// BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
|
||||||
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) < (l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
|
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) < (l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
|
||||||
|
|
||||||
__kernel void morph(__global const uchar * restrict srcptr, int src_step, int src_offset,
|
__kernel void morph(__global const uchar * srcptr, int src_step, int src_offset,
|
||||||
__global uchar * dstptr, int dst_step, int dst_offset,
|
__global uchar * dstptr, int dst_step, int dst_offset,
|
||||||
int src_offset_x, int src_offset_y,
|
int src_offset_x, int src_offset_y, int cols, int rows,
|
||||||
int cols, int rows,
|
__constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
|
||||||
__constant uchar * mat_kernel,
|
|
||||||
int src_whole_cols, int src_whole_rows)
|
|
||||||
{
|
{
|
||||||
int l_x = get_local_id(0);
|
int gidx = get_global_id(0), gidy = get_global_id(1);
|
||||||
int l_y = get_local_id(1);
|
int l_x = get_local_id(0), l_y = get_local_id(1);
|
||||||
int x = get_group_id(0)*LSIZE0;
|
int x = get_group_id(0) * LSIZE0, y = get_group_id(1) * LSIZE1;
|
||||||
int y = get_group_id(1)*LSIZE1;
|
|
||||||
int start_x = x + src_offset_x - RADIUSX;
|
int start_x = x + src_offset_x - RADIUSX;
|
||||||
int end_x = x + src_offset_x + LSIZE0 + RADIUSX;
|
int end_x = x + src_offset_x + LSIZE0 + RADIUSX;
|
||||||
int width = end_x - (x + src_offset_x - RADIUSX) + 1;
|
int width = end_x - (x + src_offset_x - RADIUSX) + 1;
|
||||||
int start_y = y + src_offset_y - RADIUSY;
|
int start_y = y + src_offset_y - RADIUSY;
|
||||||
int point1 = mad24(l_y, LSIZE0, l_x);
|
int point1 = mad24(l_y, LSIZE0, l_x);
|
||||||
int point2 = point1 + LSIZE0 * LSIZE1;
|
int point2 = point1 + LSIZE0 * LSIZE1;
|
||||||
int tl_x = point1 % width;
|
int tl_x = point1 % width, tl_y = point1 / width;
|
||||||
int tl_y = point1 / width;
|
int tl_x2 = point2 % width, tl_y2 = point2 / width;
|
||||||
int tl_x2 = point2 % width;
|
int cur_x = start_x + tl_x, cur_y = start_y + tl_y;
|
||||||
int tl_y2 = point2 / width;
|
int cur_x2 = start_x + tl_x2, cur_y2 = start_y + tl_y2;
|
||||||
int cur_x = start_x + tl_x;
|
int start_addr = mad24(cur_y, src_step, cur_x * TSIZE);
|
||||||
int cur_y = start_y + tl_y;
|
int start_addr2 = mad24(cur_y2, src_step, cur_x2 * TSIZE);
|
||||||
int cur_x2 = start_x + tl_x2;
|
|
||||||
int cur_y2 = start_y + tl_y2;
|
__local T LDS_DAT[2*LSIZE1*LSIZE0];
|
||||||
int start_addr = mad24(cur_y,src_step, cur_x*(int)sizeof(GENTYPE));
|
|
||||||
int start_addr2 = mad24(cur_y2,src_step, cur_x2*(int)sizeof(GENTYPE));
|
|
||||||
GENTYPE temp0,temp1;
|
|
||||||
__local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];
|
|
||||||
|
|
||||||
int end_addr = mad24(src_whole_rows - 1,src_step,src_whole_cols*(int)sizeof(GENTYPE));
|
|
||||||
// read pixels from src
|
// read pixels from src
|
||||||
start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
|
int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * TSIZE);
|
||||||
start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
|
start_addr = start_addr < end_addr && start_addr > 0 ? start_addr : 0;
|
||||||
__global const GENTYPE * src;
|
start_addr2 = start_addr2 < end_addr && start_addr2 > 0 ? start_addr2 : 0;
|
||||||
src = (__global const GENTYPE *)(srcptr+start_addr);
|
|
||||||
temp0 = src[0];
|
|
||||||
src = (__global const GENTYPE *)(srcptr+start_addr2);
|
|
||||||
temp1 = src[0];
|
|
||||||
//judge if read out of boundary
|
|
||||||
temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0);
|
|
||||||
temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0);
|
|
||||||
|
|
||||||
temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1);
|
T temp0 = loadpix(srcptr + start_addr);
|
||||||
temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1);
|
T temp1 = loadpix(srcptr + start_addr2);
|
||||||
|
|
||||||
|
// judge if read out of boundary
|
||||||
|
temp0 = ELEM(cur_x, 0, src_whole_cols, (T)(VAL),temp0);
|
||||||
|
temp0 = ELEM(cur_y, 0, src_whole_rows, (T)(VAL),temp0);
|
||||||
|
|
||||||
|
temp1 = ELEM(cur_x2, 0, src_whole_cols, (T)(VAL), temp1);
|
||||||
|
temp1 = ELEM(cur_y2, 0, src_whole_rows, (T)(VAL), temp1);
|
||||||
|
|
||||||
LDS_DAT[point1] = temp0;
|
LDS_DAT[point1] = temp0;
|
||||||
LDS_DAT[point2] = temp1;
|
LDS_DAT[point2] = temp1;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
GENTYPE res = (GENTYPE)VAL;
|
|
||||||
for(int i=0; i<2*RADIUSY+1; i++)
|
T res = (T)(VAL);
|
||||||
for(int j=0; j<2*RADIUSX+1; j++)
|
for (int i = 0, sizey = 2 * RADIUSY + 1; i < sizey; i++)
|
||||||
|
for (int j = 0, sizex = 2 * RADIUSX + 1; j < sizex; j++)
|
||||||
{
|
{
|
||||||
res =
|
res =
|
||||||
#ifndef RECTKERNEL
|
#ifndef RECTKERNEL
|
||||||
@ -140,13 +142,10 @@ __kernel void morph(__global const uchar * restrict srcptr, int src_step, int sr
|
|||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
int gidx = get_global_id(0);
|
|
||||||
int gidy = get_global_id(1);
|
|
||||||
if (gidx < cols && gidy < rows)
|
if (gidx < cols && gidy < rows)
|
||||||
{
|
{
|
||||||
int dst_index = mad24(gidy, dst_step, dst_offset + gidx * (int)sizeof(GENTYPE));
|
int dst_index = mad24(gidy, dst_step, mad24(gidx, TSIZE, dst_offset));
|
||||||
__global GENTYPE * dst = (__global GENTYPE *)(dstptr + dst_index);
|
storepix(res, dstptr + dst_index);
|
||||||
dst[0] = res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -349,7 +349,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
|
|||||||
Bool()));
|
Bool()));
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
|
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
|
||||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||||
Values(3, 5, 7),
|
Values(3, 5, 7),
|
||||||
Values(Size(0,0)),//not used
|
Values(Size(0,0)),//not used
|
||||||
Values((BorderType)BORDER_CONSTANT),//not used
|
Values((BorderType)BORDER_CONSTANT),//not used
|
||||||
@ -357,7 +357,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
|
|||||||
Bool()));
|
Bool()));
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
||||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||||
Values(3, 5, 7),
|
Values(3, 5, 7),
|
||||||
Values(Size(0,0)),//not used
|
Values(Size(0,0)),//not used
|
||||||
Values((BorderType)BORDER_CONSTANT),//not used
|
Values((BorderType)BORDER_CONSTANT),//not used
|
||||||
@ -365,9 +365,9 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
|||||||
Bool()));
|
Bool()));
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
|
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
|
||||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||||
Values(3, 5, 7),
|
Values(3, 5, 7),
|
||||||
Values(Size(0,0), Size(0,1), Size(0,2), Size(0,3), Size(0,4), Size(0,5),Size(0,6)),//uses as generator of operations
|
Values(Size(0, 0), Size(0, 1), Size(0, 2), Size(0, 3), Size(0, 4), Size(0, 5), Size(0, 6)), // used as generator of operations
|
||||||
Values((BorderType)BORDER_CONSTANT),// not used
|
Values((BorderType)BORDER_CONSTANT),// not used
|
||||||
Values(1.0, 2.0, 3.0),
|
Values(1.0, 2.0, 3.0),
|
||||||
Bool()));
|
Bool()));
|
||||||
|
Loading…
Reference in New Issue
Block a user