mirror of
https://github.com/opencv/opencv.git
synced 2024-11-29 13:47:32 +08:00
Optimizations to OpenCL bilateral filter.
This commit is contained in:
parent
a2dec6c34e
commit
ede6d4482b
@ -32,28 +32,79 @@
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
|
||||
|
||||
|
||||
__kernel void bilateral(__global const uchar * src, int src_step, int src_offset,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__constant float * color_weight, __constant float * space_weight, __constant int * space_ofs)
|
||||
__constant float * space_weight, __constant int * space_ofs)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
|
||||
if (y < dst_rows && x < dst_cols)
|
||||
{
|
||||
int src_index = mad24(y + radius, src_step, x + radius + src_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
float sum = 0.f, wsum = 0.f;
|
||||
int val0 = convert_int(src[src_index]);
|
||||
|
||||
#pragma unroll
|
||||
for (int k = 0; k < maxk; k++ )
|
||||
{
|
||||
int val = convert_int(src[src_index + space_ofs[k]]);
|
||||
float w = space_weight[k] * color_weight[abs(val - val0)];
|
||||
float w = space_weight[k] * native_exp((float)((val - val0) * (val - val0) * gauss_color_coeff));
|
||||
sum += (float)(val) * w;
|
||||
wsum += w;
|
||||
}
|
||||
dst[dst_index] = convert_uchar_rtz(sum / wsum + 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void bilateral_float(__global const uchar * src, int src_step, int src_offset,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__constant float * space_weight, __constant int * space_ofs)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
if (y < dst_rows && x < dst_cols)
|
||||
{
|
||||
int src_index = mad24(y + radius, src_step, x + radius + src_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
float sum = 0.f, wsum = 0.f;
|
||||
float val0 = convert_float(src[src_index]);
|
||||
#pragma unroll
|
||||
for (int k = 0; k < maxk; k++ )
|
||||
{
|
||||
float val = convert_float(src[src_index + space_ofs[k]]);
|
||||
float w = space_weight[k] * native_exp((val - val0) * (val - val0) * gauss_color_coeff);
|
||||
sum += (float)(val) * w;
|
||||
wsum += w;
|
||||
}
|
||||
dst[dst_index] = convert_uchar_rtz(sum / wsum + 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel void bilateral_float4(__global const uchar * src, int src_step, int src_offset,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__constant float * space_weight, __constant int * space_ofs)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
if (y < dst_rows && x < dst_cols / 4 )
|
||||
{
|
||||
int src_index = ((y + radius) * src_step) + x * 4 + (radius + src_offset);
|
||||
int dst_index = (y * dst_step) + x * 4 + dst_offset ;
|
||||
float4 sum = 0.f, wsum = 0.f;
|
||||
float4 val0 = convert_float4(vload4(0, src + src_index));
|
||||
#pragma unroll
|
||||
for (int k = 0; k < maxk; k++ )
|
||||
{
|
||||
float4 val = convert_float4(vload4(0, src + src_index + space_ofs[k]));
|
||||
float spacew = space_weight[k];
|
||||
float4 w = spacew * native_exp((val - val0) * (val - val0) * gauss_color_coeff);
|
||||
sum += val * w;
|
||||
wsum += w;
|
||||
}
|
||||
sum = sum / wsum + .5f;
|
||||
vstore4(convert_uchar4_rtz(sum), 0, dst + dst_index);
|
||||
}
|
||||
}
|
||||
|
@ -2210,7 +2210,7 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
|
||||
double sigma_color, double sigma_space,
|
||||
int borderType)
|
||||
{
|
||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
||||
int type = _src.type();
|
||||
int i, j, maxk, radius;
|
||||
|
||||
if ( type != CV_8UC1 )
|
||||
@ -2237,19 +2237,14 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
|
||||
|
||||
copyMakeBorder(src, temp, radius, radius, radius, radius, borderType);
|
||||
|
||||
std::vector<float> _color_weight(cn * 256);
|
||||
std::vector<float> _space_weight(d * d);
|
||||
std::vector<int> _space_ofs(d * d);
|
||||
float *color_weight = &_color_weight[0];
|
||||
float *space_weight = &_space_weight[0];
|
||||
int *space_ofs = &_space_ofs[0];
|
||||
|
||||
// initialize color-related bilateral filter coefficients
|
||||
for( i = 0; i < 256 * cn; i++ )
|
||||
color_weight[i] = (float)std::exp(i * i * gauss_color_coeff);
|
||||
|
||||
// initialize space-related bilateral filter coefficients
|
||||
for( i = -radius, maxk = 0; i <= radius; i++ )
|
||||
{
|
||||
for( j = -radius; j <= radius; j++ )
|
||||
{
|
||||
double r = std::sqrt((double)i * i + (double)j * j);
|
||||
@ -2258,26 +2253,43 @@ static bool ocl_bilateralFilter_8u(InputArray _src, OutputArray _dst, int d,
|
||||
space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff);
|
||||
space_ofs[maxk++] = (int)(i * temp.step + j);
|
||||
}
|
||||
}
|
||||
|
||||
ocl::Kernel k("bilateral", ocl::imgproc::bilateral_oclsrc,
|
||||
format("-D radius=%d -D maxk=%d", radius, maxk));
|
||||
String kernelName("bilateral");
|
||||
size_t sizeDiv = 1;
|
||||
|
||||
if ((ocl::Device::getDefault().isIntel()) &&
|
||||
(ocl::Device::getDefault().type() == ocl::Device::TYPE_GPU))
|
||||
{
|
||||
//Intel GPU
|
||||
if (dst.cols % 4 == 0)
|
||||
{
|
||||
kernelName = "bilateral_float4";
|
||||
sizeDiv = 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
kernelName = "bilateral_float";
|
||||
}
|
||||
}
|
||||
ocl::Kernel k(kernelName.c_str(), ocl::imgproc::bilateral_oclsrc,
|
||||
format("-D radius=%d -D maxk=%d "
|
||||
"-D gauss_color_coeff=%f", radius, maxk,
|
||||
(float)gauss_color_coeff));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
Mat mcolor_weight(1, cn * 256, CV_32FC1, color_weight);
|
||||
Mat mspace_weight(1, d * d, CV_32FC1, space_weight);
|
||||
Mat mspace_ofs(1, d * d, CV_32SC1, space_ofs);
|
||||
UMat ucolor_weight, uspace_weight, uspace_ofs;
|
||||
mcolor_weight.copyTo(ucolor_weight);
|
||||
UMat uspace_weight, uspace_ofs;
|
||||
mspace_weight.copyTo(uspace_weight);
|
||||
mspace_ofs.copyTo(uspace_ofs);
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnlyNoSize(temp), ocl::KernelArg::WriteOnly(dst),
|
||||
ocl::KernelArg::PtrReadOnly(ucolor_weight),
|
||||
ocl::KernelArg::PtrReadOnly(uspace_weight),
|
||||
ocl::KernelArg::PtrReadOnly(uspace_ofs));
|
||||
|
||||
size_t globalsize[2] = { dst.cols, dst.rows };
|
||||
size_t globalsize[2] = { dst.cols / sizeDiv, dst.rows };
|
||||
return k.run(2, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
|
@ -62,12 +62,14 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
|
||||
Size, // dx, dy
|
||||
BorderType, // border type
|
||||
double, // optional parameter
|
||||
bool) // roi or not
|
||||
bool, // roi or not
|
||||
int) //width multiplier
|
||||
{
|
||||
int type, borderType, ksize;
|
||||
Size size;
|
||||
double param;
|
||||
bool useRoi;
|
||||
int widthMultiple;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src)
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst)
|
||||
@ -80,6 +82,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
|
||||
borderType = GET_PARAM(3);
|
||||
param = GET_PARAM(4);
|
||||
useRoi = GET_PARAM(5);
|
||||
widthMultiple = GET_PARAM(6);
|
||||
}
|
||||
|
||||
void random_roi(int minSize = 1)
|
||||
@ -88,6 +91,9 @@ PARAM_TEST_CASE(FilterTestBase, MatType,
|
||||
minSize = ksize;
|
||||
|
||||
Size roiSize = randomSize(minSize, MAX_VALUE);
|
||||
roiSize.width &= ~((widthMultiple * 2) - 1);
|
||||
roiSize.width += widthMultiple;
|
||||
|
||||
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);
|
||||
|
||||
@ -320,7 +326,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
|
||||
Values(Size(0, 0)), // not used
|
||||
FILTER_BORDER_SET_NO_ISOLATED,
|
||||
Values(0.0), // not used
|
||||
Bool()));
|
||||
Bool(),
|
||||
Values(1, 4)));
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
|
||||
FILTER_TYPES,
|
||||
@ -328,7 +335,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, LaplacianTest, Combine(
|
||||
Values(Size(0, 0)), // not used
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(1.0, 0.2, 3.0), // kernel scale
|
||||
Bool()));
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
|
||||
FILTER_TYPES,
|
||||
@ -336,7 +344,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
|
||||
Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)), // dx, dy
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(0.0), // not used
|
||||
Bool()));
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
|
||||
FILTER_TYPES,
|
||||
@ -344,7 +353,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
|
||||
Values(Size(0, 1), Size(1, 0)), // dx, dy
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(1.0, 0.2), // kernel scale
|
||||
Bool()));
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
|
||||
FILTER_TYPES,
|
||||
@ -352,7 +362,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
|
||||
Values(Size(0, 0)), // not used
|
||||
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
|
||||
Values(0.0), // not used
|
||||
Bool()));
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||
@ -360,7 +371,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
|
||||
Values(Size(0,0)),//not used
|
||||
Values((BorderType)BORDER_CONSTANT),//not used
|
||||
Values(1.0, 2.0, 3.0),
|
||||
Bool() ) );
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||
@ -368,7 +380,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
|
||||
Values(Size(0,0)),//not used
|
||||
Values((BorderType)BORDER_CONSTANT),//not used
|
||||
Values(1.0, 2.0, 3.0),
|
||||
Bool() ) );
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
|
||||
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
|
||||
@ -376,7 +389,8 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
|
||||
Values(Size(0,0), Size(0,1), Size(0,2), Size(0,3), Size(0,4), Size(0,5),Size(0,6)),//uses as generator of operations
|
||||
Values((BorderType)BORDER_CONSTANT),//not used
|
||||
Values(1.0, 2.0, 3.0),
|
||||
Bool() ) );
|
||||
Bool(),
|
||||
Values(1))); // not used
|
||||
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
Loading…
Reference in New Issue
Block a user