Merge pull request #1365 from bitwangyaoyao:2.4_bilateral

This commit is contained in:
Roman Donchenko 2013-09-09 16:46:32 +04:00 committed by OpenCV Buildbot
commit 84435ba741
11 changed files with 994 additions and 67 deletions

View File

@ -412,6 +412,28 @@ http://www.dai.ed.ac.uk/CVonline/LOCAL\_COPIES/MANDUCHI1/Bilateral\_Filtering.ht
This filter does not work inplace.
adaptiveBilateralFilter
-----------------------
Applies the adaptive bilateral filter to an image.
.. ocv:function:: void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize, double sigmaSpace, Point anchor=Point(-1, -1), int borderType=BORDER_DEFAULT )
.. ocv:pyfunction:: cv2.adaptiveBilateralFilter(src, ksize, sigmaSpace[, dst[, anchor[, borderType]]]) -> dst
:param src: Source 8-bit, 1-channel or 3-channel image.
:param dst: Destination image of the same size and type as ``src`` .
:param ksize: filter kernel size.
:param sigmaSpace: Filter sigma in the coordinate space. It has similar meaning with ``sigmaSpace`` in ``bilateralFilter``.
:param anchor: anchor point; default value ``Point(-1,-1)`` means that the anchor is at the kernel center. Only default value is supported now.
:param borderType: border mode used to extrapolate pixels outside of the image.
The function applies adaptive bilateral filtering to the input image. This filter is similar to ``bilateralFilter``, in that dissimilarity from and distance to the center pixel is punished. Instead of using ``sigmaColor``, we employ the variance of pixel values in the neighbourhood.
blur

View File

@ -398,6 +398,10 @@ CV_EXPORTS_W void GaussianBlur( InputArray src,
CV_EXPORTS_W void bilateralFilter( InputArray src, OutputArray dst, int d,
double sigmaColor, double sigmaSpace,
int borderType=BORDER_DEFAULT );
//! smooths the image using adaptive bilateral filter
CV_EXPORTS_W void adaptiveBilateralFilter( InputArray src, OutputArray dst, Size ksize,
double sigmaSpace, Point anchor=Point(-1, -1),
int borderType=BORDER_DEFAULT );
//! smooths the image using the box filter. Each pixel is processed in O(1) time
CV_EXPORTS_W void boxFilter( InputArray src, OutputArray dst, int ddepth,
Size ksize, Point anchor=Point(-1,-1),

View File

@ -2272,6 +2272,236 @@ void cv::bilateralFilter( InputArray _src, OutputArray _dst, int d,
"Bilateral filtering is only implemented for 8u and 32f images" );
}
/****************************************************************************************\
Adaptive Bilateral Filtering
\****************************************************************************************/
namespace cv
{
#define CALCVAR 1
#define FIXED_WEIGHT 0
class adaptiveBilateralFilter_8u_Invoker :
public ParallelLoopBody
{
public:
adaptiveBilateralFilter_8u_Invoker(Mat& _dest, const Mat& _temp, Size _ksize, double _sigma_space, Point _anchor) :
temp(&_temp), dest(&_dest), ksize(_ksize), sigma_space(_sigma_space), anchor(_anchor)
{
if( sigma_space <= 0 )
sigma_space = 1;
CV_Assert((ksize.width & 1) && (ksize.height & 1));
space_weight.resize(ksize.width * ksize.height);
double sigma2 = sigma_space * sigma_space;
int idx = 0;
int w = ksize.width / 2;
int h = ksize.height / 2;
for(int y=-h; y<=h; y++)
for(int x=-w; x<=w; x++)
{
space_weight[idx++] = (float)(sigma2 / (sigma2 + x * x + y * y));
}
}
virtual void operator()(const Range& range) const
{
int cn = dest->channels();
int anX = anchor.x;
const uchar *tptr;
for(int i = range.start;i < range.end; i++)
{
int startY = i;
if(cn == 1)
{
float var;
int currVal;
int sumVal = 0;
int sumValSqr = 0;
int currValCenter;
int currWRTCenter;
float weight;
float totalWeight = 0.;
float tmpSum = 0.;
for(int j = 0;j < dest->cols *cn; j+=cn)
{
sumVal = 0;
sumValSqr= 0;
totalWeight = 0.;
tmpSum = 0.;
// Top row: don't sum the very last element
int startLMJ = 0;
int endLMJ = ksize.width - 1;
int howManyAll = (anX *2 +1)*(ksize.width );
#if CALCVAR
for(int x = startLMJ; x< endLMJ; x++)
{
tptr = temp->ptr(startY + x) +j;
for(int y=-anX; y<=anX; y++)
{
currVal = tptr[cn*(y+anX)];
sumVal += currVal;
sumValSqr += (currVal *currVal);
}
}
var = ( (sumValSqr * howManyAll)- sumVal * sumVal ) / ( (float)(howManyAll*howManyAll));
#else
var = 900.0;
#endif
startLMJ = 0;
endLMJ = ksize.width;
tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2);
currValCenter =tptr[j+cn*anX];
for(int x = startLMJ; x< endLMJ; x++)
{
tptr = temp->ptr(startY + x) +j;
for(int y=-anX; y<=anX; y++)
{
#if FIXED_WEIGHT
weight = 1.0;
#else
currVal = tptr[cn*(y+anX)];
currWRTCenter = currVal - currValCenter;
weight = var / ( var + (currWRTCenter * currWRTCenter) ) * space_weight[x*ksize.width+y+anX];;
#endif
tmpSum += ((float)tptr[cn*(y+anX)] * weight);
totalWeight += weight;
}
}
tmpSum /= totalWeight;
dest->at<uchar>(startY ,j)= static_cast<uchar>(tmpSum);
}
}
else
{
assert(cn == 3);
float var_b, var_g, var_r;
int currVal_b, currVal_g, currVal_r;
int sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
int sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
int currValCenter_b= 0, currValCenter_g= 0, currValCenter_r= 0;
int currWRTCenter_b, currWRTCenter_g, currWRTCenter_r;
float weight_b, weight_g, weight_r;
float totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
float tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
for(int j = 0;j < dest->cols *cn; j+=cn)
{
sumVal_b= 0, sumVal_g= 0, sumVal_r= 0;
sumValSqr_b= 0, sumValSqr_g= 0, sumValSqr_r= 0;
totalWeight_b= 0., totalWeight_g= 0., totalWeight_r= 0.;
tmpSum_b = 0., tmpSum_g= 0., tmpSum_r = 0.;
// Top row: don't sum the very last element
int startLMJ = 0;
int endLMJ = ksize.width - 1;
int howManyAll = (anX *2 +1)*(ksize.width);
#if CALCVAR
for(int x = startLMJ; x< endLMJ; x++)
{
tptr = temp->ptr(startY + x) +j;
for(int y=-anX; y<=anX; y++)
{
currVal_b = tptr[cn*(y+anX)], currVal_g = tptr[cn*(y+anX)+1], currVal_r =tptr[cn*(y+anX)+2];
sumVal_b += currVal_b;
sumVal_g += currVal_g;
sumVal_r += currVal_r;
sumValSqr_b += (currVal_b *currVal_b);
sumValSqr_g += (currVal_g *currVal_g);
sumValSqr_r += (currVal_r *currVal_r);
}
}
var_b = ( (sumValSqr_b * howManyAll)- sumVal_b * sumVal_b ) / ( (float)(howManyAll*howManyAll));
var_g = ( (sumValSqr_g * howManyAll)- sumVal_g * sumVal_g ) / ( (float)(howManyAll*howManyAll));
var_r = ( (sumValSqr_r * howManyAll)- sumVal_r * sumVal_r ) / ( (float)(howManyAll*howManyAll));
#else
var_b = 900.0; var_g = 900.0;var_r = 900.0;
#endif
startLMJ = 0;
endLMJ = ksize.width;
tptr = temp->ptr(startY + (startLMJ+ endLMJ)/2) + j;
currValCenter_b =tptr[cn*anX], currValCenter_g =tptr[cn*anX+1], currValCenter_r =tptr[cn*anX+2];
for(int x = startLMJ; x< endLMJ; x++)
{
tptr = temp->ptr(startY + x) +j;
for(int y=-anX; y<=anX; y++)
{
#if FIXED_WEIGHT
weight_b = 1.0;
weight_g = 1.0;
weight_r = 1.0;
#else
currVal_b = tptr[cn*(y+anX)];currVal_g=tptr[cn*(y+anX)+1];currVal_r=tptr[cn*(y+anX)+2];
currWRTCenter_b = currVal_b - currValCenter_b;
currWRTCenter_g = currVal_g - currValCenter_g;
currWRTCenter_r = currVal_r - currValCenter_r;
float cur_spw = space_weight[x*ksize.width+y+anX];
weight_b = var_b / ( var_b + (currWRTCenter_b * currWRTCenter_b) ) * cur_spw;
weight_g = var_g / ( var_g + (currWRTCenter_g * currWRTCenter_g) ) * cur_spw;
weight_r = var_r / ( var_r + (currWRTCenter_r * currWRTCenter_r) ) * cur_spw;
#endif
tmpSum_b += ((float)tptr[cn*(y+anX)] * weight_b);
tmpSum_g += ((float)tptr[cn*(y+anX)+1] * weight_g);
tmpSum_r += ((float)tptr[cn*(y+anX)+2] * weight_r);
totalWeight_b += weight_b, totalWeight_g += weight_g, totalWeight_r += weight_r;
}
}
tmpSum_b /= totalWeight_b;
tmpSum_g /= totalWeight_g;
tmpSum_r /= totalWeight_r;
dest->at<uchar>(startY,j )= static_cast<uchar>(tmpSum_b);
dest->at<uchar>(startY,j+1)= static_cast<uchar>(tmpSum_g);
dest->at<uchar>(startY,j+2)= static_cast<uchar>(tmpSum_r);
}
}
}
}
private:
const Mat *temp;
Mat *dest;
Size ksize;
double sigma_space;
Point anchor;
vector<float> space_weight;
};
static void adaptiveBilateralFilter_8u( const Mat& src, Mat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType )
{
Size size = src.size();
CV_Assert( (src.type() == CV_8UC1 || src.type() == CV_8UC3) &&
src.type() == dst.type() && src.size() == dst.size() &&
src.data != dst.data );
Mat temp;
copyMakeBorder(src, temp, anchor.x, anchor.y, anchor.x, anchor.y, borderType);
adaptiveBilateralFilter_8u_Invoker body(dst, temp, ksize, sigmaSpace, anchor);
parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
}
}
void cv::adaptiveBilateralFilter( InputArray _src, OutputArray _dst, Size ksize,
double sigmaSpace, Point anchor, int borderType )
{
Mat src = _src.getMat();
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
anchor = normalizeAnchor(anchor,ksize);
if( src.depth() == CV_8U )
adaptiveBilateralFilter_8u( src, dst, ksize, sigmaSpace, anchor, borderType );
else
CV_Error( CV_StsUnsupportedFormat,
"Adaptive Bilateral filtering is only implemented for 8u images" );
}
//////////////////////////////////////////////////////////////////////////////////////////
CV_IMPL void

View File

@ -127,7 +127,7 @@ ocl::bilateralFilter
--------------------
Returns void
.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT)
.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)
:param src: The source image

View File

@ -520,7 +520,15 @@ namespace cv
//! bilateralFilter
// supports 8UC1 8UC4
CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT);
CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT);
//! Applies an adaptive bilateral filter to the input image
// This is not truly a bilateral filter. Instead of using user provided fixed parameters,
// the function calculates a constant at each window based on local standard deviation,
// and use this constant to do filtering.
// supports 8UC1 8UC3
CV_EXPORTS void adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT);
//! computes exponent of each matrix element (b = e**a)
// supports only CV_32FC1 type
CV_EXPORTS void exp(const oclMat &a, oclMat &b);

View File

@ -321,3 +321,82 @@ PERF_TEST_P(filter2DFixture, filter2D,
else
OCL_PERF_ELSE
}
///////////// Bilateral////////////////////////
typedef Size_MatType BilateralFixture;
PERF_TEST_P(BilateralFixture, Bilateral,
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), d = 7;
double sigmacolor = 50.0, sigmaspace = 50.0;
Mat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
if (srcSize == OCL_SIZE_4000 && type == CV_8UC3)
declare.time(8);
if (RUN_OCL_IMPL)
{
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
oclDst.download(dst);
SANITY_CHECK(dst);
}
else if (RUN_PLAIN_IMPL)
{
TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
SANITY_CHECK(dst);
}
else
OCL_PERF_ELSE
}
///////////// adaptiveBilateral////////////////////////
typedef Size_MatType adaptiveBilateralFixture;
PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
double sigmaspace = 10.0;
Size ksize(9,9);
Mat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
if (srcSize == OCL_SIZE_4000)
declare.time(15);
if (RUN_OCL_IMPL)
{
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
oclDst.download(dst);
SANITY_CHECK(dst, 1.);
}
else if (RUN_PLAIN_IMPL)
{
TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
SANITY_CHECK(dst);
}
else
OCL_PERF_ELSE
}

View File

@ -64,6 +64,7 @@ extern const char *filter_sep_row;
extern const char *filter_sep_col;
extern const char *filtering_laplacian;
extern const char *filtering_morph;
extern const char *filtering_adaptive_bilateral;
}
}
@ -1616,3 +1617,100 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
f->apply(src, dst);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Adaptive Bilateral Filter
void cv::ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, Point anchor, int borderType)
{
CV_Assert((ksize.width & 1) && (ksize.height & 1)); // ksize must be odd
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3); // source must be 8bit RGB image
if( sigmaSpace <= 0 )
sigmaSpace = 1;
Mat lut(Size(ksize.width, ksize.height), CV_32FC1);
double sigma2 = sigmaSpace * sigmaSpace;
int idx = 0;
int w = ksize.width / 2;
int h = ksize.height / 2;
for(int y=-h; y<=h; y++)
for(int x=-w; x<=w; x++)
{
lut.at<float>(idx++) = sigma2 / (sigma2 + x * x + y * y);
}
oclMat dlut(lut);
int depth = src.depth();
int cn = src.oclchannels();
normalizeAnchor(anchor, ksize);
const static String kernelName = "edgeEnhancingFilter";
dst.create(src.size(), src.type());
char btype[30];
switch(borderType)
{
case BORDER_CONSTANT:
sprintf(btype, "BORDER_CONSTANT");
break;
case BORDER_REPLICATE:
sprintf(btype, "BORDER_REPLICATE");
break;
case BORDER_REFLECT:
sprintf(btype, "BORDER_REFLECT");
break;
case BORDER_WRAP:
sprintf(btype, "BORDER_WRAP");
break;
case BORDER_REFLECT101:
sprintf(btype, "BORDER_REFLECT_101");
break;
default:
CV_Error(CV_StsBadArg, "This border type is not supported");
break;
}
//the following constants may be adjusted for performance concerns
const static size_t blockSizeX = 64, blockSizeY = 1, EXTRA = ksize.height - 1;
//Normalize the result by default
const float alpha = ksize.height * ksize.width;
const size_t gSize = blockSizeX - ksize.width / 2 * 2;
const size_t globalSizeX = (src.cols) % gSize == 0 ?
src.cols / gSize * blockSizeX :
(src.cols / gSize + 1) * blockSizeX;
const size_t rows_per_thread = 1 + EXTRA;
const size_t globalSizeY = ((src.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
((src.rows + rows_per_thread - 1) / rows_per_thread) :
(((src.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
size_t globalThreads[3] = { globalSizeX, globalSizeY, 1};
size_t localThreads[3] = { blockSizeX, blockSizeY, 1};
char build_options[250];
//LDATATYPESIZE is sizeof local data store. This is to exemplify effect of LDS on kernel performance
sprintf(build_options,
"-D VAR_PER_CHANNEL=1 -D CALCVAR=1 -D FIXED_WEIGHT=0 -D EXTRA=%d"
" -D THREADS=%d -D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s",
static_cast<int>(EXTRA), static_cast<int>(blockSizeX), anchor.x, anchor.y, ksize.width, ksize.height, btype);
std::vector<pair<size_t , const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), &src.data));
args.push_back(std::make_pair(sizeof(cl_mem), &dst.data));
args.push_back(std::make_pair(sizeof(cl_float), (void *)&alpha));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.offset));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholerows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.wholecols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.offset));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&dst.step));
args.push_back(std::make_pair(sizeof(cl_mem), &dlut.data));
int lut_step = dlut.step1();
args.push_back(std::make_pair(sizeof(cl_int), (void *)&lut_step));
openCLExecuteKernel(Context::getContext(), &filtering_adaptive_bilateral, kernelName,
globalThreads, localThreads, args, cn, depth, build_options);
}

View File

@ -0,0 +1,424 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Harris Gasparakis, harris.gasparakis@amd.com
// Xiaopeng Fu, fuxiaopeng2222@163.com
// Yao Wang, bitwangyaoyao@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr))
#endif
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
#endif
#ifdef BORDER_REFLECT_101
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
#endif
//blur function does not support BORDER_WRAP
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
#endif
__kernel void
edgeEnhancingFilter_C4_D0(
__global const uchar4 * restrict src,
__global uchar4 *dst,
float alpha,
int src_offset,
int src_whole_rows,
int src_whole_cols,
int src_step,
int dst_offset,
int dst_rows,
int dst_cols,
int dst_step,
__global const float* lut,
int lut_step)
{
int col = get_local_id(0);
const int gX = get_group_id(0);
const int gY = get_group_id(1);
int src_x_off = (src_offset % src_step) >> 2;
int src_y_off = src_offset / src_step;
int dst_x_off = (dst_offset % dst_step) >> 2;
int dst_y_off = dst_offset / dst_step;
int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
int startY = (gY * (1+EXTRA)) - anY + src_y_off;
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
int posX = dst_startX - dst_x_off + col;
int posY = (gY * (1+EXTRA)) ;
__local uchar4 data[ksY+EXTRA][THREADS];
float4 tmp_sum[1+EXTRA];
for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
{
tmp_sum[tmpint] = (float4)(0,0,0,0);
}
#ifdef BORDER_CONSTANT
bool con;
uchar4 ss;
for(int j = 0; j < ksY+EXTRA; j++)
{
con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
int cur_col = clamp(startX + col, 0, src_whole_cols);
if(con)
{
ss = src[(startY+j)*(src_step>>2) + cur_col];
}
data[j][col] = con ? ss : (uchar4)0;
}
#else
for(int j= 0; j < ksY+EXTRA; j++)
{
int selected_row;
int selected_col;
selected_row = ADDR_H(startY+j, 0, src_whole_rows);
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
data[j][col] = src[selected_row * (src_step>>2) + selected_col];
}
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float4 var[1+EXTRA];
#if VAR_PER_CHANNEL
float4 weight;
float4 totalWeight = (float4)(0,0,0,0);
#else
float weight;
float totalWeight = 0;
#endif
int4 currValCenter;
int4 currWRTCenter;
int4 sumVal = 0;
int4 sumValSqr = 0;
if(col < (THREADS-(ksX-1)))
{
int4 currVal;
int howManyAll = (2*anX+1)*(ksY);
//find variance of all data
int startLMj;
int endLMj ;
#if CALCVAR
// Top row: don't sum the very last element
for(int extraCnt = 0; extraCnt <=EXTRA; extraCnt++)
{
startLMj = extraCnt;
endLMj = ksY+extraCnt-1;
sumVal =0;
sumValSqr=0;
for(int j = startLMj; j < endLMj; j++)
{
for(int i=-anX; i<=anX; i++)
{
currVal = convert_int4(data[j][col+anX+i]) ;
sumVal += currVal;
sumValSqr += mul24(currVal, currVal);
}
}
var[extraCnt] = convert_float4( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
#else
var[extraCnt] = (float4)(900.0, 900.0, 900.0, 0.0);
#endif
}
for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
{
// top row: include the very first element, even on first time
startLMj = extraCnt;
// go all the way, unless this is the last local mem chunk,
// then stay within limits - 1
endLMj = extraCnt + ksY;
// Top row: don't sum the very last element
currValCenter = convert_int4( data[ (startLMj + endLMj)/2][col+anX] );
for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
{
for(int i=-anX; i<=anX; i++)
{
#if FIXED_WEIGHT
#if VAR_PER_CHANNEL
weight.x = 1.0f;
weight.y = 1.0f;
weight.z = 1.0f;
weight.w = 1.0f;
#else
weight = 1.0f;
#endif
#else
currVal = convert_int4(data[j][col+anX+i]) ;
currWRTCenter = currVal-currValCenter;
#if VAR_PER_CHANNEL
weight = var[extraCnt] / (var[extraCnt] + convert_float4(currWRTCenter * currWRTCenter)) * (float4)(lut[lut_j*lut_step+anX+i]);
//weight.x = var[extraCnt].x / ( var[extraCnt].x + (float) mul24(currWRTCenter.x , currWRTCenter.x) ) ;
//weight.y = var[extraCnt].y / ( var[extraCnt].y + (float) mul24(currWRTCenter.y , currWRTCenter.y) ) ;
//weight.z = var[extraCnt].z / ( var[extraCnt].z + (float) mul24(currWRTCenter.z , currWRTCenter.z) ) ;
//weight.w = 0;
#else
weight = 1.0f/(1.0f+( mul24(currWRTCenter.x, currWRTCenter.x) + mul24(currWRTCenter.y, currWRTCenter.y) + mul24(currWRTCenter.z, currWRTCenter.z))/(var.x+var.y+var.z));
#endif
#endif
tmp_sum[extraCnt] += convert_float4(data[j][col+anX+i]) * weight;
totalWeight += weight;
}
}
tmp_sum[extraCnt] /= totalWeight;
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
{
dst[(dst_startY+extraCnt) * (dst_step>>2)+ dst_startX + col] = convert_uchar4(tmp_sum[extraCnt]);
}
#if VAR_PER_CHANNEL
totalWeight = (float4)(0,0,0,0);
#else
totalWeight = 0;
#endif
}
}
}
__kernel void
edgeEnhancingFilter_C1_D0(
__global const uchar * restrict src,
__global uchar *dst,
float alpha,
int src_offset,
int src_whole_rows,
int src_whole_cols,
int src_step,
int dst_offset,
int dst_rows,
int dst_cols,
int dst_step,
__global const float * lut,
int lut_step)
{
int col = get_local_id(0);
const int gX = get_group_id(0);
const int gY = get_group_id(1);
int src_x_off = (src_offset % src_step);
int src_y_off = src_offset / src_step;
int dst_x_off = (dst_offset % dst_step);
int dst_y_off = dst_offset / dst_step;
int startX = gX * (THREADS-ksX+1) - anX + src_x_off;
int startY = (gY * (1+EXTRA)) - anY + src_y_off;
int dst_startX = gX * (THREADS-ksX+1) + dst_x_off;
int dst_startY = (gY * (1+EXTRA)) + dst_y_off;
int posX = dst_startX - dst_x_off + col;
int posY = (gY * (1+EXTRA)) ;
__local uchar data[ksY+EXTRA][THREADS];
float tmp_sum[1+EXTRA];
for(int tmpint = 0; tmpint < 1+EXTRA; tmpint++)
{
tmp_sum[tmpint] = (float)(0);
}
#ifdef BORDER_CONSTANT
bool con;
uchar ss;
for(int j = 0; j < ksY+EXTRA; j++)
{
con = (startX+col >= 0 && startX+col < src_whole_cols && startY+j >= 0 && startY+j < src_whole_rows);
int cur_col = clamp(startX + col, 0, src_whole_cols);
if(con)
{
ss = src[(startY+j)*(src_step) + cur_col];
}
data[j][col] = con ? ss : 0;
}
#else
for(int j= 0; j < ksY+EXTRA; j++)
{
int selected_row;
int selected_col;
selected_row = ADDR_H(startY+j, 0, src_whole_rows);
selected_row = ADDR_B(startY+j, src_whole_rows, selected_row);
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
data[j][col] = src[selected_row * (src_step) + selected_col];
}
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float var[1+EXTRA];
float weight;
float totalWeight = 0;
int currValCenter;
int currWRTCenter;
int sumVal = 0;
int sumValSqr = 0;
if(col < (THREADS-(ksX-1)))
{
int currVal;
int howManyAll = (2*anX+1)*(ksY);
//find variance of all data
int startLMj;
int endLMj;
#if CALCVAR
// Top row: don't sum the very last element
for(int extraCnt=0; extraCnt<=EXTRA; extraCnt++)
{
startLMj = extraCnt;
endLMj = ksY+extraCnt-1;
sumVal = 0;
sumValSqr =0;
for(int j = startLMj; j < endLMj; j++)
{
for(int i=-anX; i<=anX; i++)
{
currVal = (uint)(data[j][col+anX+i]) ;
sumVal += currVal;
sumValSqr += mul24(currVal, currVal);
}
}
var[extraCnt] = (float)( ( (sumValSqr * howManyAll)- mul24(sumVal , sumVal) ) ) / ( (float)(howManyAll*howManyAll) ) ;
#else
var[extraCnt] = (float)(900.0);
#endif
}
for(int extraCnt = 0; extraCnt <= EXTRA; extraCnt++)
{
// top row: include the very first element, even on first time
startLMj = extraCnt;
// go all the way, unless this is the last local mem chunk,
// then stay within limits - 1
endLMj = extraCnt + ksY;
// Top row: don't sum the very last element
currValCenter = (int)( data[ (startLMj + endLMj)/2][col+anX] );
for(int j = startLMj, lut_j = 0; j < endLMj; j++, lut_j++)
{
for(int i=-anX; i<=anX; i++)
{
#if FIXED_WEIGHT
weight = 1.0f;
#else
currVal = (int)(data[j][col+anX+i]) ;
currWRTCenter = currVal-currValCenter;
weight = var[extraCnt] / (var[extraCnt] + (float)mul24(currWRTCenter,currWRTCenter)) * lut[lut_j*lut_step+anX+i] ;
#endif
tmp_sum[extraCnt] += (float)(data[j][col+anX+i] * weight);
totalWeight += weight;
}
}
tmp_sum[extraCnt] /= totalWeight;
if(posX >= 0 && posX < dst_cols && (posY+extraCnt) >= 0 && (posY+extraCnt) < dst_rows)
{
dst[(dst_startY+extraCnt) * (dst_step)+ dst_startX + col] = (uchar)(tmp_sum[extraCnt]);
}
totalWeight = 0;
}
}
}

View File

@ -353,6 +353,69 @@ TEST_P(Filter2D, Mat)
Near(1);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Bilateral
struct Bilateral : FilterTestBase
{
int type;
cv::Size ksize;
int bordertype;
double sigmacolor, sigmaspace;
virtual void SetUp()
{
type = GET_PARAM(0);
ksize = GET_PARAM(1);
bordertype = GET_PARAM(3);
Init(type);
cv::RNG &rng = TS::ptr()->get_rng();
sigmacolor = rng.uniform(20, 100);
sigmaspace = rng.uniform(10, 40);
}
};
TEST_P(Bilateral, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::bilateralFilter(mat1_roi, dst_roi, ksize.width, sigmacolor, sigmaspace, bordertype);
cv::ocl::bilateralFilter(gmat1, gdst, ksize.width, sigmacolor, sigmaspace, bordertype);
Near(1);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// AdaptiveBilateral
struct AdaptiveBilateral : FilterTestBase
{
int type;
cv::Size ksize;
int bordertype;
Point anchor;
virtual void SetUp()
{
type = GET_PARAM(0);
ksize = GET_PARAM(1);
bordertype = GET_PARAM(3);
Init(type);
anchor = Point(-1,-1);
}
};
TEST_P(AdaptiveBilateral, Mat)
{
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::adaptiveBilateralFilter(mat1_roi, dst_roi, ksize, 5, anchor, bordertype);
cv::ocl::adaptiveBilateralFilter(gmat1, gdst, ksize, 5, anchor, bordertype);
Near(1);
}
}
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
@ -400,4 +463,17 @@ INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
Values(Size(0, 0)), //not use
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
Values(CV_8UC1, CV_8UC3),
Values(Size(5, 5), Size(9, 9)),
Values(Size(0, 0)), //not use
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
(MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_WRAP, (MatType)cv::BORDER_REFLECT_101)));
INSTANTIATE_TEST_CASE_P(Filter, AdaptiveBilateral, Combine(
Values(CV_8UC1, CV_8UC3),
Values(Size(5, 5), Size(9, 9)),
Values(Size(0, 0)), //not use
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE,
(MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
#endif // HAVE_OPENCL

View File

@ -475,56 +475,6 @@ TEST_P(equalizeHist, Mat)
}
////////////////////////////////bilateralFilter////////////////////////////////////////////
struct bilateralFilter : ImgprocTestBase {};
TEST_P(bilateralFilter, Mat)
{
double sigmacolor = 50.0;
int radius = 9;
int d = 2 * radius + 1;
double sigmaspace = 20.0;
int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
//const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
if (mat1.depth() != CV_8U || mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
EXPECT_DOUBLE_EQ(0.0, 0.0);
}
else
{
for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++)
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
{
continue;
}
//if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
//{
// dst_roi.adjustROI(radius, radius, radius, radius);
// cldst_roi.adjustROI(radius, radius, radius, radius);
//}
//else
//{
// continue;
//}
cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
Near(1.);
}
}
}
////////////////////////////////copyMakeBorder////////////////////////////////////////////
struct CopyMakeBorder : ImgprocTestBase {};
@ -1618,21 +1568,6 @@ INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
// ONE_TYPE(CV_8UC1),
// NULL_TYPE,
// ONE_TYPE(CV_8UC1),
// NULL_TYPE,
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
Values(CV_8UC1, CV_8UC3),
NULL_TYPE,
NULL_TYPE,
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),

View File

@ -0,0 +1,51 @@
// This sample shows the difference of adaptive bilateral filter and bilateral filter.
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/ocl/ocl.hpp"
using namespace cv;
using namespace std;
int main( int argc, const char** argv )
{
const char* keys =
"{ i | input | | specify input image }"
"{ k | ksize | 5 | specify kernel size }";
CommandLineParser cmd(argc, argv, keys);
string src_path = cmd.get<string>("i");
int ks = cmd.get<int>("k");
const char * winName[] = {"input", "adaptive bilateral CPU", "adaptive bilateral OpenCL", "bilateralFilter OpenCL"};
Mat src = imread(src_path);
Mat abFilterCPU;
if(src.empty()){
//cout << "error read image: " << src_path << endl;
return -1;
}
std::vector<ocl::Info> infos;
ocl::getDevice(infos);
ocl::oclMat dsrc(src), dABFilter, dBFilter;
Size ksize(ks, ks);
adaptiveBilateralFilter(src,abFilterCPU, ksize, 10);
ocl::adaptiveBilateralFilter(dsrc, dABFilter, ksize, 10);
ocl::bilateralFilter(dsrc, dBFilter, ks, 30, 9);
Mat abFilter = dABFilter;
Mat bFilter = dBFilter;
imshow(winName[0], src);
imshow(winName[1], abFilterCPU);
imshow(winName[2], abFilter);
imshow(winName[3], bFilter);
waitKey();
return 0;
}