Merge pull request #3814 from erikrk:denoising-16bit-master

This commit is contained in:
Vadim Pisarevsky 2015-03-24 15:23:58 +00:00
commit 5501cfd809
9 changed files with 977 additions and 318 deletions

View File

@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
/** @overload */
template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
//! @cond IGNORED
@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
// 64-bit integer -> uchar: clamp to [0, UCHAR_MAX] before narrowing.
template<> inline uchar saturate_cast<uchar>(int64 v)
{
    if (v < 0)
        return 0;
    if (v > (int64)UCHAR_MAX)
        return (uchar)UCHAR_MAX;
    return (uchar)v;
}
template<> inline uchar saturate_cast<uchar>(uint64 v)
{
    // Unsigned input can only exceed the upper bound.
    return v < (uint64)UCHAR_MAX ? (uchar)v : (uchar)UCHAR_MAX;
}
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
// 64-bit integer -> schar: clamp to [SCHAR_MIN, SCHAR_MAX] before narrowing.
// The range test uses direct comparisons rather than the "subtract the minimum
// and compare unsigned" trick: v - SCHAR_MIN is a signed 64-bit addition that
// overflows (undefined behaviour) for v > INT64_MAX - 128.
template<> inline schar saturate_cast<schar>(int64 v)
{
    if (v < (int64)SCHAR_MIN)
        return (schar)SCHAR_MIN;
    if (v > (int64)SCHAR_MAX)
        return (schar)SCHAR_MAX;
    return (schar)v;
}
// Unsigned input can only exceed the upper bound.
template<> inline schar saturate_cast<schar>(uint64 v)       { return (schar)std::min(v, (uint64)SCHAR_MAX); }
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
// 64-bit integer -> ushort: clamp to [0, USHRT_MAX] before narrowing.
template<> inline ushort saturate_cast<ushort>(int64 v)
{
    if (v < 0)
        return 0;
    if (v > (int64)USHRT_MAX)
        return (ushort)USHRT_MAX;
    return (ushort)v;
}
template<> inline ushort saturate_cast<ushort>(uint64 v)
{
    // Unsigned input can only exceed the upper bound.
    return v < (uint64)USHRT_MAX ? (ushort)v : (ushort)USHRT_MAX;
}
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
// 64-bit integer -> short: clamp to [SHRT_MIN, SHRT_MAX] before narrowing.
// Direct comparisons are used instead of (uint64)(v - SHRT_MIN) <= USHRT_MAX,
// because v - SHRT_MIN overflows a signed 64-bit value (undefined behaviour)
// for v > INT64_MAX - 32768.
template<> inline short saturate_cast<short>(int64 v)
{
    if (v < (int64)SHRT_MIN)
        return (short)SHRT_MIN;
    if (v > (int64)SHRT_MAX)
        return (short)SHRT_MAX;
    return (short)v;
}
// Unsigned input can only exceed the upper bound.
template<> inline short saturate_cast<short>(uint64 v)       { return (short)std::min(v, (uint64)SHRT_MAX); }
template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
template<> inline int saturate_cast<int>(double v) { return cvRound(v); }

View File

@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. Noise expected to be a gaussian white noise
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
@ -138,6 +138,35 @@ parameter.
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Perform image denoising using Non-local Means Denoising algorithm
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
optimizations. The noise is expected to be Gaussian white noise.
@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel image.
@param dst Output image with the same size and type as src .
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affects performance linearly: the greater the searchWindowSize, the
greater the denoising time. Recommended value 21 pixels
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. Big h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
This function is expected to be applied to grayscale images. For colored images look at
fastNlMeansDenoisingColored. An advanced usage of this function is manual denoising of a colored
image in different colorspaces. Such an approach is used in fastNlMeansDenoisingColored, which
converts the image to the CIELAB colorspace and then separately denoises the L and AB components
with different h parameters.
*/
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoising function for colored images
@param src Input 8-bit 3-channel image.
@ -165,7 +194,35 @@ captured in small period of time. For example video. This version of the functio
images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should
@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
4-channel images sequence. All images should have the same type and
size.
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
srcImgs[imgToDenoiseIndex] image.
@param dst Output image with the same size and type as srcImgs images.
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels
@param h Parameter regulating filter strength. Bigger h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
*/
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize,
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
captured in small period of time. For example video. This version of the function is for grayscale
images or for manual manipulation with colorspaces. For more details see
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
2-channel, 3-channel or 4-channel images sequence. All images should
have the same type and size.
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
@ -178,13 +235,17 @@ Should be odd. Recommended value 7 pixels
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
denoising time. Recommended value 21 pixels
@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly
removes noise but also removes image details, smaller h value preserves details but also preserves
some noise
@param h Array of parameters regulating filter strength, either one
parameter applied to all channels or one per channel in dst. Big h value
perfectly removes noise but also removes image details, smaller h
value preserves details but also preserves some noise
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
*/
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
int imgToDenoiseIndex, int temporalWindowSize,
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
const std::vector<float>& h,
int templateWindowSize = 7, int searchWindowSize = 21,
int normType = NORM_L2);
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences

View File

@ -45,42 +45,115 @@
#include "fast_nlmeans_multi_denoising_invoker.hpp"
#include "fast_nlmeans_denoising_opencl.hpp"
template<typename ST, typename IT, typename UIT, typename D>
static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<float>& h,
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
switch (CV_MAT_CN(src.type())) {
case 1:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case 2:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case 3:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case 4:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
}
}
// Single-strength convenience overload: wraps h in a one-element vector and
// forwards to the vector-based overload (normType left at its default).
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
                               int templateWindowSize, int searchWindowSize)
{
    const std::vector<float> hValues(1, h);
    fastNlMeansDenoising(_src, _dst, hValues, templateWindowSize, searchWindowSize);
}
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector<float>& h,
int templateWindowSize, int searchWindowSize, int normType)
{
int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
CV_Assert(hn == 1 || hn == cn);
Size src_size = _src.size();
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn,
templateWindowSize, searchWindowSize, normType))
Mat src = _src.getMat();
_dst.create(src_size, src.type());
Mat dst = _dst.getMat();
switch (normType) {
case NORM_L2:
#ifdef HAVE_TEGRA_OPTIMIZATION
if(tegra::useTegra() && tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize))
if(hn == 1 && tegra::useTegra() &&
tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
return;
#endif
switch (src.type()) {
switch (depth) {
case CV_8U:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<uchar>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec2b>(
src, dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<cv::Vec3b>(
src, dst, templateWindowSize, searchWindowSize, h));
fastNlMeansDenoising_<uchar, int, unsigned, DistSquared>(src, dst, h,
templateWindowSize,
searchWindowSize);
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported");
"Unsupported depth! Only CV_8U is supported for NORM_L2");
}
break;
case NORM_L1:
switch (depth) {
case CV_8U:
fastNlMeansDenoising_<uchar, int, unsigned, DistAbs>(src, dst, h,
templateWindowSize,
searchWindowSize);
break;
case CV_16U:
fastNlMeansDenoising_<ushort, int64, uint64, DistAbs>(src, dst, h,
templateWindowSize,
searchWindowSize);
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
}
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
}
}
@ -92,7 +165,7 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
Size src_size = _src.size();
if (type != CV_8UC3 && type != CV_8UC4)
{
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!");
return;
}
@ -108,8 +181,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
Mat src_lab;
cvtColor(src, src_lab, COLOR_LBGR2Lab);
Mat l(src_size, CV_8U);
Mat ab(src_size, CV_8UC2);
Mat l(src_size, CV_MAKE_TYPE(depth, 1));
Mat ab(src_size, CV_MAKE_TYPE(depth, 2));
Mat l_ab[] = { l, ab };
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
@ -157,9 +230,76 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
}
}
template<typename ST, typename IT, typename UIT, typename D>
static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& dst,
int imgToDenoiseIndex, int temporalWindowSize,
const std::vector<float>& h,
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
switch (srcImgs[0].type())
{
case CV_8U:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case CV_8UC2:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case CV_8UC3:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
break;
case CV_8UC4:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported");
}
}
// Single-strength convenience overload: wraps h in a one-element vector and
// forwards to the vector-based overload (normType left at its default).
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
                                    int imgToDenoiseIndex, int temporalWindowSize,
                                    float h, int templateWindowSize, int searchWindowSize)
{
    const std::vector<float> hValues(1, h);
    fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize,
                              hValues, templateWindowSize, searchWindowSize);
}
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
int imgToDenoiseIndex, int temporalWindowSize,
const std::vector<float>& h,
int templateWindowSize, int searchWindowSize, int normType)
{
std::vector<Mat> srcImgs;
_srcImgs.getMatVector(srcImgs);
@ -168,32 +308,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
srcImgs, imgToDenoiseIndex,
temporalWindowSize, templateWindowSize, searchWindowSize);
int hn = (int)h.size();
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
CV_Assert(hn == 1 || hn == cn);
_dst.create(srcImgs[0].size(), srcImgs[0].type());
Mat dst = _dst.getMat();
switch (srcImgs[0].type())
{
switch (normType) {
case NORM_L2:
switch (depth) {
case CV_8U:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC2:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
break;
case CV_8UC3:
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, h));
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
DistSquared>(srcImgs, dst,
imgToDenoiseIndex, temporalWindowSize,
h,
templateWindowSize, searchWindowSize);
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
"Unsupported depth! Only CV_8U is supported for NORM_L2");
}
break;
case NORM_L1:
switch (depth) {
case CV_8U:
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
DistAbs>(srcImgs, dst,
imgToDenoiseIndex, temporalWindowSize,
h,
templateWindowSize, searchWindowSize);
break;
case CV_16U:
fastNlMeansDenoisingMulti_<ushort, int64, uint64,
DistAbs>(srcImgs, dst,
imgToDenoiseIndex, temporalWindowSize,
h,
templateWindowSize, searchWindowSize);
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
}
break;
default:
CV_Error(Error::StsBadArg,
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
}
}
@ -212,9 +372,10 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
_dst.create(srcImgs[0].size(), srcImgs[0].type());
Mat dst = _dst.getMat();
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type);
int src_imgs_size = static_cast<int>(srcImgs.size());
if (srcImgs[0].type() != CV_8UC3)
if (type != CV_8UC3)
{
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
return;
@ -228,9 +389,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
std::vector<Mat> ab(src_imgs_size);
for (int i = 0; i < src_imgs_size; i++)
{
src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
src_lab[i] = Mat::zeros(srcImgs[0].size(), type);
l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1));
ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2));
cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab);
Mat l_ab[] = { l[i], ab[i] };

View File

@ -50,13 +50,13 @@
using namespace cv;
template <typename T>
template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansDenoisingInvoker :
public ParallelLoopBody
{
public:
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
int template_window_size, int search_window_size, const float h);
int template_window_size, int search_window_size, const float *h);
void operator() (const Range& range) const;
@ -75,9 +75,9 @@ private:
int template_window_half_size_;
int search_window_half_size_;
int fixed_point_mult_;
typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift_;
std::vector<int> almost_dist2weight_;
std::vector<WT> almost_dist2weight_;
void calcDistSumsForFirstElementInRow(
int i, Array2d<int>& dist_sums,
@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
return p;
}
template <class T>
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
const Mat& src, Mat& dst,
int template_window_size,
int search_window_size,
const float h) :
const float *h) :
src_(src), dst_(dst)
{
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
CV_Assert(src.channels() == pixelInfo<T>::channels);
template_window_half_size_ = template_window_size / 2;
search_window_half_size_ = search_window_size / 2;
@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
border_size_ = search_window_half_size_ + template_window_half_size_;
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
const IT max_estimate_sum_value =
(IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T);
int max_dist = D::template maxDist<T>();
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight_.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight_[almost_dist] = weight;
almost_dist2weight_[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
}
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
// additional optimization init end
if (dst_.empty())
dst_ = Mat::zeros(src_.size(), src_.type());
}
template <class T>
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{
int row_from = range.start;
int row_to = range.end - 1;
@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x];
int bx = start_bx + x;
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x];
@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
}
// calc weights
int estimation[3], weights_sum = 0;
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int y = 0; y < search_window_size_; y++)
{
@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
for (int x = 0; x < search_window_size_; x++)
{
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
int weight = almost_dist2weight_[almostAvgDist];
weights_sum += weight;
WT weight = almost_dist2weight_[almostAvgDist];
T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p);
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
}
}
}
template <class T>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i,
Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums,
@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
{
int dist = calcDist<T>(extended_src_,
int dist = D::template calcDist<T>(extended_src_,
border_size_ + i + ty, border_size_ + j + tx,
border_size_ + start_y + ty, border_size_ + start_x + tx);
@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
}
}
template <class T>
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num,
Array2d<int>& dist_sums,
Array3d<int>& col_dist_sums,
@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
int by = start_by + y;
int bx = start_bx + x;
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];

View File

@ -44,118 +44,438 @@
using namespace cv;
template <typename T> static inline int calcDist(const T a, const T b);
template <> inline int calcDist(const uchar a, const uchar b)
template <typename T> struct pixelInfo_
{
return (a-b) * (a-b);
static const int channels = 1;
typedef T sampleType;
};
// Pixel traits for multi-channel pixels stored as Vec<ET, n>:
// the channel count is n and the per-channel sample type is ET.
template <typename ET, int n> struct pixelInfo_<Vec<ET, n> >
{
static const int channels = n;
typedef ET sampleType;
};
template <typename T> struct pixelInfo: public pixelInfo_<T>
{
using typename pixelInfo_<T>::sampleType;
static inline sampleType sampleMax()
{
return std::numeric_limits<sampleType>::max();
}
template <> inline int calcDist(const Vec2b a, const Vec2b b)
static inline sampleType sampleMin()
{
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]);
return std::numeric_limits<sampleType>::min();
}
template <> inline int calcDist(const Vec3b a, const Vec3b b)
static inline size_t sampleBytes()
{
return (a[0]-b[0])*(a[0]-b[0]) + (a[1]-b[1])*(a[1]-b[1]) + (a[2]-b[2])*(a[2]-b[2]);
return sizeof(sampleType);
}
template <typename T> static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
static inline size_t sampleBits()
{
return 8*sampleBytes();
}
};
class DistAbs
{
// Absolute difference between two single-channel samples.
template <typename T> struct calcDist_
{
    static inline int f(const T a, const T b)
    {
        const int diff = (int)(a-b);
        return diff < 0 ? -diff : diff;
    }
};
// Sum of per-channel absolute differences for 2-channel pixels.
template <typename ET> struct calcDist_<Vec<ET, 2> >
{
    static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
    {
        const int d0 = std::abs((int)(a[0]-b[0]));
        const int d1 = std::abs((int)(a[1]-b[1]));
        return d0 + d1;
    }
};
// Sum of per-channel absolute differences for 3-channel pixels.
template <typename ET> struct calcDist_<Vec<ET, 3> >
{
    static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
    {
        const int d0 = std::abs((int)(a[0]-b[0]));
        const int d1 = std::abs((int)(a[1]-b[1]));
        const int d2 = std::abs((int)(a[2]-b[2]));
        return d0 + d1 + d2;
    }
};
// Sum of per-channel absolute differences for 4-channel pixels.
template <typename ET> struct calcDist_<Vec<ET, 4> >
{
    static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
    {
        const int d0 = std::abs((int)(a[0]-b[0]));
        const int d1 = std::abs((int)(a[1]-b[1]));
        const int d2 = std::abs((int)(a[2]-b[2]));
        const int d3 = std::abs((int)(a[3]-b[3]));
        return d0 + d1 + d2 + d3;
    }
};
// Fixed-point weight for a given distance using a single strength h[0]:
// round(fixed_point_mult * exp(-dist^2 / (h^2 * channels))), flushed to zero
// when it falls below 0.1% of fixed_point_mult.
template <typename T, typename WT> struct calcWeight_
{
    static inline WT f(double dist, const float *h, WT fixed_point_mult)
    {
        static const double WEIGHT_THRESHOLD = 0.001;

        double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels));
        if (std::isnan(w))
            w = 1.0; // Handle h = 0.0

        const WT weight = (WT)round(fixed_point_mult * w);
        return (weight < WEIGHT_THRESHOLD * fixed_point_mult) ? (WT)0 : weight;
    }
};
// Per-channel weights: element ch of the result is the scalar weight computed
// from the ch-th strength value h[ch].
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
{
    static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
    {
        Vec<ET, n> weights;
        for (int ch = 0; ch < n; ++ch)
            weights[ch] = calcWeight<T, ET>(dist, h + ch, fixed_point_mult);
        return weights;
    }
};
public:
// Distance between two pixel values; forwards to the calcDist_<T>
// specialization that matches the pixel type T.
template <typename T> static inline int calcDist(const T a, const T b)
{
return calcDist_<T>::f(a, b);
}
// Distance between the pixels of m at (i1, j1) and (i2, j2), read as type T.
template <typename T>
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
{
    const T first = m.at<T>(i1, j1);
    const T second = m.at<T>(i2, j2);
    return calcDist<T>(first, second);
}
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
template <typename T>
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
{
return calcDist(a_down, b_down) - calcDist(a_up, b_up);
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
};
// Weight(s) for a given distance; forwards to calcWeight_<T, WT>.
// WT selects the scalar or the Vec specialization; fixed_point_mult is always
// passed as the per-element sample type of WT.
template <typename T, typename WT>
static inline WT calcWeight(double dist, const float *h,
typename pixelInfo<WT>::sampleType fixed_point_mult)
{
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
}
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
template <typename T>
static inline int maxDist()
{
return (int)pixelInfo<T>::sampleMax() * pixelInfo<T>::channels;
}
};
class DistSquared
{
template <typename T> struct calcDist_
{
static inline int f(const T a, const T b)
{
return (int)(a-b) * (int)(a-b);
}
};
template <typename ET> struct calcDist_<Vec<ET, 2> >
{
static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
{
return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]);
}
};
template <typename ET> struct calcDist_<Vec<ET, 3> >
{
static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
{
return
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
(int)(a[2]-b[2])*(int)(a[2]-b[2]);
}
};
template <typename ET> struct calcDist_<Vec<ET, 4> >
{
static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
{
return
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
(int)(a[2]-b[2])*(int)(a[2]-b[2]) +
(int)(a[3]-b[3])*(int)(a[3]-b[3]);
}
};
template <typename T> struct calcUpDownDist_
{
static inline int f(T a_up, T a_down, T b_up, T b_down)
{
int A = a_down - b_down;
int B = a_up - b_up;
return (A-B)*(A+B);
}
};
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
template <> inline void incWithWeight(int* estimation, int weight, uchar p)
template <typename ET, int n> struct calcUpDownDist_<Vec<ET, n> >
{
estimation[0] += weight * p;
private:
typedef Vec<ET, n> T;
public:
static inline int f(T a_up, T a_down, T b_up, T b_down)
{
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
}
};
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
template <typename T, typename WT> struct calcWeight_
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
static inline WT f(double dist, const float *h, WT fixed_point_mult)
{
double w = std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels));
if (std::isnan(w)) w = 1.0; // Handle h = 0.0
static const double WEIGHT_THRESHOLD = 0.001;
WT weight = (WT)round(fixed_point_mult * w);
if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0;
return weight;
}
};
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
estimation[2] += weight * p[2];
}
template <> inline void incWithWeight(int* estimation, int weight, int p)
static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
{
estimation[0] += weight * p;
}
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
}
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
{
estimation[0] += weight * p[0];
estimation[1] += weight * p[1];
estimation[2] += weight * p[2];
}
template <typename T> static inline T saturateCastFromArray(int* estimation);
template <> inline uchar saturateCastFromArray(int* estimation)
{
return saturate_cast<uchar>(estimation[0]);
}
template <> inline Vec2b saturateCastFromArray(int* estimation)
{
Vec2b res;
res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]);
Vec<ET, n> res;
for (int i=0; i<n; i++)
res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
return res;
}
};
template <> inline Vec3b saturateCastFromArray(int* estimation)
public:
template <typename T> static inline int calcDist(const T a, const T b)
{
Vec3b res;
res[0] = saturate_cast<uchar>(estimation[0]);
res[1] = saturate_cast<uchar>(estimation[1]);
res[2] = saturate_cast<uchar>(estimation[2]);
return calcDist_<T>::f(a, b);
}
template <typename T>
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
{
const T a = m.at<T>(i1, j1);
const T b = m.at<T>(i2, j2);
return calcDist<T>(a,b);
}
template <typename T>
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
{
return calcUpDownDist_<T>::f(a_up, a_down, b_up, b_down);
};
template <typename T, typename WT>
static inline WT calcWeight(double dist, const float *h,
typename pixelInfo<WT>::sampleType fixed_point_mult)
{
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
}
template <typename T>
static inline int maxDist()
{
return (int)pixelInfo<T>::sampleMax() * (int)pixelInfo<T>::sampleMax() *
pixelInfo<T>::channels;
}
};
// Accumulates one weighted single-channel sample:
//   estimation[0]  += weight * p
//   weights_sum[0] += weight
// with the weight converted to the accumulator type IT first.
template <typename T, typename IT, typename WT> struct incWithWeight_
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, T p)
    {
        const IT w = (IT)weight;
        *estimation += w * p;
        *weights_sum += w;
    }
};
// 2-channel pixel with one scalar weight: every channel is accumulated with
// the same weight and the weight itself is added once to weights_sum[0].
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 2>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 2> p)
    {
        const IT w = (IT)weight;
        for (int c = 0; c < 2; ++c)
            estimation[c] += w * p[c];
        weights_sum[0] += w;
    }
};
// 3-channel pixel with one scalar weight: every channel is accumulated with
// the same weight and the weight itself is added once to weights_sum[0].
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 3>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 3> p)
    {
        const IT w = (IT)weight;
        for (int c = 0; c < 3; ++c)
            estimation[c] += w * p[c];
        weights_sum[0] += w;
    }
};
// 4-channel pixel with one scalar weight: every channel is accumulated with
// the same weight and the weight itself is added once to weights_sum[0].
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 4>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 4> p)
    {
        const IT w = (IT)weight;
        for (int c = 0; c < 4; ++c)
            estimation[c] += w * p[c];
        weights_sum[0] += w;
    }
};
// 2-channel pixel with a per-channel weight vector: channel c is accumulated
// with weight[c], and weight[c] is added to weights_sum[c].
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 2>, IT, Vec<EW, 2> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 2> weight, Vec<ET, 2> p)
    {
        for (int c = 0; c < 2; ++c)
            estimation[c] += (IT)weight[c] * p[c];
        for (int c = 0; c < 2; ++c)
            weights_sum[c] += (IT)weight[c];
    }
};
// 3-channel pixel with a per-channel weight vector: channel c is accumulated
// with weight[c], and weight[c] is added to weights_sum[c].
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 3>, IT, Vec<EW, 3> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 3> weight, Vec<ET, 3> p)
    {
        for (int c = 0; c < 3; ++c)
            estimation[c] += (IT)weight[c] * p[c];
        for (int c = 0; c < 3; ++c)
            weights_sum[c] += (IT)weight[c];
    }
};
// 4-channel pixel with a per-channel weight vector: channel c is accumulated
// with weight[c], and weight[c] is added to weights_sum[c].
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 4>, IT, Vec<EW, 4> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 4> weight, Vec<ET, 4> p)
    {
        for (int c = 0; c < 4; ++c)
            estimation[c] += (IT)weight[c] * p[c];
        for (int c = 0; c < 4; ++c)
            weights_sum[c] += (IT)weight[c];
    }
};
template <typename T, typename IT, typename WT>
static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p)
{
return incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p);
}
// In-place rounded division of the accumulated estimation by the accumulated
// weights: estimation = (estimation + weights_sum / 2) / weights_sum, where
// the numerator's estimation term is first converted to UIT.
// nc is the number of estimation channels, nw the number of weight sums.
// Only the combinations specialized below provide a definition of f.
template <typename IT, typename UIT, int nc, int nw> struct divByWeightsSum_
{
    static inline void f(IT* estimation, IT* weights_sum);
};

// One channel, one weight sum.
template <typename IT, typename UIT> struct divByWeightsSum_<IT, UIT, 1, 1>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        *estimation = (static_cast<UIT>(*estimation) + *weights_sum/2) / *weights_sum;
    }
};

// n channels sharing a single weight sum.
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, 1>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        for (int c = 0; c < n; ++c)
        {
            estimation[c] = (static_cast<UIT>(estimation[c]) + weights_sum[0]/2) / weights_sum[0];
        }
    }
};

// n channels, each with its own weight sum.
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, n>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        for (int c = 0; c < n; ++c)
        {
            estimation[c] = (static_cast<UIT>(estimation[c]) + weights_sum[c]/2) / weights_sum[c];
        }
    }
};
template <typename IT, typename UIT, int nc, int nw>
static inline void divByWeightsSum(IT* estimation, IT* weights_sum)
{
return divByWeightsSum_<IT, UIT, nc, nw>::f(estimation, weights_sum);
}
// Single-channel result: saturate_cast of the first accumulator entry to T.
template <typename T, typename IT> struct saturateCastFromArray_
{
    static inline T f(IT* estimation)
    {
        const IT value = estimation[0];
        return saturate_cast<T>(value);
    }
};
// 2-channel result: each accumulator entry is saturate_cast to the element
// type ET and stored in the matching channel.
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 2>, IT>
{
    static inline Vec<ET, 2> f(IT* estimation)
    {
        Vec<ET, 2> pixel;
        for (int c = 0; c < 2; ++c)
            pixel[c] = saturate_cast<ET>(estimation[c]);
        return pixel;
    }
};
template <> inline int saturateCastFromArray(int* estimation)
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 3>, IT>
{
return estimation[0];
static inline Vec<ET, 3> f(IT* estimation)
{
Vec<ET, 3> res;
res[0] = saturate_cast<ET>(estimation[0]);
res[1] = saturate_cast<ET>(estimation[1]);
res[2] = saturate_cast<ET>(estimation[2]);
return res;
}
};
template <> inline Vec2i saturateCastFromArray(int* estimation)
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 4>, IT>
{
estimation[1] = 0;
return Vec2i(estimation);
static inline Vec<ET, 4> f(IT* estimation)
{
Vec<ET, 4> res;
res[0] = saturate_cast<ET>(estimation[0]);
res[1] = saturate_cast<ET>(estimation[1]);
res[2] = saturate_cast<ET>(estimation[2]);
res[3] = saturate_cast<ET>(estimation[3]);
return res;
}
};
template <> inline Vec3i saturateCastFromArray(int* estimation)
template <typename T, typename IT> static inline T saturateCastFromArray(IT* estimation)
{
return Vec3i(estimation);
return saturateCastFromArray_<T, IT>::f(estimation);
}
#endif

View File

@ -28,12 +28,16 @@ static int divUp(int a, int b)
return (a + b - 1) / b;
}
template <typename FT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
template <typename FT, typename ST, typename WT>
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
int searchWindowSize, int templateWindowSize,
const FT *h, int hn, int cn, int normType,
int & almostTemplateWindowSizeSqBinShift)
{
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
std::numeric_limits<ST>::max();
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
std::numeric_limits<int>::max());
int depth = DataType<FT>::depth;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
const FT WEIGHT_THRESHOLD = 1e-3f;
int maxDist = 255 * 255 * cn;
int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
FT den = 1.0f / (h * h * cn);
FT den[4];
CV_Assert(hn > 0 && hn <= 4);
for (int i=0; i<hn; i++)
den[i] = 1.0f / (h[i] * h[i] * cn);
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
char buf[40];
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
" -D wlut_t=%s -D convert_wlut_t=%s%s%s",
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
normType == NORM_L1 ? " -D ABS" : ""));
if (k.empty())
return false;
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
almostDist2ActualDistMultiplier, fixedPointMult,
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
size_t globalsize[1] = { almostMaxDist };
return k.run(1, globalsize, NULL, false);
}
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int templateWindowSize, int searchWindowSize)
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
int templateWindowSize, int searchWindowSize, int normType)
{
int type = _src.type(), cn = CV_MAT_CN(type);
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
Size size = _src.size();
if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
return false;
int templateWindowHalfWize = templateWindowSize / 2;
@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
int almostTemplateWindowSizeSqBinShift = -1;
char cvt[2][40];
char buf[4][40];
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s",
templateWindowSize, searchWindowSize, ocl::typeToStr(type),
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize,
templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
templateWindowSize, searchWindowSize,
ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
ocl::typeToStr(CV_32SC(hn)),
depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
BLOCK_COLS, BLOCK_ROWS,
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
normType == NORM_L1 ? " -D ABS" : "");
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
if (k.empty())
return false;
UMat almostDist2Weight;
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
almostTemplateWindowSizeSqBinShift))
if ((depth == CV_8U &&
!ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)) ||
(depth == CV_16U &&
!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
searchWindowSize, templateWindowSize,
h, hn, cn, normType,
almostTemplateWindowSizeSqBinShift)))
return false;
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
UMat srcex;
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
if (cn == 3) {
srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
}
else
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
_dst.create(size, type);
UMat dst = _dst.getUMat();
UMat dst;
if (cn == 3)
dst.create(size, CV_MAKE_TYPE(depth, 4));
else
dst = _dst.getUMat();
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
return k.run(2, globalsize, localsize, false);
if (!k.run(2, globalsize, localsize, false)) return false;
if (cn == 3) {
int from_to[] = { 0,0, 1,1, 2,2 };
mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
}
return true;
}
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,

View File

@ -50,14 +50,14 @@
using namespace cv;
template <typename T>
template <typename T, typename IT, typename UIT, typename D, typename WT>
struct FastNlMeansMultiDenoisingInvoker :
ParallelLoopBody
{
public:
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
int temporalWindowSize, Mat& dst, int template_window_size,
int search_window_size, const float h);
int search_window_size, const float *h);
void operator() (const Range& range) const;
@ -81,9 +81,9 @@ private:
int search_window_half_size_;
int temporal_window_half_size_;
int fixed_point_mult_;
typename pixelInfo<WT>::sampleType fixed_point_mult_;
int almost_template_window_size_sq_bin_shift;
std::vector<int> almost_dist2weight;
std::vector<WT> almost_dist2weight;
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums,
@ -94,19 +94,19 @@ private:
Array4d<int>& up_col_dist_sums) const;
};
template <class T>
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
template <typename T, typename IT, typename UIT, typename D, typename WT>
FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
const std::vector<Mat>& srcImgs,
int imgToDenoiseIndex,
int temporalWindowSize,
cv::Mat& dst,
int template_window_size,
int search_window_size,
const float h) :
const float *h) :
dst_(dst), extended_srcs_(srcImgs.size())
{
CV_Assert(srcImgs.size() > 0);
CV_Assert(srcImgs[0].channels() == sizeof(T));
CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
rows_ = srcImgs[0].rows;
cols_ = srcImgs[0].cols;
@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
const IT max_estimate_sum_value =
(IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
pixelInfo<WT>::sampleMax());
// precalc weight for every possible l2 dist between blocks
// additional optimization of precalced weights to replace division(averaging) by binary shift
@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
int max_dist = 255 * 255 * sizeof(T);
int max_dist = D::template maxDist<T>();
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
almost_dist2weight.resize(almost_max_dist);
const double WEIGHT_THRESHOLD = 0.001;
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
{
double dist = almost_dist * almost_dist2actual_dist_multiplier;
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
weight = 0;
almost_dist2weight[almost_dist] = weight;
almost_dist2weight[almost_dist] =
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
}
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
// additional optimization init end
if (dst_.empty())
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
}
template <class T>
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
template <typename T, typename IT, typename UIT, typename D, typename WT>
void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
{
int row_from = range.start;
int row_to = range.end - 1;
@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
dist_sums_row[x] -= col_dist_sums_row[x];
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
dist_sums_row[x] += col_dist_sums_row[x];
up_col_dist_sums_row[x] = col_dist_sums_row[x];
@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
}
// calc weights
int weights_sum = 0;
int estimation[3];
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
estimation[channel_num] = 0;
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
weights_sum[channel_num] = 0;
for (int d = 0; d < temporal_window_size_; d++)
{
@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
{
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
int weight = almost_dist2weight[almostAvgDist];
weights_sum += weight;
WT weight = almost_dist2weight[almostAvgDist];
T p = cur_row_ptr[border_size_ + search_window_x + x];
incWithWeight(estimation, weight, p);
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
}
}
}
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum;
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
weights_sum);
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
}
}
}
template <class T>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{
int j = 0;
@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
{
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{
int dist = calcDist<T>(
int dist = D::template calcDist<T>(
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
}
}
template <class T>
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
template <typename T, typename IT, typename UIT, typename D, typename WT>
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
int i, int j, int first_col_num, Array3d<int>& dist_sums,
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
{
@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
{
*col_dist_sums_ptr += calcDist<T>(
*col_dist_sums_ptr += D::template calcDist<T>(
main_extended_src_.at<T>(ay + ty, ax),
cur_extended_src.at<T>(by + ty, bx));
}

View File

@ -20,21 +20,23 @@
#ifdef OP_CALC_WEIGHTS
// Fills the distance -> weight lookup table. The work-item with global id
// almostDist (when below almostMaxDist) writes entry almostDist:
//   dist = almostDist * almostDist2ActualDistMultiplier
//   w    = exp(-dist^2 * den) when ABS is defined, exp(-dist * den) otherwise
// A NaN w is replaced by 1, the result is scaled by fixedPointMult and
// converted to wlut_t, and weights below WEIGHT_THRESHOLD * fixedPointMult
// are stored as 0. FT, w_t, wlut_t and convert_wlut_t come from build options.
__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist,
                                    FT almostDist2ActualDistMultiplier, int fixedPointMult,
                                    w_t den, FT WEIGHT_THRESHOLD)
{
    int almostDist = get_global_id(0);

    if (almostDist < almostMaxDist)
    {
        FT dist = almostDist * almostDist2ActualDistMultiplier;
#ifdef ABS
        w_t w = exp((w_t)(-dist*dist) * den);
#else
        w_t w = exp((w_t)(-dist) * den);
#endif
        wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w));
        almostDist2Weight[almostDist] =
            weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight;
    }
}
@ -44,21 +46,35 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
// Distance between two pixels, summed over the cn channels.
// With ABS defined each channel contributes abs_diff(a, b); otherwise it
// contributes the squared difference. pixel_t, int_t, convert_int_t and cn
// come from build options; cn values outside 1..4 are a compile error.
inline int calcDist(pixel_t a, pixel_t b)
{
#ifdef ABS
    int_t retval = convert_int_t(abs_diff(a, b));
#else
    int_t diff = convert_int_t(a) - convert_int_t(b);
    int_t retval = diff * diff;
#endif

#if cn == 1
    return retval;
#elif cn == 2
    return retval.x + retval.y;
#elif cn == 3
    return retval.x + retval.y + retval.z;
#elif cn == 4
    return retval.x + retval.y + retval.z + retval.w;
#else
#error "cn should be either 1, 2, 3 or 4"
#endif
}
inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
#ifdef ABS
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
{
return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
}
#else
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
{
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
@ -68,10 +84,15 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v
return retval;
#elif cn == 2
return retval.x + retval.y;
#elif cn == 3
return retval.x + retval.y + retval.z;
#elif cn == 4
return retval.x + retval.y + retval.z + retval.w;
#else
#error "cn should be either 1 or 2"
#error "cn should be either 1, 2, 3 or 4"
#endif
}
#endif
#define COND if (x == 0 && y == 0)
@ -87,9 +108,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
{
int dist = 0, value;
__global const uchar_t * src_template = (__global const uchar_t *)(src +
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
__global const pixel_t * src_template = (__global const pixel_t *)(src +
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
#pragma unroll
@ -107,8 +128,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
dist += value;
}
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
}
#pragma unroll
@ -130,9 +151,9 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
__global const uchar_t * src_template = (__global const uchar_t *)(src +
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
__global const pixel_t * src_template = (__global const pixel_t *)(src +
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
int col_dist = 0;
@ -142,8 +163,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
{
col_dist += calcDist(src_current[0], src_template[0]);
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
}
dists[i] += col_dist - col_dists_current[first];
@ -160,8 +181,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
int sy_up = y - TEMPLATE_SIZE2 - 1;
int sy_down = y + TEMPLATE_SIZE2;
uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset)));
uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset)));
pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset)));
pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset)));
sx -= SEARCH_SIZE2;
sy_up -= SEARCH_SIZE2;
@ -171,8 +192,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
{
int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset)));
uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset)));
pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset)));
pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset)));
__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
@ -186,24 +207,25 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
}
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
__local int * dists, __global const int * almostDist2Weight,
__local int * dists, __global const wlut_t * almostDist2Weight,
__global uchar * dst, int dst_step, int dst_offset,
int y, int x, int id, __local int * weights_local,
__local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
int y, int x, int id, __local weight_t * weights_local,
__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
{
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0;
int_t weighted_sum = (int_t)(0);
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
weight_t weights = (weight_t)0;
sum_t weighted_sum = (sum_t)0;
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
{
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset));
int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index));
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset));
sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index));
int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
int weight = almostDist2Weight[almostAvgDist];
weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]);
weights += weight;
weighted_sum += (int_t)(weight) * src_value;
weighted_sum += (sum_t)weight * src_value;
}
weights_local[id] = weights;
@ -223,26 +245,27 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
if (id == 0)
{
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset));
sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
weighted_sum_local[2] + weighted_sum_local[3];
int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0));
*(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0);
}
}
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
__global const int * almostDist2Weight, __global uchar * buffer,
__global const wlut_t * almostDist2Weight, __global uchar * buffer,
int almostTemplateWindowSizeSqBinShift)
{
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
int block_y = get_group_id(1);
int id = get_local_id(0), first;
__local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE];
__local int_t weighted_sum[CTA_SIZE];
__local int dists[SEARCH_SIZE_SQ];
__local weight_t weights[CTA_SIZE];
__local sum_t weighted_sum[CTA_SIZE];
int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);

View File

@ -13,11 +13,11 @@
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
{
int cn, templateWindowSize, searchWindowSize;
float h;
bool use_roi;
int cn, normType, templateWindowSize, searchWindowSize;
std::vector<float> h;
bool use_roi, use_image;
TEST_DECLARE_INPUT_PARAMETER(src);
TEST_DECLARE_OUTPUT_PARAMETER(dst);
@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
virtual void SetUp()
{
cn = GET_PARAM(0);
use_roi = GET_PARAM(1);
normType = GET_PARAM(1);
use_roi = GET_PARAM(2);
use_image = GET_PARAM(3);
templateWindowSize = 7;
searchWindowSize = 21;
h = 3.0f;
h.resize(cn);
for (int i=0; i<cn; i++)
h[i] = 3.0f + 0.5f*i;
}
virtual void generateTestData()
{
const int type = CV_8UC(cn);
Mat image;
if (cn == 1)
{
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
if (use_image) {
image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
ASSERT_FALSE(image.empty());
}
const int type = CV_8UC(cn);
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
if (cn == 1)
image.copyTo(src_roi);
if (use_image) {
ASSERT_TRUE(cn > 0 && cn <= 4);
if (cn == 2) {
int from_to[] = { 0,0, 1,1 };
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 2);
}
else if (cn == 4) {
int from_to[] = { 0,0, 1,1, 2,2, 1,3};
src_roi.create(roiSize, type);
mixChannels(&image, 1, &src_roi, 1, from_to, 4);
}
else image.copyTo(src_roi);
}
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
}
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
{
generateTestData();
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
OCL_EXPECT_MATS_NEAR(dst, 1);
}
}
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
Bool(), Values(true)));
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
} } // namespace cvtest::ocl