mirror of
https://github.com/opencv/opencv.git
synced 2024-11-25 19:50:38 +08:00
Merge pull request #3814 from erikrk:denoising-16bit-master
This commit is contained in:
commit
5501cfd809
@ -442,6 +442,10 @@ template<typename _Tp> static inline _Tp saturate_cast(int v) { return _Tp(
|
||||
template<typename _Tp> static inline _Tp saturate_cast(float v) { return _Tp(v); }
|
||||
/** @overload */
|
||||
template<typename _Tp> static inline _Tp saturate_cast(double v) { return _Tp(v); }
|
||||
/** @overload */
|
||||
template<typename _Tp> static inline _Tp saturate_cast(int64 v) { return _Tp(v); }
|
||||
/** @overload */
|
||||
template<typename _Tp> static inline _Tp saturate_cast(uint64 v) { return _Tp(v); }
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
@ -452,6 +456,8 @@ template<> inline uchar saturate_cast<uchar>(short v) { return saturate_c
|
||||
template<> inline uchar saturate_cast<uchar>(unsigned v) { return (uchar)std::min(v, (unsigned)UCHAR_MAX); }
|
||||
template<> inline uchar saturate_cast<uchar>(float v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
||||
template<> inline uchar saturate_cast<uchar>(double v) { int iv = cvRound(v); return saturate_cast<uchar>(iv); }
|
||||
template<> inline uchar saturate_cast<uchar>(int64 v) { return (uchar)((uint64)v <= (uint64)UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
|
||||
template<> inline uchar saturate_cast<uchar>(uint64 v) { return (uchar)std::min(v, (uint64)UCHAR_MAX); }
|
||||
|
||||
template<> inline schar saturate_cast<schar>(uchar v) { return (schar)std::min((int)v, SCHAR_MAX); }
|
||||
template<> inline schar saturate_cast<schar>(ushort v) { return (schar)std::min((unsigned)v, (unsigned)SCHAR_MAX); }
|
||||
@ -460,6 +466,8 @@ template<> inline schar saturate_cast<schar>(short v) { return saturate_c
|
||||
template<> inline schar saturate_cast<schar>(unsigned v) { return (schar)std::min(v, (unsigned)SCHAR_MAX); }
|
||||
template<> inline schar saturate_cast<schar>(float v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
||||
template<> inline schar saturate_cast<schar>(double v) { int iv = cvRound(v); return saturate_cast<schar>(iv); }
|
||||
template<> inline schar saturate_cast<schar>(int64 v) { return (schar)((uint64)((int64)v-SCHAR_MIN) <= (uint64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
|
||||
template<> inline schar saturate_cast<schar>(uint64 v) { return (schar)std::min(v, (uint64)SCHAR_MAX); }
|
||||
|
||||
template<> inline ushort saturate_cast<ushort>(schar v) { return (ushort)std::max((int)v, 0); }
|
||||
template<> inline ushort saturate_cast<ushort>(short v) { return (ushort)std::max((int)v, 0); }
|
||||
@ -467,12 +475,16 @@ template<> inline ushort saturate_cast<ushort>(int v) { return (ushort)((
|
||||
template<> inline ushort saturate_cast<ushort>(unsigned v) { return (ushort)std::min(v, (unsigned)USHRT_MAX); }
|
||||
template<> inline ushort saturate_cast<ushort>(float v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
||||
template<> inline ushort saturate_cast<ushort>(double v) { int iv = cvRound(v); return saturate_cast<ushort>(iv); }
|
||||
template<> inline ushort saturate_cast<ushort>(int64 v) { return (ushort)((uint64)v <= (uint64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
|
||||
template<> inline ushort saturate_cast<ushort>(uint64 v) { return (ushort)std::min(v, (uint64)USHRT_MAX); }
|
||||
|
||||
template<> inline short saturate_cast<short>(ushort v) { return (short)std::min((int)v, SHRT_MAX); }
|
||||
template<> inline short saturate_cast<short>(int v) { return (short)((unsigned)(v - SHRT_MIN) <= (unsigned)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
|
||||
template<> inline short saturate_cast<short>(unsigned v) { return (short)std::min(v, (unsigned)SHRT_MAX); }
|
||||
template<> inline short saturate_cast<short>(float v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
||||
template<> inline short saturate_cast<short>(double v) { int iv = cvRound(v); return saturate_cast<short>(iv); }
|
||||
template<> inline short saturate_cast<short>(int64 v) { return (short)((uint64)((int64)v - SHRT_MIN) <= (uint64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
|
||||
template<> inline short saturate_cast<short>(uint64 v) { return (short)std::min(v, (uint64)SHRT_MAX); }
|
||||
|
||||
template<> inline int saturate_cast<int>(float v) { return cvRound(v); }
|
||||
template<> inline int saturate_cast<int>(double v) { return cvRound(v); }
|
||||
|
@ -119,7 +119,7 @@ CV_EXPORTS_W void inpaint( InputArray src, InputArray inpaintMask,
|
||||
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
|
||||
optimizations. Noise is expected to be Gaussian white noise
|
||||
|
||||
@param src Input 8-bit 1-channel, 2-channel or 3-channel image.
|
||||
@param src Input 8-bit 1-channel, 2-channel, 3-channel or 4-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||
Should be odd. Recommended value 7 pixels
|
||||
@ -138,6 +138,35 @@ parameter.
|
||||
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst, float h = 3,
|
||||
int templateWindowSize = 7, int searchWindowSize = 21);
|
||||
|
||||
/** @brief Perform image denoising using Non-local Means Denoising algorithm
|
||||
<http://www.ipol.im/pub/algo/bcm_non_local_means_denoising/> with several computational
|
||||
optimizations. Noise is expected to be Gaussian white noise
|
||||
|
||||
@param src Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
|
||||
2-channel, 3-channel or 4-channel image.
|
||||
@param dst Output image with the same size and type as src .
|
||||
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||
Should be odd. Recommended value 7 pixels
|
||||
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param h Array of parameters regulating filter strength, either one
|
||||
parameter applied to all channels or one per channel in dst. Big h value
|
||||
perfectly removes noise but also removes image details, smaller h
|
||||
value preserves details but also preserves some noise
|
||||
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
|
||||
|
||||
This function is expected to be applied to grayscale images. For colored images look at
|
||||
fastNlMeansDenoisingColored. Advanced usage of this function can be manual denoising of colored
|
||||
image in different colorspaces. Such approach is used in fastNlMeansDenoisingColored by converting
|
||||
image to CIELAB colorspace and then separately denoise L and AB components with different h
|
||||
parameter.
|
||||
*/
|
||||
CV_EXPORTS_W void fastNlMeansDenoising( InputArray src, OutputArray dst,
|
||||
const std::vector<float>& h,
|
||||
int templateWindowSize = 7, int searchWindowSize = 21,
|
||||
int normType = NORM_L2);
|
||||
|
||||
/** @brief Modification of fastNlMeansDenoising function for colored images
|
||||
|
||||
@param src Input 8-bit 3-channel image.
|
||||
@ -165,7 +194,35 @@ captured in small period of time. For example video. This version of the functio
|
||||
images or for manual manipulation with colorspaces. For more details see
|
||||
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
|
||||
|
||||
@param srcImgs Input 8-bit 1-channel, 2-channel or 3-channel images sequence. All images should
|
||||
@param srcImgs Input 8-bit 1-channel, 2-channel, 3-channel or
|
||||
4-channel images sequence. All images should have the same type and
|
||||
size.
|
||||
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
|
||||
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
|
||||
be odd. Images from imgToDenoiseIndex - temporalWindowSize / 2 to
|
||||
imgToDenoiseIndex + temporalWindowSize / 2 from srcImgs will be used to denoise
|
||||
srcImgs[imgToDenoiseIndex] image.
|
||||
@param dst Output image with the same size and type as srcImgs images.
|
||||
@param templateWindowSize Size in pixels of the template patch that is used to compute weights.
|
||||
Should be odd. Recommended value 7 pixels
|
||||
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param h Parameter regulating filter strength. Bigger h value
|
||||
perfectly removes noise but also removes image details, smaller h
|
||||
value preserves details but also preserves some noise
|
||||
*/
|
||||
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
|
||||
int imgToDenoiseIndex, int temporalWindowSize,
|
||||
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
|
||||
|
||||
/** @brief Modification of fastNlMeansDenoising function for images sequence where consecutive images have been
|
||||
captured in small period of time. For example video. This version of the function is for grayscale
|
||||
images or for manual manipulation with colorspaces. For more details see
|
||||
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.6394>
|
||||
|
||||
@param srcImgs Input 8-bit or 16-bit (only with NORM_L1) 1-channel,
|
||||
2-channel, 3-channel or 4-channel images sequence. All images should
|
||||
have the same type and size.
|
||||
@param imgToDenoiseIndex Target image to denoise index in srcImgs sequence
|
||||
@param temporalWindowSize Number of surrounding images to use for target image denoising. Should
|
||||
@ -178,13 +235,17 @@ Should be odd. Recommended value 7 pixels
|
||||
@param searchWindowSize Size in pixels of the window that is used to compute weighted average for
|
||||
given pixel. Should be odd. Affect performance linearly: greater searchWindowsSize - greater
|
||||
denoising time. Recommended value 21 pixels
|
||||
@param h Parameter regulating filter strength for luminance component. Bigger h value perfectly
|
||||
removes noise but also removes image details, smaller h value preserves details but also preserves
|
||||
some noise
|
||||
@param h Array of parameters regulating filter strength, either one
|
||||
parameter applied to all channels or one per channel in dst. Big h value
|
||||
perfectly removes noise but also removes image details, smaller h
|
||||
value preserves details but also preserves some noise
|
||||
@param normType Type of norm used for weight calculation. Can be either NORM_L2 or NORM_L1
|
||||
*/
|
||||
CV_EXPORTS_W void fastNlMeansDenoisingMulti( InputArrayOfArrays srcImgs, OutputArray dst,
|
||||
int imgToDenoiseIndex, int temporalWindowSize,
|
||||
float h = 3, int templateWindowSize = 7, int searchWindowSize = 21);
|
||||
const std::vector<float>& h,
|
||||
int templateWindowSize = 7, int searchWindowSize = 21,
|
||||
int normType = NORM_L2);
|
||||
|
||||
/** @brief Modification of fastNlMeansDenoisingMulti function for colored images sequences
|
||||
|
||||
|
@ -45,42 +45,115 @@
|
||||
#include "fast_nlmeans_multi_denoising_invoker.hpp"
|
||||
#include "fast_nlmeans_denoising_opencl.hpp"
|
||||
|
||||
template<typename ST, typename IT, typename UIT, typename D>
|
||||
static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<float>& h,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
int hn = (int)h.size();
|
||||
|
||||
switch (CV_MAT_CN(src.type())) {
|
||||
case 1:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case 2:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case 3:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case 4:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||
src, dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported number of channels! Only 1, 2, 3, and 4 are supported");
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar-strength convenience overload: wraps h in a one-element vector and
// delegates to the vector overload, leaving the norm type at its declared
// default.
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
                               int templateWindowSize, int searchWindowSize)
{
    const std::vector<float> strengths(1, h);
    fastNlMeansDenoising(_src, _dst, strengths, templateWindowSize, searchWindowSize);
}
|
||||
|
||||
void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, const std::vector<float>& h,
|
||||
int templateWindowSize, int searchWindowSize, int normType)
|
||||
{
|
||||
int hn = (int)h.size(), type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert(hn == 1 || hn == cn);
|
||||
|
||||
Size src_size = _src.size();
|
||||
CV_OCL_RUN(_src.dims() <= 2 && (_src.isUMat() || _dst.isUMat()) &&
|
||||
src_size.width > 5 && src_size.height > 5, // low accuracy on small sizes
|
||||
ocl_fastNlMeansDenoising(_src, _dst, h, templateWindowSize, searchWindowSize))
|
||||
ocl_fastNlMeansDenoising(_src, _dst, &h[0], hn,
|
||||
templateWindowSize, searchWindowSize, normType))
|
||||
|
||||
Mat src = _src.getMat();
|
||||
_dst.create(src_size, src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
switch (normType) {
|
||||
case NORM_L2:
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
if(tegra::useTegra() && tegra::fastNlMeansDenoising(src, dst, h, templateWindowSize, searchWindowSize))
|
||||
if(hn == 1 && tegra::useTegra() &&
|
||||
tegra::fastNlMeansDenoising(src, dst, h[0], templateWindowSize, searchWindowSize))
|
||||
return;
|
||||
#endif
|
||||
|
||||
switch (src.type()) {
|
||||
switch (depth) {
|
||||
case CV_8U:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<uchar>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec2b>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec3b>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
fastNlMeansDenoising_<uchar, int, unsigned, DistSquared>(src, dst, h,
|
||||
templateWindowSize,
|
||||
searchWindowSize);
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported image format! Only CV_8UC1, CV_8UC2 and CV_8UC3 are supported");
|
||||
"Unsupported depth! Only CV_8U is supported for NORM_L2");
|
||||
}
|
||||
break;
|
||||
case NORM_L1:
|
||||
switch (depth) {
|
||||
case CV_8U:
|
||||
fastNlMeansDenoising_<uchar, int, unsigned, DistAbs>(src, dst, h,
|
||||
templateWindowSize,
|
||||
searchWindowSize);
|
||||
break;
|
||||
case CV_16U:
|
||||
fastNlMeansDenoising_<ushort, int64, uint64, DistAbs>(src, dst, h,
|
||||
templateWindowSize,
|
||||
searchWindowSize);
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,7 +165,7 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||
Size src_size = _src.size();
|
||||
if (type != CV_8UC3 && type != CV_8UC4)
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3!");
|
||||
CV_Error(Error::StsBadArg, "Type of input image should be CV_8UC3 or CV_8UC4!");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -108,8 +181,8 @@ void cv::fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||
Mat src_lab;
|
||||
cvtColor(src, src_lab, COLOR_LBGR2Lab);
|
||||
|
||||
Mat l(src_size, CV_8U);
|
||||
Mat ab(src_size, CV_8UC2);
|
||||
Mat l(src_size, CV_MAKE_TYPE(depth, 1));
|
||||
Mat ab(src_size, CV_MAKE_TYPE(depth, 2));
|
||||
Mat l_ab[] = { l, ab };
|
||||
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||
mixChannels(&src_lab, 1, l_ab, 2, from_to, 3);
|
||||
@ -157,9 +230,76 @@ static void fastNlMeansDenoisingMultiCheckPreconditions(
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ST, typename IT, typename UIT, typename D>
|
||||
static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& dst,
|
||||
int imgToDenoiseIndex, int temporalWindowSize,
|
||||
const std::vector<float>& h,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
{
|
||||
int hn = (int)h.size();
|
||||
|
||||
switch (srcImgs[0].type())
|
||||
{
|
||||
case CV_8U:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
case CV_8UC4:
|
||||
if (hn == 1)
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
else
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, &h[0]));
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported image format! Only CV_8U, CV_8UC2, CV_8UC3 and CV_8UC4 are supported");
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar-strength convenience overload: wraps h in a one-element vector and
// delegates to the vector overload, leaving the norm type at its declared
// default.
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
                                    int imgToDenoiseIndex, int temporalWindowSize,
                                    float h, int templateWindowSize, int searchWindowSize)
{
    const std::vector<float> strengths(1, h);
    fastNlMeansDenoisingMulti(_srcImgs, _dst, imgToDenoiseIndex, temporalWindowSize,
                              strengths, templateWindowSize, searchWindowSize);
}
|
||||
|
||||
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
|
||||
int imgToDenoiseIndex, int temporalWindowSize,
|
||||
const std::vector<float>& h,
|
||||
int templateWindowSize, int searchWindowSize, int normType)
|
||||
{
|
||||
std::vector<Mat> srcImgs;
|
||||
_srcImgs.getMatVector(srcImgs);
|
||||
@ -168,32 +308,52 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
||||
srcImgs, imgToDenoiseIndex,
|
||||
temporalWindowSize, templateWindowSize, searchWindowSize);
|
||||
|
||||
int hn = (int)h.size();
|
||||
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert(hn == 1 || hn == cn);
|
||||
|
||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
switch (srcImgs[0].type())
|
||||
{
|
||||
switch (normType) {
|
||||
case NORM_L2:
|
||||
switch (depth) {
|
||||
case CV_8U:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<uchar>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
|
||||
DistSquared>(srcImgs, dst,
|
||||
imgToDenoiseIndex, temporalWindowSize,
|
||||
h,
|
||||
templateWindowSize, searchWindowSize);
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
|
||||
"Unsupported depth! Only CV_8U is supported for NORM_L2");
|
||||
}
|
||||
break;
|
||||
case NORM_L1:
|
||||
switch (depth) {
|
||||
case CV_8U:
|
||||
fastNlMeansDenoisingMulti_<uchar, int, unsigned,
|
||||
DistAbs>(srcImgs, dst,
|
||||
imgToDenoiseIndex, temporalWindowSize,
|
||||
h,
|
||||
templateWindowSize, searchWindowSize);
|
||||
break;
|
||||
case CV_16U:
|
||||
fastNlMeansDenoisingMulti_<ushort, int64, uint64,
|
||||
DistAbs>(srcImgs, dst,
|
||||
imgToDenoiseIndex, temporalWindowSize,
|
||||
h,
|
||||
templateWindowSize, searchWindowSize);
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported depth! Only CV_8U and CV_16U are supported for NORM_L1");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
CV_Error(Error::StsBadArg,
|
||||
"Unsupported norm type! Only NORM_L2 and NORM_L1 are supported");
|
||||
}
|
||||
}
|
||||
|
||||
@ -212,9 +372,10 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
||||
_dst.create(srcImgs[0].size(), srcImgs[0].type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
int type = srcImgs[0].type(), depth = CV_MAT_DEPTH(type);
|
||||
int src_imgs_size = static_cast<int>(srcImgs.size());
|
||||
|
||||
if (srcImgs[0].type() != CV_8UC3)
|
||||
if (type != CV_8UC3)
|
||||
{
|
||||
CV_Error(Error::StsBadArg, "Type of input images should be CV_8UC3!");
|
||||
return;
|
||||
@ -228,9 +389,9 @@ void cv::fastNlMeansDenoisingColoredMulti( InputArrayOfArrays _srcImgs, OutputAr
|
||||
std::vector<Mat> ab(src_imgs_size);
|
||||
for (int i = 0; i < src_imgs_size; i++)
|
||||
{
|
||||
src_lab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC3);
|
||||
l[i] = Mat::zeros(srcImgs[0].size(), CV_8UC1);
|
||||
ab[i] = Mat::zeros(srcImgs[0].size(), CV_8UC2);
|
||||
src_lab[i] = Mat::zeros(srcImgs[0].size(), type);
|
||||
l[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 1));
|
||||
ab[i] = Mat::zeros(srcImgs[0].size(), CV_MAKE_TYPE(depth, 2));
|
||||
cvtColor(srcImgs[i], src_lab[i], COLOR_LBGR2Lab);
|
||||
|
||||
Mat l_ab[] = { l[i], ab[i] };
|
||||
|
@ -50,13 +50,13 @@
|
||||
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
struct FastNlMeansDenoisingInvoker :
|
||||
public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
||||
int template_window_size, int search_window_size, const float h);
|
||||
int template_window_size, int search_window_size, const float *h);
|
||||
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
@ -75,9 +75,9 @@ private:
|
||||
int template_window_half_size_;
|
||||
int search_window_half_size_;
|
||||
|
||||
int fixed_point_mult_;
|
||||
typename pixelInfo<WT>::sampleType fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift_;
|
||||
std::vector<int> almost_dist2weight_;
|
||||
std::vector<WT> almost_dist2weight_;
|
||||
|
||||
void calcDistSumsForFirstElementInRow(
|
||||
int i, Array2d<int>& dist_sums,
|
||||
@ -99,15 +99,15 @@ inline int getNearestPowerOf2(int value)
|
||||
return p;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansDenoisingInvoker(
|
||||
const Mat& src, Mat& dst,
|
||||
int template_window_size,
|
||||
int search_window_size,
|
||||
const float h) :
|
||||
const float *h) :
|
||||
src_(src), dst_(dst)
|
||||
{
|
||||
CV_Assert(src.channels() == sizeof(T)); //T is Vec1b or Vec2b or Vec3b
|
||||
CV_Assert(src.channels() == pixelInfo<T>::channels);
|
||||
|
||||
template_window_half_size_ = template_window_size / 2;
|
||||
search_window_half_size_ = search_window_size / 2;
|
||||
@ -117,8 +117,10 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
border_size_ = search_window_half_size_ + template_window_half_size_;
|
||||
copyMakeBorder(src_, extended_src_, border_size_, border_size_, border_size_, border_size_, BORDER_DEFAULT);
|
||||
|
||||
const int max_estimate_sum_value = search_window_size_ * search_window_size_ * 255;
|
||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
||||
const IT max_estimate_sum_value =
|
||||
(IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
|
||||
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
|
||||
pixelInfo<WT>::sampleMax());
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
@ -127,30 +129,24 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
almost_template_window_size_sq_bin_shift_ = getNearestPowerOf2(template_window_size_sq);
|
||||
double almost_dist2actual_dist_multiplier = ((double)(1 << almost_template_window_size_sq_bin_shift_)) / template_window_size_sq;
|
||||
|
||||
int max_dist = 255 * 255 * sizeof(T);
|
||||
int max_dist = D::template maxDist<T>();
|
||||
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||
almost_dist2weight_.resize(almost_max_dist);
|
||||
|
||||
const double WEIGHT_THRESHOLD = 0.001;
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||
weight = 0;
|
||||
|
||||
almost_dist2weight_[almost_dist] = weight;
|
||||
almost_dist2weight_[almost_dist] =
|
||||
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
|
||||
}
|
||||
CV_Assert(almost_dist2weight_[0] == fixed_point_mult_);
|
||||
|
||||
// additional optimization init end
|
||||
if (dst_.empty())
|
||||
dst_ = Mat::zeros(src_.size(), src_.type());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
|
||||
{
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
@ -215,7 +211,7 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||
|
||||
int bx = start_bx + x;
|
||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] + calcUpDownDist(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
|
||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] + D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[bx], b_down_ptr[bx]);
|
||||
|
||||
dist_sums_row[x] += col_dist_sums_row[x];
|
||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||
@ -227,9 +223,11 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
}
|
||||
|
||||
// calc weights
|
||||
int estimation[3], weights_sum = 0;
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
|
||||
weights_sum[channel_num] = 0;
|
||||
|
||||
for (int y = 0; y < search_window_size_; y++)
|
||||
{
|
||||
@ -238,24 +236,21 @@ void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
for (int x = 0; x < search_window_size_; x++)
|
||||
{
|
||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift_;
|
||||
int weight = almost_dist2weight_[almostAvgDist];
|
||||
weights_sum += weight;
|
||||
|
||||
WT weight = almost_dist2weight_[almostAvgDist];
|
||||
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
||||
incWithWeight(estimation, weight, p);
|
||||
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum/2) / weights_sum;
|
||||
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
|
||||
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
|
||||
weights_sum);
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
|
||||
int i,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
@ -276,7 +271,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
for (int tx = -template_window_half_size_; tx <= template_window_half_size_; tx++)
|
||||
{
|
||||
int dist = calcDist<T>(extended_src_,
|
||||
int dist = D::template calcDist<T>(extended_src_,
|
||||
border_size_ + i + ty, border_size_ + j + tx,
|
||||
border_size_ + start_y + ty, border_size_ + start_x + tx);
|
||||
|
||||
@ -288,8 +283,8 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
inline void FastNlMeansDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
|
||||
int i, int j, int first_col_num,
|
||||
Array2d<int>& dist_sums,
|
||||
Array3d<int>& col_dist_sums,
|
||||
@ -312,7 +307,7 @@ inline void FastNlMeansDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
int by = start_by + y;
|
||||
int bx = start_bx + x;
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
col_dist_sums[new_last_col_num][y][x] += calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
||||
col_dist_sums[new_last_col_num][y][x] += D::template calcDist<T>(extended_src_, ay + ty, ax, by + ty, bx);
|
||||
|
||||
dist_sums[y][x] += col_dist_sums[new_last_col_num][y][x];
|
||||
up_col_dist_sums[j][y][x] = col_dist_sums[new_last_col_num][y][x];
|
||||
|
@ -44,118 +44,438 @@
|
||||
|
||||
using namespace cv;
|
||||
|
||||
template <typename T> static inline int calcDist(const T a, const T b);
|
||||
|
||||
template <> inline int calcDist(const uchar a, const uchar b)
|
||||
// Basic traits of a scalar (non-Vec) pixel type T.
template <typename T> struct pixelInfo_
{
    // A scalar pixel holds exactly one sample.
    static const int channels = 1;
    // The sample type is the pixel type itself.
    typedef T sampleType;
};
|
||||
|
||||
// Basic traits of a multi-channel pixel stored as Vec<ET, n>.
template <typename ET, int n> struct pixelInfo_<Vec<ET, n> >
{
    // One sample per vector component.
    static const int channels = n;
    // Samples have the vector's element type.
    typedef ET sampleType;
};
|
||||
|
||||
// Extends pixelInfo_<T> (channels, sampleType) with helpers describing the
// value range and storage size of a single sample of pixel type T.
template <typename T> struct pixelInfo: public pixelInfo_<T>
{
    // Re-export the dependent base typedef. A plain typedef is used rather than
    // `using typename ...` because the latter is not accepted as a type name by
    // every compiler.
    typedef typename pixelInfo_<T>::sampleType sampleType;

    // Returns std::numeric_limits<sampleType>::max().
    static inline sampleType sampleMax()
    {
        return std::numeric_limits<sampleType>::max();
    }

    // Returns std::numeric_limits<sampleType>::min().
    static inline sampleType sampleMin()
    {
        return std::numeric_limits<sampleType>::min();
    }

    // Size of one sample in bytes.
    static inline size_t sampleBytes()
    {
        return sizeof(sampleType);
    }

    // Size of one sample in bits, computed as 8 bits per byte.
    static inline size_t sampleBits()
    {
        return 8*sampleBytes();
    }
};
|
||||
|
||||
class DistAbs
|
||||
{
|
||||
template <typename T> struct calcDist_
|
||||
{
|
||||
static inline int f(const T a, const T b)
|
||||
{
|
||||
return std::abs((int)(a-b));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 2> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
|
||||
{
|
||||
return std::abs((int)(a[0]-b[0])) + std::abs((int)(a[1]-b[1]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 3> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
|
||||
{
|
||||
return
|
||||
std::abs((int)(a[0]-b[0])) +
|
||||
std::abs((int)(a[1]-b[1])) +
|
||||
std::abs((int)(a[2]-b[2]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 4> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
|
||||
{
|
||||
return
|
||||
std::abs((int)(a[0]-b[0])) +
|
||||
std::abs((int)(a[1]-b[1])) +
|
||||
std::abs((int)(a[2]-b[2])) +
|
||||
std::abs((int)(a[3]-b[3]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename WT> struct calcWeight_
|
||||
{
|
||||
static inline WT f(double dist, const float *h, WT fixed_point_mult)
|
||||
{
|
||||
double w = std::exp(-dist*dist / (h[0]*h[0] * pixelInfo<T>::channels));
|
||||
if (std::isnan(w)) w = 1.0; // Handle h = 0.0
|
||||
|
||||
static const double WEIGHT_THRESHOLD = 0.001;
|
||||
WT weight = (WT)round(fixed_point_mult * w);
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0;
|
||||
|
||||
return weight;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
|
||||
{
|
||||
static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
|
||||
{
|
||||
Vec<ET, n> res;
|
||||
for (int i=0; i<n; i++)
|
||||
res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
template <typename T> static inline int calcDist(const T a, const T b)
|
||||
{
|
||||
return calcDist_<T>::f(a, b);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
||||
{
|
||||
const T a = m.at<T>(i1, j1);
|
||||
const T b = m.at<T>(i2, j2);
|
||||
return calcDist<T>(a,b);
|
||||
}
|
||||
|
||||
template <typename T> static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||
template <typename T>
|
||||
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||
{
|
||||
return calcDist(a_down, b_down) - calcDist(a_up, b_up);
|
||||
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
|
||||
};
|
||||
|
||||
template <typename T, typename WT>
|
||||
static inline WT calcWeight(double dist, const float *h,
|
||||
typename pixelInfo<WT>::sampleType fixed_point_mult)
|
||||
{
|
||||
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
|
||||
}
|
||||
|
||||
template <> inline int calcUpDownDist(uchar a_up, uchar a_down, uchar b_up, uchar b_down)
|
||||
template <typename T>
|
||||
static inline int maxDist()
|
||||
{
|
||||
return (int)pixelInfo<T>::sampleMax() * pixelInfo<T>::channels;
|
||||
}
|
||||
};
|
||||
|
||||
class DistSquared
|
||||
{
|
||||
template <typename T> struct calcDist_
|
||||
{
|
||||
static inline int f(const T a, const T b)
|
||||
{
|
||||
return (int)(a-b) * (int)(a-b);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 2> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 2> a, const Vec<ET, 2> b)
|
||||
{
|
||||
return (int)(a[0]-b[0])*(int)(a[0]-b[0]) + (int)(a[1]-b[1])*(int)(a[1]-b[1]);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 3> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 3> a, const Vec<ET, 3> b)
|
||||
{
|
||||
return
|
||||
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
|
||||
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
|
||||
(int)(a[2]-b[2])*(int)(a[2]-b[2]);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ET> struct calcDist_<Vec<ET, 4> >
|
||||
{
|
||||
static inline int f(const Vec<ET, 4> a, const Vec<ET, 4> b)
|
||||
{
|
||||
return
|
||||
(int)(a[0]-b[0])*(int)(a[0]-b[0]) +
|
||||
(int)(a[1]-b[1])*(int)(a[1]-b[1]) +
|
||||
(int)(a[2]-b[2])*(int)(a[2]-b[2]) +
|
||||
(int)(a[3]-b[3])*(int)(a[3]-b[3]);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct calcUpDownDist_
|
||||
{
|
||||
static inline int f(T a_up, T a_down, T b_up, T b_down)
|
||||
{
|
||||
int A = a_down - b_down;
|
||||
int B = a_up - b_up;
|
||||
return (A-B)*(A+B);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> static inline void incWithWeight(int* estimation, int weight, T p);
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, uchar p)
|
||||
template <typename ET, int n> struct calcUpDownDist_<Vec<ET, n> >
|
||||
{
|
||||
estimation[0] += weight * p;
|
||||
private:
|
||||
typedef Vec<ET, n> T;
|
||||
public:
|
||||
static inline int f(T a_up, T a_down, T b_up, T b_down)
|
||||
{
|
||||
return calcDist<T>(a_down, b_down) - calcDist<T>(a_up, b_up);
|
||||
}
|
||||
};
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2b p)
|
||||
template <typename T, typename WT> struct calcWeight_
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
static inline WT f(double dist, const float *h, WT fixed_point_mult)
|
||||
{
|
||||
double w = std::exp(-dist / (h[0]*h[0] * pixelInfo<T>::channels));
|
||||
if (std::isnan(w)) w = 1.0; // Handle h = 0.0
|
||||
|
||||
static const double WEIGHT_THRESHOLD = 0.001;
|
||||
WT weight = (WT)round(fixed_point_mult * w);
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult) weight = 0;
|
||||
|
||||
return weight;
|
||||
}
|
||||
};
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3b p)
|
||||
template <typename T, typename ET, int n> struct calcWeight_<T, Vec<ET, n> >
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
estimation[2] += weight * p[2];
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, int p)
|
||||
static inline Vec<ET, n> f(double dist, const float *h, ET fixed_point_mult)
|
||||
{
|
||||
estimation[0] += weight * p;
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec2i p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
}
|
||||
|
||||
template <> inline void incWithWeight(int* estimation, int weight, Vec3i p)
|
||||
{
|
||||
estimation[0] += weight * p[0];
|
||||
estimation[1] += weight * p[1];
|
||||
estimation[2] += weight * p[2];
|
||||
}
|
||||
|
||||
template <typename T> static inline T saturateCastFromArray(int* estimation);
|
||||
|
||||
template <> inline uchar saturateCastFromArray(int* estimation)
|
||||
{
|
||||
return saturate_cast<uchar>(estimation[0]);
|
||||
}
|
||||
|
||||
template <> inline Vec2b saturateCastFromArray(int* estimation)
|
||||
{
|
||||
Vec2b res;
|
||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
||||
Vec<ET, n> res;
|
||||
for (int i=0; i<n; i++)
|
||||
res[i] = calcWeight<T, ET>(dist, &h[i], fixed_point_mult);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template <> inline Vec3b saturateCastFromArray(int* estimation)
|
||||
public:
|
||||
template <typename T> static inline int calcDist(const T a, const T b)
|
||||
{
|
||||
Vec3b res;
|
||||
res[0] = saturate_cast<uchar>(estimation[0]);
|
||||
res[1] = saturate_cast<uchar>(estimation[1]);
|
||||
res[2] = saturate_cast<uchar>(estimation[2]);
|
||||
return calcDist_<T>::f(a, b);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline int calcDist(const Mat& m, int i1, int j1, int i2, int j2)
|
||||
{
|
||||
const T a = m.at<T>(i1, j1);
|
||||
const T b = m.at<T>(i2, j2);
|
||||
return calcDist<T>(a,b);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline int calcUpDownDist(T a_up, T a_down, T b_up, T b_down)
|
||||
{
|
||||
return calcUpDownDist_<T>::f(a_up, a_down, b_up, b_down);
|
||||
};
|
||||
|
||||
template <typename T, typename WT>
|
||||
static inline WT calcWeight(double dist, const float *h,
|
||||
typename pixelInfo<WT>::sampleType fixed_point_mult)
|
||||
{
|
||||
return calcWeight_<T, WT>::f(dist, h, fixed_point_mult);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static inline int maxDist()
|
||||
{
|
||||
return (int)pixelInfo<T>::sampleMax() * (int)pixelInfo<T>::sampleMax() *
|
||||
pixelInfo<T>::channels;
|
||||
}
|
||||
};
|
||||
|
||||
// Accumulates one weighted scalar pixel.
//   estimation[0]  += weight * p
//   weights_sum[0] += weight
// The weight is converted to the accumulator type IT before multiplying.
template <typename T, typename IT, typename WT> struct incWithWeight_
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, T p)
    {
        estimation[0] += (IT)weight * p;
        weights_sum[0] += (IT)weight;
    }
};
|
||||
|
||||
// Two-channel pixel, one scalar weight shared by both channels:
// every channel is accumulated with the same weight and weights_sum[0] grows once.
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 2>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 2> p)
    {
        estimation[0] += (IT)weight * p[0];
        estimation[1] += (IT)weight * p[1];
        weights_sum[0] += (IT)weight;
    }
};

// Three-channel pixel, one scalar weight shared by all channels.
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 3>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 3> p)
    {
        estimation[0] += (IT)weight * p[0];
        estimation[1] += (IT)weight * p[1];
        estimation[2] += (IT)weight * p[2];
        weights_sum[0] += (IT)weight;
    }
};

// Four-channel pixel, one scalar weight shared by all channels.
template <typename ET, typename IT, typename WT> struct incWithWeight_<Vec<ET, 4>, IT, WT>
{
    static inline void f(IT* estimation, IT* weights_sum, WT weight, Vec<ET, 4> p)
    {
        estimation[0] += (IT)weight * p[0];
        estimation[1] += (IT)weight * p[1];
        estimation[2] += (IT)weight * p[2];
        estimation[3] += (IT)weight * p[3];
        weights_sum[0] += (IT)weight;
    }
};
|
||||
|
||||
// Two-channel pixel with a separate weight per channel:
// channel i is accumulated with weight[i] and weights_sum[i] grows by weight[i].
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 2>, IT, Vec<EW, 2> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 2> weight, Vec<ET, 2> p)
    {
        estimation[0] += (IT)weight[0] * p[0];
        estimation[1] += (IT)weight[1] * p[1];
        weights_sum[0] += (IT)weight[0];
        weights_sum[1] += (IT)weight[1];
    }
};

// Three-channel pixel with a separate weight per channel.
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 3>, IT, Vec<EW, 3> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 3> weight, Vec<ET, 3> p)
    {
        estimation[0] += (IT)weight[0] * p[0];
        estimation[1] += (IT)weight[1] * p[1];
        estimation[2] += (IT)weight[2] * p[2];
        weights_sum[0] += (IT)weight[0];
        weights_sum[1] += (IT)weight[1];
        weights_sum[2] += (IT)weight[2];
    }
};

// Four-channel pixel with a separate weight per channel.
template <typename ET, typename IT, typename EW> struct incWithWeight_<Vec<ET, 4>, IT, Vec<EW, 4> >
{
    static inline void f(IT* estimation, IT* weights_sum, Vec<EW, 4> weight, Vec<ET, 4> p)
    {
        estimation[0] += (IT)weight[0] * p[0];
        estimation[1] += (IT)weight[1] * p[1];
        estimation[2] += (IT)weight[2] * p[2];
        estimation[3] += (IT)weight[3] * p[3];
        weights_sum[0] += (IT)weight[0];
        weights_sum[1] += (IT)weight[1];
        weights_sum[2] += (IT)weight[2];
        weights_sum[3] += (IT)weight[3];
    }
};
|
||||
|
||||
template <typename T, typename IT, typename WT>
|
||||
static inline void incWithWeight(IT* estimation, IT* weights_sum, WT weight, T p)
|
||||
{
|
||||
return incWithWeight_<T, IT, WT>::f(estimation, weights_sum, weight, p);
|
||||
}
|
||||
|
||||
// Normalizes accumulated channel sums by the accumulated weights, rounding to
// nearest by adding half the divisor before dividing. The numerator is cast to
// the unsigned type UIT before the addition.
//
// nc is the number of channels in estimation, nw the number of entries in
// weights_sum. Only the combinations specialized below define f(); the primary
// template merely declares it.
template <typename IT, typename UIT, int nc, int nw> struct divByWeightsSum_
{
    static inline void f(IT* estimation, IT* weights_sum);
};

// One channel, one weight sum.
template <typename IT, typename UIT> struct divByWeightsSum_<IT, UIT, 1, 1>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        estimation[0] = (static_cast<UIT>(estimation[0]) + weights_sum[0]/2) / weights_sum[0];
    }
};

// n channels sharing a single weight sum.
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, 1>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        // int index: n is an int template parameter, so this avoids a
        // signed/unsigned comparison.
        for (int i = 0; i < n; i++)
            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[0]/2) / weights_sum[0];
    }
};

// n channels, each with its own weight sum.
template <typename IT, typename UIT, int n> struct divByWeightsSum_<IT, UIT, n, n>
{
    static inline void f(IT* estimation, IT* weights_sum)
    {
        for (int i = 0; i < n; i++)
            estimation[i] = (static_cast<UIT>(estimation[i]) + weights_sum[i]/2) / weights_sum[i];
    }
};

// Divides estimation in place by weights_sum; dispatches on (nc, nw).
template <typename IT, typename UIT, int nc, int nw>
static inline void divByWeightsSum(IT* estimation, IT* weights_sum)
{
    divByWeightsSum_<IT, UIT, nc, nw>::f(estimation, weights_sum);
}
|
||||
|
||||
template <typename T, typename IT> struct saturateCastFromArray_
|
||||
{
|
||||
static inline T f(IT* estimation)
|
||||
{
|
||||
return saturate_cast<T>(estimation[0]);
|
||||
}
|
||||
};
|
||||
|
||||
// Converts a two-channel estimate to Vec<ET, 2>, applying saturate_cast<ET>
// to each channel.
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 2>, IT>
{
    static inline Vec<ET, 2> f(IT* estimation)
    {
        Vec<ET, 2> res;
        res[0] = saturate_cast<ET>(estimation[0]);
        res[1] = saturate_cast<ET>(estimation[1]);
        return res;
    }
};

// Converts a three-channel estimate to Vec<ET, 3>, channel by channel.
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 3>, IT>
{
    static inline Vec<ET, 3> f(IT* estimation)
    {
        Vec<ET, 3> res;
        res[0] = saturate_cast<ET>(estimation[0]);
        res[1] = saturate_cast<ET>(estimation[1]);
        res[2] = saturate_cast<ET>(estimation[2]);
        return res;
    }
};

// Converts a four-channel estimate to Vec<ET, 4>, channel by channel.
template <typename ET, typename IT> struct saturateCastFromArray_<Vec<ET, 4>, IT>
{
    static inline Vec<ET, 4> f(IT* estimation)
    {
        Vec<ET, 4> res;
        res[0] = saturate_cast<ET>(estimation[0]);
        res[1] = saturate_cast<ET>(estimation[1]);
        res[2] = saturate_cast<ET>(estimation[2]);
        res[3] = saturate_cast<ET>(estimation[3]);
        return res;
    }
};
|
||||
|
||||
template <> inline Vec3i saturateCastFromArray(int* estimation)
|
||||
template <typename T, typename IT> static inline T saturateCastFromArray(IT* estimation)
|
||||
{
|
||||
return Vec3i(estimation);
|
||||
return saturateCastFromArray_<T, IT>::f(estimation);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -28,12 +28,16 @@ static int divUp(int a, int b)
|
||||
return (a + b - 1) / b;
|
||||
}
|
||||
|
||||
template <typename FT>
|
||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindowSize, int templateWindowSize, FT h, int cn,
|
||||
template <typename FT, typename ST, typename WT>
|
||||
static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight,
|
||||
int searchWindowSize, int templateWindowSize,
|
||||
const FT *h, int hn, int cn, int normType,
|
||||
int & almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
const int maxEstimateSumValue = searchWindowSize * searchWindowSize * 255;
|
||||
int fixedPointMult = std::numeric_limits<int>::max() / maxEstimateSumValue;
|
||||
const WT maxEstimateSumValue = searchWindowSize * searchWindowSize *
|
||||
std::numeric_limits<ST>::max();
|
||||
int fixedPointMult = (int)std::min<WT>(std::numeric_limits<WT>::max() / maxEstimateSumValue,
|
||||
std::numeric_limits<int>::max());
|
||||
int depth = DataType<FT>::depth;
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
@ -48,33 +52,44 @@ static bool ocl_calcAlmostDist2Weight(UMat & almostDist2Weight, int searchWindow
|
||||
FT almostDist2ActualDistMultiplier = (FT)(1 << almostTemplateWindowSizeSqBinShift) / templateWindowSizeSq;
|
||||
|
||||
const FT WEIGHT_THRESHOLD = 1e-3f;
|
||||
int maxDist = 255 * 255 * cn;
|
||||
int maxDist = normType == NORM_L1 ? std::numeric_limits<ST>::max() * cn :
|
||||
std::numeric_limits<ST>::max() * std::numeric_limits<ST>::max() * cn;
|
||||
int almostMaxDist = (int)(maxDist / almostDist2ActualDistMultiplier + 1);
|
||||
FT den = 1.0f / (h * h * cn);
|
||||
FT den[4];
|
||||
CV_Assert(hn > 0 && hn <= 4);
|
||||
for (int i=0; i<hn; i++)
|
||||
den[i] = 1.0f / (h[i] * h[i] * cn);
|
||||
|
||||
almostDist2Weight.create(1, almostMaxDist, CV_32SC1);
|
||||
almostDist2Weight.create(1, almostMaxDist, CV_32SC(hn == 3 ? 4 : hn));
|
||||
|
||||
char buf[40];
|
||||
ocl::Kernel k("calcAlmostDist2Weight", ocl::photo::nlmeans_oclsrc,
|
||||
format("-D OP_CALC_WEIGHTS -D FT=%s%s", ocl::typeToStr(depth),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
format("-D OP_CALC_WEIGHTS -D FT=%s -D w_t=%s"
|
||||
" -D wlut_t=%s -D convert_wlut_t=%s%s%s",
|
||||
ocl::typeToStr(depth), ocl::typeToStr(CV_MAKE_TYPE(depth, hn)),
|
||||
ocl::typeToStr(CV_32SC(hn)), ocl::convertTypeStr(depth, CV_32S, hn, buf),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
||||
normType == NORM_L1 ? " -D ABS" : ""));
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
k.args(ocl::KernelArg::PtrWriteOnly(almostDist2Weight), almostMaxDist,
|
||||
almostDist2ActualDistMultiplier, fixedPointMult, den, WEIGHT_THRESHOLD);
|
||||
almostDist2ActualDistMultiplier, fixedPointMult,
|
||||
ocl::KernelArg::Constant(den, (hn == 3 ? 4 : hn)*sizeof(FT)), WEIGHT_THRESHOLD);
|
||||
|
||||
size_t globalsize[1] = { almostMaxDist };
|
||||
return k.run(1, globalsize, NULL, false);
|
||||
}
|
||||
|
||||
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
||||
int templateWindowSize, int searchWindowSize)
|
||||
static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, const float *h, int hn,
|
||||
int templateWindowSize, int searchWindowSize, int normType)
|
||||
{
|
||||
int type = _src.type(), cn = CV_MAT_CN(type);
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
int ctaSize = ocl::Device::getDefault().isIntel() ? CTA_SIZE_INTEL : CTA_SIZE_DEFAULT;
|
||||
Size size = _src.size();
|
||||
|
||||
if ( type != CV_8UC1 && type != CV_8UC2 && type != CV_8UC4 )
|
||||
if (cn < 1 || cn > 4 || ((normType != NORM_L2 || depth != CV_8U) &&
|
||||
(normType != NORM_L1 || (depth != CV_8U && depth != CV_16U))))
|
||||
return false;
|
||||
|
||||
int templateWindowHalfWize = templateWindowSize / 2;
|
||||
@ -84,33 +99,68 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
||||
int nblocksx = divUp(size.width, BLOCK_COLS), nblocksy = divUp(size.height, BLOCK_ROWS);
|
||||
int almostTemplateWindowSizeSqBinShift = -1;
|
||||
|
||||
char cvt[2][40];
|
||||
char buf[4][40];
|
||||
String opts = format("-D OP_CALC_FASTNLMEANS -D TEMPLATE_SIZE=%d -D SEARCH_SIZE=%d"
|
||||
" -D uchar_t=%s -D int_t=%s -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
|
||||
" -D pixel_t=%s -D int_t=%s -D wlut_t=%s"
|
||||
" -D weight_t=%s -D convert_weight_t=%s -D sum_t=%s -D convert_sum_t=%s"
|
||||
" -D BLOCK_COLS=%d -D BLOCK_ROWS=%d"
|
||||
" -D CTA_SIZE=%d -D TEMPLATE_SIZE2=%d -D SEARCH_SIZE2=%d"
|
||||
" -D convert_int_t=%s -D cn=%d -D convert_uchar_t=%s",
|
||||
templateWindowSize, searchWindowSize, ocl::typeToStr(type),
|
||||
ocl::typeToStr(CV_32SC(cn)), BLOCK_COLS, BLOCK_ROWS, ctaSize,
|
||||
templateWindowHalfWize, searchWindowHalfSize,
|
||||
ocl::convertTypeStr(CV_8U, CV_32S, cn, cvt[0]), cn,
|
||||
ocl::convertTypeStr(CV_32S, CV_8U, cn, cvt[1]));
|
||||
" -D convert_int_t=%s -D cn=%d -D psz=%d -D convert_pixel_t=%s%s",
|
||||
templateWindowSize, searchWindowSize,
|
||||
ocl::typeToStr(type), ocl::typeToStr(CV_32SC(cn)),
|
||||
ocl::typeToStr(CV_32SC(hn)),
|
||||
depth == CV_8U ? ocl::typeToStr(CV_32SC(hn)) :
|
||||
format("long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
|
||||
depth == CV_8U ? ocl::convertTypeStr(CV_32S, CV_32S, hn, buf[0]) :
|
||||
format("convert_long%s", hn > 1 ? format("%d", hn).c_str() : "").c_str(),
|
||||
depth == CV_8U ? ocl::typeToStr(CV_32SC(cn)) :
|
||||
format("long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
|
||||
depth == CV_8U ? ocl::convertTypeStr(depth, CV_32S, cn, buf[1]) :
|
||||
format("convert_long%s", cn > 1 ? format("%d", cn).c_str() : "").c_str(),
|
||||
BLOCK_COLS, BLOCK_ROWS,
|
||||
ctaSize, templateWindowHalfWize, searchWindowHalfSize,
|
||||
ocl::convertTypeStr(depth, CV_32S, cn, buf[2]), cn,
|
||||
(depth == CV_8U ? sizeof(uchar) : sizeof(ushort)) * (cn == 3 ? 4 : cn),
|
||||
ocl::convertTypeStr(CV_32S, depth, cn, buf[3]),
|
||||
normType == NORM_L1 ? " -D ABS" : "");
|
||||
|
||||
ocl::Kernel k("fastNlMeansDenoising", ocl::photo::nlmeans_oclsrc, opts);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
UMat almostDist2Weight;
|
||||
if (!ocl_calcAlmostDist2Weight<float>(almostDist2Weight, searchWindowSize, templateWindowSize, h, cn,
|
||||
almostTemplateWindowSizeSqBinShift))
|
||||
if ((depth == CV_8U &&
|
||||
!ocl_calcAlmostDist2Weight<float, uchar, int>(almostDist2Weight,
|
||||
searchWindowSize, templateWindowSize,
|
||||
h, hn, cn, normType,
|
||||
almostTemplateWindowSizeSqBinShift)) ||
|
||||
(depth == CV_16U &&
|
||||
!ocl_calcAlmostDist2Weight<float, ushort, int64>(almostDist2Weight,
|
||||
searchWindowSize, templateWindowSize,
|
||||
h, hn, cn, normType,
|
||||
almostTemplateWindowSizeSqBinShift)))
|
||||
return false;
|
||||
CV_Assert(almostTemplateWindowSizeSqBinShift >= 0);
|
||||
|
||||
UMat srcex;
|
||||
int borderSize = searchWindowHalfSize + templateWindowHalfWize;
|
||||
if (cn == 3) {
|
||||
srcex.create(size.height + 2*borderSize, size.width + 2*borderSize, CV_MAKE_TYPE(depth, 4));
|
||||
UMat src(srcex, Rect(borderSize, borderSize, size.width, size.height));
|
||||
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||
mixChannels(std::vector<UMat>(1, _src.getUMat()), std::vector<UMat>(1, src), from_to, 3);
|
||||
copyMakeBorder(src, srcex, borderSize, borderSize, borderSize, borderSize,
|
||||
BORDER_DEFAULT|BORDER_ISOLATED); // create borders in place
|
||||
}
|
||||
else
|
||||
copyMakeBorder(_src, srcex, borderSize, borderSize, borderSize, borderSize, BORDER_DEFAULT);
|
||||
|
||||
_dst.create(size, type);
|
||||
UMat dst = _dst.getUMat();
|
||||
UMat dst;
|
||||
if (cn == 3)
|
||||
dst.create(size, CV_MAKE_TYPE(depth, 4));
|
||||
else
|
||||
dst = _dst.getUMat();
|
||||
|
||||
int searchWindowSizeSq = searchWindowSize * searchWindowSize;
|
||||
Size upColSumSize(size.width, searchWindowSizeSq * nblocksy);
|
||||
@ -123,7 +173,14 @@ static bool ocl_fastNlMeansDenoising(InputArray _src, OutputArray _dst, float h,
|
||||
ocl::KernelArg::PtrReadOnly(buffer), almostTemplateWindowSizeSqBinShift);
|
||||
|
||||
size_t globalsize[2] = { nblocksx * ctaSize, nblocksy }, localsize[2] = { ctaSize, 1 };
|
||||
return k.run(2, globalsize, localsize, false);
|
||||
if (!k.run(2, globalsize, localsize, false)) return false;
|
||||
|
||||
if (cn == 3) {
|
||||
int from_to[] = { 0,0, 1,1, 2,2 };
|
||||
mixChannels(std::vector<UMat>(1, dst), std::vector<UMat>(1, _dst.getUMat()), from_to, 3);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ocl_fastNlMeansDenoisingColored( InputArray _src, OutputArray _dst,
|
||||
|
@ -50,14 +50,14 @@
|
||||
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
struct FastNlMeansMultiDenoisingInvoker :
|
||||
ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
FastNlMeansMultiDenoisingInvoker(const std::vector<Mat>& srcImgs, int imgToDenoiseIndex,
|
||||
int temporalWindowSize, Mat& dst, int template_window_size,
|
||||
int search_window_size, const float h);
|
||||
int search_window_size, const float *h);
|
||||
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
@ -81,9 +81,9 @@ private:
|
||||
int search_window_half_size_;
|
||||
int temporal_window_half_size_;
|
||||
|
||||
int fixed_point_mult_;
|
||||
typename pixelInfo<WT>::sampleType fixed_point_mult_;
|
||||
int almost_template_window_size_sq_bin_shift;
|
||||
std::vector<int> almost_dist2weight;
|
||||
std::vector<WT> almost_dist2weight;
|
||||
|
||||
void calcDistSumsForFirstElementInRow(int i, Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums,
|
||||
@ -94,19 +94,19 @@ private:
|
||||
Array4d<int>& up_col_dist_sums) const;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::FastNlMeansMultiDenoisingInvoker(
|
||||
const std::vector<Mat>& srcImgs,
|
||||
int imgToDenoiseIndex,
|
||||
int temporalWindowSize,
|
||||
cv::Mat& dst,
|
||||
int template_window_size,
|
||||
int search_window_size,
|
||||
const float h) :
|
||||
const float *h) :
|
||||
dst_(dst), extended_srcs_(srcImgs.size())
|
||||
{
|
||||
CV_Assert(srcImgs.size() > 0);
|
||||
CV_Assert(srcImgs[0].channels() == sizeof(T));
|
||||
CV_Assert(srcImgs[0].channels() == pixelInfo<T>::channels);
|
||||
|
||||
rows_ = srcImgs[0].rows;
|
||||
cols_ = srcImgs[0].cols;
|
||||
@ -125,8 +125,10 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
border_size_, border_size_, border_size_, border_size_, cv::BORDER_DEFAULT);
|
||||
|
||||
main_extended_src_ = extended_srcs_[temporal_window_half_size_];
|
||||
const int max_estimate_sum_value = temporal_window_size_ * search_window_size_ * search_window_size_ * 255;
|
||||
fixed_point_mult_ = std::numeric_limits<int>::max() / max_estimate_sum_value;
|
||||
const IT max_estimate_sum_value =
|
||||
(IT)temporal_window_size_ * (IT)search_window_size_ * (IT)search_window_size_ * (IT)pixelInfo<T>::sampleMax();
|
||||
fixed_point_mult_ = (int)std::min<IT>(std::numeric_limits<IT>::max() / max_estimate_sum_value,
|
||||
pixelInfo<WT>::sampleMax());
|
||||
|
||||
// precalc weight for every possible l2 dist between blocks
|
||||
// additional optimization of precalced weights to replace division(averaging) by binary shift
|
||||
@ -138,30 +140,24 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
int almost_template_window_size_sq = 1 << almost_template_window_size_sq_bin_shift;
|
||||
double almost_dist2actual_dist_multiplier = (double) almost_template_window_size_sq / template_window_size_sq;
|
||||
|
||||
int max_dist = 255 * 255 * sizeof(T);
|
||||
int max_dist = D::template maxDist<T>();
|
||||
int almost_max_dist = (int)(max_dist / almost_dist2actual_dist_multiplier + 1);
|
||||
almost_dist2weight.resize(almost_max_dist);
|
||||
|
||||
const double WEIGHT_THRESHOLD = 0.001;
|
||||
for (int almost_dist = 0; almost_dist < almost_max_dist; almost_dist++)
|
||||
{
|
||||
double dist = almost_dist * almost_dist2actual_dist_multiplier;
|
||||
int weight = cvRound(fixed_point_mult_ * std::exp(-dist / (h * h * sizeof(T))));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixed_point_mult_)
|
||||
weight = 0;
|
||||
|
||||
almost_dist2weight[almost_dist] = weight;
|
||||
almost_dist2weight[almost_dist] =
|
||||
D::template calcWeight<T, WT>(dist, h, fixed_point_mult_);
|
||||
}
|
||||
CV_Assert(almost_dist2weight[0] == fixed_point_mult_);
|
||||
|
||||
// additional optimization init end
|
||||
if (dst_.empty())
|
||||
dst_ = Mat::zeros(srcImgs[0].size(), srcImgs[0].type());
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::operator() (const Range& range) const
|
||||
{
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
@ -234,7 +230,7 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
dist_sums_row[x] -= col_dist_sums_row[x];
|
||||
|
||||
col_dist_sums_row[x] = up_col_dist_sums_row[x] +
|
||||
calcUpDownDist(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
|
||||
D::template calcUpDownDist<T>(a_up, a_down, b_up_ptr[start_bx + x], b_down_ptr[start_bx + x]);
|
||||
|
||||
dist_sums_row[x] += col_dist_sums_row[x];
|
||||
up_col_dist_sums_row[x] = col_dist_sums_row[x];
|
||||
@ -247,11 +243,11 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
}
|
||||
|
||||
// calc weights
|
||||
int weights_sum = 0;
|
||||
|
||||
int estimation[3];
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
IT estimation[pixelInfo<T>::channels], weights_sum[pixelInfo<WT>::channels];
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<T>::channels; channel_num++)
|
||||
estimation[channel_num] = 0;
|
||||
for (size_t channel_num = 0; channel_num < pixelInfo<WT>::channels; channel_num++)
|
||||
weights_sum[channel_num] = 0;
|
||||
|
||||
for (int d = 0; d < temporal_window_size_; d++)
|
||||
{
|
||||
@ -266,26 +262,22 @@ void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const
|
||||
{
|
||||
int almostAvgDist = dist_sums_row[x] >> almost_template_window_size_sq_bin_shift;
|
||||
|
||||
int weight = almost_dist2weight[almostAvgDist];
|
||||
weights_sum += weight;
|
||||
|
||||
WT weight = almost_dist2weight[almostAvgDist];
|
||||
T p = cur_row_ptr[border_size_ + search_window_x + x];
|
||||
incWithWeight(estimation, weight, p);
|
||||
incWithWeight<T, IT, WT>(estimation, weights_sum, weight, p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t channel_num = 0; channel_num < sizeof(T); channel_num++)
|
||||
estimation[channel_num] = ((unsigned)estimation[channel_num] + weights_sum / 2) / weights_sum;
|
||||
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T>(estimation);
|
||||
|
||||
divByWeightsSum<IT, UIT, pixelInfo<T>::channels, pixelInfo<WT>::channels>(estimation,
|
||||
weights_sum);
|
||||
dst_.at<T>(i,j) = saturateCastFromArray<T, IT>(estimation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRow(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForFirstElementInRow(
|
||||
int i, Array3d<int>& dist_sums, Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||
{
|
||||
int j = 0;
|
||||
@ -310,7 +302,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
||||
{
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
{
|
||||
int dist = calcDist<T>(
|
||||
int dist = D::template calcDist<T>(
|
||||
main_extended_src_.at<T>(border_size_ + i + ty, border_size_ + j + tx),
|
||||
cur_extended_src.at<T>(border_size_ + start_y + ty, border_size_ + start_x + tx));
|
||||
|
||||
@ -325,8 +317,8 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForFirstElementInRo
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRow(
|
||||
template <typename T, typename IT, typename UIT, typename D, typename WT>
|
||||
inline void FastNlMeansMultiDenoisingInvoker<T, IT, UIT, D, WT>::calcDistSumsForElementInFirstRow(
|
||||
int i, int j, int first_col_num, Array3d<int>& dist_sums,
|
||||
Array4d<int>& col_dist_sums, Array4d<int>& up_col_dist_sums) const
|
||||
{
|
||||
@ -353,7 +345,7 @@ inline void FastNlMeansMultiDenoisingInvoker<T>::calcDistSumsForElementInFirstRo
|
||||
int* col_dist_sums_ptr = &col_dist_sums[new_last_col_num][d][y][x];
|
||||
for (int ty = -template_window_half_size_; ty <= template_window_half_size_; ty++)
|
||||
{
|
||||
*col_dist_sums_ptr += calcDist<T>(
|
||||
*col_dist_sums_ptr += D::template calcDist<T>(
|
||||
main_extended_src_.at<T>(ay + ty, ax),
|
||||
cur_extended_src.at<T>(by + ty, bx));
|
||||
}
|
||||
|
@ -20,21 +20,23 @@
|
||||
|
||||
#ifdef OP_CALC_WEIGHTS
|
||||
|
||||
// Fills one entry of the almostDist2Weight lookup table per work-item: the
// work-item's global id along dimension 0 is the table index, and ids at or
// beyond almostMaxDist write nothing.
// NOTE(review): this span appears to be a rendered diff. Two signatures and
// two bodies are present: an earlier one writing an `int` table and a later
// one writing a `wlut_t` table with `den` typed as `w_t`. Confirm which lines
// belong to the current kernel before relying on either.
__kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almostMaxDist,
|
||||
__kernel void calcAlmostDist2Weight(__global wlut_t * almostDist2Weight, int almostMaxDist,
|
||||
FT almostDist2ActualDistMultiplier, int fixedPointMult,
|
||||
FT den, FT WEIGHT_THRESHOLD)
|
||||
w_t den, FT WEIGHT_THRESHOLD)
|
||||
{
|
||||
int almostDist = get_global_id(0);
|
||||
|
||||
if (almostDist < almostMaxDist)
|
||||
{
|
||||
// Scale the table index by almostDist2ActualDistMultiplier to get the distance.
FT dist = almostDist * almostDist2ActualDistMultiplier;
|
||||
// int-table variant: weight = fixedPointMult * exp(-dist * den), converted with
// convert_int_sat_rte, then forced to 0 when below WEIGHT_THRESHOLD * fixedPointMult.
int weight = convert_int_sat_rte(fixedPointMult * exp(-dist * den));
|
||||
|
||||
if (weight < WEIGHT_THRESHOLD * fixedPointMult)
|
||||
weight = 0;
|
||||
|
||||
almostDist2Weight[almostDist] = weight;
|
||||
// wlut_t-table variant: with ABS defined the exponent uses -dist*dist,
// otherwise -dist; both are multiplied by den.
#ifdef ABS
|
||||
w_t w = exp((w_t)(-dist*dist) * den);
|
||||
#else
|
||||
w_t w = exp((w_t)(-dist) * den);
|
||||
#endif
|
||||
// A NaN result from exp() is replaced by 1.0 before scaling by fixedPointMult.
wlut_t weight = convert_wlut_t(fixedPointMult * (isnan(w) ? (w_t)1.0 : w));
|
||||
// Weights below WEIGHT_THRESHOLD * fixedPointMult are stored as 0.
almostDist2Weight[almostDist] =
|
||||
weight < (wlut_t)(WEIGHT_THRESHOLD * fixedPointMult) ? (wlut_t)0 : weight;
|
||||
}
|
||||
}
|
||||
|
||||
@ -44,21 +46,35 @@ __kernel void calcAlmostDist2Weight(__global int * almostDist2Weight, int almost
|
||||
|
||||
#define SEARCH_SIZE_SQ (SEARCH_SIZE * SEARCH_SIZE)
|
||||
|
||||
// Returns a scalar distance between two pixels: a per-component value is
// computed and the components are then summed according to the `cn` macro.
// NOTE(review): this span appears to be a rendered diff. Both a `uchar_t` and
// a `pixel_t` signature are present, as are two `#error` messages; presumably
// the `pixel_t` signature and the "1, 2, 3 or 4" message are current. Confirm.
inline int calcDist(uchar_t a, uchar_t b)
|
||||
inline int calcDist(pixel_t a, pixel_t b)
|
||||
{
|
||||
// With ABS defined the per-component value is abs_diff(a, b); otherwise it is
// the squared difference of the components after convert_int_t.
#ifdef ABS
|
||||
int_t retval = convert_int_t(abs_diff(a, b));
|
||||
#else
|
||||
int_t diff = convert_int_t(a) - convert_int_t(b);
|
||||
int_t retval = diff * diff;
|
||||
#endif
|
||||
|
||||
// Sum the components selected by cn; any other cn value stops compilation.
#if cn == 1
|
||||
return retval;
|
||||
#elif cn == 2
|
||||
return retval.x + retval.y;
|
||||
#elif cn == 3
|
||||
return retval.x + retval.y + retval.z;
|
||||
#elif cn == 4
|
||||
return retval.x + retval.y + retval.z + retval.w;
|
||||
#else
|
||||
#error "cn should be either 1 or 2"
|
||||
#error "cn should be either 1, 2, 3 or 4"
|
||||
#endif
|
||||
}
|
||||
|
||||
inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_value, uchar_t up_value_t)
|
||||
#ifdef ABS
|
||||
// ABS variant (this definition sits after an `#ifdef ABS` line): returns
// calcDist of the "down" pixel pair minus calcDist of the "up" pixel pair.
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
|
||||
{
|
||||
return calcDist(down_value, down_value_t) - calcDist(up_value, up_value_t);
|
||||
}
|
||||
#else
|
||||
inline int calcDistUpDown(pixel_t down_value, pixel_t down_value_t, pixel_t up_value, pixel_t up_value_t)
|
||||
{
|
||||
int_t A = convert_int_t(down_value) - convert_int_t(down_value_t);
|
||||
int_t B = convert_int_t(up_value) - convert_int_t(up_value_t);
|
||||
@ -68,10 +84,15 @@ inline int calcDistUpDown(uchar_t down_value, uchar_t down_value_t, uchar_t up_v
|
||||
return retval;
|
||||
#elif cn == 2
|
||||
return retval.x + retval.y;
|
||||
#elif cn == 3
|
||||
return retval.x + retval.y + retval.z;
|
||||
#elif cn == 4
|
||||
return retval.x + retval.y + retval.z + retval.w;
|
||||
#else
|
||||
#error "cn should be either 1 or 2"
|
||||
#error "cn should be either 1, 2, 3 or 4"
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#define COND if (x == 0 && y == 0)
|
||||
|
||||
@ -87,9 +108,9 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
|
||||
{
|
||||
int dist = 0, value;
|
||||
|
||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
||||
__global const pixel_t * src_template = (__global const pixel_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
|
||||
__global int * col_dists_current = col_dists + i * TEMPLATE_SIZE;
|
||||
|
||||
#pragma unroll
|
||||
@ -107,8 +128,8 @@ inline void calcFirstElementInRow(__global const uchar * src, int src_step, int
|
||||
dist += value;
|
||||
}
|
||||
|
||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
||||
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
|
||||
}
|
||||
|
||||
#pragma unroll
|
||||
@ -130,9 +151,9 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
|
||||
|
||||
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
||||
{
|
||||
__global const uchar_t * src_current = (__global const uchar_t *)(src + mad24(y, src_step, mad24(cn, x, src_offset)));
|
||||
__global const uchar_t * src_template = (__global const uchar_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(cn, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global const pixel_t * src_current = (__global const pixel_t *)(src + mad24(y, src_step, mad24(psz, x, src_offset)));
|
||||
__global const pixel_t * src_template = (__global const pixel_t *)(src +
|
||||
mad24(sy + i / SEARCH_SIZE, src_step, mad24(psz, sx + i % SEARCH_SIZE, src_offset)));
|
||||
__global int * col_dists_current = col_dists + TEMPLATE_SIZE * i;
|
||||
|
||||
int col_dist = 0;
|
||||
@ -142,8 +163,8 @@ inline void calcElementInFirstRow(__global const uchar * src, int src_step, int
|
||||
{
|
||||
col_dist += calcDist(src_current[0], src_template[0]);
|
||||
|
||||
src_current = (__global const uchar_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const uchar_t *)((__global const uchar *)src_template + src_step);
|
||||
src_current = (__global const pixel_t *)((__global const uchar *)src_current + src_step);
|
||||
src_template = (__global const pixel_t *)((__global const uchar *)src_template + src_step);
|
||||
}
|
||||
|
||||
dists[i] += col_dist - col_dists_current[first];
|
||||
@ -160,8 +181,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
||||
int sy_up = y - TEMPLATE_SIZE2 - 1;
|
||||
int sy_down = y + TEMPLATE_SIZE2;
|
||||
|
||||
uchar_t up_value = *(__global const uchar_t *)(src + mad24(sy_up, src_step, mad24(cn, sx, src_offset)));
|
||||
uchar_t down_value = *(__global const uchar_t *)(src + mad24(sy_down, src_step, mad24(cn, sx, src_offset)));
|
||||
pixel_t up_value = *(__global const pixel_t *)(src + mad24(sy_up, src_step, mad24(psz, sx, src_offset)));
|
||||
pixel_t down_value = *(__global const pixel_t *)(src + mad24(sy_down, src_step, mad24(psz, sx, src_offset)));
|
||||
|
||||
sx -= SEARCH_SIZE2;
|
||||
sy_up -= SEARCH_SIZE2;
|
||||
@ -171,8 +192,8 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
||||
{
|
||||
int wx = i % SEARCH_SIZE, wy = i / SEARCH_SIZE;
|
||||
|
||||
uchar_t up_value_t = *(__global const uchar_t *)(src + mad24(sy_up + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
||||
uchar_t down_value_t = *(__global const uchar_t *)(src + mad24(sy_down + wy, src_step, mad24(cn, sx + wx, src_offset)));
|
||||
pixel_t up_value_t = *(__global const pixel_t *)(src + mad24(sy_up + wy, src_step, mad24(psz, sx + wx, src_offset)));
|
||||
pixel_t down_value_t = *(__global const pixel_t *)(src + mad24(sy_down + wy, src_step, mad24(psz, sx + wx, src_offset)));
|
||||
|
||||
__global int * col_dists_current = col_dists + mad24(i, TEMPLATE_SIZE, first);
|
||||
__global int * up_col_dists_current = up_col_dists + mad24(x0, SEARCH_SIZE_SQ, i);
|
||||
@ -186,24 +207,25 @@ inline void calcElement(__global const uchar * src, int src_step, int src_offset
|
||||
}
|
||||
|
||||
inline void convolveWindow(__global const uchar * src, int src_step, int src_offset,
|
||||
__local int * dists, __global const int * almostDist2Weight,
|
||||
__local int * dists, __global const wlut_t * almostDist2Weight,
|
||||
__global uchar * dst, int dst_step, int dst_offset,
|
||||
int y, int x, int id, __local int * weights_local,
|
||||
__local int_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
|
||||
int y, int x, int id, __local weight_t * weights_local,
|
||||
__local sum_t * weighted_sum_local, int almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2, weights = 0;
|
||||
int_t weighted_sum = (int_t)(0);
|
||||
int sx = x - SEARCH_SIZE2, sy = y - SEARCH_SIZE2;
|
||||
weight_t weights = (weight_t)0;
|
||||
sum_t weighted_sum = (sum_t)0;
|
||||
|
||||
for (int i = id; i < SEARCH_SIZE_SQ; i += CTA_SIZE)
|
||||
{
|
||||
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, cn, src_offset));
|
||||
int_t src_value = convert_int_t(*(__global const uchar_t *)(src + src_index));
|
||||
int src_index = mad24(sy + i / SEARCH_SIZE, src_step, mad24(i % SEARCH_SIZE + sx, psz, src_offset));
|
||||
sum_t src_value = convert_sum_t(*(__global const pixel_t *)(src + src_index));
|
||||
|
||||
int almostAvgDist = dists[i] >> almostTemplateWindowSizeSqBinShift;
|
||||
int weight = almostDist2Weight[almostAvgDist];
|
||||
weight_t weight = convert_weight_t(almostDist2Weight[almostAvgDist]);
|
||||
|
||||
weights += weight;
|
||||
weighted_sum += (int_t)(weight) * src_value;
|
||||
weighted_sum += (sum_t)weight * src_value;
|
||||
}
|
||||
|
||||
weights_local[id] = weights;
|
||||
@ -223,26 +245,27 @@ inline void convolveWindow(__global const uchar * src, int src_step, int src_off
|
||||
|
||||
if (id == 0)
|
||||
{
|
||||
int dst_index = mad24(y, dst_step, mad24(cn, x, dst_offset));
|
||||
int_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
|
||||
int dst_index = mad24(y, dst_step, mad24(psz, x, dst_offset));
|
||||
sum_t weighted_sum_local_0 = weighted_sum_local[0] + weighted_sum_local[1] +
|
||||
weighted_sum_local[2] + weighted_sum_local[3];
|
||||
int weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
|
||||
weight_t weights_local_0 = weights_local[0] + weights_local[1] + weights_local[2] + weights_local[3];
|
||||
|
||||
*(__global uchar_t *)(dst + dst_index) = convert_uchar_t(weighted_sum_local_0 / (int_t)(weights_local_0));
|
||||
*(__global pixel_t *)(dst + dst_index) = convert_pixel_t(weighted_sum_local_0 / (sum_t)weights_local_0);
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void fastNlMeansDenoising(__global const uchar * src, int src_step, int src_offset,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__global const int * almostDist2Weight, __global uchar * buffer,
|
||||
__global const wlut_t * almostDist2Weight, __global uchar * buffer,
|
||||
int almostTemplateWindowSizeSqBinShift)
|
||||
{
|
||||
int block_x = get_group_id(0), nblocks_x = get_num_groups(0);
|
||||
int block_y = get_group_id(1);
|
||||
int id = get_local_id(0), first;
|
||||
|
||||
__local int dists[SEARCH_SIZE_SQ], weights[CTA_SIZE];
|
||||
__local int_t weighted_sum[CTA_SIZE];
|
||||
__local int dists[SEARCH_SIZE_SQ];
|
||||
__local weight_t weights[CTA_SIZE];
|
||||
__local sum_t weighted_sum[CTA_SIZE];
|
||||
|
||||
int x0 = block_x * BLOCK_COLS, x1 = min(x0 + BLOCK_COLS, dst_cols);
|
||||
int y0 = block_y * BLOCK_ROWS, y1 = min(y0 + BLOCK_ROWS, dst_rows);
|
||||
|
@ -13,11 +13,11 @@
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
|
||||
PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, int, bool, bool)
|
||||
{
|
||||
int cn, templateWindowSize, searchWindowSize;
|
||||
float h;
|
||||
bool use_roi;
|
||||
int cn, normType, templateWindowSize, searchWindowSize;
|
||||
std::vector<float> h;
|
||||
bool use_roi, use_image;
|
||||
|
||||
TEST_DECLARE_INPUT_PARAMETER(src);
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst);
|
||||
@ -25,29 +25,46 @@ PARAM_TEST_CASE(FastNlMeansDenoisingTestBase, Channels, bool)
|
||||
// Reads the test parameters into the fixture members and sets the denoising
// window sizes to fixed values (template 7, search 21).
// NOTE(review): this span appears to be a rendered diff. `use_roi = GET_PARAM(1);`
// and `h = 3.0f;` look like pre-change lines that the later assignments
// (`normType`/`use_roi`/`use_image` from parameters 1..3, and the per-channel
// `h` vector) replace. Confirm against the actual test source.
virtual void SetUp()
|
||||
{
|
||||
cn = GET_PARAM(0);
|
||||
use_roi = GET_PARAM(1);
|
||||
normType = GET_PARAM(1);
|
||||
use_roi = GET_PARAM(2);
|
||||
use_image = GET_PARAM(3);
|
||||
|
||||
templateWindowSize = 7;
|
||||
searchWindowSize = 21;
|
||||
h = 3.0f;
|
||||
|
||||
// One h value per channel: 3.0, 3.5, 4.0, ... (3.0f + 0.5f * channel index).
h.resize(cn);
|
||||
for (int i=0; i<cn; i++)
|
||||
h[i] = 3.0f + 0.5f*i;
|
||||
}
|
||||
|
||||
virtual void generateTestData()
|
||||
{
|
||||
const int type = CV_8UC(cn);
|
||||
Mat image;
|
||||
if (cn == 1)
|
||||
{
|
||||
image = readImage("denoising/lena_noised_gaussian_sigma=10.png", IMREAD_GRAYSCALE);
|
||||
|
||||
if (use_image) {
|
||||
image = readImage("denoising/lena_noised_gaussian_sigma=10.png",
|
||||
cn == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
|
||||
ASSERT_FALSE(image.empty());
|
||||
}
|
||||
|
||||
const int type = CV_8UC(cn);
|
||||
|
||||
Size roiSize = cn == 1 ? image.size() : randomSize(1, MAX_VALUE);
|
||||
Size roiSize = use_image ? image.size() : randomSize(1, MAX_VALUE);
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, roiSize, srcBorder, type, 0, 255);
|
||||
if (cn == 1)
|
||||
image.copyTo(src_roi);
|
||||
if (use_image) {
|
||||
ASSERT_TRUE(cn > 0 && cn <= 4);
|
||||
if (cn == 2) {
|
||||
int from_to[] = { 0,0, 1,1 };
|
||||
src_roi.create(roiSize, type);
|
||||
mixChannels(&image, 1, &src_roi, 1, from_to, 2);
|
||||
}
|
||||
else if (cn == 4) {
|
||||
int from_to[] = { 0,0, 1,1, 2,2, 1,3};
|
||||
src_roi.create(roiSize, type);
|
||||
mixChannels(&image, 1, &src_roi, 1, from_to, 4);
|
||||
}
|
||||
else image.copyTo(src_roi);
|
||||
}
|
||||
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, 0, 255);
|
||||
@ -65,8 +82,23 @@ OCL_TEST_P(FastNlMeansDenoising, Mat)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize));
|
||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
|
||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, std::vector<float>(1, h[0]), templateWindowSize, searchWindowSize, normType));
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||
}
|
||||
}
|
||||
|
||||
typedef FastNlMeansDenoisingTestBase FastNlMeansDenoising_hsep;
|
||||
|
||||
OCL_TEST_P(FastNlMeansDenoising_hsep, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::fastNlMeansDenoising(src_roi, dst_roi, h, templateWindowSize, searchWindowSize, normType));
|
||||
OCL_ON(cv::fastNlMeansDenoising(usrc_roi, udst_roi, h, templateWindowSize, searchWindowSize, normType));
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||
}
|
||||
@ -80,15 +112,21 @@ OCL_TEST_P(FastNlMeansDenoisingColored, Mat)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h, h, templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h, h, templateWindowSize, searchWindowSize));
|
||||
OCL_OFF(cv::fastNlMeansDenoisingColored(src_roi, dst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
|
||||
OCL_ON(cv::fastNlMeansDenoisingColored(usrc_roi, udst_roi, h[0], h[0], templateWindowSize, searchWindowSize));
|
||||
|
||||
OCL_EXPECT_MATS_NEAR(dst, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Test-suite instantiations for the Photo module.
// NOTE(review): this span appears to be a rendered diff. The first two
// two-parameter instantiations (channels, bool) look like the pre-change
// lines; the three four-parameter ones that follow look like their
// replacements. Confirm.
// In the four-parameter form the tuple order matches what SetUp() reads via
// GET_PARAM(0..3): channel count, norm type, use_roi, use_image.
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising, Combine(Values(1, 2), Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored, Combine(Values(3, 4), Bool()));
|
||||
// 1-4 channels, NORM_L2 and NORM_L1, both use_roi values, use_image = true.
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising,
|
||||
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
|
||||
Bool(), Values(true)));
|
||||
// Same parameter grid for the _hsep suite.
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoising_hsep,
|
||||
Combine(Values(1, 2, 3, 4), Values((int)NORM_L2, (int)NORM_L1),
|
||||
Bool(), Values(true)));
|
||||
// Colored suite: 3 or 4 channels, NORM_L2 only, both use_roi values, use_image = false.
OCL_INSTANTIATE_TEST_CASE_P(Photo, FastNlMeansDenoisingColored,
|
||||
Combine(Values(3, 4), Values((int)NORM_L2), Bool(), Values(false)));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user