Merge pull request #27182 from CodeLinaro:boxFilter_hal_changes

Parallel_for in box Filter and support for 32f box filter in Fastcv hal #27182

Added parallel_for in box filter hal and support for 32f box filter

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
adsha-quic 2025-04-16 21:03:38 +05:30 committed by GitHub
parent 3962803e7a
commit 6ffc515b2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 106 additions and 37 deletions

View File

@ -314,6 +314,69 @@ int fastcv_hal_sobel(
CV_HAL_RETURN(status, hal_sobel);
}
class FcvBoxLoop_Invoker : public cv::ParallelLoopBody
{
public:
FcvBoxLoop_Invoker(cv::Mat src_, int width_, int height_, cv::Mat dst_, int bdr_, int knl_, int normalize_, int stripeHeight_, int nStripes_, int depth_) :
cv::ParallelLoopBody(), src(src_), width(width_), height(height_), dst(dst_), bdr(bdr_), knl(knl_), normalize(normalize_), stripeHeight(stripeHeight_), nStripes(nStripes_), depth(depth_)
{
}
virtual void operator()(const cv::Range& range) const CV_OVERRIDE
{
int height_ = stripeHeight * (range.end - range.start);
int width_ = width;
cv::Mat src_;
int n = knl/2;
if(range.end == nStripes)
height_ += (height - range.end * stripeHeight);
src_ = cv::Mat(height_ + 2*n, width_ + 2*n, depth);
if(range.start == 0 && range.end == nStripes)
cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_)), src_, n, n, n, n, bdr);
else if(range.start == 0)
cv::copyMakeBorder(src(cv::Rect(0, 0, width_, height_ + n)), src_, n, 0, n, n, bdr);
else if(range.end == nStripes)
cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + n)), src_, 0, n, n, n, bdr);
else
cv::copyMakeBorder(src(cv::Rect(0, range.start * stripeHeight - n, width_, height_ + 2*n)), src_, 0, 0, n, n, bdr);
cv::Mat dst_padded = cv::Mat(height_ + 2*n, width_ + 2*n, depth);
if(depth == CV_32F)
fcvBoxFilterNxNf32((float*)src_.data, width_ + 2*n, height_ + 2*n, (width_ + 2*n)*sizeof(float),
knl, (float*)dst_padded.data, dst_padded.step[0]);
else
{
auto func = knl == 3 ? fcvBoxFilter3x3u8_v3 : fcvBoxFilter5x5u8_v2;
func(src_.data, width_ + 2*n, height_ + 2*n, width_ + 2*n,
dst_padded.data, dst_padded.step[0], normalize, FASTCV_BORDER_UNDEFINED, 0);
}
int start_val = stripeHeight * range.start;
cv::Mat dst_temp1 = dst_padded(cv::Rect(n, n, width_, height_));
cv::Mat dst_temp2 = dst(cv::Rect(0, start_val, width_, height_));
dst_temp1.copyTo(dst_temp2);
}
private:
cv::Mat src;
const int width;
const int height;
cv::Mat dst;
const int bdr;
const int knl;
const int normalize;
const int stripeHeight;
const int nStripes;
int depth;
FcvBoxLoop_Invoker(const FcvBoxLoop_Invoker &); // = delete;
const FcvBoxLoop_Invoker& operator= (const FcvBoxLoop_Invoker &); // = delete;
};
int fastcv_hal_boxFilter(
const uchar* src_data,
size_t src_step,
@ -335,15 +398,7 @@ int fastcv_hal_boxFilter(
bool normalize,
int border_type)
{
if((width*height) < (320*240))
{
CV_HAL_RETURN_NOT_IMPLEMENTED("input size not supported");
}
else if(src_data == dst_data)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("in-place processing not supported");
}
else if(src_depth != CV_8U || cn != 1)
if((src_depth != CV_8U && src_depth != CV_32F) || cn != 1)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("src type not supported");
}
@ -351,8 +406,7 @@ int fastcv_hal_boxFilter(
{
CV_HAL_RETURN_NOT_IMPLEMENTED("same src and dst type supported");
}
else if(ksize_width != ksize_height ||
(ksize_width != 3 && ksize_width != 5))
else if(ksize_width != ksize_height)
{
CV_HAL_RETURN_NOT_IMPLEMENTED("kernel size not supported");
}
@ -363,37 +417,52 @@ int fastcv_hal_boxFilter(
CV_HAL_RETURN_NOT_IMPLEMENTED("ROI not supported");
}
if(src_depth == CV_32F && normalize != 1)
CV_HAL_RETURN_NOT_IMPLEMENTED("normalized kernel supported for float types");
if(src_depth == CV_32F && (height < 5 || width < 5 || ksize_height < 5))
CV_HAL_RETURN_NOT_IMPLEMENTED("size not supported");
if(src_depth == CV_8U && (ksize_width != 3 && ksize_width != 5))
CV_HAL_RETURN_NOT_IMPLEMENTED("kernel size not supported");
INITIALIZATION_CHECK;
fcvBorderType bdr;
uint8_t bdrVal = 0;
switch(border_type)
cv::Mat dst_temp;
bool inPlace = src_data == dst_data ? true : false ;
int nThreads = cv::getNumThreads();
cv::Mat src = cv::Mat(height, width, src_depth, (void*)src_data, src_step);
if(inPlace)
dst_temp = cv::Mat(height, width, src_depth);
else
dst_temp = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);
int nStripes, stripeHeight = src.rows/nThreads;
if((size_t)src.rows < ksize_height || stripeHeight < 5 || nThreads <= 1)
{
case cv::BORDER_REPLICATE:
bdr = FASTCV_BORDER_REPLICATE;
break;
case cv::BORDER_REFLECT:
bdr = FASTCV_BORDER_REFLECT;
break;
case cv::BORDER_REFLECT101: // cv::BORDER_REFLECT_101, BORDER_DEFAULT
bdr = FASTCV_BORDER_REFLECT_V2;
break;
default:
CV_HAL_RETURN_NOT_IMPLEMENTED("border type not supported");
nStripes = 1;
stripeHeight = src.rows;
}
else
{
nStripes = nThreads;
stripeHeight = src.rows/nThreads;
}
cv::parallel_for_(cv::Range(0, nStripes),
FcvBoxLoop_Invoker(src, width, height, dst_temp, border_type, ksize_width, normalize, stripeHeight, nStripes, src_depth), nStripes);
if(inPlace)
{
cv::Mat dst = cv::Mat(height, width, src_depth, (void*)dst_data, dst_step);
dst_temp.copyTo(dst);
}
fcvStatus status = FASTCV_SUCCESS;
if(ksize_width == 3)
{
status = fcvBoxFilter3x3u8_v3(src_data, width, height, src_step,
dst_data, dst_step, normalize, bdr, bdrVal);
}
else if(ksize_width == 5)
{
status = fcvBoxFilter5x5u8_v2(src_data, width, height, src_step,
dst_data, dst_step, normalize, bdr, bdrVal);
}
CV_HAL_RETURN(status,hal_boxFilter);
}

View File

@ -104,7 +104,7 @@ PERF_TEST_P(Size_MatType_BorderType, blur16x16,
Size size = get<0>(GetParam());
int type = get<1>(GetParam());
BorderType btype = get<2>(GetParam());
double eps = 1e-3;
double eps = 1.25e-3;
eps = CV_MAT_DEPTH(type) <= CV_32S ? 1 : eps;