diff --git a/modules/gpu/src/cuda/resize.cu b/modules/gpu/src/cuda/resize.cu index 110e62d036..d679c32263 100644 --- a/modules/gpu/src/cuda/resize.cu +++ b/modules/gpu/src/cuda/resize.cu @@ -77,8 +77,8 @@ namespace cv { namespace gpu { namespace device if (dst_x < dst.cols && dst_y < dst.rows) { - const float src_x = dst_x * fx; - const float src_y = dst_y * fy; + const float src_x = (dst_x + 0.5f) * fx - 0.5f; + const float src_y = (dst_y + 0.5f) * fy - 0.5f; work_type out = VecTraits::all(0); @@ -86,16 +86,18 @@ namespace cv { namespace gpu { namespace device const int y1 = __float2int_rd(src_y); const int x2 = x1 + 1; const int y2 = y1 + 1; - const int x2_read = ::min(x2, src.cols - 1); - const int y2_read = ::min(y2, src.rows - 1); + const int x1_read = ::max(::min(x1, src.cols - 1), 0); + const int y1_read = ::max(::min(y1, src.rows - 1), 0); + const int x2_read = ::max(::min(x2, src.cols - 1), 0); + const int y2_read = ::max(::min(y2, src.rows - 1), 0); - T src_reg = src(y1, x1); + T src_reg = src(y1_read, x1_read); out = out + src_reg * ((x2 - src_x) * (y2 - src_y)); - src_reg = src(y1, x2_read); + src_reg = src(y1_read, x2_read); out = out + src_reg * ((src_x - x1) * (y2 - src_y)); - src_reg = src(y2_read, x1); + src_reg = src(y2_read, x1_read); out = out + src_reg * ((x2 - src_x) * (src_y - y1)); src_reg = src(y2_read, x2_read); @@ -119,6 +121,20 @@ namespace cv { namespace gpu { namespace device } } + template __global__ void resize_linear(const Ptr2D src, PtrStepSz dst, const float fy, const float fx) + { + const int dst_x = blockDim.x * blockIdx.x + threadIdx.x; + const int dst_y = blockDim.y * blockIdx.y + threadIdx.y; + + if (dst_x < dst.cols && dst_y < dst.rows) + { + const float src_x = (dst_x + 0.5f) * fx - 0.5f; + const float src_y = (dst_y + 0.5f) * fy - 0.5f; + + dst(dst_y, dst_x) = src(src_y, src_x); + } + } + template __global__ void resize_area(const Ptr2D src, PtrStepSz dst) { const int x = blockDim.x * blockIdx.x + threadIdx.x; @@ -231,7 +247,7 @@ namespace cv { namespace gpu { namespace device TextureAccessor texSrc = texAccessor(src, 0, 0); LinearFilter< TextureAccessor > filteredSrc(texSrc); - resize<<>>(filteredSrc, dst, fy, fx); + resize_linear<<>>(filteredSrc, dst, fy, fx); } else { @@ -241,7 +257,7 @@ namespace cv { namespace gpu { namespace device BorderReader, BrdReplicate > brdSrc(texSrc, brd); LinearFilter< BorderReader, BrdReplicate > > filteredSrc(brdSrc); - resize<<>>(filteredSrc, dst, fy, fx); + resize_linear<<>>(filteredSrc, dst, fy, fx); } cudaSafeCall( cudaGetLastError() ); diff --git a/modules/gpu/test/test_resize.cpp b/modules/gpu/test/test_resize.cpp index b59e7b33f4..25f0f0e2bb 100644 --- a/modules/gpu/test/test_resize.cpp +++ b/modules/gpu/test/test_resize.cpp @@ -73,6 +73,28 @@ namespace } } + template class Interpolator> + void resizeLinearImpl(const cv::Mat& src, cv::Mat& dst, double fx, double fy) + { + const int cn = src.channels(); + + cv::Size dsize(cv::saturate_cast(src.cols * fx), cv::saturate_cast(src.rows * fy)); + + dst.create(dsize, src.type()); + + float ifx = static_cast(1.0 / fx); + float ify = static_cast(1.0 / fy); + + for (int y = 0; y < dsize.height; ++y) + { + for (int x = 0; x < dsize.width; ++x) + { + for (int c = 0; c < cn; ++c) + dst.at(y, x * cn + c) = Interpolator::getValue(src, (y + 0.5f) * ify - 0.5f, (x + 0.5f) * ifx - 0.5f, c, cv::BORDER_REPLICATE); + } + } + } + void resizeGold(const cv::Mat& src, cv::Mat& dst, double fx, double fy, int interpolation) { typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst, double fx, double fy); @@ -90,12 +112,12 @@ namespace static const func_t linear_funcs[] = { - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl, - resizeImpl + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl, + resizeLinearImpl }; static const func_t cubic_funcs[] = @@ -204,7 +226,15 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine( DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(0.3, 0.5), - testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_AREA)), + WHOLE_SUBMAT)); + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc2, ResizeSameAsHost, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), + testing::Values(0.3, 0.5, 1.5, 2.0), + testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), WHOLE_SUBMAT)); #endif // HAVE_CUDA