diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index ca1b3aa9b5..6a0cf1cd08 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -240,6 +240,7 @@ public: bool isUMatVector() const; bool isMatx() const; bool isVector() const; + bool isGpuMat() const; bool isGpuMatVector() const; ~_InputArray(); diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index 9b2da34e61..617c61ddad 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -157,6 +157,7 @@ inline bool _InputArray::isMatx() const { return kind() == _InputArray::MATX; } inline bool _InputArray::isVector() const { return kind() == _InputArray::STD_VECTOR || kind() == _InputArray::STD_BOOL_VECTOR || kind() == _InputArray::STD_ARRAY; } +inline bool _InputArray::isGpuMat() const { return kind() == _InputArray::CUDA_GPU_MAT; } inline bool _InputArray::isGpuMatVector() const { return kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT; } //////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp index 07473d459e..4c14340722 100644 --- a/modules/stitching/include/opencv2/stitching/detail/blenders.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/blenders.hpp @@ -145,6 +145,18 @@ private: #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) std::vector gpu_dst_pyr_laplace_; std::vector gpu_dst_band_weights_; + std::vector gpu_tl_points_; + std::vector gpu_imgs_with_border_; + std::vector > gpu_weight_pyr_gauss_vec_; + std::vector > gpu_src_pyr_laplace_vec_; + std::vector > gpu_ups_; + cuda::GpuMat gpu_dst_mask_; + cuda::GpuMat gpu_mask_; + cuda::GpuMat gpu_img_; + cuda::GpuMat gpu_weight_map_; + cuda::GpuMat 
gpu_add_mask_; + int gpu_feed_idx_; + bool gpu_initialized_; #endif }; diff --git a/modules/stitching/src/blenders.cpp b/modules/stitching/src/blenders.cpp index 5a7fa856c0..5e9f4133ea 100644 --- a/modules/stitching/src/blenders.cpp +++ b/modules/stitching/src/blenders.cpp @@ -221,6 +221,7 @@ MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount(); + gpu_feed_idx_ = 0; #else (void) try_gpu; can_use_gpu_ = false; @@ -248,6 +249,15 @@ void MultiBandBlender::prepare(Rect dst_roi) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) if (can_use_gpu_) { + gpu_initialized_ = false; + gpu_feed_idx_ = 0; + + gpu_tl_points_.clear(); + gpu_weight_pyr_gauss_vec_.clear(); + gpu_src_pyr_laplace_vec_.clear(); + gpu_ups_.clear(); + gpu_imgs_with_border_.clear(); + gpu_dst_pyr_laplace_.resize(num_bands_ + 1); gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3); gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0)); @@ -320,7 +330,37 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) int64 t = getTickCount(); #endif - UMat img = _img.getUMat(); + UMat img; + +#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) + // If using gpu save the top left coordinate when running first time after prepare + if (can_use_gpu_) + { + if (!gpu_initialized_) + { + gpu_tl_points_.push_back(tl); + } + else + { + tl = gpu_tl_points_[gpu_feed_idx_]; + } + } + // If _img is not a GpuMat get it as UMat from the InputArray object. 
+ // If it is GpuMat make a dummy object with right dimensions but no data and + // get _img as a GpuMat + if (!_img.isGpuMat()) +#endif + { + img = _img.getUMat(); + } +#if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) + else + { + gpu_img_ = _img.getGpuMat(); + img = UMat(gpu_img_.rows, gpu_img_.cols, gpu_img_.type()); + } +#endif + CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(mask.type() == CV_8U); @@ -357,42 +397,63 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) if (can_use_gpu_) { + if (!gpu_initialized_) + { + gpu_imgs_with_border_.push_back(cuda::GpuMat()); + gpu_weight_pyr_gauss_vec_.push_back(std::vector(num_bands_+1)); + gpu_src_pyr_laplace_vec_.push_back(std::vector(num_bands_+1)); + gpu_ups_.push_back(std::vector(num_bands_)); + } + + // If _img is not GpuMat upload it to gpu else gpu_img_ was set already + if (!_img.isGpuMat()) + { + gpu_img_.upload(img); + } + // Create the source image Laplacian pyramid - cuda::GpuMat gpu_img; - gpu_img.upload(img); - cuda::GpuMat img_with_border; - cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT); - std::vector gpu_src_pyr_laplace(num_bands_ + 1); - img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S); + cuda::copyMakeBorder(gpu_img_, gpu_imgs_with_border_[gpu_feed_idx_], top, bottom, + left, right, BORDER_REFLECT); + gpu_imgs_with_border_[gpu_feed_idx_].convertTo(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][0], CV_16S); for (int i = 0; i < num_bands_; ++i) - cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]); + cuda::pyrDown(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i], + gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1]); for (int i = 0; i < num_bands_; ++i) { - cuda::GpuMat up; - cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up); - cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]); + 
cuda::pyrUp(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i + 1], gpu_ups_[gpu_feed_idx_][i]); + cuda::subtract(gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i], + gpu_ups_[gpu_feed_idx_][i], + gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i]); } - // Create the weight map Gaussian pyramid - cuda::GpuMat gpu_mask; - gpu_mask.upload(mask); - cuda::GpuMat weight_map; - std::vector gpu_weight_pyr_gauss(num_bands_ + 1); + // Create the weight map Gaussian pyramid only if not yet initialized + if (!gpu_initialized_) + { + if (mask.isGpuMat()) + { + gpu_mask_ = mask.getGpuMat(); + } + else + { + gpu_mask_.upload(mask); + } - if (weight_type_ == CV_32F) - { - gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.); + if (weight_type_ == CV_32F) + { + gpu_mask_.convertTo(gpu_weight_map_, CV_32F, 1. / 255.); + } + else // weight_type_ == CV_16S + { + gpu_mask_.convertTo(gpu_weight_map_, CV_16S); + cuda::compare(gpu_mask_, 0, gpu_add_mask_, CMP_NE); + cuda::add(gpu_weight_map_, Scalar::all(1), gpu_weight_map_, gpu_add_mask_); + } + cuda::copyMakeBorder(gpu_weight_map_, gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][0], top, + bottom, left, right, BORDER_CONSTANT); + for (int i = 0; i < num_bands_; ++i) + cuda::pyrDown(gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i], + gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i + 1]); } - else // weight_type_ == CV_16S - { - gpu_mask.convertTo(weight_map, CV_16S); - cuda::GpuMat add_mask; - cuda::compare(gpu_mask, 0, add_mask, CMP_NE); - cuda::add(weight_map, Scalar::all(1), weight_map, add_mask); - } - cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); - for (int i = 0; i < num_bands_; ++i) - cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]); int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; @@ -403,9 +464,9 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) for (int i = 0; i <= num_bands_; ++i) { Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl); - cuda::GpuMat 
&_src_pyr_laplace = gpu_src_pyr_laplace[i]; + cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace_vec_[gpu_feed_idx_][i]; cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc); - cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i]; + cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss_vec_[gpu_feed_idx_][i]; cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc); using namespace cv::cuda::device::blend; @@ -420,6 +481,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } + ++gpu_feed_idx_; return; } #endif @@ -445,7 +507,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) UMat weight_map; std::vector weight_pyr_gauss(num_bands_ + 1); - if(weight_type_ == CV_32F) + if (weight_type_ == CV_32F) { mask.getUMat().convertTo(weight_map, CV_32F, 1./255.); } @@ -486,7 +548,7 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW); Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ); Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW); - if(weight_type_ == CV_32F) + if (weight_type_ == CV_32F) { for (int y = 0; y < rc.height; ++y) { @@ -540,11 +602,15 @@ void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) { - cv::UMat dst_band_weights_0; Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height); #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING) if (can_use_gpu_) { + if (!gpu_initialized_) + { + gpu_ups_.push_back(std::vector(num_bands_+1)); + } + for (int i = 0; i <= num_bands_; ++i) { cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i]; @@ -564,20 +630,50 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) // Restore image from Laplacian pyramid for (size_t i = num_bands_; i > 0; --i) { - cuda::GpuMat up; 
- cuda::pyrUp(gpu_dst_pyr_laplace_[i], up); - cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]); + cuda::pyrUp(gpu_dst_pyr_laplace_[i], gpu_ups_[gpu_ups_.size()-1][num_bands_-i]); + cuda::add(gpu_ups_[gpu_ups_.size()-1][num_bands_-i], + gpu_dst_pyr_laplace_[i - 1], + gpu_dst_pyr_laplace_[i - 1]); } - gpu_dst_pyr_laplace_[0](dst_rc).download(dst_); - gpu_dst_band_weights_[0].download(dst_band_weights_0); + // If dst is GpuMat do masking on gpu and return dst as a GpuMat + // else download the image to cpu and return it as an ordinary Mat + if (dst.isGpuMat()) + { + cuda::GpuMat &gpu_dst = dst.getGpuMatRef(); - gpu_dst_pyr_laplace_.clear(); - gpu_dst_band_weights_.clear(); + cuda::compare(gpu_dst_band_weights_[0](dst_rc), WEIGHT_EPS, gpu_dst_mask_, CMP_GT); + + cuda::compare(gpu_dst_mask_, 0, gpu_mask_, CMP_EQ); + + gpu_dst_pyr_laplace_[0](dst_rc).setTo(Scalar::all(0), gpu_mask_); + gpu_dst_pyr_laplace_[0](dst_rc).convertTo(gpu_dst, CV_16S); + + } + else + { + gpu_dst_pyr_laplace_[0](dst_rc).download(dst_); + Mat dst_band_weights_0; + gpu_dst_band_weights_[0].download(dst_band_weights_0); + + compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT); + Blender::blend(dst, dst_mask); + } + + // Set destination Mats to 0 so new image can be blended + for (size_t i = 0; i < num_bands_ + 1; ++i) + { + gpu_dst_band_weights_[i].setTo(0); + gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0)); + } + gpu_feed_idx_ = 0; + gpu_initialized_ = true; } else #endif { + cv::UMat dst_band_weights_0; + for (int i = 0; i <= num_bands_; ++i) normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]); @@ -588,11 +684,11 @@ void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask) dst_pyr_laplace_.clear(); dst_band_weights_.clear(); + + compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT); + + Blender::blend(dst, dst_mask); } - - compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT); - - Blender::blend(dst, 
dst_mask); } diff --git a/modules/stitching/test/test_blenders.cuda.cpp b/modules/stitching/test/test_blenders.cuda.cpp index 5d556febe9..0a3f61e2e4 100644 --- a/modules/stitching/test/test_blenders.cuda.cpp +++ b/modules/stitching/test/test_blenders.cuda.cpp @@ -50,12 +50,16 @@ namespace opencv_test { namespace { detail::MultiBandBlender blender(try_cuda, 5); blender.prepare(Rect(0, 0, max(im1.cols, im2.cols), max(im1.rows, im2.rows))); - blender.feed(im1, mask1, Point(0,0)); - blender.feed(im2, mask2, Point(0,0)); - Mat result_s, result_mask; - blender.blend(result_s, result_mask); - result_s.convertTo(result, CV_8U); + // If using CUDA, try blending multiple times without calling prepare in between + for (int i = 0; i < (try_cuda ? 10 : 1); ++i) { + blender.feed(im1, mask1, Point(0, 0)); + blender.feed(im2, mask2, Point(0, 0)); + + Mat result_s, result_mask; + blender.blend(result_s, result_mask); + result_s.convertTo(result, CV_8U); + } } TEST(CUDA_MultiBandBlender, Accuracy)