diff --git a/modules/features2d/src/kaze/AKAZEFeatures.cpp b/modules/features2d/src/kaze/AKAZEFeatures.cpp index 16c2cad032..5bcdb13b41 100644 --- a/modules/features2d/src/kaze/AKAZEFeatures.cpp +++ b/modules/features2d/src/kaze/AKAZEFeatures.cpp @@ -15,6 +15,10 @@ #include +#ifdef HAVE_OPENCL // OpenCL is not well supported +#undef HAVE_OPENCL +#endif + // Namespaces namespace cv { @@ -251,38 +255,41 @@ private: #ifdef HAVE_OPENCL static inline bool -ocl_non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size) +ocl_non_linear_diffusion_step(InputArray Lt_, InputArray Lf_, OutputArray Lstep_, float step_size) { - if(!Lt.isContinuous()) - return false; + if (!Lt_.isContinuous()) + return false; - size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows}; + UMat Lt = Lt_.getUMat(), Lf = Lf_.getUMat(), Lstep = Lstep_.getUMat(); - ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; + size_t globalSize[] = {(size_t)Lt.cols, (size_t)Lt.rows}; - return ker.args( - ocl::KernelArg::ReadOnly(Lt), - ocl::KernelArg::PtrReadOnly(Lf), - ocl::KernelArg::PtrWriteOnly(Lstep), - step_size).run(2, globalSize, 0, true); + ocl::Kernel ker("AKAZE_nld_step_scalar", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; + + return ker.args( + ocl::KernelArg::ReadOnly(Lt), + ocl::KernelArg::PtrReadOnly(Lf), + ocl::KernelArg::PtrWriteOnly(Lstep), + step_size) + .run(2, globalSize, 0, true); } #endif // HAVE_OPENCL static inline void -non_linear_diffusion_step(const UMat& Lt, const UMat& Lf, UMat& Lstep, float step_size) +non_linear_diffusion_step(InputArray Lt, InputArray Lf, OutputArray Lstep, float step_size) { CV_INSTRUMENT_REGION() Lstep.create(Lt.size(), Lt.type()); - CV_OCL_RUN(true, ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size)); +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lstep.isUMat()), ocl_non_linear_diffusion_step(Lt, Lf, Lstep, step_size)); +#endif - // when on CPU UMats should be already allocated on CPU so getMat here is basicallly no-op - Mat Mstep = Lstep.getMat(ACCESS_WRITE); - parallel_for_(Range(0, Lt.rows), NonLinearScalarDiffusionStep(Lt.getMat(ACCESS_READ), - Lf.getMat(ACCESS_READ), Mstep, step_size)); + Mat Mstep = Lstep.getMat(); + parallel_for_(Range(0, Lt.rows()), NonLinearScalarDiffusionStep(Lt.getMat(), Lf.getMat(), Mstep, step_size)); } /** @@ -347,25 +354,28 @@ compute_kcontrast(const cv::Mat& Lx, const cv::Mat& Ly, float perc, int nbins) #ifdef HAVE_OPENCL static inline bool -ocl_pm_g2(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast) +ocl_pm_g2(InputArray Lx_, InputArray Ly_, OutputArray Lflow_, float kcontrast) { - int total = Lx.rows * Lx.cols; - size_t globalSize[] = {(size_t)total}; + UMat Lx = Lx_.getUMat(), Ly = Ly_.getUMat(), Lflow = Lflow_.getUMat(); - ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; + int total = Lx.rows * Lx.cols; + size_t globalSize[] = {(size_t)total}; - return ker.args( - ocl::KernelArg::PtrReadOnly(Lx), - ocl::KernelArg::PtrReadOnly(Ly), - ocl::KernelArg::PtrWriteOnly(Lflow), - kcontrast, total).run(1, globalSize, 0, true); + ocl::Kernel ker("AKAZE_pm_g2", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; + + return ker.args( + ocl::KernelArg::PtrReadOnly(Lx), + ocl::KernelArg::PtrReadOnly(Ly), + ocl::KernelArg::PtrWriteOnly(Lflow), + kcontrast, total) + .run(1, globalSize, 0, true); } #endif // HAVE_OPENCL static inline void -compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast, int diffusivity) +compute_diffusivity(InputArray Lx, InputArray Ly, OutputArray Lflow, float kcontrast, int diffusivity) { CV_INSTRUMENT_REGION() @@ -376,7 +386,9 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast pm_g1(Lx, Ly, Lflow, kcontrast); break; case KAZE::DIFF_PM_G2: - CV_OCL_RUN(true, ocl_pm_g2(Lx, Ly, Lflow, kcontrast)); +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Lflow.isUMat()), ocl_pm_g2(Lx, Ly, Lflow, kcontrast)); +#endif pm_g2(Lx, Ly, Lflow, kcontrast); break; case KAZE::DIFF_WEICKERT: @@ -391,32 +403,6 @@ compute_diffusivity(const UMat& Lx, const UMat& Ly, UMat& Lflow, float kcontrast } } -/** - * @brief Fetches pyramid from the gpu. - * @details Setups mapping for matrices that might be probably on the GPU, if the - * code executes with OpenCL. This will setup MLx, MLy, Mdet members in the pyramid with - * mapping to respective UMats. This must be called before CPU-only parts of AKAZE, that work - * only on these Mats. - * - * This prevents mapping/unmapping overhead (and possible uploads/downloads) that would occur, if - * we just create Mats from UMats each time we need it later. This has devastating effects on OCL - * performace. - * - * @param evolution Pyramid to download - */ -static inline void downloadPyramid(std::vector& evolution) -{ - CV_INSTRUMENT_REGION() - - for (size_t i = 0; i < evolution.size(); ++i) { - Evolution& e = evolution[i]; - e.Mx = e.Lx.getMat(ACCESS_READ); - e.My = e.Ly.getMat(ACCESS_READ); - e.Mt = e.Lt.getMat(ACCESS_READ); - e.Mdet = e.Ldet.getMat(ACCESS_READ); - } -} - /** * @brief This method creates the nonlinear scale space for a given image * @param img Input image for which the nonlinear scale space needs to be created @@ -435,12 +421,11 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) if (evolution_.size() == 1) { // we don't need to compute kcontrast factor Compute_Determinant_Hessian_Response(); - downloadPyramid(evolution_); return; } // derivatives, flow and diffusion step - UMat Lx, Ly, Lsmooth, Lflow, Lstep; + Mat Lx, Ly, Lsmooth, Lflow, Lstep; // compute derivatives for computing k contrast GaussianBlur(img, Lsmooth, Size(5, 5), 1.0f, 1.0f, BORDER_REPLICATE); @@ -448,8 +433,7 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) Scharr(Lsmooth, Ly, CV_32F, 0, 1, 1, 0, BORDER_DEFAULT); Lsmooth.release(); // compute the kcontrast factor - float kcontrast = compute_kcontrast(Lx.getMat(ACCESS_READ), Ly.getMat(ACCESS_READ), - options_.kcontrast_percentile, options_.kcontrast_nbins); + float kcontrast = compute_kcontrast(Lx, Ly, options_.kcontrast_percentile, options_.kcontrast_nbins); // Now generate the rest of evolution levels for (size_t i = 1; i < evolution_.size(); i++) { @@ -483,31 +467,30 @@ void AKAZEFeatures::Create_Nonlinear_Scale_Space(InputArray img) } Compute_Determinant_Hessian_Response(); - downloadPyramid(evolution_); - - return; } /* ************************************************************************* */ #ifdef HAVE_OPENCL static inline bool -ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, - UMat& Ldet, float sigma) +ocl_compute_determinant(InputArray Lxx_, InputArray Lxy_, InputArray Lyy_, OutputArray Ldet_, float sigma) { - const int total = Lxx.rows * Lxx.cols; - size_t globalSize[] = {(size_t)total}; + UMat Lxx = Lxx_.getUMat(), Lxy = Lxy_.getUMat(), Lyy = Lyy_.getUMat(), Ldet = Ldet_.getUMat(); - ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc); - if( ker.empty() ) - return false; + const int total = Lxx.rows * Lxx.cols; + size_t globalSize[] = {(size_t)total}; - return ker.args( - ocl::KernelArg::PtrReadOnly(Lxx), - ocl::KernelArg::PtrReadOnly(Lxy), - ocl::KernelArg::PtrReadOnly(Lyy), - ocl::KernelArg::PtrWriteOnly(Ldet), - sigma, total).run(1, globalSize, 0, true); + ocl::Kernel ker("AKAZE_compute_determinant", ocl::features2d::akaze_oclsrc); + if (ker.empty()) + return false; + + return ker.args( + ocl::KernelArg::PtrReadOnly(Lxx), + ocl::KernelArg::PtrReadOnly(Lxy), + ocl::KernelArg::PtrReadOnly(Lyy), + ocl::KernelArg::PtrWriteOnly(Ldet), + sigma, total) + .run(1, globalSize, 0, true); } #endif // HAVE_OPENCL @@ -521,27 +504,30 @@ ocl_compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, * @param Ldet output determinant * @param sigma determinant will be scaled by this sigma */ -static inline void compute_determinant(const UMat& Lxx, const UMat& Lxy, const UMat& Lyy, - UMat& Ldet, float sigma) +static inline void compute_determinant(InputArray Lxx, InputArray Lxy, InputArray Lyy, OutputArray Ldet, float sigma) { - CV_INSTRUMENT_REGION() + CV_INSTRUMENT_REGION() - Ldet.create(Lxx.size(), Lxx.type()); + Ldet.create(Lxx.size(), Lxx.type()); - CV_OCL_RUN(true, ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma)); - - // output determinant - Mat Mxx = Lxx.getMat(ACCESS_READ), Mxy = Lxy.getMat(ACCESS_READ), Myy = Lyy.getMat(ACCESS_READ); - Mat Mdet = Ldet.getMat(ACCESS_WRITE); - float *lxx = Mxx.ptr(); - float *lxy = Mxy.ptr(); - float *lyy = Myy.ptr(); - float *ldet = Mdet.ptr(); - const int total = Lxx.cols * Lxx.rows; - for (int j = 0; j < total; j++) { - ldet[j] = (lxx[j] * lyy[j] - lxy[j] * lxy[j]) * sigma; - } +#ifdef HAVE_OPENCL + CV_OCL_RUN(OCL_PERFORMANCE_CHECK(Ldet.isUMat()), ocl_compute_determinant(Lxx, Lxy, Lyy, Ldet, sigma)); +#endif + // output determinant + Mat Mxx = Lxx.getMat(), Mxy = Lxy.getMat(), Myy = Lyy.getMat(), Mdet = Ldet.getMat(); + const int W = Mxx.cols, H = Mxx.rows; + for (int y = 0; y < H; y++) + { + float *lxx = Mxx.ptr(y); + float *lxy = Mxy.ptr(y); + float *lyy = Myy.ptr(y); + float *ldet = Mdet.ptr(y); + for (int x = 0; x < W; x++) + { + ldet[x] = (lxx[x] * lyy[x] - lxy[x] * lxy[x]) * sigma; + } + } } class DeterminantHessianResponse : public ParallelLoopBody @@ -554,7 +540,7 @@ public: void operator()(const Range& range) const { - UMat Lxx, Lxy, Lyy; + Mat Lxx, Lxy, Lyy; for (int i = range.start; i < range.end; i++) { @@ -670,16 +656,16 @@ public: const Evolution &e = (*evolution_)[i]; Mat &kpts = (*keypoints_by_layers_)[i]; // this mask will hold positions of keypoints in this level - kpts = Mat::zeros(e.Mdet.size(), CV_8UC1); + kpts = Mat::zeros(e.Ldet.size(), CV_8UC1); // if border is too big we shouldn't search any keypoints if (e.border + 1 >= e.Ldet.rows) continue; - const float * prev = e.Mdet.ptr(e.border - 1); - const float * curr = e.Mdet.ptr(e.border ); - const float * next = e.Mdet.ptr(e.border + 1); - const float * ldet = e.Mdet.ptr(); + const float * prev = e.Ldet.ptr(e.border - 1); + const float * curr = e.Ldet.ptr(e.border ); + const float * next = e.Ldet.ptr(e.border + 1); + const float * ldet = e.Ldet.ptr(); uchar *mask = kpts.ptr(); const int search_radius = e.sigma_size; // size of keypoint in this level @@ -743,8 +729,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector& keypoints_by_laye const Mat &keypoints = keypoints_by_layers[i]; const uchar *const kpts = keypoints_by_layers[i].ptr(); uchar *const kpts_prev = keypoints_by_layers[i-1].ptr(); - const float *const ldet = evolution_[i].Mdet.ptr(); - const float *const ldet_prev = evolution_[i-1].Mdet.ptr(); + const float *const ldet = evolution_[i].Ldet.ptr(); + const float *const ldet_prev = evolution_[i-1].Ldet.ptr(); // ratios are just powers of 2 const int diff_ratio = (int)evolution_[i].octave_ratio / (int)evolution_[i-1].octave_ratio; const int search_radius = evolution_[i].sigma_size * diff_ratio; // size of keypoint in this level @@ -775,8 +761,8 @@ void AKAZEFeatures::Find_Scale_Space_Extrema(std::vector& keypoints_by_laye const Mat &keypoints = keypoints_by_layers[i]; const uchar *const kpts = keypoints_by_layers[i].ptr(); uchar *const kpts_next = keypoints_by_layers[i+1].ptr(); - const float *const ldet = evolution_[i].Mdet.ptr(); - const float *const ldet_next = evolution_[i+1].Mdet.ptr(); + const float *const ldet = evolution_[i].Ldet.ptr(); + const float *const ldet_next = evolution_[i+1].Ldet.ptr(); // ratios are just powers of 2, i+1 ratio is always greater or equal to i const int diff_ratio = (int)evolution_[i+1].octave_ratio / (int)evolution_[i].octave_ratio; const int search_radius = evolution_[i+1].sigma_size; // size of keypoints in upper level @@ -814,7 +800,7 @@ void AKAZEFeatures::Do_Subpixel_Refinement( for (size_t i = 0; i < keypoints_by_layers.size(); i++) { const Evolution &e = evolution_[i]; - const float * const ldet = e.Mdet.ptr(); + const float * const ldet = e.Ldet.ptr(); const float ratio = e.octave_ratio; const int cols = e.Ldet.cols; const Mat& keypoints = keypoints_by_layers[i]; @@ -1308,7 +1294,7 @@ void Compute_Main_Orientation(KeyPoint& kpt, const std::vector& evolu // Sample derivatives responses for the points within radius of 6*scale const int ang_size = 109; float resX[ang_size], resY[ang_size]; - Sample_Derivative_Response_Radius6(e.Mx, e.My, x0, y0, scale, resX, resY); + Sample_Derivative_Response_Radius6(e.Lx, e.Ly, x0, y0, scale, resX, resY); // Compute the angle of each gradient vector float Ang[ang_size]; @@ -1445,8 +1431,8 @@ void MSURF_Upright_Descriptor_64_Invoker::Get_MSURF_Upright_Descriptor_64(const ratio = (float)(1 << kpt.octave); scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; yf = kpt.pt.y / ratio; xf = kpt.pt.x / ratio; @@ -1575,8 +1561,8 @@ void MSURF_Descriptor_64_Invoker::Get_MSURF_Descriptor_64(const KeyPoint& kpt, f scale = cvRound(0.5f*kpt.size / ratio); angle = kpt.angle * static_cast(CV_PI / 180.f); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; yf = kpt.pt.y / ratio; xf = kpt.pt.x / ratio; co = cos(angle); @@ -1708,9 +1694,9 @@ void Upright_MLDB_Full_Descriptor_Invoker::Get_Upright_MLDB_Full_Descriptor(cons ratio = (float)(1 << kpt.octave); scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; yf = kpt.pt.y / ratio; xf = kpt.pt.x / ratio; @@ -1795,9 +1781,9 @@ void MLDB_Full_Descriptor_Invoker::MLDB_Fill_Values(float* values, int sample_st int pattern_size = options_->descriptor_pattern_size; int chan = options_->descriptor_channels; int valpos = 0; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; for (int i = -pattern_size; i < pattern_size; i += sample_step) { for (int j = -pattern_size; j < pattern_size; j += sample_step) { @@ -1944,9 +1930,9 @@ void MLDB_Descriptor_Subset_Invoker::Get_MLDB_Descriptor_Subset(const KeyPoint& int scale = cvRound(0.5f*kpt.size / ratio); float angle = kpt.angle * static_cast(CV_PI / 180.f); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; float yf = kpt.pt.y / ratio; float xf = kpt.pt.x / ratio; float co = cos(angle); @@ -2051,9 +2037,9 @@ void Upright_MLDB_Descriptor_Subset_Invoker::Get_Upright_MLDB_Descriptor_Subset( float ratio = (float)(1 << kpt.octave); int scale = cvRound(0.5f*kpt.size / ratio); const int level = kpt.class_id; - Mat Lx = evolution[level].Mx; - Mat Ly = evolution[level].My; - Mat Lt = evolution[level].Mt; + const Mat Lx = evolution[level].Lx; + const Mat Ly = evolution[level].Ly; + const Mat Lt = evolution[level].Lt; float yf = kpt.pt.y / ratio; float xf = kpt.pt.x / ratio; diff --git a/modules/features2d/src/kaze/AKAZEFeatures.h b/modules/features2d/src/kaze/AKAZEFeatures.h index 9f44b57937..18dc5fd99c 100644 --- a/modules/features2d/src/kaze/AKAZEFeatures.h +++ b/modules/features2d/src/kaze/AKAZEFeatures.h @@ -29,15 +29,10 @@ struct Evolution border = 0; } - UMat Lx, Ly; ///< First order spatial derivatives - UMat Lt; ///< Evolution image - UMat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards - UMat Ldet; ///< Detector response - - // the same as above, holding CPU mapping to UMats above - Mat Mx, My; - Mat Mt; - Mat Mdet; + Mat Lx, Ly; ///< First order spatial derivatives + Mat Lt; ///< Evolution image + Mat Lsmooth; ///< Smoothed image, used only for computing determinant, released afterwards + Mat Ldet; ///< Detector response Size size; ///< Size of the layer float etime; ///< Evolution time