diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index 459932cc68..cbecaec796 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -45,29 +45,28 @@ #include #include "precomp.hpp" -#ifdef HAVE_CLAMDFFT - using namespace cv; using namespace cv::ocl; using namespace std; #if !defined HAVE_OPENCL -void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) +void cv::ocl::dft(const oclMat &, oclMat &, Size , int ) { throw_nogpu(); } #elif !defined HAVE_CLAMDFFT -void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) +void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); } #else -#include - +#include "clAmdFft.h" namespace cv { namespace ocl { + void fft_setup(); + void fft_teardown(); enum FftType { C2R = 1, // complex to complex @@ -76,73 +75,94 @@ namespace cv }; struct FftPlan { - friend void fft_setup(); - friend void fft_teardown(); - ~FftPlan(); protected: + clAmdFftPlanHandle plHandle; + FftPlan& operator=(const FftPlan&); + public: FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + ~FftPlan(); + inline clAmdFftPlanHandle getPlanHandle() { return plHandle; } + const Size dft_size; const int src_step, dst_step; const int flags; const FftType type; - clAmdFftPlanHandle plHandle; - static vector planStore; - static bool started; - static clAmdFftSetupData *setupData; + }; + class PlanCache + { + protected: + PlanCache(); + ~PlanCache(); + friend class auto_ptr; + static auto_ptr planCache; + + bool started; + vector planStore; + clAmdFftSetupData *setupData; public: + friend void fft_setup(); + friend void fft_teardown(); + + static PlanCache* getPlanCache() + { + if( NULL == planCache.get()) + planCache.reset(new PlanCache()); + return planCache.get(); + } // return a baked plan-> // if there is one matched plan, return it // if not, bake a new one, put it into the planStore and return it. - static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + + // remove a single plan from the store + // return true if the plan is successfully removed + // else + static bool removePlan(clAmdFftPlanHandle ); }; } } -bool cv::ocl::FftPlan::started = false; -vector cv::ocl::FftPlan::planStore = vector(); -clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0; +auto_ptr PlanCache::planCache; void cv::ocl::fft_setup() { - if(FftPlan::started) + PlanCache& pCache = *PlanCache::getPlanCache(); + if(pCache.started) { return; } - FftPlan::setupData = new clAmdFftSetupData; - openCLSafeCall(clAmdFftInitSetupData( FftPlan::setupData )); - FftPlan::started = true; + pCache.setupData = new clAmdFftSetupData; + openCLSafeCall(clAmdFftInitSetupData( pCache.setupData )); + pCache.started = true; } void cv::ocl::fft_teardown() { - if(!FftPlan::started) + PlanCache& pCache = *PlanCache::getPlanCache(); + if(!pCache.started) { return; } - delete FftPlan::setupData; - for(int i = 0; i < FftPlan::planStore.size(); i ++) + delete pCache.setupData; + for(size_t i = 0; i < pCache.planStore.size(); i ++) { - delete FftPlan::planStore[i]; + delete pCache.planStore[i]; } - FftPlan::planStore.clear(); + pCache.planStore.clear(); openCLSafeCall( clAmdFftTeardown( ) ); - FftPlan::started = false; + pCache.started = false; } // bake a new plan cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) - : dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type), plHandle(0) + : plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), flags(_flags), type(_type) { - if(!FftPlan::started) - { - // implicitly do fft setup - fft_setup(); - } + fft_setup(); bool is_1d_input = (_dft_size.height == 1); int is_row_dft = flags & DFT_ROWS; - int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; + int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; - clAmdFftResultLocation place; + //clAmdFftResultLocation place; clAmdFftLayout inLayout; clAmdFftLayout outLayout; clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; @@ -150,7 +170,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla size_t batchSize = is_row_dft ? dft_size.height : 1; size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1}; - size_t clLengthsOut[ 3 ] = {1, 1, 1}; + //size_t clLengthsOut[ 3 ] = {1, 1, 1}; size_t clStridesOut[ 3 ] = {1, 1, 1}; clLengthsIn[0] = dft_size.width; clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; @@ -166,14 +186,12 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clStridesOut[1] = clStridesIn[1]; break; case R2C: - CV_Assert(!is_row_dft); // this is not supported yet inLayout = CLFFT_REAL; outLayout = CLFFT_HERMITIAN_INTERLEAVED; clStridesIn[1] = src_step / sizeof(float); clStridesOut[1] = dst_step / sizeof(std::complex); break; case C2R: - CV_Assert(!is_row_dft); // this is not supported yet inLayout = CLFFT_HERMITIAN_INTERLEAVED; outLayout = CLFFT_REAL; clStridesIn[1] = src_step / sizeof(std::complex); @@ -197,27 +215,39 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) ); openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) ); - openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesIn[ dim ]) ); + openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) ); + + float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f; + openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); + + //ready to bake openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &(Context::getContext()->impl->clCmdQueue), NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { - for(int i = 0; i < planStore.size(); i ++) - { - if(planStore[i]->plHandle == plHandle) - { - planStore.erase(planStore.begin() + i); - } - } openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); } -clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) +cv::ocl::PlanCache::PlanCache() + : started(false), + planStore(vector()), + setupData(NULL) { +} + +cv::ocl::PlanCache::~PlanCache() +{ + fft_teardown(); +} + +FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type) +{ + PlanCache& pCache = *PlanCache::getPlanCache(); + vector& pStore = pCache.planStore; // go through search - for(int i = 0; i < planStore.size(); i ++) + for(size_t i = 0; i < pStore.size(); i ++) { - FftPlan *plan = planStore[i]; + FftPlan *plan = pStore[i]; if( plan->dft_size.width == _dft_size.width && plan->dft_size.height == _dft_size.height && @@ -225,15 +255,31 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int plan->src_step == _src_step && plan->dst_step == _dst_step && plan->type == _type - ) + ) { - return plan->plHandle; + return plan; } } // no baked plan is found FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _flags, _type); - planStore.push_back(newPlan); - return newPlan->plHandle; + pStore.push_back(newPlan); + return newPlan; +} + +bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle) +{ + PlanCache& pCache = *PlanCache::getPlanCache(); + vector& pStore = pCache.planStore; + for(size_t i = 0; i < pStore.size(); i ++) + { + if(pStore[i]->getPlanHandle() == plHandle) + { + pStore.erase(pStore.begin() + i); + delete pStore[i]; + return true; + } + } + return false; } void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) @@ -245,19 +291,20 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) // check if the given dft size is of optimal dft size CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area())); + // the two flags are not compatible + CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) ); + // similar assertions with cuda module CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); - // we don't support DFT_SCALE flag - CV_Assert(!(DFT_SCALE & flags)); - - bool is_1d_input = (src.rows == 1); - int is_row_dft = flags & DFT_ROWS; - int is_scaled_dft = flags & DFT_SCALE; + //bool is_1d_input = (src.rows == 1); + //int is_row_dft = flags & DFT_ROWS; + //int is_scaled_dft = flags & DFT_SCALE; int is_inverse = flags & DFT_INVERSE; bool is_complex_input = src.channels() == 2; bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + // We don't support real-to-real transform CV_Assert(is_complex_input || is_complex_output); FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1); @@ -268,12 +315,10 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) dst.create(src.rows, src.cols, CV_32FC2); break; case R2C: - CV_Assert(!is_row_dft); // this is not supported yet dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); break; case C2R: CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); - CV_Assert(!is_row_dft); // this is not supported yet dst.create(src.rows, dft_size.width, CV_32FC1); break; default: @@ -282,13 +327,14 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) throw exception(); break; } - clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type); + clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle(); //get the buffersize size_t buffersize = 0; openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); //allocate the intermediate buffer + // TODO, bind this with the current FftPlan cl_mem clMedBuffer = NULL; if (buffersize) { @@ -297,17 +343,17 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) openCLSafeCall( medstatus ); } openCLSafeCall( clAmdFftEnqueueTransform( plHandle, - is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, - 1, - &src.clCxt->impl->clCmdQueue, - 0, NULL, NULL, - (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); + is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, + 1, + &src.clCxt->impl->clCmdQueue, + 0, NULL, NULL, + (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); if(clMedBuffer) { openCLFree(clMedBuffer); } + //fft_teardown(); } #endif -#endif //HAVE_CLAMDFFT diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index 13c71a8c32..0fee8b03d4 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -48,50 +48,59 @@ using namespace std; #ifdef HAVE_CLAMDFFT //////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, bool) +PARAM_TEST_CASE(Dft, cv::Size, int) { cv::Size dft_size; - bool dft_rows; - //std::vector oclinfo; + int dft_flags; virtual void SetUp() { - //int devnums = getDevice(oclinfo); - // CV_Assert(devnums > 0); - dft_size = GET_PARAM(0); - dft_rows = GET_PARAM(1); + dft_size = GET_PARAM(0); + dft_flags = GET_PARAM(1); } }; TEST_P(Dft, C2C) { - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); + cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 100.0); cv::Mat b_gold; - int flags = 0; - flags |= dft_rows ? cv::DFT_ROWS : 0; cv::ocl::oclMat d_b; - cv::dft(a, b_gold, flags); - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + cv::dft(a, b_gold, dft_flags); + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags); EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, ""); } +TEST_P(Dft, R2C) +{ + cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 100.0); + cv::Mat b_gold, b_gold_roi; + + cv::ocl::oclMat d_b, d_c; + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), dft_flags); + cv::dft(a, b_gold, cv::DFT_COMPLEX_OUTPUT | dft_flags); + + b_gold_roi = b_gold(cv::Rect(0, 0, d_b.cols, d_b.rows)); + EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, ""); + + cv::Mat c_gold; + cv::dft(b_gold, c_gold, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE); + EXPECT_MAT_NEAR(b_gold_roi, cv::Mat(d_b), a.size().area() * 1e-4, ""); +} TEST_P(Dft, R2CthenC2R) { cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); - int flags = 0; - //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet - cv::ocl::oclMat d_b, d_c; - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), 0); + cv::ocl::dft(d_b, d_c, a.size(), cv::DFT_SCALE | cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT); EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); } -INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( - testing::Values(cv::Size(5, 4), cv::Size(20, 20)), - testing::Values(false, true))); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( + testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), + testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); #endif // HAVE_CLAMDFFT