From e8d9ed89559ad33167d23e562b52d18bb0c9f817 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 20 Sep 2013 19:19:52 +0400 Subject: [PATCH 1/8] ocl: split initialization.cpp into 3 files: context, operations, programcache --- modules/nonfree/src/surf.ocl.cpp | 2 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 148 ++- .../ocl/include/opencv2/ocl/private/util.hpp | 228 ++-- modules/ocl/perf/main.cpp | 28 +- modules/ocl/src/arithm.cpp | 122 +- modules/ocl/src/bgfg_mog.cpp | 4 +- modules/ocl/src/binarycaching.hpp | 55 +- modules/ocl/src/brute_force_matcher.cpp | 8 +- modules/ocl/src/canny.cpp | 8 +- modules/ocl/src/cl_context.cpp | 507 ++++++++ modules/ocl/src/cl_operations.cpp | 434 +++++++ modules/ocl/src/cl_programcache.cpp | 311 +++++ modules/ocl/src/error.cpp | 16 +- modules/ocl/src/fft.cpp | 44 +- modules/ocl/src/filtering.cpp | 2 +- modules/ocl/src/gemm.cpp | 2 +- modules/ocl/src/gftt.cpp | 2 +- modules/ocl/src/haar.cpp | 14 +- modules/ocl/src/hog.cpp | 14 +- modules/ocl/src/imgproc.cpp | 38 +- modules/ocl/src/initialization.cpp | 1090 ----------------- modules/ocl/src/knearest.cpp | 14 +- modules/ocl/src/matrix_operations.cpp | 13 +- modules/ocl/src/mcwutil.cpp | 24 +- modules/ocl/src/moments.cpp | 4 +- modules/ocl/src/pyrdown.cpp | 4 +- modules/ocl/src/pyrlk.cpp | 4 +- modules/ocl/src/pyrup.cpp | 4 +- modules/ocl/src/split_merge.cpp | 4 +- modules/ocl/src/stereo_csbp.cpp | 36 +- modules/ocl/src/stereobp.cpp | 5 +- modules/ocl/src/tvl1flow.cpp | 2 +- modules/ocl/test/main.cpp | 35 +- modules/superres/perf/perf_superres_ocl.cpp | 3 - modules/superres/src/btv_l1_ocl.cpp | 2 +- modules/superres/test/test_superres.cpp | 2 - 36 files changed, 1699 insertions(+), 1534 deletions(-) create mode 100644 modules/ocl/src/cl_context.cpp create mode 100644 modules/ocl/src/cl_operations.cpp create mode 100644 modules/ocl/src/cl_programcache.cpp delete mode 100644 modules/ocl/src/initialization.cpp diff --git a/modules/nonfree/src/surf.ocl.cpp 
b/modules/nonfree/src/surf.ocl.cpp index f8c1ad7294..59eab705d6 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -74,7 +74,7 @@ namespace cv } cl_kernel kernel; kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); - size_t wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS); sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast(wave_size)); openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr); diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index c296f57a3d..21bb607471 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -57,8 +57,7 @@ namespace cv { namespace ocl { - using std::auto_ptr; - enum + enum DeviceType { CVCL_DEVICE_TYPE_DEFAULT = (1 << 0), CVCL_DEVICE_TYPE_CPU = (1 << 1), @@ -93,77 +92,113 @@ namespace cv //return -1 if the target type is unsupported, otherwise return 0 CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT); - //this class contains ocl runtime information - class CV_EXPORTS Info + // these classes contain OpenCL runtime information + + struct PlatformInfo; + + struct DeviceInfo { - public: - struct Impl; - Impl *impl; + int _id; // reserved, don't use it - Info(); - Info(const Info &m); - ~Info(); - void release(); - Info &operator = (const Info &m); - std::vector DeviceName; + DeviceType deviceType; + std::string deviceProfile; + std::string deviceVersion; + std::string deviceName; + std::string deviceVendor; + int deviceVendorId; + std::string deviceDriverVersion; + std::string deviceExtensions; + + size_t maxWorkGroupSize; + std::vector maxWorkItemSizes; + int maxComputeUnits; + size_t localMemorySize; + + int deviceVersionMajor; + int deviceVersionMinor; + + bool haveDoubleSupport; + bool 
isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0 + + std::string compilationExtraOptions; + + const PlatformInfo* platform; + + DeviceInfo(); }; + + struct PlatformInfo + { + int _id; // reserved, don't use it + + std::string platformProfile; + std::string platformVersion; + std::string platformName; + std::string platformVendor; + std::string platformExtensons; + + int platformVersionMajor; + int platformVersionMinor; + + std::vector devices; + + PlatformInfo(); + }; + //////////////////////////////// Initialization & Info //////////////////////// - //this function may be obsoleted - //CV_EXPORTS cl_device_id getDevice(); - //the function must be called before any other cv::ocl::functions, it initialize ocl runtime - //each Info relates to an OpenCL platform - //there is one or more devices in each platform, each one has a separate name - CV_EXPORTS int getDevice(std::vector &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU); + typedef std::vector PlatformsInfo; - //set device you want to use, optional function after getDevice be called - //the devnum is the index of the selected device in DeviceName vector of INfo - CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); + CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms); - //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue - //returns cl_context * - CV_EXPORTS void* getoclContext(); - //returns cl_command_queue * - CV_EXPORTS void* getoclCommandQueue(); + typedef std::vector DevicesInfo; - //explicit call clFinish. The global command queue will be used. 
- CV_EXPORTS void finish(); + CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, + const PlatformInfo* platform = NULL); - //this function enable ocl module to use customized cl_context and cl_command_queue - //getDevice also need to be called before this function - CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); - - //returns true when global OpenCL context is initialized - CV_EXPORTS bool initialized(); + // set device you want to use + CV_EXPORTS void setDevice(const DeviceInfo* info); //////////////////////////////// Error handling //////////////////////// CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); - //////////////////////////////// OpenCL context //////////////////////// - //This is a global singleton class used to represent a OpenCL context. + enum FEATURE_TYPE + { + FEATURE_CL_DOUBLE = 1, + FEATURE_CL_UNIFIED_MEM, + FEATURE_CL_VER_1_2 + }; + + // Represents OpenCL context, interface class CV_EXPORTS Context { protected: - Context(); - friend class auto_ptr; - friend bool initialized(); - private: - static auto_ptr clCxt; - static int val; + Context() { } + ~Context() { } public: - ~Context(); - void release(); - Info::Impl* impl; - static Context* getContext(); - static void setContext(Info &oclinfo); - enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; - bool supportsFeature(int ftype) const; - size_t computeUnits() const; - void* oclContext(); - void* oclCommandQueue(); + bool supportsFeature(FEATURE_TYPE featureType) const; + const DeviceInfo& getDeviceInfo() const; + + const void* getOpenCLContextPtr() const; + const void* getOpenCLCommandQueuePtr() const; + const void* getOpenCLDeviceIDPtr() const; }; + inline const void *getClContextPtr() + { + return Context::getContext()->getOpenCLContextPtr(); + } + + inline const void *getClCommandQueuePtr() + { + return Context::getContext()->getOpenCLCommandQueuePtr(); + } + + bool 
CV_EXPORTS supportsFeature(FEATURE_TYPE featureType); + + void CV_EXPORTS finish(); + //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, @@ -384,7 +419,7 @@ namespace cv uchar *dataend; //! OpenCL context associated with the oclMat object. - Context *clCxt; + Context *clCxt; // TODO clCtx //add offset for handle ROI, calculated in byte int offset; //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used @@ -1879,11 +1914,6 @@ namespace cv oclMat temp5; }; - static inline size_t divUp(size_t total, size_t grain) - { - return (total + grain - 1) / grain; - } - /*!***************K Nearest Neighbour*************!*/ class CV_EXPORTS KNearestNeighbour: public CvKNearest { diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 3de0d438d4..2aba472f66 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -52,120 +52,138 @@ namespace cv { - namespace ocl +namespace ocl +{ + +inline cl_device_id getClDeviceID(const Context *ctx) +{ + return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); +} + +inline cl_context getClContext(const Context *ctx) +{ + return *(cl_context*)(ctx->getOpenCLContextPtr()); +} + +inline cl_command_queue getClCommandQueue(const Context *ctx) +{ + return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr()); +} + +enum openCLMemcpyKind +{ + clMemcpyHostToDevice = 0, + clMemcpyDeviceToHost, + clMemcpyDeviceToDevice +}; +///////////////////////////OpenCL call wrappers//////////////////////////// +void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height); +void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t 
height, DevMemRW rw_type, DevMemType mem_type); +void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); +void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, + const void *src, size_t spitch, + size_t width, size_t height, int src_offset); +void CV_EXPORTS openCLFree(void *devPtr); +cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); +void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); +cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, + const char **source, std::string kernelName); +cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, + const char **source, std::string kernelName, const char *build_options); +void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, + int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); +void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, int channels, int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, const char *build_options); + +cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue 
command_queue, const void *value, + const size_t size); + +cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); + +int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); + +enum FLUSH_MODE +{ + CLFINISH = 0, + CLFLUSH, + DISABLE +}; + +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); +// bind oclMat to OpenCL image textures +// note: +// 1. there is no memory management. User need to explicitly release the resource +// 2. for faster clamping, there is no buffer padding for the constructed texture +cl_mem CV_EXPORTS bindTexture(const oclMat &mat); +void CV_EXPORTS releaseTexture(cl_mem& texture); + +//Represents an image texture object +class CV_EXPORTS TextureCL +{ +public: + TextureCL(cl_mem tex, int r, int c, int t) + : tex_(tex), rows(r), cols(c), type(t) {} + ~TextureCL() { - enum openCLMemcpyKind - { - clMemcpyHostToDevice = 0, - clMemcpyDeviceToHost, - clMemcpyDeviceToDevice - }; - ///////////////////////////OpenCL call wrappers//////////////////////////// - void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height); - void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); - void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); - void 
CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset); - void CV_EXPORTS openCLFree(void *devPtr); - cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); - void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); - cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName); - cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName, const char *build_options); - void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, - int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); - void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, int channels, int depth, const char *build_options); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); - void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, const char *build_options); + openCLFree(tex_); + } + operator cl_mem() + { + return tex_; + } + cl_mem const tex_; + const int rows; + const int cols; + const int type; +private: + //disable assignment + void operator=(const TextureCL&); +}; +// bind oclMat to OpenCL image textures and retunrs an TextureCL object +// note: +// 
for faster clamping, there is no buffer padding for the constructed texture +Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); - cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size); +// returns whether the current context supports image2d_t format or not +bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); - cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); +bool CV_EXPORTS isCpuDevice(); - int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); +size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel); - enum FLUSH_MODE - { - CLFINISH = 0, - CLFLUSH, - DISABLE - }; - void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); - void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); - // bind oclMat to OpenCL image textures - // note: - // 1. there is no memory management. User need to explicitly release the resource - // 2. 
for faster clamping, there is no buffer padding for the constructed texture - cl_mem CV_EXPORTS bindTexture(const oclMat &mat); - void CV_EXPORTS releaseTexture(cl_mem& texture); - //Represents an image texture object - class CV_EXPORTS TextureCL - { - public: - TextureCL(cl_mem tex, int r, int c, int t) - : tex_(tex), rows(r), cols(c), type(t) {} - ~TextureCL() - { - openCLFree(tex_); - } - operator cl_mem() - { - return tex_; - } - cl_mem const tex_; - const int rows; - const int cols; - const int type; - private: - //disable assignment - void operator=(const TextureCL&); - }; - // bind oclMat to OpenCL image textures and retunrs an TextureCL object - // note: - // for faster clamping, there is no buffer padding for the constructed texture - Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); +inline size_t divUp(size_t total, size_t grain) +{ + return (total + grain - 1) / grain; +} - // returns whether the current context supports image2d_t format or not - bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); - - // the enums are used to query device information - // currently only support wavefront size queries - enum DEVICE_INFO - { - WAVEFRONT_SIZE, //in AMD speak - IS_CPU_DEVICE //check if the device is CPU - }; - template - _ty queryDeviceInfo(cl_kernel kernel = NULL); - - template<> - int CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - template<> - size_t CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - template<> - bool CV_EXPORTS queryDeviceInfo(cl_kernel kernel); - - unsigned long CV_EXPORTS queryLocalMemInfo(); - }//namespace ocl +inline size_t roundUp(size_t sz, size_t n) +{ + // we don't assume that n is a power of 2 (see alignSize) + // equal to divUp(sz, n) * n + size_t t = sz + n - 1; + size_t rem = t % n; + size_t result = t - rem; + return result; +} +}//namespace ocl }//namespace cv #endif //__OPENCV_OCL_PRIVATE_UTIL__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e24c2c14e5..e82af4e322 100644 --- 
a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -56,7 +56,7 @@ int main(int argc, char ** argv) const char * keys = "{ h | help | false | print help message }" "{ t | type | gpu | set device type:cpu or gpu}" - "{ p | platform | 0 | set platform id }" + "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; CommandLineParser cmd(argc, argv, keys); @@ -68,28 +68,34 @@ int main(int argc, char ** argv) } string type = cmd.get("type"); - unsigned int pid = cmd.get("platform"); + int pid = cmd.get("platform"); int device = cmd.get("device"); int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU : cv::ocl::CVCL_DEVICE_TYPE_GPU; - std::vector oclinfo; - int devnums = cv::ocl::getDevice(oclinfo, flag); - if (devnums <= device || device < 0) + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) { - std::cout << "device invalid\n"; - return -1; + std::cout << "platform is invalid\n"; + return 1; } - if (pid >= oclinfo.size()) + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? 
NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) { - std::cout << "platform invalid\n"; - return -1; + std::cout << "device/platform invalid\n"; + return 1; } - cv::ocl::setDevice(oclinfo[pid], device); + cv::ocl::setDevice(devicesInfo[device]); cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE); + cout << "Device type:" << type << endl + << "Platform name:" << devicesInfo[device]->platform->platformName << endl + << "Device name:" << devicesInfo[device]->deviceName << endl; + CV_PERF_TEST_MAIN_INTERNALS(ocl, impls) } diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 2a663b990a..7d97e67545 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -51,50 +51,10 @@ //M*/ #include "precomp.hpp" -#include - +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - //////////////////////////////// OpenCL kernel strings ///////////////////// - - extern const char *arithm_absdiff_nonsaturate; - extern const char *arithm_nonzero; - extern const char *arithm_sum; - extern const char *arithm_minMax; - extern const char *arithm_minMaxLoc; - extern const char *arithm_minMaxLoc_mask; - extern const char *arithm_LUT; - extern const char *arithm_add; - extern const char *arithm_add_mask; - extern const char *arithm_add_scalar; - extern const char *arithm_add_scalar_mask; - extern const char *arithm_bitwise_binary; - extern const char *arithm_bitwise_binary_mask; - extern const char *arithm_bitwise_binary_scalar; - extern const char *arithm_bitwise_binary_scalar_mask; - extern const char *arithm_bitwise_not; - extern const char *arithm_compare; - extern const char *arithm_transpose; - extern const char *arithm_flip; - extern const char *arithm_flip_rc; - extern const char *arithm_magnitude; - extern const char *arithm_cartToPolar; - extern const char *arithm_polarToCart; - extern const char *arithm_exp; - extern const char *arithm_log; - 
extern const char *arithm_addWeighted; - extern const char *arithm_phase; - extern const char *arithm_pow; - extern const char *arithm_setidentity; - } -} ////////////////////////////////////////////////////////////////////////////// /////////////////////// add subtract multiply divide ///////////////////////// @@ -106,7 +66,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const oclMat &dst, int op_type, bool use_scalar = false) { Context *clCxt = src1.clCxt; - bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F)) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); @@ -264,7 +224,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp, - string kernelName, const char **kernelString) + string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src1.type() == src2.type()); dst.create(src1.size(), CV_8UC1); @@ -295,13 +255,13 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str()); } void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ 
-358,7 +318,7 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth) { CV_Assert(src.step % src.elemSize() == 0); - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum * src.oclchannels(); @@ -385,7 +345,7 @@ typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth); Scalar cv::ocl::sum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -396,7 +356,7 @@ Scalar cv::ocl::sum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); if (!hasDouble && ddepth == CV_64F) ddepth = CV_32F; @@ -407,7 +367,7 @@ Scalar cv::ocl::sum(const oclMat &src) Scalar cv::ocl::absSum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -418,7 +378,7 @@ Scalar cv::ocl::absSum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = std::max(src.depth(), CV_32S); if (!hasDouble && ddepth == CV_64F) ddepth = CV_32F; @@ -429,7 +389,7 @@ Scalar cv::ocl::absSum(const oclMat &src) Scalar cv::ocl::sqrSum(const oclMat &src) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -440,7 +400,7 @@ Scalar 
cv::ocl::sqrSum(const oclMat &src) arithmetic_sum }; - bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE); int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F); sumFunc func = functab[ddepth - CV_32S]; @@ -524,7 +484,7 @@ template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum * 2 * src.elemSize(); @@ -566,7 +526,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons if (minVal == NULL && maxVal == NULL) return; - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -699,7 +659,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -746,7 +706,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -792,9 +752,9 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern 
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip; + const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip; - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); } void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) @@ -860,10 +820,10 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) //////////////////////////////// exp log ///////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { Context *clCxt = src.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -893,7 +853,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), -1, buildOptions.c_str()); } @@ -913,7 +873,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if 
(!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -955,9 +915,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude"); } -static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -985,7 +945,7 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) @@ -1004,7 +964,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleI static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, string kernelName, bool angleInDegrees) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1057,7 +1017,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat static void 
arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1176,7 +1136,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { CV_Assert(src.oclchannels() == 1); - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int minloc = -1 , maxloc = -1; int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ; @@ -1238,7 +1198,7 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal, void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); return; @@ -1251,7 +1211,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, }; minMaxLocFunc func; - func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)]; + func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)]; func(src, minVal, maxVal, minLoc, maxLoc, mask); } @@ -1296,7 +1256,7 @@ int cv::ocl::countNonZero(const oclMat &src) CV_Assert(src.channels() == 1); Context *clCxt = src.clCxt; - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } @@ -1327,7 +1287,7 @@ int cv::ocl::countNonZero(const oclMat &src) 
////////////////////////////////bitwise_op//////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString) +static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { dst.create(src1.size(), src1.type()); @@ -1361,7 +1321,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } enum { AND = 0, OR, XOR }; @@ -1370,7 +1330,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca oclMat &dst, int operationType) { Context *clCxt = src1.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { cout << "Selected device does not support double" << endl; return; @@ -1442,7 +1402,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { cout << "Selected device does not support double" << endl; return; @@ -1571,7 +1531,7 @@ oclMatExpr::operator oclMat() const static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) { Context *clCxt = src.clCxt; - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) 
{ CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; @@ -1623,7 +1583,7 @@ void cv::ocl::transpose(const oclMat &src, oclMat &dst) void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst) { Context *clCxt = src1.clCxt; - bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE); + bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE); if (!hasDouble && src1.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); @@ -1688,7 +1648,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, /////////////////////////////////// Pow ////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString) +static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows); CV_Assert(src1.type() == dst.type()); @@ -1718,17 +1678,17 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); float pf = static_cast(p); - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); else args.push_back( make_pair( sizeof(cl_double), (void *)&p )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::pow(const oclMat &x, double p, oclMat &y) { - if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F) + if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && 
x.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index 3051ac82f3..cb0dee80f8 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -392,7 +392,7 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var constants->c_tau = tau; constants->c_shadowVal = shadowVal; - cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), + cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()), (void *)constants, sizeof(_contant_struct)); } @@ -635,4 +635,4 @@ void cv::ocl::MOG2::release() mean_.release(); bgmodelUsedModes_.release(); -} \ No newline at end of file +} diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/binarycaching.hpp index 0ec565f88b..cc9e71a330 100644 --- a/modules/ocl/src/binarycaching.hpp +++ b/modules/ocl/src/binarycaching.hpp @@ -50,41 +50,36 @@ using namespace std; using std::cout; using std::endl; -namespace cv +namespace cv { namespace ocl { + +class ProgramCache { - namespace ocl - { - class ProgramCache - { - protected: - ProgramCache(); - friend class auto_ptr; - static auto_ptr programCache; +protected: + ProgramCache(); + ~ProgramCache(); + friend class std::auto_ptr; +public: + static ProgramCache *getProgramCache(); - public: - ~ProgramCache(); - static ProgramCache *getProgramCache() - { - if( NULL == programCache.get()) - programCache.reset(new ProgramCache()); - return programCache.get(); - } + cl_program getProgram(const Context *ctx, const char **source, string kernelName, + const char *build_options); - //lookup the binary given the file name - cl_program progLookup(string srcsign); + void releaseProgram(); +protected: + //lookup the binary given the file name + cl_program progLookup(string srcsign); - //add program to the cache - void addProgram(string srcsign, 
cl_program program); - void releaseProgram(); + //add program to the cache + void addProgram(string srcsign, cl_program program); - map codeCache; - unsigned int cacheSize; - //The presumed watermark for the cache volume (256MB). Is it enough? - //We may need more delicate algorithms when necessary later. - //Right now, let's just leave it along. - static const unsigned MAX_PROG_CACHE_SIZE = 1024; - }; + map codeCache; + unsigned int cacheSize; - }//namespace ocl + //The presumed watermark for the cache volume (256MB). Is it enough? + //We may need more delicate algorithms when necessary later. + //Right now, let's just leave it along. + static const unsigned MAX_PROG_CACHE_SIZE = 1024; +}; +}//namespace ocl }//namespace cv diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 74da6ddd06..0273ed5891 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -245,7 +245,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType); @@ -265,7 +265,7 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); @@ -286,7 +286,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD { const oclMat zeroMask; const oclMat &tempMask = mask.data ? 
mask : zeroMask; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); @@ -469,7 +469,7 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &trainIdx, const oclMat &distance, int distType) { - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (query.cols <= 64) { knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType); diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 4c7b988f6f..a25c1973ef 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) { openCLFree(counter); } - counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); + counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err ); openCLSafeCall(err); } @@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) { unsigned int count; - openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisGlobal"; vector< pair > args; @@ -363,7 +363,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi int count_i[1] = {0}; while(count > 0) { - 
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL)); args.clear(); size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1}; @@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); - openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); + openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } } diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp new file mode 100644 index 0000000000..6413465f65 --- /dev/null +++ b/modules/ocl/src/cl_context.cpp @@ -0,0 +1,507 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +namespace cv { namespace ocl { + +extern void fft_teardown(); +extern void clBlasTeardown(); + +struct PlatformInfoImpl +{ + cl_platform_id platform_id; + + std::vector deviceIDs; + + PlatformInfo info; + + PlatformInfoImpl() + : platform_id(NULL) + { + } +}; + +struct DeviceInfoImpl +{ + cl_platform_id platform_id; + cl_device_id device_id; + + DeviceInfo info; + + DeviceInfoImpl() + : platform_id(NULL), device_id(NULL) + { + } +}; + +static std::vector global_platforms; +static std::vector global_devices; + +static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor) +{ + size_t p0 = versionStr.find(' '); + while (true) + { + if (p0 == std::string::npos) + break; + if (p0 + 1 >= versionStr.length()) + break; + char c = versionStr[p0 + 1]; + if (isdigit(c)) + break; + p0 = versionStr.find(' ', p0 + 1); + } + size_t p1 = versionStr.find('.', p0); + size_t p2 = versionStr.find(' ', p1); + if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos) + { + major = 0; + minor = 0; + return false; + } + std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1); + std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1); + major = atoi(majorStr.c_str()); + minor = atoi(minorStr.c_str()); + return true; +} + +static int initializeOpenCLDevices() +{ + assert(global_devices.size() == 0); + + std::vector platforms; + try + { + openCLSafeCall(cl::Platform::get(&platforms)); + } + catch (cv::Exception& e) + { + return 0; // OpenCL not found + } + + global_platforms.resize(platforms.size()); + + for (size_t i = 0; i < platforms.size(); ++i) + { + PlatformInfoImpl& platformInfo = global_platforms[i]; + platformInfo.info._id = i; + + cl::Platform& platform = platforms[i]; + + platformInfo.platform_id = platform(); + openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, 
&platformInfo.info.platformProfile)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor)); + openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons)); + + parseOpenCLVersion(platformInfo.info.platformVersion, + platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor); + + std::vector devices; + cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices); + if(status != CL_DEVICE_NOT_FOUND) + openCLVerifyCall(status); + + if(devices.size() > 0) + { + int baseIndx = global_devices.size(); + global_devices.resize(baseIndx + devices.size()); + platformInfo.deviceIDs.resize(devices.size()); + platformInfo.info.devices.resize(devices.size()); + + for(size_t j = 0; j < devices.size(); ++j) + { + cl::Device& device = devices[j]; + + DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j]; + deviceInfo.info._id = baseIndx + j; + deviceInfo.platform_id = platform(); + deviceInfo.device_id = device(); + + deviceInfo.info.platform = &platformInfo.info; + platformInfo.deviceIDs[j] = deviceInfo.info._id; + + cl_device_type type = -1; + openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); + deviceInfo.info.deviceType = DeviceType(type); + + openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile)); + openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); + openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); + openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); + cl_uint vendorID = -1; + openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); + deviceInfo.info.deviceVendorId = vendorID; + openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); + 
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + + parseOpenCLVersion(deviceInfo.info.deviceVersion, + deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor); + + size_t maxWorkGroupSize = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize)); + deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize; + + cl_uint maxDimensions = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions)); + std::vector maxWorkItemSizes(maxDimensions); + openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, + (void *)&maxWorkItemSizes[0], 0)); + deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes; + + cl_uint maxComputeUnits = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits)); + deviceInfo.info.maxComputeUnits = maxComputeUnits; + + cl_ulong localMemorySize = 0; + openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize)); + deviceInfo.info.localMemorySize = (size_t)localMemorySize; + + + cl_bool unifiedMemory = false; + openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory)); + deviceInfo.info.isUnifiedMemory = unifiedMemory != 0; + + //initialize extra options for compilation. Currently only fp64 is included. + //Assume 4KB is enough to store all possible extensions. 
+ openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions)); + + size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64"); + if(fp64_khr != std::string::npos) + { + deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT"; + deviceInfo.info.haveDoubleSupport = true; + } + else + { + deviceInfo.info.haveDoubleSupport = false; + } + } + } + } + + for (size_t i = 0; i < platforms.size(); ++i) + { + PlatformInfoImpl& platformInfo = global_platforms[i]; + for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j) + { + DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]]; + platformInfo.info.devices[j] = &deviceInfo.info; + } + } + + return global_devices.size(); +} + + +DeviceInfo::DeviceInfo() + : _id(-1), deviceType(DeviceType(0)), + deviceVendorId(-1), + maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), + deviceVersionMajor(0), deviceVersionMinor(0), + haveDoubleSupport(false), isUnifiedMemory(false), + platform(NULL) +{ + // nothing +} + +PlatformInfo::PlatformInfo() + : _id(-1), + platformVersionMajor(0), platformVersionMinor(0) +{ + // nothing +} + +//////////////////////////////// OpenCL context //////////////////////// +//This is a global singleton class used to represent a OpenCL context. 
+class ContextImpl : public Context +{ +public: + const cl_device_id clDeviceID; + cl_context clContext; + cl_command_queue clCmdQueue; + const DeviceInfo& deviceInfo; + +protected: + ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID) + : clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo) + { + // nothing + } + ~ContextImpl(); +public: + + static ContextImpl* getContext(); + static void setContext(const DeviceInfo* deviceInfo); + + bool supportsFeature(FEATURE_TYPE featureType) const; + + static void cleanupContext(void); +}; + +static cv::Mutex currentContextMutex; +static ContextImpl* currentContext = NULL; + +Context* Context::getContext() +{ + return currentContext; +} + +bool Context::supportsFeature(FEATURE_TYPE featureType) const +{ + return ((ContextImpl*)this)->supportsFeature(featureType); +} + +const DeviceInfo& Context::getDeviceInfo() const +{ + return ((ContextImpl*)this)->deviceInfo; +} + +const void* Context::getOpenCLContextPtr() const +{ + return &(((ContextImpl*)this)->clContext); +} + +const void* Context::getOpenCLCommandQueuePtr() const +{ + return &(((ContextImpl*)this)->clCmdQueue); +} + +const void* Context::getOpenCLDeviceIDPtr() const +{ + return &(((ContextImpl*)this)->clDeviceID); +} + + +bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const +{ + switch (featureType) + { + case FEATURE_CL_DOUBLE: + return deviceInfo.haveDoubleSupport; + case FEATURE_CL_UNIFIED_MEM: + return deviceInfo.isUnifiedMemory; + case FEATURE_CL_VER_1_2: + return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2); + } + CV_Error(CV_StsBadArg, "Invalid feature type"); + return false; +} + +#if defined(WIN32) +static bool __termination = false; +#endif + +ContextImpl::~ContextImpl() +{ + fft_teardown(); + clBlasTeardown(); + +#ifdef WIN32 + // if process is on termination stage (ExitProcess was called and other threads were terminated) + // then 
disable command queue release because it may cause program hang + if (!__termination) +#endif + { + if(clCmdQueue) + { + openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here + } + + if(clContext) + { + openCLSafeCall(clReleaseContext(clContext)); + } + } + clCmdQueue = NULL; + clContext = NULL; +} + +void ContextImpl::cleanupContext(void) +{ + cv::AutoLock lock(currentContextMutex); + if (currentContext) + delete currentContext; + currentContext = NULL; +} + +void ContextImpl::setContext(const DeviceInfo* deviceInfo) +{ + CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); + + DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; + CV_Assert(deviceInfo == &infoImpl.info); + + cl_int status = 0; + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 }; + cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status); + openCLVerifyCall(status); + // TODO add CL_QUEUE_PROFILING_ENABLE + cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status); + openCLVerifyCall(status); + + ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id); + ctx->clCmdQueue = clCmdQueue; + ctx->clContext = clContext; + + ContextImpl* old = NULL; + { + cv::AutoLock lock(currentContextMutex); + old = currentContext; + currentContext = ctx; + } + if (old != NULL) + { + delete old; + } +} + +ContextImpl* ContextImpl::getContext() +{ + return currentContext; +} + +int getOpenCLPlatforms(PlatformsInfo& platforms) +{ + platforms.clear(); + + for (size_t id = 0; id < global_platforms.size(); ++id) + { + PlatformInfoImpl& impl = global_platforms[id]; + platforms.push_back(&impl.info); + } + + return platforms.size(); +} + +int getOpenCLDevices(std::vector &devices, int deviceType, const PlatformInfo* platform) +{ + devices.clear(); + + switch(deviceType) + { + case CVCL_DEVICE_TYPE_DEFAULT: + case 
CVCL_DEVICE_TYPE_CPU: + case CVCL_DEVICE_TYPE_GPU: + case CVCL_DEVICE_TYPE_ACCELERATOR: + case CVCL_DEVICE_TYPE_ALL: + break; + default: + return 0; + } + + if (platform == NULL) + { + for (size_t id = 0; id < global_devices.size(); ++id) + { + DeviceInfoImpl& deviceInfo = global_devices[id]; + if (((int)deviceInfo.info.deviceType & deviceType) == deviceType) + { + devices.push_back(&deviceInfo.info); + } + } + } + else + { + for (size_t id = 0; id < platform->devices.size(); ++id) + { + const DeviceInfo* deviceInfo = platform->devices[id]; + if (((int)deviceInfo->deviceType & deviceType) == deviceType) + { + devices.push_back(deviceInfo); + } + } + } + + return (int)devices.size(); +} + +void setDevice(const DeviceInfo* info) +{ + ContextImpl::setContext(info); +} + +bool supportsFeature(FEATURE_TYPE featureType) +{ + return Context::getContext()->supportsFeature(featureType); +} + +struct __Module +{ + __Module() { initializeOpenCLDevices(); } + ~__Module() { ContextImpl::cleanupContext(); } +}; +static __Module __module; + + +}//namespace ocl +}//namespace cv + + +#if defined(WIN32) && defined(CVAPI_EXPORTS) + +extern "C" +BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved) +{ + if (fdwReason == DLL_PROCESS_DETACH) + { + if (lpReserved != NULL) // called after ExitProcess() call + cv::ocl::__termination = true; + } + return TRUE; +} + +#endif diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp new file mode 100644 index 0000000000..42138adbe0 --- /dev/null +++ b/modules/ocl/src/cl_operations.cpp @@ -0,0 +1,434 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +//#define PRINT_KERNEL_RUN_TIME +#define RUN_TIMES 100 +#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD +#define CL_MEM_USE_PERSISTENT_MEM_AMD 0 +#endif +//#define AMD_DOUBLE_DIFFER + +namespace cv { namespace ocl { + +DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; +DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; +int gDevMemTypeValueMap[5] = {0, + CL_MEM_ALLOC_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_PERSISTENT_MEM_AMD}; +int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; + +void finish() +{ + clFinish(getClCommandQueue(Context::getContext())); +} + +bool isCpuDevice() +{ + const DeviceInfo& info = Context::getContext()->getDeviceInfo(); + return (info.deviceType == CVCL_DEVICE_TYPE_CPU); +} + +size_t queryWaveFrontSize(cl_kernel kernel) +{ + const DeviceInfo& info = Context::getContext()->getDeviceInfo(); + if (info.deviceType == CVCL_DEVICE_TYPE_CPU) + return 1; + size_t wavefront = 0; + CV_Assert(kernel != NULL); + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()), + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &wavefront, NULL)); + return wavefront; +} + + +void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size) +{ + cl_int status; + status = 
clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0, + size, host_buffer, 0, NULL, NULL); + openCLVerifyCall(status); +} + +cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size) +{ + cl_int status; + cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status); + openCLVerifyCall(status); + return buffer; +} + +void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height) +{ + openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType); +} + +void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch, + size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) +{ + cl_int status; + *dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], + widthInBytes * height, 0, &status); + openCLVerifyCall(status); + *pitch = widthInBytes; +} + +void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch, + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels) +{ + size_t buffer_origin[3] = {0, 0, 0}; + size_t host_origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + if(kind == clMemcpyHostToDevice) + { + if(dpitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE, + 0, width * height, src, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE, + buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); + } + } + else if(kind == clMemcpyDeviceToHost) + { + if(spitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE, + 0, width * height, dst, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE, + buffer_origin, 
host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); + } + } +} + +void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset, + const void *src, size_t spitch, + size_t width, size_t height, int src_offset) +{ + size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0}; + size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0}; + size_t region[3] = {width, height, 1}; + + openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), (cl_mem)src, (cl_mem)dst, src_origin, dst_origin, + region, spitch, 0, dpitch, 0, 0, 0, 0)); +} + +void openCLFree(void *devPtr) +{ + openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); +} + +cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName) +{ + return openCLGetKernelFromSource(ctx, source, kernelName, NULL); +} + +cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName, + const char *build_options) +{ + cl_kernel kernel; + cl_int status = 0; + CV_Assert(ProgramCache::getProgramCache() != NULL); + cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options); + CV_Assert(program != NULL); + kernel = clCreateKernel(program, kernelName.c_str(), &status); + openCLVerifyCall(status); + return kernel; +} + +void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads) +{ + size_t kernelWorkGroupSize; + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx), + CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); + CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] ); + CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] ); + CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] ); + CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); + CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= 
ctx->getDeviceInfo().maxWorkGroupSize ); +} +/* Timing accumulators, only compiled when PRINT_KERNEL_RUN_TIME is defined. */ +#ifdef PRINT_KERNEL_RUN_TIME +static double total_execute_time = 0; +static double total_kernel_time = 0; +#endif +void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3], + size_t localThreads[3], vector< pair > &args, int channels, + int depth, const char *build_options) +{ + //construct kernel name + //The rule is functionName_Cn_Dn: C stands for channels, D for data type depth, and n is an integer + //for example split_C2_D2 is the split kernel for channels = 2 and depth = 2; a value of -1 omits that suffix + stringstream idxStr; + if(channels != -1) + idxStr << "_C" << channels; + if(depth != -1) + idxStr << "_D" << depth; + kernelName += idxStr.str(); + + cl_kernel kernel; + kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); + + if ( localThreads != NULL) + { + globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); + globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); + globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); + + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); + } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); + +#ifndef PRINT_KERNEL_RUN_TIME + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); +#else + cl_event event = NULL; + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, &event)); + + cl_ulong start_time, end_time, queue_time; + double execute_time = 0; + double total_time = 0; + + openCLSafeCall(clWaitForEvents(1, &event)); + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, + sizeof(cl_ulong), &start_time, 0)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &end_time, 0)); + +
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, + sizeof(cl_ulong), &queue_time, 0)); + + execute_time = (double)(end_time - start_time) / (1000 * 1000); + total_time = (double)(end_time - queue_time) / (1000 * 1000); + + total_execute_time += execute_time; + total_kernel_time += total_time; + clReleaseEvent(event); +#endif + + clFlush(getClCommandQueue(ctx)); + openCLSafeCall(clReleaseKernel(kernel)); +} +/* Overload without build options: forwards with build_options = NULL. */ +void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth) +{ + openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args, + channels, depth, NULL); +} +void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options) +/* Runs the kernel once through openCLExecuteKernel_; when PRINT_KERNEL_RUN_TIME is defined it is instead run RUN_TIMES times and the averaged timings are printed to cout. */ +{ +#ifndef PRINT_KERNEL_RUN_TIME + openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, + build_options); +#else + string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"}; + cout << endl; + cout << "Function Name: " << kernelName; + if(depth >= 0) + cout << " |data type: " << data_type[depth]; + cout << " |channels: " << channels; + cout << " |Time Unit: " << "ms" << endl; + + total_execute_time = 0; + total_kernel_time = 0; + cout << "-------------------------------------" << endl; + + cout << setiosflags(ios::left) << setw(15) << "excute time"; + cout << setiosflags(ios::left) << setw(15) << "lauch time"; + cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; + int i = 0; + for(i = 0; i < RUN_TIMES; i++) + openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, + build_options); + + cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; + cout << "average kernel
total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; +#endif +} +/* Variant of openCLExecuteKernel with explicit control: the kernel is only enqueued when globalThreads != NULL, `finish` waits with clFinish, `cleanUp` releases the kernel, and with measureKernelTime the return value is the queued-to-end profiling time divided by 10^6 (0.0 otherwise). */ +double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options, + bool finish, bool measureKernelTime, bool cleanUp) + +{ + //construct kernel name + //The rule is functionName_Cn_Dn: C stands for channels, D for data type depth, and n is an integer + //for example split_C2_D2 is the split kernel for channels = 2 and depth = 2; a value of -1 omits that suffix + stringstream idxStr; + if(channels != -1) + idxStr << "_C" << channels; + if(depth != -1) + idxStr << "_D" << depth; + kernelName += idxStr.str(); + + cl_kernel kernel; + kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); + + double kernelTime = 0.0; + + if( globalThreads != NULL) + { + if ( localThreads != NULL) + { + globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; + globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; + globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; + + //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; + cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads); + } + for(size_t i = 0; i < args.size(); i ++) + openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); + + if(measureKernelTime == false) + { + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, NULL)); + } + else + { + cl_event event = NULL; + openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads, + localThreads, 0, NULL, &event)); + + cl_ulong end_time, queue_time; + + openCLSafeCall(clWaitForEvents(1, &event)); + + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, + sizeof(cl_ulong), &end_time, 0));
+ + openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, + sizeof(cl_ulong), &queue_time, 0)); + + kernelTime = (double)(end_time - queue_time) / (1000 * 1000); + + clReleaseEvent(event); + } + } + + if(finish) + { + clFinish(getClCommandQueue(ctx)); + } + + if(cleanUp) + { + openCLSafeCall(clReleaseKernel(kernel)); + } + + return kernelTime; +} + +//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName, +// size_t globalThreads[3], size_t localThreads[3], +// vector< pair > &args, int channels, int depth, const char *build_options, +// bool finish, bool measureKernelTime, bool cleanUp) +// +//{ +// std::vector fsource; +// for (int i = 0 ; i < numFiles ; i++) +// { +// std::string str; +// if (convertToString(fileName[i], str) >= 0) +// fsource.push_back(str); +// } +// const char **source = new const char *[numFiles]; +// for (int i = 0 ; i < numFiles ; i++) +// source[i] = fsource[i].c_str(); +// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads, +// args, channels, depth, build_options, finish, measureKernelTime, cleanUp); +// fsource.clear(); +// delete []source; +// return kernelTime; +//} +/* Creates a read-only device buffer of `size` bytes and fills it from `value` with a blocking write; returns the new cl_mem. The status variable is cl_int because clCreateBuffer reports its error code through a cl_int pointer. */ +cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, + const size_t size) +{ + cl_int status; + cl_mem con_struct; + + con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status); + openCLSafeCall(status); + + openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, CL_TRUE, 0, size, + value, 0, 0, 0)); + + return con_struct; + +} + +}//namespace ocl +}//namespace cv diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp new file mode 100644 index 0000000000..3261319c34 --- /dev/null +++ b/modules/ocl/src/cl_programcache.cpp @@ -0,0 +1,311 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE
DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Guoping Long, longguoping@gmail.com +// Niko Li, newlife20080214@gmail.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include +#include "binarycaching.hpp" + +#undef __CL_ENABLE_EXCEPTIONS +#include + +namespace cv { namespace ocl { +/* + * The binary caching system to eliminate redundant program source compilation. + * Strictly, this is not a cache because we do not implement evictions right now. + * We shall add such features to trade-off memory consumption and performance when necessary. + */ +/* Single cache instance; getProgramCache() creates it on first use and returns it afterwards. NOTE(review): no locking is visible here - confirm callers do not race on the first call. */ +std::auto_ptr _programCache; +ProgramCache* ProgramCache::getProgramCache() +{ + if (NULL == _programCache.get()) + _programCache.reset(new ProgramCache()); + return _programCache.get(); +} +/* Starts with an empty code cache and a size counter of zero. */ +ProgramCache::ProgramCache() +{ + codeCache.clear(); + cacheSize = 0; +} +/* Releases every cached program through releaseProgram(). */ +ProgramCache::~ProgramCache() +{ + releaseProgram(); +} +/* Returns the program stored under the signature srcsign, or NULL when there is none. */ +cl_program ProgramCache::progLookup(string srcsign) +{ + map::iterator iter; + iter = codeCache.find(srcsign); + if(iter != codeCache.end()) + return iter->second; + else + return NULL; +} +/* Stores program under srcsign unless that signature is already present. */ +void ProgramCache::addProgram(string srcsign , cl_program program) +{ + if(!progLookup(srcsign)) + { + codeCache.insert(map::value_type(srcsign, program)); + } +} +/* Calls clReleaseProgram on every cached entry, then empties the map and resets cacheSize. */ +void ProgramCache::releaseProgram() +{ + map::iterator iter; + for(iter = codeCache.begin(); iter != codeCache.end(); iter++) + { + openCLSafeCall(clReleaseProgram(iter->second)); + } + codeCache.clear(); + cacheSize = 0; +} +/* Disk caching of built binaries defaults to off in _DEBUG builds and on otherwise. */ +static int enable_disk_cache = +#ifdef _DEBUG + false; +#else + true; +#endif +static int
update_disk_cache = false; +static String binpath = ""; +/* CACHE_NONE switches both flags off. Otherwise the CACHE_UPDATE bit and the build-specific bit (CACHE_DEBUG under _DEBUG, CACHE_RELEASE otherwise) are OR-ed into the current settings, and a non-empty path replaces binpath when disk caching is enabled. */ +void setBinaryDiskCache(int mode, String path) +{ + if(mode == CACHE_NONE) + { + update_disk_cache = 0; + enable_disk_cache = 0; + return; + } + update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; + enable_disk_cache |= +#ifdef _DEBUG + (mode & CACHE_DEBUG) == CACHE_DEBUG; +#else + (mode & CACHE_RELEASE) == CACHE_RELEASE; +#endif + if(enable_disk_cache && !path.empty()) + { + binpath = path; + } +} +/* Sets the prefix that getProgram puts in front of cached binary file names. */ +void setBinpath(const char *path) +{ + binpath = path; +} +/* Writes the binary of `program` to fileName and always returns 1; a file that cannot be opened is silently skipped. NOTE(review): exactly one binary is queried (sizeof(size_t) / sizeof(char *)), so this presumably expects a single-device program - confirm. */ +int savetofile(const Context*, cl_program &program, const char *fileName) +{ + size_t binarySize; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), + &binarySize, NULL)); + char* binary = (char*)malloc(binarySize); + if(binary == NULL) + { + CV_Error(CV_StsNoMem, "Failed to allocate host memory."); + } + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARIES, + sizeof(char *), + &binary, + NULL)); + + FILE *fp = fopen(fileName, "wb+"); + if(fp != NULL) + { + fwrite(binary, binarySize, 1, fp); + fclose(fp); + } + free(binary); /* released on every path: it used to leak when fopen failed */ + return 1; +} +/* Returns the built program for *source. It is looked up by a signature made of the source pointer, the cl context and the build options; on a miss it is built from source or loaded from a cached binary on disk. */ +cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName, + const char *build_options) +{ + cl_program program; + cl_int status = 0; + stringstream src_sign; + string srcsign; + string filename; + + if (NULL != build_options) + { + src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options; + } + else + { + src_sign << (int64)(*source) << getClContext(ctx); + } + srcsign = src_sign.str(); + + program = NULL; + program = ProgramCache::getProgramCache()->progLookup(srcsign); + + if (!program) + { + //config build programs + std::string all_build_options; + if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) + all_build_options += ctx->getDeviceInfo().compilationExtraOptions; + if (build_options != NULL) + { + all_build_options += " "; + all_build_options += build_options; + } + filename = binpath +
kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb"; + + FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL; + if(fp == NULL || update_disk_cache) + { + if(fp != NULL) + fclose(fp); + + program = clCreateProgramWithSource( + getClContext(ctx), 1, source, NULL, &status); + openCLVerifyCall(status); + cl_device_id device = getClDeviceID(ctx); + status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); + if(status == CL_SUCCESS && enable_disk_cache) + savetofile(ctx, program, filename.c_str()); + } + else + { + fseek(fp, 0, SEEK_END); + size_t binarySize = ftell(fp); + fseek(fp, 0, SEEK_SET); + char *binary = new char[binarySize]; + CV_Assert(1 == fread(binary, binarySize, 1, fp)); + fclose(fp); + /* no local 'status' here: the function-level one must receive the build result, otherwise the failure check below never sees a failed build of a cached binary */ + cl_device_id device = getClDeviceID(ctx); + program = clCreateProgramWithBinary(getClContext(ctx), + 1, + &device, + (const size_t *)&binarySize, + (const unsigned char **)&binary, + NULL, + &status); + openCLVerifyCall(status); + status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); + delete[] binary; + } + + if(status != CL_SUCCESS) + { + if(status == CL_BUILD_PROGRAM_FAILURE) + { + cl_int logStatus; + char *buildLog = NULL; + size_t buildLogSize = 0; + logStatus = clGetProgramBuildInfo(program, + getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, + buildLog, &buildLogSize); + if(logStatus != CL_SUCCESS) + std::cout << "Failed to build the program and get the build info."
<< endl; + buildLog = new char[buildLogSize]; + CV_DbgAssert(!!buildLog); + memset(buildLog, 0, buildLogSize); + openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), + CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); + std::cout << "\n\t\t\tBUILD LOG\n"; + std::cout << buildLog << endl; + delete [] buildLog; + } + openCLVerifyCall(status); + } + //Cache the program under its signature for future lookups; nothing is added once cacheSize reaches MAX_PROG_CACHE_SIZE + if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) + this->addProgram(srcsign, program); + else + cout << "Warning: code cache has been full.\n"; + } + return program; +} + +//// Converts the contents of a file into a string +//static int convertToString(const char *filename, std::string& s) +//{ +// size_t size; +// char* str; +// +// std::fstream f(filename, (std::fstream::in | std::fstream::binary)); +// if(f.is_open()) +// { +// size_t fileSize; +// f.seekg(0, std::fstream::end); +// size = fileSize = (size_t)f.tellg(); +// f.seekg(0, std::fstream::beg); +// +// str = new char[size+1]; +// if(!str) +// { +// f.close(); +// return -1; +// } +// +// f.read(str, fileSize); +// f.close(); +// str[size] = '\0'; +// +// s = str; +// delete[] str; +// return 0; +// } +// printf("Error: Failed to open file %s\n", filename); +// return -1; +//} + +} // namespace ocl +} // namespace cv diff --git a/modules/ocl/src/error.cpp b/modules/ocl/src/error.cpp index e854e70cd0..cd6d3d5346 100644 --- a/modules/ocl/src/error.cpp +++ b/modules/ocl/src/error.cpp @@ -152,19 +152,19 @@ namespace cv case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; //case CL_INVALID_PROPERTY: - // return "CL_INVALID_PROPERTY"; + // return "CL_INVALID_PROPERTY"; //case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR: - // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; + // return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; //case CL_PLATFORM_NOT_FOUND_KHR: - // return "CL_PLATFORM_NOT_FOUND_KHR"; - // //case CL_INVALID_PROPERTY_EXT: - // // return "CL_INVALID_PROPERTY_EXT";
+ // return "CL_PLATFORM_NOT_FOUND_KHR"; + // //case CL_INVALID_PROPERTY_EXT: + // // return "CL_INVALID_PROPERTY_EXT"; //case CL_DEVICE_PARTITION_FAILED_EXT: - // return "CL_DEVICE_PARTITION_FAILED_EXT"; + // return "CL_DEVICE_PARTITION_FAILED_EXT"; //case CL_INVALID_PARTITION_COUNT_EXT: - // return "CL_INVALID_PARTITION_COUNT_EXT"; + // return "CL_INVALID_PARTITION_COUNT_EXT"; //default: - // return "unknown error code"; + // return "unknown error code"; } static char buf[256]; sprintf(buf, "%d", err); diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index b6cc070fb5..c0785ac9d8 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -156,25 +156,25 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla { fft_setup(); - bool is_1d_input = (_dft_size.height == 1); - int is_row_dft = flags & DFT_ROWS; + bool is_1d_input = (_dft_size.height == 1); + int is_row_dft = flags & DFT_ROWS; int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; + int is_inverse = flags & DFT_INVERSE; - //clAmdFftResultLocation place; - clAmdFftLayout inLayout; - clAmdFftLayout outLayout; - clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; + //clAmdFftResultLocation place; + clAmdFftLayout inLayout; + clAmdFftLayout outLayout; + clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; - size_t batchSize = is_row_dft ? dft_size.height : 1; + size_t batchSize = is_row_dft ? dft_size.height : 1; size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1}; //size_t clLengthsOut[ 3 ] = {1, 1, 1}; size_t clStridesOut[ 3 ] = {1, 1, 1}; - clLengthsIn[0] = dft_size.width; - clLengthsIn[1] = is_row_dft ? 1 : dft_size.height; - clStridesIn[0] = 1; - clStridesOut[0] = 1; + clLengthsIn[0] = dft_size.width; + clLengthsIn[1] = is_row_dft ? 
1 : dft_size.height; + clStridesIn[0] = 1; + clStridesOut[0] = 1; switch(_type) { @@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; - openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) ); + openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); @@ -220,7 +220,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); //ready to bake - openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) ); + openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { @@ -296,12 +296,12 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) // similar assertions with cuda module CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); - //bool is_1d_input = (src.rows == 1); - //int is_row_dft = flags & DFT_ROWS; - //int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; - bool is_complex_input = src.channels() == 2; - bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + //bool is_1d_input = (src.rows == 1); + //int is_row_dft = flags & DFT_ROWS; + //int is_scaled_dft = flags & DFT_SCALE; + int is_inverse = flags & DFT_INVERSE; + bool is_complex_input = src.channels() == 2; + bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform @@ -338,10 +338,10 @@ void cv::ocl::dft(const 
oclMat &src, oclMat &dst, Size dft_size, int flags) if (buffersize) { cl_int medstatus; - clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); + clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } - cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue(); + cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr()); openCLSafeCall( clAmdFftEnqueueTransform( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, 1, diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 284dc61632..caaf53d849 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -1430,7 +1430,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) { - if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F) + if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index ec03c2f932..687f26f632 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -134,7 +134,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, int offb = src2.offset; int offc = dst.offset; - cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue(); + cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr(); switch(src1.type()) { case CV_32FC1: diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 37ebaafa38..29a96ae658 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -338,7 +338,7 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const 
oclMat &poin CV_DbgAssert(points.type() == CV_32FC2); points_v.resize(points.cols); openCLSafeCall(clEnqueueReadBuffer( - *reinterpret_cast(getoclCommandQueue()), + *(cl_command_queue*)getClCommandQueuePtr(), reinterpret_cast(points.data), CL_TRUE, 0, diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 212fd2c444..e3e73b3c3d 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -745,7 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; @@ -788,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->getDeviceInfo().maxComputeUnits) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -949,7 +949,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int grp_per_CU = 12; size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->getDeviceInfo().maxComputeUnits *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - @@ -1120,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std int blocksize = 8; int grp_per_CU = 12; size_t 
localThreads[3] = { blocksize, blocksize, 1 }; - size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->getDeviceInfo().maxComputeUnits *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1148,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std } int *candidate; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { int indexy = 0; @@ -1340,7 +1340,7 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, GpuHidHaarStageClassifier *stage; GpuHidHaarClassifier *classifier; GpuHidHaarTreeNode *node; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); @@ -1505,7 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( CvSize sz; CvSize winSize0 = oldCascade->orig_window_size; detect_piramid_info *scaleinfo; - cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + cl_command_queue qu = getClCommandQueue(Context::getContext()); if (flags & CV_HAAR_SCALE_IMAGE) { for(factor = 1.f;; factor *= scaleFactor) diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 55872829a9..563172bc13 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -157,7 +157,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo effect_size = Size(0, 0); - if (queryDeviceInfo()) + if (isCpuDevice()) hog_device_cpu = true; else hog_device_cpu = false; @@ -1670,9 +1670,9 @@ void cv::ocl::device::hog::compute_hists(int nbins, else { 
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } @@ -1734,9 +1734,9 @@ void cv::ocl::device::hog::normalize_hists(int nbins, else { cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } @@ -1803,9 +1803,9 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, else { cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); char opt[32] = {0}; - sprintf(opt, "-D WAVE_SIZE=%d", wave_size); + sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1, opt); } diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 7d0d941dfa..0949605e15 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -289,7 +289,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -317,7 +317,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( 
make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -380,7 +380,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); @@ -802,12 +802,12 @@ namespace cv string kernelName = "warpAffine" + s[interpolation]; - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); } else { @@ -817,8 +817,8 @@ namespace cv { float_coeffs[m][n] = coeffs[m][n]; } - coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); + 
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel @@ -872,12 +872,12 @@ namespace cv string s[3] = {"NN", "Linear", "Cubic"}; string kernelName = "warpPerspective" + s[interpolation]; - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) { cl_int st; - coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); } else { @@ -886,9 +886,9 @@ namespace cv for(int n = 0; n < 3; n++) float_coeffs[m][n] = coeffs[m][n]; - coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); + coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); + openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; @@ -994,7 +994,7 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + 
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1192,7 +1192,7 @@ namespace cv void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, double k, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1211,7 +1211,7 @@ namespace cv void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } @@ -1512,17 +1512,17 @@ namespace cv String kernelName = "calcLut"; size_t localThreads[3] = { 32, 8, 1 }; size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (is_cpu) openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU"); else { cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName); - int wave_size = queryDeviceInfo(kernel); + size_t wave_size = queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[20] = {0}; - sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size); openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); } } diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp deleted file mode 100644 index c18984b078..0000000000 --- 
a/modules/ocl/src/initialization.cpp +++ /dev/null @@ -1,1090 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Guoping Long, longguoping@gmail.com -// Niko Li, newlife20080214@gmail.com -// Yao Wang, bitwangyaoyao@gmail.com -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include -#include -#include "binarycaching.hpp" - -using namespace cv; -using namespace cv::ocl; -using namespace std; -using std::cout; -using std::endl; - -//#define PRINT_KERNEL_RUN_TIME -#define RUN_TIMES 100 -#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD -#define CL_MEM_USE_PERSISTENT_MEM_AMD 0 -#endif -//#define AMD_DOUBLE_DIFFER - -namespace cv -{ - namespace ocl - { - extern void fft_teardown(); - extern void clBlasTeardown(); - /* - * The binary caching system to eliminate redundant program source compilation. - * Strictly, this is not a cache because we do not implement evictions right now. - * We shall add such features to trade-off memory consumption and performance when necessary. 
- */ - auto_ptr ProgramCache::programCache; - ProgramCache *programCache = NULL; - DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; - DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; - int gDevMemTypeValueMap[5] = {0, - CL_MEM_ALLOC_HOST_PTR, - CL_MEM_USE_HOST_PTR, - CL_MEM_COPY_HOST_PTR, - CL_MEM_USE_PERSISTENT_MEM_AMD}; - int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY}; - - ProgramCache::ProgramCache() - { - codeCache.clear(); - cacheSize = 0; - } - - ProgramCache::~ProgramCache() - { - releaseProgram(); - } - - cl_program ProgramCache::progLookup(string srcsign) - { - map::iterator iter; - iter = codeCache.find(srcsign); - if(iter != codeCache.end()) - return iter->second; - else - return NULL; - } - - void ProgramCache::addProgram(string srcsign , cl_program program) - { - if(!progLookup(srcsign)) - { - codeCache.insert(map::value_type(srcsign, program)); - } - } - - void ProgramCache::releaseProgram() - { - map::iterator iter; - for(iter = codeCache.begin(); iter != codeCache.end(); iter++) - { - openCLSafeCall(clReleaseProgram(iter->second)); - } - codeCache.clear(); - cacheSize = 0; - } - struct Info::Impl - { - cl_platform_id oclplatform; - std::vector devices; - std::vector devName; - std::string clVersion; - - cl_context oclcontext; - cl_command_queue clCmdQueue; - int devnum; - size_t maxWorkGroupSize; - cl_uint maxDimensions; // == maxWorkItemSizes.size() - std::vector maxWorkItemSizes; - cl_uint maxComputeUnits; - char extra_options[512]; - int double_support; - int unified_memory; //1 means integrated GPU, otherwise this value is 0 - int refcounter; - - Impl(); - - void setDevice(void *ctx, void *q, int devnum); - - void release() - { - if(1 == CV_XADD(&refcounter, -1)) - { - releaseResources(); - delete this; - } - } - - Impl* copy() - { - CV_XADD(&refcounter, 1); - return this; - } - - private: - Impl(const Impl&); - Impl& operator=(const Impl&); - void releaseResources(); - }; - - // global variables to hold binary cache 
properties - static int enable_disk_cache = -#ifdef _DEBUG - false; -#else - true; -#endif - static int update_disk_cache = false; - static String binpath = ""; - - Info::Impl::Impl() - :oclplatform(0), - oclcontext(0), - clCmdQueue(0), - devnum(-1), - maxWorkGroupSize(0), - maxDimensions(0), - maxComputeUnits(0), - double_support(0), - unified_memory(0), - refcounter(1) - { - memset(extra_options, 0, 512); - } - - void Info::Impl::releaseResources() - { - devnum = -1; - - if(clCmdQueue) - { - //temporarily disable command queue release as it causes program hang at exit - //openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); - clCmdQueue = 0; - } - - if(oclcontext) - { - openCLSafeCall(clReleaseContext(oclcontext)); - oclcontext = 0; - } - } - - void Info::Impl::setDevice(void *ctx, void *q, int dnum) - { - if((ctx && q) || devnum != dnum) - releaseResources(); - - CV_Assert(dnum >= 0 && dnum < (int)devices.size()); - devnum = dnum; - if(ctx && q) - { - oclcontext = (cl_context)ctx; - clCmdQueue = (cl_command_queue)q; - clRetainContext(oclcontext); - clRetainCommandQueue(clCmdQueue); - } - else - { - cl_int status = 0; - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(oclplatform), 0 }; - oclcontext = clCreateContext(cps, 1, &devices[devnum], 0, 0, &status); - openCLVerifyCall(status); - clCmdQueue = clCreateCommandQueue(oclcontext, devices[devnum], CL_QUEUE_PROFILING_ENABLE, &status); - openCLVerifyCall(status); - } - - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&maxWorkGroupSize, 0)); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void *)&maxDimensions, 0)); - maxWorkItemSizes.resize(maxDimensions); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxDimensions, (void *)&maxWorkItemSizes[0], 0)); - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, 
sizeof(cl_uint), (void *)&maxComputeUnits, 0)); - - cl_bool unfymem = false; - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_bool), (void *)&unfymem, 0)); - unified_memory = unfymem ? 1 : 0; - - //initialize extra options for compilation. Currently only fp64 is included. - //Assume 4KB is enough to store all possible extensions. - const int EXT_LEN = 4096 + 1 ; - char extends_set[EXT_LEN]; - size_t extends_size; - openCLSafeCall(clGetDeviceInfo(devices[devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); - extends_set[EXT_LEN - 1] = 0; - size_t fp64_khr = std::string(extends_set).find("cl_khr_fp64"); - - if(fp64_khr != std::string::npos) - { - sprintf(extra_options, "-D DOUBLE_SUPPORT"); - double_support = 1; - } - else - { - memset(extra_options, 0, 512); - double_support = 0; - } - } - - ////////////////////////Common OpenCL specific calls/////////////// - int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type) - { - rw_type = gDeviceMemRW; - mem_type = gDeviceMemType; - return Context::getContext()->impl->unified_memory; - } - - int setDevMemType(DevMemRW rw_type, DevMemType mem_type) - { - if( (mem_type == DEVICE_MEM_PM && Context::getContext()->impl->unified_memory == 0) || - mem_type == DEVICE_MEM_UHP || - mem_type == DEVICE_MEM_CHP ) - return -1; - gDeviceMemRW = rw_type; - gDeviceMemType = mem_type; - return 0; - } - - int getDevice(std::vector &oclinfo, int devicetype) - { - //TODO: cache oclinfo vector - oclinfo.clear(); - - switch(devicetype) - { - case CVCL_DEVICE_TYPE_DEFAULT: - case CVCL_DEVICE_TYPE_CPU: - case CVCL_DEVICE_TYPE_GPU: - case CVCL_DEVICE_TYPE_ACCELERATOR: - case CVCL_DEVICE_TYPE_ALL: - break; - default: - return 0; - } - - // Platform info - cl_uint numPlatforms; - openCLSafeCall(clGetPlatformIDs(0, 0, &numPlatforms)); - if(numPlatforms < 1) return 0; - - std::vector platforms(numPlatforms); - openCLSafeCall(clGetPlatformIDs(numPlatforms, &platforms[0], 0)); - - char 
deviceName[256]; - int devcienums = 0; - char clVersion[256]; - for (unsigned i = 0; i < numPlatforms; ++i) - { - cl_uint numsdev = 0; - cl_int status = clGetDeviceIDs(platforms[i], devicetype, 0, NULL, &numsdev); - if(status != CL_DEVICE_NOT_FOUND) - openCLVerifyCall(status); - - if(numsdev > 0) - { - devcienums += numsdev; - std::vector devices(numsdev); - openCLSafeCall(clGetDeviceIDs(platforms[i], devicetype, numsdev, &devices[0], 0)); - - Info ocltmpinfo; - ocltmpinfo.impl->oclplatform = platforms[i]; - openCLSafeCall(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(clVersion), clVersion, NULL)); - ocltmpinfo.impl->clVersion = clVersion; - for(unsigned j = 0; j < numsdev; ++j) - { - ocltmpinfo.impl->devices.push_back(devices[j]); - openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(deviceName), deviceName, 0)); - ocltmpinfo.impl->devName.push_back(deviceName); - ocltmpinfo.DeviceName.push_back(deviceName); - } - oclinfo.push_back(ocltmpinfo); - } - } - if(devcienums > 0) - { - setDevice(oclinfo[0]); - } - return devcienums; - } - - void setDevice(Info &oclinfo, int devnum) - { - oclinfo.impl->setDevice(0, 0, devnum); - Context::setContext(oclinfo); - } - - void setDeviceEx(Info &oclinfo, void *ctx, void *q, int devnum) - { - oclinfo.impl->setDevice(ctx, q, devnum); - Context::setContext(oclinfo); - } - - void *getoclContext() - { - return &(Context::getContext()->impl->oclcontext); - } - - void *getoclCommandQueue() - { - return &(Context::getContext()->impl->clCmdQueue); - } - - void finish() - { - clFinish(Context::getContext()->impl->clCmdQueue); - } - - //template specializations of queryDeviceInfo - template<> - bool queryDeviceInfo(cl_kernel) - { - Info::Impl* impl = Context::getContext()->impl; - cl_device_type devicetype; - openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], - CL_DEVICE_TYPE, sizeof(cl_device_type), - &devicetype, NULL)); - return (devicetype == CVCL_DEVICE_TYPE_CPU); - } - - template - static _ty 
queryWavesize(cl_kernel kernel) - { - size_t info = 0; - Info::Impl* impl = Context::getContext()->impl; - bool is_cpu = queryDeviceInfo(); - if(is_cpu) - { - return 1; - } - CV_Assert(kernel != NULL); - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], - CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL)); - return static_cast<_ty>(info); - } - - template<> - size_t queryDeviceInfo(cl_kernel kernel) - { - return queryWavesize(kernel); - } - template<> - int queryDeviceInfo(cl_kernel kernel) - { - return queryWavesize(kernel); - } - - void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) - { - cl_int status; - status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0, - size, host_buffer, 0, NULL, NULL); - openCLVerifyCall(status); - } - - cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size) - { - cl_int status; - cl_mem buffer = clCreateBuffer(clCxt->impl->oclcontext, (cl_mem_flags)flag, size, NULL, &status); - openCLVerifyCall(status); - return buffer; - } - - void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height) - { - openCLMallocPitchEx(clCxt, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType); - } - - void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type) - { - cl_int status; - *dev_ptr = clCreateBuffer(clCxt->impl->oclcontext, gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type], - widthInBytes * height, 0, &status); - openCLVerifyCall(status); - *pitch = widthInBytes; - } - - void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels) - { - size_t buffer_origin[3] = {0, 0, 0}; - size_t host_origin[3] = {0, 0, 0}; - size_t region[3] = {width, height, 1}; - if(kind == 
clMemcpyHostToDevice) - { - if(dpitch == width || channels == 3 || height == 1) - { - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - 0, width * height, src, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); - } - } - else if(kind == clMemcpyDeviceToHost) - { - if(spitch == width || channels == 3 || height == 1) - { - openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - 0, width * height, dst, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); - } - } - } - - void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset) - { - size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0}; - size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0}; - size_t region[3] = {width, height, 1}; - - openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin, - region, spitch, 0, dpitch, 0, 0, 0, 0)); - } - - void openCLFree(void *devPtr) - { - openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); - } - cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName) - { - return openCLGetKernelFromSource(clCxt, source, kernelName, NULL); - } - - void setBinaryDiskCache(int mode, String path) - { - if(mode == CACHE_NONE) - { - update_disk_cache = 0; - enable_disk_cache = 0; - return; - } - update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; - enable_disk_cache |= -#ifdef _DEBUG - (mode & CACHE_DEBUG) == CACHE_DEBUG; -#else - (mode & CACHE_RELEASE) == CACHE_RELEASE; -#endif - if(enable_disk_cache && !path.empty()) 
- { - binpath = path; - } - } - - void setBinpath(const char *path) - { - binpath = path; - } - - int savetofile(const Context*, cl_program &program, const char *fileName) - { - size_t binarySize; - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t), - &binarySize, NULL)); - char* binary = (char*)malloc(binarySize); - if(binary == NULL) - { - CV_Error(CV_StsNoMem, "Failed to allocate host memory."); - } - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *), - &binary, - NULL)); - - FILE *fp = fopen(fileName, "wb+"); - if(fp != NULL) - { - fwrite(binary, binarySize, 1, fp); - free(binary); - fclose(fp); - } - return 1; - } - - cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName, - const char *build_options) - { - cl_kernel kernel; - cl_program program ; - cl_int status = 0; - stringstream src_sign; - string srcsign; - string filename; - CV_Assert(programCache != NULL); - - if(NULL != build_options) - { - src_sign << (int64)(*source) << clCxt->impl->oclcontext << "_" << build_options; - } - else - { - src_sign << (int64)(*source) << clCxt->impl->oclcontext; - } - srcsign = src_sign.str(); - - program = NULL; - program = programCache->progLookup(srcsign); - - if(!program) - { - //config build programs - char all_build_options[1024]; - memset(all_build_options, 0, 1024); - char zeromem[512] = {0}; - if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512)) - strcat(all_build_options, clCxt -> impl->extra_options); - strcat(all_build_options, " "); - if(build_options != NULL) - strcat(all_build_options, build_options); - if(all_build_options != NULL) - { - filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + all_build_options + ".clb"; - } - else - { - filename = binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb"; - } - - FILE *fp = enable_disk_cache ? 
fopen(filename.c_str(), "rb") : NULL; - if(fp == NULL || update_disk_cache) - { - if(fp != NULL) - fclose(fp); - - program = clCreateProgramWithSource( - clCxt->impl->oclcontext, 1, source, NULL, &status); - openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && enable_disk_cache) - savetofile(clCxt, program, filename.c_str()); - } - else - { - fseek(fp, 0, SEEK_END); - size_t binarySize = ftell(fp); - fseek(fp, 0, SEEK_SET); - char *binary = new char[binarySize]; - CV_Assert(1 == fread(binary, binarySize, 1, fp)); - fclose(fp); - cl_int status = 0; - program = clCreateProgramWithBinary(clCxt->impl->oclcontext, - 1, - &(clCxt->impl->devices[clCxt->impl->devnum]), - (const size_t *)&binarySize, - (const unsigned char **)&binary, - NULL, - &status); - openCLVerifyCall(status); - status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - delete[] binary; - } - - if(status != CL_SUCCESS) - { - if(status == CL_BUILD_PROGRAM_FAILURE) - { - cl_int logStatus; - char *buildLog = NULL; - size_t buildLogSize = 0; - logStatus = clGetProgramBuildInfo(program, - clCxt->impl->devices[clCxt->impl->devnum], CL_PROGRAM_BUILD_LOG, buildLogSize, - buildLog, &buildLogSize); - if(logStatus != CL_SUCCESS) - cout << "Failed to build the program and get the build info." 
<< endl; - buildLog = new char[buildLogSize]; - CV_DbgAssert(!!buildLog); - memset(buildLog, 0, buildLogSize); - openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[clCxt->impl->devnum], - CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); - cout << "\n\t\t\tBUILD LOG\n"; - cout << buildLog << endl; - delete [] buildLog; - } - openCLVerifyCall(status); - } - //Cache the binary for future use if build_options is null - if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE) - programCache->addProgram(srcsign, program); - else - cout << "Warning: code cache has been full.\n"; - } - kernel = clCreateKernel(program, kernelName.c_str(), &status); - openCLVerifyCall(status); - return kernel; - } - - void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads) - { - size_t kernelWorkGroupSize; - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[clCxt->impl->devnum], - CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); - CV_Assert( localThreads[0] <= clCxt->impl->maxWorkItemSizes[0] ); - CV_Assert( localThreads[1] <= clCxt->impl->maxWorkItemSizes[1] ); - CV_Assert( localThreads[2] <= clCxt->impl->maxWorkItemSizes[2] ); - CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize ); - CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= clCxt->impl->maxWorkGroupSize ); - } - - static inline size_t roundUp(size_t sz, size_t n) - { - // we don't assume that n is a power of 2 (see alignSize) - // equal to divUp(sz, n) * n - size_t t = sz + n - 1; - size_t rem = t % n; - size_t result = t - rem; - return result; - } - -#ifdef PRINT_KERNEL_RUN_TIME - static double total_execute_time = 0; - static double total_kernel_time = 0; -#endif - void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, - int depth, const char *build_options) - 
{ - //construct kernel name - //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) - stringstream idxStr; - if(channels != -1) - idxStr << "_C" << channels; - if(depth != -1) - idxStr << "_D" << depth; - kernelName += idxStr.str(); - - cl_kernel kernel; - kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); - - if ( localThreads != NULL) - { - globalThreads[0] = roundUp(globalThreads[0], localThreads[0]); - globalThreads[1] = roundUp(globalThreads[1], localThreads[1]); - globalThreads[2] = roundUp(globalThreads[2], localThreads[2]); - - cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - -#ifndef PRINT_KERNEL_RUN_TIME - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); -#else - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong start_time, end_time, queue_time; - double execute_time = 0; - double total_time = 0; - - openCLSafeCall(clWaitForEvents(1, &event)); - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, - sizeof(cl_ulong), &start_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - execute_time = (double)(end_time - start_time) / (1000 * 1000); - total_time = (double)(end_time - queue_time) / (1000 * 1000); - - total_execute_time += execute_time; - total_kernel_time += total_time; - clReleaseEvent(event); -#endif - - 
clFlush(clCxt->impl->clCmdQueue); - openCLSafeCall(clReleaseKernel(kernel)); - } - - void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth) - { - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, - channels, depth, NULL); - } - void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options) - - { -#ifndef PRINT_KERNEL_RUN_TIME - openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); -#else - string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"}; - cout << endl; - cout << "Function Name: " << kernelName; - if(depth >= 0) - cout << " |data type: " << data_type[depth]; - cout << " |channels: " << channels; - cout << " |Time Unit: " << "ms" << endl; - - total_execute_time = 0; - total_kernel_time = 0; - cout << "-------------------------------------" << endl; - - cout << setiosflags(ios::left) << setw(15) << "excute time"; - cout << setiosflags(ios::left) << setw(15) << "lauch time"; - cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl; - int i = 0; - for(i = 0; i < RUN_TIMES; i++) - openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); - - cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; - cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; -#endif - } - - double openCLExecuteKernelInterop(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, - bool finish, bool 
measureKernelTime, bool cleanUp) - - { - //construct kernel name - //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number - //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) - stringstream idxStr; - if(channels != -1) - idxStr << "_C" << channels; - if(depth != -1) - idxStr << "_D" << depth; - kernelName += idxStr.str(); - - cl_kernel kernel; - kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); - - double kernelTime = 0.0; - - if( globalThreads != NULL) - { - if ( localThreads != NULL) - { - globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; - globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; - globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; - - //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; - cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads); - } - for(size_t i = 0; i < args.size(); i ++) - openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - - if(measureKernelTime == false) - { - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); - } - else - { - cl_event event = NULL; - openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); - - cl_ulong end_time, queue_time; - - openCLSafeCall(clWaitForEvents(1, &event)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); - - openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 0)); - - kernelTime = (double)(end_time - queue_time) / (1000 * 1000); - - clReleaseEvent(event); - } - } - - if(finish) - { - clFinish(clCxt->impl->clCmdQueue); - } - - if(cleanUp) - { - 
openCLSafeCall(clReleaseKernel(kernel)); - } - - return kernelTime; - } - - // Converts the contents of a file into a string - static int convertToString(const char *filename, std::string& s) - { - size_t size; - char* str; - - std::fstream f(filename, (std::fstream::in | std::fstream::binary)); - if(f.is_open()) - { - size_t fileSize; - f.seekg(0, std::fstream::end); - size = fileSize = (size_t)f.tellg(); - f.seekg(0, std::fstream::beg); - - str = new char[size+1]; - if(!str) - { - f.close(); - return -1; - } - - f.read(str, fileSize); - f.close(); - str[size] = '\0'; - - s = str; - delete[] str; - return 0; - } - printf("Error: Failed to open file %s\n", filename); - return -1; - } - - double openCLExecuteKernelInterop(Context *clCxt , const char **fileName, const int numFiles, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options, - bool finish, bool measureKernelTime, bool cleanUp) - - { - std::vector fsource; - for (int i = 0 ; i < numFiles ; i++) - { - std::string str; - if (convertToString(fileName[i], str) >= 0) - fsource.push_back(str); - } - const char **source = new const char *[numFiles]; - for (int i = 0 ; i < numFiles ; i++) - source[i] = fsource[i].c_str(); - double kernelTime = openCLExecuteKernelInterop(clCxt ,source, kernelName, globalThreads, localThreads, - args, channels, depth, build_options, finish, measureKernelTime, cleanUp); - fsource.clear(); - delete []source; - return kernelTime; - } - - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size) - { - int status; - cl_mem con_struct; - - con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status); - openCLSafeCall(status); - - openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size, - value, 0, 0, 0)); - - return con_struct; - - } - - /////////////////////////////OpenCL initialization///////////////// - 
auto_ptr Context::clCxt; - int Context::val = 0; - static Mutex cs; - static volatile int context_tear_down = 0; - - bool initialized() - { - return *((volatile int*)&Context::val) != 0 && - Context::clCxt->impl->clCmdQueue != NULL&& - Context::clCxt->impl->oclcontext != NULL; - } - - Context* Context::getContext() - { - if(*((volatile int*)&val) != 1) - { - AutoLock al(cs); - if(*((volatile int*)&val) != 1) - { - if (context_tear_down) - return clCxt.get(); - if( 0 == clCxt.get()) - clCxt.reset(new Context); - std::vector oclinfo; - CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); - - *((volatile int*)&val) = 1; - } - } - return clCxt.get(); - } - - void Context::setContext(Info &oclinfo) - { - AutoLock guard(cs); - if(*((volatile int*)&val) != 1) - { - if( 0 == clCxt.get()) - clCxt.reset(new Context); - - clCxt.get()->impl = oclinfo.impl->copy(); - - *((volatile int*)&val) = 1; - } - else - { - clCxt.get()->impl->release(); - clCxt.get()->impl = oclinfo.impl->copy(); - } - } - - Context::Context() - { - impl = 0; - programCache = ProgramCache::getProgramCache(); - } - - Context::~Context() - { - release(); - } - - void Context::release() - { - if (impl) - impl->release(); - programCache->releaseProgram(); - } - - bool Context::supportsFeature(int ftype) const - { - switch(ftype) - { - case CL_DOUBLE: - return impl->double_support == 1; - case CL_UNIFIED_MEM: - return impl->unified_memory == 1; - case CL_VER_1_2: - return impl->clVersion.find("OpenCL 1.2") != string::npos; - default: - return false; - } - } - - size_t Context::computeUnits() const - { - return impl->maxComputeUnits; - } - - unsigned long queryLocalMemInfo() - { - Info::Impl* impl = Context::getContext()->impl; - cl_ulong local_memory_size = 0; - clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), (void*)&local_memory_size, 0); - return local_memory_size; - } - - void* Context::oclContext() - { - return impl->oclcontext; - } - - void* 
Context::oclCommandQueue() - { - return impl->clCmdQueue; - } - - Info::Info() - { - impl = new Impl; - } - - void Info::release() - { - fft_teardown(); - clBlasTeardown(); - impl->release(); - impl = new Impl; - DeviceName.clear(); - } - - Info::~Info() - { - fft_teardown(); - clBlasTeardown(); - impl->release(); - } - - Info &Info::operator = (const Info &m) - { - impl->release(); - impl = m.impl->copy(); - DeviceName = m.DeviceName; - return *this; - } - - Info::Info(const Info &m) - { - impl = m.impl->copy(); - DeviceName = m.DeviceName; - } - }//namespace ocl - -}//namespace cv diff --git a/modules/ocl/src/knearest.cpp b/modules/ocl/src/knearest.cpp index fd9f2fed57..02dc72c4ea 100644 --- a/modules/ocl/src/knearest.cpp +++ b/modules/ocl/src/knearest.cpp @@ -44,17 +44,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -namespace cv -{ - namespace ocl - { - extern const char* knearest;//knearest - } -} - KNearestNeighbour::KNearestNeighbour() { clear(); @@ -112,7 +106,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable k1 = MIN( k1, k ); String kernel_name = "knn_find_nearest"; - cl_ulong local_memory_size = queryLocalMemInfo(); + cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize; int nThreads = local_memory_size / (2 * k * 4); if(nThreads >= 256) nThreads = 256; @@ -122,7 +116,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable size_t global_thread[] = {1, samples.rows, 1}; char build_option[50]; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { sprintf(build_option, " "); }else diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 3ae14eb48d..d247a14794 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -134,7 +134,6 @@ 
void cv::ocl::oclMat::upload(const Mat &m) Size wholeSize; Point ofs; m.locateROI(wholeSize, ofs); - create(wholeSize, m.type()); if (m.channels() == 3) @@ -142,13 +141,12 @@ void cv::ocl::oclMat::upload(const Mat &m) int pitch = wholeSize.width * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); convert_C3C4(temp, *this); - openCLSafeCall(clReleaseMemObject(temp)); } else @@ -197,13 +195,12 @@ void cv::ocl::oclMat::download(cv::Mat &m) const int pitch = wholecols * 3 * m.elemSize1(); int tail_padding = m.elemSize1() * 3072; int err; - cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, + cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err); openCLVerifyCall(err); convert_C4C3(*this, temp); openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3); - openCLSafeCall(clReleaseMemObject(temp)); } else @@ -319,7 +316,7 @@ static void convert_run(const oclMat &src, oclMat &dst, double alpha, double bet void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const { - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && (depth() == CV_64F || dst.depth() == CV_64F)) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); @@ -380,7 +377,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri #ifdef CL_VERSION_1_2 // 
this enables backwards portability to // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) && + if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && dst.offset == 0 && dst.cols == dst.wholecols) { const int sizeofMap[][7] = @@ -392,7 +389,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri }; int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()]; - clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(), + clEnqueueFillBuffer(getClCommandQueue(dst.clCxt), (cl_mem)dst.data, (void*)mat.data, sizeofGeneric, 0, dst.step * dst.rows, 0, NULL, NULL); } diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index fc94e2f3d8..e4e2e918fb 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -101,15 +101,15 @@ namespace cv for(size_t i = 0; i < args.size(); i ++) openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); - openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); switch(finish_mode) { case CLFINISH: - clFinish((cl_command_queue)clCxt->oclCommandQueue()); + clFinish(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr()); case CLFLUSH: - clFlush((cl_command_queue)clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr()); break; case DISABLE: default: @@ -178,7 +178,7 @@ namespace cv #ifdef CL_VERSION_1_2 //this enables backwards portability to //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support - if(Context::getContext()->supportsFeature(Context::CL_VER_1_2)) + if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2)) { cl_image_desc desc; desc.image_type = 
CL_MEM_OBJECT_IMAGE2D; @@ -191,13 +191,13 @@ namespace cv desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); } else #endif { texture = clCreateImage2D( - (cl_context)mat.clCxt->oclContext(), + *(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, mat.cols, @@ -212,22 +212,22 @@ namespace cv cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { - devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows + devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; - clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin, + clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); - clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); } else { devData = (cl_mem)mat.data; } - clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0); + clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { - clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); + clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); clReleaseMemObject(devData); } @@ -259,7 +259,7 @@ namespace cv try { cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); 
- finish(); + cv::ocl::finish(); _support = true; } catch (const cv::Exception& e) diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 926b94c9b3..24e8b3e0f6 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2; - if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float) + if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float) { CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!"); } @@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom ) cv::Mat dst(dst_a); a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0; - if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { for (int i = 0; i < contour->total; ++i) { diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index 5043da05dc..89df73e9a8 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com -// Yao Wang, yao@multicorewareinc.com +// Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com // // // Redistribution and use in source and binary forms, with or without modification, diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index cdcc8f231f..a69015d190 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -125,7 +125,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); - bool is_cpu = queryDeviceInfo(); + bool is_cpu = isCpuDevice(); if (is_cpu) { openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); @@ -139,7 +139,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, stringstream idxStr; idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); - int wave_size = queryDeviceInfo(kernel); + int wave_size = (int)queryWaveFrontSize(kernel); openCLSafeCall(clReleaseKernel(kernel)); static char opt[32] = {0}; diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index 043031072c..01df30c518 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Zhang Chunpeng chunpeng@multicorewareinc.com -// Yao Wang, yao@multicorewareinc.com +// Zhang Chunpeng chunpeng@multicorewareinc.com +// Yao Wang, yao@multicorewareinc.com // // // Redistribution and use in source and binary forms, with or without modification, diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index 79bd0f0e21..fb8d05aaa7 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -75,7 +75,7 @@ namespace cv { static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) { - if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F) + if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; @@ -170,7 +170,7 @@ namespace cv static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) { - if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F) + if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index 9052dc82bd..c8334cca42 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -150,10 +150,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th)); openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + 
clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -200,9 +200,9 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th)); openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1)); openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -235,10 +235,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr, @@ -270,10 +270,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + 
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } @@ -340,10 +340,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term)); openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost, @@ -391,10 +391,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term)); openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step)); openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis, @@ -458,10 +458,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2)); 
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1)); openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } //////////////////////////////////////////////////////////////////////////////////////////////// @@ -500,10 +500,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step)); openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected, @@ -552,10 +552,10 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane)); openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step)); openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step)); - openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL, + openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - 
clFinish(*(cl_command_queue*)getoclCommandQueue()); + clFinish(*(cl_command_queue*)getClCommandQueuePtr()); openCLSafeCall(clReleaseKernel(kernel)); } } diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index fe9136057b..5bc93aa3f5 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -95,7 +95,10 @@ namespace cv con_struct -> cmax_disc_term = max_disc_term; con_struct -> cdisc_single_jump = disc_single_jump; - cl_con_struct = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)con_struct, + Context* clCtx = Context::getContext(); + cl_context clContext = *(cl_context*)(clCtx->getOpenCLContextPtr()); + cl_command_queue clCmdQueue = *(cl_command_queue*)(clCtx->getOpenCLCommandQueuePtr()); + cl_con_struct = load_constant(clContext, clCmdQueue, (void *)con_struct, sizeof(con_struct_t)); delete con_struct; diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index 606ac530f7..c9a3f7abc1 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Jin Ma, jin@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 594c196a59..8071102bad 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -80,18 +80,18 @@ int main(int argc, char **argv) const char *keys = "{ h | help | false | print help message }" "{ t | type | gpu | set device type:cpu or gpu}" - "{ p | platform | 0 | set platform id }" + "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options besides goole test option:" << endl; + cout << "Available options besides google test option:" << endl; cmd.printParams(); return 0; } string type = cmd.get("type"); - unsigned int pid = cmd.get("platform"); + int pid = cmd.get("platform"); int device = cmd.get("device"); print_info(); @@ -100,24 +100,29 @@ int main(int argc, char **argv) { flag = CVCL_DEVICE_TYPE_CPU; } - std::vector oclinfo; - int devnums = getDevice(oclinfo, flag); - if(devnums <= device || device < 0) + + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) { - std::cout << "device invalid\n"; - return -1; - } - if(pid >= oclinfo.size()) - { - std::cout << "platform invalid\n"; - return -1; + std::cout << "platform is invalid\n"; + return 1; } - setDevice(oclinfo[pid], device); + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? 
NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) + { + std::cout << "device/platform invalid\n"; + return 1; + } + cv::ocl::setDevice(devicesInfo[device]); setBinaryDiskCache(CACHE_UPDATE); - cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; + cout << "Device type: " << type << endl + << "Platform name: " << devicesInfo[device]->platform->platformName << endl + << "Device name: " << devicesInfo[device]->deviceName << endl; return RUN_ALL_TESTS(); } diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp index 0b9864cbd3..822b87f441 100644 --- a/modules/superres/perf/perf_superres_ocl.cpp +++ b/modules/superres/perf/perf_superres_ocl.cpp @@ -107,9 +107,6 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, Combine(Values(szSmall64, szSmall128), Values(MatType(CV_8UC1), MatType(CV_8UC3)))) { - std::vectorinfo; - cv::ocl::getDevice(info); - declare.time(5 * 60); const Size size = std::tr1::get<0>(GetParam()); diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp index 2f27d50259..5aecca0630 100644 --- a/modules/superres/src/btv_l1_ocl.cpp +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -232,7 +232,7 @@ void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, in cl_mem c_btvRegWeights; size_t count = btvWeights_size * sizeof(float); c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count); - int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); + int cl_safe_check = clEnqueueWriteBuffer(getClCommandQueue(clCxt), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); CV_Assert(cl_safe_check == CL_SUCCESS); args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data)); diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp index 1530d6d667..5cb078f77c 100644 --- 
a/modules/superres/test/test_superres.cpp +++ b/modules/superres/test/test_superres.cpp @@ -278,8 +278,6 @@ TEST_F(SuperResolution, BTVL1_GPU) #if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL) TEST_F(SuperResolution, BTVL1_OCL) { - std::vector infos; - cv::ocl::getDevice(infos); RunTest(cv::superres::createSuperResolution_BTVL1_OCL()); } #endif From b00f79ac5f8e8e876d9e1969b2e5f5d04c828090 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 27 Sep 2013 16:41:25 +0400 Subject: [PATCH 2/8] ocl: move program names into opencl_kernels.hpp --- cmake/OpenCVModule.cmake | 8 +- cmake/cl2cpp.cmake | 25 ++- modules/nonfree/src/surf.ocl.cpp | 167 ++++++++++----------- modules/ocl/src/bgfg_mog.cpp | 5 +- modules/ocl/src/blend.cpp | 12 +- modules/ocl/src/brute_force_matcher.cpp | 14 +- modules/ocl/src/build_warps.cpp | 11 +- modules/ocl/src/canny.cpp | 11 +- modules/ocl/src/color.cpp | 9 +- modules/ocl/src/columnsum.cpp | 11 +- modules/ocl/src/fft.cpp | 2 - modules/ocl/src/filtering.cpp | 17 +-- modules/ocl/src/gemm.cpp | 1 - modules/ocl/src/gftt.cpp | 11 +- modules/ocl/src/haar.cpp | 16 +- modules/ocl/src/hog.cpp | 12 +- modules/ocl/src/imgproc.cpp | 21 +-- modules/ocl/src/interpolate_frames.cpp | 5 +- modules/ocl/src/kalman.cpp | 3 +- modules/ocl/src/kmeans.cpp | 13 +- modules/ocl/src/match_template.cpp | 13 +- modules/ocl/src/matrix_operations.cpp | 17 +-- modules/ocl/src/moments.cpp | 5 +- modules/ocl/src/mssegmentation.cpp | 4 +- modules/ocl/src/optical_flow_farneback.cpp | 11 +- modules/ocl/src/pyrdown.cpp | 15 +- modules/ocl/src/pyrlk.cpp | 11 +- modules/ocl/src/pyrup.cpp | 10 +- modules/ocl/src/sort_by_key.cpp | 10 +- modules/ocl/src/split_merge.cpp | 20 +-- modules/ocl/src/stereo_csbp.cpp | 44 +----- modules/ocl/src/stereobm.cpp | 14 +- modules/ocl/src/stereobp.cpp | 18 +-- modules/ocl/src/tvl1flow.cpp | 13 +- modules/superres/src/btv_l1_ocl.cpp | 3 +- 35 files changed, 154 insertions(+), 428 deletions(-) diff --git a/cmake/OpenCVModule.cmake 
b/cmake/OpenCVModule.cmake index 1d87bc1b88..4ed8cf0c0f 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -450,11 +450,11 @@ macro(ocv_glob_module_sources) if(HAVE_OPENCL AND cl_kernels) ocv_include_directories(${OPENCL_INCLUDE_DIRS}) add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" - COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp" + COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake" DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake") - source_group("Src\\OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp") - list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/kernels.cpp") + source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") + list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") endif() source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 1e932eabdb..0733a42441 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -1,6 +1,20 @@ file(GLOB cl_list "${CL_DIR}/*.cl" ) +list(SORT cl_list) -file(WRITE ${OUTPUT} "// This file is auto-generated. Do not edit! +string(REPLACE ".cpp" ".hpp" OUTPUT_HPP "${OUTPUT}") +get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME) + +set(STR_CPP "// This file is auto-generated. Do not edit! + +#include \"${OUTPUT_HPP_NAME}\" + +namespace cv +{ +namespace ocl +{ +") + +set(STR_HPP "// This file is auto-generated. Do not edit! 
namespace cv { @@ -29,7 +43,12 @@ foreach(cl ${cl_list}) string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof - file(APPEND ${OUTPUT} "const char* ${cl_filename}=\"${lines};\n") + set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n") + set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n") endforeach() -file(APPEND ${OUTPUT} "}\n}\n") +set(STR_CPP "${STR_CPP}}\n}\n") +set(STR_HPP "${STR_HPP}}\n}\n") + +file(WRITE ${OUTPUT} "${STR_CPP}") +file(WRITE ${OUTPUT_HPP} "${STR_HPP}") diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index 59eab705d6..d6f72bc7ad 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -43,27 +43,24 @@ // //M*/ #include "precomp.hpp" -#include #ifdef HAVE_OPENCV_OCL +#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; namespace cv { namespace ocl { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *surf; - const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; static bool use_image2d = false; static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, int depth) + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth) { char optBuf [100] = {0}; char * optBufPtr = optBuf; @@ -486,26 +483,26 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i Context *clCxt = det.clCxt; string kernelName = "icvCalcLayerDetAndTrace"; - vector< pair > args; + std::vector< std::pair > args; if(sumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported + args.push_back( std::make_pair( 
sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported } - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nOctaveLayers)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&c_layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = @@ -524,35 +521,35 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat Context *clCxt = det.clCxt; string kernelName = useMask ? 
"icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer"; - vector< pair > args; + std::vector< std::pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates)); - args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&trace.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maxCounter.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&counterOffset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&trace.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nLayers)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( 
sizeof(cl_int), (void *)&layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxCandidates)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold)); if(useMask) { if(maskSumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&maskSumTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.maskSum.step)); } size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0], @@ -568,19 +565,19 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa { Context *clCxt = det.clCxt; string kernelName = "icvInterpolateKeypoint"; - vector< pair > args; + std::vector< std::pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&counters_.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&det.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&octave)); - args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&max_features)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&det.data)); + args.push_back( 
std::make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counters_.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&det.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&layer_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&max_features)); size_t localThreads[3] = {3, 3, 3}; size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1}; @@ -593,21 +590,21 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat Context *clCxt = counters.clCxt; string kernelName = "icvCalcOrientation"; - vector< pair > args; + std::vector< std::pair > args; if(sumTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&sumTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&surf_.sum.data)); // if image2d is not supported } - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void 
*)&img_rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); size_t localThreads[3] = {32, 4, 1}; size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1}; @@ -620,11 +617,11 @@ void SURF_OCL_Invoker::icvSetUpright_gpu(const oclMat &keypoints, int nFeatures) Context *clCxt = counters.clCxt; string kernelName = "icvSetUpright"; - vector< pair > args; + std::vector< std::pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&nFeatures)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&nFeatures)); size_t localThreads[3] = {256, 1, 1}; size_t globalThreads[3] = {saturate_cast(nFeatures), 1, 1}; @@ -638,7 +635,7 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D Context *clCxt = descriptors.clCxt; string kernelName; - vector< pair > args; + std::vector< std::pair > args; size_t localThreads[3] = {1, 1, 1}; size_t globalThreads[3] = {1, 1, 1}; @@ -655,19 +652,19 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.clear(); if(imgTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( 
make_pair( sizeof(cl_int), (void *)&descriptors.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); @@ -680,8 +677,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const globalThreads[1] = localThreads[1]; args.clear(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); } @@ -698,19 +695,19 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const args.clear(); if(imgTex) { - args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&imgTex)); } else { - args.push_back( make_pair( sizeof(cl_mem), (void *)&_img.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&_img.data)); } - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&_img.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypoints.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&_img.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); @@ -723,8 +720,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const globalThreads[1] = localThreads[1]; args.clear(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&descriptors.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&descriptors.step)); openCLExecuteKernelSURF(clCxt, &surf, kernelName, globalThreads, localThreads, args, -1, -1); } diff --git a/modules/ocl/src/bgfg_mog.cpp b/modules/ocl/src/bgfg_mog.cpp index cb0dee80f8..064fef8d30 100644 --- a/modules/ocl/src/bgfg_mog.cpp +++ b/modules/ocl/src/bgfg_mog.cpp @@ -44,14 +44,15 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; + namespace cv { namespace ocl { - extern const char* bgfg_mog; - typedef struct _contant_struct { 
cl_float c_Tb; diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index ec73c8662c..58b91d8c3f 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -44,20 +44,10 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *blend_linear; - } -} void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result) diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 0273ed5891..c348db8f30 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -45,22 +45,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -#include -#include -#include using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *brute_force_match; - } -} static const int OPT_SIZE = 100; diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp index c4a092993a..4c400a2b68 100644 --- a/modules/ocl/src/build_warps.cpp +++ b/modules/ocl/src/build_warps.cpp @@ -44,19 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *build_warps; - } -} ////////////////////////////////////////////////////////////////////////////// // buildWarpPlaneMaps diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index a25c1973ef..9fc6f65b44 100644 --- a/modules/ocl/src/canny.cpp +++ 
b/modules/ocl/src/canny.cpp @@ -44,19 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *imgproc_canny; - } -} cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL) { diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index 493dbc33c4..443065077c 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -45,6 +45,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; @@ -57,14 +58,6 @@ using namespace cv::ocl; #define FLT_EPSILON 1.192092896e-07F #endif -namespace cv -{ -namespace ocl -{ -extern const char *cvt_color; -} -} - namespace { void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp index 1d6939f4e1..46ff73d224 100644 --- a/modules/ocl/src/columnsum.cpp +++ b/modules/ocl/src/columnsum.cpp @@ -43,20 +43,11 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ - namespace ocl - { - extern const char *imgproc_columnsum; - } -} void cv::ocl::columnSum(const oclMat &src, oclMat &dst) { diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index c0785ac9d8..e39a4443c4 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -42,12 +42,10 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ -#include #include "precomp.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; #if !defined HAVE_CLAMDFFT void cv::ocl::dft(const oclMat&, oclMat&, Size, int) diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index caaf53d849..758923f55c 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -48,26 +48,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -//helper routines -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *filtering_boxFilter; -extern const char *filter_sep_row; -extern const char *filter_sep_col; -extern const char *filtering_laplacian; -extern const char *filtering_morph; -extern const char *filtering_adaptive_bilateral; -} -} - namespace { inline void normalizeAnchor(int &anchor, int ksize) diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 687f26f632..837fd1fa30 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -43,7 +43,6 @@ // //M*/ -#include #include "precomp.hpp" namespace cv { namespace ocl { diff --git a/modules/ocl/src/gftt.cpp b/modules/ocl/src/gftt.cpp index 29a96ae658..e24c0a5856 100644 --- a/modules/ocl/src/gftt.cpp +++ b/modules/ocl/src/gftt.cpp @@ -42,23 +42,14 @@ // the use of this software, even if advised of the possibility of such damage. 
// //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; static bool use_cpu_sorter = true; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *imgproc_gftt; - } -} - namespace { enum SortMethod diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index e3e73b3c3d..aac3785e79 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -49,24 +49,10 @@ //M*/ #include "precomp.hpp" -#include -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - - -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *haarobjectdetect; -extern const char *haarobjectdetectbackup; -extern const char *haarobjectdetect_scaled2; -} -} /* these settings affect the quality of detection: change with care */ #define CV_ADJUST_FEATURES 1 diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 563172bc13..2d2de9a2be 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -44,9 +44,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -using namespace std; #define CELL_WIDTH 8 #define CELL_HEIGHT 8 @@ -57,15 +58,6 @@ using namespace std; static oclMat gauss_w_lut; static bool hog_device_cpu; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *objdetect_hog; - } -} - namespace cv { namespace ocl diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 0949605e15..b4d2b70a0d 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -54,34 +54,15 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; namespace cv 
{ namespace ocl { - - ////////////////////////////////////OpenCL kernel strings////////////////////////// - extern const char *meanShift; - extern const char *imgproc_copymakeboder; - extern const char *imgproc_median; - extern const char *imgproc_threshold; - extern const char *imgproc_resize; - extern const char *imgproc_remap; - extern const char *imgproc_warpAffine; - extern const char *imgproc_warpPerspective; - extern const char *imgproc_integral_sum; - extern const char *imgproc_integral; - extern const char *imgproc_histogram; - extern const char *imgproc_bilateral; - extern const char *imgproc_calcHarris; - extern const char *imgproc_calcMinEigenVal; - extern const char *imgproc_convolve; - extern const char *imgproc_clahe; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template struct index_and_sizeof; diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp index 43b766054f..54063cd7f7 100644 --- a/modules/ocl/src/interpolate_frames.cpp +++ b/modules/ocl/src/interpolate_frames.cpp @@ -44,8 +44,8 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; @@ -53,9 +53,6 @@ namespace cv { namespace ocl { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *interpolate_frames; - namespace interpolate { //The following are ported from NPP_staging.cu diff --git a/modules/ocl/src/kalman.cpp b/modules/ocl/src/kalman.cpp index 8a5b0d4c2c..6f8243457c 100644 --- a/modules/ocl/src/kalman.cpp +++ b/modules/ocl/src/kalman.cpp @@ -44,7 +44,6 @@ //M*/ #include "precomp.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; @@ -132,4 +131,4 @@ CV_EXPORTS const oclMat& KalmanFilter::correct(const oclMat& measurement) gemm(gain, temp5, 1, statePre, 1, statePost); gemm(gain, temp2, -1, errorCovPre, 1, errorCovPost); return statePost; -} \ No newline at end of file +} 
diff --git a/modules/ocl/src/kmeans.cpp b/modules/ocl/src/kmeans.cpp index 4de42fce55..06ed8b36ab 100644 --- a/modules/ocl/src/kmeans.cpp +++ b/modules/ocl/src/kmeans.cpp @@ -43,20 +43,11 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; -using namespace ocl; - -namespace cv -{ -namespace ocl -{ -////////////////////////////////////OpenCL kernel strings////////////////////////// -extern const char *kmeans_kernel; -} -} +using namespace cv::ocl; static void generateRandomCenter(const vector& box, float* center, RNG& rng) { diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 7c0a7ac5db..ba84043fc7 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -44,22 +44,11 @@ //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -//helper routines -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *match_template; - } -} namespace cv { diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index d247a14794..80b2f7d81c 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -46,30 +46,19 @@ //M*/ #include "precomp.hpp" - -#define ALIGN 32 -#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -//////////////////////////////////////////////////////////////////////// -//////////////////////////////// oclMat //////////////////////////////// -//////////////////////////////////////////////////////////////////////// +#define ALIGN 32 +#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN // helper routines namespace cv { namespace ocl { - /////////////////////////// OpenCL kernel 
strings /////////////////////////// - - extern const char *operator_copyToM; - extern const char *operator_convertTo; - extern const char *operator_setTo; - extern const char *operator_setToM; - extern const char *convertC3C4; extern DevMemType gDeviceMemType; extern DevMemRW gDeviceMemRW; } diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 24e8b3e0f6..377af28634 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -44,13 +44,12 @@ // //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" + namespace cv { namespace ocl { -extern const char *moments; - // The function calculates center of gravity and the central second order moments static void icvCompleteMomentState( CvMoments* moments ) { diff --git a/modules/ocl/src/mssegmentation.cpp b/modules/ocl/src/mssegmentation.cpp index 300265bc2e..248f134705 100644 --- a/modules/ocl/src/mssegmentation.cpp +++ b/modules/ocl/src/mssegmentation.cpp @@ -43,8 +43,10 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; +using namespace cv; +using namespace cv::ocl; // Auxiliray stuff namespace diff --git a/modules/ocl/src/optical_flow_farneback.cpp b/modules/ocl/src/optical_flow_farneback.cpp index 618a637f09..05a850bd17 100644 --- a/modules/ocl/src/optical_flow_farneback.cpp +++ b/modules/ocl/src/optical_flow_farneback.cpp @@ -45,23 +45,14 @@ #include "precomp.hpp" +#include "opencl_kernels.hpp" #include "opencv2/video/tracking.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; #define MIN_SIZE 32 -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *optical_flow_farneback; -} -} - namespace cv { namespace ocl { namespace optflow_farneback diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index 89df73e9a8..6071fc5987 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -45,23 +45,10 @@ 
// //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -using std::cout; -using std::endl; - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *pyr_down; - - } -} ////////////////////////////////////////////////////////////////////////////// /////////////////////// add subtract multiply divide ///////////////////////// diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index a69015d190..8e8692e77f 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -45,21 +45,12 @@ // //M*/ - #include "precomp.hpp" +#include "opencl_kernels.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -namespace cv -{ -namespace ocl -{ -extern const char *pyrlk; -extern const char *pyrlk_no_image; -} -} struct dim3 { unsigned int x, y, z; diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index 01df30c518..025348194d 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -45,21 +45,19 @@ // //M*/ -/* Haar features calculation */ -//#define EMU - #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; + +/* Haar features calculation */ +//#define EMU namespace cv { namespace ocl { - extern const char *pyr_up; - void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst) { int depth = src.depth(), channels = src.channels(), oclChannels = src.oclchannels(); diff --git a/modules/ocl/src/sort_by_key.cpp b/modules/ocl/src/sort_by_key.cpp index 0025f0d911..c2ab00452c 100644 --- a/modules/ocl/src/sort_by_key.cpp +++ b/modules/ocl/src/sort_by_key.cpp @@ -43,18 +43,16 @@ // //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" + +using namespace cv; +using namespace cv::ocl; namespace cv { namespace ocl { - -extern const char * kernel_sort_by_key; -extern const char * 
kernel_stablesort_by_key; -extern const char * kernel_radix_sort_by_key; - void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, int method, bool isGreaterThan); //TODO(pengx17): change this value depending on device other than a constant diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index fb8d05aaa7..9c9639fd42 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -44,29 +44,11 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -using std::cout; -using std::endl; - -//////////////////////////////////////////////////////////////////////// -///////////////// oclMat merge and split /////////////////////////////// -//////////////////////////////////////////////////////////////////////// - -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *merge_mat; - extern const char *split_mat; - } -} namespace cv { namespace ocl diff --git a/modules/ocl/src/stereo_csbp.cpp b/modules/ocl/src/stereo_csbp.cpp index c8334cca42..f9e86442bb 100644 --- a/modules/ocl/src/stereo_csbp.cpp +++ b/modules/ocl/src/stereo_csbp.cpp @@ -45,51 +45,11 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -#if !defined (HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - - void cv::ocl::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int &, int &, int &, int &) - { - throw_nogpu(); - } - cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) - { - throw_nogpu(); - } - cv::ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, - float, float, int, int) - { - throw_nogpu(); - } - - void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &, const oclMat &, oclMat &) - { - throw_nogpu(); - } - } -} - -#else /* !defined 
(HAVE_OPENCL) */ - -namespace cv -{ - namespace ocl - { - - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *stereocsbp; - } - -} namespace cv { namespace ocl @@ -755,5 +715,3 @@ void cv::ocl::StereoConstantSpaceBP::operator()(const oclMat &left, const oclMat operators[msg_type](*this, u, d, l, r, disp_selected_pyr, data_cost, data_cost_selected, temp, out, left, right, disp); } - -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/stereobm.cpp b/modules/ocl/src/stereobm.cpp index 8195346c00..716a2f1613 100644 --- a/modules/ocl/src/stereobm.cpp +++ b/modules/ocl/src/stereobm.cpp @@ -46,23 +46,11 @@ //M*/ #include "precomp.hpp" -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -namespace cv -{ -namespace ocl -{ - -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *stereobm; - -} -} namespace cv { namespace ocl diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index 5bc93aa3f5..ef7fff4359 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -45,27 +45,11 @@ //M*/ #include "precomp.hpp" -#include -#include +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; -//////////////////////////////////////////////////////////////////////// -///////////////// stereoBP ///////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////// - -namespace cv -{ - namespace ocl - { - - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *stereobp; - } - -} namespace cv { namespace ocl diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index c9a3f7abc1..bbeecb4748 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -42,21 +42,12 @@ // //M*/ - #include "precomp.hpp" -using namespace std; 
+#include "opencl_kernels.hpp" + using namespace cv; using namespace cv::ocl; -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char* tvl1flow; - } -} - cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL() { tau = 0.25; diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp index 5aecca0630..432d2368a3 100644 --- a/modules/superres/src/btv_l1_ocl.cpp +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -56,6 +56,7 @@ cv::Ptr cv::superres::createSuperResolution_BTVL1 } #else +#include "opencl_kernels.hpp" using namespace std; using namespace cv; @@ -67,8 +68,6 @@ namespace cv { namespace ocl { - extern const char* superres_btvl1; - float* btvWeights_ = NULL; size_t btvWeights_size = 0; } From dd9ff587dca19807c43e9c16ffb80bb072a71e35 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 25 Sep 2013 19:07:14 +0400 Subject: [PATCH 3/8] ocl: file-based ProgramCache refactoring --- cmake/OpenCVModule.cmake | 3 +- cmake/cl2cpp.cmake | 19 +- modules/nonfree/src/surf.ocl.cpp | 4 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 21 +- .../ocl/include/opencv2/ocl/private/util.hpp | 76 ++- modules/ocl/perf/main.cpp | 1 - modules/ocl/src/brute_force_matcher.cpp | 4 + modules/ocl/src/cl_context.cpp | 57 +- modules/ocl/src/cl_operations.cpp | 47 +- modules/ocl/src/cl_programcache.cpp | 512 ++++++++++++------ ...{binarycaching.hpp => cl_programcache.hpp} | 47 +- modules/ocl/src/imgproc.cpp | 12 +- modules/ocl/src/mcwutil.cpp | 10 +- modules/ocl/src/moments.cpp | 2 +- modules/ocl/test/main.cpp | 1 - 15 files changed, 529 insertions(+), 287 deletions(-) rename modules/ocl/src/{binarycaching.hpp => cl_programcache.hpp} (71%) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 4ed8cf0c0f..cc17f5b244 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -445,6 +445,8 @@ macro(ocv_glob_module_sources) source_group("Src\\Cuda" FILES 
${lib_cuda_srcs} ${lib_cuda_hdrs}) endif() + source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) + file(GLOB cl_kernels "src/opencl/*.cl") if(HAVE_OPENCL AND cl_kernels) @@ -457,7 +459,6 @@ macro(ocv_glob_module_sources) list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp") endif() - source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs}) source_group("Include" FILES ${lib_hdrs}) source_group("Include\\detail" FILES ${lib_hdrs_detail}) diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake index 0733a42441..825172b73c 100644 --- a/cmake/cl2cpp.cmake +++ b/cmake/cl2cpp.cmake @@ -20,6 +20,7 @@ namespace cv { namespace ocl { + ") foreach(cl ${cl_list}) @@ -43,12 +44,22 @@ foreach(cl ${cl_list}) string(REGEX REPLACE "\"$" "" lines "${lines}") # unneeded " at the eof - set(STR_CPP "${STR_CPP}const char* ${cl_filename}=\"${lines};\n") - set(STR_HPP "${STR_HPP}extern const char* ${cl_filename};\n") + string(MD5 hash "${lines}") + + set(STR_CPP "${STR_CPP}const struct ProgramEntry ${cl_filename}={\"${cl_filename}\",\n\"${lines}, \"${hash}\"};\n") + set(STR_HPP "${STR_HPP}extern const struct ProgramEntry ${cl_filename};\n") endforeach() set(STR_CPP "${STR_CPP}}\n}\n") set(STR_HPP "${STR_HPP}}\n}\n") -file(WRITE ${OUTPUT} "${STR_CPP}") -file(WRITE ${OUTPUT_HPP} "${STR_HPP}") +file(WRITE "${OUTPUT}" "${STR_CPP}") + +if(EXISTS "${OUTPUT_HPP}") + file(READ "${OUTPUT_HPP}" hpp_lines) +endif() +if("${hpp_lines}" STREQUAL "${STR_HPP}") + message(STATUS "${OUTPUT_HPP} contains same content") +else() + file(WRITE "${OUTPUT_HPP}" "${STR_HPP}") +endif() diff --git a/modules/nonfree/src/surf.ocl.cpp b/modules/nonfree/src/surf.ocl.cpp index d6f72bc7ad..3d5cb4e083 100644 --- a/modules/nonfree/src/surf.ocl.cpp +++ b/modules/nonfree/src/surf.ocl.cpp @@ -55,11 +55,11 @@ namespace cv { namespace ocl { - const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; + static const char noImage2dOption[] = "-D 
DISABLE_IMAGE2D"; static bool use_image2d = false; - static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], + static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth) { char optBuf [100] = {0}; diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 21bb607471..aece2e1427 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -199,24 +199,6 @@ namespace cv void CV_EXPORTS finish(); - //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. - CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , - const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - - //! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. - CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt , - const char **fileName, const int numFiles, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, - int channels, int depth, const char *build_options, - bool finish = true, bool measureKernelTime = false, - bool cleanUp = true); - //! 
Enable or disable OpenCL program binary caching onto local disk // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the // compiled OpenCL program to be cached to the path automatically as "path/*.clb" @@ -233,12 +215,11 @@ namespace cv CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary - CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated. }; CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); //! set where binary cache to be saved to - CV_EXPORTS void setBinpath(const char *path); + CV_EXPORTS void setBinaryPath(const char *path); class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 2aba472f66..30288a6cff 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -55,6 +55,13 @@ namespace cv namespace ocl { +struct ProgramEntry +{ + const char* name; + const char* programStr; + const char* programHash; +}; + inline cl_device_id getClDeviceID(const Context *ctx) { return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr()); @@ -78,41 +85,39 @@ enum openCLMemcpyKind }; ///////////////////////////OpenCL call wrappers//////////////////////////// void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height); + size_t widthInBytes, size_t height); void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type); + size_t widthInBytes, size_t height, DevMemRW 
rw_type, DevMemType mem_type); void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); + const void *src, size_t spitch, + size_t width, size_t height, openCLMemcpyKind kind, int channels = -1); void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset); + const void *src, size_t spitch, + size_t width, size_t height, int src_offset); void CV_EXPORTS openCLFree(void *devPtr); cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName); + const cv::ocl::ProgramEntry* source, std::string kernelName); cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt, - const char **source, std::string kernelName, const char *build_options); + const cv::ocl::ProgramEntry* source, std::string kernelName, const char *build_options); void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair > &args, - int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); -void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName, - size_t globalThreads[3], size_t localThreads[3], - std::vector< std::pair > &args, int channels, int depth, const char *build_options); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int 
depth); -void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, string kernelName, std::vector< std::pair > &args, + int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); +void CV_EXPORTS openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, int channels, int depth, const char *build_options); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth); +void CV_EXPORTS openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, const char *build_options); cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size); + const size_t size); cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr); -int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName); - enum FLUSH_MODE { CLFINISH = 0, @@ -120,11 +125,12 @@ enum FLUSH_MODE DISABLE }; -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); -void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string 
kernelName, size_t globalThreads[3], - size_t localThreads[3], std::vector< std::pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); +void CV_EXPORTS openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, std::string kernelName, size_t globalThreads[3], + size_t localThreads[3], std::vector< std::pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); + // bind oclMat to OpenCL image textures // note: // 1. there is no memory management. User need to explicitly release the resource @@ -183,6 +189,24 @@ inline size_t roundUp(size_t sz, size_t n) return result; } +//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. +CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramEntry* source, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + +//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. 
+CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt, + const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName, + size_t globalThreads[3], size_t localThreads[3], + std::vector< std::pair > &args, + int channels, int depth, const char *build_options, + bool finish = true, bool measureKernelTime = false, + bool cleanUp = true); + }//namespace ocl }//namespace cv diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e82af4e322..fc71906293 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -91,7 +91,6 @@ int main(int argc, char ** argv) } cv::ocl::setDevice(devicesInfo[device]); - cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE); cout << "Device type:" << type << endl << "Platform name:" << devicesInfo[device]->platform->platformName << endl diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index c348db8f30..aaf0703249 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -45,10 +45,14 @@ //M*/ #include "precomp.hpp" +#include +#include +#include #include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; +using namespace std; static const int OPT_SIZE = 100; diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index 6413465f65..135110077c 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -48,15 +48,16 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" +#include "cl_programcache.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) +#endif #undef __CL_ENABLE_EXCEPTIONS #include -namespace cv { namespace ocl { - -extern void fft_teardown(); -extern void clBlasTeardown(); +namespace cv { +namespace ocl { struct PlatformInfoImpl { @@ -174,7 +175,7 @@ static int initializeOpenCLDevices() deviceInfo.info.platform = &platformInfo.info; 
platformInfo.deviceIDs[j] = deviceInfo.info._id; - cl_device_type type = -1; + cl_device_type type = cl_device_type(-1); openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type)); deviceInfo.info.deviceType = DeviceType(type); @@ -182,7 +183,7 @@ static int initializeOpenCLDevices() openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion)); openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName)); openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor)); - cl_uint vendorID = -1; + cl_uint vendorID = 0; openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID)); deviceInfo.info.deviceVendorId = vendorID; openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion)); @@ -347,9 +348,6 @@ static bool __termination = false; ContextImpl::~ContextImpl() { - fft_teardown(); - clBlasTeardown(); - #ifdef WIN32 // if process is on termination stage (ExitProcess was called and other threads were terminated) // then disable command queue release because it may cause program hang @@ -370,8 +368,14 @@ ContextImpl::~ContextImpl() clContext = NULL; } +void fft_teardown(); +void clBlasTeardown(); + void ContextImpl::cleanupContext(void) { + fft_teardown(); + clBlasTeardown(); + cv::AutoLock lock(currentContextMutex); if (currentContext) delete currentContext; @@ -382,6 +386,15 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) { CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size()); + { + cv::AutoLock lock(currentContextMutex); + if (currentContext) + { + if (currentContext->deviceInfo._id == deviceInfo->_id) + return; + } + } + DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id]; CV_Assert(deviceInfo == &infoImpl.info); @@ -466,6 +479,30 @@ int getOpenCLDevices(std::vector &devices, int deviceType, co } } + if (currentContext == NULL) + { + // select default device + const DeviceInfo* selectedDevice = NULL; + for (size_t i = 0; i < devices.size(); 
i++) + { + const DeviceInfo* dev = devices[i]; + if (dev->deviceType == CL_DEVICE_TYPE_GPU) + { + selectedDevice = dev; + break; + } + else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL)) + { + selectedDevice = dev; + } + } + + if (selectedDevice) + { + setDevice(selectedDevice); + } + } + return (int)devices.size(); } diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp index 42138adbe0..25d7454a2a 100644 --- a/modules/ocl/src/cl_operations.cpp +++ b/modules/ocl/src/cl_operations.cpp @@ -48,10 +48,7 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" - -#undef __CL_ENABLE_EXCEPTIONS -#include +#include "cl_programcache.hpp" //#define PRINT_KERNEL_RUN_TIME #define RUN_TIMES 100 @@ -60,7 +57,8 @@ #endif //#define AMD_DOUBLE_DIFFER -namespace cv { namespace ocl { +namespace cv { +namespace ocl { DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT; DevMemRW gDeviceMemRW = DEVICE_MEM_R_W; @@ -179,21 +177,22 @@ void openCLFree(void *devPtr) openCLSafeCall(clReleaseMemObject((cl_mem)devPtr)); } -cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName) +cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName) { return openCLGetKernelFromSource(ctx, source, kernelName, NULL); } -cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName, +cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, const char *build_options) { cl_kernel kernel; cl_int status = 0; CV_Assert(ProgramCache::getProgramCache() != NULL); - cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options); + cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options); CV_Assert(program != NULL); kernel = clCreateKernel(program, kernelName.c_str(), &status); 
openCLVerifyCall(status); + openCLVerifyCall(clReleaseProgram(program)); return kernel; } @@ -213,7 +212,7 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea static double total_execute_time = 0; static double total_kernel_time = 0; #endif -void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3], +void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options) { @@ -275,14 +274,14 @@ void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, openCLSafeCall(clReleaseKernel(kernel)); } -void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, +void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth) { openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth, NULL); } -void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, +void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options) @@ -316,7 +315,7 @@ void openCLExecuteKernel(Context *ctx , const char **source, string kernelName, #endif } -double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName, +double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, const char *build_options, bool finish, bool measureKernelTime, bool cleanUp) @@ -391,29 +390,6 @@ double openCLExecuteKernelInterop(Context *ctx , const char **source, 
string ker return kernelTime; } -//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName, -// size_t globalThreads[3], size_t localThreads[3], -// vector< pair > &args, int channels, int depth, const char *build_options, -// bool finish, bool measureKernelTime, bool cleanUp) -// -//{ -// std::vector fsource; -// for (int i = 0 ; i < numFiles ; i++) -// { -// std::string str; -// if (convertToString(fileName[i], str) >= 0) -// fsource.push_back(str); -// } -// const char **source = new const char *[numFiles]; -// for (int i = 0 ; i < numFiles ; i++) -// source[i] = fsource[i].c_str(); -// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads, -// args, channels, depth, build_options, finish, measureKernelTime, cleanUp); -// fsource.clear(); -// delete []source; -// return kernelTime; -//} - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, const size_t size) { @@ -427,7 +403,6 @@ cl_mem load_constant(cl_context context, cl_command_queue command_queue, const v value, 0, 0, 0)); return con_struct; - } }//namespace ocl diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index 3261319c34..7c58e7c489 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -48,85 +48,93 @@ #include "precomp.hpp" #include #include -#include "binarycaching.hpp" +#include "cl_programcache.hpp" +#if defined _MSC_VER && _MSC_VER >= 1200 +# pragma warning( disable: 4100 4244 4267 4510 4512 4610) +#endif #undef __CL_ENABLE_EXCEPTIONS #include namespace cv { namespace ocl { + +#define MAX_PROG_CACHE_SIZE 1024 /* * The binary caching system to eliminate redundant program source compilation. * Strictly, this is not a cache because we do not implement evictions right now. * We shall add such features to trade-off memory consumption and performance when necessary. 
*/ +cv::Mutex ProgramCache::mutexFiles; +cv::Mutex ProgramCache::mutexCache; + std::auto_ptr _programCache; ProgramCache* ProgramCache::getProgramCache() { - if (NULL == _programCache.get()) - _programCache.reset(new ProgramCache()); - return _programCache.get(); + if (NULL == _programCache.get()) + _programCache.reset(new ProgramCache()); + return _programCache.get(); } ProgramCache::ProgramCache() { - codeCache.clear(); - cacheSize = 0; + codeCache.clear(); + cacheSize = 0; } ProgramCache::~ProgramCache() { - releaseProgram(); + releaseProgram(); } -cl_program ProgramCache::progLookup(string srcsign) +cl_program ProgramCache::progLookup(const string& srcsign) { - map::iterator iter; - iter = codeCache.find(srcsign); - if(iter != codeCache.end()) - return iter->second; - else - return NULL; + map::iterator iter; + iter = codeCache.find(srcsign); + if(iter != codeCache.end()) + return iter->second; + else + return NULL; } -void ProgramCache::addProgram(string srcsign , cl_program program) +void ProgramCache::addProgram(const string& srcsign, cl_program program) { - if(!progLookup(srcsign)) - { - codeCache.insert(map::value_type(srcsign, program)); - } + if (!progLookup(srcsign)) + { + clRetainProgram(program); + codeCache.insert(map::value_type(srcsign, program)); + } } void ProgramCache::releaseProgram() { - map::iterator iter; - for(iter = codeCache.begin(); iter != codeCache.end(); iter++) - { - openCLSafeCall(clReleaseProgram(iter->second)); - } - codeCache.clear(); - cacheSize = 0; + map::iterator iter; + for(iter = codeCache.begin(); iter != codeCache.end(); iter++) + { + openCLSafeCall(clReleaseProgram(iter->second)); + } + codeCache.clear(); + cacheSize = 0; } -static int enable_disk_cache = +static int enable_disk_cache = true || #ifdef _DEBUG false; #else true; #endif -static int update_disk_cache = false; static String binpath = ""; void setBinaryDiskCache(int mode, String path) { + enable_disk_cache = 0; + binpath = ""; + if(mode == CACHE_NONE) { - 
update_disk_cache = 0; - enable_disk_cache = 0; return; } - update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE; - enable_disk_cache |= + enable_disk_cache = #ifdef _DEBUG (mode & CACHE_DEBUG) == CACHE_DEBUG; #else @@ -138,108 +146,286 @@ void setBinaryDiskCache(int mode, String path) } } -void setBinpath(const char *path) +void setBinaryPath(const char *path) { binpath = path; } -int savetofile(const Context*, cl_program &program, const char *fileName) +static const int MAX_ENTRIES = 64; + +struct ProgramFileCache { - size_t binarySize; - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t), - &binarySize, NULL)); - char* binary = (char*)malloc(binarySize); - if(binary == NULL) + struct CV_DECL_ALIGNED(1) ProgramFileHeader { - CV_Error(CV_StsNoMem, "Failed to allocate host memory."); - } - openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *), - &binary, - NULL)); + int hashLength; + //char hash[]; + }; - FILE *fp = fopen(fileName, "wb+"); - if(fp != NULL) + struct CV_DECL_ALIGNED(1) ProgramFileTable { - fwrite(binary, binarySize, 1, fp); - free(binary); - fclose(fp); - } - return 1; -} + int numberOfEntries; + //int firstEntryOffset[]; + }; -cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName, - const char *build_options) -{ - cl_program program; - cl_int status = 0; - stringstream src_sign; - string srcsign; - string filename; - - if (NULL != build_options) + struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry { - src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options; - } - else - { - src_sign << (int64)(*source) << getClContext(ctx); - } - srcsign = src_sign.str(); + int nextEntry; + int dataSize; + int optionsLength; + //char options[]; + // char data[]; + }; - program = NULL; - program = ProgramCache::getProgramCache()->progLookup(srcsign); + string fileName_; + const char* hash_; + std::fstream f; - if (!program) + 
ProgramFileCache(const string& fileName, const char* hash) + : fileName_(fileName), hash_(hash) { - //config build programs - std::string all_build_options; - if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) - all_build_options += ctx->getDeviceInfo().compilationExtraOptions; - if (build_options != NULL) + if (hash_ != NULL) { - all_build_options += " "; - all_build_options += build_options; + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + if(f.is_open()) + { + int hashLength = 0; + f.read((char*)&hashLength, sizeof(int)); + std::vector fhash(hashLength + 1); + f.read(&fhash[0], hashLength); + if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0) + { + f.close(); + remove(fileName_.c_str()); + return; + } + } } - filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb"; + } - FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL; - if(fp == NULL || update_disk_cache) + int getHash(const string& options) + { + int hash = 0; + for (size_t i = 0; i < options.length(); i++) { - if(fp != NULL) - fclose(fp); + hash = (hash << 2) ^ (hash >> 17) ^ options[i]; + } + return (hash + (hash >> 16)) & (MAX_ENTRIES - 1); + } - program = clCreateProgramWithSource( - getClContext(ctx), 1, source, NULL, &status); - openCLVerifyCall(status); - cl_device_id device = getClDeviceID(ctx); - status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); - if(status == CL_SUCCESS && enable_disk_cache) - savetofile(ctx, program, filename.c_str()); + bool readConfigurationFromFile(const string& options, std::vector& buf) + { + if (hash_ == NULL) + return false; + + if (!f.is_open()) + return false; + + f.seekg(0, std::fstream::end); + size_t fileSize = (size_t)f.tellg(); + if (fileSize == 0) + { + std::cerr << "Invalid file (empty): " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + f.seekg(0, std::fstream::beg); + + int hashLength = 0; + 
f.read((char*)&hashLength, sizeof(int)); + CV_Assert(hashLength > 0); + f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); + + int numberOfEntries = 0; + f.read((char*)&numberOfEntries, sizeof(int)); + CV_Assert(numberOfEntries > 0); + if (numberOfEntries != MAX_ENTRIES) + { + std::cerr << "Invalid file: " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + + std::vector firstEntryOffset(numberOfEntries); + f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + + int entryNum = getHash(options); + + int entryOffset = firstEntryOffset[entryNum]; + ProgramFileConfigurationEntry entry; + while (entryOffset > 0) + { + f.seekg(entryOffset, std::fstream::beg); + assert(sizeof(entry) == sizeof(int)*3); + f.read((char*)&entry, sizeof(entry)); + std::vector foptions(entry.optionsLength); + if ((int)options.length() == entry.optionsLength) + { + if (entry.optionsLength > 0) + f.read(&foptions[0], entry.optionsLength); + if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0) + { + buf.resize(entry.dataSize); + f.read(&buf[0], entry.dataSize); + f.seekg(0, std::fstream::beg); + return true; + } + } + if (entry.nextEntry <= 0) + break; + entryOffset = entry.nextEntry; + } + return false; + } + + bool writeConfigurationToFile(const string& options, std::vector& buf) + { + if (hash_ == NULL) + return true; // don't save dynamic kernels + + if (!f.is_open()) + { + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + if (!f.is_open()) + { + f.open(fileName_.c_str(), ios::out|ios::binary); + if (!f.is_open()) + return false; + } + } + + f.seekg(0, std::fstream::end); + size_t fileSize = (size_t)f.tellg(); + if (fileSize == 0) + { + f.seekp(0, std::fstream::beg); + int hashLength = strlen(hash_); + f.write((char*)&hashLength, sizeof(int)); + f.write(hash_, hashLength); + + int numberOfEntries = MAX_ENTRIES; + f.write((char*)&numberOfEntries, sizeof(int)); + std::vector firstEntryOffset(MAX_ENTRIES, 0); 
+ f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + f.close(); + f.open(fileName_.c_str(), ios::in|ios::out|ios::binary); + CV_Assert(f.is_open()); + f.seekg(0, std::fstream::end); + fileSize = (size_t)f.tellg(); + } + f.seekg(0, std::fstream::beg); + + int hashLength = 0; + f.read((char*)&hashLength, sizeof(int)); + CV_Assert(hashLength > 0); + f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg); + + int numberOfEntries = 0; + f.read((char*)&numberOfEntries, sizeof(int)); + CV_Assert(numberOfEntries > 0); + if (numberOfEntries != MAX_ENTRIES) + { + std::cerr << "Invalid file: " << fileName_ << std::endl; + f.close(); + remove(fileName_.c_str()); + return false; + } + + size_t tableEntriesOffset = (size_t)f.tellg(); + std::vector firstEntryOffset(numberOfEntries); + f.read((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + + int entryNum = getHash(options); + + int entryOffset = firstEntryOffset[entryNum]; + ProgramFileConfigurationEntry entry; + while (entryOffset > 0) + { + f.seekg(entryOffset, std::fstream::beg); + assert(sizeof(entry) == sizeof(int)*3); + f.read((char*)&entry, sizeof(entry)); + std::vector foptions(entry.optionsLength); + if ((int)options.length() == entry.optionsLength) + { + if (entry.optionsLength > 0) + f.read(&foptions[0], entry.optionsLength); + CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0); + } + if (entry.nextEntry <= 0) + break; + entryOffset = entry.nextEntry; + } + if (entryOffset > 0) + { + f.seekp(entryOffset, std::fstream::beg); + entry.nextEntry = fileSize; + f.write((char*)&entry, sizeof(entry)); } else { - fseek(fp, 0, SEEK_END); - size_t binarySize = ftell(fp); - fseek(fp, 0, SEEK_SET); - char *binary = new char[binarySize]; - CV_Assert(1 == fread(binary, binarySize, 1, fp)); - fclose(fp); - cl_int status = 0; - cl_device_id device = getClDeviceID(ctx); - program = clCreateProgramWithBinary(getClContext(ctx), - 1, - &device, - (const size_t *)&binarySize, - (const 
unsigned char **)&binary, - NULL, - &status); + firstEntryOffset[entryNum] = fileSize; + f.seekp(tableEntriesOffset, std::fstream::beg); + f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries); + } + f.seekp(fileSize, std::fstream::beg); + entry.nextEntry = 0; + entry.dataSize = buf.size(); + entry.optionsLength = options.length(); + f.write((char*)&entry, sizeof(entry)); + f.write(options.c_str(), entry.optionsLength); + f.write(&buf[0], entry.dataSize); + return true; + } + + cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const string& options) + { + cl_int status = 0; + cl_program program = NULL; + std::vector binary; + if (!enable_disk_cache || !readConfigurationFromFile(options, binary)) + { + program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status); openCLVerifyCall(status); - status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL); - delete[] binary; + cl_device_id device = getClDeviceID(ctx); + status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); + if(status == CL_SUCCESS) + { + if (enable_disk_cache) + { + size_t binarySize; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t), + &binarySize, NULL)); + + std::vector binary(binarySize); + + char* ptr = &binary[0]; + openCLSafeCall(clGetProgramInfo(program, + CL_PROGRAM_BINARIES, + sizeof(char*), + &ptr, + NULL)); + + if (!writeConfigurationToFile(options, binary)) + { + std::cerr << "Can't write data to file: " << fileName_ << std::endl; + } + } + } + } + else + { + cl_device_id device = getClDeviceID(ctx); + size_t size = binary.size(); + const char* ptr = &binary[0]; + program = clCreateProgramWithBinary(getClContext(ctx), + 1, &device, + (const size_t *)&size, (const unsigned char **)&ptr, + NULL, &status); + openCLVerifyCall(status); + status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL); } if(status 
!= CL_SUCCESS) @@ -259,53 +445,77 @@ cl_program ProgramCache::getProgram(const Context *ctx, const char **source, str memset(buildLog, 0, buildLogSize); openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); - std::cout << "\n\t\t\tBUILD LOG\n"; + std::cout << "\nBUILD LOG: " << options << "\n"; std::cout << buildLog << endl; delete [] buildLog; } openCLVerifyCall(status); } - //Cache the binary for future use if build_options is null - if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) - this->addProgram(srcsign, program); - else - cout << "Warning: code cache has been full.\n"; + return program; + } +}; + +cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source, + const char *build_options) +{ + stringstream src_sign; + + src_sign << (int64)(source->programStr); + src_sign << getClContext(ctx); + if (NULL != build_options) + { + src_sign << "_" << build_options; + } + + { + cv::AutoLock lockCache(mutexCache); + cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); + if (!!program) + { + clRetainProgram(program); + return program; + } + } + + cv::AutoLock lockCache(mutexFiles); + + // second check + { + cv::AutoLock lockCache(mutexCache); + cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str()); + if (!!program) + { + clRetainProgram(program); + return program; + } + } + + string all_build_options; + if (!ctx->getDeviceInfo().compilationExtraOptions.empty()) + all_build_options += ctx->getDeviceInfo().compilationExtraOptions; + if (build_options != NULL) + { + all_build_options += " "; + all_build_options += build_options; + } + const DeviceInfo& devInfo = ctx->getDeviceInfo(); + string filename = binpath + (source->name ? 
source->name : "NULL") + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb"; + + ProgramFileCache programFileCache(filename, source->programHash); + cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options); + + //Cache the binary for future use if build_options is null + if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE) + { + cv::AutoLock lockCache(mutexCache); + this->addProgram(src_sign.str(), program); + } + else + { + cout << "Warning: code cache has been full.\n"; } return program; } -//// Converts the contents of a file into a string -//static int convertToString(const char *filename, std::string& s) -//{ -// size_t size; -// char* str; -// -// std::fstream f(filename, (std::fstream::in | std::fstream::binary)); -// if(f.is_open()) -// { -// size_t fileSize; -// f.seekg(0, std::fstream::end); -// size = fileSize = (size_t)f.tellg(); -// f.seekg(0, std::fstream::beg); -// -// str = new char[size+1]; -// if(!str) -// { -// f.close(); -// return -1; -// } -// -// f.read(str, fileSize); -// f.close(); -// str[size] = '\0'; -// -// s = str; -// delete[] str; -// return 0; -// } -// printf("Error: Failed to open file %s\n", filename); -// return -1; -//} - } // namespace ocl } // namespace cv diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/cl_programcache.hpp similarity index 71% rename from modules/ocl/src/binarycaching.hpp rename to modules/ocl/src/cl_programcache.hpp index cc9e71a330..ea2ab400c6 100644 --- a/modules/ocl/src/binarycaching.hpp +++ b/modules/ocl/src/cl_programcache.hpp @@ -44,41 +44,42 @@ #include "precomp.hpp" -using namespace cv; -using namespace cv::ocl; -using namespace std; -using std::cout; -using std::endl; - -namespace cv { namespace ocl { +namespace cv { +namespace ocl { class ProgramCache { protected: - ProgramCache(); - ~ProgramCache(); - friend class std::auto_ptr; + ProgramCache(); + ~ProgramCache(); + friend class std::auto_ptr; public: - static ProgramCache 
*getProgramCache(); + static ProgramCache *getProgramCache(); - cl_program getProgram(const Context *ctx, const char **source, string kernelName, + cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source, const char *build_options); - void releaseProgram(); + void releaseProgram(); protected: - //lookup the binary given the file name - cl_program progLookup(string srcsign); + //lookup the binary given the file name + // (with acquired mutexCache) + cl_program progLookup(const string& srcsign); - //add program to the cache - void addProgram(string srcsign, cl_program program); + //add program to the cache + // (with acquired mutexCache) + void addProgram(const string& srcsign, cl_program program); - map codeCache; - unsigned int cacheSize; + map codeCache; + unsigned int cacheSize; - //The presumed watermark for the cache volume (256MB). Is it enough? - //We may need more delicate algorithms when necessary later. - //Right now, let's just leave it along. - static const unsigned MAX_PROG_CACHE_SIZE = 1024; + //The presumed watermark for the cache volume (256MB). Is it enough? + //We may need more delicate algorithms when necessary later. + //Right now, let's just leave it along. 
+ static const unsigned MAX_PROG_CACHE_SIZE = 1024; + + // acquire both mutexes in this order: 1) mutexFiles 2) mutexCache + static cv::Mutex mutexFiles; + static cv::Mutex mutexCache; }; }//namespace ocl diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index b4d2b70a0d..5e0f54fab5 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -1108,7 +1108,7 @@ namespace cv CV_Assert(Dx.offset == 0 && Dy.offset == 0); } - static void corner_ocl(const char *src_str, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, + static void corner_ocl(const cv::ocl::ProgramEntry* source, string kernelName, int block_size, float k, oclMat &Dx, oclMat &Dy, oclMat &dst, int border_type) { char borderType[30]; @@ -1160,7 +1160,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); args.push_back( make_pair( sizeof(cl_float) , (void *)&k)); - openCLExecuteKernel(dst.clCxt, &src_str, kernelName, gt, lt, args, -1, -1, build_options); + openCLExecuteKernel(dst.clCxt, source, kernelName, gt, lt, args, -1, -1, build_options); } void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, @@ -1181,7 +1181,7 @@ namespace cv CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); + corner_ocl(&imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); } void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) @@ -1200,7 +1200,7 @@ namespace cv CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == 
cv::BORDER_REFLECT); extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); + corner_ocl(&imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) @@ -1749,7 +1749,7 @@ namespace cv } //////////////////////////////////convolve//////////////////////////////////////////////////// -static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) +static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source) { CV_Assert(src.depth() == CV_32FC1); CV_Assert(temp1.depth() == CV_32F); @@ -1784,7 +1784,7 @@ static void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, st args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&temp1.cols )); - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth); } void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) { diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index e4e2e918fb..66aa76560c 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -72,7 +72,7 @@ namespace cv namespace ocl { // provide additional methods for the user to interact with the command queue after a task is fired - static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], + static void openCLExecuteKernel_2(Context *clCxt, const 
cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) { @@ -118,14 +118,14 @@ namespace cv openCLSafeCall(clReleaseKernel(kernel)); } - void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, + void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, FLUSH_MODE finish_mode) { openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, NULL, finish_mode); } - void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, + void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) @@ -249,7 +249,7 @@ namespace cv bool support_image2d(Context *clCxt) { - static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}"; + const cv::ocl::ProgramEntry _kernel = {NULL, "__kernel void test_func(image2d_t img) {}", NULL}; static bool _isTested = false; static bool _support = false; if(_isTested) @@ -258,7 +258,7 @@ namespace cv } try { - cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); + cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel, "test_func"); cv::ocl::finish(); _support = true; } diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index 377af28634..a19f7fc516 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -229,7 +229,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" ); } - if( !moments ) + if( !mom ) CV_Error( CV_StsNullPtr, "" ); memset( mom, 0, 
sizeof(*mom)); diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 8071102bad..4061c2579e 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -118,7 +118,6 @@ int main(int argc, char **argv) } cv::ocl::setDevice(devicesInfo[device]); - setBinaryDiskCache(CACHE_UPDATE); cout << "Device type: " << type << endl << "Platform name: " << devicesInfo[device]->platform->platformName << endl From 16adbda4d3c0d788f2924f2fe881c08ee001228d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 19:07:37 +0400 Subject: [PATCH 4/8] ocl: added OpenCL device selection via OPENCV_OPENCL_DEVICE environment variable --- modules/ocl/perf/main.cpp | 73 +++++---- modules/ocl/src/cl_context.cpp | 275 ++++++++++++++++++++++++++++----- modules/ocl/test/main.cpp | 75 +++++---- 3 files changed, 321 insertions(+), 102 deletions(-) diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index fc71906293..9f87054e6d 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -51,6 +51,8 @@ const char * impls[] = #endif }; +using namespace cv::ocl; + int main(int argc, char ** argv) { const char * keys = @@ -59,42 +61,49 @@ int main(int argc, char ** argv) "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; - CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) + if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates { - cout << "Available options besides google test option:" << endl; - cmd.printParams(); - return 0; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Available options besides google test option:" << endl; + cmd.printParams(); + return 0; + } + + string type = cmd.get("type"); + int pid = cmd.get("platform"); + int device = cmd.get("device"); + + int flag = type == "cpu" ? 
cv::ocl::CVCL_DEVICE_TYPE_CPU : + cv::ocl::CVCL_DEVICE_TYPE_GPU; + + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) + { + std::cout << "platform is invalid\n"; + return 1; + } + + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) + { + std::cout << "device/platform invalid\n"; + return 1; + } + + cv::ocl::setDevice(devicesInfo[device]); } - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); + const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU : - cv::ocl::CVCL_DEVICE_TYPE_GPU; - - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } - - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; - } - - cv::ocl::setDevice(devicesInfo[device]); - - cout << "Device type:" << type << endl - << "Platform name:" << devicesInfo[device]->platform->platformName << endl - << "Device name:" << devicesInfo[device]->deviceName << endl; + cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? + "CPU" : + (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? 
"GPU" : "unknown")) << endl + << "Platform name: " << deviceInfo.platform->platformName << endl + << "Device name: " << deviceInfo.deviceName << endl; CV_PERF_TEST_MAIN_INTERNALS(ocl, impls) } diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index 135110077c..e24cc8b358 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -51,7 +51,7 @@ #include "cl_programcache.hpp" #if defined _MSC_VER && _MSC_VER >= 1200 -# pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) +#pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) #endif #undef __CL_ENABLE_EXCEPTIONS #include @@ -118,8 +118,211 @@ static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& m return true; } +static void split(const std::string &s, char delim, std::vector &elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } +} + +static std::vector split(const std::string &s, char delim) { + std::vector elems; + split(s, delim, elems); + return elems; +} + +// Layout: :: +// Sample: AMD:GPU: +// Sample: AMD:GPU:Tahiti +// Sample: :GPU|CPU: = '' = ':' = '::' +static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, + std::string& platform, std::vector& deviceTypes, std::string& deviceNameOrID) +{ + std::string deviceTypesStr; + size_t p0 = configurationStr.find(':'); + if (p0 != std::string::npos) + { + size_t p1 = configurationStr.find(':', p0 + 1); + if (p1 != std::string::npos) + { + size_t p2 = configurationStr.find(':', p1 + 1); + if (p2 != std::string::npos) + { + std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl; + return false; + } + else + { + // assume platform + device types + device name/id + platform = configurationStr.substr(0, p0); + deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1)); + deviceNameOrID = configurationStr.substr(p1 + 1, 
configurationStr.length() - (p1 + 1)); + } + } + else + { + // assume platform + device types + platform = configurationStr.substr(0, p0); + deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1)); + } + } + else + { + // assume only platform + platform = configurationStr; + } + deviceTypes = split(deviceTypesStr, '|'); + return true; +} + +static bool __deviceSelected = false; +static bool selectOpenCLDevice() +{ + __deviceSelected = true; + + std::string platform; + std::vector deviceTypes; + std::string deviceName; + const char* configuration = getenv("OPENCV_OPENCL_DEVICE"); + if (configuration) + { + if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)) + return false; + } + + bool isID = false; + int deviceID = -1; + if (deviceName.length() == 1) + // We limit ID range to 0..9, because we want to write: + // - '2500' to mean i5-2500 + // - '8350' to mean AMD FX-8350 + // - '650' to mean GeForce 650 + // To extend ID range change condition to '> 0' + { + isID = true; + for (size_t i = 0; i < deviceName.length(); i++) + { + if (!isdigit(deviceName[i])) + { + isID = false; + break; + } + } + if (isID) + { + deviceID = atoi(deviceName.c_str()); + CV_Assert(deviceID >= 0); + } + } + + const PlatformInfo* platformInfo = NULL; + if (platform.length() > 0) + { + PlatformsInfo platforms; + getOpenCLPlatforms(platforms); + for (size_t i = 0; i < platforms.size(); i++) + { + if (platforms[i]->platformName.find(platform) != std::string::npos) + { + platformInfo = platforms[i]; + break; + } + } + if (platformInfo == NULL) + { + std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl; + goto not_found; + } + } + + if (deviceTypes.size() == 0) + { + if (!isID) + { + deviceTypes.push_back("GPU"); + deviceTypes.push_back("CPU"); + } + else + { + deviceTypes.push_back("ALL"); + } + } + for (size_t t = 0; t < deviceTypes.size(); t++) + { + int deviceType = 0; + if 
(deviceTypes[t] == "GPU") + { + deviceType = CVCL_DEVICE_TYPE_GPU; + } + else if (deviceTypes[t] == "CPU") + { + deviceType = CVCL_DEVICE_TYPE_CPU; + } + else if (deviceTypes[t] == "ACCELERATOR") + { + deviceType = CVCL_DEVICE_TYPE_ACCELERATOR; + } + else if (deviceTypes[t] == "ALL") + { + deviceType = CVCL_DEVICE_TYPE_ALL; + } + else + { + std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl; + goto not_found; + } + + DevicesInfo devices; + getOpenCLDevices(devices, deviceType, platformInfo); + + for (size_t i = (isID ? deviceID : 0); + (isID ? (i == (size_t)deviceID) : true) && (i < devices.size()); + i++) + { + if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos) + { + // check for OpenCL 1.1 + if (devices[i]->deviceVersionMajor < 1 || + (devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1)) + { + std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName + << "(" << devices[i]->platform->platformName << ")" << std::endl; + continue; // unsupported version of device, skip it + } + try + { + setDevice(devices[i]); + } + catch (...) + { + std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName + << "(" << devices[i]->platform->platformName << ")" << std::endl; + goto not_found; + } + return true; + } + } + } +not_found: + std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl + << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl + << " Device types: "; + for (size_t t = 0; t < deviceTypes.size(); t++) + { + std::cerr << deviceTypes[t] << " "; + } + std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? 
"any" : deviceName) << std::endl; + return false; +} + +static cv::Mutex __initializedMutex; +static bool __initialized = false; static int initializeOpenCLDevices() { + assert(!__initialized); + __initialized = true; + assert(global_devices.size() == 0); std::vector platforms; @@ -284,8 +487,6 @@ protected: } ~ContextImpl(); public: - - static ContextImpl* getContext(); static void setContext(const DeviceInfo* deviceInfo); bool supportsFeature(FEATURE_TYPE featureType) const; @@ -298,6 +499,28 @@ static ContextImpl* currentContext = NULL; Context* Context::getContext() { + if (currentContext == NULL) + { + if (!__initialized || !__deviceSelected) + { + cv::AutoLock lock(__initializedMutex); + if (!__initialized) + { + if (initializeOpenCLDevices() == 0) + { + CV_Error(CV_GpuNotSupported, "OpenCL not available"); + } + } + if (!__deviceSelected) + { + if (!selectOpenCLDevice()) + { + CV_Error(CV_GpuNotSupported, "Can't select OpenCL device"); + } + } + } + CV_Assert(currentContext != NULL); + } return currentContext; } @@ -422,13 +645,11 @@ void ContextImpl::setContext(const DeviceInfo* deviceInfo) } } -ContextImpl* ContextImpl::getContext() -{ - return currentContext; -} - int getOpenCLPlatforms(PlatformsInfo& platforms) { + if (!__initialized) + initializeOpenCLDevices(); + platforms.clear(); for (size_t id = 0; id < global_platforms.size(); ++id) @@ -442,6 +663,9 @@ int getOpenCLPlatforms(PlatformsInfo& platforms) int getOpenCLDevices(std::vector &devices, int deviceType, const PlatformInfo* platform) { + if (!__initialized) + initializeOpenCLDevices(); + devices.clear(); switch(deviceType) @@ -461,7 +685,7 @@ int getOpenCLDevices(std::vector &devices, int deviceType, co for (size_t id = 0; id < global_devices.size(); ++id) { DeviceInfoImpl& deviceInfo = global_devices[id]; - if (((int)deviceInfo.info.deviceType & deviceType) == deviceType) + if (((int)deviceInfo.info.deviceType & deviceType) != 0) { devices.push_back(&deviceInfo.info); } @@ -479,35 +703,14 @@ 
int getOpenCLDevices(std::vector &devices, int deviceType, co } } - if (currentContext == NULL) - { - // select default device - const DeviceInfo* selectedDevice = NULL; - for (size_t i = 0; i < devices.size(); i++) - { - const DeviceInfo* dev = devices[i]; - if (dev->deviceType == CL_DEVICE_TYPE_GPU) - { - selectedDevice = dev; - break; - } - else if (dev->deviceType == CL_DEVICE_TYPE_CPU && (selectedDevice == NULL)) - { - selectedDevice = dev; - } - } - - if (selectedDevice) - { - setDevice(selectedDevice); - } - } - return (int)devices.size(); } void setDevice(const DeviceInfo* info) { + if (!__deviceSelected) + __deviceSelected = true; + ContextImpl::setContext(info); } @@ -518,14 +721,14 @@ bool supportsFeature(FEATURE_TYPE featureType) struct __Module { - __Module() { initializeOpenCLDevices(); } + __Module() { /* moved to Context::getContext(): initializeOpenCLDevices(); */ } ~__Module() { ContextImpl::cleanupContext(); } }; static __Module __module; -}//namespace ocl -}//namespace cv +} // namespace ocl +} // namespace cv #if defined(WIN32) && defined(CVAPI_EXPORTS) diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index 4061c2579e..02df8419ca 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -83,45 +83,52 @@ int main(int argc, char **argv) "{ p | platform | -1 | set platform id }" "{ d | device | 0 | set device id }"; - CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) + if (getenv("OPENCV_OPENCL_DEVICE") == NULL) // TODO Remove this after buildbot updates { - cout << "Available options besides google test option:" << endl; - cmd.printParams(); - return 0; - } - string type = cmd.get("type"); - int pid = cmd.get("platform"); - int device = cmd.get("device"); + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Available options besides google test option:" << endl; + cmd.printParams(); + return 0; + } + string type = cmd.get("type"); + int pid = cmd.get("platform"); + int 
device = cmd.get("device"); - print_info(); - int flag = CVCL_DEVICE_TYPE_GPU; - if(type == "cpu") - { - flag = CVCL_DEVICE_TYPE_CPU; + print_info(); + int flag = CVCL_DEVICE_TYPE_GPU; + if(type == "cpu") + { + flag = CVCL_DEVICE_TYPE_CPU; + } + + cv::ocl::PlatformsInfo platformsInfo; + cv::ocl::getOpenCLPlatforms(platformsInfo); + if (pid >= (int)platformsInfo.size()) + { + std::cout << "platform is invalid\n"; + return 1; + } + + cv::ocl::DevicesInfo devicesInfo; + int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); + if (device < 0 || device >= devnums) + { + std::cout << "device/platform invalid\n"; + return 1; + } + + cv::ocl::setDevice(devicesInfo[device]); } - cv::ocl::PlatformsInfo platformsInfo; - cv::ocl::getOpenCLPlatforms(platformsInfo); - if (pid >= (int)platformsInfo.size()) - { - std::cout << "platform is invalid\n"; - return 1; - } + const DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo(); - cv::ocl::DevicesInfo devicesInfo; - int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]); - if (device < 0 || device >= devnums) - { - std::cout << "device/platform invalid\n"; - return 1; - } - - cv::ocl::setDevice(devicesInfo[device]); - - cout << "Device type: " << type << endl - << "Platform name: " << devicesInfo[device]->platform->platformName << endl - << "Device name: " << devicesInfo[device]->deviceName << endl; + cout << "Device type: " << (deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU ? + "CPU" : + (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? 
"GPU" : "unknown")) << endl + << "Platform name: " << deviceInfo.platform->platformName << endl + << "Device name: " << deviceInfo.deviceName << endl; return RUN_ALL_TESTS(); } From 8beb514ecfacc8ddaa7c53011021a84fb56c8fe0 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 30 Sep 2013 21:32:46 +0400 Subject: [PATCH 5/8] ocl: merge with upstream/2.4 --- modules/ocl/src/arithm.cpp | 6 ++-- modules/ocl/src/svm.cpp | 36 ++++++++++------------ modules/ocl/test/test_matrix_operation.cpp | 12 ++++---- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 7d97e67545..f34e0f730f 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -614,7 +614,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) CV_Assert(!src1.empty()); CV_Assert(src2.empty() || (src1.type() == src2.type() && src1.size() == src2.size())); - if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F) + if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double"); } @@ -1261,7 +1261,7 @@ int cv::ocl::countNonZero(const oclMat &src) CV_Error(CV_GpuNotSupported, "selected device doesn't support double"); } - size_t groupnum = src.clCxt->computeUnits(); + size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits; CV_Assert(groupnum != 0); int dbsize = groupnum; @@ -1708,7 +1708,7 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) void cv::ocl::setIdentity(oclMat& src, const Scalar & scalar) { Context *clCxt = Context::getContext(); - if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n"); return; diff --git a/modules/ocl/src/svm.cpp b/modules/ocl/src/svm.cpp index c3df581f40..311bd7d98f 100644 
--- a/modules/ocl/src/svm.cpp +++ b/modules/ocl/src/svm.cpp @@ -43,9 +43,13 @@ // //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" + using namespace cv; using namespace ocl; +namespace cv { namespace ocl { + #if 1 typedef float Qfloat; #define QFLOAT_TYPE CV_32F @@ -54,14 +58,6 @@ typedef double Qfloat; #define QFLOAT_TYPE CV_64F #endif -namespace cv -{ -namespace ocl -{ -///////////////////////////OpenCL kernel strings/////////////////////////// -extern const char *svm; -} -} class CvSVMKernel_ocl: public CvSVMKernel { public: @@ -612,7 +608,7 @@ static void matmul_rbf(oclMat& src, oclMat& src_e, oclMat& dst, int src_rows, in args.push_back(make_pair(sizeof(cl_int), (void* )&src2_cols)); args.push_back(make_pair(sizeof(cl_int), (void* )&width)); float gamma = 0.0f; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { gamma = (float)gamma1; args.push_back(make_pair(sizeof(cl_float), (void* )&gamma)); @@ -748,7 +744,7 @@ float CvSVM_OCL::predict(const CvMat* samples, CV_OUT CvMat* results) const if(params.kernel_type == CvSVM::RBF) { sv_.upload(sv_temp); - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { dst = oclMat(sample_count, sv_total, CV_32FC1); } @@ -886,7 +882,7 @@ bool CvSVMSolver_ocl::solve_generic( CvSVMSolutionInfo& si ) if(params->kernel_type == CvSVM::RBF) { src_e = src; - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { dst = oclMat(sample_count, sample_count, CV_32FC1); } @@ -1053,7 +1049,7 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& //int j; (this->*calc_func_ocl)( vcount, row_idx, results, src); -#if defined HAVE_CLAMDBLAS +// FIXIT #if defined HAVE_CLAMDBLAS const Qfloat max_val = (Qfloat)(FLT_MAX * 1e-3); int j; for( j = 0; j < vcount; j++ ) @@ -1063,7 
+1059,7 @@ void CvSVMKernel_ocl::calc( int vcount, const int row_idx, Qfloat* results, Mat& results[j] = max_val; } } -#endif +// FIXIT #endif } bool CvSVMKernel_ocl::create( const CvSVMParams* _params, Calc_ocl _calc_func, Calc _calc_func1 ) { @@ -1115,7 +1111,7 @@ void CvSVMKernel_ocl::calc_non_rbf_base( int vcount, const int row_idx, Qfloat* } void CvSVMKernel_ocl::calc_rbf( int vcount, const int row_idx, Qfloat* results, Mat& src) { - if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE)) + if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)) { for(int m = 0; m < vcount; m++) { @@ -1140,14 +1136,14 @@ void CvSVMKernel_ocl::calc_poly( int vcount, const int row_idx, Qfloat* results, calc_non_rbf_base( vcount, row_idx, results, src); -#if defined HAVE_CLAMDBLAS +//FIXIT #if defined HAVE_CLAMDBLAS CvMat R = cvMat( 1, vcount, QFLOAT_TYPE, results ); if( vcount > 0 ) { cvPow( &R, &R, params->degree ); } -#endif +//FIXIT #endif } @@ -1155,11 +1151,11 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul { calc_non_rbf_base( vcount, row_idx, results, src); // TODO: speedup this -#if defined HAVE_CLAMDBLAS +//FIXIT #if defined HAVE_CLAMDBLAS for(int j = 0; j < vcount; j++ ) { Qfloat t = results[j]; - double e = exp(-fabs(t)); + double e = ::exp(-fabs(t)); if( t > 0 ) { results[j] = (Qfloat)((1. - e) / (1. + e)); @@ -1169,7 +1165,7 @@ void CvSVMKernel_ocl::calc_sigmoid( int vcount, const int row_idx, Qfloat* resul results[j] = (Qfloat)((e - 1.) 
/ (e + 1.)); } } -#endif +//FIXIT #endif } CvSVM_OCL::CvSVM_OCL() { @@ -1199,3 +1195,5 @@ void CvSVM_OCL::create_solver( ) { solver = new CvSVMSolver_ocl(&params); } + +} } diff --git a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index 46e077a6bb..bc8cdf2bb3 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -132,7 +132,7 @@ typedef ConvertToTestBase ConvertTo; TEST_P(ConvertTo, Accuracy) { if((src_depth == CV_64F || dst_depth == CV_64F) && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -228,7 +228,7 @@ typedef CopyToTestBase CopyTo; TEST_P(CopyTo, Without_mask) { if((src.depth() == CV_64F) && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -246,7 +246,7 @@ TEST_P(CopyTo, Without_mask) TEST_P(CopyTo, With_mask) { if(src.depth() == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -342,7 +342,7 @@ typedef SetToTestBase SetTo; TEST_P(SetTo, Without_mask) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -360,7 +360,7 @@ TEST_P(SetTo, Without_mask) TEST_P(SetTo, With_mask) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } @@ -430,7 +430,7 @@ PARAM_TEST_CASE(convertC3C4, MatType, bool) 
TEST_P(convertC3C4, Accuracy) { if(depth == CV_64F && - !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::Context::CL_DOUBLE)) + !cv::ocl::Context::getContext()->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE)) { return; // returns silently } From 7f0680fc8be6ae485df2aab0e02ea92616590801 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 17:05:00 +0400 Subject: [PATCH 6/8] ocl: workaround for OpenCL C++ bindings usage: CL/cl.hpp --- .../cl_runtime_opencl11_wrappers.hpp | 231 +++++++++++++++ .../cl_runtime_opencl12_wrappers.hpp | 273 ++++++++++++++++++ modules/ocl/src/cl_context.cpp | 9 + modules/ocl/src/cl_programcache.cpp | 9 + .../ocl/src/cl_runtime/generator/common.py | 23 ++ .../ocl/src/cl_runtime/generator/parser_cl.py | 6 + .../cl_runtime_opencl_wrappers.hpp.in | 6 + 7 files changed, 557 insertions(+) create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp create mode 100644 modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp create mode 100644 modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp new file mode 100644 index 0000000000..2617272b08 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp @@ -0,0 +1,231 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, 
void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clRetainCommandQueue +#define 
clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetCommandQueueProperty +#define clSetCommandQueueProperty clSetCommandQueueProperty_fn +inline cl_int clSetCommandQueueProperty(cl_command_queue p0, cl_command_queue_properties p1, cl_bool p2, cl_command_queue_properties* p3) { return clSetCommandQueueProperty_pfn(p0, p1, p2, p3); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn +inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn 
+inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return 
clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } +#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, 
cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); } 
+#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, 
const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, 
p8, p9, p10); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return 
clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file diff --git a/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp 
b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp new file mode 100644 index 0000000000..8716450e27 --- /dev/null +++ b/modules/ocl/include/opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp @@ -0,0 +1,273 @@ +// +// AUTOGENERATED, DO NOT EDIT +// +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +// generated by parser_cl.py +#undef clGetPlatformIDs +#define clGetPlatformIDs clGetPlatformIDs_fn +inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); } +#undef clGetPlatformInfo +#define clGetPlatformInfo clGetPlatformInfo_fn +inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceIDs +#define clGetDeviceIDs clGetDeviceIDs_fn +inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); } +#undef clGetDeviceInfo +#define clGetDeviceInfo clGetDeviceInfo_fn +inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubDevices +#define clCreateSubDevices clCreateSubDevices_fn +inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); } +#undef clRetainDevice +#define clRetainDevice clRetainDevice_fn +inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); } +#undef clReleaseDevice +#define clReleaseDevice clReleaseDevice_fn +inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); } +#undef clCreateContext +#define clCreateContext clCreateContext_fn +inline cl_context clCreateContext(const cl_context_properties* p0, 
cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateContextFromType +#define clCreateContextFromType clCreateContextFromType_fn +inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); } +#undef clRetainContext +#define clRetainContext clRetainContext_fn +inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); } +#undef clReleaseContext +#define clReleaseContext clReleaseContext_fn +inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); } +#undef clGetContextInfo +#define clGetContextInfo clGetContextInfo_fn +inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateCommandQueue +#define clCreateCommandQueue clCreateCommandQueue_fn +inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); } +#undef clRetainCommandQueue +#define clRetainCommandQueue clRetainCommandQueue_fn +inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); } +#undef clReleaseCommandQueue +#define clReleaseCommandQueue clReleaseCommandQueue_fn +inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); } +#undef clGetCommandQueueInfo +#define clGetCommandQueueInfo clGetCommandQueueInfo_fn +inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateBuffer +#define clCreateBuffer clCreateBuffer_fn 
+inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateSubBuffer +#define clCreateSubBuffer clCreateSubBuffer_fn +inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); } +#undef clCreateImage +#define clCreateImage clCreateImage_fn +inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); } +#undef clRetainMemObject +#define clRetainMemObject clRetainMemObject_fn +inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); } +#undef clReleaseMemObject +#define clReleaseMemObject clReleaseMemObject_fn +inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); } +#undef clGetSupportedImageFormats +#define clGetSupportedImageFormats clGetSupportedImageFormats_fn +inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetMemObjectInfo +#define clGetMemObjectInfo clGetMemObjectInfo_fn +inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetImageInfo +#define clGetImageInfo clGetImageInfo_fn +inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); } +#undef clSetMemObjectDestructorCallback +#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn +inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); } +#undef 
clCreateSampler +#define clCreateSampler clCreateSampler_fn +inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); } +#undef clRetainSampler +#define clRetainSampler clRetainSampler_fn +inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); } +#undef clReleaseSampler +#define clReleaseSampler clReleaseSampler_fn +inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); } +#undef clGetSamplerInfo +#define clGetSamplerInfo clGetSamplerInfo_fn +inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithSource +#define clCreateProgramWithSource clCreateProgramWithSource_fn +inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); } +#undef clCreateProgramWithBinary +#define clCreateProgramWithBinary clCreateProgramWithBinary_fn +inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clCreateProgramWithBuiltInKernels +#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn +inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); } +#undef clRetainProgram +#define clRetainProgram clRetainProgram_fn +inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); } +#undef clReleaseProgram +#define clReleaseProgram clReleaseProgram_fn +inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); } 
+#undef clBuildProgram +#define clBuildProgram clBuildProgram_fn +inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCompileProgram +#define clCompileProgram clCompileProgram_fn +inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clLinkProgram +#define clLinkProgram clLinkProgram_fn +inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clUnloadPlatformCompiler +#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn +inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); } +#undef clGetProgramInfo +#define clGetProgramInfo clGetProgramInfo_fn +inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetProgramBuildInfo +#define clGetProgramBuildInfo clGetProgramBuildInfo_fn +inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clCreateKernel +#define clCreateKernel clCreateKernel_fn +inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); } +#undef clCreateKernelsInProgram +#define clCreateKernelsInProgram clCreateKernelsInProgram_fn +inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, 
cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); } +#undef clRetainKernel +#define clRetainKernel clRetainKernel_fn +inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); } +#undef clReleaseKernel +#define clReleaseKernel clReleaseKernel_fn +inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); } +#undef clSetKernelArg +#define clSetKernelArg clSetKernelArg_fn +inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); } +#undef clGetKernelInfo +#define clGetKernelInfo clGetKernelInfo_fn +inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); } +#undef clGetKernelArgInfo +#define clGetKernelArgInfo clGetKernelArgInfo_fn +inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clGetKernelWorkGroupInfo +#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn +inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); } +#undef clWaitForEvents +#define clWaitForEvents clWaitForEvents_fn +inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); } +#undef clGetEventInfo +#define clGetEventInfo clGetEventInfo_fn +inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); } +#undef clCreateUserEvent +#define clCreateUserEvent clCreateUserEvent_fn +inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); } +#undef clRetainEvent +#define clRetainEvent clRetainEvent_fn +inline cl_int clRetainEvent(cl_event p0) { return 
clRetainEvent_pfn(p0); } +#undef clReleaseEvent +#define clReleaseEvent clReleaseEvent_fn +inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); } +#undef clSetUserEventStatus +#define clSetUserEventStatus clSetUserEventStatus_fn +inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); } +#undef clSetEventCallback +#define clSetEventCallback clSetEventCallback_fn +inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); } +#undef clGetEventProfilingInfo +#define clGetEventProfilingInfo clGetEventProfilingInfo_fn +inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); } +#undef clFlush +#define clFlush clFlush_fn +inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); } +#undef clFinish +#define clFinish clFinish_fn +inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); } +#undef clEnqueueReadBuffer +#define clEnqueueReadBuffer clEnqueueReadBuffer_fn +inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueReadBufferRect +#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn +inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueWriteBuffer +#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn +inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool 
p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueWriteBufferRect +#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn +inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); } +#undef clEnqueueFillBuffer +#define clEnqueueFillBuffer clEnqueueFillBuffer_fn +inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBuffer +#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn +inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferRect +#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn +inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); } +#undef clEnqueueReadImage +#define clEnqueueReadImage clEnqueueReadImage_fn +inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } 
+#undef clEnqueueWriteImage +#define clEnqueueWriteImage clEnqueueWriteImage_fn +inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); } +#undef clEnqueueFillImage +#define clEnqueueFillImage clEnqueueFillImage_fn +inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clEnqueueCopyImage +#define clEnqueueCopyImage clEnqueueCopyImage_fn +inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyImageToBuffer +#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn +inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueCopyBufferToImage +#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn +inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueMapBuffer +#define clEnqueueMapBuffer clEnqueueMapBuffer_fn +inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, 
p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMapImage +#define clEnqueueMapImage clEnqueueMapImage_fn +inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); } +#undef clEnqueueUnmapMemObject +#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn +inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); } +#undef clEnqueueMigrateMemObjects +#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn +inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); } +#undef clEnqueueNDRangeKernel +#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn +inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); } +#undef clEnqueueTask +#define clEnqueueTask clEnqueueTask_fn +inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); } +#undef clEnqueueNativeKernel +#define clEnqueueNativeKernel clEnqueueNativeKernel_fn +inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef 
clEnqueueMarkerWithWaitList +#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn +inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); } +#undef clEnqueueBarrierWithWaitList +#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn +inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); } +#undef clGetExtensionFunctionAddressForPlatform +#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn +inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); } +#undef clCreateImage2D +#define clCreateImage2D clCreateImage2D_fn +inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); } +#undef clCreateImage3D +#define clCreateImage3D clCreateImage3D_fn +inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } +#undef clEnqueueMarker +#define clEnqueueMarker clEnqueueMarker_fn +inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); } +#undef clEnqueueWaitForEvents +#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn +inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); } +#undef clEnqueueBarrier +#define clEnqueueBarrier clEnqueueBarrier_fn +inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); } 
+#undef clUnloadCompiler +#define clUnloadCompiler clUnloadCompiler_fn +inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); } +#undef clGetExtensionFunctionAddress +#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn +inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); } + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file diff --git a/modules/ocl/src/cl_context.cpp b/modules/ocl/src/cl_context.cpp index e24cc8b358..01785eaa26 100644 --- a/modules/ocl/src/cl_context.cpp +++ b/modules/ocl/src/cl_context.cpp @@ -50,6 +50,15 @@ #include #include "cl_programcache.hpp" +// workaround for OpenCL C++ bindings +#if defined(HAVE_OPENCL12) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" +#elif defined(HAVE_OPENCL11) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" +#else +#error Invalid OpenCL configuration +#endif + #if defined _MSC_VER && _MSC_VER >= 1200 #pragma warning( disable: 4100 4101 4127 4244 4267 4510 4512 4610) #endif diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp index 7c58e7c489..a34f828557 100644 --- a/modules/ocl/src/cl_programcache.cpp +++ b/modules/ocl/src/cl_programcache.cpp @@ -50,6 +50,15 @@ #include #include "cl_programcache.hpp" +// workaround for OpenCL C++ bindings +#if defined(HAVE_OPENCL12) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl12_wrappers.hpp" +#elif defined(HAVE_OPENCL11) +#include "opencv2/ocl/cl_runtime/cl_runtime_opencl11_wrappers.hpp" +#else +#error Invalid OpenCL configuration +#endif + #if defined _MSC_VER && _MSC_VER >= 1200 # pragma warning( disable: 4100 4244 4267 4510 4512 4610) #endif diff --git a/modules/ocl/src/cl_runtime/generator/common.py b/modules/ocl/src/cl_runtime/generator/common.py index 99a56096b0..19b21454c8 100644 --- a/modules/ocl/src/cl_runtime/generator/common.py +++ 
b/modules/ocl/src/cl_runtime/generator/common.py @@ -182,6 +182,29 @@ def generateTemplates(sz, lprefix, switch_name, calling_convention=''): print '};' print '' +@outputToString +def generateInlineWrappers(fns): + print '// generated by %s' % os.path.basename(sys.argv[0]) + for fn in fns: + print '#undef %s' % (fn['name']) + print '#define %s %s_fn' % (fn['name'], fn['name']) + params = [] + call_params = [] + for i in range(0, len(fn['params'])): + t = fn['params'][i] + if t.find('*)') >= 0: + p = re.sub(r'\*\)', (' *p%d)' % i), t, 1) + params.append(p) + else: + params.append('%s p%d' % (t, i)) + call_params.append('p%d' % (i)) + + if len(fn['ret']) == 1 and fn['ret'][0] == 'void': + print 'inline void %s(%s) { %s_pfn(%s); }' \ + % (fn['name'], ', '.join(params), fn['name'], ', '.join(call_params)) + else: + print 'inline %s %s(%s) { return %s_pfn(%s); }' \ + % (' '.join(fn['ret']), fn['name'], ', '.join(params), fn['name'], ', '.join(call_params)) def ProcessTemplate(inputFile, ctx, noteLine='//\n// AUTOGENERATED, DO NOT EDIT\n//'): f = open(inputFile, "r") diff --git a/modules/ocl/src/cl_runtime/generator/parser_cl.py b/modules/ocl/src/cl_runtime/generator/parser_cl.py index e711e4cf09..608b826183 100644 --- a/modules/ocl/src/cl_runtime/generator/parser_cl.py +++ b/modules/ocl/src/cl_runtime/generator/parser_cl.py @@ -10,6 +10,7 @@ try: if len(sys.argv) > 1: outfile = open('../../../include/opencv2/ocl/cl_runtime/' + sys.argv[1] + '.hpp', "w") outfile_impl = open('../' + sys.argv[1] + '_impl.hpp', "w") + outfile_wrappers = open('../../../include/opencv2/ocl/cl_runtime/' + sys.argv[1] + '_wrappers.hpp', "w") if len(sys.argv) > 2: f = open(sys.argv[2], "r") else: @@ -102,6 +103,11 @@ ctx['CL_FN_DECLARATIONS'] = generateFnDeclaration(fns) sys.stdout = outfile ProcessTemplate('template/cl_runtime_opencl.hpp.in', ctx) +ctx['CL_FN_INLINE_WRAPPERS'] = generateInlineWrappers(fns) + +sys.stdout = outfile_wrappers 
+ProcessTemplate('template/cl_runtime_opencl_wrappers.hpp.in', ctx) + ctx['CL_FN_ENUMS'] = generateEnums(fns) ctx['CL_FN_NAMES'] = generateNames(fns) ctx['CL_FN_DEFINITIONS'] = generateFnDefinition(fns) diff --git a/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in new file mode 100644 index 0000000000..d02d4c5ff2 --- /dev/null +++ b/modules/ocl/src/cl_runtime/generator/template/cl_runtime_opencl_wrappers.hpp.in @@ -0,0 +1,6 @@ +#ifndef __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ +#define __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ + +@CL_FN_INLINE_WRAPPERS@ + +#endif // __OPENCV_OCL_CL_RUNTIME_OPENCL_WRAPPERS_HPP__ \ No newline at end of file From 69c2ef5ed21255c2e67b143b3adc9500a87a1119 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 19:35:01 +0400 Subject: [PATCH 7/8] ocl: update ocl samples --- samples/gpu/super_resolution.cpp | 8 -------- samples/ocl/adaptive_bilateral_filter.cpp | 5 +---- samples/ocl/bgfg_segm.cpp | 5 +---- samples/ocl/clahe.cpp | 3 --- samples/ocl/facedetect.cpp | 12 +----------- samples/ocl/hog.cpp | 2 -- samples/ocl/pyrlk_optical_flow.cpp | 9 +-------- samples/ocl/squares.cpp | 4 +--- samples/ocl/stereo_match.cpp | 14 ++------------ samples/ocl/surf_matcher.cpp | 12 ++---------- samples/ocl/tvl1_optical_flow.cpp | 9 +-------- 11 files changed, 10 insertions(+), 73 deletions(-) diff --git a/samples/gpu/super_resolution.cpp b/samples/gpu/super_resolution.cpp index 07dda775b4..435e711a1a 100644 --- a/samples/gpu/super_resolution.cpp +++ b/samples/gpu/super_resolution.cpp @@ -132,17 +132,9 @@ int main(int argc, const char* argv[]) } #endif #if defined(HAVE_OPENCV_OCL) - std::vectorinfo; if(useCuda) { CV_Assert(!useOcl); - info.clear(); - } - - if(useOcl) - { - CV_Assert(!useCuda); - cv::ocl::getDevice(info); } #endif Ptr superRes; diff --git a/samples/ocl/adaptive_bilateral_filter.cpp 
b/samples/ocl/adaptive_bilateral_filter.cpp index df226b195d..d3d2521df9 100644 --- a/samples/ocl/adaptive_bilateral_filter.cpp +++ b/samples/ocl/adaptive_bilateral_filter.cpp @@ -25,9 +25,6 @@ int main( int argc, const char** argv ) return -1; } - std::vector infos; - ocl::getDevice(infos); - ocl::oclMat dsrc(src), dABFilter, dBFilter; Size ksize(ks, ks); @@ -48,4 +45,4 @@ int main( int argc, const char** argv ) waitKey(); return 0; -} \ No newline at end of file +} diff --git a/samples/ocl/bgfg_segm.cpp b/samples/ocl/bgfg_segm.cpp index 410f346936..589a34914f 100644 --- a/samples/ocl/bgfg_segm.cpp +++ b/samples/ocl/bgfg_segm.cpp @@ -24,7 +24,7 @@ int main(int argc, const char** argv) if (cmd.get("help")) { cout << "Usage : bgfg_segm [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } @@ -54,9 +54,6 @@ int main(int argc, const char** argv) return -1; } - std::vectorinfo; - cv::ocl::getDevice(info); - Mat frame; cap >> frame; diff --git a/samples/ocl/clahe.cpp b/samples/ocl/clahe.cpp index 1fbf49fac2..5dc20756b4 100644 --- a/samples/ocl/clahe.cpp +++ b/samples/ocl/clahe.cpp @@ -45,9 +45,6 @@ int main(int argc, char** argv) createTrackbar("Tile Size", "CLAHE", &tilesize, 32, (TrackbarCallback)TSize_Callback); createTrackbar("Clip Limit", "CLAHE", &cliplimit, 20, (TrackbarCallback)Clip_Callback); - vector info; - CV_Assert(ocl::getDevice(info)); - Mat frame, outframe; ocl::oclMat d_outframe; diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 711e257e78..be61b79e44 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -72,7 +72,7 @@ int main( int argc, const char** argv ) CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } @@ -120,16 +120,6 @@ int main( int argc, const char** argv ) cvNamedWindow( "result", 1 ); - vector 
oclinfo; - int devnums = ocl::getDevice(oclinfo); - if( devnums < 1 ) - { - std::cout << "no device found\n"; - return -1; - } - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); - ocl::setBinpath("./"); if( capture ) { cout << "In capture ..." << endl; diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index ec88c14d23..89c8dff828 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -135,8 +135,6 @@ App::App(CommandLineParser& cmd) void App::run() { - vector oclinfo; - ocl::getDevice(oclinfo); running = true; VideoWriter video_writer; diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp index cefa928670..5a59803798 100644 --- a/samples/ocl/pyrlk_optical_flow.cpp +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -86,13 +86,6 @@ static void drawArrows(Mat& frame, const vector& prevPts, const vector< int main(int argc, const char* argv[]) { - static std::vector ocl_info; - ocl::getDevice(ocl_info); - //if you want to use undefault device, set it here - setDevice(ocl_info[0]); - - //set this to save kernel compile time from second time you run - ocl::setBinpath("./"); const char* keys = "{ h | help | false | print help message }" "{ l | left | | specify left image }" @@ -109,7 +102,7 @@ int main(int argc, const char* argv[]) if (cmd.get("help")) { cout << "Usage: pyrlk_optical_flow [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp index 48964ffb2e..9e709245da 100644 --- a/samples/ocl/squares.cpp +++ b/samples/ocl/squares.cpp @@ -284,13 +284,11 @@ int main(int argc, char** argv) string outfile = cmd.get("o"); if(inputName.empty()) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } - vector info; - CV_Assert(ocl::getDevice(info)); int iterations = 10; namedWindow( wndname, 1 ); vector > squares_cpu, 
squares_ocl; diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 38dda8a94b..86d60d49be 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -77,28 +77,18 @@ int main(int argc, char** argv) "{ r | right | | specify right image }" "{ m | method | BM | specify match method(BM/BP/CSBP) }" "{ n | ndisp | 64 | specify number of disparity levels }" - "{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }" "{ o | output | stereo_match_output.jpg | specify output path when input is images}"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } try { App app(cmd); - int flag = CVCL_DEVICE_TYPE_GPU; - if(cmd.get("s") == true) - flag = CVCL_DEVICE_TYPE_CPU; - - vector info; - if(getDevice(info, flag) == 0) - { - throw runtime_error("Error: Did not find a valid OpenCL device!"); - } - cout << "Device name:" << info[0].DeviceName[0] << endl; + cout << "Device name:" << cv::ocl::Context::getContext()->getDeviceInfo().deviceName << endl; app.run(); } diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index bee517fbca..4d73323059 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -145,19 +145,11 @@ int main(int argc, char* argv[]) CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) { - std::cout << "Avaible options:" << std::endl; + std::cout << "Available options:" << std::endl; cmd.printParams(); return 0; } - vector info; - if(cv::ocl::getDevice(info) == 0) - { - std::cout << "Error: Did not find a valid OpenCL device!" 
<< std::endl; - return -1; - } - ocl::setDevice(info[0]); - Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; oclMat img1, img2; bool useCPU = cmd.get("c"); @@ -190,7 +182,7 @@ int main(int argc, char* argv[]) { std::cout << "Device name:" - << info[0].DeviceName[0] + << cv::ocl::Context::getContext()->getDeviceInfo().deviceName << std::endl; } double surf_time = 0.; diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp index 315970b7a3..296dc69338 100644 --- a/samples/ocl/tvl1_optical_flow.cpp +++ b/samples/ocl/tvl1_optical_flow.cpp @@ -80,13 +80,6 @@ static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) int main(int argc, const char* argv[]) { - static std::vector ocl_info; - ocl::getDevice(ocl_info); - //if you want to use undefault device, set it here - setDevice(ocl_info[0]); - - //set this to save kernel compile time from second time you run - ocl::setBinpath("./"); const char* keys = "{ h | help | false | print help message }" "{ l | left | | specify left image }" @@ -101,7 +94,7 @@ int main(int argc, const char* argv[]) if (cmd.get("help")) { cout << "Usage: pyrlk_optical_flow [options]" << endl; - cout << "Avaible options:" << endl; + cout << "Available options:" << endl; cmd.printParams(); return 0; } From d74ca7b39d6a8f60eeb4a71d34d7c5906a07de3e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 3 Oct 2013 20:00:41 +0400 Subject: [PATCH 8/8] ocl: update module documentation --- .../doc/structures_and_utility_functions.rst | 58 +++++++------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/modules/ocl/doc/structures_and_utility_functions.rst b/modules/ocl/doc/structures_and_utility_functions.rst index c3c93ecbfe..aec3f70bf4 100644 --- a/modules/ocl/doc/structures_and_utility_functions.rst +++ b/modules/ocl/doc/structures_and_utility_functions.rst @@ -3,56 +3,40 @@ Data Structures and Utility Functions .. highlight:: cpp -ocl::Info -------------- -.. 
ocv:class:: ocl::Info +ocl::getOpenCLPlatforms +----------------------- +Returns the list of OpenCL platforms -this class should be maintained by the user and be passed to getDevice +.. ocv:function:: int ocl::getOpenCLPlatforms( PlatformsInfo& platforms ) -ocl::getDevice ------------------- + :param platforms: Output variable + +ocl::getOpenCLDevices +--------------------- Returns the list of devices -.. ocv:function:: int ocl::getDevice( std::vector & oclinfo, int devicetype=CVCL_DEVICE_TYPE_GPU ) +.. ocv:function:: int ocl::getOpenCLDevices( DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, const PlatformInfo* platform = NULL ) - :param oclinfo: Output vector of ``ocl::Info`` structures + :param devices: Output variable - :param devicetype: One of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``. + :param deviceType: Bitmask of ``CVCL_DEVICE_TYPE_GPU``, ``CVCL_DEVICE_TYPE_CPU`` or ``CVCL_DEVICE_TYPE_DEFAULT``. -the function must be called before any other ``cv::ocl`` functions; it initializes ocl runtime. + :param platform: Specifies preferable platform ocl::setDevice +-------------- +Returns void + +.. ocv:function:: void ocl::setDevice( const DeviceInfo* info ) + + :param info: device info + +ocl::setBinaryPath ------------------ Returns void -.. ocv:function:: void ocl::setDevice( Info &oclinfo, int devnum = 0 ) - - :param oclinfo: Output vector of ``ocl::Info`` structures - - :param devnum: the selected OpenCL device under this platform. - -ocl::setBinpath ------------------- -Returns void - -.. ocv:function:: void ocl::setBinpath(const char *path) +.. ocv:function:: void ocl::setBinaryPath(const char *path) :param path: the path of OpenCL kernel binaries If you call this function and set a valid path, the OCL module will save the compiled kernel to the address in the first time and reload the binary since that. It can save compilation time at the runtime. 
- -ocl::getoclContext ----------------------- -Returns the pointer to the opencl context - -.. ocv:function:: void* ocl::getoclContext() - -Thefunction are used to get opencl context so that opencv can interactive with other opencl program. - -ocl::getoclCommandQueue --------------------------- -Returns the pointer to the opencl command queue - -.. ocv:function:: void* ocl::getoclCommandQueue() - -Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.