2013-10-22 18:04:49 +08:00
|
|
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
|
|
//
|
|
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
|
|
// If you do not agree to this license, do not download, install,
|
|
|
|
// copy or use the software.
|
|
|
|
//
|
|
|
|
//
|
|
|
|
// License Agreement
|
|
|
|
// For Open Source Computer Vision Library
|
|
|
|
//
|
|
|
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
|
|
|
// Third party copyrights are property of their respective owners.
|
|
|
|
//
|
|
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
// are permitted provided that the following conditions are met:
|
|
|
|
//
|
|
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer.
|
|
|
|
//
|
|
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
|
|
// and/or other materials provided with the distribution.
|
|
|
|
//
|
|
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
|
|
// derived from this software without specific prior written permission.
|
|
|
|
//
|
|
|
|
// This software is provided by the copyright holders and contributors "as is" and
|
|
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
|
|
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
|
|
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
|
|
//
|
|
|
|
//M*/
|
|
|
|
|
|
|
|
#include "precomp.hpp"
|
|
|
|
#include <map>
|
|
|
|
|
2013-12-15 03:16:53 +08:00
|
|
|
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
|
|
|
|
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
|
|
|
|
#else
|
|
|
|
// TODO FIXIT: This file can't be build without OPENCL
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
/*
|
|
|
|
Part of the file is an extract from the standard OpenCL headers from Khronos site.
|
|
|
|
Below is the original copyright.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*******************************************************************************
|
|
|
|
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and/or associated documentation files (the
|
|
|
|
* "Materials"), to deal in the Materials without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sublicense, and/or sell copies of the Materials, and to
|
|
|
|
* permit persons to whom the Materials are furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included
|
|
|
|
* in all copies or substantial portions of the Materials.
|
|
|
|
*
|
|
|
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
|
|
|
******************************************************************************/
|
|
|
|
|
|
|
|
#if 0 //defined __APPLE__
|
|
|
|
#define HAVE_OPENCL 1
|
|
|
|
#else
|
|
|
|
#undef HAVE_OPENCL
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define OPENCV_CL_NOT_IMPLEMENTED -1000
|
|
|
|
|
|
|
|
#ifdef HAVE_OPENCL
|
|
|
|
|
|
|
|
#if defined __APPLE__
|
|
|
|
#include <OpenCL/opencl.h>
|
|
|
|
#else
|
|
|
|
#include <CL/opencl.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static const bool g_haveOpenCL = true;
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
extern "C" {
|
|
|
|
|
|
|
|
struct _cl_platform_id { int dummy; };
|
|
|
|
struct _cl_device_id { int dummy; };
|
|
|
|
struct _cl_context { int dummy; };
|
|
|
|
struct _cl_command_queue { int dummy; };
|
|
|
|
struct _cl_mem { int dummy; };
|
|
|
|
struct _cl_program { int dummy; };
|
|
|
|
struct _cl_kernel { int dummy; };
|
|
|
|
struct _cl_event { int dummy; };
|
|
|
|
struct _cl_sampler { int dummy; };
|
|
|
|
|
|
|
|
typedef struct _cl_platform_id * cl_platform_id;
|
|
|
|
typedef struct _cl_device_id * cl_device_id;
|
|
|
|
typedef struct _cl_context * cl_context;
|
|
|
|
typedef struct _cl_command_queue * cl_command_queue;
|
|
|
|
typedef struct _cl_mem * cl_mem;
|
|
|
|
typedef struct _cl_program * cl_program;
|
|
|
|
typedef struct _cl_kernel * cl_kernel;
|
|
|
|
typedef struct _cl_event * cl_event;
|
|
|
|
typedef struct _cl_sampler * cl_sampler;
|
|
|
|
|
|
|
|
typedef int cl_int;
|
|
|
|
typedef unsigned cl_uint;
|
2013-11-20 09:51:31 +08:00
|
|
|
#if defined (_WIN32) && defined(_MSC_VER)
|
|
|
|
typedef __int64 cl_long;
|
|
|
|
typedef unsigned __int64 cl_ulong;
|
|
|
|
#else
|
|
|
|
typedef long cl_long;
|
|
|
|
typedef unsigned long cl_ulong;
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
|
|
|
|
typedef cl_ulong cl_bitfield;
|
|
|
|
typedef cl_bitfield cl_device_type;
|
|
|
|
typedef cl_uint cl_platform_info;
|
|
|
|
typedef cl_uint cl_device_info;
|
|
|
|
typedef cl_bitfield cl_device_fp_config;
|
|
|
|
typedef cl_uint cl_device_mem_cache_type;
|
|
|
|
typedef cl_uint cl_device_local_mem_type;
|
|
|
|
typedef cl_bitfield cl_device_exec_capabilities;
|
|
|
|
typedef cl_bitfield cl_command_queue_properties;
|
|
|
|
typedef intptr_t cl_device_partition_property;
|
|
|
|
typedef cl_bitfield cl_device_affinity_domain;
|
|
|
|
|
|
|
|
typedef intptr_t cl_context_properties;
|
|
|
|
typedef cl_uint cl_context_info;
|
|
|
|
typedef cl_uint cl_command_queue_info;
|
|
|
|
typedef cl_uint cl_channel_order;
|
|
|
|
typedef cl_uint cl_channel_type;
|
|
|
|
typedef cl_bitfield cl_mem_flags;
|
|
|
|
typedef cl_uint cl_mem_object_type;
|
|
|
|
typedef cl_uint cl_mem_info;
|
|
|
|
typedef cl_bitfield cl_mem_migration_flags;
|
|
|
|
typedef cl_uint cl_image_info;
|
|
|
|
typedef cl_uint cl_buffer_create_type;
|
|
|
|
typedef cl_uint cl_addressing_mode;
|
|
|
|
typedef cl_uint cl_filter_mode;
|
|
|
|
typedef cl_uint cl_sampler_info;
|
|
|
|
typedef cl_bitfield cl_map_flags;
|
|
|
|
typedef cl_uint cl_program_info;
|
|
|
|
typedef cl_uint cl_program_build_info;
|
|
|
|
typedef cl_uint cl_program_binary_type;
|
|
|
|
typedef cl_int cl_build_status;
|
|
|
|
typedef cl_uint cl_kernel_info;
|
|
|
|
typedef cl_uint cl_kernel_arg_info;
|
|
|
|
typedef cl_uint cl_kernel_arg_address_qualifier;
|
|
|
|
typedef cl_uint cl_kernel_arg_access_qualifier;
|
|
|
|
typedef cl_bitfield cl_kernel_arg_type_qualifier;
|
|
|
|
typedef cl_uint cl_kernel_work_group_info;
|
|
|
|
typedef cl_uint cl_event_info;
|
|
|
|
typedef cl_uint cl_command_type;
|
|
|
|
typedef cl_uint cl_profiling_info;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct _cl_image_format {
|
|
|
|
cl_channel_order image_channel_order;
|
|
|
|
cl_channel_type image_channel_data_type;
|
|
|
|
} cl_image_format;
|
|
|
|
|
|
|
|
typedef struct _cl_image_desc {
|
|
|
|
cl_mem_object_type image_type;
|
|
|
|
size_t image_width;
|
|
|
|
size_t image_height;
|
|
|
|
size_t image_depth;
|
|
|
|
size_t image_array_size;
|
|
|
|
size_t image_row_pitch;
|
|
|
|
size_t image_slice_pitch;
|
|
|
|
cl_uint num_mip_levels;
|
|
|
|
cl_uint num_samples;
|
|
|
|
cl_mem buffer;
|
|
|
|
} cl_image_desc;
|
|
|
|
|
|
|
|
typedef struct _cl_buffer_region {
|
|
|
|
size_t origin;
|
|
|
|
size_t size;
|
|
|
|
} cl_buffer_region;
|
|
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
#define CL_SUCCESS 0
|
|
|
|
#define CL_DEVICE_NOT_FOUND -1
|
|
|
|
#define CL_DEVICE_NOT_AVAILABLE -2
|
|
|
|
#define CL_COMPILER_NOT_AVAILABLE -3
|
|
|
|
#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
|
|
|
|
#define CL_OUT_OF_RESOURCES -5
|
|
|
|
#define CL_OUT_OF_HOST_MEMORY -6
|
|
|
|
#define CL_PROFILING_INFO_NOT_AVAILABLE -7
|
|
|
|
#define CL_MEM_COPY_OVERLAP -8
|
|
|
|
#define CL_IMAGE_FORMAT_MISMATCH -9
|
|
|
|
#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
|
|
|
|
#define CL_BUILD_PROGRAM_FAILURE -11
|
|
|
|
#define CL_MAP_FAILURE -12
|
|
|
|
#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
|
|
|
|
#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
|
|
|
|
#define CL_COMPILE_PROGRAM_FAILURE -15
|
|
|
|
#define CL_LINKER_NOT_AVAILABLE -16
|
|
|
|
#define CL_LINK_PROGRAM_FAILURE -17
|
|
|
|
#define CL_DEVICE_PARTITION_FAILED -18
|
|
|
|
#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
|
|
|
|
|
|
|
|
#define CL_INVALID_VALUE -30
|
|
|
|
#define CL_INVALID_DEVICE_TYPE -31
|
|
|
|
#define CL_INVALID_PLATFORM -32
|
|
|
|
#define CL_INVALID_DEVICE -33
|
|
|
|
#define CL_INVALID_CONTEXT -34
|
|
|
|
#define CL_INVALID_QUEUE_PROPERTIES -35
|
|
|
|
#define CL_INVALID_COMMAND_QUEUE -36
|
|
|
|
#define CL_INVALID_HOST_PTR -37
|
|
|
|
#define CL_INVALID_MEM_OBJECT -38
|
|
|
|
#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
|
|
|
|
#define CL_INVALID_IMAGE_SIZE -40
|
|
|
|
#define CL_INVALID_SAMPLER -41
|
|
|
|
#define CL_INVALID_BINARY -42
|
|
|
|
#define CL_INVALID_BUILD_OPTIONS -43
|
|
|
|
#define CL_INVALID_PROGRAM -44
|
|
|
|
#define CL_INVALID_PROGRAM_EXECUTABLE -45
|
|
|
|
#define CL_INVALID_KERNEL_NAME -46
|
|
|
|
#define CL_INVALID_KERNEL_DEFINITION -47
|
|
|
|
#define CL_INVALID_KERNEL -48
|
|
|
|
#define CL_INVALID_ARG_INDEX -49
|
|
|
|
#define CL_INVALID_ARG_VALUE -50
|
|
|
|
#define CL_INVALID_ARG_SIZE -51
|
|
|
|
#define CL_INVALID_KERNEL_ARGS -52
|
|
|
|
#define CL_INVALID_WORK_DIMENSION -53
|
|
|
|
#define CL_INVALID_WORK_GROUP_SIZE -54
|
|
|
|
#define CL_INVALID_WORK_ITEM_SIZE -55
|
|
|
|
#define CL_INVALID_GLOBAL_OFFSET -56
|
|
|
|
#define CL_INVALID_EVENT_WAIT_LIST -57
|
|
|
|
#define CL_INVALID_EVENT -58
|
|
|
|
#define CL_INVALID_OPERATION -59
|
|
|
|
#define CL_INVALID_GL_OBJECT -60
|
|
|
|
#define CL_INVALID_BUFFER_SIZE -61
|
|
|
|
#define CL_INVALID_MIP_LEVEL -62
|
|
|
|
#define CL_INVALID_GLOBAL_WORK_SIZE -63
|
|
|
|
#define CL_INVALID_PROPERTY -64
|
|
|
|
#define CL_INVALID_IMAGE_DESCRIPTOR -65
|
|
|
|
#define CL_INVALID_COMPILER_OPTIONS -66
|
|
|
|
#define CL_INVALID_LINKER_OPTIONS -67
|
|
|
|
#define CL_INVALID_DEVICE_PARTITION_COUNT -68
|
|
|
|
|
|
|
|
/*#define CL_VERSION_1_0 1
|
|
|
|
#define CL_VERSION_1_1 1
|
|
|
|
#define CL_VERSION_1_2 1*/
|
|
|
|
|
|
|
|
#define CL_FALSE 0
|
|
|
|
#define CL_TRUE 1
|
|
|
|
#define CL_BLOCKING CL_TRUE
|
|
|
|
#define CL_NON_BLOCKING CL_FALSE
|
|
|
|
|
|
|
|
#define CL_PLATFORM_PROFILE 0x0900
|
|
|
|
#define CL_PLATFORM_VERSION 0x0901
|
|
|
|
#define CL_PLATFORM_NAME 0x0902
|
|
|
|
#define CL_PLATFORM_VENDOR 0x0903
|
|
|
|
#define CL_PLATFORM_EXTENSIONS 0x0904
|
|
|
|
|
|
|
|
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
|
|
|
|
#define CL_DEVICE_TYPE_CPU (1 << 1)
|
|
|
|
#define CL_DEVICE_TYPE_GPU (1 << 2)
|
|
|
|
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
|
|
|
|
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
|
|
|
|
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
|
|
|
|
#define CL_DEVICE_TYPE 0x1000
|
|
|
|
#define CL_DEVICE_VENDOR_ID 0x1001
|
|
|
|
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
|
|
|
|
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
|
|
|
|
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
|
|
|
|
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
|
|
|
|
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
|
|
|
|
#define CL_DEVICE_ADDRESS_BITS 0x100D
|
|
|
|
#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
|
|
|
|
#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
|
|
|
|
#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
|
|
|
|
#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
|
|
|
|
#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
|
|
|
|
#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
|
|
|
|
#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
|
|
|
|
#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
|
|
|
|
#define CL_DEVICE_IMAGE_SUPPORT 0x1016
|
|
|
|
#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
|
|
|
|
#define CL_DEVICE_MAX_SAMPLERS 0x1018
|
|
|
|
#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
|
|
|
|
#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
|
|
|
|
#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
|
|
|
|
#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
|
|
|
|
#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
|
|
|
|
#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
|
|
|
|
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
|
|
|
|
#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
|
|
|
|
#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
|
|
|
|
#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
|
|
|
|
#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
|
|
|
|
#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
|
|
|
|
#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
|
|
|
|
#define CL_DEVICE_ENDIAN_LITTLE 0x1026
|
|
|
|
#define CL_DEVICE_AVAILABLE 0x1027
|
|
|
|
#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
|
|
|
|
#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
|
|
|
|
#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
|
|
|
|
#define CL_DEVICE_NAME 0x102B
|
|
|
|
#define CL_DEVICE_VENDOR 0x102C
|
|
|
|
#define CL_DRIVER_VERSION 0x102D
|
|
|
|
#define CL_DEVICE_PROFILE 0x102E
|
|
|
|
#define CL_DEVICE_VERSION 0x102F
|
|
|
|
#define CL_DEVICE_EXTENSIONS 0x1030
|
|
|
|
#define CL_DEVICE_PLATFORM 0x1031
|
|
|
|
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
|
|
|
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
|
|
|
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
|
|
|
|
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
|
|
|
|
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
|
|
|
|
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
|
|
|
|
#define CL_DEVICE_LINKER_AVAILABLE 0x103E
|
|
|
|
#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
|
|
|
|
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
|
|
|
|
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
|
|
|
|
#define CL_DEVICE_PARENT_DEVICE 0x1042
|
|
|
|
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
|
|
|
|
#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
|
|
|
|
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
|
|
|
|
#define CL_DEVICE_PARTITION_TYPE 0x1046
|
|
|
|
#define CL_DEVICE_REFERENCE_COUNT 0x1047
|
|
|
|
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
|
|
|
|
#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
|
|
|
|
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
|
|
|
|
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
|
|
|
|
|
|
|
|
#define CL_FP_DENORM (1 << 0)
|
|
|
|
#define CL_FP_INF_NAN (1 << 1)
|
|
|
|
#define CL_FP_ROUND_TO_NEAREST (1 << 2)
|
|
|
|
#define CL_FP_ROUND_TO_ZERO (1 << 3)
|
|
|
|
#define CL_FP_ROUND_TO_INF (1 << 4)
|
|
|
|
#define CL_FP_FMA (1 << 5)
|
|
|
|
#define CL_FP_SOFT_FLOAT (1 << 6)
|
|
|
|
#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
|
|
|
|
|
|
|
|
#define CL_NONE 0x0
|
|
|
|
#define CL_READ_ONLY_CACHE 0x1
|
|
|
|
#define CL_READ_WRITE_CACHE 0x2
|
|
|
|
#define CL_LOCAL 0x1
|
|
|
|
#define CL_GLOBAL 0x2
|
|
|
|
#define CL_EXEC_KERNEL (1 << 0)
|
|
|
|
#define CL_EXEC_NATIVE_KERNEL (1 << 1)
|
|
|
|
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
|
|
|
|
#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
|
|
|
|
|
|
|
|
#define CL_CONTEXT_REFERENCE_COUNT 0x1080
|
|
|
|
#define CL_CONTEXT_DEVICES 0x1081
|
|
|
|
#define CL_CONTEXT_PROPERTIES 0x1082
|
|
|
|
#define CL_CONTEXT_NUM_DEVICES 0x1083
|
|
|
|
#define CL_CONTEXT_PLATFORM 0x1084
|
|
|
|
#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
|
|
|
|
|
|
|
|
#define CL_DEVICE_PARTITION_EQUALLY 0x1086
|
|
|
|
#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
|
|
|
|
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
|
|
|
|
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
|
|
|
|
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
|
|
|
|
#define CL_QUEUE_CONTEXT 0x1090
|
|
|
|
#define CL_QUEUE_DEVICE 0x1091
|
|
|
|
#define CL_QUEUE_REFERENCE_COUNT 0x1092
|
|
|
|
#define CL_QUEUE_PROPERTIES 0x1093
|
|
|
|
#define CL_MEM_READ_WRITE (1 << 0)
|
|
|
|
#define CL_MEM_WRITE_ONLY (1 << 1)
|
|
|
|
#define CL_MEM_READ_ONLY (1 << 2)
|
|
|
|
#define CL_MEM_USE_HOST_PTR (1 << 3)
|
|
|
|
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
|
|
|
|
#define CL_MEM_COPY_HOST_PTR (1 << 5)
|
2013-10-22 21:41:28 +08:00
|
|
|
// reserved (1 << 6)
|
2013-10-22 18:04:49 +08:00
|
|
|
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
|
|
|
|
#define CL_MEM_HOST_READ_ONLY (1 << 8)
|
|
|
|
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
|
|
|
|
#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
|
|
|
|
#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
|
|
|
|
|
|
|
|
#define CL_R 0x10B0
|
|
|
|
#define CL_A 0x10B1
|
|
|
|
#define CL_RG 0x10B2
|
|
|
|
#define CL_RA 0x10B3
|
|
|
|
#define CL_RGB 0x10B4
|
|
|
|
#define CL_RGBA 0x10B5
|
|
|
|
#define CL_BGRA 0x10B6
|
|
|
|
#define CL_ARGB 0x10B7
|
|
|
|
#define CL_INTENSITY 0x10B8
|
|
|
|
#define CL_LUMINANCE 0x10B9
|
|
|
|
#define CL_Rx 0x10BA
|
|
|
|
#define CL_RGx 0x10BB
|
|
|
|
#define CL_RGBx 0x10BC
|
|
|
|
#define CL_DEPTH 0x10BD
|
|
|
|
#define CL_DEPTH_STENCIL 0x10BE
|
|
|
|
|
|
|
|
#define CL_SNORM_INT8 0x10D0
|
|
|
|
#define CL_SNORM_INT16 0x10D1
|
|
|
|
#define CL_UNORM_INT8 0x10D2
|
|
|
|
#define CL_UNORM_INT16 0x10D3
|
|
|
|
#define CL_UNORM_SHORT_565 0x10D4
|
|
|
|
#define CL_UNORM_SHORT_555 0x10D5
|
|
|
|
#define CL_UNORM_INT_101010 0x10D6
|
|
|
|
#define CL_SIGNED_INT8 0x10D7
|
|
|
|
#define CL_SIGNED_INT16 0x10D8
|
|
|
|
#define CL_SIGNED_INT32 0x10D9
|
|
|
|
#define CL_UNSIGNED_INT8 0x10DA
|
|
|
|
#define CL_UNSIGNED_INT16 0x10DB
|
|
|
|
#define CL_UNSIGNED_INT32 0x10DC
|
|
|
|
#define CL_HALF_FLOAT 0x10DD
|
|
|
|
#define CL_FLOAT 0x10DE
|
|
|
|
#define CL_UNORM_INT24 0x10DF
|
|
|
|
|
|
|
|
#define CL_MEM_OBJECT_BUFFER 0x10F0
|
|
|
|
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
|
|
|
|
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
|
|
|
|
#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
|
|
|
|
#define CL_MEM_OBJECT_IMAGE1D 0x10F4
|
|
|
|
#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
|
|
|
|
#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
|
|
|
|
|
|
|
|
#define CL_MEM_TYPE 0x1100
|
|
|
|
#define CL_MEM_FLAGS 0x1101
|
|
|
|
#define CL_MEM_SIZE 0x1102
|
|
|
|
#define CL_MEM_HOST_PTR 0x1103
|
|
|
|
#define CL_MEM_MAP_COUNT 0x1104
|
|
|
|
#define CL_MEM_REFERENCE_COUNT 0x1105
|
|
|
|
#define CL_MEM_CONTEXT 0x1106
|
|
|
|
#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
|
|
|
|
#define CL_MEM_OFFSET 0x1108
|
|
|
|
|
|
|
|
#define CL_IMAGE_FORMAT 0x1110
|
|
|
|
#define CL_IMAGE_ELEMENT_SIZE 0x1111
|
|
|
|
#define CL_IMAGE_ROW_PITCH 0x1112
|
|
|
|
#define CL_IMAGE_SLICE_PITCH 0x1113
|
|
|
|
#define CL_IMAGE_WIDTH 0x1114
|
|
|
|
#define CL_IMAGE_HEIGHT 0x1115
|
|
|
|
#define CL_IMAGE_DEPTH 0x1116
|
|
|
|
#define CL_IMAGE_ARRAY_SIZE 0x1117
|
|
|
|
#define CL_IMAGE_BUFFER 0x1118
|
|
|
|
#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
|
|
|
|
#define CL_IMAGE_NUM_SAMPLES 0x111A
|
|
|
|
|
|
|
|
#define CL_ADDRESS_NONE 0x1130
|
|
|
|
#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
|
|
|
|
#define CL_ADDRESS_CLAMP 0x1132
|
|
|
|
#define CL_ADDRESS_REPEAT 0x1133
|
|
|
|
#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
|
|
|
|
|
|
|
|
#define CL_FILTER_NEAREST 0x1140
|
|
|
|
#define CL_FILTER_LINEAR 0x1141
|
|
|
|
|
|
|
|
#define CL_SAMPLER_REFERENCE_COUNT 0x1150
|
|
|
|
#define CL_SAMPLER_CONTEXT 0x1151
|
|
|
|
#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
|
|
|
|
#define CL_SAMPLER_ADDRESSING_MODE 0x1153
|
|
|
|
#define CL_SAMPLER_FILTER_MODE 0x1154
|
|
|
|
|
|
|
|
#define CL_MAP_READ (1 << 0)
|
|
|
|
#define CL_MAP_WRITE (1 << 1)
|
|
|
|
#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
|
|
|
|
|
|
|
|
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
|
|
|
|
#define CL_PROGRAM_CONTEXT 0x1161
|
|
|
|
#define CL_PROGRAM_NUM_DEVICES 0x1162
|
|
|
|
#define CL_PROGRAM_DEVICES 0x1163
|
|
|
|
#define CL_PROGRAM_SOURCE 0x1164
|
|
|
|
#define CL_PROGRAM_BINARY_SIZES 0x1165
|
|
|
|
#define CL_PROGRAM_BINARIES 0x1166
|
|
|
|
#define CL_PROGRAM_NUM_KERNELS 0x1167
|
|
|
|
#define CL_PROGRAM_KERNEL_NAMES 0x1168
|
|
|
|
#define CL_PROGRAM_BUILD_STATUS 0x1181
|
|
|
|
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
|
|
|
|
#define CL_PROGRAM_BUILD_LOG 0x1183
|
|
|
|
#define CL_PROGRAM_BINARY_TYPE 0x1184
|
|
|
|
#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
|
|
|
|
#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
|
|
|
|
#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
|
|
|
|
#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
|
|
|
|
|
|
|
|
#define CL_BUILD_SUCCESS 0
|
|
|
|
#define CL_BUILD_NONE -1
|
|
|
|
#define CL_BUILD_ERROR -2
|
|
|
|
#define CL_BUILD_IN_PROGRESS -3
|
|
|
|
|
|
|
|
#define CL_KERNEL_FUNCTION_NAME 0x1190
|
|
|
|
#define CL_KERNEL_NUM_ARGS 0x1191
|
|
|
|
#define CL_KERNEL_REFERENCE_COUNT 0x1192
|
|
|
|
#define CL_KERNEL_CONTEXT 0x1193
|
|
|
|
#define CL_KERNEL_PROGRAM 0x1194
|
|
|
|
#define CL_KERNEL_ATTRIBUTES 0x1195
|
|
|
|
#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
|
|
|
|
#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
|
|
|
|
#define CL_KERNEL_ARG_TYPE_NAME 0x1198
|
|
|
|
#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
|
|
|
|
#define CL_KERNEL_ARG_NAME 0x119A
|
|
|
|
#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
|
|
|
|
#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
|
|
|
|
#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
|
|
|
|
#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
|
|
|
|
#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
|
|
|
|
#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
|
|
|
|
#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
|
|
|
|
#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
|
|
|
|
#define CL_KERNEL_ARG_TYPE_NONE 0
|
|
|
|
#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
|
|
|
|
#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
|
|
|
|
#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
|
|
|
|
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
|
|
|
|
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
|
|
|
|
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
|
|
|
|
#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
|
|
|
|
#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
|
|
|
|
#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
|
|
|
|
|
|
|
|
#define CL_EVENT_COMMAND_QUEUE 0x11D0
|
|
|
|
#define CL_EVENT_COMMAND_TYPE 0x11D1
|
|
|
|
#define CL_EVENT_REFERENCE_COUNT 0x11D2
|
|
|
|
#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
|
|
|
|
#define CL_EVENT_CONTEXT 0x11D4
|
|
|
|
|
|
|
|
#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
|
|
|
|
#define CL_COMMAND_TASK 0x11F1
|
|
|
|
#define CL_COMMAND_NATIVE_KERNEL 0x11F2
|
|
|
|
#define CL_COMMAND_READ_BUFFER 0x11F3
|
|
|
|
#define CL_COMMAND_WRITE_BUFFER 0x11F4
|
|
|
|
#define CL_COMMAND_COPY_BUFFER 0x11F5
|
|
|
|
#define CL_COMMAND_READ_IMAGE 0x11F6
|
|
|
|
#define CL_COMMAND_WRITE_IMAGE 0x11F7
|
|
|
|
#define CL_COMMAND_COPY_IMAGE 0x11F8
|
|
|
|
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
|
|
|
|
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
|
|
|
|
#define CL_COMMAND_MAP_BUFFER 0x11FB
|
|
|
|
#define CL_COMMAND_MAP_IMAGE 0x11FC
|
|
|
|
#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
|
|
|
|
#define CL_COMMAND_MARKER 0x11FE
|
|
|
|
#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
|
|
|
|
#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
|
|
|
|
#define CL_COMMAND_READ_BUFFER_RECT 0x1201
|
|
|
|
#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
|
|
|
|
#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
|
|
|
|
#define CL_COMMAND_USER 0x1204
|
|
|
|
#define CL_COMMAND_BARRIER 0x1205
|
|
|
|
#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
|
|
|
|
#define CL_COMMAND_FILL_BUFFER 0x1207
|
|
|
|
#define CL_COMMAND_FILL_IMAGE 0x1208
|
|
|
|
|
|
|
|
#define CL_COMPLETE 0x0
|
|
|
|
#define CL_RUNNING 0x1
|
|
|
|
#define CL_SUBMITTED 0x2
|
|
|
|
#define CL_QUEUED 0x3
|
|
|
|
#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
|
|
|
|
|
|
|
|
#define CL_PROFILING_COMMAND_QUEUED 0x1280
|
|
|
|
#define CL_PROFILING_COMMAND_SUBMIT 0x1281
|
|
|
|
#define CL_PROFILING_COMMAND_START 0x1282
|
|
|
|
#define CL_PROFILING_COMMAND_END 0x1283
|
|
|
|
|
|
|
|
#define CL_CALLBACK CV_STDCALL
|
|
|
|
|
|
|
|
static volatile bool g_haveOpenCL = false;
|
|
|
|
static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
|
|
|
|
|
|
|
|
#if defined(__APPLE__)
|
|
|
|
#include <dlfcn.h>
|
|
|
|
|
|
|
|
static void* initOpenCLAndLoad(const char* funcname)
|
|
|
|
{
|
|
|
|
static bool initialized = false;
|
|
|
|
static void* handle = 0;
|
|
|
|
if (!handle)
|
|
|
|
{
|
|
|
|
if(!initialized)
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
|
|
|
|
oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
|
|
|
|
"/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
|
|
|
|
handle = dlopen(oclpath, RTLD_LAZY);
|
2013-10-22 18:04:49 +08:00
|
|
|
initialized = true;
|
|
|
|
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
|
2013-11-19 00:48:00 +08:00
|
|
|
if( g_haveOpenCL )
|
|
|
|
fprintf(stderr, "Succesffuly loaded OpenCL v1.1+ runtime from %s\n", oclpath);
|
|
|
|
else
|
|
|
|
fprintf(stderr, "Failed to load OpenCL runtime\n");
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
if(!handle)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-12-01 07:12:19 +08:00
|
|
|
return funcname && handle ? dlsym(handle, funcname) : 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#elif defined WIN32 || defined _WIN32
|
|
|
|
|
|
|
|
#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
|
|
|
|
#define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
|
|
|
|
#endif
|
|
|
|
#include <windows.h>
|
|
|
|
#if (_WIN32_WINNT >= 0x0602)
|
|
|
|
#include <synchapi.h>
|
|
|
|
#endif
|
|
|
|
#undef small
|
|
|
|
#undef min
|
|
|
|
#undef max
|
|
|
|
#undef abs
|
|
|
|
|
|
|
|
static void* initOpenCLAndLoad(const char* funcname)
|
|
|
|
{
|
|
|
|
static bool initialized = false;
|
|
|
|
static HMODULE handle = 0;
|
|
|
|
if (!handle)
|
|
|
|
{
|
|
|
|
if(!initialized)
|
|
|
|
{
|
|
|
|
handle = LoadLibraryA("OpenCL.dll");
|
|
|
|
initialized = true;
|
2013-10-22 21:41:28 +08:00
|
|
|
g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
if(!handle)
|
|
|
|
return 0;
|
|
|
|
}
|
2013-10-22 21:41:28 +08:00
|
|
|
|
|
|
|
return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
#elif defined(__linux)
|
|
|
|
|
|
|
|
#include <dlfcn.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
static void* initOpenCLAndLoad(const char* funcname)
|
|
|
|
{
|
|
|
|
static bool initialized = false;
|
|
|
|
static void* handle = 0;
|
|
|
|
if (!handle)
|
|
|
|
{
|
|
|
|
if(!initialized)
|
|
|
|
{
|
2013-10-22 21:41:28 +08:00
|
|
|
handle = dlopen("libOpenCL.so", RTLD_LAZY);
|
2013-10-22 18:04:49 +08:00
|
|
|
if(!handle)
|
2013-10-22 21:41:28 +08:00
|
|
|
handle = dlopen("libCL.so", RTLD_LAZY);
|
2013-10-22 18:04:49 +08:00
|
|
|
initialized = true;
|
|
|
|
g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
|
|
|
|
}
|
|
|
|
if(!handle)
|
|
|
|
return 0;
|
|
|
|
}
|
2013-10-22 21:41:28 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
return funcname ? (void*)dlsym(handle, funcname) : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
static void* initOpenCLAndLoad(const char*)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#define OCL_FUNC(rettype, funcname, argsdecl, args) \
|
|
|
|
typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
|
|
|
|
static rettype funcname argsdecl \
|
|
|
|
{ \
|
|
|
|
static funcname##_t funcname##_p = 0; \
|
|
|
|
if( !funcname##_p ) \
|
|
|
|
{ \
|
|
|
|
funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
|
|
|
|
if( !funcname##_p ) \
|
|
|
|
return OPENCV_CL_NOT_IMPLEMENTED; \
|
|
|
|
} \
|
|
|
|
return funcname##_p args; \
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
|
|
|
|
typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
|
|
|
|
static rettype funcname argsdecl \
|
|
|
|
{ \
|
|
|
|
static funcname##_t funcname##_p = 0; \
|
|
|
|
if( !funcname##_p ) \
|
|
|
|
{ \
|
|
|
|
funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
|
|
|
|
if( !funcname##_p ) \
|
|
|
|
{ \
|
|
|
|
if( errcode_ret ) \
|
|
|
|
*errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
|
|
|
|
return 0; \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
return funcname##_p args; \
|
|
|
|
}
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetPlatformIDs,
|
|
|
|
(cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
|
|
|
|
(num_entries, platforms, num_platforms))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetPlatformInfo,
|
|
|
|
(cl_platform_id platform, cl_platform_info param_name,
|
|
|
|
size_t param_value_size, void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(platform, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetDeviceInfo,
|
|
|
|
(cl_device_id device,
|
|
|
|
cl_device_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(device, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetDeviceIDs,
|
|
|
|
(cl_platform_id platform,
|
2013-10-22 21:41:28 +08:00
|
|
|
cl_device_type device_type,
|
|
|
|
cl_uint num_entries,
|
|
|
|
cl_device_id * devices,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_uint * num_devices),
|
|
|
|
(platform, device_type, num_entries, devices, num_devices))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_context, clCreateContext,
|
|
|
|
(const cl_context_properties * properties,
|
|
|
|
cl_uint num_devices,
|
|
|
|
const cl_device_id * devices,
|
|
|
|
void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
|
|
|
|
void * user_data,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
|
|
|
|
|
|
|
|
/*
|
|
|
|
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_context, clCreateContextFromType,
|
|
|
|
(const cl_context_properties * properties,
|
|
|
|
cl_device_type device_type,
|
|
|
|
void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
|
|
|
|
void * user_data,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(properties, device_type, pfn_notify, user_data, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetContextInfo,
|
2013-10-22 21:41:28 +08:00
|
|
|
(cl_context context,
|
|
|
|
cl_context_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
2013-10-22 18:04:49 +08:00
|
|
|
size_t * param_value_size_ret),
|
2013-10-22 21:41:28 +08:00
|
|
|
(context, param_name, param_value_size,
|
2013-10-22 18:04:49 +08:00
|
|
|
param_value, param_value_size_ret))
|
|
|
|
*/
|
|
|
|
OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
|
2013-10-22 21:41:28 +08:00
|
|
|
(cl_context context,
|
|
|
|
cl_device_id device,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_command_queue_properties properties,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(context, device, properties, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_mem, clCreateBuffer,
|
|
|
|
(cl_context context,
|
|
|
|
cl_mem_flags flags,
|
|
|
|
size_t size,
|
|
|
|
void * host_ptr,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(context, flags, size, host_ptr, errcode_ret))
|
|
|
|
|
|
|
|
/*
|
|
|
|
OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetCommandQueueInfo,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_command_queue_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(command_queue, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_mem, clCreateSubBuffer,
|
|
|
|
(cl_mem buffer,
|
|
|
|
cl_mem_flags flags,
|
|
|
|
cl_buffer_create_type buffer_create_type,
|
|
|
|
const void * buffer_create_info,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_mem, clCreateImage,
|
|
|
|
(cl_context context,
|
|
|
|
cl_mem_flags flags,
|
|
|
|
const cl_image_format * image_format,
|
2013-10-22 21:41:28 +08:00
|
|
|
const cl_image_desc * image_desc,
|
2013-10-22 18:04:49 +08:00
|
|
|
void * host_ptr,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(context, flags, image_format, image_desc, host_ptr, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetSupportedImageFormats,
|
|
|
|
(cl_context context,
|
|
|
|
cl_mem_flags flags,
|
|
|
|
cl_mem_object_type image_type,
|
|
|
|
cl_uint num_entries,
|
|
|
|
cl_image_format * image_formats,
|
|
|
|
cl_uint * num_image_formats),
|
|
|
|
(context, flags, image_type, num_entries, image_formats, num_image_formats))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetMemObjectInfo,
|
|
|
|
(cl_mem memobj,
|
|
|
|
cl_mem_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(memobj, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetImageInfo,
|
|
|
|
(cl_mem image,
|
|
|
|
cl_image_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(image, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clCreateKernelsInProgram,
|
|
|
|
(cl_program program,
|
|
|
|
cl_uint num_kernels,
|
|
|
|
cl_kernel * kernels,
|
|
|
|
cl_uint * num_kernels_ret),
|
|
|
|
(program, num_kernels, kernels, num_kernels_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetKernelArgInfo,
|
|
|
|
(cl_kernel kernel,
|
|
|
|
cl_uint arg_indx,
|
|
|
|
cl_kernel_arg_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
|
2013-10-22 21:41:28 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
OCL_FUNC(cl_int, clEnqueueReadImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem image,
|
|
|
|
cl_bool blocking_read,
|
|
|
|
const size_t * origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
size_t row_pitch,
|
|
|
|
size_t slice_pitch,
|
|
|
|
void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, image, blocking_read, origin, region,
|
|
|
|
row_pitch, slice_pitch,
|
|
|
|
ptr,
|
|
|
|
num_events_in_wait_list,
|
|
|
|
event_wait_list,
|
|
|
|
event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueWriteImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem image,
|
|
|
|
cl_bool blocking_write,
|
|
|
|
const size_t * origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
size_t input_row_pitch,
|
|
|
|
size_t input_slice_pitch,
|
|
|
|
const void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, image, blocking_write, origin, region, input_row_pitch,
|
|
|
|
input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueFillImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem image,
|
|
|
|
const void * fill_color,
|
|
|
|
const size_t * origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, image, fill_color, origin, region,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueCopyImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem src_image,
|
|
|
|
cl_mem dst_image,
|
|
|
|
const size_t * src_origin[3],
|
|
|
|
const size_t * dst_origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, src_image, dst_image, src_origin, dst_origin,
|
|
|
|
region, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem src_image,
|
|
|
|
cl_mem dst_buffer,
|
|
|
|
const size_t * src_origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
size_t dst_offset,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem src_buffer,
|
|
|
|
cl_mem dst_image,
|
|
|
|
size_t src_offset,
|
|
|
|
const size_t * dst_origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, src_buffer, dst_image, src_offset, dst_origin,
|
|
|
|
region, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
|
|
|
|
OCL_FUNC_P(void*, clEnqueueMapImage,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem image,
|
|
|
|
cl_bool blocking_map,
|
|
|
|
cl_map_flags map_flags,
|
|
|
|
const size_t * origin[3],
|
|
|
|
const size_t * region[3],
|
|
|
|
size_t * image_row_pitch,
|
|
|
|
size_t * image_slice_pitch,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(command_queue, image, blocking_map, map_flags, origin, region,
|
|
|
|
image_row_pitch, image_slice_pitch, num_events_in_wait_list,
|
|
|
|
event_wait_list, event, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
|
2013-10-22 21:41:28 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
OCL_FUNC(cl_int, clGetKernelInfo,
|
|
|
|
(cl_kernel kernel,
|
|
|
|
cl_kernel_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(kernel, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
|
|
|
|
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_program, clCreateProgramWithSource,
|
|
|
|
(cl_context context,
|
|
|
|
cl_uint count,
|
|
|
|
const char ** strings,
|
|
|
|
const size_t * lengths,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(context, count, strings, lengths, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
|
|
|
|
(cl_context context,
|
|
|
|
cl_uint num_devices,
|
|
|
|
const cl_device_id * device_list,
|
|
|
|
const size_t * lengths,
|
|
|
|
const unsigned char ** binaries,
|
|
|
|
cl_int * binary_status,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clBuildProgram,
|
|
|
|
(cl_program program,
|
|
|
|
cl_uint num_devices,
|
|
|
|
const cl_device_id * device_list,
|
2013-10-22 21:41:28 +08:00
|
|
|
const char * options,
|
2013-10-22 18:04:49 +08:00
|
|
|
void (CL_CALLBACK * pfn_notify)(cl_program, void *),
|
|
|
|
void * user_data),
|
|
|
|
(program, num_devices, device_list, options, pfn_notify, user_data))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetProgramInfo,
|
|
|
|
(cl_program program,
|
|
|
|
cl_program_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(program, param_name, param_value_size, param_value, param_value_size_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetProgramBuildInfo,
|
|
|
|
(cl_program program,
|
|
|
|
cl_device_id device,
|
|
|
|
cl_program_build_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(program, device, param_name, param_value_size, param_value, param_value_size_ret))
|
2013-10-22 21:41:28 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
OCL_FUNC_P(cl_kernel, clCreateKernel,
|
|
|
|
(cl_program program,
|
|
|
|
const char * kernel_name,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(program, kernel_name, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clSetKernelArg,
|
|
|
|
(cl_kernel kernel,
|
|
|
|
cl_uint arg_index,
|
|
|
|
size_t arg_size,
|
|
|
|
const void * arg_value),
|
|
|
|
(kernel, arg_index, arg_size, arg_value))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
|
|
|
|
(cl_kernel kernel,
|
|
|
|
cl_device_id device,
|
|
|
|
cl_kernel_work_group_info param_name,
|
|
|
|
size_t param_value_size,
|
|
|
|
void * param_value,
|
|
|
|
size_t * param_value_size_ret),
|
|
|
|
(kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
|
2013-10-22 21:41:28 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueReadBuffer,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem buffer,
|
|
|
|
cl_bool blocking_read,
|
|
|
|
size_t offset,
|
2013-10-22 21:41:28 +08:00
|
|
|
size_t size,
|
2013-10-22 18:04:49 +08:00
|
|
|
void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, buffer, blocking_read, offset, size, ptr,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueReadBufferRect,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem buffer,
|
|
|
|
cl_bool blocking_read,
|
|
|
|
const size_t * buffer_offset,
|
2013-10-22 21:41:28 +08:00
|
|
|
const size_t * host_offset,
|
2013-10-22 18:04:49 +08:00
|
|
|
const size_t * region,
|
|
|
|
size_t buffer_row_pitch,
|
|
|
|
size_t buffer_slice_pitch,
|
|
|
|
size_t host_row_pitch,
|
2013-10-22 21:41:28 +08:00
|
|
|
size_t host_slice_pitch,
|
2013-10-22 18:04:49 +08:00
|
|
|
void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
|
|
|
|
buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
|
|
|
|
event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueWriteBuffer,
|
2013-10-22 21:41:28 +08:00
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem buffer,
|
|
|
|
cl_bool blocking_write,
|
|
|
|
size_t offset,
|
|
|
|
size_t size,
|
|
|
|
const void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_event * event),
|
|
|
|
(command_queue, buffer, blocking_write, offset, size, ptr,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem buffer,
|
|
|
|
cl_bool blocking_write,
|
|
|
|
const size_t * buffer_offset,
|
2013-10-22 21:41:28 +08:00
|
|
|
const size_t * host_offset,
|
2013-10-22 18:04:49 +08:00
|
|
|
const size_t * region,
|
|
|
|
size_t buffer_row_pitch,
|
|
|
|
size_t buffer_slice_pitch,
|
|
|
|
size_t host_row_pitch,
|
2013-10-22 21:41:28 +08:00
|
|
|
size_t host_slice_pitch,
|
2013-10-22 18:04:49 +08:00
|
|
|
const void * ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, buffer, blocking_write, buffer_offset, host_offset,
|
|
|
|
region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
|
|
|
|
host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
2013-10-22 21:41:28 +08:00
|
|
|
/*OCL_FUNC(cl_int, clEnqueueFillBuffer,
|
2013-10-22 18:04:49 +08:00
|
|
|
(cl_command_queue command_queue,
|
2013-10-22 21:41:28 +08:00
|
|
|
cl_mem buffer,
|
|
|
|
const void * pattern,
|
|
|
|
size_t pattern_size,
|
|
|
|
size_t offset,
|
|
|
|
size_t size,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_event * event),
|
2013-10-22 21:41:28 +08:00
|
|
|
(command_queue, buffer, pattern, pattern_size, offset, size,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))*/
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueCopyBuffer,
|
2013-10-22 21:41:28 +08:00
|
|
|
(cl_command_queue command_queue,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_mem src_buffer,
|
2013-10-22 21:41:28 +08:00
|
|
|
cl_mem dst_buffer,
|
2013-10-22 18:04:49 +08:00
|
|
|
size_t src_offset,
|
|
|
|
size_t dst_offset,
|
2013-10-22 21:41:28 +08:00
|
|
|
size_t size,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
|
|
|
|
size, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
|
2013-10-22 21:41:28 +08:00
|
|
|
(cl_command_queue command_queue,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_mem src_buffer,
|
2013-10-22 21:41:28 +08:00
|
|
|
cl_mem dst_buffer,
|
2013-10-22 18:04:49 +08:00
|
|
|
const size_t * src_origin,
|
|
|
|
const size_t * dst_origin,
|
2013-10-22 21:41:28 +08:00
|
|
|
const size_t * region,
|
2013-10-22 18:04:49 +08:00
|
|
|
size_t src_row_pitch,
|
|
|
|
size_t src_slice_pitch,
|
|
|
|
size_t dst_row_pitch,
|
|
|
|
size_t dst_slice_pitch,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
|
|
|
|
region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
|
|
|
|
num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC_P(void*, clEnqueueMapBuffer,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem buffer,
|
2013-10-22 21:41:28 +08:00
|
|
|
cl_bool blocking_map,
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_map_flags map_flags,
|
|
|
|
size_t offset,
|
|
|
|
size_t size,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event,
|
|
|
|
cl_int * errcode_ret),
|
|
|
|
(command_queue, buffer, blocking_map, map_flags, offset, size,
|
|
|
|
num_events_in_wait_list, event_wait_list, event, errcode_ret))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_mem memobj,
|
|
|
|
void * mapped_ptr,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_kernel kernel,
|
|
|
|
cl_uint work_dim,
|
|
|
|
const size_t * global_work_offset,
|
|
|
|
const size_t * global_work_size,
|
|
|
|
const size_t * local_work_size,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, kernel, work_dim, global_work_offset, global_work_size,
|
|
|
|
local_work_size, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clEnqueueTask,
|
|
|
|
(cl_command_queue command_queue,
|
|
|
|
cl_kernel kernel,
|
|
|
|
cl_uint num_events_in_wait_list,
|
|
|
|
const cl_event * event_wait_list,
|
|
|
|
cl_event * event),
|
|
|
|
(command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clSetEventCallback,
|
|
|
|
(cl_event event,
|
|
|
|
cl_int command_exec_callback_type ,
|
|
|
|
void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
|
|
|
|
void *user_data),
|
|
|
|
(event, command_exec_callback_type, pfn_event_notify, user_data))
|
|
|
|
|
|
|
|
OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifndef CL_VERSION_1_2
|
|
|
|
#define CL_VERSION_1_2
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
namespace cv { namespace ocl {
|
|
|
|
|
2013-10-25 20:46:03 +08:00
|
|
|
struct UMat2D
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
UMat2D(const UMat& m)
|
2013-10-25 20:46:03 +08:00
|
|
|
{
|
2013-11-22 22:56:03 +08:00
|
|
|
offset = (int)m.offset;
|
|
|
|
step = (int)m.step;
|
2013-10-25 20:46:03 +08:00
|
|
|
rows = m.rows;
|
|
|
|
cols = m.cols;
|
|
|
|
}
|
2013-11-22 22:56:03 +08:00
|
|
|
int offset;
|
|
|
|
int step;
|
2013-10-25 20:46:03 +08:00
|
|
|
int rows;
|
|
|
|
int cols;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct UMat3D
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
UMat3D(const UMat& m)
|
2013-10-25 20:46:03 +08:00
|
|
|
{
|
2013-11-22 22:56:03 +08:00
|
|
|
offset = (int)m.offset;
|
|
|
|
step = (int)m.step.p[1];
|
|
|
|
slicestep = (int)m.step.p[0];
|
|
|
|
slices = (int)m.size.p[0];
|
2013-10-25 20:46:03 +08:00
|
|
|
rows = m.size.p[1];
|
|
|
|
cols = m.size.p[2];
|
|
|
|
}
|
2013-11-22 22:56:03 +08:00
|
|
|
int offset;
|
|
|
|
int slicestep;
|
|
|
|
int step;
|
2013-10-25 20:46:03 +08:00
|
|
|
int slices;
|
|
|
|
int rows;
|
|
|
|
int cols;
|
|
|
|
};
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
|
|
|
|
static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
|
|
|
|
{
|
|
|
|
static uint64 table[256];
|
|
|
|
static bool initialized = false;
|
|
|
|
|
|
|
|
if( !initialized )
|
|
|
|
{
|
|
|
|
for( int i = 0; i < 256; i++ )
|
|
|
|
{
|
|
|
|
uint64 c = i;
|
|
|
|
for( int j = 0; j < 8; j++ )
|
|
|
|
c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
|
|
|
|
table[i] = c;
|
|
|
|
}
|
|
|
|
initialized = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64 crc = ~crc0;
|
|
|
|
for( size_t idx = 0; idx < size; idx++ )
|
|
|
|
crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
|
|
|
|
|
|
|
|
return ~crc;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct HashKey
|
|
|
|
{
|
|
|
|
typedef uint64 part;
|
|
|
|
HashKey(part _a, part _b) : a(_a), b(_b) {}
|
|
|
|
part a, b;
|
|
|
|
};
|
|
|
|
|
|
|
|
inline bool operator == (const HashKey& h1, const HashKey& h2)
|
|
|
|
{
|
|
|
|
return h1.a == h2.a && h1.b == h2.b;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool operator < (const HashKey& h1, const HashKey& h2)
|
|
|
|
{
|
|
|
|
return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
|
|
|
|
}
|
|
|
|
|
2013-12-15 03:16:53 +08:00
|
|
|
static bool g_isOpenCLInitialized = false;
|
2013-11-21 17:05:32 +08:00
|
|
|
static bool g_isOpenCLAvailable = false;
|
2013-12-15 03:16:53 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
bool haveOpenCL()
|
|
|
|
{
|
2013-12-15 03:16:53 +08:00
|
|
|
if (!g_isOpenCLInitialized)
|
2013-11-21 17:05:32 +08:00
|
|
|
{
|
2013-12-15 03:16:53 +08:00
|
|
|
try
|
2013-11-21 17:05:32 +08:00
|
|
|
{
|
2013-12-15 03:16:53 +08:00
|
|
|
cl_uint n = 0;
|
|
|
|
g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
|
2013-11-21 17:05:32 +08:00
|
|
|
}
|
2013-12-15 03:16:53 +08:00
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
g_isOpenCLAvailable = false;
|
2013-11-21 17:05:32 +08:00
|
|
|
}
|
2013-12-15 03:16:53 +08:00
|
|
|
g_isOpenCLInitialized = true;
|
2013-11-21 17:05:32 +08:00
|
|
|
}
|
|
|
|
return g_isOpenCLAvailable;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool useOpenCL()
|
|
|
|
{
|
2013-12-16 19:02:42 +08:00
|
|
|
CoreTLSData* data = coreTlsData.get();
|
2013-10-22 18:04:49 +08:00
|
|
|
if( data->useOpenCL < 0 )
|
|
|
|
data->useOpenCL = (int)haveOpenCL();
|
|
|
|
return data->useOpenCL > 0;
|
|
|
|
}
|
|
|
|
|
2013-10-25 20:46:03 +08:00
|
|
|
void setUseOpenCL(bool flag)
|
|
|
|
{
|
|
|
|
if( haveOpenCL() )
|
|
|
|
{
|
2013-12-16 19:02:42 +08:00
|
|
|
CoreTLSData* data = coreTlsData.get();
|
2013-10-25 20:46:03 +08:00
|
|
|
data->useOpenCL = flag ? 1 : 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-15 03:16:53 +08:00
|
|
|
#ifdef HAVE_CLAMDBLAS
|
|
|
|
|
|
|
|
class AmdBlasHelper
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static AmdBlasHelper & getInstance()
|
|
|
|
{
|
|
|
|
static AmdBlasHelper amdBlas;
|
|
|
|
return amdBlas;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool isAvailable() const
|
|
|
|
{
|
|
|
|
return g_isAmdBlasAvailable;
|
|
|
|
}
|
|
|
|
|
|
|
|
~AmdBlasHelper()
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
clAmdBlasTeardown();
|
|
|
|
}
|
|
|
|
catch (...) { }
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
AmdBlasHelper()
|
|
|
|
{
|
|
|
|
if (!g_isAmdBlasInitialized)
|
|
|
|
{
|
|
|
|
AutoLock lock(m);
|
|
|
|
|
|
|
|
if (!g_isAmdBlasInitialized && haveOpenCL())
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
g_isAmdBlasAvailable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
g_isAmdBlasAvailable = false;
|
|
|
|
|
|
|
|
g_isAmdBlasInitialized = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
static Mutex m;
|
|
|
|
static bool g_isAmdBlasInitialized;
|
|
|
|
static bool g_isAmdBlasAvailable;
|
|
|
|
};
|
|
|
|
|
|
|
|
bool AmdBlasHelper::g_isAmdBlasAvailable = false;
|
|
|
|
bool AmdBlasHelper::g_isAmdBlasInitialized = false;
|
|
|
|
Mutex AmdBlasHelper::m;
|
|
|
|
|
|
|
|
bool haveAmdBlas()
|
|
|
|
{
|
|
|
|
return AmdBlasHelper::getInstance().isAvailable();
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
bool haveAmdBlas()
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
void finish2()
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
Queue::getDefault().finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
#define IMPLEMENT_REFCOUNTABLE() \
|
|
|
|
void addref() { CV_XADD(&refcount, 1); } \
|
|
|
|
void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \
|
|
|
|
int refcount
|
|
|
|
|
|
|
|
struct Platform::Impl
|
|
|
|
{
|
|
|
|
Impl()
|
|
|
|
{
|
|
|
|
refcount = 1;
|
|
|
|
handle = 0;
|
|
|
|
initialized = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
~Impl() {}
|
|
|
|
|
|
|
|
void init()
|
|
|
|
{
|
|
|
|
if( !initialized )
|
|
|
|
{
|
|
|
|
//cl_uint num_entries
|
|
|
|
cl_uint n = 0;
|
|
|
|
if( clGetPlatformIDs(1, &handle, &n) < 0 || n == 0 )
|
|
|
|
handle = 0;
|
|
|
|
if( handle != 0 )
|
|
|
|
{
|
|
|
|
char buf[1000];
|
|
|
|
size_t len = 0;
|
|
|
|
clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len);
|
|
|
|
buf[len] = '\0';
|
|
|
|
vendor = String(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
initialized = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
|
|
|
|
cl_platform_id handle;
|
|
|
|
String vendor;
|
|
|
|
bool initialized;
|
|
|
|
};
|
|
|
|
|
|
|
|
Platform::Platform()
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Platform::~Platform()
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
|
|
|
Platform::Platform(const Platform& pl)
|
|
|
|
{
|
|
|
|
p = (Impl*)pl.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
|
|
|
Platform& Platform::operator = (const Platform& pl)
|
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)pl.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
void* Platform::ptr() const
|
|
|
|
{
|
|
|
|
return p ? p->handle : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Platform& Platform::getDefault()
|
|
|
|
{
|
|
|
|
static Platform p;
|
|
|
|
if( !p.p )
|
|
|
|
{
|
|
|
|
p.p = new Impl;
|
|
|
|
p.p->init();
|
|
|
|
}
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
struct Device::Impl
|
|
|
|
{
|
|
|
|
Impl(void* d)
|
|
|
|
{
|
|
|
|
handle = (cl_device_id)d;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename _TpCL, typename _TpOut>
|
|
|
|
_TpOut getProp(cl_device_info prop) const
|
|
|
|
{
|
|
|
|
_TpCL temp=_TpCL();
|
|
|
|
size_t sz = 0;
|
|
|
|
|
|
|
|
return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
|
|
|
|
sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
|
|
|
|
}
|
|
|
|
|
2013-10-23 03:34:16 +08:00
|
|
|
bool getBoolProp(cl_device_info prop) const
|
|
|
|
{
|
|
|
|
cl_bool temp = CL_FALSE;
|
|
|
|
size_t sz = 0;
|
|
|
|
|
|
|
|
return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
|
|
|
|
sz == sizeof(temp) ? temp != 0 : false;
|
|
|
|
}
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
String getStrProp(cl_device_info prop) const
|
|
|
|
{
|
|
|
|
char buf[1024];
|
|
|
|
size_t sz=0;
|
|
|
|
return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) >= 0 &&
|
|
|
|
sz < sizeof(buf) ? String(buf) : String();
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
cl_device_id handle;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
Device::Device()
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Device::Device(void* d)
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
set(d);
|
|
|
|
}
|
|
|
|
|
|
|
|
Device::Device(const Device& d)
|
|
|
|
{
|
|
|
|
p = d.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
|
|
|
Device& Device::operator = (const Device& d)
|
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)d.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
Device::~Device()
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Device::set(void* d)
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(d);
|
|
|
|
}
|
|
|
|
|
|
|
|
void* Device::ptr() const
|
|
|
|
{
|
|
|
|
return p ? p->handle : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
String Device::name() const
|
|
|
|
{ return p ? p->getStrProp(CL_DEVICE_NAME) : String(); }
|
|
|
|
|
|
|
|
String Device::extensions() const
|
|
|
|
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
|
|
|
|
|
|
|
|
String Device::vendor() const
|
|
|
|
{ return p ? p->getStrProp(CL_DEVICE_VENDOR) : String(); }
|
|
|
|
|
|
|
|
String Device::OpenCL_C_Version() const
|
|
|
|
{ return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
|
|
|
|
|
|
|
|
String Device::OpenCLVersion() const
|
|
|
|
{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
|
|
|
|
|
|
|
|
String Device::driverVersion() const
|
2013-11-19 00:48:00 +08:00
|
|
|
{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
int Device::type() const
|
|
|
|
{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; }
|
|
|
|
|
|
|
|
int Device::addressBits() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
|
|
|
|
|
|
|
|
bool Device::available() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
bool Device::compilerAvailable() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
bool Device::linkerAvailable() const
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
|
2013-11-21 17:05:32 +08:00
|
|
|
#else
|
|
|
|
{ CV_REQUIRE_OPENCL_1_2_ERROR; }
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
int Device::doubleFPConfig() const
|
|
|
|
{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG) : 0; }
|
|
|
|
|
|
|
|
int Device::singleFPConfig() const
|
|
|
|
{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
|
|
|
|
|
|
|
|
int Device::halfFPConfig() const
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-10-22 18:04:49 +08:00
|
|
|
{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
|
2013-11-21 17:05:32 +08:00
|
|
|
#else
|
|
|
|
{ CV_REQUIRE_OPENCL_1_2_ERROR; }
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
bool Device::endianLittle() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
bool Device::errorCorrectionSupport() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
int Device::executionCapabilities() const
|
|
|
|
{ return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
|
|
|
|
|
|
|
|
size_t Device::globalMemCacheSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
|
|
|
|
|
|
|
|
int Device::globalMemCacheType() const
|
|
|
|
{ return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
|
|
|
|
|
|
|
|
int Device::globalMemCacheLineSize() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
|
|
|
|
|
|
|
|
size_t Device::globalMemSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
|
|
|
|
|
|
|
|
size_t Device::localMemSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
|
|
|
|
|
|
|
|
int Device::localMemType() const
|
|
|
|
{ return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
|
|
|
|
|
|
|
|
bool Device::hostUnifiedMemory() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
bool Device::imageSupport() const
|
2013-10-23 03:34:16 +08:00
|
|
|
{ return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
size_t Device::image2DMaxWidth() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
|
|
|
|
|
|
|
|
size_t Device::image2DMaxHeight() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
|
|
|
|
|
|
|
|
size_t Device::image3DMaxWidth() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
|
|
|
|
|
|
|
|
size_t Device::image3DMaxHeight() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
|
|
|
|
|
|
|
|
size_t Device::image3DMaxDepth() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
|
|
|
|
|
|
|
|
size_t Device::imageMaxBufferSize() const
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-10-22 18:04:49 +08:00
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
|
2013-11-21 17:05:32 +08:00
|
|
|
#else
|
|
|
|
{ CV_REQUIRE_OPENCL_1_2_ERROR; }
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
size_t Device::imageMaxArraySize() const
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-10-22 18:04:49 +08:00
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
|
2013-11-21 17:05:32 +08:00
|
|
|
#else
|
|
|
|
{ CV_REQUIRE_OPENCL_1_2_ERROR; }
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
int Device::maxClockFrequency() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
|
|
|
|
|
|
|
|
int Device::maxComputeUnits() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS) : 0; }
|
|
|
|
|
|
|
|
int Device::maxConstantArgs() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
|
|
|
|
|
|
|
|
size_t Device::maxConstantBufferSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
|
|
|
|
|
|
|
|
size_t Device::maxMemAllocSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
|
|
|
|
|
|
|
|
size_t Device::maxParameterSize() const
|
|
|
|
{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
|
|
|
|
|
|
|
|
int Device::maxReadImageArgs() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
|
|
|
|
|
|
|
|
int Device::maxWriteImageArgs() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
|
|
|
|
|
|
|
|
int Device::maxSamplers() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
|
|
|
|
|
|
|
|
size_t Device::maxWorkGroupSize() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE) : 0; }
|
|
|
|
|
|
|
|
int Device::maxWorkItemDims() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
|
|
|
|
|
|
|
|
void Device::maxWorkItemSizes(size_t* sizes) const
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
{
|
|
|
|
const int MAX_DIMS = 32;
|
|
|
|
size_t retsz = 0;
|
|
|
|
clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
|
|
|
|
MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int Device::memBaseAddrAlign() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthChar() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthShort() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthInt() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthLong() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthFloat() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthDouble() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
|
|
|
|
|
|
|
|
int Device::nativeVectorWidthHalf() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthChar() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthShort() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthInt() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthLong() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthFloat() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthDouble() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
|
|
|
|
|
|
|
|
int Device::preferredVectorWidthHalf() const
|
|
|
|
{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
|
|
|
|
|
|
|
|
size_t Device::printfBufferSize() const
|
2013-11-21 17:05:32 +08:00
|
|
|
#ifdef CL_VERSION_1_2
|
2013-10-22 18:04:49 +08:00
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
|
2013-11-21 17:05:32 +08:00
|
|
|
#else
|
|
|
|
{ CV_REQUIRE_OPENCL_1_2_ERROR; }
|
|
|
|
#endif
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
size_t Device::profilingTimerResolution() const
|
|
|
|
{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
|
|
|
|
|
|
|
|
const Device& Device::getDefault()
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
const Context2& ctx = Context2::getDefault();
|
2013-12-16 19:02:42 +08:00
|
|
|
int idx = coreTlsData.get()->device;
|
2013-10-22 18:04:49 +08:00
|
|
|
return ctx.device(idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
struct Context2::Impl
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-12-02 05:50:24 +08:00
|
|
|
Impl()
|
|
|
|
{
|
|
|
|
refcount = 1;
|
|
|
|
handle = 0;
|
|
|
|
}
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
Impl(int dtype0)
|
|
|
|
{
|
|
|
|
refcount = 1;
|
|
|
|
handle = 0;
|
|
|
|
|
|
|
|
cl_int retval = 0;
|
|
|
|
cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
|
|
|
|
cl_context_properties prop[] =
|
|
|
|
{
|
|
|
|
CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
|
|
|
|
0
|
|
|
|
};
|
|
|
|
|
|
|
|
cl_uint i, nd0 = 0, nd = 0;
|
|
|
|
int dtype = dtype0 & 15;
|
|
|
|
clGetDeviceIDs( pl, dtype, 0, 0, &nd0 );
|
|
|
|
if(retval < 0)
|
|
|
|
return;
|
|
|
|
AutoBuffer<void*> dlistbuf(nd0*2+1);
|
|
|
|
cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
|
|
|
|
cl_device_id* dlist_new = dlist + nd0;
|
|
|
|
clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 );
|
|
|
|
String name0;
|
|
|
|
|
|
|
|
for(i = 0; i < nd0; i++)
|
|
|
|
{
|
|
|
|
Device d(dlist[i]);
|
|
|
|
if( !d.available() || !d.compilerAvailable() )
|
|
|
|
continue;
|
|
|
|
if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
|
|
|
|
continue;
|
|
|
|
if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
|
|
|
|
continue;
|
|
|
|
String name = d.name();
|
|
|
|
if( nd != 0 && name != name0 )
|
|
|
|
continue;
|
|
|
|
name0 = name;
|
|
|
|
dlist_new[nd++] = dlist[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
if(nd == 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// !!! in the current implementation force the number of devices to 1 !!!
|
|
|
|
nd = 1;
|
|
|
|
|
|
|
|
handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
|
|
|
|
bool ok = handle != 0 && retval >= 0;
|
|
|
|
if( ok )
|
|
|
|
{
|
|
|
|
devices.resize(nd);
|
|
|
|
for( i = 0; i < nd; i++ )
|
|
|
|
devices[i].set(dlist_new[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
~Impl()
|
|
|
|
{
|
|
|
|
if(handle)
|
|
|
|
clReleaseContext(handle);
|
|
|
|
devices.clear();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Program getProg(const ProgramSource2& src,
|
2013-10-22 18:04:49 +08:00
|
|
|
const String& buildflags, String& errmsg)
|
|
|
|
{
|
|
|
|
String prefix = Program::getPrefix(buildflags);
|
|
|
|
HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
|
|
|
|
phash_t::iterator it = phash.find(k);
|
|
|
|
if( it != phash.end() )
|
|
|
|
return it->second;
|
|
|
|
//String filename = format("%08x%08x_%08x%08x.clb2",
|
|
|
|
Program prog(src, buildflags, errmsg);
|
2013-11-19 00:48:00 +08:00
|
|
|
if(prog.ptr())
|
|
|
|
phash.insert(std::pair<HashKey,Program>(k, prog));
|
2013-10-22 18:04:49 +08:00
|
|
|
return prog;
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
|
|
|
|
cl_context handle;
|
|
|
|
std::vector<Device> devices;
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
typedef ProgramSource2::hash_t hash_t;
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
struct HashKey
|
|
|
|
{
|
|
|
|
HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
|
|
|
|
bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
|
|
|
|
bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
|
|
|
|
bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
|
|
|
|
hash_t a, b;
|
|
|
|
};
|
|
|
|
typedef std::map<HashKey, Program> phash_t;
|
|
|
|
phash_t phash;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2::Context2()
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2::Context2(int dtype)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
create(dtype);
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Context2::create(int dtype0)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if( !haveOpenCL() )
|
|
|
|
return false;
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(dtype0);
|
|
|
|
if(!p->handle)
|
|
|
|
{
|
|
|
|
delete p;
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
return p != 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2::~Context2()
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2::Context2(const Context2& c)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = (Impl*)c.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2& Context2::operator = (const Context2& c)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)c.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
void* Context2::ptr() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
return p->handle;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
size_t Context2::ndevices() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
return p ? p->devices.size() : 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
const Device& Context2::device(size_t idx) const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
static Device dummy;
|
|
|
|
return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
|
|
|
|
}
|
|
|
|
|
2013-12-02 05:50:24 +08:00
|
|
|
Context2& Context2::getDefault(bool initialize)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
static Context2 ctx;
|
2013-12-02 05:50:24 +08:00
|
|
|
if(!ctx.p && haveOpenCL())
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-12-02 05:50:24 +08:00
|
|
|
if (initialize)
|
|
|
|
{
|
|
|
|
// do not create new Context2 right away.
|
|
|
|
// First, try to retrieve existing context of the same type.
|
|
|
|
// In its turn, Platform::getContext() may call Context2::create()
|
|
|
|
// if there is no such context.
|
|
|
|
ctx.create(Device::TYPE_ACCELERATOR);
|
|
|
|
if(!ctx.p)
|
|
|
|
ctx.create(Device::TYPE_DGPU);
|
|
|
|
if(!ctx.p)
|
|
|
|
ctx.create(Device::TYPE_IGPU);
|
|
|
|
if(!ctx.p)
|
|
|
|
ctx.create(Device::TYPE_CPU);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ctx.p = new Impl();
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ctx;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Program Context2::getProg(const ProgramSource2& prog,
|
2013-10-22 18:04:49 +08:00
|
|
|
const String& buildopts, String& errmsg)
|
|
|
|
{
|
|
|
|
return p ? p->getProg(prog, buildopts, errmsg) : Program();
|
|
|
|
}
|
|
|
|
|
2013-12-02 05:50:24 +08:00
|
|
|
void initializeContextFromHandle(Context2& ctx, void* platform, void* _context, void* _device)
|
|
|
|
{
|
|
|
|
cl_context context = (cl_context)_context;
|
|
|
|
cl_device_id device = (cl_device_id)_device;
|
|
|
|
|
|
|
|
// cleanup old context
|
|
|
|
Context2::Impl* impl = ctx._getImpl();
|
|
|
|
if (impl->handle)
|
|
|
|
{
|
|
|
|
cl_int status = clReleaseContext(impl->handle);
|
|
|
|
(void)status;
|
|
|
|
}
|
|
|
|
impl->devices.clear();
|
|
|
|
|
|
|
|
impl->handle = context;
|
|
|
|
impl->devices.resize(1);
|
|
|
|
impl->devices[0].set(device);
|
|
|
|
|
|
|
|
Platform& p = Platform::getDefault();
|
|
|
|
Platform::Impl* pImpl = p._getImpl();
|
|
|
|
pImpl->handle = (cl_platform_id)platform;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
struct Queue::Impl
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
Impl(const Context2& c, const Device& d)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
refcount = 1;
|
2013-11-19 00:48:00 +08:00
|
|
|
const Context2* pc = &c;
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_context ch = (cl_context)pc->ptr();
|
|
|
|
if( !ch )
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
pc = &Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
ch = (cl_context)pc->ptr();
|
|
|
|
}
|
|
|
|
cl_device_id dh = (cl_device_id)d.ptr();
|
|
|
|
if( !dh )
|
|
|
|
dh = (cl_device_id)pc->device(0).ptr();
|
|
|
|
cl_int retval = 0;
|
|
|
|
handle = clCreateCommandQueue(ch, dh, 0, &retval);
|
|
|
|
}
|
|
|
|
|
|
|
|
~Impl()
|
|
|
|
{
|
2013-12-11 04:31:34 +08:00
|
|
|
#ifdef _WIN32
|
|
|
|
if (!cv::__termination)
|
|
|
|
#endif
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-12-11 04:31:34 +08:00
|
|
|
if(handle)
|
|
|
|
{
|
|
|
|
clFinish(handle);
|
|
|
|
clReleaseCommandQueue(handle);
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
|
|
|
|
cl_command_queue handle;
|
|
|
|
bool initialized;
|
|
|
|
};
|
|
|
|
|
|
|
|
Queue::Queue()
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Queue::Queue(const Context2& c, const Device& d)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
create(c, d);
|
|
|
|
}
|
|
|
|
|
|
|
|
Queue::Queue(const Queue& q)
|
|
|
|
{
|
|
|
|
p = q.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
|
|
|
Queue& Queue::operator = (const Queue& q)
|
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)q.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
Queue::~Queue()
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Queue::create(const Context2& c, const Device& d)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(c, d);
|
|
|
|
return p->handle != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Queue::finish()
|
|
|
|
{
|
|
|
|
if(p && p->handle)
|
|
|
|
clFinish(p->handle);
|
|
|
|
}
|
|
|
|
|
|
|
|
void* Queue::ptr() const
|
|
|
|
{
|
|
|
|
return p ? p->handle : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Queue& Queue::getDefault()
|
|
|
|
{
|
2013-12-16 19:02:42 +08:00
|
|
|
Queue& q = coreTlsData.get()->oclQueue;
|
2013-12-01 07:12:19 +08:00
|
|
|
if( !q.p && haveOpenCL() )
|
2013-11-19 00:48:00 +08:00
|
|
|
q.create(Context2::getDefault());
|
2013-10-22 18:04:49 +08:00
|
|
|
return q;
|
|
|
|
}
|
|
|
|
|
|
|
|
static cl_command_queue getQueue(const Queue& q)
|
|
|
|
{
|
|
|
|
cl_command_queue qq = (cl_command_queue)q.ptr();
|
|
|
|
if(!qq)
|
|
|
|
qq = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
return qq;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
KernelArg::KernelArg()
|
|
|
|
: flags(0), m(0), obj(0), sz(0), wscale(1)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, const void* _obj, size_t _sz)
|
|
|
|
: flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
KernelArg KernelArg::Constant(const Mat& m)
|
|
|
|
{
|
|
|
|
CV_Assert(m.isContinuous());
|
2013-11-19 00:48:00 +08:00
|
|
|
return KernelArg(CONSTANT, 0, 1, m.data, m.total()*m.elemSize());
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
struct Kernel::Impl
|
|
|
|
{
|
|
|
|
Impl(const char* kname, const Program& prog)
|
|
|
|
{
|
|
|
|
e = 0; refcount = 1;
|
|
|
|
cl_program ph = (cl_program)prog.ptr();
|
|
|
|
cl_int retval = 0;
|
|
|
|
handle = ph != 0 ?
|
|
|
|
clCreateKernel(ph, kname, &retval) : 0;
|
2013-10-25 20:46:03 +08:00
|
|
|
for( int i = 0; i < MAX_ARRS; i++ )
|
|
|
|
u[i] = 0;
|
2013-12-02 03:14:45 +08:00
|
|
|
haveTempDstUMats = false;
|
2013-10-25 20:46:03 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void cleanupUMats()
|
|
|
|
{
|
|
|
|
for( int i = 0; i < MAX_ARRS; i++ )
|
|
|
|
if( u[i] )
|
|
|
|
{
|
|
|
|
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
|
|
|
|
u[i]->currAllocator->deallocate(u[i]);
|
|
|
|
u[i] = 0;
|
|
|
|
}
|
|
|
|
nu = 0;
|
2013-12-02 03:14:45 +08:00
|
|
|
haveTempDstUMats = false;
|
2013-10-25 20:46:03 +08:00
|
|
|
}
|
|
|
|
|
2013-12-02 03:14:45 +08:00
|
|
|
void addUMat(const UMat& m, bool dst)
|
2013-10-25 20:46:03 +08:00
|
|
|
{
|
|
|
|
CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
|
|
|
|
u[nu] = m.u;
|
|
|
|
CV_XADD(&m.u->urefcount, 1);
|
|
|
|
nu++;
|
2013-12-02 03:14:45 +08:00
|
|
|
if(dst && m.u->tempUMat())
|
|
|
|
haveTempDstUMats = true;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
2013-10-25 20:46:03 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
void finit()
|
|
|
|
{
|
2013-10-25 20:46:03 +08:00
|
|
|
cleanupUMats();
|
2013-10-22 18:04:49 +08:00
|
|
|
if(e) { clReleaseEvent(e); e = 0; }
|
|
|
|
release();
|
|
|
|
}
|
|
|
|
|
|
|
|
~Impl()
|
|
|
|
{
|
|
|
|
if(handle)
|
|
|
|
clReleaseKernel(handle);
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
|
|
|
|
cl_kernel handle;
|
|
|
|
cl_event e;
|
2013-10-25 20:46:03 +08:00
|
|
|
enum { MAX_ARRS = 16 };
|
|
|
|
UMatData* u[MAX_ARRS];
|
|
|
|
int nu;
|
2013-12-02 03:14:45 +08:00
|
|
|
bool haveTempDstUMats;
|
2013-10-22 18:04:49 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
}}
|
|
|
|
|
|
|
|
extern "C"
|
|
|
|
{
|
|
|
|
static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
|
|
|
|
{
|
|
|
|
((cv::ocl::Kernel::Impl*)p)->finit();
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace cv { namespace ocl {
|
|
|
|
|
|
|
|
Kernel::Kernel()
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
Kernel::Kernel(const char* kname, const Program& prog)
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
create(kname, prog);
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Kernel::Kernel(const char* kname, const ProgramSource2& src,
|
|
|
|
const String& buildopts, String* errmsg)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
create(kname, src, buildopts, errmsg);
|
|
|
|
}
|
|
|
|
|
|
|
|
Kernel::Kernel(const Kernel& k)
|
|
|
|
{
|
|
|
|
p = k.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
|
|
|
Kernel& Kernel::operator = (const Kernel& k)
|
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)k.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
Kernel::~Kernel()
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Kernel::create(const char* kname, const Program& prog)
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(kname, prog);
|
|
|
|
if(p->handle == 0)
|
|
|
|
{
|
|
|
|
p->release();
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
return p != 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Kernel::create(const char* kname, const ProgramSource2& src,
|
|
|
|
const String& buildopts, String* errmsg)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
{
|
|
|
|
p->release();
|
|
|
|
p = 0;
|
|
|
|
}
|
2013-11-19 00:48:00 +08:00
|
|
|
String tempmsg;
|
|
|
|
if( !errmsg ) errmsg = &tempmsg;
|
|
|
|
const Program& prog = Context2::getDefault().getProg(src, buildopts, *errmsg);
|
2013-10-22 18:04:49 +08:00
|
|
|
return create(kname, prog);
|
|
|
|
}
|
|
|
|
|
|
|
|
void* Kernel::ptr() const
|
|
|
|
{
|
|
|
|
return p ? p->handle : 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Kernel::empty() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
return ptr() == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int Kernel::set(int i, const void* value, size_t sz)
|
|
|
|
{
|
|
|
|
CV_Assert(i >= 0);
|
2013-10-25 20:46:03 +08:00
|
|
|
if( i == 0 )
|
|
|
|
p->cleanupUMats();
|
2013-11-19 00:48:00 +08:00
|
|
|
if( !p || !p->handle || clSetKernelArg(p->handle, (cl_uint)i, sz, value) < 0 )
|
|
|
|
return -1;
|
|
|
|
return i+1;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
int Kernel::set(int i, const UMat& m)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
int Kernel::set(int i, const KernelArg& arg)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
CV_Assert( i >= 0 );
|
|
|
|
if( !p || !p->handle )
|
|
|
|
return -1;
|
2013-11-27 22:30:07 +08:00
|
|
|
if( i == 0 )
|
|
|
|
p->cleanupUMats();
|
2013-10-22 18:04:49 +08:00
|
|
|
if( arg.m )
|
|
|
|
{
|
2013-10-25 20:46:03 +08:00
|
|
|
int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
|
|
|
|
((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
|
2013-11-28 03:30:29 +08:00
|
|
|
bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
|
2013-11-19 00:48:00 +08:00
|
|
|
cl_mem h = (cl_mem)arg.m->handle(accessFlags);
|
|
|
|
|
2013-11-28 03:30:29 +08:00
|
|
|
if (ptronly)
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)i++, sizeof(h), &h);
|
|
|
|
else if( arg.m->dims <= 2 )
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
UMat2D u2d(*arg.m);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset);
|
|
|
|
i += 3;
|
|
|
|
|
|
|
|
if( !(arg.flags & KernelArg::NO_SIZE) )
|
|
|
|
{
|
|
|
|
int cols = u2d.cols*arg.wscale;
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows);
|
2013-11-27 22:30:07 +08:00
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols);
|
2013-11-19 00:48:00 +08:00
|
|
|
i += 2;
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
UMat3D u3d(*arg.m);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset);
|
|
|
|
i += 4;
|
|
|
|
if( !(arg.flags & KernelArg::NO_SIZE) )
|
|
|
|
{
|
|
|
|
int cols = u3d.cols*arg.wscale;
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows);
|
|
|
|
clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols);
|
|
|
|
i += 3;
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
2013-12-02 03:14:45 +08:00
|
|
|
p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
|
2013-11-19 00:48:00 +08:00
|
|
|
return i;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
2013-11-19 00:48:00 +08:00
|
|
|
clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
|
|
|
|
return i+1;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-12-01 07:12:19 +08:00
|
|
|
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
2013-10-25 20:46:03 +08:00
|
|
|
bool sync, const Queue& q)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
if(!p || !p->handle || p->e != 0)
|
|
|
|
return false;
|
2013-11-27 22:30:07 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_command_queue qq = getQueue(q);
|
2013-12-02 00:58:30 +08:00
|
|
|
size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
|
2013-12-01 07:12:19 +08:00
|
|
|
size_t total = 1;
|
2013-12-02 00:58:30 +08:00
|
|
|
CV_Assert(_globalsize != 0);
|
2013-12-01 07:12:19 +08:00
|
|
|
for (int i = 0; i < dims; i++)
|
|
|
|
{
|
2013-12-02 22:27:08 +08:00
|
|
|
size_t val = _localsize ? _localsize[i] :
|
|
|
|
dims == 1 ? 64 : dims == 2 ? (16>>i) : dims == 3 ? (8>>(int)(i>0)) : 1;
|
2013-12-02 00:58:30 +08:00
|
|
|
CV_Assert( val > 0 );
|
2013-12-01 07:12:19 +08:00
|
|
|
total *= _globalsize[i];
|
|
|
|
globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
|
|
|
|
}
|
|
|
|
if( total == 0 )
|
|
|
|
return true;
|
2013-12-02 03:14:45 +08:00
|
|
|
if( p->haveTempDstUMats )
|
|
|
|
sync = true;
|
2013-11-19 00:48:00 +08:00
|
|
|
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
|
2013-12-02 00:58:30 +08:00
|
|
|
offset, globalsize, _localsize, 0, 0,
|
2013-11-19 00:48:00 +08:00
|
|
|
sync ? 0 : &p->e);
|
|
|
|
if( sync || retval < 0 )
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
clFinish(qq);
|
2013-10-25 20:46:03 +08:00
|
|
|
p->cleanupUMats();
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
p->addref();
|
|
|
|
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
|
|
|
|
}
|
2013-11-19 00:48:00 +08:00
|
|
|
return retval >= 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Kernel::runTask(bool sync, const Queue& q)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
if(!p || !p->handle || p->e != 0)
|
|
|
|
return false;
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
cl_command_queue qq = getQueue(q);
|
2013-11-19 00:48:00 +08:00
|
|
|
cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
|
|
|
|
if( sync || retval < 0 )
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
clFinish(qq);
|
2013-10-25 20:46:03 +08:00
|
|
|
p->cleanupUMats();
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
p->addref();
|
|
|
|
clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
|
|
|
|
}
|
2013-11-19 00:48:00 +08:00
|
|
|
return retval >= 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
size_t Kernel::workGroupSize() const
|
|
|
|
{
|
|
|
|
if(!p)
|
|
|
|
return 0;
|
|
|
|
size_t val = 0, retsz = 0;
|
|
|
|
cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
|
|
|
|
return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
|
|
|
|
sizeof(val), &val, &retsz) >= 0 ? val : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Kernel::compileWorkGroupSize(size_t wsz[]) const
|
|
|
|
{
|
|
|
|
if(!p || !wsz)
|
|
|
|
return 0;
|
|
|
|
size_t retsz = 0;
|
|
|
|
cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
|
|
|
|
return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
|
|
|
|
sizeof(wsz[0]*3), wsz, &retsz) >= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t Kernel::localMemSize() const
|
|
|
|
{
|
|
|
|
if(!p)
|
|
|
|
return 0;
|
|
|
|
size_t retsz = 0;
|
|
|
|
cl_ulong val = 0;
|
|
|
|
cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
|
|
|
|
return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
|
|
|
|
sizeof(val), &val, &retsz) >= 0 ? (size_t)val : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
struct Program::Impl
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
Impl(const ProgramSource2& _src,
|
2013-10-22 18:04:49 +08:00
|
|
|
const String& _buildflags, String& errmsg)
|
|
|
|
{
|
|
|
|
refcount = 1;
|
2013-11-19 00:48:00 +08:00
|
|
|
const Context2& ctx = Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
src = _src;
|
|
|
|
buildflags = _buildflags;
|
|
|
|
const String& srcstr = src.source();
|
|
|
|
const char* srcptr = srcstr.c_str();
|
|
|
|
size_t srclen = srcstr.size();
|
|
|
|
cl_int retval = 0;
|
|
|
|
|
|
|
|
handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
|
|
|
|
if( handle && retval >= 0 )
|
|
|
|
{
|
2013-11-21 22:27:07 +08:00
|
|
|
int i, n = (int)ctx.ndevices();
|
2013-10-22 18:04:49 +08:00
|
|
|
AutoBuffer<void*> deviceListBuf(n+1);
|
|
|
|
void** deviceList = deviceListBuf;
|
|
|
|
for( i = 0; i < n; i++ )
|
|
|
|
deviceList[i] = ctx.device(i).ptr();
|
2013-11-27 22:30:07 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
retval = clBuildProgram(handle, n,
|
|
|
|
(const cl_device_id*)deviceList,
|
|
|
|
buildflags.c_str(), 0, 0);
|
2013-12-01 07:12:19 +08:00
|
|
|
if( retval < 0 )
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
size_t retsz = 0;
|
2013-12-01 07:12:19 +08:00
|
|
|
retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
|
|
|
|
CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
|
2013-12-06 23:04:50 +08:00
|
|
|
if( retval >= 0 && retsz > 1 )
|
2013-12-01 07:12:19 +08:00
|
|
|
{
|
|
|
|
AutoBuffer<char> bufbuf(retsz + 16);
|
|
|
|
char* buf = bufbuf;
|
|
|
|
retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
|
|
|
|
CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
|
|
|
|
if( retval >= 0 )
|
|
|
|
{
|
|
|
|
errmsg = String(buf);
|
|
|
|
CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
|
|
|
|
}
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
2013-11-19 00:48:00 +08:00
|
|
|
CV_Assert(retval >= 0);
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Impl(const String& _buf, const String& _buildflags)
|
|
|
|
{
|
|
|
|
refcount = 1;
|
|
|
|
handle = 0;
|
|
|
|
buildflags = _buildflags;
|
|
|
|
if(_buf.empty())
|
|
|
|
return;
|
|
|
|
String prefix0 = Program::getPrefix(buildflags);
|
2013-11-19 00:48:00 +08:00
|
|
|
const Context2& ctx = Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
const Device& dev = Device::getDefault();
|
|
|
|
const char* pos0 = _buf.c_str();
|
2013-10-22 21:41:28 +08:00
|
|
|
const char* pos1 = strchr(pos0, '\n');
|
2013-10-22 18:04:49 +08:00
|
|
|
if(!pos1)
|
|
|
|
return;
|
2013-10-22 21:41:28 +08:00
|
|
|
const char* pos2 = strchr(pos1+1, '\n');
|
2013-10-22 18:04:49 +08:00
|
|
|
if(!pos2)
|
|
|
|
return;
|
2013-10-22 21:41:28 +08:00
|
|
|
const char* pos3 = strchr(pos2+1, '\n');
|
2013-10-22 18:04:49 +08:00
|
|
|
if(!pos3)
|
|
|
|
return;
|
|
|
|
size_t prefixlen = (pos3 - pos0)+1;
|
|
|
|
String prefix(pos0, prefixlen);
|
|
|
|
if( prefix != prefix0 )
|
|
|
|
return;
|
|
|
|
const uchar* bin = (uchar*)(pos3+1);
|
|
|
|
void* devid = dev.ptr();
|
|
|
|
size_t codelen = _buf.length() - prefixlen;
|
|
|
|
cl_int binstatus = 0, retval = 0;
|
|
|
|
handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
|
|
|
|
&codelen, &bin, &binstatus, &retval);
|
|
|
|
}
|
|
|
|
|
|
|
|
String store()
|
|
|
|
{
|
|
|
|
if(!handle)
|
|
|
|
return String();
|
|
|
|
size_t progsz = 0, retsz = 0;
|
|
|
|
String prefix = Program::getPrefix(buildflags);
|
|
|
|
size_t prefixlen = prefix.length();
|
|
|
|
if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) < 0)
|
|
|
|
return String();
|
|
|
|
AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
|
|
|
|
uchar* buf = bufbuf;
|
|
|
|
memcpy(buf, prefix.c_str(), prefixlen);
|
|
|
|
buf += prefixlen;
|
|
|
|
if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) < 0)
|
|
|
|
return String();
|
|
|
|
buf[progsz] = (uchar)'\0';
|
|
|
|
return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
|
|
|
|
}
|
|
|
|
|
|
|
|
~Impl()
|
|
|
|
{
|
|
|
|
if( handle )
|
|
|
|
clReleaseProgram(handle);
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2 src;
|
2013-10-22 18:04:49 +08:00
|
|
|
String buildflags;
|
|
|
|
cl_program handle;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
Program::Program() { p = 0; }
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Program::Program(const ProgramSource2& src,
|
2013-10-22 18:04:49 +08:00
|
|
|
const String& buildflags, String& errmsg)
|
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
create(src, buildflags, errmsg);
|
|
|
|
}
|
|
|
|
|
|
|
|
Program::Program(const Program& prog)
|
|
|
|
{
|
|
|
|
p = prog.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
|
|
|
Program& Program::operator = (const Program& prog)
|
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)prog.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
Program::~Program()
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
bool Program::create(const ProgramSource2& src,
|
2013-10-22 18:04:49 +08:00
|
|
|
const String& buildflags, String& errmsg)
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(src, buildflags, errmsg);
|
|
|
|
if(!p->handle)
|
|
|
|
{
|
|
|
|
p->release();
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
return p != 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
const ProgramSource2& Program::source() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
static ProgramSource2 dummy;
|
2013-10-22 18:04:49 +08:00
|
|
|
return p ? p->src : dummy;
|
|
|
|
}
|
|
|
|
|
|
|
|
void* Program::ptr() const
|
|
|
|
{
|
|
|
|
return p ? p->handle : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Program::read(const String& bin, const String& buildflags)
|
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = new Impl(bin, buildflags);
|
|
|
|
return p->handle != 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Program::write(String& bin) const
|
|
|
|
{
|
|
|
|
if(!p)
|
|
|
|
return false;
|
|
|
|
bin = p->store();
|
|
|
|
return !bin.empty();
|
|
|
|
}
|
|
|
|
|
|
|
|
String Program::getPrefix() const
|
|
|
|
{
|
|
|
|
if(!p)
|
|
|
|
return String();
|
|
|
|
return getPrefix(p->buildflags);
|
|
|
|
}
|
|
|
|
|
|
|
|
String Program::getPrefix(const String& buildflags)
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
const Context2& ctx = Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
const Device& dev = ctx.device(0);
|
|
|
|
return format("name=%s\ndriver=%s\nbuildflags=%s\n",
|
|
|
|
dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
struct ProgramSource2::Impl
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
Impl(const char* _src)
|
|
|
|
{
|
|
|
|
init(String(_src));
|
|
|
|
}
|
|
|
|
Impl(const String& _src)
|
|
|
|
{
|
|
|
|
init(_src);
|
|
|
|
}
|
|
|
|
void init(const String& _src)
|
|
|
|
{
|
|
|
|
refcount = 1;
|
|
|
|
src = _src;
|
|
|
|
h = crc64((uchar*)src.c_str(), src.size());
|
|
|
|
}
|
|
|
|
|
|
|
|
IMPLEMENT_REFCOUNTABLE();
|
|
|
|
String src;
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::hash_t h;
|
2013-10-22 18:04:49 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::ProgramSource2()
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = 0;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::ProgramSource2(const char* prog)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = new Impl(prog);
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::ProgramSource2(const String& prog)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = new Impl(prog);
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::~ProgramSource2()
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::ProgramSource2(const ProgramSource2& prog)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
p = prog.p;
|
|
|
|
if(p)
|
|
|
|
p->addref();
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2& ProgramSource2::operator = (const ProgramSource2& prog)
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
Impl* newp = (Impl*)prog.p;
|
|
|
|
if(newp)
|
|
|
|
newp->addref();
|
|
|
|
if(p)
|
|
|
|
p->release();
|
|
|
|
p = newp;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
const String& ProgramSource2::source() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
static String dummy;
|
|
|
|
return p ? p->src : dummy;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
ProgramSource2::hash_t ProgramSource2::hash() const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
return p ? p->h : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
class OpenCLAllocator : public MatAllocator
|
|
|
|
{
|
|
|
|
public:
|
2013-12-01 07:12:19 +08:00
|
|
|
OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
|
2013-10-22 18:04:49 +08:00
|
|
|
|
2013-12-01 07:12:19 +08:00
|
|
|
UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step, int flags) const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-12-01 07:12:19 +08:00
|
|
|
UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags);
|
2013-10-22 18:04:49 +08:00
|
|
|
u->urefcount = 1;
|
|
|
|
u->refcount = 0;
|
|
|
|
return u;
|
|
|
|
}
|
|
|
|
|
2013-12-01 07:12:19 +08:00
|
|
|
void getBestFlags(const Context2& ctx, int /*flags*/, int& createFlags, int& flags0) const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
const Device& dev = ctx.device(0);
|
|
|
|
createFlags = CL_MEM_READ_WRITE;
|
|
|
|
|
|
|
|
if( dev.hostUnifiedMemory() )
|
|
|
|
flags0 = 0;
|
|
|
|
else
|
|
|
|
flags0 = UMatData::COPY_ON_MAP;
|
|
|
|
}
|
|
|
|
|
2013-12-01 07:12:19 +08:00
|
|
|
UMatData* allocate(int dims, const int* sizes, int type,
|
|
|
|
void* data, size_t* step, int flags) const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if(!useOpenCL())
|
2013-12-01 07:12:19 +08:00
|
|
|
return defaultAllocate(dims, sizes, type, data, step, flags);
|
|
|
|
CV_Assert(data == 0);
|
2013-10-22 18:04:49 +08:00
|
|
|
size_t total = CV_ELEM_SIZE(type);
|
|
|
|
for( int i = dims-1; i >= 0; i-- )
|
|
|
|
{
|
|
|
|
if( step )
|
|
|
|
step[i] = total;
|
|
|
|
total *= sizes[i];
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2& ctx = Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
int createFlags = 0, flags0 = 0;
|
2013-12-01 07:12:19 +08:00
|
|
|
getBestFlags(ctx, flags, createFlags, flags0);
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
cl_int retval = 0;
|
|
|
|
void* handle = clCreateBuffer((cl_context)ctx.ptr(),
|
|
|
|
createFlags, total, 0, &retval);
|
|
|
|
if( !handle || retval < 0 )
|
2013-12-01 07:12:19 +08:00
|
|
|
return defaultAllocate(dims, sizes, type, data, step, flags);
|
2013-10-22 18:04:49 +08:00
|
|
|
UMatData* u = new UMatData(this);
|
|
|
|
u->data = 0;
|
|
|
|
u->size = total;
|
|
|
|
u->handle = handle;
|
|
|
|
u->urefcount = 1;
|
|
|
|
u->flags = flags0;
|
|
|
|
|
|
|
|
return u;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool allocate(UMatData* u, int accessFlags) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
UMatDataAutoLock lock(u);
|
|
|
|
|
|
|
|
if(u->handle == 0)
|
|
|
|
{
|
|
|
|
CV_Assert(u->origdata != 0);
|
2013-11-19 00:48:00 +08:00
|
|
|
Context2& ctx = Context2::getDefault();
|
2013-10-22 18:04:49 +08:00
|
|
|
int createFlags = 0, flags0 = 0;
|
2013-12-01 07:12:19 +08:00
|
|
|
getBestFlags(ctx, accessFlags, createFlags, flags0);
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
cl_context ctx_handle = (cl_context)ctx.ptr();
|
|
|
|
cl_int retval = 0;
|
|
|
|
int tempUMatFlags = UMatData::TEMP_UMAT;
|
|
|
|
u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
|
|
|
|
u->size, u->origdata, &retval);
|
|
|
|
if((!u->handle || retval < 0) && !(accessFlags & ACCESS_FAST))
|
|
|
|
{
|
|
|
|
u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|createFlags,
|
|
|
|
u->size, u->origdata, &retval);
|
|
|
|
tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
|
|
|
|
}
|
|
|
|
if(!u->handle || retval < 0)
|
|
|
|
return false;
|
|
|
|
u->prevAllocator = u->currAllocator;
|
|
|
|
u->currAllocator = this;
|
|
|
|
u->flags |= tempUMatFlags;
|
|
|
|
}
|
|
|
|
if(accessFlags & ACCESS_WRITE)
|
|
|
|
u->markHostCopyObsolete(true);
|
|
|
|
CV_XADD(&u->urefcount, 1);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-12-02 22:27:08 +08:00
|
|
|
/*void sync(UMatData* u) const
|
2013-12-01 07:12:19 +08:00
|
|
|
{
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
2013-12-02 02:14:15 +08:00
|
|
|
UMatDataAutoLock lock(u);
|
|
|
|
|
2013-12-02 04:12:20 +08:00
|
|
|
if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
|
2013-12-01 07:12:19 +08:00
|
|
|
{
|
2013-12-02 04:12:20 +08:00
|
|
|
if( u->tempCopiedUMat() )
|
|
|
|
{
|
|
|
|
clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
|
|
|
u->size, u->origdata, 0, 0, 0);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cl_int retval = 0;
|
|
|
|
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
(CL_MAP_READ | CL_MAP_WRITE),
|
|
|
|
0, u->size, 0, 0, 0, &retval);
|
|
|
|
clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
|
|
|
|
clFinish(q);
|
|
|
|
}
|
2013-12-01 07:12:19 +08:00
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
}
|
|
|
|
else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
|
|
|
|
{
|
|
|
|
clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
|
|
|
u->size, u->data, 0, 0, 0);
|
|
|
|
}
|
2013-12-02 22:27:08 +08:00
|
|
|
}*/
|
2013-12-01 07:12:19 +08:00
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
void deallocate(UMatData* u) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// TODO: !!! when we add Shared Virtual Memory Support,
|
2013-12-01 07:12:19 +08:00
|
|
|
// this function (as well as the others) should be corrected
|
2013-10-22 18:04:49 +08:00
|
|
|
CV_Assert(u->handle != 0 && u->urefcount == 0);
|
|
|
|
if(u->tempUMat())
|
|
|
|
{
|
2013-12-02 22:27:08 +08:00
|
|
|
UMatDataAutoLock lock(u);
|
2013-12-02 04:12:20 +08:00
|
|
|
if( u->hostCopyObsolete() && u->refcount > 0 )
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
2013-12-02 04:12:20 +08:00
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
if( u->tempCopiedUMat() )
|
|
|
|
{
|
|
|
|
clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
|
|
|
u->size, u->origdata, 0, 0, 0);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cl_int retval = 0;
|
|
|
|
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
(CL_MAP_READ | CL_MAP_WRITE),
|
|
|
|
0, u->size, 0, 0, 0, &retval);
|
|
|
|
clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
|
|
|
|
clFinish(q);
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
clReleaseMemObject((cl_mem)u->handle);
|
2013-11-25 21:16:22 +08:00
|
|
|
u->handle = 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
u->currAllocator = u->prevAllocator;
|
2013-12-01 07:12:19 +08:00
|
|
|
if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
|
2013-10-22 21:41:28 +08:00
|
|
|
fastFree(u->data);
|
|
|
|
u->data = u->origdata;
|
2013-10-22 18:04:49 +08:00
|
|
|
if(u->refcount == 0)
|
|
|
|
u->currAllocator->deallocate(u);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2013-12-02 22:27:08 +08:00
|
|
|
CV_Assert(u->refcount == 0);
|
2013-12-01 07:12:19 +08:00
|
|
|
if(u->data && u->copyOnMap() && !(u->flags & UMatData::USER_ALLOCATED))
|
|
|
|
{
|
2013-10-22 18:04:49 +08:00
|
|
|
fastFree(u->data);
|
2013-12-01 07:12:19 +08:00
|
|
|
u->data = 0;
|
|
|
|
}
|
2013-10-22 18:04:49 +08:00
|
|
|
clReleaseMemObject((cl_mem)u->handle);
|
2013-11-25 21:16:22 +08:00
|
|
|
u->handle = 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
delete u;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void map(UMatData* u, int accessFlags) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return;
|
|
|
|
|
|
|
|
CV_Assert( u->handle != 0 );
|
|
|
|
|
|
|
|
UMatDataAutoLock autolock(u);
|
|
|
|
|
|
|
|
if(accessFlags & ACCESS_WRITE)
|
|
|
|
u->markDeviceCopyObsolete(true);
|
|
|
|
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
|
|
|
|
if( u->refcount == 0 )
|
|
|
|
{
|
|
|
|
if( !u->copyOnMap() )
|
|
|
|
{
|
|
|
|
CV_Assert(u->data == 0);
|
|
|
|
// because there can be other map requests for the same UMat with different access flags,
|
|
|
|
// we use the universal (read-write) access mode.
|
|
|
|
cl_int retval = 0;
|
|
|
|
u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
(CL_MAP_READ | CL_MAP_WRITE),
|
|
|
|
0, u->size, 0, 0, 0, &retval);
|
|
|
|
if(u->data && retval >= 0)
|
|
|
|
{
|
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if map failed, switch to copy-on-map mode for the particular buffer
|
|
|
|
u->flags |= UMatData::COPY_ON_MAP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(!u->data)
|
|
|
|
{
|
|
|
|
u->data = (uchar*)fastMalloc(u->size);
|
|
|
|
u->markHostCopyObsolete(true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
|
|
|
|
{
|
|
|
|
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
|
|
|
u->size, u->data, 0, 0, 0) >= 0 );
|
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void unmap(UMatData* u) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return;
|
|
|
|
|
|
|
|
CV_Assert(u->handle != 0);
|
|
|
|
|
|
|
|
UMatDataAutoLock autolock(u);
|
|
|
|
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
2013-12-01 07:12:19 +08:00
|
|
|
cl_int retval = 0;
|
2013-10-22 18:04:49 +08:00
|
|
|
if( !u->copyOnMap() && u->data )
|
|
|
|
{
|
2013-12-01 07:12:19 +08:00
|
|
|
CV_Assert( (retval = clEnqueueUnmapMemObject(q,
|
|
|
|
(cl_mem)u->handle, u->data, 0, 0, 0)) >= 0 );
|
|
|
|
clFinish(q);
|
2013-10-22 18:04:49 +08:00
|
|
|
u->data = 0;
|
|
|
|
}
|
|
|
|
else if( u->copyOnMap() && u->deviceCopyObsolete() )
|
|
|
|
{
|
2013-12-01 07:12:19 +08:00
|
|
|
CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
|
|
|
|
u->size, u->data, 0, 0, 0)) >= 0 );
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
u->markDeviceCopyObsolete(false);
|
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool checkContinuous(int dims, const size_t sz[],
|
|
|
|
const size_t srcofs[], const size_t srcstep[],
|
|
|
|
const size_t dstofs[], const size_t dststep[],
|
|
|
|
size_t& total, size_t new_sz[],
|
|
|
|
size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
|
|
|
|
size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
|
|
|
|
{
|
|
|
|
bool iscontinuous = true;
|
|
|
|
srcrawofs = srcofs ? srcofs[dims-1] : 0;
|
|
|
|
dstrawofs = dstofs ? dstofs[dims-1] : 0;
|
|
|
|
total = sz[dims-1];
|
|
|
|
for( int i = dims-2; i >= 0; i-- )
|
|
|
|
{
|
2013-10-25 15:19:40 +08:00
|
|
|
if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
|
2013-10-22 18:04:49 +08:00
|
|
|
iscontinuous = false;
|
|
|
|
total *= sz[i];
|
|
|
|
if( srcofs )
|
|
|
|
srcrawofs += srcofs[i]*srcstep[i];
|
|
|
|
if( dstofs )
|
|
|
|
dstrawofs += dstofs[i]*dststep[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
if( !iscontinuous )
|
|
|
|
{
|
|
|
|
// OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
|
|
|
|
if( dims == 2 )
|
|
|
|
{
|
|
|
|
new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
|
|
|
|
// we assume that new_... arrays are initialized by caller
|
|
|
|
// with 0's, so there is no else branch
|
|
|
|
if( srcofs )
|
|
|
|
{
|
|
|
|
new_srcofs[0] = srcofs[1];
|
|
|
|
new_srcofs[1] = srcofs[0];
|
|
|
|
new_srcofs[2] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( dstofs )
|
|
|
|
{
|
|
|
|
new_dstofs[0] = dstofs[1];
|
|
|
|
new_dstofs[1] = dstofs[0];
|
|
|
|
new_dstofs[2] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
|
|
|
|
new_dststep[0] = dststep[0]; new_dststep[1] = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// we could check for dims == 3 here,
|
|
|
|
// but from user perspective this one is more informative
|
|
|
|
CV_Assert(dims <= 3);
|
|
|
|
new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
|
|
|
|
if( srcofs )
|
|
|
|
{
|
|
|
|
new_srcofs[0] = srcofs[2];
|
|
|
|
new_srcofs[1] = srcofs[1];
|
|
|
|
new_srcofs[2] = srcofs[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
if( dstofs )
|
|
|
|
{
|
|
|
|
new_dstofs[0] = dstofs[2];
|
|
|
|
new_dstofs[1] = dstofs[1];
|
|
|
|
new_dstofs[2] = dstofs[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
|
|
|
|
new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return iscontinuous;
|
|
|
|
}
|
|
|
|
|
|
|
|
void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
|
|
|
|
const size_t srcofs[], const size_t srcstep[],
|
|
|
|
const size_t dststep[]) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return;
|
|
|
|
UMatDataAutoLock autolock(u);
|
|
|
|
|
|
|
|
if( u->data && !u->hostCopyObsolete() )
|
|
|
|
{
|
|
|
|
Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
CV_Assert( u->handle != 0 );
|
|
|
|
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
|
|
|
|
size_t total = 0, new_sz[] = {0, 0, 0};
|
|
|
|
size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
|
|
|
|
size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
|
|
|
|
|
|
|
|
bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
|
|
|
|
total, new_sz,
|
|
|
|
srcrawofs, new_srcofs, new_srcstep,
|
|
|
|
dstrawofs, new_dstofs, new_dststep);
|
|
|
|
if( iscontinuous )
|
|
|
|
{
|
|
|
|
CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
srcrawofs, total, dstptr, 0, 0, 0) >= 0 );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
|
|
|
|
new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
|
|
|
|
const size_t dstofs[], const size_t dststep[],
|
|
|
|
const size_t srcstep[]) const
|
|
|
|
{
|
|
|
|
if(!u)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// there should be no user-visible CPU copies of the UMat which we are going to copy to
|
2013-12-02 00:58:30 +08:00
|
|
|
CV_Assert(u->refcount == 0 || u->tempUMat());
|
2013-10-22 18:04:49 +08:00
|
|
|
|
|
|
|
size_t total = 0, new_sz[] = {0, 0, 0};
|
|
|
|
size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
|
|
|
|
size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
|
|
|
|
|
|
|
|
bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
|
|
|
|
total, new_sz,
|
|
|
|
srcrawofs, new_srcofs, new_srcstep,
|
|
|
|
dstrawofs, new_dstofs, new_dststep);
|
|
|
|
|
|
|
|
UMatDataAutoLock autolock(u);
|
|
|
|
|
|
|
|
// if there is cached CPU copy of the GPU matrix,
|
|
|
|
// we could use it as a destination.
|
|
|
|
// we can do it in 2 cases:
|
|
|
|
// 1. we overwrite the whole content
|
|
|
|
// 2. we overwrite part of the matrix, but the GPU copy is out-of-date
|
|
|
|
if( u->data && (u->hostCopyObsolete() <= u->deviceCopyObsolete() || total == u->size))
|
|
|
|
{
|
|
|
|
Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
|
|
|
|
u->markHostCopyObsolete(false);
|
|
|
|
u->markDeviceCopyObsolete(true);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
CV_Assert( u->handle != 0 );
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
|
|
|
|
if( iscontinuous )
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
int crc = 0;
|
|
|
|
for( size_t i = 0; i < total; i++ )
|
|
|
|
crc ^= ((uchar*)srcptr)[i];
|
2013-10-22 18:04:49 +08:00
|
|
|
CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
|
|
|
|
CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
|
|
|
|
new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
|
|
|
|
new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) >= 0 );
|
|
|
|
}
|
|
|
|
|
|
|
|
u->markHostCopyObsolete(true);
|
|
|
|
u->markDeviceCopyObsolete(false);
|
|
|
|
|
|
|
|
clFinish(q);
|
|
|
|
}
|
|
|
|
|
|
|
|
void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
|
|
|
|
const size_t srcofs[], const size_t srcstep[],
|
2013-12-02 00:58:30 +08:00
|
|
|
const size_t dstofs[], const size_t dststep[], bool _sync) const
|
2013-10-22 18:04:49 +08:00
|
|
|
{
|
|
|
|
if(!src || !dst)
|
|
|
|
return;
|
|
|
|
|
|
|
|
size_t total = 0, new_sz[] = {0, 0, 0};
|
|
|
|
size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
|
|
|
|
size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
|
|
|
|
|
|
|
|
bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
|
|
|
|
total, new_sz,
|
|
|
|
srcrawofs, new_srcofs, new_srcstep,
|
|
|
|
dstrawofs, new_dstofs, new_dststep);
|
|
|
|
|
|
|
|
UMatDataAutoLock src_autolock(src);
|
|
|
|
UMatDataAutoLock dst_autolock(dst);
|
|
|
|
|
|
|
|
if( !src->handle || (src->data && src->hostCopyObsolete() <= src->deviceCopyObsolete()) )
|
|
|
|
{
|
|
|
|
upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if( !dst->handle || (dst->data && dst->hostCopyObsolete() <= dst->deviceCopyObsolete()) )
|
|
|
|
{
|
|
|
|
download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
|
|
|
|
dst->markHostCopyObsolete(false);
|
|
|
|
dst->markDeviceCopyObsolete(true);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// there should be no user-visible CPU copies of the UMat which we are going to copy to
|
|
|
|
CV_Assert(dst->refcount == 0);
|
|
|
|
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
|
|
|
|
|
|
|
|
if( iscontinuous )
|
|
|
|
{
|
|
|
|
CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
|
|
|
srcrawofs, dstrawofs, total, 0, 0, 0) >= 0 );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2013-11-19 00:48:00 +08:00
|
|
|
cl_int retval;
|
|
|
|
CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
|
2013-10-22 18:04:49 +08:00
|
|
|
new_srcofs, new_dstofs, new_sz,
|
2013-12-02 00:58:30 +08:00
|
|
|
new_srcstep[0], new_srcstep[1],
|
|
|
|
new_dststep[0], new_dststep[1],
|
2013-11-19 00:48:00 +08:00
|
|
|
0, 0, 0)) >= 0 );
|
2013-10-22 18:04:49 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
dst->markHostCopyObsolete(true);
|
|
|
|
dst->markDeviceCopyObsolete(false);
|
|
|
|
|
2013-12-02 00:58:30 +08:00
|
|
|
if( _sync )
|
2013-10-22 18:04:49 +08:00
|
|
|
clFinish(q);
|
|
|
|
}
|
2013-12-01 07:12:19 +08:00
|
|
|
|
|
|
|
MatAllocator* matStdAllocator;
|
2013-10-22 18:04:49 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
MatAllocator* getOpenCLAllocator()
|
|
|
|
{
|
|
|
|
static OpenCLAllocator allocator;
|
|
|
|
return &allocator;
|
|
|
|
}
|
|
|
|
|
2013-11-19 00:48:00 +08:00
|
|
|
const char* typeToStr(int t)
|
|
|
|
{
|
|
|
|
static const char* tab[]=
|
|
|
|
{
|
|
|
|
"uchar", "uchar2", "uchar3", "uchar4",
|
|
|
|
"char", "char2", "char3", "char4",
|
|
|
|
"ushort", "ushort2", "ushort3", "ushort4",
|
|
|
|
"short", "short2", "short3", "short4",
|
|
|
|
"int", "int2", "int3", "int4",
|
|
|
|
"float", "float2", "float3", "float4",
|
|
|
|
"double", "double2", "double3", "double4",
|
|
|
|
"?", "?", "?", "?"
|
|
|
|
};
|
|
|
|
int cn = CV_MAT_CN(t);
|
2013-11-20 08:20:24 +08:00
|
|
|
return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1];
|
2013-11-19 00:48:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const char* memopTypeToStr(int t)
|
|
|
|
{
|
|
|
|
static const char* tab[]=
|
|
|
|
{
|
|
|
|
"uchar", "uchar2", "uchar3", "uchar4",
|
|
|
|
"uchar", "uchar2", "uchar3", "uchar4",
|
|
|
|
"ushort", "ushort2", "ushort3", "ushort4",
|
|
|
|
"ushort", "ushort2", "ushort3", "ushort4",
|
|
|
|
"int", "int2", "int3", "int4",
|
|
|
|
"int", "int2", "int3", "int4",
|
2013-12-01 19:37:56 +08:00
|
|
|
"int2", "int4", "?", "int8",
|
2013-11-19 00:48:00 +08:00
|
|
|
"?", "?", "?", "?"
|
|
|
|
};
|
|
|
|
int cn = CV_MAT_CN(t);
|
2013-11-20 08:20:24 +08:00
|
|
|
return cn > 4 ? "?" : tab[CV_MAT_DEPTH(t)*4 + cn-1];
|
2013-11-19 00:48:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
|
|
|
|
{
|
|
|
|
if( sdepth == ddepth )
|
|
|
|
return "noconvert";
|
|
|
|
const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
|
|
|
|
if( ddepth >= CV_32F ||
|
|
|
|
(ddepth == CV_32S && sdepth < CV_32S) ||
|
|
|
|
(ddepth == CV_16S && sdepth <= CV_8S) ||
|
|
|
|
(ddepth == CV_16U && sdepth == CV_8U))
|
|
|
|
{
|
|
|
|
sprintf(buf, "convert_%s", typestr);
|
|
|
|
}
|
|
|
|
else if( sdepth >= CV_32F )
|
|
|
|
{
|
|
|
|
sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
sprintf(buf, "convert_%s_sat", typestr);
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2013-10-22 18:04:49 +08:00
|
|
|
}}
|