Merge pull request #2045 from SpecLad:merge-2.4
Commit: faddd5b97f
@ -156,6 +156,7 @@ OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON
|
||||
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
|
||||
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 )
|
||||
|
||||
|
||||
# OpenCV build components
|
||||
@ -207,7 +208,8 @@ OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions"
|
||||
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND ARM) )
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
|
||||
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
|
||||
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
|
||||
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
|
||||
OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 )
|
||||
@ -226,6 +228,15 @@ include(cmake/OpenCVVersion.cmake)
|
||||
# Save libs and executables in the same place
|
||||
set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" )
|
||||
|
||||
if (ANDROID)
|
||||
if (ANDROID_ABI MATCHES "NEON")
|
||||
set(ENABLE_NEON ON)
|
||||
endif()
|
||||
if (ANDROID_ABI MATCHES "VFPV3")
|
||||
set(ENABLE_VFPV3 ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ANDROID OR WIN32)
|
||||
set(OPENCV_DOC_INSTALL_PATH doc)
|
||||
elseif(INSTALL_TO_MANGLED_PATHS)
|
||||
@ -820,6 +831,11 @@ if(DEFINED WITH_XINE)
|
||||
status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO)
|
||||
endif(DEFINED WITH_XINE)
|
||||
|
||||
if(DEFINED WITH_INTELPERC)
|
||||
status(" Intel PerC:" HAVE_INTELPERC THEN "YES" ELSE NO)
|
||||
endif(DEFINED WITH_INTELPERC)
|
||||
|
||||
|
||||
# ========================== Other third-party libraries ==========================
|
||||
status("")
|
||||
status(" Other third-party libraries:")
|
||||
|
@ -124,6 +124,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
if(ENABLE_SSE2)
|
||||
add_extra_compiler_option(-msse2)
|
||||
endif()
|
||||
if (ENABLE_NEON)
|
||||
add_extra_compiler_option("-mfpu=neon")
|
||||
endif()
|
||||
if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
|
||||
add_extra_compiler_option("-mfpu=vfpv3")
|
||||
endif()
|
||||
|
||||
# SSE3 and further should be disabled under MingW because it generates compiler errors
|
||||
if(NOT MINGW)
|
||||
|
cmake/OpenCVFindIntelPerCSDK.cmake (new file, 20 lines)
@ -0,0 +1,20 @@
# Main variables:
# INTELPERC_LIBRARIES and INTELPERC_INCLUDE to link Intel Perceptual Computing SDK modules
# HAVE_INTELPERC for conditional compilation OpenCV with/without Intel Perceptual Computing SDK

if(X86_64)
    find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
    find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/x64" DOC "Path to Intel Perceptual Computing SDK interface libraries")
else()
    find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
    find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/Win32" DOC "Path to Intel Perceptual Computing SDK interface libraries")
endif()

if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
    set(HAVE_INTELPERC TRUE)
else()
    set(HAVE_INTELPERC FALSE)
    message(WARNING "Intel Perceptual Computing SDK library directory (set by INTELPERC_LIB_DIR variable) is not found or does not have Intel Perceptual Computing SDK libraries.")
endif() #if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)

mark_as_advanced(FORCE INTELPERC_LIBRARIES INTELPERC_INCLUDE_DIR)
@ -277,3 +277,8 @@ if (NOT IOS)
|
||||
set(HAVE_QTKIT YES)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# --- Intel Perceptual Computing SDK ---
|
||||
if(WITH_INTELPERC)
|
||||
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIntelPerCSDK.cmake")
|
||||
endif(WITH_INTELPERC)
|
||||
|
@ -88,6 +88,9 @@
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
|
||||
/* Intel Perceptual Computing SDK library */
|
||||
#cmakedefine HAVE_INTELPERC
|
||||
|
||||
/* Intel Integrated Performance Primitives */
|
||||
#cmakedefine HAVE_IPP
|
||||
|
||||
|
@ -106,8 +106,8 @@ Enable hardware optimizations
-----------------------------

Depending on target platform architecture different instruction sets can be used. By default
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DENABLE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DENABLE_NEON=ON`` for using
NEON SIMD extensions.

TBB is supported on multi core ARM SoCs also.
doc/user_guide/ug_intelperc.rst (new file, 79 lines)
@ -0,0 +1,79 @@
*******
HighGUI
*******

.. highlight:: cpp

Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors
========================================================================================

Depth sensors compatible with the Intel Perceptual Computing SDK are supported through the ``VideoCapture`` class. The depth map, RGB image and some other output formats can be retrieved through the familiar ``VideoCapture`` interface.

In order to use a depth sensor with OpenCV you should do the following preliminary steps:

#. Install the Intel Perceptual Computing SDK (from http://www.intel.com/software/perceptual).

#. Configure OpenCV with Intel Perceptual Computing SDK support by setting the ``WITH_INTELPERC`` flag in CMake. If the SDK is found in its install folders, OpenCV will be built with Intel Perceptual Computing SDK support (see the ``INTELPERC`` status in the CMake log). If CMake does not find the SDK installation folder automatically, set the CMake variables ``INTELPERC_LIB_DIR`` and ``INTELPERC_INCLUDE_DIR`` to the proper values.

#. Build OpenCV.

VideoCapture can retrieve the following data:

#. data from the depth generator:

   * ``CV_CAP_INTELPERC_DEPTH_MAP`` - each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1)
   * ``CV_CAP_INTELPERC_UVDEPTH_MAP`` - each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates. (CV_32FC2)
   * ``CV_CAP_INTELPERC_IR_MAP`` - each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam. (CV_16UC1)

#. data from the RGB image generator:

   * ``CV_CAP_INTELPERC_IMAGE`` - color image. (CV_8UC3)

In order to get a depth map from the depth sensor use ``VideoCapture::operator >>``, e.g. ::

    VideoCapture capture( CV_CAP_INTELPERC );
    for(;;)
    {
        Mat depthMap;
        capture >> depthMap;

        if( waitKey( 30 ) >= 0 )
            break;
    }

To retrieve several data maps use ``VideoCapture::grab`` and ``VideoCapture::retrieve``, e.g. ::

    VideoCapture capture(CV_CAP_INTELPERC);
    for(;;)
    {
        Mat depthMap;
        Mat image;
        Mat irImage;

        capture.grab();

        capture.retrieve( depthMap, CV_CAP_INTELPERC_DEPTH_MAP );
        capture.retrieve( image, CV_CAP_INTELPERC_IMAGE );
        capture.retrieve( irImage, CV_CAP_INTELPERC_IR_MAP );

        if( waitKey( 30 ) >= 0 )
            break;
    }

To set and get properties of the sensor's data generators use the ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. ::

    VideoCapture capture( CV_CAP_INTELPERC );
    capture.set( CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0 );
    cout << "FPS " << capture.get( CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_PROP_FPS ) << endl;

Since two types of data generators are supported (an image generator and a depth generator), there are two flags that select which generator a property set/get applies to:

* CV_CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties.

* CV_CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This flag value is assumed by default if neither of the two possible values of the property is set.

For more information please refer to the usage example intelperc_capture.cpp_ in the ``opencv/samples/cpp`` folder.

.. _intelperc_capture.cpp: https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp
@ -9,3 +9,4 @@ OpenCV User Guide
ug_features2d.rst
ug_highgui.rst
ug_traincascade.rst
ug_intelperc.rst
@ -903,7 +903,7 @@ So, the function chooses an operation mode depending on the flags and size of th
* When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input.

* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DCT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DFT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.

* If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``.
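As an aside on the two output layouts contrasted above, a minimal sketch (not part of this patch; it assumes a single-channel ``CV_32F`` input) that requests both forms from ``cv::dft`` ::

    #include <opencv2/core/core.hpp>

    int main()
    {
        cv::Mat src(4, 8, CV_32F);
        cv::randu(src, 0.0f, 1.0f);

        cv::Mat packed, full;
        cv::dft(src, packed);                       // packed (CCS) real output, same size as src
        cv::dft(src, full, cv::DFT_COMPLEX_OUTPUT); // two-channel complex output, same size as src

        CV_Assert(packed.type() == CV_32FC1 && full.type() == CV_32FC2);
        return 0;
    }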
@ -2577,7 +2577,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
|
||||
|
||||
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
|
||||
|
||||
if( (flags & DFT_ROWS) || src.rows == 1 ||
|
||||
if( (flags & DCT_ROWS) || src.rows == 1 ||
|
||||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
|
||||
{
|
||||
stage = end_stage = 0;
|
||||
@ -2597,7 +2597,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
|
||||
{
|
||||
len = src.cols;
|
||||
count = src.rows;
|
||||
if( len == 1 && !(flags & DFT_ROWS) )
|
||||
if( len == 1 && !(flags & DCT_ROWS) )
|
||||
{
|
||||
len = src.rows;
|
||||
count = 1;
|
||||
|
@ -2760,39 +2760,24 @@ void cv::transpose( InputArray _src, OutputArray _dst )
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////// completeSymm /////////////////////////////////////////
|
||||
|
||||
void cv::completeSymm( InputOutputArray _m, bool LtoR )
|
||||
{
|
||||
Mat m = _m.getMat();
|
||||
CV_Assert( m.dims <= 2 );
|
||||
size_t step = m.step, esz = m.elemSize();
|
||||
CV_Assert( m.dims <= 2 && m.rows == m.cols );
|
||||
|
||||
int i, j, nrows = m.rows, type = m.type();
|
||||
int j0 = 0, j1 = nrows;
|
||||
CV_Assert( m.rows == m.cols );
|
||||
int rows = m.rows;
|
||||
int j0 = 0, j1 = rows;
|
||||
|
||||
if( type == CV_32FC1 || type == CV_32SC1 )
|
||||
uchar* data = m.data;
|
||||
for( int i = 0; i < rows; i++ )
|
||||
{
|
||||
int* data = (int*)m.data;
|
||||
size_t step = m.step/sizeof(data[0]);
|
||||
for( i = 0; i < nrows; i++ )
|
||||
{
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( j = j0; j < j1; j++ )
|
||||
data[i*step + j] = data[j*step + i];
|
||||
}
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( int j = j0; j < j1; j++ )
|
||||
memcpy(data + (i*step + j*esz), data + (j*step + i*esz), esz);
|
||||
}
|
||||
else if( type == CV_64FC1 )
|
||||
{
|
||||
double* data = (double*)m.data;
|
||||
size_t step = m.step/sizeof(data[0]);
|
||||
for( i = 0; i < nrows; i++ )
|
||||
{
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( j = j0; j < j1; j++ )
|
||||
data[i*step + j] = data[j*step + i];
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error( CV_StsUnsupportedFormat, "" );
|
||||
}
|
||||
|
||||
|
||||
|
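For context on the ``completeSymm`` rewrite above, a minimal usage sketch (not part of this patch) that fills only the upper triangle of a square matrix and then mirrors it into the lower triangle ::

    #include <opencv2/core/core.hpp>
    #include <iostream>

    int main()
    {
        cv::Mat m = cv::Mat::zeros(3, 3, CV_32F);
        // Fill the upper triangle (including the diagonal).
        for (int i = 0; i < m.rows; i++)
            for (int j = i; j < m.cols; j++)
                m.at<float>(i, j) = float(i * m.cols + j);

        // lowerToUpper == false: copy the upper half into the lower half.
        cv::completeSymm(m, false);
        std::cout << m << std::endl;   // m is now symmetric
        return 0;
    }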
@ -222,6 +222,12 @@ elseif(HAVE_QTKIT)
|
||||
list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
|
||||
endif()
|
||||
|
||||
if(HAVE_INTELPERC)
|
||||
list(APPEND highgui_srcs src/cap_intelperc.cpp)
|
||||
ocv_include_directories(${INTELPERC_INCLUDE_DIR})
|
||||
list(APPEND HIGHGUI_LIBRARIES ${INTELPERC_LIBRARIES})
|
||||
endif(HAVE_INTELPERC)
|
||||
|
||||
if(IOS)
|
||||
add_definitions(-DHAVE_IOS=1)
|
||||
list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)
|
||||
|
@ -271,7 +271,8 @@ enum { CAP_ANY = 0, // autodetect
|
||||
CAP_XIAPI = 1100, // XIMEA Camera API
|
||||
CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
|
||||
CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
|
||||
CAP_MSMF = 1400 // Microsoft Media Foundation (via videoInput)
|
||||
CAP_MSMF = 1400, // Microsoft Media Foundation (via videoInput)
|
||||
CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
|
||||
};
|
||||
|
||||
// generic properties (based on DC1394 properties)
|
||||
@ -496,6 +497,26 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
|
||||
};
|
||||
|
||||
enum { CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
|
||||
CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
|
||||
CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
|
||||
CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
|
||||
CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
|
||||
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
|
||||
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007
|
||||
};
|
||||
|
||||
// Intel PerC streams
|
||||
enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
|
||||
CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
|
||||
CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR
|
||||
};
|
||||
|
||||
enum { CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
|
||||
CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
|
||||
CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
|
||||
CAP_INTELPERC_IMAGE = 3
|
||||
};
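A hedged usage sketch of the new-style constants above (not part of this patch; it assumes OpenCV was configured with ``WITH_INTELPERC`` and that a compatible sensor is attached), showing how a generator flag is combined with a generic property ID ::

    #include <opencv2/highgui/highgui.hpp>
    #include <iostream>

    int main()
    {
        cv::VideoCapture capture(cv::CAP_INTELPERC);
        if (!capture.isOpened())
            return 1;

        // The generator flag selects whether the property refers to the
        // depth generator or the image generator.
        double depthFps = capture.get(cv::CAP_INTELPERC_DEPTH_GENERATOR + cv::CAP_PROP_FPS);
        double imageW   = capture.get(cv::CAP_INTELPERC_IMAGE_GENERATOR + cv::CAP_PROP_FRAME_WIDTH);
        std::cout << "depth FPS: " << depthFps << ", image width: " << imageW << std::endl;
        return 0;
    }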
|
||||
|
||||
class CV_EXPORTS_W VideoCapture
|
||||
{
|
||||
|
@ -313,7 +313,9 @@ enum
|
||||
|
||||
CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
|
||||
|
||||
CV_CAP_GIGANETIX = 1300 // Smartek Giganetix GigEVisionSDK
|
||||
CV_CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
|
||||
|
||||
CV_CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
|
||||
};
|
||||
|
||||
/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
|
||||
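A minimal sketch (not part of this patch) of the ``index = camera_index + domain_offset`` convention described in the comment above, opening the first Intel PerC device through the C API ::

    #include <opencv2/highgui/highgui_c.h>

    int main()
    {
        /* domain offset CV_CAP_INTELPERC plus camera index 0 */
        CvCapture* capture = cvCreateCameraCapture(CV_CAP_INTELPERC + 0);
        if (!capture)
            return 1;

        cvGrabFrame(capture);
        cvReleaseCapture(&capture);
        return 0;
    }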
@ -459,16 +461,29 @@ enum
|
||||
CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
|
||||
CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
|
||||
CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
|
||||
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005
|
||||
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
|
||||
|
||||
// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
|
||||
/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
|
||||
,CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
|
||||
CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
|
||||
CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
|
||||
|
||||
CV_CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
|
||||
CV_CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007,
|
||||
|
||||
// Intel PerC streams
|
||||
CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
|
||||
CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
|
||||
CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
|
||||
};
|
||||
|
||||
enum
|
||||
@ -549,6 +564,14 @@ enum
|
||||
CV_CAP_ANDROID_ANTIBANDING_OFF
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
CV_CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
|
||||
CV_CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
|
||||
CV_CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
|
||||
CV_CAP_INTELPERC_IMAGE = 3
|
||||
};
|
||||
|
||||
/* retrieve or set capture properties */
|
||||
CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
|
||||
CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value );
|
||||
|
@ -155,6 +155,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
#endif
|
||||
#ifdef HAVE_GIGE_API
|
||||
CV_CAP_GIGANETIX,
|
||||
#endif
|
||||
#ifdef HAVE_INTELPERC
|
||||
CV_CAP_INTELPERC,
|
||||
#endif
|
||||
-1
|
||||
};
|
||||
@ -193,6 +196,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_ANDROID_NATIVE_CAMERA) || \
|
||||
defined(HAVE_GIGE_API) || \
|
||||
defined(HAVE_INTELPERC) || \
|
||||
(0)
|
||||
// local variable to memorize the captured device
|
||||
CvCapture *capture;
|
||||
@ -342,6 +346,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
break; // CV_CAP_GIGANETIX
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INTELPERC
|
||||
case CV_CAP_INTELPERC:
|
||||
capture = cvCreateCameraCapture_IntelPerC(index);
|
||||
if (capture)
|
||||
return capture;
|
||||
break; // CV_CAP_INTEL_PERC
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
modules/highgui/src/cap_intelperc.cpp (new file, 714 lines)
@ -0,0 +1,714 @@
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_INTELPERC
|
||||
|
||||
#include "pxcsession.h"
|
||||
#include "pxcsmartptr.h"
|
||||
#include "pxccapture.h"
|
||||
|
||||
class CvIntelPerCStreamBase
|
||||
{
|
||||
protected:
|
||||
struct FrameInternal
|
||||
{
|
||||
IplImage* retrieveFrame()
|
||||
{
|
||||
if (m_mat.empty())
|
||||
return NULL;
|
||||
m_iplHeader = IplImage(m_mat);
|
||||
return &m_iplHeader;
|
||||
}
|
||||
cv::Mat m_mat;
|
||||
private:
|
||||
IplImage m_iplHeader;
|
||||
};
|
||||
public:
|
||||
CvIntelPerCStreamBase()
|
||||
: m_profileIdx(-1)
|
||||
, m_frameIdx(0)
|
||||
, m_timeStampStartNS(0)
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamBase()
|
||||
{
|
||||
}
|
||||
|
||||
bool isValid()
|
||||
{
|
||||
return (m_device.IsValid() && m_stream.IsValid());
|
||||
}
|
||||
bool grabFrame()
|
||||
{
|
||||
if (!m_stream.IsValid())
|
||||
return false;
|
||||
if (-1 == m_profileIdx)
|
||||
{
|
||||
if (!setProperty(CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0))
|
||||
return false;
|
||||
}
|
||||
PXCSmartPtr<PXCImage> pxcImage; PXCSmartSP sp;
|
||||
if (PXC_STATUS_NO_ERROR > m_stream->ReadStreamAsync(&pxcImage, &sp))
|
||||
return false;
|
||||
if (PXC_STATUS_NO_ERROR > sp->Synchronize())
|
||||
return false;
|
||||
if (0 == m_timeStampStartNS)
|
||||
m_timeStampStartNS = pxcImage->QueryTimeStamp();
|
||||
m_timeStamp = (double)((pxcImage->QueryTimeStamp() - m_timeStampStartNS) / 10000);
|
||||
m_frameIdx++;
|
||||
return prepareIplImage(pxcImage);
|
||||
}
|
||||
int getProfileIDX() const
|
||||
{
|
||||
return m_profileIdx;
|
||||
}
|
||||
public:
|
||||
virtual bool initStream(PXCSession *session) = 0;
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
double ret = 0.0;
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_PROFILE_COUNT:
|
||||
ret = (double)m_profiles.size();
|
||||
break;
|
||||
case CV_CAP_PROP_FRAME_WIDTH :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
ret = (double)m_profiles[m_profileIdx].imageInfo.width;
|
||||
break;
|
||||
case CV_CAP_PROP_FRAME_HEIGHT :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
ret = (double)m_profiles[m_profileIdx].imageInfo.height;
|
||||
break;
|
||||
case CV_CAP_PROP_FPS :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
{
|
||||
ret = ((double)m_profiles[m_profileIdx].frameRateMin.numerator / (double)m_profiles[m_profileIdx].frameRateMin.denominator
|
||||
+ (double)m_profiles[m_profileIdx].frameRateMax.numerator / (double)m_profiles[m_profileIdx].frameRateMax.denominator) / 2.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_POS_FRAMES:
|
||||
ret = (double)m_frameIdx;
|
||||
break;
|
||||
case CV_CAP_PROP_POS_MSEC:
|
||||
ret = m_timeStamp;
|
||||
break;
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
bool isSet = false;
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_PROFILE_IDX:
|
||||
{
|
||||
int propValInt = (int)propVal;
|
||||
if ((0 <= propValInt) && (propValInt < m_profiles.size()))
|
||||
{
|
||||
if (m_profileIdx != propValInt)
|
||||
{
|
||||
m_profileIdx = propValInt;
|
||||
if (m_stream.IsValid())
|
||||
m_stream->SetProfile(&m_profiles[m_profileIdx]);
|
||||
m_frameIdx = 0;
|
||||
m_timeStampStartNS = 0;
|
||||
}
|
||||
isSet = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
};
|
||||
return isSet;
|
||||
}
|
||||
protected:
|
||||
PXCSmartPtr<PXCCapture::Device> m_device;
|
||||
bool initDevice(PXCSession *session)
|
||||
{
|
||||
if (NULL == session)
|
||||
return false;
|
||||
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
PXCSession::ImplDesc templat;
|
||||
memset(&templat,0,sizeof(templat));
|
||||
templat.group = PXCSession::IMPL_GROUP_SENSOR;
|
||||
templat.subgroup= PXCSession::IMPL_SUBGROUP_VIDEO_CAPTURE;
|
||||
|
||||
for (int modidx = 0; PXC_STATUS_NO_ERROR <= sts; modidx++)
|
||||
{
|
||||
PXCSession::ImplDesc desc;
|
||||
sts = session->QueryImpl(&templat, modidx, &desc);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
|
||||
PXCSmartPtr<PXCCapture> capture;
|
||||
sts = session->CreateImpl<PXCCapture>(&desc, &capture);
|
||||
if (!capture.IsValid())
|
||||
continue;
|
||||
|
||||
/* enumerate devices */
|
||||
for (int devidx = 0; PXC_STATUS_NO_ERROR <= sts; devidx++)
|
||||
{
|
||||
PXCSmartPtr<PXCCapture::Device> device;
|
||||
sts = capture->CreateDevice(devidx, &device);
|
||||
if (PXC_STATUS_NO_ERROR <= sts)
|
||||
{
|
||||
m_device = device.ReleasePtr();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
PXCSmartPtr<PXCCapture::VideoStream> m_stream;
|
||||
void initStreamImpl(PXCImage::ImageType type)
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return;
|
||||
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
/* enumerate streams */
|
||||
for (int streamidx = 0; PXC_STATUS_NO_ERROR <= sts; streamidx++)
|
||||
{
|
||||
PXCCapture::Device::StreamInfo sinfo;
|
||||
sts = m_device->QueryStream(streamidx, &sinfo);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
if (PXCCapture::VideoStream::CUID != sinfo.cuid)
|
||||
continue;
|
||||
if (type != sinfo.imageType)
|
||||
continue;
|
||||
|
||||
sts = m_device->CreateStream<PXCCapture::VideoStream>(streamidx, &m_stream);
|
||||
if (PXC_STATUS_NO_ERROR == sts)
|
||||
break;
|
||||
m_stream.ReleaseRef();
|
||||
}
|
||||
}
|
||||
protected:
|
||||
std::vector<PXCCapture::VideoStream::ProfileInfo> m_profiles;
|
||||
int m_profileIdx;
|
||||
int m_frameIdx;
|
||||
pxcU64 m_timeStampStartNS;
|
||||
double m_timeStamp;
|
||||
|
||||
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& /*pinfo*/)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
void enumProfiles()
|
||||
{
|
||||
m_profiles.clear();
|
||||
if (!m_stream.IsValid())
|
||||
return;
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
for (int profidx = 0; PXC_STATUS_NO_ERROR <= sts; profidx++)
|
||||
{
|
||||
PXCCapture::VideoStream::ProfileInfo pinfo;
|
||||
sts = m_stream->QueryProfile(profidx, &pinfo);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
if (validProfile(pinfo))
|
||||
m_profiles.push_back(pinfo);
|
||||
}
|
||||
}
|
||||
virtual bool prepareIplImage(PXCImage *pxcImage) = 0;
|
||||
};
|
||||
|
||||
class CvIntelPerCStreamImage
|
||||
: public CvIntelPerCStreamBase
|
||||
{
|
||||
public:
|
||||
CvIntelPerCStreamImage()
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamImage()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool initStream(PXCSession *session)
|
||||
{
|
||||
if (!initDevice(session))
|
||||
return false;
|
||||
initStreamImpl(PXCImage::IMAGE_TYPE_COLOR);
|
||||
if (!m_stream.IsValid())
|
||||
return false;
|
||||
enumProfiles();
|
||||
return true;
|
||||
}
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_BRIGHTNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_CONTRAST:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SATURATION:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_HUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAMMA:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SHARPNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAIN:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_BACKLIGHT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_EXPOSURE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
//Add image stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::getProperty(propIdx);
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_BRIGHTNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_CONTRAST:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SATURATION:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_HUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAMMA:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SHARPNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAIN:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_BACKLIGHT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_EXPOSURE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
//Add image stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
|
||||
}
|
||||
public:
|
||||
IplImage* retrieveFrame()
|
||||
{
|
||||
return m_frame.retrieveFrame();
|
||||
}
|
||||
protected:
|
||||
FrameInternal m_frame;
|
||||
bool prepareIplImage(PXCImage *pxcImage)
|
||||
{
|
||||
if (NULL == pxcImage)
|
||||
return false;
|
||||
PXCImage::ImageInfo info;
|
||||
pxcImage->QueryInfo(&info);
|
||||
|
||||
PXCImage::ImageData data;
|
||||
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, PXCImage::COLOR_FORMAT_RGB24, &data);
|
||||
|
||||
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
|
||||
return false;
|
||||
|
||||
cv::Mat temp(info.height, info.width, CV_8UC3, data.planes[0], data.pitches[0]);
|
||||
temp.copyTo(m_frame.m_mat);
|
||||
|
||||
pxcImage->ReleaseAccess(&data);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class CvIntelPerCStreamDepth
|
||||
: public CvIntelPerCStreamBase
|
||||
{
|
||||
public:
|
||||
CvIntelPerCStreamDepth()
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamDepth()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool initStream(PXCSession *session)
|
||||
{
|
||||
if (!initDevice(session))
|
||||
return false;
|
||||
initStreamImpl(PXCImage::IMAGE_TYPE_DEPTH);
|
||||
if (!m_stream.IsValid())
|
||||
return false;
|
||||
enumProfiles();
|
||||
return true;
|
||||
}
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0f;
|
||||
PXCPointF32 ptf;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
|
||||
return (double)ptf.x;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0f;
|
||||
PXCPointF32 ptf;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
|
||||
return (double)ptf.y;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
//Add depth stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::getProperty(propIdx);
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, (float)propVal));
|
||||
}
|
||||
break;
|
||||
//Add depth stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
|
||||
}
|
||||
public:
|
||||
IplImage* retrieveDepthFrame()
|
||||
{
|
||||
return m_frameDepth.retrieveFrame();
|
||||
}
|
||||
IplImage* retrieveIRFrame()
|
||||
{
|
||||
return m_frameIR.retrieveFrame();
|
||||
}
|
||||
IplImage* retrieveUVFrame()
|
||||
{
|
||||
return m_frameUV.retrieveFrame();
|
||||
}
|
||||
protected:
|
||||
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& pinfo)
|
||||
{
|
||||
return (PXCImage::COLOR_FORMAT_DEPTH == pinfo.imageInfo.format);
|
||||
}
|
||||
protected:
|
||||
FrameInternal m_frameDepth;
|
||||
FrameInternal m_frameIR;
|
||||
FrameInternal m_frameUV;
|
||||
|
||||
bool prepareIplImage(PXCImage *pxcImage)
|
||||
{
|
||||
if (NULL == pxcImage)
|
||||
return false;
|
||||
PXCImage::ImageInfo info;
|
||||
pxcImage->QueryInfo(&info);
|
||||
|
||||
PXCImage::ImageData data;
|
||||
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, &data);
|
||||
|
||||
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
|
||||
return false;
|
||||
|
||||
if (PXCImage::COLOR_FORMAT_DEPTH != data.format)
|
||||
return false;
|
||||
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[0], data.pitches[0]);
|
||||
temp.copyTo(m_frameDepth.m_mat);
|
||||
}
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[1], data.pitches[1]);
|
||||
temp.copyTo(m_frameIR.m_mat);
|
||||
}
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_32FC2, data.planes[2], data.pitches[2]);
|
||||
temp.copyTo(m_frameUV.m_mat);
|
||||
}
|
||||
|
||||
pxcImage->ReleaseAccess(&data);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CvCapture_IntelPerC : public CvCapture
|
||||
{
|
||||
public:
|
||||
CvCapture_IntelPerC(int /*index*/)
|
||||
: m_contextOpened(false)
|
||||
{
|
||||
pxcStatus sts = PXCSession_Create(&m_session);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
return;
|
||||
m_contextOpened = m_imageStream.initStream(m_session);
|
||||
m_contextOpened &= m_depthStream.initStream(m_session);
|
||||
}
|
||||
virtual ~CvCapture_IntelPerC(){}
|
||||
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
double propValue = 0;
|
||||
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
|
||||
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
propValue = m_imageStream.getProperty(purePropIdx);
|
||||
}
|
||||
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
propValue = m_depthStream.getProperty(purePropIdx);
|
||||
}
|
||||
else
|
||||
{
|
||||
propValue = m_depthStream.getProperty(purePropIdx);
|
||||
}
|
||||
return propValue;
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
bool isSet = false;
|
||||
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
|
||||
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
isSet = m_imageStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
isSet = m_depthStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
else
|
||||
{
|
||||
isSet = m_depthStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
return isSet;
|
||||
}
|
||||
|
||||
bool grabFrame()
|
||||
{
|
||||
if (!isOpened())
|
||||
return false;
|
||||
|
||||
bool isGrabbed = false;
|
||||
if (m_depthStream.isValid())
|
||||
isGrabbed = m_depthStream.grabFrame();
|
||||
if ((m_imageStream.isValid()) && (-1 != m_imageStream.getProfileIDX()))
|
||||
isGrabbed &= m_imageStream.grabFrame();
|
||||
|
||||
return isGrabbed;
|
||||
}
|
||||
|
||||
virtual IplImage* retrieveFrame(int outputType)
|
||||
{
|
||||
IplImage* image = 0;
|
||||
switch (outputType)
|
||||
{
|
||||
case CV_CAP_INTELPERC_DEPTH_MAP:
|
||||
image = m_depthStream.retrieveDepthFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_UVDEPTH_MAP:
|
||||
image = m_depthStream.retrieveUVFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_IR_MAP:
|
||||
image = m_depthStream.retrieveIRFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_IMAGE:
|
||||
image = m_imageStream.retrieveFrame();
|
||||
break;
|
||||
}
|
||||
CV_Assert(NULL != image);
|
||||
return image;
|
||||
}
|
||||
|
||||
bool isOpened() const
|
||||
{
|
||||
return m_contextOpened;
|
||||
}
|
||||
protected:
|
||||
bool m_contextOpened;
|
||||
|
||||
PXCSmartPtr<PXCSession> m_session;
|
||||
CvIntelPerCStreamImage m_imageStream;
|
||||
CvIntelPerCStreamDepth m_depthStream;
|
||||
};
|
||||
|
||||
|
||||
CvCapture* cvCreateCameraCapture_IntelPerC(int index)
|
||||
{
|
||||
CvCapture_IntelPerC* capture = new CvCapture_IntelPerC(index);
|
||||
|
||||
if( capture->isOpened() )
|
||||
return capture;
|
||||
|
||||
delete capture;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif //HAVE_INTELPERC
|
@ -128,6 +128,7 @@ CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
|
||||
CvCapture* cvCreateCameraCapture_Android( int index );
|
||||
CvCapture* cvCreateCameraCapture_XIMEA( int index );
|
||||
CvCapture* cvCreateCameraCapture_AVFoundation(int index);
|
||||
CvCapture* cvCreateCameraCapture_IntelPerC(int index);
|
||||
|
||||
|
||||
CVAPI(int) cvHaveImageReader(const char* filename);
|
||||
|
@ -35,6 +35,7 @@
|
||||
defined(HAVE_XIMEA) || \
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_GIGE_API) || \
|
||||
defined(HAVE_INTELPERC) || \
|
||||
(0)
|
||||
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
|
||||
# define BUILD_WITH_CAMERA_SUPPORT 1
|
||||
|
@ -3299,7 +3299,10 @@ public:
|
||||
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
|
||||
{
|
||||
bufxy = (*m1)(Rect(x, y, bcols, brows));
|
||||
bufa = (*m2)(Rect(x, y, bcols, brows));
|
||||
|
||||
const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
|
||||
for( x1 = 0; x1 < bcols; x1++ )
|
||||
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
|
||||
}
|
||||
else if( planar_input )
|
||||
{
|
||||
@ -3680,7 +3683,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
|
||||
{
|
||||
for( x = 0; x < size.width; x++ )
|
||||
{
|
||||
int fxy = src2 ? src2[x] : 0;
|
||||
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
|
||||
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
|
||||
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
|
||||
}
|
||||
@ -3689,7 +3692,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
|
||||
{
|
||||
for( x = 0; x < size.width; x++ )
|
||||
{
|
||||
int fxy = src2 ? src2[x] : 0;
|
||||
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
|
||||
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
|
||||
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
|
||||
}
|
||||
|
@ -18,6 +18,8 @@ class_ignore_list = (
|
||||
const_ignore_list = (
|
||||
"CV_CAP_OPENNI",
|
||||
"CV_CAP_PROP_OPENNI_",
|
||||
"CV_CAP_INTELPERC",
|
||||
"CV_CAP_PROP_INTELPERC_"
|
||||
"WINDOW_AUTOSIZE",
|
||||
"CV_WND_PROP_",
|
||||
"CV_WINDOW_",
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -66,8 +67,8 @@ uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols,
|
||||
uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
|
||||
{
|
||||
#ifdef DISABLE_IMAGE2D
|
||||
int x = clamp(convert_int_rte(coord.x), 0, cols - 1);
|
||||
int y = clamp(convert_int_rte(coord.y), 0, rows - 1);
|
||||
int x = clamp(round(coord.x), 0, cols - 1);
|
||||
int y = clamp(round(coord.y), 0, rows - 1);
|
||||
return img[elemPerRow * y + x];
|
||||
#else
|
||||
return (uchar)read_imageui(img, sam, coord).x;
|
||||
@ -98,6 +99,7 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
|
||||
#define CV_PI_F 3.14159265f
|
||||
#endif
|
||||
|
||||
|
||||
// Use integral image to calculate haar wavelets.
|
||||
// N = 2
|
||||
// for simple haar pattern
|
||||
@ -114,10 +116,10 @@ float icvCalcHaarPatternSum_2(
|
||||
|
||||
F d = 0;
|
||||
|
||||
int2 dx1 = convert_int2_rte(ratio * src[0]);
|
||||
int2 dy1 = convert_int2_rte(ratio * src[1]);
|
||||
int2 dx2 = convert_int2_rte(ratio * src[2]);
|
||||
int2 dy2 = convert_int2_rte(ratio * src[3]);
|
||||
int2 dx1 = convert_int2(round(ratio * src[0]));
|
||||
int2 dy1 = convert_int2(round(ratio * src[1]));
|
||||
int2 dx2 = convert_int2(round(ratio * src[2]));
|
||||
int2 dy2 = convert_int2(round(ratio * src[3]));
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
@ -136,106 +138,9 @@ float icvCalcHaarPatternSum_2(
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
// N = 3
|
||||
float icvCalcHaarPatternSum_3(
|
||||
IMAGE_INT32 sumTex,
|
||||
__constant float4 *src,
|
||||
int oldSize,
|
||||
int newSize,
|
||||
int y, int x,
|
||||
int rows, int cols, int elemPerRow)
|
||||
{
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
F d = 0;
|
||||
|
||||
int4 dx1 = convert_int4_rte(ratio * src[0]);
|
||||
int4 dy1 = convert_int4_rte(ratio * src[1]);
|
||||
int4 dx2 = convert_int4_rte(ratio * src[2]);
|
||||
int4 dy2 = convert_int4_rte(ratio * src[3]);
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
|
||||
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
// N = 4
|
||||
float icvCalcHaarPatternSum_4(
|
||||
IMAGE_INT32 sumTex,
|
||||
__constant float4 *src,
|
||||
int oldSize,
|
||||
int newSize,
|
||||
int y, int x,
|
||||
int rows, int cols, int elemPerRow)
|
||||
{
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
F d = 0;
|
||||
|
||||
int4 dx1 = convert_int4_rte(ratio * src[0]);
|
||||
int4 dy1 = convert_int4_rte(ratio * src[1]);
|
||||
int4 dx2 = convert_int4_rte(ratio * src[2]);
|
||||
int4 dy2 = convert_int4_rte(ratio * src[3]);
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
|
||||
d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
|
||||
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Hessian
|
||||
|
||||
__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
|
||||
__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
|
||||
__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
|
||||
|
||||
__inline int calcSize(int octave, int layer)
|
||||
{
|
||||
/* Wavelet size at first layer of first octave. */
|
||||
@ -250,6 +155,24 @@ __inline int calcSize(int octave, int layer)
|
||||
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
|
||||
}
|
||||
|
||||
// Calculate a derivative in an axis-aligned direction (x or y). The "plus1"
|
||||
// boxes contribute 1 * (area), and the "minus2" box contributes -2 * (area).
|
||||
// So the final computation is plus1a + plus1b - 2 * minus2. The corners are
|
||||
// labeled A, B, C, and D, with A being the top left, B being top right, C
|
||||
// being bottom left, and D being bottom right.
|
||||
F calcAxisAlignedDerivative(
|
||||
int plus1a_A, int plus1a_B, int plus1a_C, int plus1a_D, F plus1a_scale,
|
||||
int plus1b_A, int plus1b_B, int plus1b_C, int plus1b_D, F plus1b_scale,
|
||||
int minus2_A, int minus2_B, int minus2_C, int minus2_D, F minus2_scale)
|
||||
{
|
||||
F plus1a = plus1a_A - plus1a_B - plus1a_C + plus1a_D;
|
||||
F plus1b = plus1b_A - plus1b_B - plus1b_C + plus1b_D;
|
||||
F minus2 = minus2_A - minus2_B - minus2_C + minus2_D;
|
||||
|
||||
return (plus1a / plus1a_scale -
|
||||
2.0f * minus2 / minus2_scale +
|
||||
plus1b / plus1b_scale);
|
||||
}
|
||||
|
||||
//calculate targeted layer per-pixel determinant and trace with an integral image
|
||||
__kernel void icvCalcLayerDetAndTrace(
|
||||
@ -264,7 +187,7 @@ __kernel void icvCalcLayerDetAndTrace(
|
||||
int c_octave,
|
||||
int c_layer_rows,
|
||||
int sumTex_step
|
||||
)
|
||||
)
|
||||
{
|
||||
det_step /= sizeof(*det);
|
||||
trace_step /= sizeof(*trace);
|
||||
@ -288,16 +211,103 @@ __kernel void icvCalcLayerDetAndTrace(
|
||||
|
||||
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
|
||||
{
|
||||
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
int x = j << c_octave;
|
||||
int y = i << c_octave;
|
||||
|
||||
float ratio = (float)size / 9;
|
||||
|
||||
// Precompute some commonly used values, which are used to offset
|
||||
// texture coordinates in the integral image.
|
||||
int r1 = round(ratio);
|
||||
int r2 = round(ratio * 2.0f);
|
||||
int r3 = round(ratio * 3.0f);
|
||||
int r4 = round(ratio * 4.0f);
|
||||
int r5 = round(ratio * 5.0f);
|
||||
int r6 = round(ratio * 6.0f);
|
||||
int r7 = round(ratio * 7.0f);
|
||||
int r8 = round(ratio * 8.0f);
|
||||
int r9 = round(ratio * 9.0f);
|
||||
|
||||
// Calculate the approximated derivative in the x-direction
|
||||
F d = 0;
|
||||
{
|
||||
// Some of the pixels needed to compute the derivative are
|
||||
// repeated, so we don't duplicate the fetch here.
|
||||
int t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t07 = read_sumTex( sumTex, sampler, (int2)(x, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t32 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t37 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t62 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t67 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t92 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t97 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
|
||||
d = calcAxisAlignedDerivative(t02, t07, t32, t37, (r3) * (r7 - r2),
|
||||
t62, t67, t92, t97, (r9 - r6) * (r7 - r2),
|
||||
t32, t37, t62, t67, (r6 - r3) * (r7 - r2));
|
||||
}
|
||||
const float dx = (float)d;
|
||||
|
||||
// Calculate the approximated derivative in the y-direction
|
||||
d = 0;
|
||||
{
|
||||
// Some of the pixels needed to compute the derivative are
|
||||
// repeated, so we don't duplicate the fetch here.
|
||||
int t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t23 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r3), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t70 = read_sumTex( sumTex, sampler, (int2)(x + r7, y), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t73 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r3), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t26 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r6), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t76 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r6), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t29 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r9), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t79 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r9), c_img_rows, c_img_cols, sumTex_step );
|
||||
|
||||
d = calcAxisAlignedDerivative(t20, t23, t70, t73, (r7 - r2) * (r3),
|
||||
t26, t29, t76, t79, (r7 - r2) * (r9 - r6),
|
||||
t23, t26, t73, t76, (r7 - r2) * (r6 - r3));
|
||||
}
|
||||
const float dy = (float)d;
|
||||
|
||||
// Calculate the approximated derivative in the xy-direction
|
||||
d = 0;
|
||||
{
|
||||
// There is nothing to reuse here, we just have to get all of the pixels in
|
||||
// separate fetches
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
d += t / ((r4 - r1) * (r4 - r1));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
d -= t / ((r8 - r5) * (r4 - r1));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
d -= t / ((r4 - r1) * (r8 - r5));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
d += t / ((r8 - r5) * (r8 - r5));
|
||||
}
|
||||
const float dxy = (float)d;
|
||||
|
||||
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
|
||||
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
|
||||
}
|
||||
}
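Editor's sketch, not part of the patch: the response written out above, restated as two tiny host-side helpers. The 0.81f factor is (0.9)^2, the usual SURF weight that compensates for approximating the Gaussian second derivatives with box filters; dx, dy and dxy are the box-filter responses computed from the integral image.

    // Minimal restatement of the per-pixel response stored into det[] and trace[].
    static inline float surfHessianDet(float dx, float dy, float dxy)
    {
        return dx * dy - 0.81f * dxy * dxy;   // determinant of the approximated Hessian
    }

    static inline float surfHessianTrace(float dx, float dy)
    {
        return dx + dy;                        // sign of the Laplacian, used later for matching
    }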


////////////////////////////////////////////////////////////////////////
// NONMAX

@ -309,10 +319,10 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro

float d = 0;

int dx1 = convert_int_rte(ratio * c_DM[0]);
int dy1 = convert_int_rte(ratio * c_DM[1]);
int dx2 = convert_int_rte(ratio * c_DM[2]);
int dy2 = convert_int_rte(ratio * c_DM[3]);
int dx1 = round(ratio * c_DM[0]);
int dy1 = round(ratio * c_DM[1]);
int dx2 = round(ratio * c_DM[2]);
int dy2 = round(ratio * c_DM[3]);

float t = 0;

@ -572,7 +582,7 @@ void icvFindMaximaInLayer(
}

// solve 3x3 linear system Ax=b for floating point input
inline bool solve3x3_float(volatile __local const float4 *A, volatile __local const float *b, volatile __local float *x)
inline bool solve3x3_float(const float4 *A, const float *b, float *x)
{
float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
- A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
@ -651,7 +661,7 @@ void icvInterpolateKeypoint(

if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0)
{
volatile __local float dD[3];
float dD[3];

//dx
dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]);
@ -660,7 +670,7 @@ void icvInterpolateKeypoint(
//ds
dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);

volatile __local float4 H[3];
float4 H[3];

//dxx
H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
@ -681,7 +691,7 @@ void icvInterpolateKeypoint(
//dss
H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];

volatile __local float x[3];
float x[3];

if (solve3x3_float(H, dD, x))
{
@ -711,7 +721,7 @@ void icvInterpolateKeypoint(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);

// check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
@ -737,9 +747,12 @@ void icvInterpolateKeypoint(
////////////////////////////////////////////////////////////////////////
// Orientation

#define ORI_SEARCH_INC 5
#define ORI_WIN 60
#define ORI_SAMPLES 113
#define ORI_WIN 60
#define ORI_SAMPLES 113

// The distance between samples in the beginning of the reduction
#define ORI_RESPONSE_REDUCTION_WIDTH 48
#define ORI_RESPONSE_ARRAY_SIZE (ORI_RESPONSE_REDUCTION_WIDTH * 2)

__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
@ -833,12 +846,15 @@ void icvCalcOrientation(
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;


volatile __local float s_X[128];
volatile __local float s_Y[128];
volatile __local float s_angle[128];
__local float s_X[ORI_SAMPLES];
__local float s_Y[ORI_SAMPLES];
__local float s_angle[ORI_SAMPLES];

volatile __local float s_sumx[32 * 4];
volatile __local float s_sumy[32 * 4];
// Need to allocate enough to make the reduction work without accessing
// past the end of the array.
__local float s_sumx[ORI_RESPONSE_ARRAY_SIZE];
__local float s_sumy[ORI_RESPONSE_ARRAY_SIZE];
__local float s_mod[ORI_RESPONSE_ARRAY_SIZE];

/* The sampling intervals and wavelet sizes for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
@ -849,28 +865,60 @@ void icvCalcOrientation(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);

// check when grad_wav_size is too big
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return;

// Calc X, Y, angle and store it to shared memory
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
const int tid = get_local_id(0);
// Initialize values that are only used as part of the reduction later.
if (tid < ORI_RESPONSE_ARRAY_SIZE - ORI_LOCAL_SIZE) {
s_mod[tid + ORI_LOCAL_SIZE] = 0.0f;
}

float X = 0.0f, Y = 0.0f, angle = 0.0f;
float ratio = (float)grad_wav_size / 4;

if (tid < ORI_SAMPLES)
int r2 = round(ratio * 2.0);
int r4 = round(ratio * 4.0);
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
{
float X = 0.0f, Y = 0.0f, angle = 0.0f;
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin);
const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin);
const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);

if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);

float apt = c_aptW[i];

// Compute the haar sum without fetching duplicate pixels.
float t00 = read_sumTex( sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step);
float t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step);
float t04 = read_sumTex( sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step);
float t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step);
float t24 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step);
float t40 = read_sumTex( sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step);
float t42 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step);
float t44 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step);

F t = t00 - t04 - t20 + t24;
X -= t / ((r2) * (r4));

t = t20 - t24 - t40 + t44;
X += t / ((r4 - r2) * (r4));

t = t00 - t02 - t40 + t42;
Y += t / ((r2) * (r4));

t = t02 - t04 - t42 + t44;
Y -= t / ((r4) * (r4 - r2));

X = apt*X;
Y = apt*Y;

angle = atan2(Y, X);

@ -879,76 +927,61 @@ void icvCalcOrientation(
angle *= 180.0f / CV_PI_F;

}

s_X[i] = X;
s_Y[i] = Y;
s_angle[i] = angle;
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
barrier(CLK_LOCAL_MEM_FENCE);

float bestx = 0, besty = 0, best_mod = 0;
float sumx = 0.0f, sumy = 0.0f;
const int dir = tid * ORI_SEARCH_INC;
#pragma unroll
for (int i = 0; i < ORI_SAMPLES; ++i) {
int angle = round(s_angle[i]);

#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
int d = abs(angle - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[i];
sumy += s_Y[i];
}
}
s_sumx[tid] = sumx;
s_sumy[tid] = sumy;
s_mod[tid] = sumx*sumx + sumy*sumy;
barrier(CLK_LOCAL_MEM_FENCE);

volatile float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[get_local_id(0)];
sumy = s_Y[get_local_id(0)];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 32];
sumy += s_Y[get_local_id(0) + 32];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 64];
sumy += s_Y[get_local_id(0) + 64];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));

const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
// This reduction searches for the longest wavelet response vector. The first
// step uses all of the work items in the workgroup to narrow the search
// down to the three candidates. It requires s_mod to have a few more
// elements allocated past the work-group size, which are pre-initialized to
// 0.0f above.
for(int t = ORI_RESPONSE_REDUCTION_WIDTH; t >= 3; t /= 2) {
if (tid < t) {
if (s_mod[tid] < s_mod[tid + t]) {
s_mod[tid] = s_mod[tid + t];
s_sumx[tid] = s_sumx[tid + t];
s_sumy[tid] = s_sumy[tid + t];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
{
s_X[get_local_id(1)] = bestx;
s_Y[get_local_id(1)] = besty;
s_angle[get_local_id(1)] = best_mod;
}
barrier(CLK_LOCAL_MEM_FENCE);

if (get_local_id(1) == 0 && get_local_id(0) == 0)
// Do the final reduction and write out the result.
if (tid == 0)
{
int bestIdx = 0;

if (s_angle[1] > s_angle[bestIdx])
// The loop above narrowed the search of the longest vector to three
// possibilities. Pick the best here.
if (s_mod[1] > s_mod[bestIdx])
bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx])
if (s_mod[2] > s_mod[bestIdx])
bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;

float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]);
float kp_dir = atan2(s_sumy[bestIdx], s_sumx[bestIdx]);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
@ -961,7 +994,6 @@ void icvCalcOrientation(
}
}


__kernel
void icvSetUpright(
__global float * keypoints,
@ -1035,8 +1067,8 @@ inline float linearFilter(

float out = 0.0f;

const int x1 = convert_int_rtn(x);
const int y1 = convert_int_rtn(y);
const int x1 = round(x);
const int y1 = round(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;

@ -46,6 +46,7 @@

#ifdef HAVE_OPENCV_OCL
#include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp"

using namespace cv;
@ -57,18 +58,25 @@ namespace cv
{
namespace ocl
{
// The number of degrees between orientation samples in calcOrientation
const static int ORI_SEARCH_INC = 5;
// The local size of the calcOrientation kernel
const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);

static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
char optBuf [100] = {0};
char * optBufPtr = optBuf;
std::stringstream optsStr;
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
}

}
}

@ -601,8 +609,8 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));

size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
size_t localThreads[3] = {ORI_LOCAL_SIZE, 1, 1};
size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};

openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}
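Editor's note, a minimal sketch of the launch geometry implied above: with ORI_SEARCH_INC = 5 the orientation kernel now runs one work-group of 360 / 5 = 72 work items per keypoint, one candidate direction per work item, instead of the old 32x4 layout; nFeatures is assumed to come from the caller.

    const int ORI_SEARCH_INC = 5;
    const int ORI_LOCAL_SIZE = 360 / ORI_SEARCH_INC;                    // 72 work items
    size_t localThreads[3]  = { (size_t)ORI_LOCAL_SIZE, 1, 1 };
    size_t globalThreads[3] = { (size_t)nFeatures * localThreads[0], 1, 1 };  // one group per keypoint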

@ -287,7 +287,7 @@ ocl::createSeparableLinearFilter_GPU
----------------------------------------
Creates a separable linear filter engine.

.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )

:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.

@ -303,6 +303,8 @@ Creates a separable linear filter engine.

:param bordertype: Pixel extrapolation method.

:param imgSize: Source image size to choose optimal method for processing.

.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`


@ -334,7 +336,7 @@ ocl::createDerivFilter_GPU
------------------------------
Creates a filter engine for the generalized Sobel operator.

.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT )
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )

:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.

@ -348,6 +350,8 @@ Creates a filter engine for the generalized Sobel operator.

:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.

:param imgSize: Source image size to choose optimal method for processing.

.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`


@ -405,7 +409,7 @@ ocl::createGaussianFilter_GPU
---------------------------------
Creates a Gaussian filter engine.

.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )

:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.

@ -417,6 +421,8 @@ Creates a Gaussian filter engine.

:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.

:param imgSize: Source image size to choose optimal method for processing.

.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
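Editor's sketch, assuming the extended signature documented above: passing the real source size as the new optional imgSize argument is what allows the engine to pick the optimized single-pass path where it is available. Everything below is illustrative, not part of the patch.

    cv::Mat host = cv::Mat::zeros(480, 640, CV_8UC1);
    cv::ocl::oclMat src(host), dst(host.size(), host.type());
    cv::Ptr<cv::ocl::FilterEngine_GPU> gauss =
        cv::ocl::createGaussianFilter_GPU(src.type(), cv::Size(5, 5), 1.5, 0,
                                          cv::BORDER_DEFAULT, src.size());
    gauss->apply(src, dst);   // same result as ocl::GaussianBlur on the same inputs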

ocl::GaussianBlur

@ -695,17 +695,17 @@ namespace cv

//! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));

//! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
const Ptr<BaseColumnFilter_GPU> &columnFilter);

//! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));

//! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );

//! applies Laplacian operator to the image
// supports only ksize = 1 and ksize = 3
@ -1439,8 +1439,10 @@ namespace cv
oclMat Dx_;
oclMat Dy_;
oclMat eig_;
oclMat eig_minmax_;
oclMat minMaxbuf_;
oclMat tmpCorners_;
oclMat counter_;
};

inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,

@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;

String build_options = format("-D DEPTH_%d", src.depth());
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}

String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;

@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));

size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
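Editor's sketch of the work partitioning used above, under the assumption that the device reports FEATURE_CL_INTEL_DEVICE: each work item converts 1, 2 or 4 pixels, so the global width shrinks accordingly. The helper and its flag are hypothetical, introduced only to restate the selection rule.

    #include <opencv2/core/core.hpp>

    static size_t globalWidthFor(int cols, int depth, bool intelDevice)
    {
        int pixels_per_work_item = 1;                    // default: one pixel per work item
        if (intelDevice)                                 // stands in for the feature check above
        {
            if (cols % 4 == 0 && depth == CV_8U) pixels_per_work_item = 4;
            else if (cols % 2 == 0)              pixels_per_work_item = 2;
        }
        return (size_t)(cols / pixels_per_work_item);    // matches gt[0] = dst.cols/pixels_per_work_item
    }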

static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;

std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));

size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}

static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;

int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));

size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;

if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}

std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options += additionalOptions;

std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));

size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}

static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;

@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -119,10 +238,13 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}

static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;

int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

@ -136,6 +258,36 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));

size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}

static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");

std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)

static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
src.depth(), greenbits, dst.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
src.depth(), greenbits, dst.channels(), bidx);
int src_offset = src.offset >> 1, src_step = src.step >> 1;
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();

@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree

static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
src.depth(), greenbits, src.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
src.depth(), greenbits, src.channels(), bidx);
int src_offset = (int)src.offset, src_step = (int)src.step;
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;

@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
CV_Assert(scn == 1);
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, 0, "Gray2RGB");
fromGray_caller(src, dst, 0, "Gray2RGB");
break;
}
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)

Size dstSz(sz.width, sz.height * 2 / 3);
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
break;
}
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
initialized = true;
}

fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
return;
}

fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
break;
}
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
dst.create(sz, CV_MAKETYPE(depth, dcn));

std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
break;
}
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:

@ -741,6 +741,135 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
f->apply(src, dst);
}

const int optimizedSepFilterLocalSize = 16;
static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
{
size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};

unsigned int src_pitch = src.step;
unsigned int dst_pitch = dst.step;

int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;

std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));

args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));

args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));

args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));

args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));

String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
row_kernel.rows / 2, col_kernel.rows / 2 );

option += " -D KERNEL_MATRIX_X=";
for(int i=0; i<row_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
option += "0x0";

option += " -D KERNEL_MATRIX_Y=";
for(int i=0; i<col_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
option += "0x0";

switch(src.type())
{
case CV_8UC1:
option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
break;
case CV_32FC1:
option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
break;
case CV_8UC2:
option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
break;
case CV_32FC2:
option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
break;
case CV_8UC3:
option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
break;
case CV_32FC3:
option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
break;
case CV_8UC4:
option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
break;
case CV_32FC4:
option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(dst.type())
{
case CV_8UC1:
option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
break;
case CV_8UC2:
option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
break;
case CV_8UC3:
option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
break;
case CV_8UC4:
option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
break;
case CV_32FC1:
option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
break;
case CV_32FC2:
option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
break;
case CV_32FC3:
option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
break;
case CV_32FC4:
option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(bordertype)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}

openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
-1, -1, option.c_str() );
}
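Editor's sketch of the encoding used for KERNEL_MATRIX_X and KERNEL_MATRIX_Y above: each float coefficient is baked into the program build options as its raw IEEE-754 bit pattern, so the OpenCL compiler sees an exact constant instead of a rounded decimal string. The helper below only restates that trick; it is not part of the patch.

    #include <cstdio>
    #include <cstring>
    #include <string>

    static void appendCoeffAsHex(std::string& option, float coeff)
    {
        unsigned int bits;
        std::memcpy(&bits, &coeff, sizeof(bits));   // same idea as the reinterpret_cast above
        char buf[16];
        std::snprintf(buf, sizeof(buf), "0x%x,", bits);
        option += buf;                              // e.g. 1.0f -> "0x3f800000,"
    }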

////////////////////////////////////////////////////////////////////////////////////////////////////
// SeparableFilter

@ -790,6 +919,35 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter
return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
}

namespace
{
class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
{
public:
SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
{
bordertype = btype;
rowKernel = rowKernel_;
columnKernel = columnKernel_;
}

virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
{
normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());

oclMat srcROI = src(roi);
oclMat dstROI = dst(roi);

sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
}

Mat rowKernel;
Mat columnKernel;
int bordertype;
};
}


static void GPUFilterBox(const oclMat &src, oclMat &dst,
Size &ksize, const Point anchor, const int borderType)
{
@ -1243,17 +1401,32 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
}

Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype)
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
int cn = CV_MAT_CN(srcType);
int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
int bufType = CV_MAKETYPE(bdepth, cn);
Context* clCxt = Context::getContext();

Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
//if image size is non-degenerate and large enough
//and if filter support is reasonable to satisfy larger local memory requirements,
//then we can use single pass routine to avoid extra runtime calls overhead
if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
{
return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
}
else
{
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);

return createSeparableFilter_GPU(rowFilter, columnFilter);
return createSeparableFilter_GPU(rowFilter, columnFilter);
}
}

void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
@ -1277,16 +1450,16 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat

dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));

Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype);
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
f->apply(src, dst);
}

Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
{
Mat kx, ky;
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
return createSeparableLinearFilter_GPU(srcType, dstType,
kx, ky, Point(-1, -1), 0, borderType);
kx, ky, Point(-1, -1), 0, borderType, imgSize);
}

////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1356,7 +1529,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter

Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype)
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
{
int depth = CV_MAT_DEPTH(type);

@ -1383,7 +1556,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
else
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));

return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype);
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
}

void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
@ -1419,7 +1592,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si

dst.create(src.size(), src.type());

Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
f->apply(src, dst);
}

@ -48,154 +48,142 @@
using namespace cv;
using namespace cv::ocl;

// currently the sort procedure on the host is more efficient
static bool use_cpu_sorter = true;

namespace
// compact structure for corners
struct DefCorner
{
enum SortMethod
float eig; //eigenvalue of corner
short x; //x coordinate of corner point
short y; //y coordinate of corner point
} ;

// compare procedure for corners
// it is used for sorting on the host side
struct DefCornerCompare
{
CPU_STL,
BITONIC,
SELECTION
};

const int GROUP_SIZE = 256;

template<SortMethod method>
struct Sorter
{
//typedef EigType;
};

//TODO(pengx): optimize the GPU sorter's performance so that the CPU sorter can be removed.
template<>
struct Sorter<CPU_STL>
{
typedef oclMat EigType;
static cv::Mutex cs;
static Mat mat_eig;

//prototype
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
bool operator()(const DefCorner a, const DefCorner b) const
{
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
return v1 > v2;
}
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
cv::AutoLock lock(cs);
//temporarily use STL's sort function
Mat mat_corners = corners;
mat_eig = eig_tex;
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
corners = mat_corners;
return a.eig > b.eig;
}
};
cv::Mutex Sorter<CPU_STL>::cs;
cv::Mat Sorter<CPU_STL>::mat_eig;

template<>
struct Sorter<BITONIC>
// sort corner points using the OpenCL bitonic sort implementation
static void sortCorners_caller(oclMat& corners, const int count)
{
typedef TextureCL EigType;
Context * cxt = Context::getContext();
int GS = count/2;
int LS = min(255,GS);
size_t globalThreads[3] = {GS, 1, 1};
size_t localThreads[3] = {LS, 1, 1};

static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
{
Context * cxt = Context::getContext();
size_t globalThreads[3] = {count / 2, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};

// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
++numStages;
}
const int argc = 4;
std::vector< std::pair<size_t, const void *> > args(argc);
std::string kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
++numStages;
}
const int argc = 5;
std::vector< std::pair<size_t, const void *> > args(argc);
String kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
}
};
}
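Editor's note, a small sketch of the stage/pass bookkeeping used above: a bitonic sort of count keys needs log2(count) stages (so count is assumed to be a power of two), stage s consists of s+1 passes, and each pass is launched as one kernel run over count/2 work items.

    static int bitonicStages(int count)
    {
        int numStages = 0;
        for (int i = count; i > 1; i >>= 1)
            ++numStages;            // e.g. count = 1024 -> 10 stages, stage 9 runs 10 passes
        return numStages;
    }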

template<>
struct Sorter<SELECTION>
{
typedef TextureCL EigType;

static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
Context * cxt = Context::getContext();

size_t globalThreads[3] = {count, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};

std::vector< std::pair<size_t, const void *> > args;
//local
String kernelname = "sortCorners_selectionSortLocal";
int lds_size = GROUP_SIZE * sizeof(cl_float2);
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
args.push_back( std::make_pair( lds_size, (void*)NULL) );

openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);

//final
kernelname = "sortCorners_selectionSortFinal";
args.pop_back();
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
};

int findCorners_caller(
const TextureCL& eig,
const float threshold,
const oclMat& mask,
oclMat& corners,
const int max_count)
// find corners in the matrix and put them into an array
static void findCorners_caller(
const oclMat& eig_mat, //input matrix with eigenvalues
oclMat& eigMinMax, //input with min and max values of eigenvalues
const float qualityLevel,
const oclMat& mask,
oclMat& corners, //output array with detected corners
oclMat& counter) //output value with number of detected corners, must be 0 before the call
{
String opt;
std::vector<int> k;
Context * cxt = Context::getContext();

std::vector< std::pair<size_t, const void*> > args;
String kernelname = "findCorners";

const int mask_strip = mask.step / mask.elemSize1();

oclMat g_counter(1, 1, CV_32SC1);
g_counter.setTo(0);
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));

args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
int src_pitch = (int)eig_mat.step;
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));

size_t globalThreads[3] = {eig.cols, eig.rows, 1};
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
size_t localThreads[3] = {16, 16, 1};
if(!mask.empty())
opt += " -D WITH_MASK=1";

const char * opt = mask.empty() ? "" : "-D WITH_MASK";
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
return std::min(Mat(g_counter).at<int>(0), max_count);
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
}
|
||||
|
||||
|
||||
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
|
||||
{
|
||||
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
||||
CV_Assert(groupnum != 0);
|
||||
|
||||
int dbsize = groupnum * 2 * src.elemSize();
|
||||
|
||||
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
|
||||
|
||||
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
|
||||
|
||||
int all_cols = src.step / src.elemSize();
|
||||
int pre_cols = (src.offset % src.step) / src.elemSize();
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
||||
int invalid_cols = pre_cols + sec_cols;
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
|
||||
int offset = src.offset / src.elemSize();
|
||||
|
||||
{// first parallel pass
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
size_t globalThreads[3] = {groupnum * 256, 1, 1};
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
|
||||
args, -1, -1, "-D T=float -D DEPTH_5");
|
||||
}
|
||||
|
||||
{// run final "serial" kernel to find accumulate results from threads and reset corner counter
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
|
||||
size_t globalThreads[3] = {1, 1, 1};
|
||||
size_t localThreads[3] = {1, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
|
||||
args, -1, -1);
|
||||
}
|
||||
}
|
||||
}//unnamed namespace
|
||||
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
||||
{
|
||||
@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
|
||||
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
||||
|
||||
if (useHarrisDetector)
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||
else
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
||||
|
||||
double maxVal = 0;
|
||||
minMax(eig_, NULL, &maxVal);
|
||||
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
|
||||
|
||||
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
||||
// find max eigenvalue and reset detected counters
|
||||
minMaxEig_caller(eig_,eig_minmax_,counter_);
|
||||
|
||||
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
|
||||
int total = findCorners_caller(
|
||||
*eig_tex,
|
||||
static_cast<float>(maxVal * qualityLevel),
|
||||
// allocate buffer for kernels
|
||||
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
|
||||
|
||||
if(!use_cpu_sorter)
|
||||
{ // round to 2^n
|
||||
unsigned int n=1;
|
||||
for(n=1;n<(unsigned int)corner_array_size;n<<=1);
|
||||
corner_array_size = (int)n;
|
||||
|
||||
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
|
||||
|
||||
// set to 0 to be able use bitonic sort on whole 2^n array
|
||||
tmpCorners_.setTo(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
|
||||
}
|
||||
|
||||
int total = tmpCorners_.cols; // by default the number of corner is full array
|
||||
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
|
||||
|
||||
//find points with high eigenvalue and put it into the output array
|
||||
findCorners_caller(
|
||||
eig_,
|
||||
eig_minmax_,
|
||||
static_cast<float>(qualityLevel),
|
||||
mask,
|
||||
tmpCorners_,
|
||||
tmpCorners_.cols);
|
||||
counter_);
|
||||
|
||||
if(!use_cpu_sorter)
|
||||
{// sort detected corners on deivce side
|
||||
sortCorners_caller(tmpCorners_, corner_array_size);
|
||||
}
|
||||
else
|
||||
{// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
|
||||
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
|
||||
}
|
||||
|
||||
//blocking read whole corners array (sorted or not sorted)
|
||||
openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
|
||||
|
||||
if (total == 0)
|
||||
{
|
||||
{// check for trivial case
|
||||
corners.release();
|
||||
return;
|
||||
}
|
||||
|
||||
if(use_cpu_sorter)
|
||||
{
|
||||
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
//if total is power of 2
|
||||
if(((total - 1) & (total)) == 0)
|
||||
{
|
||||
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
{// sort detected corners on cpu side.
|
||||
tmp.resize(total);
|
||||
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
|
||||
}
|
||||
|
||||
//estimate maximal size of final output array
|
||||
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
|
||||
int D2 = (int)ceil(minDistance * minDistance);
|
||||
// allocate output buffer
|
||||
std::vector<Point2f> tmp2;
|
||||
tmp2.reserve(total_max);
|
||||
|
||||
|
||||
if (minDistance < 1)
|
||||
{
|
||||
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
|
||||
tmpCorners_(roi_range).copyTo(corners);
|
||||
{// we have not distance restriction. then just copy with conversion maximal allowed points into output array
|
||||
for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
|
||||
{
|
||||
tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<Point2f> tmp(total);
|
||||
downloadPoints(tmpCorners_, tmp);
|
||||
|
||||
std::vector<Point2f> tmp2;
|
||||
tmp2.reserve(total);
|
||||
|
||||
{// we have distance restriction. then start coping to output array from the first element and check distance for each next one
|
||||
const int cell_size = cvRound(minDistance);
|
||||
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
||||
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
||||
|
||||
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
|
||||
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
|
||||
|
||||
for (int i = 0; i < total; ++i)
|
||||
for (int i = 0; i < total ; ++i)
|
||||
{
|
||||
Point2f p = tmp[i];
|
||||
DefCorner p = tmp[i];
|
||||
|
||||
if(p.eig<=0.0f)
|
||||
break; // condition to stop that is needed for GPU bitonic sort usage.
|
||||
|
||||
bool good = true;
|
||||
|
||||
@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
|
||||
{
|
||||
for (int xx = x1; xx <= x2; xx++)
|
||||
{
|
||||
std::vector<Point2f>& m = grid[yy * grid_width + xx];
|
||||
|
||||
if (!m.empty())
|
||||
std::vector<Point2i>& m = grid[yy * grid_width + xx];
|
||||
if (m.empty())
|
||||
continue;
|
||||
for(size_t j = 0; j < m.size(); j++)
|
||||
{
|
||||
for(size_t j = 0; j < m.size(); j++)
|
||||
{
|
||||
float dx = p.x - m[j].x;
|
||||
float dy = p.y - m[j].y;
|
||||
int dx = p.x - m[j].x;
|
||||
int dy = p.y - m[j].y;
|
||||
|
||||
if (dx * dx + dy * dy < minDistance * minDistance)
|
||||
{
|
||||
good = false;
|
||||
goto break_out;
|
||||
}
|
||||
if (dx * dx + dy * dy < D2)
|
||||
{
|
||||
good = false;
|
||||
goto break_out_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break_out:
|
||||
break_out_:
|
||||
|
||||
if(good)
|
||||
{
|
||||
grid[y_cell * grid_width + x_cell].push_back(p);
|
||||
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
|
||||
|
||||
tmp2.push_back(p);
|
||||
tmp2.push_back(Point2f(p.x,p.y));
|
||||
|
||||
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
|
||||
}
|
||||
int final_size = static_cast<int>(tmp2.size());
|
||||
if(final_size>0)
|
||||
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
|
||||
else
|
||||
corners.release();
|
||||
}
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
|
||||
{
|
||||
|
@ -866,16 +866,17 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
|
||||
if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
//setup local group size
|
||||
localThreads[0] = 8;
|
||||
localThreads[1] = 16;
|
||||
//setup local group size for "pixel step" = 1
|
||||
localThreads[0] = 16;
|
||||
localThreads[1] = 32;
|
||||
localThreads[2] = 1;
|
||||
|
||||
//init maximal number of workgroups
|
||||
//calc maximal number of workgroups
|
||||
int WGNumX = 1+(sizev[0].width /(localThreads[0]));
|
||||
int WGNumY = 1+(sizev[0].height/(localThreads[1]));
|
||||
int WGNumZ = loopcount;
|
||||
int WGNum = 0; //accurate number of non -empty workgroups
|
||||
int WGNumTotal = 0; //accurate number of non-empty workgroups
|
||||
int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
|
||||
oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
|
||||
{
|
||||
cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
|
||||
@ -895,12 +896,16 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
if(gx>=(Width-cascade->orig_window_size.width))
|
||||
continue; // no data to process
|
||||
|
||||
if(scaleinfo[z].factor<=2)
|
||||
{
|
||||
WGNumSampled++;
|
||||
}
|
||||
// save no-empty workgroup info into array
|
||||
pWGInfo[WGNum].s[0] = scaleinfo[z].width_height;
|
||||
pWGInfo[WGNum].s[1] = (gx << 16) | gy;
|
||||
pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff;
|
||||
memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float));
|
||||
WGNum++;
|
||||
pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
|
||||
pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
|
||||
pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
|
||||
memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
|
||||
WGNumTotal++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -908,13 +913,8 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
pWGInfo = NULL;
|
||||
}
|
||||
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
globalThreads[0] = localThreads[0]*WGNum;
|
||||
globalThreads[1] = localThreads[1];
|
||||
globalThreads[2] = 1;
|
||||
|
||||
#define NODE_SIZE 12
|
||||
// pack node info to have less memory loads
|
||||
// pack node info to have less memory loads on the device side
|
||||
oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
|
||||
{
|
||||
cl_int status;
|
||||
@ -963,8 +963,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
|
||||
options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
|
||||
options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
|
||||
options += format(" -D LSx=%d",localThreads[0]);
|
||||
options += format(" -D LSy=%d",localThreads[1]);
|
||||
options += format(" -D SPLITNODE=%d",splitnode);
|
||||
options += format(" -D SPLITSTAGE=%d",splitstage);
|
||||
options += format(" -D OUTPUTSZ=%d",outputsz);
|
||||
@ -972,8 +970,39 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
// init candiate global count by 0
|
||||
int pattern = 0;
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
|
||||
|
||||
if(WGNumTotal>WGNumSampled)
|
||||
{// small images and each pixel is processed
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
int pixelstep = 1;
|
||||
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
|
||||
globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
|
||||
globalThreads[1] = LS[1];
|
||||
globalThreads[2] = 1;
|
||||
String options1 = options;
|
||||
options1 += format(" -D PIXEL_STEP=%d",pixelstep);
|
||||
options1 += format(" -D WGSTART=%d",WGNumSampled);
|
||||
options1 += format(" -D LSx=%d",LS[0]);
|
||||
options1 += format(" -D LSy=%d",LS[1]);
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
|
||||
}
|
||||
if(WGNumSampled>0)
|
||||
{// large images each 4th pixel is processed
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
int pixelstep = 2;
|
||||
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
|
||||
globalThreads[0] = LS[0]*WGNumSampled;
|
||||
globalThreads[1] = LS[1];
|
||||
globalThreads[2] = 1;
|
||||
String options2 = options;
|
||||
options2 += format(" -D PIXEL_STEP=%d",pixelstep);
|
||||
options2 += format(" -D WGSTART=%d",0);
|
||||
options2 += format(" -D LSx=%d",LS[0]);
|
||||
options2 += format(" -D LSy=%d",LS[1]);
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
|
||||
}
|
||||
//read candidate buffer back and put it into host list
|
||||
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
assert(candidate[0]<outputsz);
|
||||
|
@ -76,6 +76,11 @@ namespace cv
|
||||
int cdescr_width;
|
||||
int cdescr_height;
|
||||
|
||||
// A shift value and type that allows qangle to be different
|
||||
// sizes on different hardware
|
||||
int qangle_step_shift;
|
||||
int qangle_type;
|
||||
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y);
|
||||
|
||||
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
||||
hog_device_cpu = true;
|
||||
else
|
||||
hog_device_cpu = false;
|
||||
|
||||
}
|
||||
|
||||
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
||||
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
|
||||
effect_size = img.size();
|
||||
|
||||
grad.create(img.size(), CV_32FC2);
|
||||
qangle.create(img.size(), CV_8UC2);
|
||||
qangle.create(img.size(), hog::qangle_type);
|
||||
|
||||
const size_t block_hist_size = getBlockHistogramSize();
|
||||
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||
@ -1606,6 +1612,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
|
||||
|
||||
int descr_size = descr_width * nblocks_win_y;
|
||||
cdescr_size = descr_size;
|
||||
|
||||
qangle_type = CV_8UC2;
|
||||
qangle_step_shift = 0;
|
||||
// Some Intel devices have low single-byte access performance,
|
||||
// so we change the datatype here.
|
||||
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
qangle_type = CV_32SC2;
|
||||
qangle_step_shift = 2;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::device::hog::compute_hists(int nbins,
|
||||
@ -1627,7 +1643,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
|
||||
int blocks_total = img_block_width * img_block_height;
|
||||
|
||||
int grad_quadstep = grad.step >> 2;
|
||||
int qangle_step = qangle.step;
|
||||
int qangle_step = qangle.step >> qangle_step_shift;
|
||||
|
||||
int blocks_in_group = 4;
|
||||
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
|
||||
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
|
||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||
int img_step = img.step;
|
||||
int grad_quadstep = grad.step >> 3;
|
||||
int qangle_step = qangle.step >> 1;
|
||||
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
|
||||
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
|
||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||
int img_step = img.step >> 2;
|
||||
int grad_quadstep = grad.step >> 3;
|
||||
int qangle_step = qangle.step >> 1;
|
||||
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
|
||||
|
@ -1035,67 +1035,117 @@ namespace cv
|
||||
else
|
||||
scale = 1. / scale;
|
||||
|
||||
if (ksize > 0)
|
||||
const int sobel_lsz = 16;
|
||||
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
|
||||
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
|
||||
src.wholerows > sobel_lsz + (ksize>>1) &&
|
||||
src.wholecols > sobel_lsz + (ksize>>1))
|
||||
{
|
||||
Context* clCxt = Context::getContext();
|
||||
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
|
||||
src.cols % 8 == 0 && src.rows % 8 == 0 &&
|
||||
ksize==3 &&
|
||||
(borderType ==cv::BORDER_REFLECT ||
|
||||
borderType == cv::BORDER_REPLICATE ||
|
||||
borderType ==cv::BORDER_REFLECT101 ||
|
||||
borderType ==cv::BORDER_WRAP))
|
||||
Dx.create(src.size(), CV_32FC1);
|
||||
Dy.create(src.size(), CV_32FC1);
|
||||
|
||||
CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
|
||||
|
||||
size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
|
||||
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
|
||||
|
||||
unsigned int src_pitch = src.step;
|
||||
unsigned int Dx_pitch = Dx.step;
|
||||
unsigned int Dy_pitch = Dy.step;
|
||||
|
||||
int src_offset_x = (src.offset % src.step) / src.elemSize();
|
||||
int src_offset_y = src.offset / src.step;
|
||||
|
||||
float _scale = scale;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
|
||||
|
||||
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
|
||||
switch(src.type())
|
||||
{
|
||||
Dx.create(src.size(), CV_32FC1);
|
||||
Dy.create(src.size(), CV_32FC1);
|
||||
|
||||
const unsigned int block_x = 8;
|
||||
const unsigned int block_y = 8;
|
||||
|
||||
unsigned int src_pitch = src.step;
|
||||
unsigned int dst_pitch = Dx.cols;
|
||||
|
||||
float _scale = scale;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
|
||||
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
|
||||
|
||||
String option = "-D BLK_X=8 -D BLK_Y=8";
|
||||
switch(borderType)
|
||||
{
|
||||
case cv::BORDER_REPLICATE:
|
||||
option += " -D BORDER_REPLICATE";
|
||||
break;
|
||||
case cv::BORDER_REFLECT:
|
||||
option += " -D BORDER_REFLECT";
|
||||
break;
|
||||
case cv::BORDER_REFLECT101:
|
||||
option += " -D BORDER_REFLECT101";
|
||||
break;
|
||||
case cv::BORDER_WRAP:
|
||||
option += " -D BORDER_WRAP";
|
||||
break;
|
||||
}
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
|
||||
case CV_8UC1:
|
||||
option += " -D SRCTYPE=uchar";
|
||||
break;
|
||||
case CV_32FC1:
|
||||
option += " -D SRCTYPE=float";
|
||||
break;
|
||||
}
|
||||
else
|
||||
switch(borderType)
|
||||
{
|
||||
case cv::BORDER_CONSTANT:
|
||||
option += " -D BORDER_CONSTANT";
|
||||
break;
|
||||
case cv::BORDER_REPLICATE:
|
||||
option += " -D BORDER_REPLICATE";
|
||||
break;
|
||||
case cv::BORDER_REFLECT:
|
||||
option += " -D BORDER_REFLECT";
|
||||
break;
|
||||
case cv::BORDER_REFLECT101:
|
||||
option += " -D BORDER_REFLECT_101";
|
||||
break;
|
||||
case cv::BORDER_WRAP:
|
||||
option += " -D BORDER_WRAP";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
|
||||
break;
|
||||
}
|
||||
|
||||
String kernel_name;
|
||||
switch(ksize)
|
||||
{
|
||||
case -1:
|
||||
option += " -D SCHARR";
|
||||
kernel_name = "sobel3";
|
||||
break;
|
||||
case 3:
|
||||
kernel_name = "sobel3";
|
||||
break;
|
||||
case 5:
|
||||
kernel_name = "sobel5";
|
||||
break;
|
||||
case 7:
|
||||
kernel_name = "sobel7";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
|
||||
break;
|
||||
}
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ksize > 0)
|
||||
{
|
||||
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
|
||||
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
|
||||
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
|
||||
else
|
||||
{
|
||||
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
|
||||
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
|
||||
}
|
||||
}
|
||||
CV_Assert(Dx.offset == 0 && Dy.offset == 0);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
185
modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
Normal file
185
modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
Normal file
@ -0,0 +1,185 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////Macro for border type////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define EXTRAPOLATE(x, maxV)
|
||||
#elif defined BORDER_REPLICATE
|
||||
//aaaaaa|abcdefgh|hhhhhhh
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = max(min((x), (maxV) - 1), 0); \
|
||||
}
|
||||
#elif defined BORDER_WRAP
|
||||
//cdefgh|abcdefgh|abcdefg
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = ( (x) + (maxV) ) % (maxV); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT
|
||||
//fedcba|abcdefgh|hgfedcb
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT_101
|
||||
//gfedcb|abcdefgh|gfedcba
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
|
||||
}
|
||||
#else
|
||||
#error No extrapolation method
|
||||
#endif
|
||||
|
||||
#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
|
||||
#else
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
|
||||
#endif
|
||||
|
||||
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
|
||||
|
||||
//horizontal and vertical filter kernels
|
||||
//should be defined on host during compile time to avoid overhead
|
||||
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
|
||||
__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
|
||||
(
|
||||
__global uchar* Src,
|
||||
const uint SrcPitch,
|
||||
const int srcOffsetX,
|
||||
const int srcOffsetY,
|
||||
__global uchar* Dst,
|
||||
const int DstOffset,
|
||||
const uint DstPitch,
|
||||
int width,
|
||||
int height,
|
||||
int dstWidth,
|
||||
int dstHeight
|
||||
)
|
||||
{
|
||||
//RADIUSX, RADIUSY are filter dimensions
|
||||
//BLK_X, BLK_Y are local wrogroup sizes
|
||||
//all these should be defined on host during compile time
|
||||
//first lsmem array for source pixels used in first pass,
|
||||
//second lsmemDy for storing first pass results
|
||||
__local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
|
||||
__local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];
|
||||
|
||||
//get local and global ids - used as image and local memory array indexes
|
||||
int lix = get_local_id(0);
|
||||
int liy = get_local_id(1);
|
||||
|
||||
int x = (int)get_global_id(0);
|
||||
int y = (int)get_global_id(1);
|
||||
|
||||
//calculate pixel position in source image taking image offset into account
|
||||
int srcX = x + srcOffsetX - RADIUSX;
|
||||
int srcY = y + srcOffsetY - RADIUSY;
|
||||
int xb = srcX;
|
||||
int yb = srcY;
|
||||
|
||||
//extrapolate coordinates, if needed
|
||||
//and read my own source pixel into local memory
|
||||
//with account for extra border pixels, which will be read by starting workitems
|
||||
int clocY = liy;
|
||||
int cSrcY = srcY;
|
||||
do
|
||||
{
|
||||
int yb = cSrcY;
|
||||
EXTRAPOLATE(yb, (height));
|
||||
|
||||
int clocX = lix;
|
||||
int cSrcX = srcX;
|
||||
do
|
||||
{
|
||||
int xb = cSrcX;
|
||||
EXTRAPOLATE(xb,(width));
|
||||
lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
|
||||
|
||||
clocX += BLK_X;
|
||||
cSrcX += BLK_X;
|
||||
}
|
||||
while(clocX < BLK_X+(RADIUSX*2));
|
||||
|
||||
clocY += BLK_Y;
|
||||
cSrcY += BLK_Y;
|
||||
}
|
||||
while(clocY < BLK_Y+(RADIUSY*2));
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//do vertical filter pass
|
||||
//and store intermediate results to second local memory array
|
||||
int i;
|
||||
WORKTYPE sum = 0.0f;
|
||||
int clocX = lix;
|
||||
do
|
||||
{
|
||||
sum = 0.0f;
|
||||
for(i=0; i<=2*RADIUSY; i++)
|
||||
sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
|
||||
lsmemDy[liy][clocX] = sum;
|
||||
clocX += BLK_X;
|
||||
}
|
||||
while(clocX < BLK_X+(RADIUSX*2));
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//if this pixel happened to be out of image borders because of global size rounding,
|
||||
//then just return
|
||||
if( x >= dstWidth || y >=dstHeight ) return;
|
||||
|
||||
//do second horizontal filter pass
|
||||
//and calculate final result
|
||||
sum = 0.0f;
|
||||
for(i=0; i<=2*RADIUSX; i++)
|
||||
sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);
|
||||
|
||||
//store result into destination image
|
||||
DST(x,y) = CONVERT_DSTTYPE(sum);
|
||||
}
|
@ -126,13 +126,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
)
|
||||
|
||||
{
|
||||
// this version used information provided for each workgroup
|
||||
// no empty WG
|
||||
int gid = (int)get_group_id(0);
|
||||
int lid_x = (int)get_local_id(0);
|
||||
int lid_y = (int)get_local_id(1);
|
||||
int lid = lid_y*LSx+lid_x;
|
||||
int4 WGInfo = pWGInfo[gid];
|
||||
int4 WGInfo = pWGInfo[WGSTART+gid];
|
||||
int GroupX = (WGInfo.y >> 16)&0xFFFF;
|
||||
int GroupY = (WGInfo.y >> 0 )& 0xFFFF;
|
||||
int Width = (WGInfo.x >> 16)&0xFFFF;
|
||||
@ -140,8 +138,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int ImgOffset = WGInfo.z;
|
||||
float ScaleFactor = as_float(WGInfo.w);
|
||||
|
||||
#define DATA_SIZE_X (LSx+WND_SIZE_X)
|
||||
#define DATA_SIZE_Y (LSy+WND_SIZE_Y)
|
||||
#define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
|
||||
#define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
|
||||
#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
|
||||
|
||||
local int SumL[DATA_SIZE];
|
||||
@ -165,9 +163,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int4 info1 = p;
|
||||
int4 info2 = pq;
|
||||
|
||||
{
|
||||
int xl = lid_x;
|
||||
int yl = lid_y;
|
||||
// calc processed ROI coordinate in local mem
|
||||
int xl = lid_x*PIXEL_STEP;
|
||||
int yl = lid_y*PIXEL_STEP;
|
||||
|
||||
{// calc variance_norm_factor for all stages
|
||||
int OffsetLocal = yl * DATA_SIZE_X + xl;
|
||||
int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl);
|
||||
|
||||
@ -194,13 +194,13 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
|
||||
int result = (1.0f>0.0f);
|
||||
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
|
||||
{// iterate until candidate is exist
|
||||
{// iterate until candidate is valid
|
||||
float stage_sum = 0.0f;
|
||||
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
|
||||
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
|
||||
int lcl_off = (yl*DATA_SIZE_X)+(xl);
|
||||
int stagecount = stageinfo->count;
|
||||
float stagethreshold = stageinfo->threshold;
|
||||
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
|
||||
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
|
||||
{
|
||||
// simple macro to extract shorts from int
|
||||
@ -212,7 +212,7 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int4 n1 = pN[1];
|
||||
int4 n2 = pN[2];
|
||||
float nodethreshold = as_float(n2.y) * variance_norm_factor;
|
||||
// calc sum of intensity pixels according to node information
|
||||
// calc sum of intensity pixels according to classifier node information
|
||||
float classsum =
|
||||
(SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
|
||||
(SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
|
||||
@ -228,8 +228,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
|
||||
if(index<OUTPUTSZ)
|
||||
{
|
||||
int x = GroupX+lid_x;
|
||||
int y = GroupY+lid_y;
|
||||
int x = GroupX+xl;
|
||||
int y = GroupY+yl;
|
||||
int4 candidate_result;
|
||||
candidate_result.x = convert_int_rtn(x*ScaleFactor);
|
||||
candidate_result.y = convert_int_rtn(y*ScaleFactor);
|
||||
|
@ -46,33 +46,26 @@
|
||||
#ifndef WITH_MASK
|
||||
#define WITH_MASK 0
|
||||
#endif
|
||||
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||
|
||||
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
|
||||
{
|
||||
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
|
||||
}
|
||||
|
||||
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
|
||||
{
|
||||
return read_imagef(_eig, sampler, pt).x;
|
||||
}
|
||||
//macro to read eigenvalue matrix
|
||||
#define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]
|
||||
|
||||
__kernel
|
||||
void findCorners
|
||||
(
|
||||
image2d_t eig,
|
||||
__global const char * mask,
|
||||
__global float2 * corners,
|
||||
const int mask_strip,// in pixels
|
||||
const float threshold,
|
||||
const int rows,
|
||||
const int cols,
|
||||
const int max_count,
|
||||
__global int * g_counter
|
||||
__global const char* eig,
|
||||
const int eig_pitch,
|
||||
__global const char* mask,
|
||||
__global float2* corners,
|
||||
const int mask_strip,// in pixels
|
||||
__global const float* pMinMax,
|
||||
const float qualityLevel,
|
||||
const int rows,
|
||||
const int cols,
|
||||
const int max_count,
|
||||
__global int* g_counter
|
||||
)
|
||||
{
|
||||
float threshold = qualityLevel*pMinMax[1];
|
||||
const int j = get_global_id(0);
|
||||
const int i = get_global_id(1);
|
||||
|
||||
@ -82,39 +75,42 @@ __kernel
|
||||
#endif
|
||||
)
|
||||
{
|
||||
const float val = ELEM_INT2(eig, j, i);
|
||||
const float val = GET_SRC_32F(j, i);
|
||||
|
||||
if (val > threshold)
|
||||
{
|
||||
float maxVal = val;
|
||||
maxVal = fmax(GET_SRC_32F(j - 1, i - 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j , i - 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j + 1, i - 1), maxVal);
|
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
|
||||
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
|
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j - 1, i), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j + 1, i), maxVal);
|
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
|
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
|
||||
|
||||
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
|
||||
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
|
||||
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j - 1, i + 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j , i + 1), maxVal);
|
||||
maxVal = fmax(GET_SRC_32F(j + 1, i + 1), maxVal);
|
||||
|
||||
if (val == maxVal)
|
||||
{
|
||||
const int ind = atomic_inc(g_counter);
|
||||
|
||||
if (ind < max_count)
|
||||
corners[ind] = (float2)(j, i);
|
||||
{// pack and store eigenvalue and its coordinates
|
||||
corners[ind].x = val;
|
||||
corners[ind].y = as_float(j|(i<<16));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef GET_SRC_32F
|
||||
|
||||
|
||||
//bitonic sort
|
||||
__kernel
|
||||
void sortCorners_bitonicSort
|
||||
(
|
||||
image2d_t eig,
|
||||
__global float2 * corners,
|
||||
const int count,
|
||||
const int stage,
|
||||
@ -140,8 +136,8 @@ __kernel
|
||||
const float2 leftPt = corners[leftId];
|
||||
const float2 rightPt = corners[rightId];
|
||||
|
||||
const float leftVal = ELEM_FLT2(eig, leftPt);
|
||||
const float rightVal = ELEM_FLT2(eig, rightPt);
|
||||
const float leftVal = leftPt.x;
|
||||
const float rightVal = rightPt.x;
|
||||
|
||||
const bool compareResult = leftVal > rightVal;
|
||||
|
||||
@ -152,124 +148,22 @@ __kernel
|
||||
corners[rightId] = sortOrder ? greater : lesser;
|
||||
}
|
||||
|
||||
//selection sort for gfft
|
||||
//kernel is ported from Bolt library:
|
||||
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
|
||||
// Local sort will firstly sort elements of each workgroup using selection sort
|
||||
// its performance is O(n)
|
||||
__kernel
|
||||
void sortCorners_selectionSortLocal
|
||||
(
|
||||
image2d_t eig,
|
||||
__global float2 * corners,
|
||||
const int count,
|
||||
__local float2 * scratch
|
||||
)
|
||||
// this is simple short serial kernel that makes some short reduction and initialization work
|
||||
// it makes HOST like work to avoid additional sync with HOST to do this short work
|
||||
// data - input/output float2.
|
||||
// input data are sevral (min,max) pairs
|
||||
// output data is one reduced (min,max) pair
|
||||
// g_counter - counter that have to be initialized by 0 for next findCorner call.
|
||||
__kernel void arithm_op_minMax_final(__global float * data, int groupnum,__global int * g_counter)
|
||||
{
|
||||
int i = get_local_id(0); // index in workgroup
|
||||
int numOfGroups = get_num_groups(0); // index in workgroup
|
||||
int groupID = get_group_id(0);
|
||||
int wg = get_local_size(0); // workgroup size = block size
|
||||
int n; // number of elements to be processed for this work group
|
||||
|
||||
int offset = groupID * wg;
|
||||
int same = 0;
|
||||
corners += offset;
|
||||
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
|
||||
float2 pt1, pt2;
|
||||
|
||||
pt1 = corners[min(i, n)];
|
||||
scratch[i] = pt1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if(i >= n)
|
||||
g_counter[0] = 0;
|
||||
float minVal = data[0];
|
||||
float maxVal = data[groupnum];
|
||||
for(int i=1;i<groupnum;++i)
|
||||
{
|
||||
return;
|
||||
minVal = min(minVal,data[i]);
|
||||
maxVal = max(maxVal,data[i+groupnum]);
|
||||
}
|
||||
|
||||
float val1 = ELEM_FLT2(eig, pt1);
|
||||
float val2;
|
||||
|
||||
int pos = 0;
|
||||
for (int j=0;j<n;++j)
|
||||
{
|
||||
pt2 = scratch[j];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val2 > val1)
|
||||
pos++;//calculate the rank of this element in this work group
|
||||
else
|
||||
{
|
||||
if(val1 > val2)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
// val1 and val2 are same
|
||||
same++;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
corners[pos + j] = pt1;
|
||||
}
|
||||
__kernel
|
||||
void sortCorners_selectionSortFinal
|
||||
(
|
||||
image2d_t eig,
|
||||
__global float2 * corners,
|
||||
const int count
|
||||
)
|
||||
{
|
||||
const int i = get_local_id(0); // index in workgroup
|
||||
const int numOfGroups = get_num_groups(0); // index in workgroup
|
||||
const int groupID = get_group_id(0);
|
||||
const int wg = get_local_size(0); // workgroup size = block size
|
||||
int pos = 0, same = 0;
|
||||
const int offset = get_group_id(0) * wg;
|
||||
const int remainder = count - wg*(numOfGroups-1);
|
||||
|
||||
if((offset + i ) >= count)
|
||||
return;
|
||||
float2 pt1, pt2;
|
||||
pt1 = corners[groupID*wg + i];
|
||||
|
||||
float val1 = ELEM_FLT2(eig, pt1);
|
||||
float val2;
|
||||
|
||||
for(int j=0; j<numOfGroups-1; j++ )
|
||||
{
|
||||
for(int k=0; k<wg; k++)
|
||||
{
|
||||
pt2 = corners[j*wg + k];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val1 > val2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Increment only if the value is not the same.
|
||||
if( val2 > val1 )
|
||||
pos++;
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int k=0; k<remainder; k++)
|
||||
{
|
||||
pt2 = corners[(numOfGroups-1)*wg + k];
|
||||
val2 = ELEM_FLT2(eig, pt2);
|
||||
if(val1 > val2)
|
||||
break;
|
||||
else
|
||||
{
|
||||
//Don't increment if the value is the same.
|
||||
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
|
||||
if(val2 > val1)
|
||||
pos++;
|
||||
else
|
||||
same++;
|
||||
}
|
||||
}
|
||||
for (int j=0; j< same; j++)
|
||||
corners[pos + j] = pt1;
|
||||
}
|
||||
data[0] = minVal;
|
||||
data[1] = maxVal;
|
||||
}
|
@ -1,45 +1,97 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////Macro for border type////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#ifdef BORDER_REPLICATE
|
||||
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr))
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define EXTRAPOLATE(x, maxV)
|
||||
#elif defined BORDER_REPLICATE
|
||||
//aaaaaa|abcdefgh|hhhhhhh
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = max(min((x), (maxV) - 1), 0); \
|
||||
}
|
||||
#elif defined BORDER_WRAP
|
||||
//cdefgh|abcdefgh|abcdefg
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = ( (x) + (maxV) ) % (maxV); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT
|
||||
//fedcba|abcdefgh|hgfedcb
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT_101
|
||||
//gfedcb|abcdefgh|gfedcba
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
|
||||
}
|
||||
#else
|
||||
#error No extrapolation method
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT
|
||||
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
|
||||
#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
|
||||
#else
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT101
|
||||
//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
|
||||
#endif
|
||||
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
|
||||
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
|
||||
|
||||
#ifdef BORDER_WRAP
|
||||
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
|
||||
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i))
|
||||
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
|
||||
#endif
|
||||
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
|
||||
int srcX = x + srcOffsetX - (kernel_border); \
|
||||
int srcY = y + srcOffsetY - (kernel_border); \
|
||||
int xb = srcX; \
|
||||
int yb = srcY; \
|
||||
\
|
||||
EXTRAPOLATE(xb, (width)); \
|
||||
EXTRAPOLATE(yb, (height)); \
|
||||
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
|
||||
\
|
||||
if(lix < ((kernel_border)*2)) \
|
||||
{ \
|
||||
int xb = srcX+BLK_X; \
|
||||
EXTRAPOLATE(xb,(width)); \
|
||||
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
|
||||
} \
|
||||
if(liy< ((kernel_border)*2)) \
|
||||
{ \
|
||||
int yb = srcY+BLK_Y; \
|
||||
EXTRAPOLATE(yb, (height)); \
|
||||
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
|
||||
} \
|
||||
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
|
||||
{ \
|
||||
int xb = srcX+BLK_X; \
|
||||
int yb = srcY+BLK_Y; \
|
||||
EXTRAPOLATE(xb,(width)); \
|
||||
EXTRAPOLATE(yb,(height)); \
|
||||
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
|
||||
}
|
||||
|
||||
__kernel void sobel3(
|
||||
__global uchar* Src,
|
||||
__global float* DstX,
|
||||
__global float* DstY,
|
||||
int width, int height,
|
||||
uint srcStride, uint dstStride,
|
||||
float scale
|
||||
const uint SrcPitch,
|
||||
const int srcOffsetX,
|
||||
const int srcOffsetY,
|
||||
__global uchar* DstX,
|
||||
const int DstXOffset,
|
||||
const uint DstXPitch,
|
||||
__global uchar* DstY,
|
||||
const int DstYOffset,
|
||||
const uint DstYPitch,
|
||||
int width,
|
||||
int height,
|
||||
int dstWidth,
|
||||
int dstHeight,
|
||||
float scale
|
||||
)
|
||||
{
|
||||
__local float lsmem[BLK_Y+2][BLK_X+2];
|
||||
@ -47,62 +99,249 @@ __kernel void sobel3(
|
||||
int lix = get_local_id(0);
|
||||
int liy = get_local_id(1);
|
||||
|
||||
int gix = get_group_id(0);
|
||||
int giy = get_group_id(1);
|
||||
|
||||
int id_x = get_global_id(0);
|
||||
int id_y = get_global_id(1);
|
||||
|
||||
lsmem[liy+1][lix+1] = convert_float(Src[ id_y * srcStride + id_x ]);
|
||||
|
||||
int id_y_h = ADDR_H(id_y-1, 0,height);
|
||||
int id_y_b = ADDR_B(id_y+1, height,id_y+1);
|
||||
|
||||
int id_x_l = ADDR_L(id_x-1, 0,width);
|
||||
int id_x_r = ADDR_R(id_x+1, width,id_x+1);
|
||||
|
||||
if(liy==0)
|
||||
{
|
||||
lsmem[0][lix+1]=convert_float(Src[ id_y_h * srcStride + id_x ]);
|
||||
|
||||
if(lix==0)
|
||||
lsmem[0][0]=convert_float(Src[ id_y_h * srcStride + id_x_l ]);
|
||||
else if(lix==BLK_X-1)
|
||||
lsmem[0][BLK_X+1]=convert_float(Src[ id_y_h * srcStride + id_x_r ]);
|
||||
}
|
||||
else if(liy==BLK_Y-1)
|
||||
{
|
||||
lsmem[BLK_Y+1][lix+1]=convert_float(Src[ id_y_b * srcStride + id_x ]);
|
||||
|
||||
if(lix==0)
|
||||
lsmem[BLK_Y+1][0]=convert_float(Src[ id_y_b * srcStride + id_x_l ]);
|
||||
else if(lix==BLK_X-1)
|
||||
lsmem[BLK_Y+1][BLK_X+1]=convert_float(Src[ id_y_b * srcStride + id_x_r ]);
|
||||
}
|
||||
|
||||
if(lix==0)
|
||||
lsmem[liy+1][0] = convert_float(Src[ id_y * srcStride + id_x_l ]);
|
||||
else if(lix==BLK_X-1)
|
||||
lsmem[liy+1][BLK_X+1] = convert_float(Src[ id_y * srcStride + id_x_r ]);
|
||||
int x = (int)get_global_id(0);
|
||||
int y = (int)get_global_id(1);
|
||||
|
||||
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if( x >= dstWidth || y >=dstHeight ) return;
|
||||
|
||||
float u1 = lsmem[liy][lix];
|
||||
float u2 = lsmem[liy][lix+1];
|
||||
float u3 = lsmem[liy][lix+2];
|
||||
|
||||
float m1 = lsmem[liy+1][lix];
|
||||
float m2 = lsmem[liy+1][lix+1];
|
||||
float m3 = lsmem[liy+1][lix+2];
|
||||
|
||||
float b1 = lsmem[liy+2][lix];
|
||||
float b2 = lsmem[liy+2][lix+1];
|
||||
float b3 = lsmem[liy+2][lix+2];
|
||||
|
||||
//m2 * scale;//
|
||||
float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 );
|
||||
DstX[ id_y * dstStride + id_x ] = dx * scale;
|
||||
//calc and store dx and dy;//
|
||||
#ifdef SCHARR
|
||||
DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
|
||||
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
|
||||
#else
|
||||
DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
|
||||
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
|
||||
#endif
|
||||
}
|
||||
|
||||
float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3);
|
||||
DstY[ id_y * dstStride + id_x ] = dy * scale;
|
||||
}
|
||||
__kernel void sobel5(
|
||||
__global uchar* Src,
|
||||
const uint SrcPitch,
|
||||
const int srcOffsetX,
|
||||
const int srcOffsetY,
|
||||
__global uchar* DstX,
|
||||
const int DstXOffset,
|
||||
const uint DstXPitch,
|
||||
__global uchar* DstY,
|
||||
const int DstYOffset,
|
||||
const uint DstYPitch,
|
||||
int width,
|
||||
int height,
|
||||
int dstWidth,
|
||||
int dstHeight,
|
||||
float scale
|
||||
)
|
||||
{
|
||||
__local float lsmem[BLK_Y+4][BLK_X+4];
|
||||
|
||||
int lix = get_local_id(0);
|
||||
int liy = get_local_id(1);
|
||||
|
||||
int x = (int)get_global_id(0);
|
||||
int y = (int)get_global_id(1);
|
||||
|
||||
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if( x >= dstWidth || y >=dstHeight ) return;
|
||||
|
||||
float t1 = lsmem[liy][lix];
|
||||
float t2 = lsmem[liy][lix+1];
|
||||
float t3 = lsmem[liy][lix+2];
|
||||
float t4 = lsmem[liy][lix+3];
|
||||
float t5 = lsmem[liy][lix+4];
|
||||
|
||||
float u1 = lsmem[liy+1][lix];
|
||||
float u2 = lsmem[liy+1][lix+1];
|
||||
    float u3 = lsmem[liy+1][lix+2];
    float u4 = lsmem[liy+1][lix+3];
    float u5 = lsmem[liy+1][lix+4];

    float m1 = lsmem[liy+2][lix];
    float m2 = lsmem[liy+2][lix+1];
    float m4 = lsmem[liy+2][lix+3];
    float m5 = lsmem[liy+2][lix+4];

    float l1 = lsmem[liy+3][lix];
    float l2 = lsmem[liy+3][lix+1];
    float l3 = lsmem[liy+3][lix+2];
    float l4 = lsmem[liy+3][lix+3];
    float l5 = lsmem[liy+3][lix+4];

    float b1 = lsmem[liy+4][lix];
    float b2 = lsmem[liy+4][lix+1];
    float b3 = lsmem[liy+4][lix+2];
    float b4 = lsmem[liy+4][lix+3];
    float b5 = lsmem[liy+4][lix+4];

    //calc and store dx and dy
    DSTX(x,y) = scale *
        mad(12.0f, m4 - m2,
            mad(6.0f, m5 - m1,
                mad(8.0f, u4 - u2 + l4 - l2,
                    mad(4.0f, u5 - u1 + l5 - l1,
                        mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
                        )
                    )
                )
            );

    DSTY(x,y) = scale *
        mad(12.0f, l3 - u3,
            mad(6.0f, b3 - t3,
                mad(8.0f, l2 - u2 + l4 - u4,
                    mad(4.0f, b2 - t2 + b4 - t4,
                        mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
                        )
                    )
                )
            );
}

__kernel void sobel7(
        __global uchar* Src,
        const uint SrcPitch,
        const int srcOffsetX,
        const int srcOffsetY,
        __global uchar* DstX,
        const int DstXOffset,
        const uint DstXPitch,
        __global uchar* DstY,
        const int DstYOffset,
        const uint DstYPitch,
        int width,
        int height,
        int dstWidth,
        int dstHeight,
        float scale
        )
{
    __local float lsmem[BLK_Y+6][BLK_X+6];

    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
    barrier(CLK_LOCAL_MEM_FENCE);

    if( x >= dstWidth || y >= dstHeight ) return;

    float tt1 = lsmem[liy][lix];
    float tt2 = lsmem[liy][lix+1];
    float tt3 = lsmem[liy][lix+2];
    float tt4 = lsmem[liy][lix+3];
    float tt5 = lsmem[liy][lix+4];
    float tt6 = lsmem[liy][lix+5];
    float tt7 = lsmem[liy][lix+6];

    float t1 = lsmem[liy+1][lix];
    float t2 = lsmem[liy+1][lix+1];
    float t3 = lsmem[liy+1][lix+2];
    float t4 = lsmem[liy+1][lix+3];
    float t5 = lsmem[liy+1][lix+4];
    float t6 = lsmem[liy+1][lix+5];
    float t7 = lsmem[liy+1][lix+6];

    float u1 = lsmem[liy+2][lix];
    float u2 = lsmem[liy+2][lix+1];
    float u3 = lsmem[liy+2][lix+2];
    float u4 = lsmem[liy+2][lix+3];
    float u5 = lsmem[liy+2][lix+4];
    float u6 = lsmem[liy+2][lix+5];
    float u7 = lsmem[liy+2][lix+6];

    float m1 = lsmem[liy+3][lix];
    float m2 = lsmem[liy+3][lix+1];
    float m3 = lsmem[liy+3][lix+2];
    float m5 = lsmem[liy+3][lix+4];
    float m6 = lsmem[liy+3][lix+5];
    float m7 = lsmem[liy+3][lix+6];

    float l1 = lsmem[liy+4][lix];
    float l2 = lsmem[liy+4][lix+1];
    float l3 = lsmem[liy+4][lix+2];
    float l4 = lsmem[liy+4][lix+3];
    float l5 = lsmem[liy+4][lix+4];
    float l6 = lsmem[liy+4][lix+5];
    float l7 = lsmem[liy+4][lix+6];

    float b1 = lsmem[liy+5][lix];
    float b2 = lsmem[liy+5][lix+1];
    float b3 = lsmem[liy+5][lix+2];
    float b4 = lsmem[liy+5][lix+3];
    float b5 = lsmem[liy+5][lix+4];
    float b6 = lsmem[liy+5][lix+5];
    float b7 = lsmem[liy+5][lix+6];

    float bb1 = lsmem[liy+6][lix];
    float bb2 = lsmem[liy+6][lix+1];
    float bb3 = lsmem[liy+6][lix+2];
    float bb4 = lsmem[liy+6][lix+3];
    float bb5 = lsmem[liy+6][lix+4];
    float bb6 = lsmem[liy+6][lix+5];
    float bb7 = lsmem[liy+6][lix+6];

    //calc and store dx and dy
    DSTX(x,y) = scale *
        mad(100.0f, m5 - m3,
            mad(80.0f, m6 - m2,
                mad(20.0f, m7 - m1,
                    mad(75.0f, u5 - u3 + l5 - l3,
                        mad(60.0f, u6 - u2 + l6 - l2,
                            mad(15.0f, u7 - u1 + l7 - l1,
                                mad(30.0f, t5 - t3 + b5 - b3,
                                    mad(24.0f, t6 - t2 + b6 - b2,
                                        mad(6.0f, t7 - t1 + b7 - b1,
                                            mad(5.0f, tt5 - tt3 + bb5 - bb3,
                                                mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
                                                )
                                            )
                                        )
                                    )
                                )
                            )
                        )
                    )
                )
            );

    DSTY(x,y) = scale *
        mad(100.0f, l4 - u4,
            mad(80.0f, b4 - t4,
                mad(20.0f, bb4 - tt4,
                    mad(75.0f, l5 - u5 + l3 - u3,
                        mad(60.0f, b5 - t5 + b3 - t3,
                            mad(15.0f, bb5 - tt5 + bb3 - tt3,
                                mad(30.0f, l6 - u6 + l2 - u2,
                                    mad(24.0f, b6 - t6 + b2 - t2,
                                        mad(6.0f, bb6 - tt6 + bb2 - tt2,
                                            mad(5.0f, l7 - u7 + l1 - u1,
                                                mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
                                                )
                                            )
                                        )
                                    )
                                )
                            )
                        )
                    )
                )
            );
}
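The sobel5 and sobel7 kernels above are the standard 5x5 and 7x7 Sobel derivative pairs, written out as nested fused multiply-adds over a tile cached in local memory. On the host side the same derivatives can be produced with cv::Sobel, which is a convenient cross-check for the OpenCL path (up to border handling). A minimal reference sketch, assuming an 8-bit single-channel input and the same scale factor that is passed to the kernel:

    // Hedged reference sketch, not part of the patch: compute the same dx/dy
    // with the CPU Sobel operator to validate the sobel7 kernel output.
    #include <opencv2/imgproc/imgproc.hpp>

    void sobel7Reference(const cv::Mat& src8u, cv::Mat& dx, cv::Mat& dy, double scale)
    {
        // ksize = 7 matches the 7x7 aperture above; CV_32F matches the float DSTX/DSTY buffers.
        cv::Sobel(src8u, dx, CV_32F, 1, 0, 7, scale);
        cv::Sobel(src8u, dy, CV_32F, 0, 1, 7, scale);
    }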
@ -50,6 +50,14 @@
#define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f

#ifdef INTEL_DEVICE
#define QANGLE_TYPE int
#define QANGLE_TYPE2 int2
#else
#define QANGLE_TYPE uchar
#define QANGLE_TYPE2 uchar2
#endif

//----------------------------------------------------------------------------
// Histogram computation
// 12 threads for a cell, 12x4 threads per block
@ -59,7 +67,7 @@ __kernel void compute_hists_lut_kernel(
    const int cnbins, const int cblock_hist_size, const int img_block_width,
    const int blocks_in_group, const int blocks_total,
    const int grad_quadstep, const int qangle_step,
    __global const float* grad, __global const uchar* qangle,
    __global const float* grad, __global const QANGLE_TYPE* qangle,
    __global const float* gauss_w_lut,
    __global float* block_hists, __local float* smem)
{
@ -86,7 +94,7 @@ __kernel void compute_hists_lut_kernel(

    __global const float* grad_ptr = (gid < blocks_total) ?
        grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
    __global const uchar* qangle_ptr = (gid < blocks_total) ?
    __global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
        qangle + offset_y * qangle_step + (offset_x << 1) : qangle;

    __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
@ -101,7 +109,7 @@ __kernel void compute_hists_lut_kernel(
    for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
    {
        float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
        uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
        QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);

        grad_ptr += grad_quadstep;
        qangle_ptr += qangle_step;
@ -558,7 +566,7 @@ __kernel void extract_descrs_by_cols_kernel(
__kernel void compute_gradients_8UC4_kernel(
    const int height, const int width,
    const int img_step, const int grad_quadstep, const int qangle_step,
    const __global uchar4 * img, __global float * grad, __global uchar * qangle,
    const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
    const float angle_scale, const char correct_gamma, const int cnbins)
{
    const int x = get_global_id(0);
@ -660,7 +668,7 @@ __kernel void compute_gradients_8UC4_kernel(
__kernel void compute_gradients_8UC1_kernel(
    const int height, const int width,
    const int img_step, const int grad_quadstep, const int qangle_step,
    __global const uchar * img, __global float * grad, __global uchar * qangle,
    __global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
    const float angle_scale, const char correct_gamma, const int cnbins)
{
    const int x = get_global_id(0);
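The QANGLE_TYPE switch above widens the quantized-angle buffer from uchar to int when the kernels are built for an Intel OpenCL device, so the host side has to allocate the qangle matrix with a matching element type (two bins per pixel in either case). A minimal sketch of that pairing, with a hypothetical isIntelDevice flag standing in for whatever device query the host HOG code performs:

    // Hedged sketch (hypothetical helper, not from the patch): allocate the
    // quantized-angle buffer so its element type mirrors QANGLE_TYPE/QANGLE_TYPE2.
    #include <opencv2/ocl/ocl.hpp>

    cv::ocl::oclMat makeQangleBuffer(int rows, int cols, bool isIntelDevice)
    {
        int type = isIntelDevice ? CV_32SC2 : CV_8UC2;  // int2 vs uchar2 per pixel
        return cv::ocl::oclMat(rows, cols, type);
    }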
@ -116,7 +116,7 @@ Mat randomMat(RNG& rng, Size size, int type, double minVal, double maxVal, bool

    Mat m(size0, type);

    rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal));
    rng.fill(m, RNG::UNIFORM, minVal, maxVal);
    if( size0 == size )
        return m;
    return m(Rect((size0.width-size.width)/2, (size0.height-size.height)/2, size.width, size.height));
@ -142,7 +142,7 @@ Mat randomMat(RNG& rng, const vector<int>& size, int type, double minVal, double

    Mat m(dims, &size0[0], type);

    rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal));
    rng.fill(m, RNG::UNIFORM, minVal, maxVal);
    if( eqsize )
        return m;
    return m(&r[0]);
@ -28,14 +28,11 @@ set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-s
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-sections -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now ${CMAKE_EXE_LINKER_FLAGS}")

if(USE_NEON)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
  message(WARNING "You use obsolete variable USE_NEON to enable NEON instruction set. Use -DENABLE_NEON=ON instead." )
  set(ENABLE_NEON TRUE)
elseif(USE_VFPV3)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3")
else()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3-d16")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3-d16")
  message(WARNING "You use obsolete variable USE_VFPV3 to enable VFPV3 instruction set. Use -DENABLE_VFPV3=ON instead." )
  set(ENABLE_VFPV3 TRUE)
endif()

set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${ARM_LINUX_SYSROOT})
376
samples/cpp/intelperc_capture.cpp
Normal file
@ -0,0 +1,376 @@
// testOpenCVCam.cpp : Defines the entry point for the console application.
//

#include "opencv2/highgui/highgui.hpp"

#include <iostream>

using namespace cv;
using namespace std;

static bool g_printStreamSetting = false;
static int g_imageStreamProfileIdx = -1;
static int g_depthStreamProfileIdx = -1;
static bool g_irStreamShow = false;
static double g_imageBrightness = -DBL_MAX;
static double g_imageContrast = -DBL_MAX;
static bool g_printTiming = false;
static bool g_showClosedPoint = false;


static int g_closedDepthPoint[2];

static void printUsage(const char *arg0)
{
    const char *filename = arg0;
    while (*filename)
        filename++;
    while ((arg0 <= filename) && ('\\' != *filename) && ('/' != *filename))
        filename--;
    filename++;

    cout << "This program demonstrates usage of camera supported\nby Intel Perceptual computing SDK." << endl << endl;
    cout << "usage: " << filename << " [-ps] [-isp IDX] [-dsp IDX]\n [-ir] [-imb VAL] [-imc VAL]" << endl << endl;
    cout << " -ps,           print streams setting and profiles" << endl;
    cout << " -isp IDX,      set profile index of the image stream" << endl;
    cout << " -dsp IDX,      set profile index of the depth stream" << endl;
    cout << " -ir,           show data from IR stream" << endl;
    cout << " -imb VAL,      set brightness value for an image stream" << endl;
    cout << " -imc VAL,      set contrast value for an image stream" << endl;
    cout << " -pts,          print frame index and frame time" << endl;
    cout << " --show-closed, mark the point closest to the camera on the displayed images" << endl;
    cout << endl;
}

static void parseCMDLine(int argc, char* argv[])
{
    if( argc == 1 )
    {
        printUsage(argv[0]);
    }
    else
    {
        for( int i = 1; i < argc; i++ )
        {
            if ((0 == strcmp(argv[i], "--help")) || (0 == strcmp( argv[i], "-h")))
            {
                printUsage(argv[0]);
                exit(0);
            }
            else if ((0 == strcmp( argv[i], "--print-streams")) || (0 == strcmp( argv[i], "-ps")))
            {
                g_printStreamSetting = true;
            }
            else if ((0 == strcmp( argv[i], "--image-stream-prof")) || (0 == strcmp( argv[i], "-isp")))
            {
                g_imageStreamProfileIdx = atoi(argv[++i]);
            }
            else if ((0 == strcmp( argv[i], "--depth-stream-prof")) || (0 == strcmp( argv[i], "-dsp")))
            {
                g_depthStreamProfileIdx = atoi(argv[++i]);
            }
            else if (0 == strcmp( argv[i], "-ir"))
            {
                g_irStreamShow = true;
            }
            else if (0 == strcmp( argv[i], "-imb"))
            {
                g_imageBrightness = atof(argv[++i]);
            }
            else if (0 == strcmp( argv[i], "-imc"))
            {
                g_imageContrast = atof(argv[++i]);
            }
            else if (0 == strcmp(argv[i], "-pts"))
            {
                g_printTiming = true;
            }
            else if (0 == strcmp(argv[i], "--show-closed"))
            {
                g_showClosedPoint = true;
            }
            else
            {
                cout << "Unsupported command line argument: " << argv[i] << "." << endl;
                exit(-1);
            }
        }
        if (g_showClosedPoint && (-1 == g_depthStreamProfileIdx))
        {
            cerr << "For --show-closed a depth profile has to be selected" << endl;
            exit(-1);
        }
    }
}

static void printStreamProperties(VideoCapture &capture)
{
    size_t profilesCount = (size_t)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
    cout << "Image stream." << endl;
    cout << "  Brightness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS) << endl;
    cout << "  Contrast = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST) << endl;
    cout << "  Saturation = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SATURATION) << endl;
    cout << "  Hue = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_HUE) << endl;
    cout << "  Gamma = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAMMA) << endl;
    cout << "  Sharpness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SHARPNESS) << endl;
    cout << "  Gain = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAIN) << endl;
    cout << "  Backlight = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BACKLIGHT) << endl;
    cout << "Image streams profiles:" << endl;
    for (size_t i = 0; i < profilesCount; i++)
    {
        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
        cout << "  Profile[" << i << "]: ";
        cout << "width = " <<
            (int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_WIDTH);
        cout << ", height = " <<
            (int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_HEIGHT);
        cout << ", fps = " <<
            capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FPS);
        cout << endl;
    }

    profilesCount = (size_t)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
    cout << "Depth stream." << endl;
    cout << "  Low confidence value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE) << endl;
    cout << "  Saturation value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE) << endl;
    cout << "  Confidence threshold = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD) << endl;
    cout << "  Focal length = (" << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ) << ", "
        << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT) << ")" << endl;
    cout << "Depth streams profiles:" << endl;
    for (size_t i = 0; i < profilesCount; i++)
    {
        capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
        cout << "  Profile[" << i << "]: ";
        cout << "width = " <<
            (int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_WIDTH);
        cout << ", height = " <<
            (int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_HEIGHT);
        cout << ", fps = " <<
            capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FPS);
        cout << endl;
    }
}

static void imshowImage(const char *winname, Mat &image, VideoCapture &capture)
{
    if (g_showClosedPoint)
    {
        Mat uvMap;
        if (capture.retrieve(uvMap, CAP_INTELPERC_UVDEPTH_MAP))
        {
            float *uvmap = (float *)uvMap.ptr() + 2 * (g_closedDepthPoint[0] * uvMap.cols + g_closedDepthPoint[1]);
            int x = (int)((*uvmap) * image.cols); uvmap++;
            int y = (int)((*uvmap) * image.rows);

            if ((0 <= x) && (0 <= y))
            {
                static const int pointSize = 4;
                for (int row = y; row < min(y + pointSize, image.rows); row++)
                {
                    uchar* ptrDst = image.ptr(row) + x * 3 + 2; //+2 -> Red
                    for (int col = 0; col < min(pointSize, image.cols - x); col++, ptrDst+=3)
                    {
                        *ptrDst = 255;
                    }
                }
            }
        }
    }
    imshow(winname, image);
}
static void imshowIR(const char *winname, Mat &ir)
{
    Mat image;
    if (g_showClosedPoint)
    {
        image.create(ir.rows, ir.cols, CV_8UC3);
        for (int row = 0; row < ir.rows; row++)
        {
            uchar* ptrDst = image.ptr(row);
            short* ptrSrc = (short*)ir.ptr(row);
            for (int col = 0; col < ir.cols; col++, ptrSrc++)
            {
                uchar val = (uchar) ((*ptrSrc) >> 2);
                *ptrDst = val; ptrDst++;
                *ptrDst = val; ptrDst++;
                *ptrDst = val; ptrDst++;
            }
        }

        static const int pointSize = 4;
        for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
        {
            uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2; //+2 -> Red
            for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
            {
                *ptrDst = 255;
            }
        }
    }
    else
    {
        image.create(ir.rows, ir.cols, CV_8UC1);
        for (int row = 0; row < ir.rows; row++)
        {
            uchar* ptrDst = image.ptr(row);
            short* ptrSrc = (short*)ir.ptr(row);
            for (int col = 0; col < ir.cols; col++, ptrSrc++, ptrDst++)
            {
                *ptrDst = (uchar) ((*ptrSrc) >> 2);
            }
        }
    }

    imshow(winname, image);
}
static void imshowDepth(const char *winname, Mat &depth, VideoCapture &capture)
{
    short lowValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE);
    short saturationValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE);

    Mat image;
    if (g_showClosedPoint)
    {
        image.create(depth.rows, depth.cols, CV_8UC3);
        for (int row = 0; row < depth.rows; row++)
        {
            uchar* ptrDst = image.ptr(row);
            short* ptrSrc = (short*)depth.ptr(row);
            for (int col = 0; col < depth.cols; col++, ptrSrc++)
            {
                if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
                {
                    *ptrDst = 0; ptrDst++;
                    *ptrDst = 0; ptrDst++;
                    *ptrDst = 0; ptrDst++;
                }
                else
                {
                    uchar val = (uchar) ((*ptrSrc) >> 2);
                    *ptrDst = val; ptrDst++;
                    *ptrDst = val; ptrDst++;
                    *ptrDst = val; ptrDst++;
                }
            }
        }

        static const int pointSize = 4;
        for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
        {
            uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2; //+2 -> Red
            for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
            {
                *ptrDst = 255;
            }
        }
    }
    else
    {
        image.create(depth.rows, depth.cols, CV_8UC1);
        for (int row = 0; row < depth.rows; row++)
        {
            uchar* ptrDst = image.ptr(row);
            short* ptrSrc = (short*)depth.ptr(row);
            for (int col = 0; col < depth.cols; col++, ptrSrc++, ptrDst++)
            {
                if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
                    *ptrDst = 0;
                else
                    *ptrDst = (uchar) ((*ptrSrc) >> 2);
            }
        }
    }
    imshow(winname, image);
}

int main(int argc, char* argv[])
{
    parseCMDLine(argc, argv);

    VideoCapture capture;
    capture.open(CAP_INTELPERC);
    if (!capture.isOpened())
    {
        cerr << "Can not open a capture object." << endl;
        return -1;
    }

    if (g_printStreamSetting)
        printStreamProperties(capture);

    if (-1 != g_imageStreamProfileIdx)
    {
        if (!capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_imageStreamProfileIdx))
        {
            cerr << "Can not setup an image stream." << endl;
            return -1;
        }
    }
    if (-1 != g_depthStreamProfileIdx)
    {
        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_depthStreamProfileIdx))
        {
            cerr << "Can not setup a depth stream." << endl;
            return -1;
        }
    }
    else if (g_irStreamShow)
    {
        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, 0.0))
        {
            cerr << "Can not setup an IR stream." << endl;
            return -1;
        }
    }
    else
    {
        cout << "Streams not selected" << endl;
        return 0;
    }

    //Setup additional properties only after the stream profile has been selected
    if ( (-10000.0 < g_imageBrightness) && (g_imageBrightness < 10000.0))
        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageBrightness);
    if ( (0 < g_imageContrast) && (g_imageContrast < 10000.0))
        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST, g_imageContrast);

    int frame = 0;
    for(;;frame++)
    {
        Mat bgrImage;
        Mat depthImage;
        Mat irImage;

        if (!capture.grab())
        {
            cout << "Can not grab images." << endl;
            return -1;
        }

        if ((-1 != g_depthStreamProfileIdx) && (capture.retrieve(depthImage, CAP_INTELPERC_DEPTH_MAP)))
        {
            if (g_showClosedPoint)
            {
                double minVal = 0.0; double maxVal = 0.0;
                minMaxIdx(depthImage, &minVal, &maxVal, g_closedDepthPoint);
            }
            imshowDepth("depth image", depthImage, capture);
        }
        if ((g_irStreamShow) && (capture.retrieve(irImage, CAP_INTELPERC_IR_MAP)))
            imshowIR("ir image", irImage);
        if ((-1 != g_imageStreamProfileIdx) && (capture.retrieve(bgrImage, CAP_INTELPERC_IMAGE)))
            imshowImage("color image", bgrImage, capture);

        if (g_printTiming)
        {
            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_FRAMES)
                << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_FRAMES) << endl;
            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_MSEC)
                << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_MSEC) << endl;
        }
        if( waitKey(30) >= 0 )
            break;
    }

    return 0;
}
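The sample above bundles stream selection, property printing and visualization; the capture pattern it demonstrates reduces to a few calls. A minimal sketch of that core loop, assuming the same CAP_INTELPERC identifiers the sample uses:

    // Hedged minimal sketch of the grab/retrieve pattern used by the sample above.
    #include "opencv2/highgui/highgui.hpp"

    int grabOneFramePair()
    {
        cv::VideoCapture capture;
        capture.open(cv::CAP_INTELPERC);
        if (!capture.isOpened())
            return -1;

        cv::Mat bgr, depth;
        if (!capture.grab())                                    // advance both generators together
            return -1;
        capture.retrieve(bgr,   cv::CAP_INTELPERC_IMAGE);       // color frame
        capture.retrieve(depth, cv::CAP_INTELPERC_DEPTH_MAP);   // 16-bit depth map
        return 0;
    }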
@ -32,13 +32,13 @@ int main()
    for (int i = 0; i < image.rows; ++i)
        for (int j = 0; j < image.cols; ++j)
        {
            Mat sampleMat = (Mat_<float>(1,2) << i,j);
            Mat sampleMat = (Mat_<float>(1,2) << j,i);
            float response = SVM.predict(sampleMat);

            if (response == 1)
                image.at<Vec3b>(j, i) = green;
                image.at<Vec3b>(i,j) = green;
            else if (response == -1)
                image.at<Vec3b>(j, i) = blue;
                image.at<Vec3b>(i,j) = blue;
        }

    // Show the training data
@ -14,7 +14,10 @@

using namespace std;
using namespace cv;

#define LOOP_NUM 1
#define MAX_THREADS 10


///////////////////////////single-threading faces detecting///////////////////////////////

@ -29,23 +32,23 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
} ;


int64 work_begin = 0;
int64 work_end = 0;
int64 work_begin[MAX_THREADS] = {0};
int64 work_total[MAX_THREADS] = {0};
string inputName, outputName, cascadeName;

static void workBegin()
static void workBegin(int i = 0)
{
    work_begin = getTickCount();
    work_begin[i] = getTickCount();
}

static void workEnd()
static void workEnd(int i = 0)
{
    work_end += (getTickCount() - work_begin);
    work_total[i] += (getTickCount() - work_begin[i]);
}

static double getTime()
static double getTotalTime(int i = 0)
{
    return work_end /((double)cvGetTickFrequency() * 1000.);
    return work_total[i] / getTickFrequency() * 1000.;
}

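The reworked helpers keep one accumulator per thread and switch from the C API's cvGetTickFrequency() (ticks per microsecond) to getTickFrequency() (ticks per second), which is why the divisor changes while the result stays in milliseconds. A small sketch of the resulting timing pattern, with a hypothetical doWork() standing in for the detection call:

    // Hedged sketch of the per-call timing pattern; doWork() is a stand-in
    // for detectMultiScale / detectCPU in the sample.
    #include <cstdint>
    #include <opencv2/core/core.hpp>

    double averageWorkMs(int iterations)
    {
        int64_t total = 0;
        for (int i = 0; i < iterations; i++)
        {
            int64_t t0 = cv::getTickCount();
            // doWork();
            total += cv::getTickCount() - t0;
        }
        // getTickFrequency() is in ticks per second, hence * 1000. for milliseconds
        return total / cv::getTickFrequency() * 1000. / iterations;
    }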
@ -98,7 +101,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
    }
}

cvNamedWindow( "result", 1 );
if( capture )
{
    cout << "In capture ..." << endl;
@ -118,7 +120,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
    else
        resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);

    work_end = 0;
    if(useCPU)
        detectCPU(frameCopy, faces, cpu_cascade, 1);
    else
@ -132,16 +133,16 @@ static int facedetect_one_thread(bool useCPU, double scale )
}
else
{
    cout << "In image read" << endl;
    cout << "In image read " << image.size() << endl;
    vector<Rect> faces;
    vector<Rect> ref_rst;
    double accuracy = 0.;
    detectCPU(image, ref_rst, cpu_cascade, scale);
    work_end = 0;

    cout << "loops: ";
    for(int i = 0; i <= LOOP_NUM; i ++)
    {
        cout << "loop" << i << endl;
        cout << i << ", ";
        if(useCPU)
            detectCPU(image, faces, cpu_cascade, scale);
        else
@ -152,16 +153,15 @@ static int facedetect_one_thread(bool useCPU, double scale )
            accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
        }
    }
    if (i == LOOP_NUM)
    {
        if (useCPU)
            cout << "average CPU time (noCamera) : ";
        else
            cout << "average GPU time (noCamera) : ";
        cout << getTime() / LOOP_NUM << " ms" << endl;
        cout << "accuracy value: " << accuracy <<endl;
    }
}
cout << "done!" << endl;
if (useCPU)
    cout << "average CPU time (noCamera) : ";
else
    cout << "average GPU time (noCamera) : ";
cout << getTotalTime() / LOOP_NUM << " ms" << endl;
cout << "accuracy value: " << accuracy <<endl;

Draw(image, faces, scale);
waitKey(0);
}
@ -174,9 +174,7 @@ static int facedetect_one_thread(bool useCPU, double scale )
///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
#if defined(_MSC_VER) && (_MSC_VER >= 1700)

#define MAX_THREADS 10

static void detectFaces(std::string fileName)
static void detectFaces(std::string fileName, int threadNum)
{
    ocl::OclCascadeClassifier cascade;
    if(!cascade.load(cascadeName))
@ -188,7 +186,7 @@ static void detectFaces(std::string fileName)
    Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
    if (img.empty())
    {
        std::cout << "cann't open file " + fileName <<std::endl;
        std::cout << '[' << threadNum << "] " << "can't open file " + fileName <<std::endl;
        return;
    }

@ -196,23 +194,37 @@ static void detectFaces(std::string fileName)
    d_img.upload(img);

    std::vector<Rect> oclfaces;
    cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0 | CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
    std::thread::id tid = std::this_thread::get_id();
    std::cout << '[' << threadNum << "] "
        << "ThreadID = " << tid
        << ", CommandQueue = " << *(void**)ocl::getClCommandQueuePtr()
        << endl;
    for(int i = 0; i <= LOOP_NUM; i++)
    {
        if(i>0) workBegin(threadNum);
        cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
        if(i>0) workEnd(threadNum);
    }
    std::cout << '[' << threadNum << "] " << "Average time = " << getTotalTime(threadNum) / LOOP_NUM << " ms" << endl;

    for(unsigned int i = 0; i<oclfaces.size(); i++)
        rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);

    std::string::size_type pos = outputName.rfind('.');
    std::string outputNameTid = outputName + '-' + std::to_string(_threadid);
    if(pos == std::string::npos)
    std::string strTid = std::to_string(_threadid);
    if( !outputName.empty() )
    {
        std::cout << "Invalid output file name: " << outputName << std::endl;
        if(pos == std::string::npos)
        {
            std::cout << "Invalid output file name: " << outputName << std::endl;
        }
        else
        {
            std::string outputNameTid = outputName.substr(0, pos) + "_" + strTid + outputName.substr(pos);
            imwrite(outputNameTid, img);
        }
    }
    else
    {
        outputNameTid = outputName.substr(0, pos) + "_" + std::to_string(_threadid) + outputName.substr(pos);
        imwrite(outputNameTid, img);
    }
    imshow(outputNameTid, img);
    imshow(strTid, img);
    waitKey(0);
}

@ -221,7 +233,7 @@ static void facedetect_multithreading(int nthreads)
    int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
    std::vector<std::thread> threads;
    for(int i = 0; i<thread_number; i++)
        threads.push_back(std::thread(detectFaces, inputName));
        threads.push_back(std::thread(detectFaces, inputName, i));
    for(int i = 0; i<thread_number; i++)
        threads[i].join();
}
@ -237,8 +249,7 @@ int main( int argc, const char** argv )
    " specify template file path }"
    "{ c scale | 1.0 | scale image }"
    "{ s use_cpu | false | use cpu or gpu to process the image }"
    "{ o output | facedetect_output.jpg |"
    " specify output image save path(only works when input is images) }"
    "{ o output | | specify output image save path(only works when input is images) }"
    "{ n thread_num | 1 | set number of threads >= 1 }";

    CommandLineParser cmd(argc, argv, keys);
@ -312,8 +323,6 @@ void detectCPU( Mat& img, vector<Rect>& faces,
void Draw(Mat& img, vector<Rect>& faces, double scale)
{
    int i = 0;
    putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50),
        FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
    for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
    {
        Point center;
@ -324,8 +333,8 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
        radius = cvRound((r->width + r->height)*0.25*scale);
        circle( img, center, radius, color, 3, 8, 0 );
    }
    //imwrite( outputName, img );
    if(abs(scale-1.0)>.001)
    //if( !outputName.empty() ) imwrite( outputName, img );
    if( abs(scale-1.0)>.001 )
    {
        resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
    }