Merge pull request #2045 from SpecLad:merge-2.4

This commit is contained in:
Roman Donchenko 2013-12-25 16:18:00 +04:00 committed by OpenCV Buildbot
commit faddd5b97f
41 changed files with 3627 additions and 952 deletions

View File

@ -156,6 +156,7 @@ OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 )
# OpenCV build components
@ -207,7 +208,8 @@ OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions"
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND ARM) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 )
@ -226,6 +228,15 @@ include(cmake/OpenCVVersion.cmake)
# Save libs and executables in the same place
set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" )
# On Android the ABI name selects the ARM FPU extensions:
# an ABI containing "NEON" switches ENABLE_NEON on, one containing
# "VFPV3" switches ENABLE_VFPV3 on.
if(ANDROID)
  if(ANDROID_ABI MATCHES "NEON")
    set(ENABLE_NEON ON)
  endif()

  if(ANDROID_ABI MATCHES "VFPV3")
    set(ENABLE_VFPV3 ON)
  endif()
endif()
if(ANDROID OR WIN32)
set(OPENCV_DOC_INSTALL_PATH doc)
elseif(INSTALL_TO_MANGLED_PATHS)
@ -820,6 +831,11 @@ if(DEFINED WITH_XINE)
status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO)
endif(DEFINED WITH_XINE)
# Report Intel Perceptual Computing SDK availability, but only when the
# WITH_INTELPERC option exists on this platform.
if(DEFINED WITH_INTELPERC)
  status("    Intel PerC:" HAVE_INTELPERC THEN "YES" ELSE NO)
endif()
# ========================== Other third-party libraries ==========================
status("")
status(" Other third-party libraries:")

View File

@ -124,6 +124,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_SSE2)
add_extra_compiler_option(-msse2)
endif()
# ARM FPU selection: NEON wins when both are requested, VFPv3 is used
# only when NEON is not enabled.
if(ENABLE_NEON)
  add_extra_compiler_option("-mfpu=neon")
elseif(ENABLE_VFPV3)
  add_extra_compiler_option("-mfpu=vfpv3")
endif()
# SSE3 and further should be disabled under MingW because it generates compiler errors
if(NOT MINGW)

View File

@ -0,0 +1,20 @@
# Locates the Intel Perceptual Computing SDK.
#
# Main variables:
#   INTELPERC_LIBRARIES and INTELPERC_INCLUDE_DIR to link Intel Perceptual Computing SDK modules
#   HAVE_INTELPERC for conditional compilation of OpenCV with/without Intel Perceptual Computing SDK
#
# The search paths are built by appending directly to the PCSDK_DIR environment
# variable, so it is presumably expected to end with a path separator - TODO confirm.

# The headers live in the same place for both architectures.
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")

# Only the library sub-directory depends on the target architecture.
if(X86_64)
  set(intelperc_lib_subdir "x64")
else()
  set(intelperc_lib_subdir "Win32")
endif()
find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/${intelperc_lib_subdir}" DOC "Path to Intel Perceptual Computing SDK interface libraries")
unset(intelperc_lib_subdir)

if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
  set(HAVE_INTELPERC TRUE)
else()
  set(HAVE_INTELPERC FALSE)
  # Name the variables this module actually defines so the user knows what to set.
  message(WARNING "Intel Perceptual Computing SDK was not found: headers (INTELPERC_INCLUDE_DIR variable) or libraries (INTELPERC_LIBRARIES variable) are missing. Set these variables or the PCSDK_DIR environment variable.")
endif()

mark_as_advanced(FORCE INTELPERC_LIBRARIES INTELPERC_INCLUDE_DIR)

View File

@ -277,3 +277,8 @@ if (NOT IOS)
set(HAVE_QTKIT YES)
endif()
endif()
# --- Intel Perceptual Computing SDK ---
# The SDK is searched for only when the user asked for it via WITH_INTELPERC.
if(WITH_INTELPERC)
  include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIntelPerCSDK.cmake")
endif()

View File

@ -88,6 +88,9 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#cmakedefine HAVE_INTTYPES_H 1
/* Intel Perceptual Computing SDK library */
#cmakedefine HAVE_INTELPERC
/* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP

View File

@ -106,8 +106,8 @@ Enable hardware optimizations
-----------------------------
Depending on target platform architecture different instruction sets can be used. By default
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DENABLE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DENABLE_NEON=ON`` for using
NEON SIMD extensions.
TBB is supported on multi core ARM SoCs also.

View File

@ -0,0 +1,79 @@
*******
HighGUI
*******
.. highlight:: cpp
Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors
=======================================================================================
Depth sensors compatible with Intel Perceptual Computing SDK are supported through ``VideoCapture`` class. Depth map, RGB image and some other formats of output can be retrieved by using familiar interface of ``VideoCapture``.
In order to use depth sensor with OpenCV you should do the following preliminary steps:
#.
Install Intel Perceptual Computing SDK (from here http://www.intel.com/software/perceptual).
#.
Configure OpenCV with Intel Perceptual Computing SDK support by setting the ``WITH_INTELPERC`` flag in CMake. If the Intel Perceptual Computing SDK is found in its install folders, OpenCV will be built with the Intel Perceptual Computing SDK library (see the ``Intel PerC`` status line in the CMake log). If the CMake process doesn't find the Intel Perceptual Computing SDK installation folder automatically, the user should set the corresponding CMake variables ``INTELPERC_LIBRARIES`` and ``INTELPERC_INCLUDE_DIR`` to the proper values.
#.
Build OpenCV.
VideoCapture can retrieve the following data:
#.
data given from depth generator:
* ``CV_CAP_INTELPERC_DEPTH_MAP`` - each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1)
* ``CV_CAP_INTELPERC_UVDEPTH_MAP`` - each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates. (CV_32FC2)
* ``CV_CAP_INTELPERC_IR_MAP`` - each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam. (CV_16UC1)
#.
data given from RGB image generator:
* ``CV_CAP_INTELPERC_IMAGE`` - color image. (CV_8UC3)
In order to get the depth map from the depth sensor use ``VideoCapture::operator >>``, e.g. ::
VideoCapture capture( CV_CAP_INTELPERC );
for(;;)
{
Mat depthMap;
capture >> depthMap;
if( waitKey( 30 ) >= 0 )
break;
}
For getting several data maps use ``VideoCapture::grab`` and ``VideoCapture::retrieve``, e.g. ::
VideoCapture capture(CV_CAP_INTELPERC);
for(;;)
{
Mat depthMap;
Mat image;
Mat irImage;
capture.grab();
capture.retrieve( depthMap, CV_CAP_INTELPERC_DEPTH_MAP );
capture.retrieve( image, CV_CAP_INTELPERC_IMAGE );
capture.retrieve( irImage, CV_CAP_INTELPERC_IR_MAP);
if( waitKey( 30 ) >= 0 )
break;
}
For setting and getting some property of the sensor's data generators use ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. ::
VideoCapture capture( CV_CAP_INTELPERC );
capture.set( CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0 );
cout << "FPS " << capture.get( CV_CAP_INTELPERC_DEPTH_GENERATOR+CV_CAP_PROP_FPS ) << endl;
Since two types of sensor's data generators are supported (image generator and depth generator), there are two flags that should be used to set/get property of the needed generator:
* CV_CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties.
* CV_CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This flag value is assumed by default if neither of the two possible values of the property is set.
For more information please refer to the example of usage intelperc_capture.cpp_ in ``opencv/samples/cpp`` folder.
.. _intelperc_capture.cpp: https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp

View File

@ -9,3 +9,4 @@ OpenCV User Guide
ug_features2d.rst
ug_highgui.rst
ug_traincascade.rst
ug_intelperc.rst

View File

@ -903,7 +903,7 @@ So, the function chooses an operation mode depending on the flags and size of th
* When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DCT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DFT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``.

View File

@ -2577,7 +2577,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
if( (flags & DFT_ROWS) || src.rows == 1 ||
if( (flags & DCT_ROWS) || src.rows == 1 ||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
{
stage = end_stage = 0;
@ -2597,7 +2597,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
{
len = src.cols;
count = src.rows;
if( len == 1 && !(flags & DFT_ROWS) )
if( len == 1 && !(flags & DCT_ROWS) )
{
len = src.rows;
count = 1;

View File

@ -2760,39 +2760,24 @@ void cv::transpose( InputArray _src, OutputArray _dst )
}
////////////////////////////////////// completeSymm /////////////////////////////////////////
void cv::completeSymm( InputOutputArray _m, bool LtoR )
{
Mat m = _m.getMat();
CV_Assert( m.dims <= 2 );
size_t step = m.step, esz = m.elemSize();
CV_Assert( m.dims <= 2 && m.rows == m.cols );
int i, j, nrows = m.rows, type = m.type();
int j0 = 0, j1 = nrows;
CV_Assert( m.rows == m.cols );
int rows = m.rows;
int j0 = 0, j1 = rows;
if( type == CV_32FC1 || type == CV_32SC1 )
uchar* data = m.data;
for( int i = 0; i < rows; i++ )
{
int* data = (int*)m.data;
size_t step = m.step/sizeof(data[0]);
for( i = 0; i < nrows; i++ )
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
if( !LtoR ) j1 = i; else j0 = i+1;
for( int j = j0; j < j1; j++ )
memcpy(data + (i*step + j*esz), data + (j*step + i*esz), esz);
}
else if( type == CV_64FC1 )
{
double* data = (double*)m.data;
size_t step = m.step/sizeof(data[0]);
for( i = 0; i < nrows; i++ )
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
}
else
CV_Error( CV_StsUnsupportedFormat, "" );
}

View File

@ -222,6 +222,12 @@ elseif(HAVE_QTKIT)
list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
endif()
# Intel Perceptual Computing SDK backend: add its capture source, the SDK
# headers and the SDK library when the SDK was found.
if(HAVE_INTELPERC)
  list(APPEND highgui_srcs src/cap_intelperc.cpp)
  list(APPEND HIGHGUI_LIBRARIES ${INTELPERC_LIBRARIES})
  ocv_include_directories(${INTELPERC_INCLUDE_DIR})
endif()
if(IOS)
add_definitions(-DHAVE_IOS=1)
list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)

View File

@ -271,7 +271,8 @@ enum { CAP_ANY = 0, // autodetect
CAP_XIAPI = 1100, // XIMEA Camera API
CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CAP_MSMF = 1400 // Microsoft Media Foundation (via videoInput)
CAP_MSMF = 1400, // Microsoft Media Foundation (via videoInput)
CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
};
// generic properties (based on DC1394 properties)
@ -496,6 +497,26 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
};
// Intel PerC capture properties
enum
{
    CAP_PROP_INTELPERC_PROFILE_COUNT               = 11001,
    CAP_PROP_INTELPERC_PROFILE_IDX                 = 11002,
    CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE  = 11003,
    CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE      = 11004,
    CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD  = 11005,
    CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ     = 11006,
    CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT     = 11007
};

// Intel PerC streams: generator selector bits combined with a property id
enum
{
    CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
    CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
    // both selector bits together (they are distinct, so OR equals their sum)
    CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR | CAP_INTELPERC_IMAGE_GENERATOR
};

// Intel PerC output types
enum
{
    // Each pixel is a 16-bit integer. The value indicates the distance from an object
    // to the camera's XY plane or the Cartesian depth.
    CAP_INTELPERC_DEPTH_MAP   = 0,
    // Each pixel contains two 32-bit floating point values in the range of 0-1,
    // representing the mapping of depth coordinates to the color coordinates.
    CAP_INTELPERC_UVDEPTH_MAP = 1,
    // Each pixel is a 16-bit integer. The value indicates the intensity of the
    // reflected laser beam.
    CAP_INTELPERC_IR_MAP      = 2,
    // Image from the color (image) stream.
    CAP_INTELPERC_IMAGE       = 3
};
class CV_EXPORTS_W VideoCapture
{

View File

@ -313,7 +313,9 @@ enum
CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CV_CAP_GIGANETIX = 1300 // Smartek Giganetix GigEVisionSDK
CV_CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CV_CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
};
/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
@ -459,16 +461,29 @@ enum
CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
,CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
CV_CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
CV_CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007,
// Intel PerC streams
CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
};
enum
@ -549,6 +564,14 @@ enum
CV_CAP_ANDROID_ANTIBANDING_OFF
};
/* Intel PerC output types */
enum
{
    /* Each pixel is a 16-bit integer. The value indicates the distance from an
       object to the camera's XY plane or the Cartesian depth. */
    CV_CAP_INTELPERC_DEPTH_MAP   = 0,
    /* Each pixel contains two 32-bit floating point values in the range of 0-1,
       representing the mapping of depth coordinates to the color coordinates. */
    CV_CAP_INTELPERC_UVDEPTH_MAP = 1,
    /* Each pixel is a 16-bit integer. The value indicates the intensity of the
       reflected laser beam. */
    CV_CAP_INTELPERC_IR_MAP      = 2,
    /* Image from the color (image) stream. */
    CV_CAP_INTELPERC_IMAGE       = 3
};
/* retrieve or set capture properties */
CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value );

View File

@ -155,6 +155,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
#endif
#ifdef HAVE_GIGE_API
CV_CAP_GIGANETIX,
#endif
#ifdef HAVE_INTELPERC
CV_CAP_INTELPERC,
#endif
-1
};
@ -193,6 +196,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_ANDROID_NATIVE_CAMERA) || \
defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0)
// local variable to memorize the captured device
CvCapture *capture;
@ -342,6 +346,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
break; // CV_CAP_GIGANETIX
#endif
#ifdef HAVE_INTELPERC
case CV_CAP_INTELPERC:
capture = cvCreateCameraCapture_IntelPerC(index);
if (capture)
return capture;
break; // CV_CAP_INTEL_PERC
#endif
}
}

View File

@ -0,0 +1,714 @@
#include "precomp.hpp"
#ifdef HAVE_INTELPERC
#include "pxcsession.h"
#include "pxcsmartptr.h"
#include "pxccapture.h"
// Common part of an Intel Perceptual Computing SDK video stream: owns the capture
// device and the stream, keeps the list of usable profiles, and implements frame
// grabbing plus the generic capture properties. Derived classes choose the stream
// type (initStream) and convert a grabbed PXCImage into cv::Mat storage
// (prepareIplImage).
class CvIntelPerCStreamBase
{
protected:
    // Holds one frame as cv::Mat and hands it out through an IplImage header.
    struct FrameInternal
    {
        // Returns a header for the stored frame, or NULL when no frame is stored.
        // The header is owned by this object and is overwritten by the next call.
        IplImage* retrieveFrame()
        {
            if (m_mat.empty())
                return NULL;
            m_iplHeader = IplImage(m_mat);
            return &m_iplHeader;
        }
        cv::Mat m_mat;
    private:
        IplImage m_iplHeader;
    };
public:
    CvIntelPerCStreamBase()
        : m_profileIdx(-1)
        , m_frameIdx(0)
        , m_timeStampStartNS(0)
        , m_timeStamp(0.0) // readable through CV_CAP_PROP_POS_MSEC before the first grab
    {
    }
    virtual ~CvIntelPerCStreamBase()
    {
    }

    // True when both the device and the stream have been created.
    bool isValid()
    {
        return (m_device.IsValid() && m_stream.IsValid());
    }

    // Reads the next frame from the stream and passes it to prepareIplImage().
    // Selects profile 0 first if no profile has been chosen yet.
    // Returns false when the stream is missing, no profile can be set, reading
    // fails, or the derived class rejects the image.
    bool grabFrame()
    {
        if (!m_stream.IsValid())
            return false;
        if (-1 == m_profileIdx)
        {
            if (!setProperty(CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0))
                return false;
        }
        PXCSmartPtr<PXCImage> pxcImage; PXCSmartSP sp;
        if (PXC_STATUS_NO_ERROR > m_stream->ReadStreamAsync(&pxcImage, &sp))
            return false;
        if (PXC_STATUS_NO_ERROR > sp->Synchronize())
            return false;
        // The first frame after a profile change defines time zero.
        if (0 == m_timeStampStartNS)
            m_timeStampStartNS = pxcImage->QueryTimeStamp();
        // NOTE(review): the division by 10000 presumably converts SDK 100 ns ticks
        // to milliseconds - confirm against the SDK documentation.
        m_timeStamp = (double)((pxcImage->QueryTimeStamp() - m_timeStampStartNS) / 10000);
        m_frameIdx++;
        return prepareIplImage(pxcImage);
    }

    // Index of the selected profile, or -1 when none has been selected.
    int getProfileIDX() const
    {
        return m_profileIdx;
    }
public:
    // Creates the device and the stream; implemented per stream type.
    virtual bool initStream(PXCSession *session) = 0;

    // Generic properties: profile count, frame size and FPS of the selected
    // profile, frame counter and time stamp. Unknown ids (and size/FPS queries
    // without a selected profile) yield 0.
    virtual double getProperty(int propIdx)
    {
        double ret = 0.0;
        switch (propIdx)
        {
        case CV_CAP_PROP_INTELPERC_PROFILE_COUNT:
            ret = (double)m_profiles.size();
            break;
        case CV_CAP_PROP_FRAME_WIDTH :
            if (hasSelectedProfile())
                ret = (double)m_profiles[m_profileIdx].imageInfo.width;
            break;
        case CV_CAP_PROP_FRAME_HEIGHT :
            if (hasSelectedProfile())
                ret = (double)m_profiles[m_profileIdx].imageInfo.height;
            break;
        case CV_CAP_PROP_FPS :
            if (hasSelectedProfile())
            {
                // Reported as the mean of the profile's minimal and maximal frame rate.
                ret = ((double)m_profiles[m_profileIdx].frameRateMin.numerator / (double)m_profiles[m_profileIdx].frameRateMin.denominator
                    + (double)m_profiles[m_profileIdx].frameRateMax.numerator / (double)m_profiles[m_profileIdx].frameRateMax.denominator) / 2.0;
            }
            break;
        case CV_CAP_PROP_POS_FRAMES:
            ret = (double)m_frameIdx;
            break;
        case CV_CAP_PROP_POS_MSEC:
            ret = m_timeStamp;
            break;
        };
        return ret;
    }

    // Only CV_CAP_PROP_INTELPERC_PROFILE_IDX is handled here. Returns true when
    // the requested index is within the enumerated profiles (even if it is
    // already the current one); switching resets the frame counter and time base.
    virtual bool setProperty(int propIdx, double propVal)
    {
        bool isSet = false;
        switch (propIdx)
        {
        case CV_CAP_PROP_INTELPERC_PROFILE_IDX:
            {
                int propValInt = (int)propVal;
                if ((0 <= propValInt) && ((size_t)propValInt < m_profiles.size()))
                {
                    if (m_profileIdx != propValInt)
                    {
                        m_profileIdx = propValInt;
                        if (m_stream.IsValid())
                            m_stream->SetProfile(&m_profiles[m_profileIdx]);
                        m_frameIdx = 0;
                        m_timeStampStartNS = 0;
                    }
                    isSet = true;
                }
            }
            break;
        };
        return isSet;
    }
protected:
    PXCSmartPtr<PXCCapture::Device> m_device;

    // Walks the video-capture sensor modules of the session and keeps the first
    // device that can be created. Returns false when no module yields a device.
    bool initDevice(PXCSession *session)
    {
        if (NULL == session)
            return false;

        PXCSession::ImplDesc templat;
        memset(&templat,0,sizeof(templat));
        templat.group   = PXCSession::IMPL_GROUP_SENSOR;
        templat.subgroup= PXCSession::IMPL_SUBGROUP_VIDEO_CAPTURE;

        // Each call keeps its own status so that a module which cannot be
        // instantiated, or which has no device, does not end the enumeration of
        // the remaining modules. Only a failing QueryImpl terminates the loop.
        for (int modidx = 0; ; modidx++)
        {
            PXCSession::ImplDesc desc;
            if (PXC_STATUS_NO_ERROR > session->QueryImpl(&templat, modidx, &desc))
                break;

            PXCSmartPtr<PXCCapture> capture;
            session->CreateImpl<PXCCapture>(&desc, &capture);
            if (!capture.IsValid())
                continue;

            /* use the first device of the module */
            PXCSmartPtr<PXCCapture::Device> device;
            if (PXC_STATUS_NO_ERROR <= capture->CreateDevice(0, &device))
            {
                m_device = device.ReleasePtr();
                return true;
            }
        }
        return false;
    }

    PXCSmartPtr<PXCCapture::VideoStream> m_stream;

    // Creates m_stream from the first video stream of m_device whose image type
    // equals 'type'. Leaves m_stream invalid when nothing suitable can be created.
    void initStreamImpl(PXCImage::ImageType type)
    {
        if (!m_device.IsValid())
            return;

        /* enumerate streams */
        for (int streamidx = 0; ; streamidx++)
        {
            PXCCapture::Device::StreamInfo sinfo;
            if (PXC_STATUS_NO_ERROR > m_device->QueryStream(streamidx, &sinfo))
                break;
            if (PXCCapture::VideoStream::CUID != sinfo.cuid)
                continue;
            if (type != sinfo.imageType)
                continue;

            if (PXC_STATUS_NO_ERROR == m_device->CreateStream<PXCCapture::VideoStream>(streamidx, &m_stream))
                break;
            // Creation failed: drop whatever was stored and try the next stream.
            m_stream.ReleaseRef();
        }
    }
protected:
    std::vector<PXCCapture::VideoStream::ProfileInfo> m_profiles;
    int m_profileIdx;           // index into m_profiles, -1 when nothing is selected
    int m_frameIdx;             // frames grabbed since the last profile change
    pxcU64 m_timeStampStartNS;  // SDK time stamp of the first frame, 0 = not taken yet
    double m_timeStamp;         // value reported for CV_CAP_PROP_POS_MSEC

    // True when m_profileIdx refers to an element of m_profiles.
    bool hasSelectedProfile() const
    {
        return (0 <= m_profileIdx) && ((size_t)m_profileIdx < m_profiles.size());
    }

    // Filter applied by enumProfiles(); the base implementation accepts everything.
    virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& /*pinfo*/)
    {
        return true;
    }

    // Rebuilds m_profiles from the stream, keeping the profiles accepted by validProfile().
    void enumProfiles()
    {
        m_profiles.clear();
        if (!m_stream.IsValid())
            return;
        pxcStatus sts = PXC_STATUS_NO_ERROR;
        for (int profidx = 0; PXC_STATUS_NO_ERROR <= sts; profidx++)
        {
            PXCCapture::VideoStream::ProfileInfo pinfo;
            sts = m_stream->QueryProfile(profidx, &pinfo);
            if (PXC_STATUS_NO_ERROR > sts)
                break;
            if (validProfile(pinfo))
                m_profiles.push_back(pinfo);
        }
    }

    // Copies the grabbed SDK image into the derived class' frame storage.
    virtual bool prepareIplImage(PXCImage *pxcImage) = 0;
};
class CvIntelPerCStreamImage
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamImage()
{
}
virtual ~CvIntelPerCStreamImage()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_COLOR);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, &fret))
return (double)fret;
return 0.0;
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, (float)propVal));
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, (float)propVal));
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, (float)propVal));
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, (float)propVal));
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, (float)propVal));
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, (float)propVal));
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, (float)propVal));
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, (float)propVal));
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, (float)propVal));
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveFrame()
{
return m_frame.retrieveFrame();
}
protected:
FrameInternal m_frame;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, PXCImage::COLOR_FORMAT_RGB24, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
cv::Mat temp(info.height, info.width, CV_8UC3, data.planes[0], data.pitches[0]);
temp.copyTo(m_frame.m_mat);
pxcImage->ReleaseAccess(&data);
return true;
}
};
class CvIntelPerCStreamDepth
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamDepth()
{
}
virtual ~CvIntelPerCStreamDepth()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_DEPTH);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.x;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.y;
return 0.0;
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, (float)propVal));
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveDepthFrame()
{
return m_frameDepth.retrieveFrame();
}
IplImage* retrieveIRFrame()
{
return m_frameIR.retrieveFrame();
}
IplImage* retrieveUVFrame()
{
return m_frameUV.retrieveFrame();
}
protected:
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& pinfo)
{
return (PXCImage::COLOR_FORMAT_DEPTH == pinfo.imageInfo.format);
}
protected:
FrameInternal m_frameDepth;
FrameInternal m_frameIR;
FrameInternal m_frameUV;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
if (PXCImage::COLOR_FORMAT_DEPTH != data.format)
return false;
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[0], data.pitches[0]);
temp.copyTo(m_frameDepth.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[1], data.pitches[1]);
temp.copyTo(m_frameIR.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_32FC2, data.planes[2], data.pitches[2]);
temp.copyTo(m_frameUV.m_mat);
}
pxcImage->ReleaseAccess(&data);
return true;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CvCapture_IntelPerC : public CvCapture
{
public:
CvCapture_IntelPerC(int /*index*/)
: m_contextOpened(false)
{
pxcStatus sts = PXCSession_Create(&m_session);
if (PXC_STATUS_NO_ERROR > sts)
return;
m_contextOpened = m_imageStream.initStream(m_session);
m_contextOpened &= m_depthStream.initStream(m_session);
}
virtual ~CvCapture_IntelPerC(){}
virtual double getProperty(int propIdx)
{
double propValue = 0;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_imageStream.getProperty(purePropIdx);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_depthStream.getProperty(purePropIdx);
}
else
{
propValue = m_depthStream.getProperty(purePropIdx);
}
return propValue;
}
virtual bool setProperty(int propIdx, double propVal)
{
bool isSet = false;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_imageStream.setProperty(purePropIdx, propVal);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
else
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
return isSet;
}
bool grabFrame()
{
if (!isOpened())
return false;
bool isGrabbed = false;
if (m_depthStream.isValid())
isGrabbed = m_depthStream.grabFrame();
if ((m_imageStream.isValid()) && (-1 != m_imageStream.getProfileIDX()))
isGrabbed &= m_imageStream.grabFrame();
return isGrabbed;
}
virtual IplImage* retrieveFrame(int outputType)
{
IplImage* image = 0;
switch (outputType)
{
case CV_CAP_INTELPERC_DEPTH_MAP:
image = m_depthStream.retrieveDepthFrame();
break;
case CV_CAP_INTELPERC_UVDEPTH_MAP:
image = m_depthStream.retrieveUVFrame();
break;
case CV_CAP_INTELPERC_IR_MAP:
image = m_depthStream.retrieveIRFrame();
break;
case CV_CAP_INTELPERC_IMAGE:
image = m_imageStream.retrieveFrame();
break;
}
CV_Assert(NULL != image);
return image;
}
bool isOpened() const
{
return m_contextOpened;
}
protected:
bool m_contextOpened;
PXCSmartPtr<PXCSession> m_session;
CvIntelPerCStreamImage m_imageStream;
CvIntelPerCStreamDepth m_depthStream;
};
// Factory for the Intel PerC capture backend. Returns a heap-allocated capture
// when it opened successfully; otherwise destroys it and returns 0.
CvCapture* cvCreateCameraCapture_IntelPerC(int index)
{
    CvCapture_IntelPerC* capture = new CvCapture_IntelPerC(index);
    if (!capture->isOpened())
    {
        delete capture;
        return 0;
    }
    return capture;
}
#endif //HAVE_INTELPERC

View File

@ -128,6 +128,7 @@ CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
CvCapture* cvCreateCameraCapture_Android( int index );
CvCapture* cvCreateCameraCapture_XIMEA( int index );
CvCapture* cvCreateCameraCapture_AVFoundation(int index);
CvCapture* cvCreateCameraCapture_IntelPerC(int index);
CVAPI(int) cvHaveImageReader(const char* filename);

View File

@ -35,6 +35,7 @@
defined(HAVE_XIMEA) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0)
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
# define BUILD_WITH_CAMERA_SUPPORT 1

View File

@ -3299,7 +3299,10 @@ public:
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
{
bufxy = (*m1)(Rect(x, y, bcols, brows));
bufa = (*m2)(Rect(x, y, bcols, brows));
const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
for( x1 = 0; x1 < bcols; x1++ )
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
}
else if( planar_input )
{
@ -3680,7 +3683,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{
for( x = 0; x < size.width; x++ )
{
int fxy = src2 ? src2[x] : 0;
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
}
@ -3689,7 +3692,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{
for( x = 0; x < size.width; x++ )
{
int fxy = src2 ? src2[x] : 0;
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
}

View File

@ -18,6 +18,8 @@ class_ignore_list = (
const_ignore_list = (
"CV_CAP_OPENNI",
"CV_CAP_PROP_OPENNI_",
"CV_CAP_INTELPERC",
# Trailing comma is required: without it this literal is concatenated with the next entry.
"CV_CAP_PROP_INTELPERC_",
"WINDOW_AUTOSIZE",
"CV_WND_PROP_",
"CV_WINDOW_",

View File

@ -12,6 +12,7 @@
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
@ -66,8 +67,8 @@ uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols,
uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
{
#ifdef DISABLE_IMAGE2D
int x = clamp(convert_int_rte(coord.x), 0, cols - 1);
int y = clamp(convert_int_rte(coord.y), 0, rows - 1);
int x = clamp(round(coord.x), 0, cols - 1);
int y = clamp(round(coord.y), 0, rows - 1);
return img[elemPerRow * y + x];
#else
return (uchar)read_imageui(img, sam, coord).x;
@ -98,6 +99,7 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
#define CV_PI_F 3.14159265f
#endif
// Use integral image to calculate haar wavelets.
// N = 2
// for simple haar pattern
@ -114,10 +116,10 @@ float icvCalcHaarPatternSum_2(
F d = 0;
int2 dx1 = convert_int2_rte(ratio * src[0]);
int2 dy1 = convert_int2_rte(ratio * src[1]);
int2 dx2 = convert_int2_rte(ratio * src[2]);
int2 dy2 = convert_int2_rte(ratio * src[3]);
int2 dx1 = convert_int2(round(ratio * src[0]));
int2 dy1 = convert_int2(round(ratio * src[1]));
int2 dx2 = convert_int2(round(ratio * src[2]));
int2 dy2 = convert_int2(round(ratio * src[3]));
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
@ -136,106 +138,9 @@ float icvCalcHaarPatternSum_2(
return (float)d;
}
// N = 3
// Evaluates a three-box Haar-like pattern on the integral image sumTex.
//
// src points to five float4 entries; only the .x, .y and .z lanes are used,
// one lane per box:
//   src[0], src[1] - x and y offsets of the first box corner
//   src[2], src[3] - x and y offsets of the opposite box corner
//   src[4]         - weight applied to each box
// The offsets are multiplied by newSize / oldSize and rounded to integers
// (convert_int4_rte), then added to the anchor position (x, y).
// rows, cols and elemPerRow are passed through to read_sumTex unchanged.
//
// Returns the weighted sum of the three box sums, each divided by its
// scaled box area.
// NOTE(review): there is no guard against a scaled box of zero width or
// height, which would make the area divisor zero.
float icvCalcHaarPatternSum_3(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
// Scale factor from the pattern's reference size to the requested size.
float ratio = (float)newSize / oldSize;
F d = 0;
// Scaled integer corner offsets for all boxes at once (one box per lane).
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
// Box 0 (.x lane): four-corner integral-image lookup, then weight / area.
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
// Box 1 (.y lane).
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
// Box 2 (.z lane).
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
return (float)d;
}
// N = 4
// Evaluates a four-box Haar-like pattern on the integral image sumTex.
//
// src points to five float4 entries; all four lanes (.x, .y, .z, .w) are
// used, one lane per box:
//   src[0], src[1] - x and y offsets of the first box corner
//   src[2], src[3] - x and y offsets of the opposite box corner
//   src[4]         - weight applied to each box
// The offsets are multiplied by newSize / oldSize and rounded to integers
// (convert_int4_rte), then added to the anchor position (x, y).
// rows, cols and elemPerRow are passed through to read_sumTex unchanged.
//
// Returns the weighted sum of the four box sums, each divided by its
// scaled box area.
// NOTE(review): there is no guard against a scaled box of zero width or
// height, which would make the area divisor zero.
float icvCalcHaarPatternSum_4(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
// Scale factor from the pattern's reference size to the requested size.
float ratio = (float)newSize / oldSize;
F d = 0;
// Scaled integer corner offsets for all boxes at once (one box per lane).
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
// Box 0 (.x lane): four-corner integral-image lookup, then weight / area.
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
// Box 1 (.y lane).
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
// Box 2 (.z lane).
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
// Box 3 (.w lane).
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
return (float)d;
}
////////////////////////////////////////////////////////////////////////
// Hessian
__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
__inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
@ -250,6 +155,24 @@ __inline int calcSize(int octave, int layer)
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
// Calculate a derivative in an axis-aligned direction (x or y). The "plus1"
// boxes contribute 1 * (area), and the "minus2" box contributes -2 * (area).
// So the final computation is plus1a + plus1b - 2 * minus2. The corners are
// labeled A, B, C, and D, with A being the top left, B being top right, C
// being bottom left, and D being bottom right.
F calcAxisAlignedDerivative(
    int plus1a_A, int plus1a_B, int plus1a_C, int plus1a_D, F plus1a_scale,
    int plus1b_A, int plus1b_B, int plus1b_C, int plus1b_D, F plus1b_scale,
    int minus2_A, int minus2_B, int minus2_C, int minus2_D, F minus2_scale)
{
    // Each box sum is formed from its four corner samples in integer
    // arithmetic (A - B - C + D) and only then converted to F.
    const F firstPlusBox  = plus1a_A - plus1a_B - plus1a_C + plus1a_D;
    const F secondPlusBox = plus1b_A - plus1b_B - plus1b_C + plus1b_D;
    const F minusBox      = minus2_A - minus2_B - minus2_C + minus2_D;

    // Every box is divided by its own scale; the "minus2" box carries the
    // weight -2 and sits between the two +1 boxes in the sum.
    return (firstPlusBox / plus1a_scale -
            2.0f * minusBox / minus2_scale +
            secondPlusBox / plus1b_scale);
}
//calculate targeted layer per-pixel determinant and trace with an integral image
__kernel void icvCalcLayerDetAndTrace(
@ -264,7 +187,7 @@ __kernel void icvCalcLayerDetAndTrace(
int c_octave,
int c_layer_rows,
int sumTex_step
)
)
{
det_step /= sizeof(*det);
trace_step /= sizeof(*trace);
@ -288,16 +211,103 @@ __kernel void icvCalcLayerDetAndTrace(
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
{
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
int x = j << c_octave;
int y = i << c_octave;
float ratio = (float)size / 9;
// Precompute some commonly used values, which are used to offset
// texture coordinates in the integral image.
int r1 = round(ratio);
int r2 = round(ratio * 2.0f);
int r3 = round(ratio * 3.0f);
int r4 = round(ratio * 4.0f);
int r5 = round(ratio * 5.0f);
int r6 = round(ratio * 6.0f);
int r7 = round(ratio * 7.0f);
int r8 = round(ratio * 8.0f);
int r9 = round(ratio * 9.0f);
// Calculate the approximated derivative in the x-direction
F d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we fetch each of them only once here.
int t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t07 = read_sumTex( sumTex, sampler, (int2)(x, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t32 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t37 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t62 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t67 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t92 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t97 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r7), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t02, t07, t32, t37, (r3) * (r7 - r2),
t62, t67, t92, t97, (r9 - r6) * (r7 - r2),
t32, t37, t62, t67, (r6 - r3) * (r7 - r2));
}
const float dx = (float)d;
// Calculate the approximated derivative in the y-direction
d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we fetch each of them only once here.
int t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sumTex_step );
int t23 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t70 = read_sumTex( sumTex, sampler, (int2)(x + r7, y), c_img_rows, c_img_cols, sumTex_step );
int t73 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t26 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t76 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t29 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r9), c_img_rows, c_img_cols, sumTex_step );
int t79 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r9), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t20, t23, t70, t73, (r7 - r2) * (r3),
t26, t29, t76, t79, (r7 - r2) * (r9 - r6),
t23, t26, t73, t76, (r7 - r2) * (r6 - r3));
}
const float dy = (float)d;
// Calculate the approximated derivative in the xy-direction
d = 0;
{
// There's no saving us here, we just have to get all of the pixels in
// separate fetches
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r4 - r1) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r4), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r8 - r5) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r8), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r4 - r1) * (r8 - r5));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r8), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r8 - r5) * (r8 - r5));
}
const float dxy = (float)d;
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
}
}
////////////////////////////////////////////////////////////////////////
// NONMAX
@ -309,10 +319,10 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
float d = 0;
int dx1 = convert_int_rte(ratio * c_DM[0]);
int dy1 = convert_int_rte(ratio * c_DM[1]);
int dx2 = convert_int_rte(ratio * c_DM[2]);
int dy2 = convert_int_rte(ratio * c_DM[3]);
int dx1 = round(ratio * c_DM[0]);
int dy1 = round(ratio * c_DM[1]);
int dx2 = round(ratio * c_DM[2]);
int dy2 = round(ratio * c_DM[3]);
float t = 0;
@ -572,7 +582,7 @@ void icvFindMaximaInLayer(
}
// solve 3x3 linear system Ax=b for floating point input
inline bool solve3x3_float(volatile __local const float4 *A, volatile __local const float *b, volatile __local float *x)
inline bool solve3x3_float(const float4 *A, const float *b, float *x)
{
float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
- A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
@ -651,7 +661,7 @@ void icvInterpolateKeypoint(
if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0)
{
volatile __local float dD[3];
float dD[3];
//dx
dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]);
@ -660,7 +670,7 @@ void icvInterpolateKeypoint(
//ds
dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);
volatile __local float4 H[3];
float4 H[3];
//dxx
H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
@ -681,7 +691,7 @@ void icvInterpolateKeypoint(
//dss
H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
volatile __local float x[3];
float x[3];
if (solve3x3_float(H, dD, x))
{
@ -711,7 +721,7 @@ void icvInterpolateKeypoint(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
@ -737,9 +747,12 @@ void icvInterpolateKeypoint(
////////////////////////////////////////////////////////////////////////
// Orientation
#define ORI_SEARCH_INC 5
#define ORI_WIN 60
#define ORI_SAMPLES 113
#define ORI_WIN 60
#define ORI_SAMPLES 113
// The distance between samples at the beginning of the reduction
#define ORI_RESPONSE_REDUCTION_WIDTH 48
#define ORI_RESPONSE_ARRAY_SIZE (ORI_RESPONSE_REDUCTION_WIDTH * 2)
__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
@ -833,12 +846,15 @@ void icvCalcOrientation(
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
volatile __local float s_X[128];
volatile __local float s_Y[128];
volatile __local float s_angle[128];
__local float s_X[ORI_SAMPLES];
__local float s_Y[ORI_SAMPLES];
__local float s_angle[ORI_SAMPLES];
volatile __local float s_sumx[32 * 4];
volatile __local float s_sumy[32 * 4];
// Need to allocate enough to make the reduction work without accessing
// past the end of the array.
__local float s_sumx[ORI_RESPONSE_ARRAY_SIZE];
__local float s_sumy[ORI_RESPONSE_ARRAY_SIZE];
__local float s_mod[ORI_RESPONSE_ARRAY_SIZE];
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
@ -849,28 +865,60 @@ void icvCalcOrientation(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return;
// Calc X, Y, angle and store it to shared memory
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
const int tid = get_local_id(0);
// Initialize values that are only used as part of the reduction later.
if (tid < ORI_RESPONSE_ARRAY_SIZE - ORI_LOCAL_SIZE) {
s_mod[tid + ORI_LOCAL_SIZE] = 0.0f;
}
float X = 0.0f, Y = 0.0f, angle = 0.0f;
float ratio = (float)grad_wav_size / 4;
if (tid < ORI_SAMPLES)
// Use single-precision literals: a double literal would promote the
// multiplication to double, which not every OpenCL device supports.
int r2 = round(ratio * 2.0f);
int r4 = round(ratio * 4.0f);
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
{
float X = 0.0f, Y = 0.0f, angle = 0.0f;
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin);
const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin);
const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
float apt = c_aptW[i];
// Compute the haar sum without fetching duplicate pixels.
float t00 = read_sumTex( sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step);
float t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step);
float t04 = read_sumTex( sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step);
float t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step);
float t24 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step);
float t40 = read_sumTex( sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step);
float t42 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step);
float t44 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step);
F t = t00 - t04 - t20 + t24;
X -= t / ((r2) * (r4));
t = t20 - t24 - t40 + t44;
X += t / ((r4 - r2) * (r4));
t = t00 - t02 - t40 + t42;
Y += t / ((r2) * (r4));
t = t02 - t04 - t42 + t44;
Y -= t / ((r4) * (r4 - r2));
X = apt*X;
Y = apt*Y;
angle = atan2(Y, X);
@ -879,76 +927,61 @@ void icvCalcOrientation(
angle *= 180.0f / CV_PI_F;
}
s_X[i] = X;
s_Y[i] = Y;
s_angle[i] = angle;
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
barrier(CLK_LOCAL_MEM_FENCE);
float bestx = 0, besty = 0, best_mod = 0;
float sumx = 0.0f, sumy = 0.0f;
const int dir = tid * ORI_SEARCH_INC;
#pragma unroll
for (int i = 0; i < ORI_SAMPLES; ++i) {
int angle = round(s_angle[i]);
#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
int d = abs(angle - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[i];
sumy += s_Y[i];
}
}
s_sumx[tid] = sumx;
s_sumy[tid] = sumy;
s_mod[tid] = sumx*sumx + sumy*sumy;
barrier(CLK_LOCAL_MEM_FENCE);
volatile float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[get_local_id(0)];
sumy = s_Y[get_local_id(0)];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 32];
sumy += s_Y[get_local_id(0) + 32];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 64];
sumy += s_Y[get_local_id(0) + 64];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
// This reduction searches for the longest wavelet response vector. The first
// step uses all of the work items in the workgroup to narrow the search
// down to the three candidates. It requires s_mod to have a few more
// elements allocated past the work-group size, which are pre-initialized to
// 0.0f above.
for(int t = ORI_RESPONSE_REDUCTION_WIDTH; t >= 3; t /= 2) {
if (tid < t) {
if (s_mod[tid] < s_mod[tid + t]) {
s_mod[tid] = s_mod[tid + t];
s_sumx[tid] = s_sumx[tid + t];
s_sumy[tid] = s_sumy[tid + t];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
{
s_X[get_local_id(1)] = bestx;
s_Y[get_local_id(1)] = besty;
s_angle[get_local_id(1)] = best_mod;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(1) == 0 && get_local_id(0) == 0)
// Do the final reduction and write out the result.
if (tid == 0)
{
int bestIdx = 0;
if (s_angle[1] > s_angle[bestIdx])
// The loop above narrowed the search of the longest vector to three
// possibilities. Pick the best here.
if (s_mod[1] > s_mod[bestIdx])
bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx])
if (s_mod[2] > s_mod[bestIdx])
bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;
float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]);
float kp_dir = atan2(s_sumy[bestIdx], s_sumx[bestIdx]);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
@ -961,7 +994,6 @@ void icvCalcOrientation(
}
}
__kernel
void icvSetUpright(
__global float * keypoints,
@ -1035,8 +1067,8 @@ inline float linearFilter(
float out = 0.0f;
const int x1 = convert_int_rtn(x);
const int y1 = convert_int_rtn(y);
const int x1 = round(x);
const int y1 = round(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;

View File

@ -46,6 +46,7 @@
#ifdef HAVE_OPENCV_OCL
#include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp"
using namespace cv;
@ -57,18 +58,25 @@ namespace cv
{
namespace ocl
{
// The number of degrees between orientation samples in calcOrientation
const static int ORI_SEARCH_INC = 5;
// The local size of the calcOrientation kernel
const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
char optBuf [100] = {0};
char * optBufPtr = optBuf;
std::stringstream optsStr;
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
}
}
}
@ -601,8 +609,8 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
size_t localThreads[3] = {ORI_LOCAL_SIZE, 1, 1};
size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}

View File

@ -287,7 +287,7 @@ ocl::createSeparableLinearFilter_GPU
----------------------------------------
Creates a separable linear filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -303,6 +303,8 @@ Creates a separable linear filter engine.
:param bordertype: Pixel extrapolation method.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
@ -334,7 +336,7 @@ ocl::createDerivFilter_GPU
------------------------------
Creates a filter engine for the generalized Sobel operator.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT )
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -348,6 +350,8 @@ Creates a filter engine for the generalized Sobel operator.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
@ -405,7 +409,7 @@ ocl::createGaussianFilter_GPU
---------------------------------
Creates a Gaussian filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
@ -417,6 +421,8 @@ Creates a Gaussian filter engine.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Size of the source image; used to choose the optimal processing method.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
ocl::GaussianBlur

View File

@ -695,17 +695,17 @@ namespace cv
//! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
const Ptr<BaseColumnFilter_GPU> &columnFilter);
//! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
//! applies Laplacian operator to the image
// supports only ksize = 1 and ksize = 3
@ -1439,8 +1439,10 @@ namespace cv
oclMat Dx_;
oclMat Dy_;
oclMat eig_;
oclMat eig_minmax_;
oclMat minMaxbuf_;
oclMat tmpCorners_;
oclMat counter_;
};
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,

View File

@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
String build_options = format("-D DEPTH_%d", src.depth());
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}
String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
// Launches the kernel `kernelName` from the cvt_color program to convert `src`
// into `dst`.
//
// The program is built with -D DEPTH_<depth>, -D dcn=<destination channels>,
// -D bidx=<bidx> and -D pixels_per_work_item=<n>, followed by
// `additionalOptions` verbatim. Kernel arguments are bound in this order:
// dst cols, dst rows, src step, dst step, src buffer, dst buffer, src offset,
// dst offset, and then the `data` buffer when it is non-empty.
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
    // Steps and offsets are expressed in units of one channel element.
    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();

    // The global width below is divided by this factor. It is only raised on
    // Intel devices and only when the source width is an exact multiple of it.
    int pixels_per_work_item = 1;
    if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
    {
        if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
            pixels_per_work_item = 4;
        else if (src.cols % 2 == 0)
            pixels_per_work_item = 2;
    }

    // Single declaration of the build options (the earlier duplicate
    // "-D DEPTH_%d -D dcn=%d" declaration was a leftover and has been removed).
    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
    if (!additionalOptions.empty())
        build_options += additionalOptions;

    // The argument list stores addresses of the locals above, so they must stay
    // alive until the kernel has been enqueued.
    std::vector<std::pair<size_t , const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
    args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
    args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));

    if (!data.empty())
        args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));

    size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
    size_t lt[3] = { 16, 10, 1 };
#else
    size_t lt[3] = { 16, 16, 1 };
#endif
    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -119,10 +238,13 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@ -136,6 +258,36 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
src.depth(), greenbits, dst.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
src.depth(), greenbits, dst.channels(), bidx);
int src_offset = src.offset >> 1, src_step = src.step >> 1;
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
src.depth(), greenbits, src.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
src.depth(), greenbits, src.channels(), bidx);
int src_offset = (int)src.offset, src_step = (int)src.step;
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
CV_Assert(scn == 1);
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, 0, "Gray2RGB");
fromGray_caller(src, dst, 0, "Gray2RGB");
break;
}
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
Size dstSz(sz.width, sz.height * 2 / 3);
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
break;
}
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
initialized = true;
}
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
return;
}
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
break;
}
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
dst.create(sz, CV_MAKETYPE(depth, dcn));
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
break;
}
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:

View File

@ -741,6 +741,135 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
f->apply(src, dst);
}
const int optimizedSepFilterLocalSize = 16;
static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
{
size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
unsigned int src_pitch = src.step;
unsigned int dst_pitch = dst.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
row_kernel.rows / 2, col_kernel.rows / 2 );
option += " -D KERNEL_MATRIX_X=";
for(int i=0; i<row_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
option += "0x0";
option += " -D KERNEL_MATRIX_Y=";
for(int i=0; i<col_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
option += "0x0";
switch(src.type())
{
case CV_8UC1:
option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
break;
case CV_32FC1:
option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
break;
case CV_8UC2:
option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
break;
case CV_32FC2:
option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
break;
case CV_8UC3:
option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
break;
case CV_32FC3:
option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
break;
case CV_8UC4:
option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
break;
case CV_32FC4:
option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(dst.type())
{
case CV_8UC1:
option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
break;
case CV_8UC2:
option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
break;
case CV_8UC3:
option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
break;
case CV_8UC4:
option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
break;
case CV_32FC1:
option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
break;
case CV_32FC2:
option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
break;
case CV_32FC3:
option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
break;
case CV_32FC4:
option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(bordertype)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
-1, -1, option.c_str() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// SeparableFilter
@ -790,6 +919,35 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter
return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
}
namespace
{
class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
{
public:
SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
{
bordertype = btype;
rowKernel = rowKernel_;
columnKernel = columnKernel_;
}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
{
normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
oclMat srcROI = src(roi);
oclMat dstROI = dst(roi);
sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
}
Mat rowKernel;
Mat columnKernel;
int bordertype;
};
}
static void GPUFilterBox(const oclMat &src, oclMat &dst,
Size &ksize, const Point anchor, const int borderType)
{
@ -1243,17 +1401,32 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
}
// Creates a separable linear filter engine.
//
// srcType, dstType - source / destination image types.
// rowKernel,
// columnKernel     - 1-D horizontal / vertical filter coefficients.
// anchor, delta    - forwarded to the two-pass row/column filters only.
// bordertype       - pixel extrapolation method.
// imgSize          - size of the image that will be filtered; used only to
//                    decide whether the single-pass engine is applicable.
//
// Returns a SingleStepSeparableFilterEngine_GPU when every single-pass
// precondition below holds, otherwise the two-pass engine built from
// getLinearRowFilter_GPU / getLinearColumnFilter_GPU.
Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
        const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
{
    int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
    int cn = CV_MAT_CN(srcType);
    int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
    int bufType = CV_MAKETYPE(bdepth, cn);
    Context* clCxt = Context::getContext();

    // The single-pass routine only handles 8-bit and float images.
    const bool depthsSupported = (sdepth == CV_8U || sdepth == CV_32F) &&
                                 (ddepth == CV_8U || ddepth == CV_32F);

    // It takes the kernel length from .rows and reads coefficients as float,
    // so the kernels must be CV_32F column vectors.
    const bool kernelsSupported = rowKernel.type() == CV_32FC1 && columnKernel.type() == CV_32FC1 &&
                                  rowKernel.cols == 1 && columnKernel.cols == 1;

    // The single-pass engine is constructed without anchor or delta, so it is
    // only equivalent for a centered anchor and a zero delta.
    const bool anchorCentered = (anchor.x == -1 || anchor.x == rowKernel.rows / 2) &&
                                (anchor.y == -1 || anchor.y == columnKernel.rows / 2);
    const bool noDelta = delta == 0.0;

    //if image size is non-degenerate and large enough
    //and if filter support is reasonable to satisfy larger local memory requirements,
    //then we can use single pass routine to avoid extra runtime calls overhead
    if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
        depthsSupported && kernelsSupported && anchorCentered && noDelta &&
        rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
        (rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
        imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
        imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
    {
        return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
    }
    else
    {
        // Two-pass path: row filter into an intermediate buffer of type
        // bufType, then column filter into the destination type.
        Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
        Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);

        return createSeparableFilter_GPU(rowFilter, columnFilter);
    }
}
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
@ -1277,16 +1450,16 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype);
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
f->apply(src, dst);
}
// Creates a filter engine for the generalized Sobel operator.
//
// srcType, dstType - source / destination image types.
// dx, dy           - derivative orders passed to getDerivKernels.
// ksize            - aperture size passed to getDerivKernels.
// borderType       - pixel extrapolation method.
// imgSize          - size of the image that will be filtered; forwarded to
//                    createSeparableLinearFilter_GPU.
//
// The derivative kernels are generated as unnormalized CV_32F kernels and the
// engine is created with the default anchor Point(-1, -1) and a delta of 0.
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
{
    Mat kx, ky;
    getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
    return createSeparableLinearFilter_GPU(srcType, dstType,
                                           kx, ky, Point(-1, -1), 0, borderType, imgSize);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1356,7 +1529,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype)
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
{
int depth = CV_MAT_DEPTH(type);
@ -1383,7 +1556,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
else
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype);
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
}
void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
@ -1419,7 +1592,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
dst.create(src.size(), src.type());
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
f->apply(src, dst);
}

View File

@ -48,154 +48,142 @@
using namespace cv;
using namespace cv::ocl;
// currently sort procedure on the host is more efficient
static bool use_cpu_sorter = true;
namespace
// compact structure for corners
struct DefCorner
{
enum SortMethod
float eig; //eigenvalue of corner
short x; //x coordinate of corner point
short y; //y coordinate of corner point
} ;
// compare procedure for corner
//it is used for sort on the host side
struct DefCornerCompare
{
CPU_STL,
BITONIC,
SELECTION
};
const int GROUP_SIZE = 256;
template<SortMethod method>
struct Sorter
{
//typedef EigType;
};
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
template<>
struct Sorter<CPU_STL>
{
typedef oclMat EigType;
static cv::Mutex cs;
static Mat mat_eig;
//prototype
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
bool operator()(const DefCorner a, const DefCorner b) const
{
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
return v1 > v2;
}
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
cv::AutoLock lock(cs);
//temporarily use STL's sort function
Mat mat_corners = corners;
mat_eig = eig_tex;
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
corners = mat_corners;
return a.eig > b.eig;
}
};
cv::Mutex Sorter<CPU_STL>::cs;
cv::Mat Sorter<CPU_STL>::mat_eig;
template<>
struct Sorter<BITONIC>
// sort corner points using the OpenCL bitonic sort implementation
static void sortCorners_caller(oclMat& corners, const int count)
{
typedef TextureCL EigType;
Context * cxt = Context::getContext();
int GS = count/2;
int LS = min(255,GS);
size_t globalThreads[3] = {GS, 1, 1};
size_t localThreads[3] = {LS, 1, 1};
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
{
Context * cxt = Context::getContext();
size_t globalThreads[3] = {count / 2, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
++numStages;
}
const int argc = 4;
std::vector< std::pair<size_t, const void *> > args(argc);
std::string kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
++numStages;
}
const int argc = 5;
std::vector< std::pair<size_t, const void *> > args(argc);
String kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
}
};
}
template<>
struct Sorter<SELECTION>
{
typedef TextureCL EigType;
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
Context * cxt = Context::getContext();
size_t globalThreads[3] = {count, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
std::vector< std::pair<size_t, const void *> > args;
//local
String kernelname = "sortCorners_selectionSortLocal";
int lds_size = GROUP_SIZE * sizeof(cl_float2);
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
args.push_back( std::make_pair( lds_size, (void*)NULL) );
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
//final
kernelname = "sortCorners_selectionSortFinal";
args.pop_back();
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
};
int findCorners_caller(
const TextureCL& eig,
const float threshold,
const oclMat& mask,
oclMat& corners,
const int max_count)
// find corners on matrix and put it into array
static void findCorners_caller(
const oclMat& eig_mat, //input matrix worth eigenvalues
oclMat& eigMinMax, //input with min and max values of eigenvalues
const float qualityLevel,
const oclMat& mask,
oclMat& corners, //output array with detected corners
oclMat& counter) //output value with number of detected corners, have to be 0 before call
{
String opt;
std::vector<int> k;
Context * cxt = Context::getContext();
std::vector< std::pair<size_t, const void*> > args;
String kernelname = "findCorners";
const int mask_strip = mask.step / mask.elemSize1();
oclMat g_counter(1, 1, CV_32SC1);
g_counter.setTo(0);
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
int src_pitch = (int)eig_mat.step;
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
size_t localThreads[3] = {16, 16, 1};
if(!mask.empty())
opt += " -D WITH_MASK=1";
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
return std::min(Mat(g_counter).at<int>(0), max_count);
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
}
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
{
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * 2 * src.elemSize();
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
int offset = src.offset / src.elemSize();
{// first parallel pass
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
size_t globalThreads[3] = {groupnum * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
args, -1, -1, "-D T=float -D DEPTH_5");
}
{// run final "serial" kernel to find accumulate results from threads and reset corner counter
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
size_t globalThreads[3] = {1, 1, 1};
size_t localThreads[3] = {1, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
args, -1, -1);
}
}
}//unnamed namespace
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
{
@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
ensureSizeIsEnough(image.size(), CV_32F, eig_);
if (useHarrisDetector)
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
else
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
double maxVal = 0;
minMax(eig_, NULL, &maxVal);
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
// find max eigenvalue and reset detected counters
minMaxEig_caller(eig_,eig_minmax_,counter_);
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
int total = findCorners_caller(
*eig_tex,
static_cast<float>(maxVal * qualityLevel),
// allocate buffer for kernels
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
if(!use_cpu_sorter)
{ // round to 2^n
unsigned int n=1;
for(n=1;n<(unsigned int)corner_array_size;n<<=1);
corner_array_size = (int)n;
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
// set to 0 to be able use bitonic sort on whole 2^n array
tmpCorners_.setTo(0);
}
else
{
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
}
int total = tmpCorners_.cols; // by default the number of corner is full array
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
//find points with high eigenvalue and put it into the output array
findCorners_caller(
eig_,
eig_minmax_,
static_cast<float>(qualityLevel),
mask,
tmpCorners_,
tmpCorners_.cols);
counter_);
if(!use_cpu_sorter)
{// sort detected corners on deivce side
sortCorners_caller(tmpCorners_, corner_array_size);
}
else
{// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
}
//blocking read whole corners array (sorted or not sorted)
openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
if (total == 0)
{
{// check for trivial case
corners.release();
return;
}
if(use_cpu_sorter)
{
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
}
else
{
//if total is power of 2
if(((total - 1) & (total)) == 0)
{
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
else
{
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
{// sort detected corners on cpu side.
tmp.resize(total);
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
}
//estimate maximal size of final output array
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
int D2 = (int)ceil(minDistance * minDistance);
// allocate output buffer
std::vector<Point2f> tmp2;
tmp2.reserve(total_max);
if (minDistance < 1)
{
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
tmpCorners_(roi_range).copyTo(corners);
{// we have not distance restriction. then just copy with conversion maximal allowed points into output array
for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
{
tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
}
}
else
{
std::vector<Point2f> tmp(total);
downloadPoints(tmpCorners_, tmp);
std::vector<Point2f> tmp2;
tmp2.reserve(total);
{// we have distance restriction. then start coping to output array from the first element and check distance for each next one
const int cell_size = cvRound(minDistance);
const int grid_width = (image.cols + cell_size - 1) / cell_size;
const int grid_height = (image.rows + cell_size - 1) / cell_size;
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
for (int i = 0; i < total; ++i)
for (int i = 0; i < total ; ++i)
{
Point2f p = tmp[i];
DefCorner p = tmp[i];
if(p.eig<=0.0f)
break; // condition to stop that is needed for GPU bitonic sort usage.
bool good = true;
@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
{
for (int xx = x1; xx <= x2; xx++)
{
std::vector<Point2f>& m = grid[yy * grid_width + xx];
if (!m.empty())
std::vector<Point2i>& m = grid[yy * grid_width + xx];
if (m.empty())
continue;
for(size_t j = 0; j < m.size(); j++)
{
for(size_t j = 0; j < m.size(); j++)
{
float dx = p.x - m[j].x;
float dy = p.y - m[j].y;
int dx = p.x - m[j].x;
int dy = p.y - m[j].y;
if (dx * dx + dy * dy < minDistance * minDistance)
{
good = false;
goto break_out;
}
if (dx * dx + dy * dy < D2)
{
good = false;
goto break_out_;
}
}
}
}
break_out:
break_out_:
if(good)
{
grid[y_cell * grid_width + x_cell].push_back(p);
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
tmp2.push_back(p);
tmp2.push_back(Point2f(p.x,p.y));
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
break;
}
}
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
}
int final_size = static_cast<int>(tmp2.size());
if(final_size>0)
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
else
corners.release();
}
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
{

View File

@ -866,16 +866,17 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
//setup local group size
localThreads[0] = 8;
localThreads[1] = 16;
//setup local group size for "pixel step" = 1
localThreads[0] = 16;
localThreads[1] = 32;
localThreads[2] = 1;
//init maximal number of workgroups
//calc maximal number of workgroups
int WGNumX = 1+(sizev[0].width /(localThreads[0]));
int WGNumY = 1+(sizev[0].height/(localThreads[1]));
int WGNumZ = loopcount;
int WGNum = 0; //accurate number of non -empty workgroups
int WGNumTotal = 0; //accurate number of non-empty workgroups
int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
{
cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
@ -895,12 +896,16 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gx>=(Width-cascade->orig_window_size.width))
continue; // no data to process
if(scaleinfo[z].factor<=2)
{
WGNumSampled++;
}
// save non-empty workgroup info into array
pWGInfo[WGNum].s[0] = scaleinfo[z].width_height;
pWGInfo[WGNum].s[1] = (gx << 16) | gy;
pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff;
memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float));
WGNum++;
pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
WGNumTotal++;
}
}
}
@ -908,13 +913,8 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
pWGInfo = NULL;
}
// setup global sizes to have linear array of workgroups with WGNum size
globalThreads[0] = localThreads[0]*WGNum;
globalThreads[1] = localThreads[1];
globalThreads[2] = 1;
#define NODE_SIZE 12
// pack node info to have less memory loads
// pack node info to have less memory loads on the device side
oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
{
cl_int status;
@ -963,8 +963,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
options += format(" -D LSx=%d",localThreads[0]);
options += format(" -D LSy=%d",localThreads[1]);
options += format(" -D SPLITNODE=%d",splitnode);
options += format(" -D SPLITSTAGE=%d",splitstage);
options += format(" -D OUTPUTSZ=%d",outputsz);
@ -972,8 +970,39 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
// init candidate global count to 0
int pattern = 0;
openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
// The packed cascade kernel is launched up to two times over the work-group
// info array: the first WGNumSampled entries are handled with PIXEL_STEP=2,
// the remaining entries (starting at WGSTART=WGNumSampled) with PIXEL_STEP=1.
// NOTE(review): this relies on the "sampled" entries (scale factor <= 2)
// being stored first in the work-group info array -- confirm against the
// code that fills it.
if(WGNumTotal>WGNumSampled)
{// small images and each pixel is processed
    // setup global sizes to have linear array of workgroups with WGNum size
    int pixelstep = 1;
    size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
    globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
    globalThreads[1] = LS[1];
    globalThreads[2] = 1;
    String options1 = options;
    options1 += format(" -D PIXEL_STEP=%d",pixelstep);
    options1 += format(" -D WGSTART=%d",WGNumSampled);
    // LS[] is size_t; cast so the variadic argument matches the "%d" conversion
    options1 += format(" -D LSx=%d",(int)LS[0]);
    options1 += format(" -D LSy=%d",(int)LS[1]);
    // execute face detector
    openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
}
if(WGNumSampled>0)
{// large images each 4th pixel is processed
    // setup global sizes to have linear array of workgroups with WGNum size
    int pixelstep = 2;
    // half the work-items per dimension: each one handles every second pixel
    size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
    globalThreads[0] = LS[0]*WGNumSampled;
    globalThreads[1] = LS[1];
    globalThreads[2] = 1;
    String options2 = options;
    options2 += format(" -D PIXEL_STEP=%d",pixelstep);
    options2 += format(" -D WGSTART=%d",0);
    // LS[] is size_t; cast so the variadic argument matches the "%d" conversion
    options2 += format(" -D LSx=%d",(int)LS[0]);
    options2 += format(" -D LSy=%d",(int)LS[1]);
    // execute face detector
    openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
}
//read candidate buffer back and put it into host list
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
assert(candidate[0]<outputsz);

View File

@ -76,6 +76,11 @@ namespace cv
int cdescr_width;
int cdescr_height;
// A shift value and type that allows qangle to be different
// sizes on different hardware
int qangle_step_shift;
int qangle_type;
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y);
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
hog_device_cpu = true;
else
hog_device_cpu = false;
}
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
effect_size = img.size();
grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2);
qangle.create(img.size(), hog::qangle_type);
const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
@ -1606,6 +1612,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
int descr_size = descr_width * nblocks_win_y;
cdescr_size = descr_size;
qangle_type = CV_8UC2;
qangle_step_shift = 0;
// Some Intel devices have low single-byte access performance,
// so we change the datatype here.
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
qangle_type = CV_32SC2;
qangle_step_shift = 2;
}
}
void cv::ocl::device::hog::compute_hists(int nbins,
@ -1627,7 +1643,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int blocks_total = img_block_width * img_block_height;
int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step;
int qangle_step = qangle.step >> qangle_step_shift;
int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step;
int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1;
int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step >> 2;
int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1;
int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));

View File

@ -1035,67 +1035,117 @@ namespace cv
else
scale = 1. / scale;
if (ksize > 0)
const int sobel_lsz = 16;
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
src.wholerows > sobel_lsz + (ksize>>1) &&
src.wholecols > sobel_lsz + (ksize>>1))
{
Context* clCxt = Context::getContext();
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
src.cols % 8 == 0 && src.rows % 8 == 0 &&
ksize==3 &&
(borderType ==cv::BORDER_REFLECT ||
borderType == cv::BORDER_REPLICATE ||
borderType ==cv::BORDER_REFLECT101 ||
borderType ==cv::BORDER_WRAP))
Dx.create(src.size(), CV_32FC1);
Dy.create(src.size(), CV_32FC1);
CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
unsigned int src_pitch = src.step;
unsigned int Dx_pitch = Dx.step;
unsigned int Dy_pitch = Dy.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
switch(src.type())
{
Dx.create(src.size(), CV_32FC1);
Dy.create(src.size(), CV_32FC1);
const unsigned int block_x = 8;
const unsigned int block_y = 8;
unsigned int src_pitch = src.step;
unsigned int dst_pitch = Dx.cols;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
String option = "-D BLK_X=8 -D BLK_Y=8";
switch(borderType)
{
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
case CV_8UC1:
option += " -D SRCTYPE=uchar";
break;
case CV_32FC1:
option += " -D SRCTYPE=float";
break;
}
else
switch(borderType)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
String kernel_name;
switch(ksize)
{
case -1:
option += " -D SCHARR";
kernel_name = "sobel3";
break;
case 3:
kernel_name = "sobel3";
break;
case 5:
kernel_name = "sobel5";
break;
case 7:
kernel_name = "sobel7";
break;
default:
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
}
else
{
if (ksize > 0)
{
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
}
}
else
{
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
else
{
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
}
}
CV_Assert(Dx.offset == 0 && Dy.offset == 0);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,185 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
// EXTRAPOLATE(x, maxV) rewrites the coordinate x in place so that it can be
// used to index an image dimension of size maxV. Exactly one BORDER_* macro
// must be supplied through the build options.
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
// coordinates are left untouched; ELEM() substitutes the constant instead
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = max(min((x), (maxV) - 1), 0); \
    }
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
// single add-and-modulo: x must not be below -maxV for the result to be in range
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = ( (x) + (maxV) ) % (maxV); \
    }
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
    }
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
    { \
        (x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
    }
#else
#error No extrapolation method
#endif

// Reads source element (_x,_y); SrcPitch is a byte stride because it is added
// to the uchar pointer Src before the cast to SRCTYPE.
#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])

// ELEM yields the value to load for (_x,_y). With BORDER_CONSTANT, coordinates
// outside [0,r_edge) x [0,t_edge) produce const_v instead of a memory read.
// The whole conditional is parenthesised so the macro stays one operand when
// it is used inside a larger expression.
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) ((_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y)))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif

// Destination element (_x,_y); DstOffset and DstPitch are applied to the uchar
// pointer Dst, i.e. both are in bytes.
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])

//horizontal and vertical filter kernels
//should be defined on host during compile time to avoid overhead
//(the coefficients are stored as uint and reinterpreted with as_float() in the kernel)
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
// Separable filter done in a single kernel launch: every work-group stages a
// tile of the source (plus a RADIUSX/RADIUSY apron) in local memory, filters
// it vertically into a second local array and then horizontally into Dst.
//   Src, SrcPitch          - source buffer and its row stride in bytes
//   srcOffsetX, srcOffsetY - position of the processed region inside Src, in elements / rows
//   Dst, DstOffset, DstPitch - destination buffer, byte offset and byte row stride
//   width, height          - extent used for border extrapolation of source coordinates
//   dstWidth, dstHeight    - extent of the output; work-items outside it store nothing
__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
(
    __global uchar* Src,
    const uint SrcPitch,
    const int srcOffsetX,
    const int srcOffsetY,
    __global uchar* Dst,
    const int DstOffset,
    const uint DstPitch,
    int width,
    int height,
    int dstWidth,
    int dstHeight
)
{
    //RADIUSX, RADIUSY are filter dimensions
    //BLK_X, BLK_Y are local workgroup sizes
    //all these should be defined on host during compile time
    //first lsmem array for source pixels used in first pass,
    //second lsmemDy for storing first pass results
    __local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
    __local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];

    //get local and global ids - used as image and local memory array indexes
    int lix = get_local_id(0);
    int liy = get_local_id(1);

    int x = (int)get_global_id(0);
    int y = (int)get_global_id(1);

    //calculate pixel position in source image taking image offset into account
    int srcX = x + srcOffsetX - RADIUSX;
    int srcY = y + srcOffsetY - RADIUSY;

    //extrapolate coordinates, if needed
    //and read my own source pixel into local memory
    //with account for extra border pixels, which will be read by starting workitems
    int clocY = liy;
    int cSrcY = srcY;
    do
    {
        int yb = cSrcY;
        EXTRAPOLATE(yb, (height));

        int clocX = lix;
        int cSrcX = srcX;
        do
        {
            int xb = cSrcX;
            EXTRAPOLATE(xb,(width));
            lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );

            //step by a whole work-group so the tile and its apron get covered
            clocX += BLK_X;
            cSrcX += BLK_X;
        }
        while(clocX < BLK_X+(RADIUSX*2));

        clocY += BLK_Y;
        cSrcY += BLK_Y;
    }
    while(clocY < BLK_Y+(RADIUSY*2));
    //the whole tile must be loaded before any work-item reads its neighbours
    barrier(CLK_LOCAL_MEM_FENCE);

    //do vertical filter pass
    //and store intermediate results to second local memory array
    int i;
    WORKTYPE sum = 0.0f;
    int clocX = lix;
    do
    {
        sum = 0.0f;
        for(i=0; i<=2*RADIUSY; i++)
            sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
        lsmemDy[liy][clocX] = sum;
        clocX += BLK_X;
    }
    while(clocX < BLK_X+(RADIUSX*2));
    barrier(CLK_LOCAL_MEM_FENCE);

    //if this pixel happened to be out of image borders because of global size rounding,
    //then just return (safe here: both barriers have already been passed)
    if( x >= dstWidth || y >=dstHeight )  return;

    //do second horizontal filter pass
    //and calculate final result
    sum = 0.0f;
    for(i=0; i<=2*RADIUSX; i++)
        sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);

    //store result into destination image
    DST(x,y) = CONVERT_DSTTYPE(sum);
}

View File

@ -126,13 +126,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
)
{
// this version used information provided for each workgroup
// no empty WG
int gid = (int)get_group_id(0);
int lid_x = (int)get_local_id(0);
int lid_y = (int)get_local_id(1);
int lid = lid_y*LSx+lid_x;
int4 WGInfo = pWGInfo[gid];
int4 WGInfo = pWGInfo[WGSTART+gid];
int GroupX = (WGInfo.y >> 16)&0xFFFF;
int GroupY = (WGInfo.y >> 0 )& 0xFFFF;
int Width = (WGInfo.x >> 16)&0xFFFF;
@ -140,8 +138,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int ImgOffset = WGInfo.z;
float ScaleFactor = as_float(WGInfo.w);
#define DATA_SIZE_X (LSx+WND_SIZE_X)
#define DATA_SIZE_Y (LSy+WND_SIZE_Y)
#define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
#define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
local int SumL[DATA_SIZE];
@ -165,9 +163,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 info1 = p;
int4 info2 = pq;
{
int xl = lid_x;
int yl = lid_y;
// calc processed ROI coordinate in local mem
int xl = lid_x*PIXEL_STEP;
int yl = lid_y*PIXEL_STEP;
{// calc variance_norm_factor for all stages
int OffsetLocal = yl * DATA_SIZE_X + xl;
int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl);
@ -194,13 +194,13 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int result = (1.0f>0.0f);
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
{// iterate until candidate is exist
{// iterate until candidate is valid
float stage_sum = 0.0f;
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int lcl_off = (yl*DATA_SIZE_X)+(xl);
int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold;
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
{
// simple macro to extract shorts from int
@ -212,7 +212,7 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 n1 = pN[1];
int4 n2 = pN[2];
float nodethreshold = as_float(n2.y) * variance_norm_factor;
// calc sum of intensity pixels according to node information
// calc sum of intensity pixels according to classifier node information
float classsum =
(SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
(SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
@ -228,8 +228,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
if(index<OUTPUTSZ)
{
int x = GroupX+lid_x;
int y = GroupY+lid_y;
int x = GroupX+xl;
int y = GroupY+yl;
int4 candidate_result;
candidate_result.x = convert_int_rtn(x*ScaleFactor);
candidate_result.y = convert_int_rtn(y*ScaleFactor);

View File

@ -46,33 +46,26 @@
#ifndef WITH_MASK
#define WITH_MASK 0
#endif
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
{
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
}
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
{
return read_imagef(_eig, sampler, pt).x;
}
//macro to read eigenvalue matrix
#define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]
__kernel
void findCorners
(
image2d_t eig,
__global const char * mask,
__global float2 * corners,
const int mask_strip,// in pixels
const float threshold,
const int rows,
const int cols,
const int max_count,
__global int * g_counter
__global const char* eig,
const int eig_pitch,
__global const char* mask,
__global float2* corners,
const int mask_strip,// in pixels
__global const float* pMinMax,
const float qualityLevel,
const int rows,
const int cols,
const int max_count,
__global int* g_counter
)
{
float threshold = qualityLevel*pMinMax[1];
const int j = get_global_id(0);
const int i = get_global_id(1);
@ -82,39 +75,42 @@ __kernel
#endif
)
{
const float val = ELEM_INT2(eig, j, i);
const float val = GET_SRC_32F(j, i);
if (val > threshold)
{
float maxVal = val;
maxVal = fmax(GET_SRC_32F(j - 1, i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j , i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j - 1, i), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
maxVal = fmax(GET_SRC_32F(j - 1, i + 1), maxVal);
maxVal = fmax(GET_SRC_32F(j , i + 1), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i + 1), maxVal);
if (val == maxVal)
{
const int ind = atomic_inc(g_counter);
if (ind < max_count)
corners[ind] = (float2)(j, i);
{// pack and store eigenvalue and its coordinates
corners[ind].x = val;
corners[ind].y = as_float(j|(i<<16));
}
}
}
}
}
#undef GET_SRC_32F
//bitonic sort
__kernel
void sortCorners_bitonicSort
(
image2d_t eig,
__global float2 * corners,
const int count,
const int stage,
@ -140,8 +136,8 @@ __kernel
const float2 leftPt = corners[leftId];
const float2 rightPt = corners[rightId];
const float leftVal = ELEM_FLT2(eig, leftPt);
const float rightVal = ELEM_FLT2(eig, rightPt);
const float leftVal = leftPt.x;
const float rightVal = rightPt.x;
const bool compareResult = leftVal > rightVal;
@ -152,124 +148,22 @@ __kernel
corners[rightId] = sortOrder ? greater : lesser;
}
//selection sort for gfft
//kernel is ported from Bolt library:
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
// Local sort will firstly sort elements of each workgroup using selection sort
// its performance is O(n)
__kernel
void sortCorners_selectionSortLocal
(
image2d_t eig,
__global float2 * corners,
const int count,
__local float2 * scratch
)
// This is a short serial kernel that performs the final reduction and some initialization work.
// It does work the HOST would otherwise do, to avoid an additional sync with the HOST.
// data - input/output buffer of floats:
//   on input it holds groupnum partial minima followed by groupnum partial maxima;
//   on output data[0] is the reduced minimum and data[1] is the reduced maximum.
// g_counter - counter that has to be reset to 0 for the next findCorners call.
__kernel void arithm_op_minMax_final(__global float * data, int groupnum,__global int * g_counter)
{
int i = get_local_id(0); // index in workgroup
int numOfGroups = get_num_groups(0); // index in workgroup
int groupID = get_group_id(0);
int wg = get_local_size(0); // workgroup size = block size
int n; // number of elements to be processed for this work group
int offset = groupID * wg;
int same = 0;
corners += offset;
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
float2 pt1, pt2;
pt1 = corners[min(i, n)];
scratch[i] = pt1;
barrier(CLK_LOCAL_MEM_FENCE);
if(i >= n)
g_counter[0] = 0;
float minVal = data[0];
float maxVal = data[groupnum];
for(int i=1;i<groupnum;++i)
{
return;
minVal = min(minVal,data[i]);
maxVal = max(maxVal,data[i+groupnum]);
}
float val1 = ELEM_FLT2(eig, pt1);
float val2;
int pos = 0;
for (int j=0;j<n;++j)
{
pt2 = scratch[j];
val2 = ELEM_FLT2(eig, pt2);
if(val2 > val1)
pos++;//calculate the rank of this element in this work group
else
{
if(val1 > val2)
continue;
else
{
// val1 and val2 are same
same++;
}
}
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}
__kernel
void sortCorners_selectionSortFinal
(
image2d_t eig,
__global float2 * corners,
const int count
)
{
const int i = get_local_id(0); // index in workgroup
const int numOfGroups = get_num_groups(0); // index in workgroup
const int groupID = get_group_id(0);
const int wg = get_local_size(0); // workgroup size = block size
int pos = 0, same = 0;
const int offset = get_group_id(0) * wg;
const int remainder = count - wg*(numOfGroups-1);
if((offset + i ) >= count)
return;
float2 pt1, pt2;
pt1 = corners[groupID*wg + i];
float val1 = ELEM_FLT2(eig, pt1);
float val2;
for(int j=0; j<numOfGroups-1; j++ )
{
for(int k=0; k<wg; k++)
{
pt2 = corners[j*wg + k];
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Increment only if the value is not the same.
if( val2 > val1 )
pos++;
else
same++;
}
}
}
for(int k=0; k<remainder; k++)
{
pt2 = corners[(numOfGroups-1)*wg + k];
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Don't increment if the value is the same.
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
if(val2 > val1)
pos++;
else
same++;
}
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}
data[0] = minVal;
data[1] = maxVal;
}

View File

@ -1,45 +1,97 @@
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr))
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr))
#define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#ifdef BORDER_REFLECT101
//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
#endif
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr))
#endif
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
int srcX = x + srcOffsetX - (kernel_border); \
int srcY = y + srcOffsetY - (kernel_border); \
int xb = srcX; \
int yb = srcY; \
\
EXTRAPOLATE(xb, (width)); \
EXTRAPOLATE(yb, (height)); \
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
\
if(lix < ((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
EXTRAPOLATE(xb,(width)); \
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(liy< ((kernel_border)*2)) \
{ \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(yb, (height)); \
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(xb,(width)); \
EXTRAPOLATE(yb,(height)); \
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
}
__kernel void sobel3(
__global uchar* Src,
__global float* DstX,
__global float* DstY,
int width, int height,
uint srcStride, uint dstStride,
float scale
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+2][BLK_X+2];
@ -47,62 +99,249 @@ __kernel void sobel3(
int lix = get_local_id(0);
int liy = get_local_id(1);
int gix = get_group_id(0);
int giy = get_group_id(1);
int id_x = get_global_id(0);
int id_y = get_global_id(1);
lsmem[liy+1][lix+1] = convert_float(Src[ id_y * srcStride + id_x ]);
int id_y_h = ADDR_H(id_y-1, 0,height);
int id_y_b = ADDR_B(id_y+1, height,id_y+1);
int id_x_l = ADDR_L(id_x-1, 0,width);
int id_x_r = ADDR_R(id_x+1, width,id_x+1);
if(liy==0)
{
lsmem[0][lix+1]=convert_float(Src[ id_y_h * srcStride + id_x ]);
if(lix==0)
lsmem[0][0]=convert_float(Src[ id_y_h * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[0][BLK_X+1]=convert_float(Src[ id_y_h * srcStride + id_x_r ]);
}
else if(liy==BLK_Y-1)
{
lsmem[BLK_Y+1][lix+1]=convert_float(Src[ id_y_b * srcStride + id_x ]);
if(lix==0)
lsmem[BLK_Y+1][0]=convert_float(Src[ id_y_b * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[BLK_Y+1][BLK_X+1]=convert_float(Src[ id_y_b * srcStride + id_x_r ]);
}
if(lix==0)
lsmem[liy+1][0] = convert_float(Src[ id_y * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[liy+1][BLK_X+1] = convert_float(Src[ id_y * srcStride + id_x_r ]);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float u1 = lsmem[liy][lix];
float u2 = lsmem[liy][lix+1];
float u3 = lsmem[liy][lix+2];
float m1 = lsmem[liy+1][lix];
float m2 = lsmem[liy+1][lix+1];
float m3 = lsmem[liy+1][lix+2];
float b1 = lsmem[liy+2][lix];
float b2 = lsmem[liy+2][lix+1];
float b3 = lsmem[liy+2][lix+2];
//m2 * scale;//
float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 );
DstX[ id_y * dstStride + id_x ] = dx * scale;
//calc and store dx and dy;//
#ifdef SCHARR
DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
#else
DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}
float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3);
DstY[ id_y * dstStride + id_x ] = dy * scale;
}
// Computes the 5x5 x- and y-derivatives of Src and stores them, multiplied by
// `scale`, as floats through the DSTX/DSTY macros.
//   Src, SrcPitch           - source buffer and its row pitch (used by the SRC macro)
//   srcOffsetX, srcOffsetY  - offset added to the work-item coordinates when reading Src
//   DstX/DstY + *Offset, *Pitch - destination buffers, addressed by DSTX/DSTY
//   width, height           - extents passed to the border extrapolation
//   dstWidth, dstHeight     - work-items at or beyond these coordinates store nothing
//   scale                   - factor applied to both derivatives
__kernel void sobel5(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
// local tile: the work-group block plus a 2-pixel apron on every side
__local float lsmem[BLK_Y+4][BLK_X+4];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
// fill lsmem (block and apron) from Src with border extrapolation; kernel border is 2
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
barrier(CLK_LOCAL_MEM_FENCE);
// the bounds check comes after the barrier, so every work-item takes part in the tile load
if( x >= dstWidth || y >=dstHeight ) return;
// 5x5 neighbourhood, rows top to bottom: t, u, m, l, b; columns 1..5 left to right
// (the centre element m3 has zero weight in both derivatives and is not read)
float t1 = lsmem[liy][lix];
float t2 = lsmem[liy][lix+1];
float t3 = lsmem[liy][lix+2];
float t4 = lsmem[liy][lix+3];
float t5 = lsmem[liy][lix+4];
float u1 = lsmem[liy+1][lix];
float u2 = lsmem[liy+1][lix+1];
float u3 = lsmem[liy+1][lix+2];
float u4 = lsmem[liy+1][lix+3];
float u5 = lsmem[liy+1][lix+4];
float m1 = lsmem[liy+2][lix];
float m2 = lsmem[liy+2][lix+1];
float m4 = lsmem[liy+2][lix+3];
float m5 = lsmem[liy+2][lix+4];
float l1 = lsmem[liy+3][lix];
float l2 = lsmem[liy+3][lix+1];
float l3 = lsmem[liy+3][lix+2];
float l4 = lsmem[liy+3][lix+3];
float l5 = lsmem[liy+3][lix+4];
float b1 = lsmem[liy+4][lix];
float b2 = lsmem[liy+4][lix+1];
float b3 = lsmem[liy+4][lix+2];
float b4 = lsmem[liy+4][lix+3];
float b5 = lsmem[liy+4][lix+4];
//calc and store dx and dy;//
// dx: row weights (1 4 6 4 1) combined with column weights (-1 -2 0 2 1)
DSTX(x,y) = scale *
mad(12.0f, m4 - m2,
mad(6.0f, m5 - m1,
mad(8.0f, u4 - u2 + l4 - l2,
mad(4.0f, u5 - u1 + l5 - l1,
mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
)
)
)
);
// dy: the same weights with rows and columns exchanged
DSTY(x,y) = scale *
mad(12.0f, l3 - u3,
mad(6.0f, b3 - t3,
mad(8.0f, l2 - u2 + l4 - u4,
mad(4.0f, b2 - t2 + b4 - t4,
mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
)
)
)
);
}
// Computes the 7x7 x- and y-derivatives of Src and stores them, multiplied by
// `scale`, as floats through the DSTX/DSTY macros.
//   Src, SrcPitch           - source buffer and its row pitch (used by the SRC macro)
//   srcOffsetX, srcOffsetY  - offset added to the work-item coordinates when reading Src
//   DstX/DstY + *Offset, *Pitch - destination buffers, addressed by DSTX/DSTY
//   width, height           - extents passed to the border extrapolation
//   dstWidth, dstHeight     - work-items at or beyond these coordinates store nothing
//   scale                   - factor applied to both derivatives
__kernel void sobel7(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
// local tile: the work-group block plus a 3-pixel apron on every side
__local float lsmem[BLK_Y+6][BLK_X+6];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
// fill lsmem (block and apron) from Src with border extrapolation; kernel border is 3
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
barrier(CLK_LOCAL_MEM_FENCE);
// the bounds check comes after the barrier, so every work-item takes part in the tile load
if( x >= dstWidth || y >=dstHeight ) return;
// 7x7 neighbourhood, rows top to bottom: tt, t, u, m, l, b, bb; columns 1..7 left to right
// (the centre element m4 has zero weight in both derivatives and is not read)
float tt1 = lsmem[liy][lix];
float tt2 = lsmem[liy][lix+1];
float tt3 = lsmem[liy][lix+2];
float tt4 = lsmem[liy][lix+3];
float tt5 = lsmem[liy][lix+4];
float tt6 = lsmem[liy][lix+5];
float tt7 = lsmem[liy][lix+6];
float t1 = lsmem[liy+1][lix];
float t2 = lsmem[liy+1][lix+1];
float t3 = lsmem[liy+1][lix+2];
float t4 = lsmem[liy+1][lix+3];
float t5 = lsmem[liy+1][lix+4];
float t6 = lsmem[liy+1][lix+5];
float t7 = lsmem[liy+1][lix+6];
float u1 = lsmem[liy+2][lix];
float u2 = lsmem[liy+2][lix+1];
float u3 = lsmem[liy+2][lix+2];
float u4 = lsmem[liy+2][lix+3];
float u5 = lsmem[liy+2][lix+4];
float u6 = lsmem[liy+2][lix+5];
float u7 = lsmem[liy+2][lix+6];
float m1 = lsmem[liy+3][lix];
float m2 = lsmem[liy+3][lix+1];
float m3 = lsmem[liy+3][lix+2];
float m5 = lsmem[liy+3][lix+4];
float m6 = lsmem[liy+3][lix+5];
float m7 = lsmem[liy+3][lix+6];
float l1 = lsmem[liy+4][lix];
float l2 = lsmem[liy+4][lix+1];
float l3 = lsmem[liy+4][lix+2];
float l4 = lsmem[liy+4][lix+3];
float l5 = lsmem[liy+4][lix+4];
float l6 = lsmem[liy+4][lix+5];
float l7 = lsmem[liy+4][lix+6];
float b1 = lsmem[liy+5][lix];
float b2 = lsmem[liy+5][lix+1];
float b3 = lsmem[liy+5][lix+2];
float b4 = lsmem[liy+5][lix+3];
float b5 = lsmem[liy+5][lix+4];
float b6 = lsmem[liy+5][lix+5];
float b7 = lsmem[liy+5][lix+6];
float bb1 = lsmem[liy+6][lix];
float bb2 = lsmem[liy+6][lix+1];
float bb3 = lsmem[liy+6][lix+2];
float bb4 = lsmem[liy+6][lix+3];
float bb5 = lsmem[liy+6][lix+4];
float bb6 = lsmem[liy+6][lix+5];
float bb7 = lsmem[liy+6][lix+6];
//calc and store dx and dy
// dx: row weights (1 6 15 20 15 6 1) combined with column weights (-1 -4 -5 0 5 4 1)
DSTX(x,y) = scale *
mad(100.0f, m5 - m3,
mad(80.0f, m6 - m2,
mad(20.0f, m7 - m1,
mad(75.0f, u5 - u3 + l5 - l3,
mad(60.0f, u6 - u2 + l6 - l2,
mad(15.0f, u7 - u1 + l7 - l1,
mad(30.0f, t5 - t3 + b5 - b3,
mad(24.0f, t6 - t2 + b6 - b2,
mad(6.0f, t7 - t1 + b7 - b1,
mad(5.0f, tt5 - tt3 + bb5 - bb3,
mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
)
)
)
)
)
)
)
)
)
);
// dy: the same weights with rows and columns exchanged
DSTY(x,y) = scale *
mad(100.0f, l4 - u4,
mad(80.0f, b4 - t4,
mad(20.0f, bb4 - tt4,
mad(75.0f, l5 - u5 + l3 - u3,
mad(60.0f, b5 - t5 + b3 - t3,
mad(15.0f, bb5 - tt5 + bb3 - tt3,
mad(30.0f, l6 - u6 + l2 - u2,
mad(24.0f, b6 - t6 + b2 - t2,
mad(6.0f, bb6 - tt6 + bb2 - tt2,
mad(5.0f, l7 - u7 + l1 - u1,
mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
)
)
)
)
)
)
)
)
)
);
}

View File

@ -50,6 +50,14 @@
#define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f
#ifdef INTEL_DEVICE
#define QANGLE_TYPE int
#define QANGLE_TYPE2 int2
#else
#define QANGLE_TYPE uchar
#define QANGLE_TYPE2 uchar2
#endif
//----------------------------------------------------------------------------
// Histogram computation
// 12 threads for a cell, 12x4 threads per block
@ -59,7 +67,7 @@ __kernel void compute_hists_lut_kernel(
const int cnbins, const int cblock_hist_size, const int img_block_width,
const int blocks_in_group, const int blocks_total,
const int grad_quadstep, const int qangle_step,
__global const float* grad, __global const uchar* qangle,
__global const float* grad, __global const QANGLE_TYPE* qangle,
__global const float* gauss_w_lut,
__global float* block_hists, __local float* smem)
{
@ -86,7 +94,7 @@ __kernel void compute_hists_lut_kernel(
__global const float* grad_ptr = (gid < blocks_total) ?
grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
__global const uchar* qangle_ptr = (gid < blocks_total) ?
__global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
@ -101,7 +109,7 @@ __kernel void compute_hists_lut_kernel(
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);
grad_ptr += grad_quadstep;
qangle_ptr += qangle_step;
@ -558,7 +566,7 @@ __kernel void extract_descrs_by_cols_kernel(
__kernel void compute_gradients_8UC4_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global uchar * qangle,
const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);
@ -660,7 +668,7 @@ __kernel void compute_gradients_8UC4_kernel(
__kernel void compute_gradients_8UC1_kernel(
const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global uchar * qangle,
__global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
const float angle_scale, const char correct_gamma, const int cnbins)
{
const int x = get_global_id(0);

View File

@ -116,7 +116,7 @@ Mat randomMat(RNG& rng, Size size, int type, double minVal, double maxVal, bool
Mat m(size0, type);
rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal));
rng.fill(m, RNG::UNIFORM, minVal, maxVal);
if( size0 == size )
return m;
return m(Rect((size0.width-size.width)/2, (size0.height-size.height)/2, size.width, size.height));
@ -142,7 +142,7 @@ Mat randomMat(RNG& rng, const vector<int>& size, int type, double minVal, double
Mat m(dims, &size0[0], type);
rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal));
rng.fill(m, RNG::UNIFORM, minVal, maxVal);
if( eqsize )
return m;
return m(&r[0]);

View File

@ -28,14 +28,11 @@ set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-s
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-sections -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now ${CMAKE_EXE_LINKER_FLAGS}")
if(USE_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
message(WARNING "You use obsolete variable USE_NEON to enable NEON instruction set. Use -DENABLE_NEON=ON instead." )
set(ENABLE_NEON TRUE)
elseif(USE_VFPV3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3-d16")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3-d16")
message(WARNING "You use obsolete variable USE_VFPV3 to enable VFPV3 instruction set. Use -DENABLE_VFPV3=ON instead." )
set(ENABLE_VFPV3 TRUE)
endif()
set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${ARM_LINUX_SYSROOT})

View File

@ -0,0 +1,376 @@
// testOpenCVCam.cpp : Defines the entry point for the console application.
//
#include "opencv2/highgui/highgui.hpp"
#include <cfloat>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace cv;
using namespace std;
static bool g_printStreamSetting = false;
static int g_imageStreamProfileIdx = -1;
static int g_depthStreamProfileIdx = -1;
static bool g_irStreamShow = false;
static double g_imageBrightness = -DBL_MAX;
static double g_imageContrast = -DBL_MAX;
static bool g_printTiming = false;
static bool g_showClosedPoint = false;
static int g_closedDepthPoint[2];
// Prints the command-line help of the sample to stdout.
//   arg0 - program path as received in argv[0]; only its last path component
//          (the text after the final '\\' or '/') is shown in the usage line.
static void printUsage(const char *arg0)
{
    // Remember the character after the last path separator; scanning forward
    // avoids ever forming a pointer in front of arg0.
    const char *filename = arg0;
    for (const char *p = arg0; *p; p++)
    {
        if (('\\' == *p) || ('/' == *p))
            filename = p + 1;
    }

    cout << "This program demonstrates usage of camera supported\nby Intel Perceptual computing SDK." << endl << endl;
    // The leading space keeps the program name separated from the option list.
    cout << "usage: " << filename << " [-ps] [-isp IDX] [-dsp IDX]\n [-ir] [-imb VAL] [-imc VAL] [-pts] [--show-closed]" << endl << endl;
    cout << " -ps, print streams setting and profiles" << endl;
    cout << " -isp IDX, set profile index of the image stream" << endl;
    cout << " -dsp IDX, set profile index of the depth stream" << endl;
    cout << " -ir, show data from IR stream" << endl;
    cout << " -imb VAL, set brightness value for a image stream" << endl;
    cout << " -imc VAL, set contrast value for a image stream" << endl;
    cout << " -pts, print frame index and frame time" << endl;
    cout << " --show-closed, mark the closest (minimal depth) point in red; requires -dsp" << endl;
    cout << endl;
}
// Returns the value that follows option argv[i] and advances i onto it.
// Prints an error and terminates the program when the option is the last
// argument, so a missing value is never passed on to atoi()/atof().
static const char* optionValue(int argc, char* argv[], int &i)
{
    if (i + 1 >= argc)
    {
        cerr << "Missing value for command line argument: " << argv[i] << "." << endl;
        exit(-1);
    }
    return argv[++i];
}

// Parses the command line into the g_* settings of this sample.
//   argc, argv - arguments exactly as received by main().
// With no arguments the usage text is printed and parsing ends. --help / -h
// prints the usage and exits with 0. An unknown option, an option without its
// value, or --show-closed without a depth profile exits with -1.
static void parseCMDLine(int argc, char* argv[])
{
    if( argc == 1 )
    {
        printUsage(argv[0]);
        return;
    }

    for( int i = 1; i < argc; i++ )
    {
        const char *arg = argv[i];
        if ((0 == strcmp(arg, "--help")) || (0 == strcmp(arg, "-h")))
        {
            printUsage(argv[0]);
            exit(0);
        }
        else if ((0 == strcmp(arg, "--print-streams")) || (0 == strcmp(arg, "-ps")))
        {
            g_printStreamSetting = true;
        }
        else if ((0 == strcmp(arg, "--image-stream-prof")) || (0 == strcmp(arg, "-isp")))
        {
            g_imageStreamProfileIdx = atoi(optionValue(argc, argv, i));
        }
        else if ((0 == strcmp(arg, "--depth-stream-prof")) || (0 == strcmp(arg, "-dsp")))
        {
            g_depthStreamProfileIdx = atoi(optionValue(argc, argv, i));
        }
        else if (0 == strcmp(arg, "-ir"))
        {
            g_irStreamShow = true;
        }
        else if (0 == strcmp(arg, "-imb"))
        {
            g_imageBrightness = atof(optionValue(argc, argv, i));
        }
        else if (0 == strcmp(arg, "-imc"))
        {
            g_imageContrast = atof(optionValue(argc, argv, i));
        }
        else if (0 == strcmp(arg, "-pts"))
        {
            g_printTiming = true;
        }
        else if (0 == strcmp(arg, "--show-closed"))
        {
            g_showClosedPoint = true;
        }
        else
        {
            cout << "Unsupported command line argument: " << arg << "." << endl;
            exit(-1);
        }
    }

    // The marked point is taken from the depth image, so a depth profile is mandatory.
    if (g_showClosedPoint && (-1 == g_depthStreamProfileIdx))
    {
        cerr << "For --show-closed depth profile has be selected" << endl;
        exit(-1);
    }
}
static void printStreamProperties(VideoCapture &capture)
{
size_t profilesCount = (size_t)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
cout << "Image stream." << endl;
cout << " Brightness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS) << endl;
cout << " Contrast = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST) << endl;
cout << " Saturation = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SATURATION) << endl;
cout << " Hue = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_HUE) << endl;
cout << " Gamma = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAMMA) << endl;
cout << " Sharpness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SHARPNESS) << endl;
cout << " Gain = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAIN) << endl;
cout << " Backligh = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BACKLIGHT) << endl;
cout << "Image streams profiles:" << endl;
for (size_t i = 0; i < profilesCount; i++)
{
capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
cout << " Profile[" << i << "]: ";
cout << "width = " <<
(int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_WIDTH);
cout << ", height = " <<
(int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_HEIGHT);
cout << ", fps = " <<
capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FPS);
cout << endl;
}
profilesCount = (size_t)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
cout << "Depth stream." << endl;
cout << " Low confidence value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE) << endl;
cout << " Saturation value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE) << endl;
cout << " Confidence threshold = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD) << endl;
cout << " Focal length = (" << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ) << ", "
<< capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT) << ")" << endl;
cout << "Depth streams profiles:" << endl;
for (size_t i = 0; i < profilesCount; i++)
{
capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
cout << " Profile[" << i << "]: ";
cout << "width = " <<
(int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_WIDTH);
cout << ", height = " <<
(int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_HEIGHT);
cout << ", fps = " <<
capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FPS);
cout << endl;
}
}
static void imshowImage(const char *winname, Mat &image, VideoCapture &capture)
{
if (g_showClosedPoint)
{
Mat uvMap;
if (capture.retrieve(uvMap, CAP_INTELPERC_UVDEPTH_MAP))
{
float *uvmap = (float *)uvMap.ptr() + 2 * (g_closedDepthPoint[0] * uvMap.cols + g_closedDepthPoint[1]);
int x = (int)((*uvmap) * image.cols); uvmap++;
int y = (int)((*uvmap) * image.rows);
if ((0 <= x) && (0 <= y))
{
static const int pointSize = 4;
for (int row = y; row < min(y + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + x * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - x); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
}
}
imshow(winname, image);
}
static void imshowIR(const char *winname, Mat &ir)
{
Mat image;
if (g_showClosedPoint)
{
image.create(ir.rows, ir.cols, CV_8UC3);
for (int row = 0; row < ir.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)ir.ptr(row);
for (int col = 0; col < ir.cols; col++, ptrSrc++)
{
uchar val = (uchar) ((*ptrSrc) >> 2);
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
}
}
static const int pointSize = 4;
for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
else
{
image.create(ir.rows, ir.cols, CV_8UC1);
for (int row = 0; row < ir.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)ir.ptr(row);
for (int col = 0; col < ir.cols; col++, ptrSrc++, ptrDst++)
{
*ptrDst = (uchar) ((*ptrSrc) >> 2);
}
}
}
imshow(winname, image);
}
static void imshowDepth(const char *winname, Mat &depth, VideoCapture &capture)
{
short lowValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE);
short saturationValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE);
Mat image;
if (g_showClosedPoint)
{
image.create(depth.rows, depth.cols, CV_8UC3);
for (int row = 0; row < depth.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)depth.ptr(row);
for (int col = 0; col < depth.cols; col++, ptrSrc++)
{
if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
{
*ptrDst = 0; ptrDst++;
*ptrDst = 0; ptrDst++;
*ptrDst = 0; ptrDst++;
}
else
{
uchar val = (uchar) ((*ptrSrc) >> 2);
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
}
}
}
static const int pointSize = 4;
for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
else
{
image.create(depth.rows, depth.cols, CV_8UC1);
for (int row = 0; row < depth.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)depth.ptr(row);
for (int col = 0; col < depth.cols; col++, ptrSrc++, ptrDst++)
{
if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
*ptrDst = 0;
else
*ptrDst = (uchar) ((*ptrSrc) >> 2);
}
}
}
imshow(winname, image);
}
// Entry point of the sample: parses the command line, opens the Intel
// Perceptual Computing capture, selects the requested stream profiles and
// then grabs and displays frames until a key is pressed.
// Returns 0 on normal exit (including "Streams not selected") and -1 when
// the capture cannot be opened, a profile cannot be set or a grab fails.
int main(int argc, char* argv[])
{
    parseCMDLine(argc, argv);

    VideoCapture capture;
    capture.open(CAP_INTELPERC);
    if (!capture.isOpened())
    {
        cerr << "Can not open a capture object." << endl;
        return -1;
    }

    if (g_printStreamSetting)
        printStreamProperties(capture);

    if (-1 != g_imageStreamProfileIdx)
    {
        if (!capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_imageStreamProfileIdx))
        {
            cerr << "Can not setup a image stream." << endl;
            return -1;
        }
    }
    if (-1 != g_depthStreamProfileIdx)
    {
        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_depthStreamProfileIdx))
        {
            cerr << "Can not setup a depth stream." << endl;
            return -1;
        }
    }
    else if (g_irStreamShow)
    {
        // The IR stream is set up through depth-generator profile 0.
        if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, 0.0))
        {
            cerr << "Can not setup a IR stream." << endl;
            return -1;
        }
    }
    else
    {
        // NOTE(review): this branch is also taken when only an image profile
        // (-isp) was selected - confirm that exiting here is intended.
        cout << "Streams not selected" << endl;
        return 0;
    }

    // Set up additional properties only after the stream profile has been set.
    // The -DBL_MAX defaults fall outside both ranges, so unset options are skipped.
    if ( (-10000.0 < g_imageBrightness) && (g_imageBrightness < 10000.0))
        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageBrightness);
    // Fixed: the contrast value used to be written to CAP_PROP_BRIGHTNESS.
    if ( (0 < g_imageContrast) && (g_imageContrast < 10000.0))
        capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST, g_imageContrast);

    int frame = 0;
    for(;;frame++)
    {
        Mat bgrImage;
        Mat depthImage;
        Mat irImage;

        if (!capture.grab())
        {
            cout << "Can not grab images." << endl;
            return -1;
        }

        if ((-1 != g_depthStreamProfileIdx) && (capture.retrieve(depthImage, CAP_INTELPERC_DEPTH_MAP)))
        {
            if (g_showClosedPoint)
            {
                // Store the {row, col} of the minimal depth value for the marker drawing.
                double minVal = 0.0; double maxVal = 0.0;
                minMaxIdx(depthImage, &minVal, &maxVal, g_closedDepthPoint);
            }
            imshowDepth("depth image", depthImage, capture);
        }
        if ((g_irStreamShow) && (capture.retrieve(irImage, CAP_INTELPERC_IR_MAP)))
            imshowIR("ir image", irImage);
        if ((-1 != g_imageStreamProfileIdx) && (capture.retrieve(bgrImage, CAP_INTELPERC_IMAGE)))
            imshowImage("color image", bgrImage, capture);

        if (g_printTiming)
        {
            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_FRAMES)
                 << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_FRAMES) << endl;
            cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_MSEC)
                 << ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_MSEC) << endl;
        }
        if( waitKey(30) >= 0 )
            break;
    }

    return 0;
}

View File

@ -32,13 +32,13 @@ int main()
for (int i = 0; i < image.rows; ++i)
for (int j = 0; j < image.cols; ++j)
{
Mat sampleMat = (Mat_<float>(1,2) << i,j);
Mat sampleMat = (Mat_<float>(1,2) << j,i);
float response = SVM.predict(sampleMat);
if (response == 1)
image.at<Vec3b>(j, i) = green;
image.at<Vec3b>(i,j) = green;
else if (response == -1)
image.at<Vec3b>(j, i) = blue;
image.at<Vec3b>(i,j) = blue;
}
// Show the training data

View File

@ -14,7 +14,10 @@
using namespace std;
using namespace cv;
#define LOOP_NUM 1
#define MAX_THREADS 10
///////////////////////////single-threading faces detecting///////////////////////////////
@ -29,23 +32,23 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
} ;
// Tick-count based timing helpers.
// NOTE(review): two variants of each declaration appear back to back (it looks
// like a change view with the +/- markers stripped). The scalar
// work_begin/work_end pair appears to be the older form; the
// work_begin[]/work_total[] arrays, one slot per index up to MAX_THREADS,
// appear to be the replacement.
int64 work_begin = 0;
int64 work_end = 0;
int64 work_begin[MAX_THREADS] = {0};
int64 work_total[MAX_THREADS] = {0};
// File-level names for the input image/video, the output image and the cascade file.
string inputName, outputName, cascadeName;
// Records the current tick count as the start of a timed section.
// In the indexed variant `i` selects the slot (default 0); it is used as an
// array index without a range check, so callers must keep it below MAX_THREADS.
static void workBegin()
static void workBegin(int i = 0)
{
work_begin = getTickCount();
work_begin[i] = getTickCount();
}
// Adds the ticks elapsed since the matching workBegin() to the running total
// (work_end in the scalar variant, work_total[i] in the indexed one).
static void workEnd()
static void workEnd(int i = 0)
{
work_end += (getTickCount() - work_begin);
work_total[i] += (getTickCount() - work_begin[i]);
}
// Returns the accumulated time converted from ticks to milliseconds.
static double getTime()
static double getTotalTime(int i = 0)
{
// Scalar variant: cvGetTickFrequency() is documented as ticks per microsecond,
// so dividing by (frequency * 1000) yields milliseconds.
return work_end /((double)cvGetTickFrequency() * 1000.);
// Indexed variant: getTickFrequency() is documented as ticks per second, so
// ticks / frequency * 1000 yields milliseconds.
return work_total[i] /getTickFrequency() * 1000.;
}
@ -98,7 +101,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
}
}
cvNamedWindow( "result", 1 );
if( capture )
{
cout << "In capture ..." << endl;
@ -118,7 +120,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
else
resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
work_end = 0;
if(useCPU)
detectCPU(frameCopy, faces, cpu_cascade, 1);
else
@ -132,16 +133,16 @@ static int facedetect_one_thread(bool useCPU, double scale )
}
else
{
cout << "In image read" << endl;
cout << "In image read " << image.size() << endl;
vector<Rect> faces;
vector<Rect> ref_rst;
double accuracy = 0.;
detectCPU(image, ref_rst, cpu_cascade, scale);
work_end = 0;
cout << "loops: ";
for(int i = 0; i <= LOOP_NUM; i ++)
{
cout << "loop" << i << endl;
cout << i << ", ";
if(useCPU)
detectCPU(image, faces, cpu_cascade, scale);
else
@ -152,16 +153,15 @@ static int facedetect_one_thread(bool useCPU, double scale )
accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
}
}
if (i == LOOP_NUM)
{
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTime() / LOOP_NUM << " ms" << endl;
cout << "accuracy value: " << accuracy <<endl;
}
}
cout << "done!" << endl;
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTotalTime() / LOOP_NUM << " ms" << endl;
cout << "accuracy value: " << accuracy <<endl;
Draw(image, faces, scale);
waitKey(0);
}
@ -174,9 +174,7 @@ static int facedetect_one_thread(bool useCPU, double scale )
///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
#if defined(_MSC_VER) && (_MSC_VER >= 1700)
#define MAX_THREADS 10
static void detectFaces(std::string fileName)
static void detectFaces(std::string fileName, int threadNum)
{
ocl::OclCascadeClassifier cascade;
if(!cascade.load(cascadeName))
@ -188,7 +186,7 @@ static void detectFaces(std::string fileName)
Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
if (img.empty())
{
std::cout << "cann't open file " + fileName <<std::endl;
std::cout << '[' << threadNum << "] " << "can't open file " + fileName <<std::endl;
return;
}
@ -196,23 +194,37 @@ static void detectFaces(std::string fileName)
// NOTE(review): from here to the closing brace, several statements appear in
// two variants back to back (it looks like a change view with the +/- markers
// stripped); read each pair as "before, then after" and confirm against the
// real file.
// Copy the host image into the device-side image used by the OpenCL cascade.
d_img.upload(img);
std::vector<Rect> oclfaces;
// Single untimed detection call; the timed loop further down appears to be its
// replacement.
cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0 | CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
// Log which OS thread and which OpenCL command queue this worker is using,
// prefixed with the worker's number.
std::thread::id tid = std::this_thread::get_id();
std::cout << '[' << threadNum << "] "
<< "ThreadID = " << tid
<< ", CommandQueue = " << *(void**)ocl::getClCommandQueuePtr()
<< endl;
// Run detection LOOP_NUM + 1 times. Iteration 0 is not timed (presumably a
// warm-up pass - TODO confirm); the remaining LOOP_NUM iterations accumulate
// into this worker's timing slot.
for(int i = 0; i <= LOOP_NUM; i++)
{
if(i>0) workBegin(threadNum);
cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
if(i>0) workEnd(threadNum);
}
// Average over the LOOP_NUM timed iterations.
std::cout << '[' << threadNum << "] " << "Average time = " << getTotalTime(threadNum) / LOOP_NUM << " ms" << endl;
// Outline every detected face on the host image, picking the colour by i % 8.
for(unsigned int i = 0; i<oclfaces.size(); i++)
rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
// Position of the last '.' in the output name, used to splice a per-thread
// suffix in front of the extension.
std::string::size_type pos = outputName.rfind('.');
// NOTE(review): _threadid is not declared in this view; this function sits
// under an _MSC_VER guard, so it is presumably a toolchain-provided value -
// confirm.
std::string outputNameTid = outputName + '-' + std::to_string(_threadid);
if(pos == std::string::npos)
std::string strTid = std::to_string(_threadid);
// In the variant guarded by !outputName.empty(), an empty output name skips
// both the "invalid name" message and the file write.
if( !outputName.empty() )
{
std::cout << "Invalid output file name: " << outputName << std::endl;
if(pos == std::string::npos)
{
std::cout << "Invalid output file name: " << outputName << std::endl;
}
else
{
// "<stem>_<thread id><.ext>": insert the suffix before the last '.'.
std::string outputNameTid = outputName.substr(0, pos) + "_" + strTid + outputName.substr(pos);
imwrite(outputNameTid, img);
}
}
else
{
outputNameTid = outputName.substr(0, pos) + "_" + std::to_string(_threadid) + outputName.substr(pos);
imwrite(outputNameTid, img);
}
// Show the annotated image; the window is titled with the output file name in
// one variant and with the thread id string in the other.
imshow(outputNameTid, img);
imshow(strTid, img);
// Block until a key is pressed.
waitKey(0);
}
@ -221,7 +233,7 @@ static void facedetect_multithreading(int nthreads)
int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
std::vector<std::thread> threads;
for(int i = 0; i<thread_number; i++)
threads.push_back(std::thread(detectFaces, inputName));
threads.push_back(std::thread(detectFaces, inputName, i));
for(int i = 0; i<thread_number; i++)
threads[i].join();
}
@ -237,8 +249,7 @@ int main( int argc, const char** argv )
" specify template file path }"
"{ c scale | 1.0 | scale image }"
"{ s use_cpu | false | use cpu or gpu to process the image }"
"{ o output | facedetect_output.jpg |"
" specify output image save path(only works when input is images) }"
"{ o output | | specify output image save path(only works when input is images) }"
"{ n thread_num | 1 | set number of threads >= 1 }";
CommandLineParser cmd(argc, argv, keys);
@ -312,8 +323,6 @@ void detectCPU( Mat& img, vector<Rect>& faces,
void Draw(Mat& img, vector<Rect>& faces, double scale)
{
int i = 0;
putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50),
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{
Point center;
@ -324,8 +333,8 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 );
}
//imwrite( outputName, img );
if(abs(scale-1.0)>.001)
//if( !outputName.empty() ) imwrite( outputName, img );
if( abs(scale-1.0)>.001 )
{
resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
}