Added the OpenCVFindIPP.cmake script, which looks for an IPP installation at CMake configuration time. First, the IPPROOT environment variable is checked; if nothing is found there, the script searches the default install locations.

The script should support IPP versions 5.3 through 7.x (although it has only been tested on Windows, with IPP 6.1 and IPP 7.0).

A preliminary IPP-based optimization of HOG is added as well. It is not yet very efficient: the code for the CPU branch should be redesigned to get better performance.
This commit is contained in:
Vladimir Dudnik 2010-12-31 16:45:18 +00:00
parent 1a34fa30f4
commit 6309b2d08d
6 changed files with 235 additions and 152 deletions

View File

@ -10,22 +10,26 @@
# ----------------------------------------------------------------------------
set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true)
# Add these standard paths to the search paths for FIND_LIBRARY
# to find libraries from these locations first
if(UNIX)
set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /lib /usr/lib)
set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /lib /usr/lib)
endif()
# it _must_ go before PROJECT(OpenCV) in order to work
# Choose a default install prefix when none has been supplied.
# Windows installs into the build tree; every other platform uses "/usr".
# The fallback must be a plain else(): an elseif() with an empty condition
# never evaluates to true, which left the "/usr" default unreachable.
if(NOT CMAKE_INSTALL_PREFIX)
  if(WIN32)
    set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR} CACHE INTERNAL "" FORCE)
  else()
    set(CMAKE_INSTALL_PREFIX "/usr" CACHE INTERNAL "" FORCE)
  endif()
endif()
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "Configs" FORCE)
set(CMAKE_C_FLAGS_MINSIZEREL "" CACHE INTERNAL "" FORCE)
@ -40,14 +44,17 @@ set(CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL "" CACHE INTERNAL "" FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "" CACHE INTERNAL "" FORCE)
set(CMAKE_VERBOSE OFF CACHE BOOL "Verbose mode")
if (CMAKE_VERBOSE)
set (CMAKE_VERBOSE_MAKEFILE 1)
if(CMAKE_VERBOSE)
set(CMAKE_VERBOSE_MAKEFILE 1)
endif()
project(OpenCV)
cmake_minimum_required(VERSION 2.4)
if(MSVC)
set(CMAKE_USE_RELATIVE_PATHS ON CACHE INTERNAL "" FORCE)
endif()
@ -56,6 +63,7 @@ endif()
#set(CMAKE_CXX_COMPILER "/opt/BullseyeCoverage/bin/g++")
#set(CMAKE_CXX_COMPILER_INIT "/opt/BullseyeCoverage/bin/gcc")
# --------------------------------------------------------------
# Tell CMake 2.7 and above that we don't want to mix relative
# and absolute paths in linker lib lists.
@ -65,6 +73,7 @@ if(COMMAND cmake_policy)
cmake_policy(SET CMP0003 NEW)
endif()
# ----------------------------------------------------------------------------
# Current version number:
# ----------------------------------------------------------------------------
@ -92,14 +101,16 @@ endif()
# ----------------------------------------------------------------------------
# Build static or dynamic libs?
# ----------------------------------------------------------------------------
# Default: dynamic libraries:
SET(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)")
IF(BUILD_SHARED_LIBS)
SET(OPENCV_BUILD_SHARED_LIB 1) # For cvconfig.h, etc.
ELSE(BUILD_SHARED_LIBS)
SET(OPENCV_BUILD_SHARED_LIB 0)
ENDIF(BUILD_SHARED_LIBS)
# ----------------------------------------------------------------------------
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared libraries (.dll/.so) instead of static ones (.lib/.a)")
if(BUILD_SHARED_LIBS)
set(OPENCV_BUILD_SHARED_LIB 1) # For cvconfig.h, etc.
else(BUILD_SHARED_LIBS)
set(OPENCV_BUILD_SHARED_LIB 0)
endif(BUILD_SHARED_LIBS)
# ----------------------------------------------------------------------------
# Variables for cvconfig.h.cmake
@ -111,6 +122,7 @@ set(PACKAGE_STRING "${PACKAGE} ${OPENCV_VERSION}")
set(PACKAGE_TARNAME "${PACKAGE}")
set(PACKAGE_VERSION "${OPENCV_VERSION}")
# ----------------------------------------------------------------------------
# Autodetect if we are in a SVN repository
# ----------------------------------------------------------------------------
@ -133,6 +145,7 @@ else()
set(OPENCV_SVNVERSION "")
endif()
# ----------------------------------------------------------------------------
# Detect GNU version:
# ----------------------------------------------------------------------------
@ -166,6 +179,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
# ----------------------------------------------------------------------------
# Detect Intel ICC compiler -- for -fPIC in 3rdparty ( UNIX ONLY ):
# see include/opencv/cxtypes.h file for related ICC & CV_ICC defines.
@ -190,6 +204,7 @@ if(UNIX)
endif()
endif()
# ----------------------------------------------------------------------------
# CHECK FOR SYSTEM LIBRARIES, OPTIONS, ETC..
# ----------------------------------------------------------------------------
@ -299,14 +314,16 @@ if(APPLE)
set(WITH_QUICKTIME OFF CACHE BOOL "Use QuickTime for Video I/O insted of QTKit")
endif()
set(WITH_TBB OFF CACHE BOOL "Include TBB support")
set(WITH_TBB OFF CACHE BOOL "Include Intel TBB support")
set(WITH_IPP OFF CACHE BOOL "Include Intel IPP support")
set(WITH_EIGEN2 ON CACHE BOOL "Include Eigen2/Eigen3 support")
set(WITH_CUDA OFF CACHE BOOL "Include NVidia Cuda Runtime support")
if(WIN32)
set(WITH_VIDEOINPUT ON CACHE BOOL "Enable VideoInput support")
set(WITH_VIDEOINPUT ON CACHE BOOL "Enable VideoInput support")
endif()
# ===================================================
# Macros that check whether a module has been installed.
# After it adds module to build and define
@ -558,16 +575,16 @@ if (WITH_QT)
find_package (OpenGL QUIET)
#if (NOT WIN32)
if (WITH_QT_OPENGL)
if (QT_QTOPENGL_FOUND AND OPENGL_FOUND)
set(HAVE_QT_OPENGL 1)
add_definitions(-DHAVE_QT_OPENGL)
#link_directories("${OPENGL_LIBRARIES}")
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${OPENGL_LIBRARIES})
endif()
endif()
#endif()
#if (NOT WIN32)
if (WITH_QT_OPENGL)
if (QT_QTOPENGL_FOUND AND OPENGL_FOUND)
set(HAVE_QT_OPENGL 1)
add_definitions(-DHAVE_QT_OPENGL)
#link_directories("${OPENGL_LIBRARIES}")
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${OPENGL_LIBRARIES})
endif()
endif()
#endif()
endif()
endif()
@ -649,6 +666,9 @@ if (WITH_TBB)
elseif(MSVC90)
set(_TBB_LIB_PATH "${_TBB_LIB_PATH}/vc9")
endif()
elseif(MSVC10)
set(_TBB_LIB_PATH "${_TBB_LIB_PATH}/vc10")
endif()
set(TBB_LIB_DIR "${_TBB_LIB_PATH}" CACHE PATH "Full path of TBB library directory")
link_directories("${TBB_LIB_DIR}")
endif()
@ -660,52 +680,70 @@ if (WITH_TBB)
endif()
endif()
endif()
#endif(WITH_TBB)
############################ Intel IPP #############################
# Optional Intel IPP support, enabled through the WITH_IPP option.
# set() with no value unsets the normal variable IPP_FOUND, so the check
# below does not pick up a stale value from earlier in this scope.
set(IPP_FOUND)
# Run the IPP detection script only when IPP support was requested.
if(WITH_IPP)
include(OpenCVFindIPP.cmake)
endif()
# NOTE(review): IPP_FOUND, IPP_INCLUDE_DIRS, IPP_LIBRARY_DIRS and
# IPP_LIBRARIES are presumably set by OpenCVFindIPP.cmake, which is not
# visible here -- confirm against that script.
if(IPP_FOUND)
# HAVE_IPP is the preprocessor symbol that enables the IPP code paths.
add_definitions(-DHAVE_IPP)
# Directory-scoped header and library search paths for IPP.
include_directories(${IPP_INCLUDE_DIRS})
link_directories(${IPP_LIBRARY_DIRS})
# Append the IPP libraries to the accumulated OpenCV linker library list.
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${IPP_LIBRARIES})
endif()
############################### CUDA ################################
if (WITH_CUDA)
find_package(CUDA 3.2)
if (CUDA_FOUND)
set(HAVE_CUDA 1)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
find_package(CUDA 3.2)
if (CUDA_FOUND)
set(HAVE_CUDA 1)
message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability")
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES})
set(CUDA_NVCC_FLAGS_NUM "")
set(CUDA_NVCC_FLAGS_NUM "")
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
endwhile()
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "")
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH})
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR})
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH})
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH)
endwhile()
set (OpenCV_CUDA_CC "")
set (loop_var "")
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
endforeach()
set (OpenCV_CUDA_CC "")
set (loop_var "")
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
endforeach()
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
endif()
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES})
endif()
endif()
############################### VideoInput ################################
if (WIN32 AND WITH_VIDEOINPUT)
if(CMAKE_CXX_COMPILER MATCHES "dw2")
else()
if(NOT MINGW64)
set(HAVE_VIDEOINPUT 1)
endif()
endif()
if(CMAKE_CXX_COMPILER MATCHES "dw2")
else()
if(NOT MINGW64)
set(HAVE_VIDEOINPUT 1)
endif()
endif()
endif()
############################## Eigen2 ##############################
if(WITH_EIGEN2)
@ -719,82 +757,6 @@ if(WITH_EIGEN2)
endif()
endif()
############################### IPP ################################
set(IPP_FOUND)
set(OPENCV_LOADER_PATH)
if(UNIX)
if(APPLE)
set(OPENCV_LOADER_PATH DYLD_LIBRARY_PATH)
else()
set(OPENCV_LOADER_PATH LD_LIBRARY_PATH)
endif()
endif()
foreach(v "6.1" "6.0" "5.3" "5.2" "5.1")
if(NOT IPP_FOUND)
if(WIN32)
find_path(IPP_PATH "ippi-${v}.dll"
PATHS ${CMAKE_PROGRAM_PATH} ${CMAKE_SYSTEM_PROGRAM_PATH}
DOC "The path to IPP dynamic libraries")
if(NOT IPP_PATH)
find_path(IPP_PATH "ippiem64t-${v}.dll"
PATHS ${CMAKE_PROGRAM_PATH} ${CMAKE_SYSTEM_PROGRAM_PATH}
DOC "The path to IPP dynamic libraries")
endif()
endif()
if(UNIX)
find_path(IPP_PATH "libippi${CMAKE_SHARED_LIBRARY_SUFFIX}.${v}"
PATHS ${CMAKE_LIBRARY_PATH} ${CMAKE_SYSTEM_LIBRARY_PATH} ENV ${OPENCV_LOADER_PATH}
DOC "The path to IPP dynamic libraries")
if(NOT IPP_PATH)
find_path(IPP_PATH "libippiem64t${CMAKE_SHARED_LIBRARY_SUFFIX}.${v}"
PATHS ${CMAKE_LIBRARY_PATH} ${CMAKE_SYSTEM_LIBRARY_PATH} ENV ${OPENCV_LOADER_PATH}
DOC "The path to IPP dynamic libraries")
endif()
endif()
if(IPP_PATH)
file(GLOB IPP_HDRS "${IPP_PATH}/../include")
if(IPP_HDRS)
set(IPP_FOUND TRUE)
endif()
endif()
endif()
endforeach()
message(STATUS "IPP detected: ${IPP_FOUND}")
if(WIN32 AND NOT MSVC)
set(IPP_FOUND)
endif()
set(USE_IPP ${IPP_FOUND} CACHE BOOL "Use IPP when available")
if(IPP_FOUND AND USE_IPP)
add_definitions(-DHAVE_IPP)
include_directories("${IPP_PATH}/../include")
link_directories("${IPP_PATH}/../lib")
file(GLOB em64t_files "${IPP_PATH}/../lib/*em64t*")
set(IPP_ARCH)
if(em64t_files)
set(IPP_ARCH "em64t")
endif()
set(A ${CMAKE_STATIC_LIBRARY_PREFIX})
set(B ${IPP_ARCH}${CMAKE_STATIC_LIBRARY_SUFFIX})
if(WIN32)
set(L l)
else()
set(L)
endif()
set(IPP_LIBS ${A}ippsmerged${B} ${A}ippsemerged${B}
${A}ippimerged${B} ${A}ippiemerged${B}
${A}ippvmmerged${B} ${A}ippvmemerged${B}
${A}ippccmerged${B} ${A}ippccemerged${B}
${A}ippcvmerged${B} ${A}ippcvemerged${B}
${A}ippcore${IPP_ARCH}${L}${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
################## Extra HighGUI libs on Windows ###################
@ -825,6 +787,7 @@ endif()
################## LATEX ##################
set(BUILD_LATEX_DOCS OFF CACHE BOOL "Build LaTeX OpenCV Documentation")
################### DOXYGEN ###############
find_package(Doxygen)
@ -833,6 +796,7 @@ if(DOXYGEN_FOUND)
set(BUILD_DOXYGEN_DOCS ON CACHE BOOL "Generate HTML docs using Doxygen")
endif()
# ----------------------------------------------------------------------------
# UPDATE CONFIG FILES & SCRIPTS:
#
@ -850,6 +814,7 @@ set(OPENCV_CONFIG_FILE_INCLUDE_DIR "${CMAKE_BINARY_DIR}/" CACHE PATH "Where to c
message(STATUS "Parsing 'cvconfig.h.cmake'")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cvconfig.h.cmake" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
# ---------------------------------------------------------------------------
# The C/C++ include & link directories:
# ---------------------------------------------------------------------------
@ -859,6 +824,7 @@ include_directories("."
"${CMAKE_CURRENT_SOURCE_DIR}/include/opencv"
)
# ----------------------------------------------------------------------------
# Set the maximum level of warnings:
# ----------------------------------------------------------------------------
@ -1054,14 +1020,15 @@ if (WIN32 AND MSVC)
string(REPLACE "/EHsc" "/EHsc-" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
string(REPLACE "/EHsc" "/EHsc-" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
string(REPLACE "/Zm1000" " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/Zm1000" " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "/Zm1000" " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "/Zm1000" " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif()
if("${CMAKE_CONFIGURE_LDFLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_CONFIGURE_LDFLAGS}")
endif("${CMAKE_CONFIGURE_LDFLAGS}")
# ----------------------------------------------------------------------------
# PROCESS SUBDIRECTORIES:
# ----------------------------------------------------------------------------
@ -1075,6 +1042,7 @@ else()
set(OPENCV_DOC_INSTALL_PATH share/opencv/doc)
endif()
# --------------------------------------------------------------------------------------------
# Installation for CMake Module: OpenCVConfig.cmake
# Part 1/3: ${BIN_DIR}/OpenCVConfig.cmake -> For use *without* "make install"
@ -1279,7 +1247,7 @@ add_subdirectory(3rdparty)
# ----------------------------------------------------------------------------
# Sumary:
# Summary:
# ----------------------------------------------------------------------------
message(STATUS "")
message(STATUS "General configuration for opencv ${OPENCV_VERSION} =====================================")
@ -1403,8 +1371,11 @@ else()
message(STATUS " Python numpy: NO (Python interface will not cover OpenCV 2.x API)")
endif()
if(IPP_FOUND AND USE_IPP)
message(STATUS " Use IPP: ${IPP_PATH}")
if(WITH_IPP AND IPP_FOUND)
message(STATUS " Use IPP: ${IPP_LATEST_VERSION_STR} [${IPP_LATEST_VERSION_MAJOR}.${IPP_LATEST_VERSION_MINOR}.${IPP_LATEST_VERSION_BUILD}]")
message(STATUS " at: ${IPP_ROOT_DIR}")
elseif(WITH_IPP AND NOT IPP_FOUND)
message(STATUS " Use IPP: IPP not found")
else()
message(STATUS " Use IPP: NO")
endif()
@ -1418,7 +1389,7 @@ endif()
if (HAVE_CUDA)
message(STATUS " Use Cuda: YES")
else()
message(STATUS " Use Cuda: No")
message(STATUS " Use Cuda: NO")
endif()
if(HAVE_EIGEN2)

View File

@ -41,5 +41,4 @@
#include "precomp.hpp"
/* End of file. */

View File

@ -2229,7 +2229,7 @@ void dct( const Mat& src0, Mat& dst, int flags )
Mat src = src0;
int type = src.type(), depth = src.depth();
void /* *spec_dft = 0, */ *spec = 0;
double scale = 1.;
int prev_len = 0, nf = 0, stage, end_stage;
uchar *src_dft_buf = 0, *dst_dft_buf = 0;

View File

@ -278,7 +278,7 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
DT* dst = (DT*)dstmat.data;
size_t step1 = srcmat1.step/sizeof(src1[0]);
size_t step2 = srcmat2.step/sizeof(src2[0]);
size_t step = dstmat.step/sizeof(dst[0]);
size_t step = dstmat.step/sizeof(dst[0]);
Size size = getContinuousSize( srcmat1, srcmat2, dstmat, dstmat.channels() );
if( size.width == 1 )
@ -290,7 +290,8 @@ binaryOpC1_( const Mat& srcmat1, const Mat& srcmat2, Mat& dstmat )
for( ; size.height--; src1 += step1, src2 += step2, dst += step )
{
int x = vecOp(src1, src2, dst, size.width);
int x;
x = vecOp(src1, src2, dst, size.width);
for( ; x <= size.width - 4; x += 4 )
{
DT f0, f1;

View File

@ -434,7 +434,6 @@ private:
void mousePressEvent(QMouseEvent *event);
void mouseReleaseEvent(QMouseEvent *event);
void mouseDoubleClickEvent(QMouseEvent *event);
//void dragMoveEvent(QDragMoveEvent *event);
void drawInstructions(QPainter *painter);
void drawViewOverview(QPainter *painter);
void drawImgRegion(QPainter *painter);

View File

@ -42,7 +42,9 @@
#include "precomp.hpp"
#include <iterator>
#ifdef HAVE_IPP
#include "ipp.h"
#endif
/****************************************************************************************\
The code below is an implementation of HOG (Histogram-of-Oriented Gradients)
descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.
@ -216,12 +218,39 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
int _nbins = nbins;
float angleScale = (float)(_nbins/CV_PI);
#ifdef HAVE_IPP
Mat lutimg(img.rows,img.cols,CV_MAKETYPE(CV_32F,cn));
Mat hidxs(1, width, CV_32F);
Ipp32f *pHidxs = (Ipp32f*)hidxs.data;
Ipp32f *pAngles = (Ipp32f*)Angle.data;
IppiSize roiSize;
roiSize.width = img.cols;
roiSize.height = img.rows;
for( y = 0; y < roiSize.height; y++ )
{
const uchar* imgPtr = img.data + y*img.step;
float* imglutPtr = (float*)(lutimg.data + y*lutimg.step);
for( x = 0; x < roiSize.width*cn; x++ )
{
imglutPtr[x] = lut[imgPtr[x]];
}
}
#endif
for( y = 0; y < gradsize.height; y++ )
{
#ifdef HAVE_IPP
const float* imgPtr = (float*)(lutimg.data + lutimg.step*ymap[y]);
const float* prevPtr = (float*)(lutimg.data + lutimg.step*ymap[y-1]);
const float* nextPtr = (float*)(lutimg.data + lutimg.step*ymap[y+1]);
#else
const uchar* imgPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
#endif
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
@ -230,8 +259,13 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
for( x = 0; x < width; x++ )
{
int x1 = xmap[x];
#ifdef HAVE_IPP
dbuf[x] = (float)(imgPtr[xmap[x+1]] - imgPtr[xmap[x-1]]);
dbuf[width + x] = (float)(nextPtr[x1] - prevPtr[x1]);
#else
dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
#endif
}
}
else
@ -239,9 +273,32 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
for( x = 0; x < width; x++ )
{
int x1 = xmap[x]*3;
float dx0, dy0, dx, dy, mag0, mag;
#ifdef HAVE_IPP
const float* p2 = imgPtr + xmap[x+1]*3;
const float* p0 = imgPtr + xmap[x-1]*3;
dx0 = p2[2] - p0[2];
dy0 = nextPtr[x1+2] - prevPtr[x1+2];
mag0 = dx0*dx0 + dy0*dy0;
dx = p2[1] - p0[1];
dy = nextPtr[x1+1] - prevPtr[x1+1];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dx = p2[0] - p0[0];
dy = nextPtr[x1] - prevPtr[x1];
mag = dx*dx + dy*dy;
#else
const uchar* p2 = imgPtr + xmap[x+1]*3;
const uchar* p0 = imgPtr + xmap[x-1]*3;
float dx0, dy0, dx, dy, mag0, mag;
dx0 = lut[p2[2]] - lut[p0[2]];
dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
@ -261,7 +318,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
dx = lut[p2[0]] - lut[p0[0]];
dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
mag = dx*dx + dy*dy;
#endif
if( mag0 < mag )
{
dx0 = dx;
@ -273,14 +330,35 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
dbuf[x+width] = dy0;
}
}
cartToPolar( Dx, Dy, Mag, Angle, false );
#ifdef HAVE_IPP
ippsCartToPolar_32f((const Ipp32f*)Dx.data, (const Ipp32f*)Dy.data, (Ipp32f*)Mag.data, pAngles, width);
for( x = 0; x < width; x++ )
{
if(pAngles[x] < 0.f) pAngles[x]+=(Ipp32f)(CV_PI*2.);
}
ippsNormalize_32f(pAngles, pAngles, width, 0.5f/angleScale, 1.f/angleScale);
ippsFloor_32f(pAngles,(Ipp32f*)hidxs.data,width);
ippsSub_32f_I((Ipp32f*)hidxs.data,pAngles,width);
ippsMul_32f_I((Ipp32f*)Mag.data,pAngles,width);
ippsSub_32f_I(pAngles,(Ipp32f*)Mag.data,width);
ippsRealToCplx_32f((Ipp32f*)Mag.data,pAngles,(Ipp32fc*)gradPtr,width);
#else
cartToPolar( Dx, Dy, Mag, Angle, false );
#endif
for( x = 0; x < width; x++ )
{
#ifdef HAVE_IPP
int hidx = (int)pHidxs[x];
#else
float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f;
int hidx = cvFloor(angle);
angle -= hidx;
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*angle;
#endif
if( hidx < 0 )
hidx += _nbins;
else if( hidx >= _nbins )
@ -291,9 +369,7 @@ void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
hidx++;
hidx &= hidx < _nbins ? -1 : 0;
qanglePtr[x*2+1] = (uchar)hidx;
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*angle;
}
}
}
}
@ -576,9 +652,12 @@ const float* HOGCache::getBlock(Point pt, float* buf)
const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;
CV_Assert( blockHist != 0 );
#ifdef HAVE_IPP
ippsZero_32f(blockHist,blockHistogramSize);
#else
for( k = 0; k < blockHistogramSize; k++ )
blockHist[k] = 0.f;
#endif
const PixData* _pixData = &pixData[0];
@ -658,20 +737,40 @@ const float* HOGCache::getBlock(Point pt, float* buf)
// Normalizes one block histogram in place.
//
// _hist points to blockHistogramSize floats. The sequence is:
//   1. scale by 1/(sqrt(sum of squares) + 0.1*size),
//   2. clip every bin at descriptor->L2HysThreshold,
//   3. scale again by 1/(sqrt(sum of squares) + 1e-3).
// When HAVE_IPP is defined the element-wise loops are replaced by the
// corresponding IPP calls; otherwise plain loops are used.
void HOGCache::normalizeBlockHistogram(float* _hist) const
{
    float* const bins = _hist;
    const size_t sz = blockHistogramSize;
    const float clip = (float)descriptor->L2HysThreshold;
    float energy = 0;

#ifdef HAVE_IPP
    // First pass: energy of the raw histogram, then the initial scaling.
    ippsDotProd_32f(bins, bins, sz, &energy);
    float gain = 1.f/(std::sqrt(energy) + sz*0.1f);
    ippsMulC_32f_I(gain, bins, sz);

    // IPP counterpart of the min(value, clip) step in the portable branch.
    ippsThreshold_32f_I(bins, sz, clip, ippCmpGreater);

    // Second pass: energy of the clipped histogram, then the final scaling.
    ippsDotProd_32f(bins, bins, sz, &energy);
    gain = 1.f/(std::sqrt(energy) + 1e-3f);
    ippsMulC_32f_I(gain, bins, sz);
#else
    // First pass: energy of the raw histogram.
    for( size_t k = 0; k < sz; ++k )
        energy += bins[k]*bins[k];
    float gain = 1.f/(std::sqrt(energy) + sz*0.1f);

    // Scale and clip each bin while accumulating the new energy.
    energy = 0;
    for( size_t k = 0; k < sz; ++k )
    {
        bins[k] = std::min(bins[k]*gain, clip);
        energy += bins[k]*bins[k];
    }

    // Final scaling of the clipped histogram.
    gain = 1.f/(std::sqrt(energy) + 1e-3f);
    for( size_t k = 0; k < sz; ++k )
        bins[k] *= gain;
#endif
}
@ -741,8 +840,12 @@ void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
float* dst = descriptor + bj.histOfs;
const float* src = cache.getBlock(pt, dst);
if( src != dst )
#ifdef HAVE_IPP
ippsCopy_32f(src,dst,blockHistogramSize);
#else
for( int k = 0; k < blockHistogramSize; k++ )
dst[k] = src[k];
#endif
}
}
}
@ -796,18 +899,28 @@ void HOGDescriptor::detect(const Mat& img,
}
double s = rho;
const float* svmVec = &svmDetector[0];
#ifdef HAVE_IPP
int j;
#else
int j, k;
#endif
for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
{
const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgOffset;
const float* vec = cache.getBlock(pt, &blockHist[0]);
#ifdef HAVE_IPP
Ipp32f partSum;
ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
s += (double)partSum;
#else
for( k = 0; k <= blockHistogramSize - 4; k += 4 )
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k < blockHistogramSize; k++ )
s += vec[k]*svmVec[k];
#endif
}
if( s >= hitThreshold )
hits.push_back(pt0);