mirror of
https://github.com/opencv/opencv.git
synced 2024-12-04 00:39:11 +08:00
Merge remote-tracking branch 'origin/2.4'
Pull requests: #943 from jet47:cuda-5.5-support #944 from jet47:cmake-2.8.11-cuda-fix #912 from SpecLad:contributing #934 from SpecLad:parallel-for #931 from jet47:gpu-test-fixes #932 from bitwangyaoyao:2.4_fixBFM #918 from bitwangyaoyao:2.4_samples #924 from pengx17:2.4_arithm_fix #925 from pengx17:2.4_canny_tmp_fix #927 from bitwangyaoyao:2.4_perf #930 from pengx17:2.4_haar_ext #928 from apavlenko:bugfix_3027 #920 from asmorkalov:android_move #910 from pengx17:2.4_oclgfft #913 from janm399:2.4 #916 from bitwangyaoyao:2.4_fixPyrLK #919 from abidrahmank:2.4 #923 from pengx17:2.4_macfix Conflicts: modules/calib3d/src/stereobm.cpp modules/features2d/src/detectors.cpp modules/gpu/src/error.cpp modules/gpu/src/precomp.hpp modules/imgproc/src/distransform.cpp modules/imgproc/src/morph.cpp modules/ocl/include/opencv2/ocl/ocl.hpp modules/ocl/perf/perf_color.cpp modules/ocl/perf/perf_imgproc.cpp modules/ocl/perf/perf_match_template.cpp modules/ocl/perf/precomp.cpp modules/ocl/perf/precomp.hpp modules/ocl/src/arithm.cpp modules/ocl/src/canny.cpp modules/ocl/src/filtering.cpp modules/ocl/src/haar.cpp modules/ocl/src/hog.cpp modules/ocl/src/imgproc.cpp modules/ocl/src/opencl/haarobjectdetect.cl modules/ocl/src/pyrlk.cpp modules/video/src/bgfg_gaussmix2.cpp modules/video/src/lkpyramid.cpp platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh platforms/scripts/ABI_compat_generator.py samples/ocl/facedetect.cpp
This commit is contained in:
commit
bae85660da
@ -403,7 +403,7 @@ if(ANDROID)
|
||||
if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13)
|
||||
# Tell the user why Android tests and samples are being skipped.
message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will not be compiled.")
|
||||
endif()
|
||||
elseif(ANT_EXECUTABLE)
|
||||
else()
|
||||
find_package(JNI)
|
||||
endif()
|
||||
|
||||
@ -457,15 +457,15 @@ if(BUILD_EXAMPLES OR BUILD_ANDROID_EXAMPLES OR INSTALL_PYTHON_EXAMPLES)
|
||||
endif()
|
||||
|
||||
if(ANDROID)
|
||||
add_subdirectory(android/service)
|
||||
add_subdirectory(platforms/android/service)
|
||||
endif()
|
||||
|
||||
if(BUILD_ANDROID_PACKAGE)
|
||||
add_subdirectory(android/package)
|
||||
add_subdirectory(platforms/android/package)
|
||||
endif()
|
||||
|
||||
if (ANDROID)
|
||||
add_subdirectory(android/libinfo)
|
||||
add_subdirectory(platforms/android/libinfo)
|
||||
endif()
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
@ -840,7 +840,7 @@ status(" ant:" ANT_EXECUTABLE THEN "${ANT_EXECUTABLE} (ver ${A
|
||||
if(NOT ANDROID)
|
||||
status(" JNI:" JNI_INCLUDE_DIRS THEN "${JNI_INCLUDE_DIRS}" ELSE NO)
|
||||
endif()
|
||||
status(" Java tests:" BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS) THEN YES ELSE NO)
|
||||
status(" Java tests:" BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java) THEN YES ELSE NO)
|
||||
|
||||
# ========================== documentation ==========================
|
||||
if(BUILD_DOCS)
|
||||
|
@ -1,11 +0,0 @@
|
||||
We greatly appreciate your support and contributions, and they are always welcome!
|
||||
|
||||
GitHub pull requests are a convenient way to contribute to the OpenCV project. Good pull requests have all of these attributes:
|
||||
|
||||
* Are scoped to one specific issue
|
||||
* Include a test to demonstrate the correctness
|
||||
* Update the docs if relevant
|
||||
* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide)
|
||||
* Are not cluttered with "oops" commits
|
||||
|
||||
You can find more details about the contribution process at http://opencv.org/contribute.html
|
11
README
11
README
@ -4,3 +4,14 @@ Homepage: http://opencv.org
|
||||
Online docs: http://docs.opencv.org
|
||||
Q&A forum: http://answers.opencv.org
|
||||
Dev zone: http://code.opencv.org
|
||||
|
||||
Please read before starting work on a pull request:
|
||||
http://code.opencv.org/projects/opencv/wiki/How_to_contribute
|
||||
|
||||
Summary of guidelines:
|
||||
|
||||
* One pull request per issue;
|
||||
* Choose the right base branch;
|
||||
* Include tests and documentation;
|
||||
* Clean up "oops" commits before submitting;
|
||||
* Follow the coding style guide.
|
||||
|
@ -1,4 +1,6 @@
|
||||
# Copyright (c) 2010-2011, Ethan Rublee
|
||||
# Point users of this legacy stub at the new location of the toolchain file.
message(STATUS "Android toolchain was moved to platforms/android!")
|
||||
# Announce that this legacy copy of the file is scheduled for removal.
message(STATUS "This file is deprecated and will be removed!")
|
||||
|
||||
# Copyright (c) 2011-2013, Andrey Kamaev
|
||||
# All rights reserved.
|
||||
#
|
||||
|
1
android/readme.txt
Normal file
1
android/readme.txt
Normal file
@ -0,0 +1 @@
|
||||
All Android-specific sources have been moved to platforms/android.
|
@ -1,90 +0,0 @@
|
||||
@ECHO OFF
|
||||
|
||||
:: enable command extensions
|
||||
VERIFY BADVALUE 2>NUL
|
||||
:: Abort this script if command extensions cannot be enabled.
:: EXIT takes the /B switch (forward slash) to leave only the current batch script.
SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT /B)
|
||||
|
||||
:: build environment
|
||||
SET SOURCE_DIR=%cd%
|
||||
IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0)
|
||||
IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0)
|
||||
|
||||
:: load configuration
|
||||
PUSHD %~dp0
|
||||
SET SCRIPTS_DIR=%cd%
|
||||
IF EXIST .\wincfg.cmd CALL .\wincfg.cmd
|
||||
POPD
|
||||
|
||||
:: inherit old names
|
||||
IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE%
|
||||
IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE%
|
||||
|
||||
:: defaults
|
||||
IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build
|
||||
IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a
|
||||
SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR%
|
||||
|
||||
:: check that all required variables defined
|
||||
PUSHD .
|
||||
IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end)
|
||||
(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end)
|
||||
|
||||
IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end)
|
||||
IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end)
|
||||
|
||||
IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked
|
||||
|
||||
IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end)
|
||||
(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end)
|
||||
|
||||
IF NOT DEFINED ANT_DIR (ECHO. & ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end)
|
||||
(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end)
|
||||
|
||||
IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end)
|
||||
(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end)
|
||||
|
||||
:required_variables_checked
|
||||
POPD
|
||||
|
||||
:: check for ninja
|
||||
echo "%MAKE%"|findstr /i ninja >nul:
|
||||
IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1)
|
||||
IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles)
|
||||
|
||||
:: create build dir
|
||||
IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL
|
||||
MKDIR "%BUILD_DIR%" 2>NUL
|
||||
PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end)
|
||||
|
||||
:: run cmake
|
||||
ECHO. & ECHO Runnning cmake...
|
||||
ECHO ANDROID_ABI=%ANDROID_ABI%
|
||||
ECHO.
|
||||
IF NOT %BUILD_OPENCV%==1 GOTO other-cmake
|
||||
:opencv-cmake
|
||||
("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin
|
||||
ECHO. & ECHO cmake failed & GOTO end
|
||||
:other-cmake
|
||||
("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin
|
||||
ECHO. & ECHO cmake failed & GOTO end
|
||||
:cmakefin
|
||||
|
||||
:: run make
|
||||
ECHO. & ECHO Building native libs...
|
||||
IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end)
|
||||
IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end)
|
||||
|
||||
IF NOT %BUILD_JAVA_PART%==1 GOTO end
|
||||
POPD && PUSHD %SOURCE_DIR%
|
||||
|
||||
:: configure java part
|
||||
ECHO. & ECHO Updating Android project...
|
||||
(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end)
|
||||
|
||||
:: compile java part
|
||||
ECHO. & ECHO Compiling Android project...
|
||||
(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end)
|
||||
|
||||
:end
|
||||
POPD
|
||||
ENDLOCAL
|
@ -1,5 +0,0 @@
|
||||
@ECHO OFF
|
||||
|
||||
PUSHD %~dp0..
|
||||
CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
|
||||
POPD
|
@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
cd `dirname $0`/..
|
||||
|
||||
mkdir -p build_armeabi
|
||||
cd build_armeabi
|
||||
|
||||
cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
|
||||
|
@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
cd `dirname $0`/..
|
||||
|
||||
mkdir -p build_mips
|
||||
cd build_mips
|
||||
|
||||
cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
|
||||
|
@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
cd `dirname $0`/..
|
||||
|
||||
mkdir -p build_neon
|
||||
cd build_neon
|
||||
|
||||
cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
|
||||
|
@ -1,7 +0,0 @@
|
||||
#!/bin/sh
|
||||
cd `dirname $0`/..
|
||||
|
||||
mkdir -p build_service
|
||||
cd build_service
|
||||
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../..
|
@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
cd `dirname $0`/..
|
||||
|
||||
mkdir -p build_x86
|
||||
cd build_x86
|
||||
|
||||
cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../..
|
||||
|
@ -1,30 +0,0 @@
|
||||
:: variables required for OpenCV build ::
|
||||
:: Note: all paths should be specified without trailing slashes!
|
||||
SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b
|
||||
SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe
|
||||
SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe
|
||||
|
||||
:: variables required for android-opencv build ::
|
||||
SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows
|
||||
SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2
|
||||
SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25
|
||||
|
||||
:: configuration options ::
|
||||
:::: general ARM-V7 settings
|
||||
SET ANDROID_ABI=armeabi-v7a
|
||||
SET BUILD_DIR=build
|
||||
|
||||
:::: uncomment following lines to compile for old emulator or old device
|
||||
::SET ANDROID_ABI=armeabi
|
||||
::SET BUILD_DIR=build_armeabi
|
||||
|
||||
:::: uncomment following lines to compile for ARM-V7 with NEON support
|
||||
::SET ANDROID_ABI=armeabi-v7a with NEON
|
||||
::SET BUILD_DIR=build_neon
|
||||
|
||||
:::: uncomment following lines to compile for x86
|
||||
::SET ANDROID_ABI=x86
|
||||
::SET BUILD_DIR=build_x86
|
||||
|
||||
:::: other options
|
||||
::SET ANDROID_NATIVE_API_LEVEL=8 &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API
|
@ -1,89 +0,0 @@
|
||||
# Makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
PAPER =
|
||||
BUILDDIR = _build
|
||||
|
||||
# Internal variables.
|
||||
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||
PAPEROPT_letter = -D latex_paper_size=letter
|
||||
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||
|
||||
.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
|
||||
|
||||
help:
|
||||
@echo "Please use \`make <target>' where <target> is one of"
|
||||
@echo " html to make standalone HTML files"
|
||||
@echo " dirhtml to make HTML files named index.html in directories"
|
||||
@echo " pickle to make pickle files"
|
||||
@echo " json to make JSON files"
|
||||
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||
@echo " qthelp to make HTML files and a qthelp project"
|
||||
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||
@echo " linkcheck to check all external links for integrity"
|
||||
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||
|
||||
clean:
|
||||
-rm -rf $(BUILDDIR)/*
|
||||
|
||||
html:
|
||||
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||
|
||||
dirhtml:
|
||||
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||
@echo
|
||||
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||
|
||||
pickle:
|
||||
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||
@echo
|
||||
@echo "Build finished; now you can process the pickle files."
|
||||
|
||||
json:
|
||||
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||
@echo
|
||||
@echo "Build finished; now you can process the JSON files."
|
||||
|
||||
htmlhelp:
|
||||
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||
|
||||
qthelp:
|
||||
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||
@echo
|
||||
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp"
|
||||
@echo "To view the help file:"
|
||||
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc"
|
||||
|
||||
latex:
|
||||
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||
@echo
|
||||
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||
@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
|
||||
"run these through (pdf)latex."
|
||||
|
||||
changes:
|
||||
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||
@echo
|
||||
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||
|
||||
linkcheck:
|
||||
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||
@echo
|
||||
@echo "Link check complete; look for any errors in the above output " \
|
||||
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||
|
||||
doctest:
|
||||
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||
@echo "Testing of doctests in the sources finished, look at the " \
|
||||
"results in $(BUILDDIR)/doctest/output.txt."
|
@ -815,7 +815,7 @@ float CvCascadeBoostTrainData::getVarValue( int vi, int si )
|
||||
}
|
||||
|
||||
|
||||
struct FeatureIdxOnlyPrecalc
|
||||
struct FeatureIdxOnlyPrecalc : ParallelLoopBody
|
||||
{
|
||||
FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
|
||||
{
|
||||
@ -825,11 +825,11 @@ struct FeatureIdxOnlyPrecalc
|
||||
idst = _buf->data.i;
|
||||
is_buf_16u = _is_buf_16u;
|
||||
}
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
cv::AutoBuffer<float> valCache(sample_count);
|
||||
float* valCachePtr = (float*)valCache;
|
||||
for ( int fi = range.begin(); fi < range.end(); fi++)
|
||||
for ( int fi = range.start; fi < range.end; fi++)
|
||||
{
|
||||
for( int si = 0; si < sample_count; si++ )
|
||||
{
|
||||
@ -852,7 +852,7 @@ struct FeatureIdxOnlyPrecalc
|
||||
bool is_buf_16u;
|
||||
};
|
||||
|
||||
struct FeatureValAndIdxPrecalc
|
||||
struct FeatureValAndIdxPrecalc : ParallelLoopBody
|
||||
{
|
||||
FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
|
||||
{
|
||||
@ -863,9 +863,9 @@ struct FeatureValAndIdxPrecalc
|
||||
idst = _buf->data.i;
|
||||
is_buf_16u = _is_buf_16u;
|
||||
}
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
for ( int fi = range.begin(); fi < range.end(); fi++)
|
||||
for ( int fi = range.start; fi < range.end; fi++)
|
||||
{
|
||||
for( int si = 0; si < sample_count; si++ )
|
||||
{
|
||||
@ -889,7 +889,7 @@ struct FeatureValAndIdxPrecalc
|
||||
bool is_buf_16u;
|
||||
};
|
||||
|
||||
struct FeatureValOnlyPrecalc
|
||||
struct FeatureValOnlyPrecalc : ParallelLoopBody
|
||||
{
|
||||
FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
|
||||
{
|
||||
@ -897,9 +897,9 @@ struct FeatureValOnlyPrecalc
|
||||
valCache = _valCache;
|
||||
sample_count = _sample_count;
|
||||
}
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
for ( int fi = range.begin(); fi < range.end(); fi++)
|
||||
for ( int fi = range.start; fi < range.end; fi++)
|
||||
for( int si = 0; si < sample_count; si++ )
|
||||
valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
|
||||
}
|
||||
@ -913,12 +913,12 @@ void CvCascadeBoostTrainData::precalculate()
|
||||
int minNum = MIN( numPrecalcVal, numPrecalcIdx);
|
||||
|
||||
double proctime = -TIME( 0 );
|
||||
parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
|
||||
FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
|
||||
parallel_for( BlockedRange(0, minNum),
|
||||
FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
|
||||
parallel_for( BlockedRange(minNum, numPrecalcVal),
|
||||
FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
|
||||
parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
|
||||
FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
|
||||
parallel_for_( Range(0, minNum),
|
||||
FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
|
||||
parallel_for_( Range(minNum, numPrecalcVal),
|
||||
FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
|
||||
cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
|
||||
}
|
||||
|
||||
|
@ -26,6 +26,15 @@ if(CUDA_FOUND)
|
||||
set(HAVE_CUBLAS 1)
|
||||
endif()
|
||||
|
||||
if(${CUDA_VERSION} VERSION_LESS "5.5")
|
||||
find_cuda_helper_libs(npp)
|
||||
else()
|
||||
find_cuda_helper_libs(nppc)
|
||||
find_cuda_helper_libs(nppi)
|
||||
find_cuda_helper_libs(npps)
|
||||
set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(WITH_NVCUVID)
|
||||
find_cuda_helper_libs(nvcuvid)
|
||||
set(HAVE_NVCUVID 1)
|
||||
@ -136,8 +145,6 @@ if(CUDA_FOUND)
|
||||
|
||||
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)
|
||||
|
||||
find_cuda_helper_libs(npp)
|
||||
|
||||
macro(ocv_cuda_compile VAR)
|
||||
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
|
||||
set(${var}_backup_in_cuda_compile_ "${${var}}")
|
||||
|
@ -162,7 +162,7 @@ if(UNIX)
|
||||
endif()
|
||||
|
||||
if(ANDROID)
|
||||
install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
|
||||
install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/)
|
||||
endif()
|
||||
|
||||
# --------------------------------------------------------------------------------------------
|
||||
|
@ -53,8 +53,8 @@ if(BUILD_DOCS AND HAVE_SPHINX)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst")
|
||||
file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg")
|
||||
file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst")
|
||||
file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg")
|
||||
list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF})
|
||||
list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT})
|
||||
|
||||
|
@ -239,7 +239,7 @@ latex_documents = [
|
||||
u'', 'manual'),
|
||||
('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials',
|
||||
u'', 'manual'),
|
||||
('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
|
||||
('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual',
|
||||
u'', 'manual'),
|
||||
]
|
||||
|
||||
|
@ -10,7 +10,7 @@ Welcome to opencv documentation!
|
||||
:maxdepth: 2
|
||||
|
||||
modules/refman.rst
|
||||
android/refman.rst
|
||||
platforms/android/refman.rst
|
||||
doc/user_guide/user_guide.rst
|
||||
doc/tutorials/tutorials.rst
|
||||
|
||||
|
@ -6,7 +6,7 @@ set(the_description "Auxiliary module for Android native camera support")
|
||||
set(OPENCV_MODULE_TYPE STATIC)
|
||||
|
||||
ocv_define_module(androidcamera INTERNAL opencv_core log dl)
|
||||
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include")
|
||||
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include")
|
||||
|
||||
# Android source tree for native camera
|
||||
SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH
|
||||
|
@ -117,31 +117,6 @@ namespace cv
|
||||
transform(points, modif_points, transformation);
|
||||
}
|
||||
|
||||
class Mutex
|
||||
{
|
||||
public:
|
||||
Mutex() {
|
||||
}
|
||||
void lock()
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
resultsMutex.lock();
|
||||
#endif
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
resultsMutex.unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex resultsMutex;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct CameraParameters
|
||||
{
|
||||
void init(Mat _intrinsics, Mat _distCoeffs)
|
||||
|
@ -120,11 +120,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return dst;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ RGB2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
|
||||
__device__ __forceinline__ RGB2RGB(const RGB2RGB& other_)
|
||||
:unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2RGB() {}
|
||||
__host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
|
||||
};
|
||||
|
||||
template <> struct RGB2RGB<uchar, 4, 4, 2> : unary_function<uint, uint>
|
||||
@ -141,8 +138,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return dst;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ RGB2RGB():unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB2RGB() {}
|
||||
__host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -203,8 +200,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ RGB2RGB5x5():unary_function<uchar3, ushort>(){}
|
||||
__device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uchar3, ushort>(){}
|
||||
__host__ __device__ __forceinline__ RGB2RGB5x5() {}
|
||||
__host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
|
||||
};
|
||||
|
||||
template<int bidx, int green_bits> struct RGB2RGB5x5<4, bidx,green_bits> : unary_function<uint, ushort>
|
||||
@ -214,8 +211,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return RGB2RGB5x5Converter<green_bits, bidx>::cvt(src);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ RGB2RGB5x5():unary_function<uint, ushort>(){}
|
||||
__device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function<uint, ushort>(){}
|
||||
__host__ __device__ __forceinline__ RGB2RGB5x5() {}
|
||||
__host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -282,8 +279,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uchar3>(){}
|
||||
__device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uchar3>(){}
|
||||
__host__ __device__ __forceinline__ RGB5x52RGB() {}
|
||||
__host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
|
||||
|
||||
};
|
||||
|
||||
@ -295,8 +292,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
RGB5x52RGBConverter<green_bits, bidx>::cvt(src, dst);
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB5x52RGB():unary_function<ushort, uint>(){}
|
||||
__device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function<ushort, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB5x52RGB() {}
|
||||
__host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -325,9 +322,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Gray2RGB():unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ Gray2RGB(const Gray2RGB& other_)
|
||||
: unary_function<T, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ Gray2RGB() {}
|
||||
__host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
|
||||
};
|
||||
|
||||
template <> struct Gray2RGB<uchar, 4> : unary_function<uchar, uint>
|
||||
@ -342,8 +338,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Gray2RGB():unary_function<uchar, uint>(){}
|
||||
__device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function<uchar, uint>(){}
|
||||
__host__ __device__ __forceinline__ Gray2RGB() {}
|
||||
__host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -384,8 +380,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return Gray2RGB5x5Converter<green_bits>::cvt(src);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Gray2RGB5x5():unary_function<uchar, ushort>(){}
|
||||
__device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function<uchar, ushort>(){}
|
||||
__host__ __device__ __forceinline__ Gray2RGB5x5() {}
|
||||
__host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -426,8 +422,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB5x52GrayConverter<green_bits>::cvt(src);
|
||||
}
|
||||
__device__ __forceinline__ RGB5x52Gray() : unary_function<ushort, uchar>(){}
|
||||
__device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function<ushort, uchar>(){}
|
||||
__host__ __device__ __forceinline__ RGB5x52Gray() {}
|
||||
__host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -467,9 +463,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB2GrayConvert<bidx>(&src.x);
|
||||
}
|
||||
__device__ __forceinline__ RGB2Gray() : unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
|
||||
__device__ __forceinline__ RGB2Gray(const RGB2Gray& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, T>(){}
|
||||
__host__ __device__ __forceinline__ RGB2Gray() {}
|
||||
__host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct RGB2Gray<uchar, 4, bidx> : unary_function<uint, uchar>
|
||||
@ -478,8 +473,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB2GrayConvert<bidx>(src);
|
||||
}
|
||||
__device__ __forceinline__ RGB2Gray() : unary_function<uint, uchar>(){}
|
||||
__device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function<uint, uchar>(){}
|
||||
__host__ __device__ __forceinline__ RGB2Gray() {}
|
||||
__host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -529,10 +524,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
RGB2YUVConvert<bidx>(&src.x, dst);
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2YUV()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ RGB2YUV(const RGB2YUV& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2YUV() {}
|
||||
__host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -609,10 +602,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ YUV2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ YUV2RGB(const YUV2RGB& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ YUV2RGB() {}
|
||||
__host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct YUV2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
|
||||
@ -621,8 +612,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return YUV2RGBConvert<bidx>(src);
|
||||
}
|
||||
__device__ __forceinline__ YUV2RGB() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ YUV2RGB() {}
|
||||
__host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -689,10 +680,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
RGB2YCrCbConvert<bidx>(&src.x, dst);
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2YCrCb()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2YCrCb() {}
|
||||
__host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct RGB2YCrCb<uchar, 4, 4, bidx> : unary_function<uint, uint>
|
||||
@ -702,8 +691,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return RGB2YCrCbConvert<bidx>(src);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ RGB2YCrCb() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB2YCrCb() {}
|
||||
__host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -771,10 +760,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ YCrCb2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ YCrCb2RGB() {}
|
||||
__host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct YCrCb2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
|
||||
@ -783,8 +770,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return YCrCb2RGBConvert<bidx>(src);
|
||||
}
|
||||
__device__ __forceinline__ YCrCb2RGB() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ YCrCb2RGB() {}
|
||||
__host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -849,10 +836,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2XYZ()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2XYZ() {}
|
||||
__host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct RGB2XYZ<uchar, 4, 4, bidx> : unary_function<uint, uint>
|
||||
@ -861,8 +846,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB2XYZConvert<bidx>(src);
|
||||
}
|
||||
__device__ __forceinline__ RGB2XYZ() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB2XYZ() {}
|
||||
__host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -926,10 +911,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ XYZ2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ XYZ2RGB() {}
|
||||
__host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
|
||||
};
|
||||
|
||||
template <int bidx> struct XYZ2RGB<uchar, 4, 4, bidx> : unary_function<uint, uint>
|
||||
@ -938,8 +921,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return XYZ2RGBConvert<bidx>(src);
|
||||
}
|
||||
__device__ __forceinline__ XYZ2RGB() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ XYZ2RGB() {}
|
||||
__host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1066,10 +1049,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2HSV()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ RGB2HSV(const RGB2HSV& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2HSV() {}
|
||||
__host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
|
||||
};
|
||||
|
||||
template <int bidx, int hr> struct RGB2HSV<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
|
||||
@ -1078,8 +1059,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB2HSVConvert<bidx, hr>(src);
|
||||
}
|
||||
__device__ __forceinline__ RGB2HSV():unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB2HSV() {}
|
||||
__host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1208,10 +1189,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ HSV2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ HSV2RGB(const HSV2RGB& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ HSV2RGB() {}
|
||||
__host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
|
||||
};
|
||||
|
||||
template <int bidx, int hr> struct HSV2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
|
||||
@ -1220,8 +1199,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return HSV2RGBConvert<bidx, hr>(src);
|
||||
}
|
||||
__device__ __forceinline__ HSV2RGB():unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ HSV2RGB() {}
|
||||
__host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1343,10 +1322,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2HLS()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ RGB2HLS(const RGB2HLS& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ RGB2HLS() {}
|
||||
__host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
|
||||
};
|
||||
|
||||
template <int bidx, int hr> struct RGB2HLS<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
|
||||
@ -1355,8 +1332,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return RGB2HLSConvert<bidx, hr>(src);
|
||||
}
|
||||
__device__ __forceinline__ RGB2HLS() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ RGB2HLS() {}
|
||||
__host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1485,10 +1462,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ HLS2RGB()
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__device__ __forceinline__ HLS2RGB(const HLS2RGB& other_)
|
||||
: unary_function<typename TypeVec<T, scn>::vec_type, typename TypeVec<T, dcn>::vec_type>(){}
|
||||
__host__ __device__ __forceinline__ HLS2RGB() {}
|
||||
__host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
|
||||
};
|
||||
|
||||
template <int bidx, int hr> struct HLS2RGB<uchar, 4, 4, bidx, hr> : unary_function<uint, uint>
|
||||
@ -1497,8 +1472,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return HLS2RGBConvert<bidx, hr>(src);
|
||||
}
|
||||
__device__ __forceinline__ HLS2RGB() : unary_function<uint, uint>(){}
|
||||
__device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function<uint, uint>(){}
|
||||
__host__ __device__ __forceinline__ HLS2RGB() {}
|
||||
__host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1651,8 +1626,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2Lab() {}
|
||||
__device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
|
||||
__host__ __device__ __forceinline__ RGB2Lab() {}
|
||||
__host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
|
||||
};
|
||||
template <int scn, int dcn, bool srgb, int blueIdx>
|
||||
struct RGB2Lab<float, scn, dcn, srgb, blueIdx>
|
||||
@ -1666,8 +1641,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2Lab() {}
|
||||
__device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {}
|
||||
__host__ __device__ __forceinline__ RGB2Lab() {}
|
||||
__host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1764,8 +1739,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Lab2RGB() {}
|
||||
__device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
|
||||
__host__ __device__ __forceinline__ Lab2RGB() {}
|
||||
__host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
|
||||
};
|
||||
template <int scn, int dcn, bool srgb, int blueIdx>
|
||||
struct Lab2RGB<float, scn, dcn, srgb, blueIdx>
|
||||
@ -1779,8 +1754,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Lab2RGB() {}
|
||||
__device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {}
|
||||
__host__ __device__ __forceinline__ Lab2RGB() {}
|
||||
__host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1863,8 +1838,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2Luv() {}
|
||||
__device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
|
||||
__host__ __device__ __forceinline__ RGB2Luv() {}
|
||||
__host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
|
||||
};
|
||||
template <int scn, int dcn, bool srgb, int blueIdx>
|
||||
struct RGB2Luv<float, scn, dcn, srgb, blueIdx>
|
||||
@ -1878,8 +1853,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ RGB2Luv() {}
|
||||
__device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {}
|
||||
__host__ __device__ __forceinline__ RGB2Luv() {}
|
||||
__host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1964,8 +1939,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Luv2RGB() {}
|
||||
__device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
|
||||
__host__ __device__ __forceinline__ Luv2RGB() {}
|
||||
__host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
|
||||
};
|
||||
template <int scn, int dcn, bool srgb, int blueIdx>
|
||||
struct Luv2RGB<float, scn, dcn, srgb, blueIdx>
|
||||
@ -1979,8 +1954,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
return dst;
|
||||
}
|
||||
__device__ __forceinline__ Luv2RGB() {}
|
||||
__device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {}
|
||||
__host__ __device__ __forceinline__ Luv2RGB() {}
|
||||
__host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -63,8 +63,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
__device__ __forceinline__ plus(const plus& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ plus():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ plus() {}
|
||||
__host__ __device__ __forceinline__ plus(const plus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct minus : binary_function<T, T, T>
|
||||
@ -74,8 +74,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a - b;
|
||||
}
|
||||
__device__ __forceinline__ minus(const minus& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ minus():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ minus() {}
|
||||
__host__ __device__ __forceinline__ minus(const minus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct multiplies : binary_function<T, T, T>
|
||||
@ -85,8 +85,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a * b;
|
||||
}
|
||||
__device__ __forceinline__ multiplies(const multiplies& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ multiplies():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ multiplies() {}
|
||||
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct divides : binary_function<T, T, T>
|
||||
@ -96,8 +96,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a / b;
|
||||
}
|
||||
__device__ __forceinline__ divides(const divides& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ divides():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ divides() {}
|
||||
__host__ __device__ __forceinline__ divides(const divides&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct modulus : binary_function<T, T, T>
|
||||
@ -107,8 +107,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a % b;
|
||||
}
|
||||
__device__ __forceinline__ modulus(const modulus& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ modulus():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ modulus() {}
|
||||
__host__ __device__ __forceinline__ modulus(const modulus&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct negate : unary_function<T, T>
|
||||
@ -117,8 +117,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return -a;
|
||||
}
|
||||
__device__ __forceinline__ negate(const negate& other):unary_function<T,T>(){}
|
||||
__device__ __forceinline__ negate():unary_function<T,T>(){}
|
||||
__host__ __device__ __forceinline__ negate() {}
|
||||
__host__ __device__ __forceinline__ negate(const negate&) {}
|
||||
};
|
||||
|
||||
// Comparison Operations
|
||||
@ -129,8 +129,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
__device__ __forceinline__ equal_to(const equal_to& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ equal_to():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ equal_to() {}
|
||||
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct not_equal_to : binary_function<T, T, bool>
|
||||
@ -140,8 +140,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a != b;
|
||||
}
|
||||
__device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ not_equal_to():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ not_equal_to() {}
|
||||
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater : binary_function<T, T, bool>
|
||||
@ -151,8 +151,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a > b;
|
||||
}
|
||||
__device__ __forceinline__ greater(const greater& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ greater():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ greater() {}
|
||||
__host__ __device__ __forceinline__ greater(const greater&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less : binary_function<T, T, bool>
|
||||
@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a < b;
|
||||
}
|
||||
__device__ __forceinline__ less(const less& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ less():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ less() {}
|
||||
__host__ __device__ __forceinline__ less(const less&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct greater_equal : binary_function<T, T, bool>
|
||||
@ -173,8 +173,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a >= b;
|
||||
}
|
||||
__device__ __forceinline__ greater_equal(const greater_equal& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ greater_equal():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ greater_equal() {}
|
||||
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct less_equal : binary_function<T, T, bool>
|
||||
@ -184,8 +184,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a <= b;
|
||||
}
|
||||
__device__ __forceinline__ less_equal(const less_equal& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ less_equal():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ less_equal() {}
|
||||
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
|
||||
};
|
||||
|
||||
// Logical Operations
|
||||
@ -196,8 +196,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a && b;
|
||||
}
|
||||
__device__ __forceinline__ logical_and(const logical_and& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ logical_and():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ logical_and() {}
|
||||
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_or : binary_function<T, T, bool>
|
||||
@ -207,8 +207,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a || b;
|
||||
}
|
||||
__device__ __forceinline__ logical_or(const logical_or& other):binary_function<T,T,bool>(){}
|
||||
__device__ __forceinline__ logical_or():binary_function<T,T,bool>(){}
|
||||
__host__ __device__ __forceinline__ logical_or() {}
|
||||
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct logical_not : unary_function<T, bool>
|
||||
@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return !a;
|
||||
}
|
||||
__device__ __forceinline__ logical_not(const logical_not& other):unary_function<T,bool>(){}
|
||||
__device__ __forceinline__ logical_not():unary_function<T,bool>(){}
|
||||
__host__ __device__ __forceinline__ logical_not() {}
|
||||
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
|
||||
};
|
||||
|
||||
// Bitwise Operations
|
||||
@ -229,8 +229,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a & b;
|
||||
}
|
||||
__device__ __forceinline__ bit_and(const bit_and& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ bit_and():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ bit_and() {}
|
||||
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_or : binary_function<T, T, T>
|
||||
@ -240,8 +240,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a | b;
|
||||
}
|
||||
__device__ __forceinline__ bit_or(const bit_or& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ bit_or():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ bit_or() {}
|
||||
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_xor : binary_function<T, T, T>
|
||||
@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return a ^ b;
|
||||
}
|
||||
__device__ __forceinline__ bit_xor(const bit_xor& other):binary_function<T,T,T>(){}
|
||||
__device__ __forceinline__ bit_xor():binary_function<T,T,T>(){}
|
||||
__host__ __device__ __forceinline__ bit_xor() {}
|
||||
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
|
||||
};
|
||||
|
||||
template <typename T> struct bit_not : unary_function<T, T>
|
||||
@ -261,8 +261,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return ~v;
|
||||
}
|
||||
__device__ __forceinline__ bit_not(const bit_not& other):unary_function<T,T>(){}
|
||||
__device__ __forceinline__ bit_not():unary_function<T,T>(){}
|
||||
__host__ __device__ __forceinline__ bit_not() {}
|
||||
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
|
||||
};
|
||||
|
||||
// Generalized Identity Operations
|
||||
@ -272,8 +272,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return x;
|
||||
}
|
||||
__device__ __forceinline__ identity(const identity& other):unary_function<T,T>(){}
|
||||
__device__ __forceinline__ identity():unary_function<T,T>(){}
|
||||
__host__ __device__ __forceinline__ identity() {}
|
||||
__host__ __device__ __forceinline__ identity(const identity&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
|
||||
@ -282,8 +282,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return lhs;
|
||||
}
|
||||
__device__ __forceinline__ project1st(const project1st& other):binary_function<T1,T2,T1>(){}
|
||||
__device__ __forceinline__ project1st():binary_function<T1,T2,T1>(){}
|
||||
__host__ __device__ __forceinline__ project1st() {}
|
||||
__host__ __device__ __forceinline__ project1st(const project1st&) {}
|
||||
};
|
||||
|
||||
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
|
||||
@ -292,8 +292,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return rhs;
|
||||
}
|
||||
__device__ __forceinline__ project2nd(const project2nd& other):binary_function<T1,T2,T2>(){}
|
||||
__device__ __forceinline__ project2nd():binary_function<T1,T2,T2>(){}
|
||||
__host__ __device__ __forceinline__ project2nd() {}
|
||||
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
|
||||
};
|
||||
|
||||
// Min/Max Operations
|
||||
@ -302,8 +302,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
template <> struct name<type> : binary_function<type, type, type> \
|
||||
{ \
|
||||
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
|
||||
__device__ __forceinline__ name() {}\
|
||||
__device__ __forceinline__ name(const name&) {}\
|
||||
__host__ __device__ __forceinline__ name() {}\
|
||||
__host__ __device__ __forceinline__ name(const name&) {}\
|
||||
};
|
||||
|
||||
template <typename T> struct maximum : binary_function<T, T, T>
|
||||
@ -312,8 +312,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return max(lhs, rhs);
|
||||
}
|
||||
__device__ __forceinline__ maximum() {}
|
||||
__device__ __forceinline__ maximum(const maximum&) {}
|
||||
__host__ __device__ __forceinline__ maximum() {}
|
||||
__host__ __device__ __forceinline__ maximum(const maximum&) {}
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
|
||||
@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return min(lhs, rhs);
|
||||
}
|
||||
__device__ __forceinline__ minimum() {}
|
||||
__device__ __forceinline__ minimum(const minimum&) {}
|
||||
__host__ __device__ __forceinline__ minimum() {}
|
||||
__host__ __device__ __forceinline__ minimum(const minimum&) {}
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
|
||||
@ -349,7 +349,6 @@ namespace cv { namespace gpu { namespace cudev
|
||||
#undef OPENCV_GPU_IMPLEMENT_MINMAX
|
||||
|
||||
// Math functions
|
||||
///bound=========================================
|
||||
|
||||
template <typename T> struct abs_func : unary_function<T, T>
|
||||
{
|
||||
@ -358,8 +357,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return abs(x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
|
||||
{
|
||||
@ -368,8 +367,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return x;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
|
||||
{
|
||||
@ -378,8 +377,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<char> : unary_function<char, char>
|
||||
{
|
||||
@ -388,8 +387,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
|
||||
{
|
||||
@ -398,8 +397,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return x;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<short> : unary_function<short, short>
|
||||
{
|
||||
@ -408,8 +407,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::abs((int)x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
|
||||
{
|
||||
@ -418,8 +417,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return x;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<int> : unary_function<int, int>
|
||||
{
|
||||
@ -428,8 +427,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::abs(x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<float> : unary_function<float, float>
|
||||
{
|
||||
@ -438,8 +437,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::fabsf(x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
template <> struct abs_func<double> : unary_function<double, double>
|
||||
{
|
||||
@ -448,8 +447,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::fabs(x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ abs_func() {}
|
||||
__device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
__host__ __device__ __forceinline__ abs_func() {}
|
||||
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \
|
||||
@ -459,8 +458,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{ \
|
||||
return func ## f(v); \
|
||||
} \
|
||||
__device__ __forceinline__ name ## _func() {} \
|
||||
__device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : unary_function<double, double> \
|
||||
{ \
|
||||
@ -468,8 +467,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{ \
|
||||
return func(v); \
|
||||
} \
|
||||
__device__ __forceinline__ name ## _func() {} \
|
||||
__device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \
|
||||
@ -479,6 +478,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{ \
|
||||
return func ## f(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
}; \
|
||||
template <> struct name ## _func<double> : binary_function<double, double, double> \
|
||||
{ \
|
||||
@ -486,6 +487,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{ \
|
||||
return func(v1, v2); \
|
||||
} \
|
||||
__host__ __device__ __forceinline__ name ## _func() {} \
|
||||
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
|
||||
};
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
|
||||
@ -522,8 +525,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return src1 * src1 + src2 * src2;
|
||||
}
|
||||
__device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function<T, T, float>(){}
|
||||
__device__ __forceinline__ hypot_sqr_func() : binary_function<T, T, float>(){}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func() {}
|
||||
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
|
||||
};
|
||||
|
||||
// Saturate Cast Functor
|
||||
@ -533,8 +536,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return saturate_cast<D>(v);
|
||||
}
|
||||
__device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function<T, D>(){}
|
||||
__device__ __forceinline__ saturate_cast_func():unary_function<T, D>(){}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func() {}
|
||||
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
|
||||
};
|
||||
|
||||
// Threshold Functors
|
||||
@ -547,10 +550,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (src > thresh) * maxVal;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
|
||||
: unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
|
||||
|
||||
__device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
|
||||
__host__ __device__ __forceinline__ thresh_binary_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
const T thresh;
|
||||
const T maxVal;
|
||||
@ -565,10 +567,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (src <= thresh) * maxVal;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
|
||||
: unary_function<T, T>(), thresh(other.thresh), maxVal(other.maxVal){}
|
||||
|
||||
__device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
|
||||
: thresh(other.thresh), maxVal(other.maxVal) {}
|
||||
|
||||
const T thresh;
|
||||
const T maxVal;
|
||||
@ -583,10 +584,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return minimum<T>()(src, thresh);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
|
||||
: unary_function<T, T>(), thresh(other.thresh){}
|
||||
|
||||
__device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
const T thresh;
|
||||
};
|
||||
@ -599,10 +599,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return (src > thresh) * src;
|
||||
}
|
||||
__device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
|
||||
: unary_function<T, T>(), thresh(other.thresh){}
|
||||
|
||||
__device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
const T thresh;
|
||||
};
|
||||
@ -615,14 +615,14 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return (src <= thresh) * src;
|
||||
}
|
||||
__device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
|
||||
: unary_function<T, T>(), thresh(other.thresh){}
|
||||
|
||||
__device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
|
||||
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
|
||||
: thresh(other.thresh) {}
|
||||
|
||||
const T thresh;
|
||||
};
|
||||
//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
|
||||
|
||||
// Function Object Adaptors
|
||||
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
|
||||
{
|
||||
@ -633,8 +633,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return !pred(x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
|
||||
__device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
|
||||
__host__ __device__ __forceinline__ unary_negate() {}
|
||||
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
|
||||
|
||||
const Predicate pred;
|
||||
};
|
||||
@ -653,11 +653,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
return !pred(x,y);
|
||||
}
|
||||
__device__ __forceinline__ binary_negate(const binary_negate& other)
|
||||
: binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
|
||||
|
||||
__device__ __forceinline__ binary_negate() :
|
||||
binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
|
||||
__host__ __device__ __forceinline__ binary_negate() {}
|
||||
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
|
||||
|
||||
const Predicate pred;
|
||||
};
|
||||
@ -676,8 +674,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return op(arg1, a);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ binder1st(const binder1st& other) :
|
||||
unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
|
||||
__host__ __device__ __forceinline__ binder1st() {}
|
||||
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
|
||||
|
||||
const Op op;
|
||||
const typename Op::first_argument_type arg1;
|
||||
@ -697,8 +695,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return op(a, arg2);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ binder2nd(const binder2nd& other) :
|
||||
unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
|
||||
__host__ __device__ __forceinline__ binder2nd() {}
|
||||
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
|
||||
|
||||
const Op op;
|
||||
const typename Op::second_argument_type arg2;
|
||||
|
@ -124,8 +124,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
struct WithOutMask
|
||||
{
|
||||
__device__ __forceinline__ WithOutMask(){}
|
||||
__device__ __forceinline__ WithOutMask(const WithOutMask& mask){}
|
||||
__host__ __device__ __forceinline__ WithOutMask(){}
|
||||
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
|
||||
|
||||
__device__ __forceinline__ void next() const
|
||||
{
|
||||
|
@ -212,7 +212,7 @@ static void keepStrongest( int N, std::vector<KeyPoint>& keypoints )
|
||||
}
|
||||
|
||||
namespace {
|
||||
class GridAdaptedFeatureDetectorInvoker
|
||||
class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody
|
||||
{
|
||||
private:
|
||||
int gridRows_, gridCols_;
|
||||
@ -221,29 +221,24 @@ private:
|
||||
const Mat& image_;
|
||||
const Mat& mask_;
|
||||
const Ptr<FeatureDetector>& detector_;
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex* kptLock_;
|
||||
#endif
|
||||
Mutex* kptLock_;
|
||||
|
||||
GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC
|
||||
|
||||
public:
|
||||
|
||||
GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask, std::vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols
|
||||
#ifdef HAVE_TBB
|
||||
, tbb::mutex* kptLock
|
||||
#endif
|
||||
) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
|
||||
keypoints_(keypoints), image_(image), mask_(mask), detector_(detector)
|
||||
#ifdef HAVE_TBB
|
||||
, kptLock_(kptLock)
|
||||
#endif
|
||||
GridAdaptedFeatureDetectorInvoker(const Ptr<FeatureDetector>& detector, const Mat& image, const Mat& mask,
|
||||
std::vector<KeyPoint>& keypoints, int maxPerCell, int gridRows, int gridCols,
|
||||
cv::Mutex* kptLock)
|
||||
: gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell),
|
||||
keypoints_(keypoints), image_(image), mask_(mask), detector_(detector),
|
||||
kptLock_(kptLock)
|
||||
{
|
||||
}
|
||||
|
||||
void operator() (const BlockedRange& range) const
|
||||
void operator() (const Range& range) const
|
||||
{
|
||||
for (int i = range.begin(); i < range.end(); ++i)
|
||||
for (int i = range.start; i < range.end; ++i)
|
||||
{
|
||||
int celly = i / gridCols_;
|
||||
int cellx = i - celly * gridCols_;
|
||||
@ -268,9 +263,8 @@ public:
|
||||
it->pt.x += col_range.start;
|
||||
it->pt.y += row_range.start;
|
||||
}
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex::scoped_lock join_keypoints(*kptLock_);
|
||||
#endif
|
||||
|
||||
cv::AutoLock join_keypoints(*kptLock_);
|
||||
keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), sub_keypoints.end() );
|
||||
}
|
||||
}
|
||||
@ -287,13 +281,9 @@ void GridAdaptedFeatureDetector::detectImpl( const Mat& image, std::vector<KeyPo
|
||||
keypoints.reserve(maxTotalKeypoints);
|
||||
int maxPerCell = maxTotalKeypoints / (gridRows * gridCols);
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex kptLock;
|
||||
cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols),
|
||||
cv::Mutex kptLock;
|
||||
cv::parallel_for_(cv::Range(0, gridRows * gridCols),
|
||||
GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock));
|
||||
#else
|
||||
GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -38,7 +38,7 @@ if(HAVE_CUDA)
|
||||
|
||||
ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
|
||||
|
||||
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
|
||||
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY} ${CUDA_npp_LIBRARY})
|
||||
|
||||
if(WITH_NVCUVID)
|
||||
set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
|
||||
|
@ -150,7 +150,7 @@ namespace
|
||||
}
|
||||
|
||||
// Computes rotation, translation pair for small subsets of the input data
|
||||
class TransformHypothesesGenerator
|
||||
class TransformHypothesesGenerator : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
|
||||
@ -160,7 +160,7 @@ namespace
|
||||
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
|
||||
transl_vectors(transl_vectors_) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
// Input data for generation of the current hypothesis
|
||||
std::vector<int> subset_indices(subset_size);
|
||||
@ -172,7 +172,7 @@ namespace
|
||||
Mat rot_mat(3, 3, CV_64F);
|
||||
Mat transl_vec(1, 3, CV_64F);
|
||||
|
||||
for (int iter = range.begin(); iter < range.end(); ++iter)
|
||||
for (int iter = range.start; iter < range.end; ++iter)
|
||||
{
|
||||
selectRandom(subset_size, num_points, subset_indices);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
@ -238,7 +238,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
|
||||
// Generate set of hypotheses using small subsets of the input data
|
||||
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
|
||||
num_points, subset_size, rot_matrices, transl_vectors);
|
||||
parallel_for(BlockedRange(0, num_iters), body);
|
||||
parallel_for_(Range(0, num_iters), body);
|
||||
|
||||
// Compute scores (i.e. number of inliers) for each hypothesis
|
||||
GpuMat d_object(object);
|
||||
|
@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
|
||||
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
|
||||
}
|
||||
__device__ __forceinline__ TransformOp() {}
|
||||
__device__ __forceinline__ TransformOp(const TransformOp&) {}
|
||||
__host__ __device__ __forceinline__ TransformOp() {}
|
||||
__host__ __device__ __forceinline__ TransformOp(const TransformOp&) {}
|
||||
};
|
||||
|
||||
void call(const PtrStepSz<float3> src, const float* rot,
|
||||
@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
(cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
|
||||
(cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
|
||||
}
|
||||
__device__ __forceinline__ ProjectOp() {}
|
||||
__device__ __forceinline__ ProjectOp(const ProjectOp&) {}
|
||||
__host__ __device__ __forceinline__ ProjectOp() {}
|
||||
__host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {}
|
||||
};
|
||||
|
||||
void call(const PtrStepSz<float3> src, const float* rot,
|
||||
|
@ -62,8 +62,8 @@ namespace canny
|
||||
return ::abs(x) + ::abs(y);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ L1() {}
|
||||
__device__ __forceinline__ L1(const L1&) {}
|
||||
__host__ __device__ __forceinline__ L1() {}
|
||||
__host__ __device__ __forceinline__ L1(const L1&) {}
|
||||
};
|
||||
struct L2 : binary_function<int, int, float>
|
||||
{
|
||||
@ -72,8 +72,8 @@ namespace canny
|
||||
return ::sqrtf(x * x + y * y);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ L2() {}
|
||||
__device__ __forceinline__ L2(const L2&) {}
|
||||
__host__ __device__ __forceinline__ L2() {}
|
||||
__host__ __device__ __forceinline__ L2(const L2&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -470,8 +470,8 @@ namespace canny
|
||||
return (uchar)(-(e >> 1));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ GetEdges() {}
|
||||
__device__ __forceinline__ GetEdges(const GetEdges&) {}
|
||||
__host__ __device__ __forceinline__ GetEdges() {}
|
||||
__host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -162,8 +162,8 @@ namespace arithm
|
||||
return vadd4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAdd4() {}
|
||||
__device__ __forceinline__ VAdd4(const VAdd4& other) {}
|
||||
__host__ __device__ __forceinline__ VAdd4() {}
|
||||
__host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -175,8 +175,8 @@ namespace arithm
|
||||
return vadd2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAdd2() {}
|
||||
__device__ __forceinline__ VAdd2(const VAdd2& other) {}
|
||||
__host__ __device__ __forceinline__ VAdd2() {}
|
||||
__host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -188,8 +188,8 @@ namespace arithm
|
||||
return saturate_cast<D>(a + b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ AddMat() {}
|
||||
__device__ __forceinline__ AddMat(const AddMat& other) {}
|
||||
__host__ __device__ __forceinline__ AddMat() {}
|
||||
__host__ __device__ __forceinline__ AddMat(const AddMat&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -397,8 +397,8 @@ namespace arithm
|
||||
return vsub4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VSub4() {}
|
||||
__device__ __forceinline__ VSub4(const VSub4& other) {}
|
||||
__host__ __device__ __forceinline__ VSub4() {}
|
||||
__host__ __device__ __forceinline__ VSub4(const VSub4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -410,8 +410,8 @@ namespace arithm
|
||||
return vsub2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VSub2() {}
|
||||
__device__ __forceinline__ VSub2(const VSub2& other) {}
|
||||
__host__ __device__ __forceinline__ VSub2() {}
|
||||
__host__ __device__ __forceinline__ VSub2(const VSub2&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -423,8 +423,8 @@ namespace arithm
|
||||
return saturate_cast<D>(a - b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ SubMat() {}
|
||||
__device__ __forceinline__ SubMat(const SubMat& other) {}
|
||||
__host__ __device__ __forceinline__ SubMat() {}
|
||||
__host__ __device__ __forceinline__ SubMat(const SubMat&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -617,8 +617,8 @@ namespace arithm
|
||||
return res;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul_8uc4_32f() {}
|
||||
__device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
|
||||
__host__ __device__ __forceinline__ Mul_8uc4_32f() {}
|
||||
__host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
|
||||
};
|
||||
|
||||
struct Mul_16sc4_32f : binary_function<short4, float, short4>
|
||||
@ -629,8 +629,8 @@ namespace arithm
|
||||
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul_16sc4_32f() {}
|
||||
__device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
|
||||
__host__ __device__ __forceinline__ Mul_16sc4_32f() {}
|
||||
__host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct Mul : binary_function<T, T, D>
|
||||
@ -640,8 +640,8 @@ namespace arithm
|
||||
return saturate_cast<D>(a * b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul() {}
|
||||
__device__ __forceinline__ Mul(const Mul& other) {}
|
||||
__host__ __device__ __forceinline__ Mul() {}
|
||||
__host__ __device__ __forceinline__ Mul(const Mul&) {}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
|
||||
@ -888,8 +888,8 @@ namespace arithm
|
||||
return b != 0 ? saturate_cast<D>(a / b) : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
template <typename T> struct Div<T, float> : binary_function<T, T, float>
|
||||
{
|
||||
@ -898,8 +898,8 @@ namespace arithm
|
||||
return b != 0 ? static_cast<float>(a) / b : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
template <typename T> struct Div<T, double> : binary_function<T, T, double>
|
||||
{
|
||||
@ -908,8 +908,8 @@ namespace arithm
|
||||
return b != 0 ? static_cast<double>(a) / b : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
__host__ __device__ __forceinline__ Div() {}
|
||||
__host__ __device__ __forceinline__ Div(const Div&) {}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
|
||||
@ -1196,8 +1196,8 @@ namespace arithm
|
||||
return vabsdiff4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAbsDiff4() {}
|
||||
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
|
||||
__host__ __device__ __forceinline__ VAbsDiff4() {}
|
||||
__host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -1209,8 +1209,8 @@ namespace arithm
|
||||
return vabsdiff2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAbsDiff2() {}
|
||||
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
|
||||
__host__ __device__ __forceinline__ VAbsDiff2() {}
|
||||
__host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -1235,8 +1235,8 @@ namespace arithm
|
||||
return saturate_cast<T>(_abs(a - b));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ AbsDiffMat() {}
|
||||
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
|
||||
__host__ __device__ __forceinline__ AbsDiffMat() {}
|
||||
__host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1370,8 +1370,8 @@ namespace arithm
|
||||
return saturate_cast<T>(x * x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Sqr() {}
|
||||
__device__ __forceinline__ Sqr(const Sqr& other) {}
|
||||
__host__ __device__ __forceinline__ Sqr() {}
|
||||
__host__ __device__ __forceinline__ Sqr(const Sqr&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1466,8 +1466,8 @@ namespace arithm
|
||||
return saturate_cast<T>(f(x));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Exp() {}
|
||||
__device__ __forceinline__ Exp(const Exp& other) {}
|
||||
__host__ __device__ __forceinline__ Exp() {}
|
||||
__host__ __device__ __forceinline__ Exp(const Exp&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -1507,8 +1507,8 @@ namespace arithm
|
||||
return vcmpeq4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpEq4() {}
|
||||
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
|
||||
__host__ __device__ __forceinline__ VCmpEq4() {}
|
||||
__host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
|
||||
};
|
||||
struct VCmpNe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
@ -1517,8 +1517,8 @@ namespace arithm
|
||||
return vcmpne4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpNe4() {}
|
||||
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
|
||||
__host__ __device__ __forceinline__ VCmpNe4() {}
|
||||
__host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
|
||||
};
|
||||
struct VCmpLt4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
@ -1527,8 +1527,8 @@ namespace arithm
|
||||
return vcmplt4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpLt4() {}
|
||||
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
|
||||
__host__ __device__ __forceinline__ VCmpLt4() {}
|
||||
__host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
|
||||
};
|
||||
struct VCmpLe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
@ -1537,8 +1537,8 @@ namespace arithm
|
||||
return vcmple4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpLe4() {}
|
||||
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
|
||||
__host__ __device__ __forceinline__ VCmpLe4() {}
|
||||
__host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -2008,8 +2008,8 @@ namespace arithm
|
||||
return vmin4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMin4() {}
|
||||
__device__ __forceinline__ VMin4(const VMin4& other) {}
|
||||
__host__ __device__ __forceinline__ VMin4() {}
|
||||
__host__ __device__ __forceinline__ VMin4(const VMin4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -2021,8 +2021,8 @@ namespace arithm
|
||||
return vmin2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMin2() {}
|
||||
__device__ __forceinline__ VMin2(const VMin2& other) {}
|
||||
__host__ __device__ __forceinline__ VMin2() {}
|
||||
__host__ __device__ __forceinline__ VMin2(const VMin2&) {}
|
||||
};
|
||||
}
|
||||
|
||||
@ -2100,8 +2100,8 @@ namespace arithm
|
||||
return vmax4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMax4() {}
|
||||
__device__ __forceinline__ VMax4(const VMax4& other) {}
|
||||
__host__ __device__ __forceinline__ VMax4() {}
|
||||
__host__ __device__ __forceinline__ VMax4(const VMax4&) {}
|
||||
};
|
||||
|
||||
////////////////////////////////////
|
||||
@ -2113,8 +2113,8 @@ namespace arithm
|
||||
return vmax2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMax2() {}
|
||||
__device__ __forceinline__ VMax2(const VMax2& other) {}
|
||||
__host__ __device__ __forceinline__ VMax2() {}
|
||||
__host__ __device__ __forceinline__ VMax2(const VMax2&) {}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -188,10 +188,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
|
||||
|
||||
typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
|
||||
NppiSize oSizeROI, Npp64f* pRetVal);
|
||||
#if CUDA_VERSION < 5050
|
||||
typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal);
|
||||
|
||||
static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
|
||||
static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
|
||||
#else
|
||||
typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
|
||||
NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer);
|
||||
|
||||
typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize);
|
||||
|
||||
static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R};
|
||||
|
||||
static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R};
|
||||
#endif
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src1.cols;
|
||||
@ -203,7 +213,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
||||
|
||||
DeviceBuffer dbuf;
|
||||
|
||||
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
|
||||
#if CUDA_VERSION < 5050
|
||||
nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
|
||||
#else
|
||||
int bufSize;
|
||||
buf_size_funcs[funcIdx](sz, &bufSize);
|
||||
|
||||
GpuMat buf(1, bufSize, CV_8UC1);
|
||||
|
||||
nppSafeCall( funcs[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf, buf.data) );
|
||||
#endif
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
|
@ -352,7 +352,7 @@ GPU_TEST_P(Add_Scalar, WithOutMask)
|
||||
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
|
||||
cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -383,7 +383,7 @@ GPU_TEST_P(Add_Scalar, WithMask)
|
||||
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
|
||||
cv::add(mat, val, dst_gold, mask, depth.second);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -567,7 +567,7 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask)
|
||||
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
|
||||
cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -598,7 +598,7 @@ GPU_TEST_P(Subtract_Scalar, WithMask)
|
||||
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
|
||||
cv::subtract(mat, val, dst_gold, mask, depth.second);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2148,7 +2148,7 @@ GPU_TEST_P(Min, Scalar)
|
||||
|
||||
cv::Mat dst_gold = cv::min(src, val);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2231,7 +2231,7 @@ GPU_TEST_P(Max, Scalar)
|
||||
|
||||
cv::Mat dst_gold = cv::max(src, val);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,6 +113,7 @@ If you use ``cvtColor`` with 8-bit images, the conversion will have some informa
|
||||
The function can do the following transformations:
|
||||
|
||||
*
|
||||
RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` )
|
||||
Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using:
|
||||
|
||||
.. math::
|
||||
@ -755,7 +756,7 @@ Runs the GrabCut algorithm.
|
||||
|
||||
* **GC_PR_BGD** defines a possible background pixel.
|
||||
|
||||
* **GC_PR_BGD** defines a possible foreground pixel.
|
||||
* **GC_PR_FGD** defines a possible foreground pixel.
|
||||
|
||||
:param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` .
|
||||
|
||||
|
334
modules/imgproc/src/clahe.cpp
Normal file
334
modules/imgproc/src/clahe.cpp
Normal file
@ -0,0 +1,334 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// CLAHE
|
||||
|
||||
namespace
|
||||
{
|
||||
// Parallel loop body that fills one lookup-table row per tile.
// Each index in the cv::Range handed to operator() identifies a tile; the
// definition of operator() further down in this file builds a clipped
// histogram of that tile and writes the scaled cumulative sum into the
// corresponding row of the LUT matrix.
class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
// src       - image the tiles are cut from (read through uchar pointers in operator())
// lut       - output matrix; row k receives the table for tile k
// tileSize  - width/height of a single tile, in pixels
// tilesX    - number of tiles per row (used to split a linear tile index into tx/ty)
// tilesY    - number of tile rows (stored; not read by operator() in this class)
// clipLimit - per-bin histogram cap; clipping is skipped when it is <= 0
// lutScale  - factor applied to the cumulative histogram before saturating to uchar
CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
|
||||
src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
|
||||
{
|
||||
}
|
||||
|
||||
// Processes tiles with linear indices in [range.start, range.end).
void operator ()(const cv::Range& range) const;
|
||||
|
||||
private:
|
||||
// Source image.
cv::Mat src_;
|
||||
// Declared mutable because the const operator() obtains a writable row pointer from it.
mutable cv::Mat lut_;
|
||||
|
||||
// Tiling geometry.
cv::Size tileSize_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
// Histogram clipping threshold and LUT scaling factor.
int clipLimit_;
|
||||
float lutScale_;
|
||||
};
|
||||
|
||||
// Builds the lookup table for every tile index k in [range.start, range.end).
// For each tile: take the tile's ROI from src_, count a 256-bin histogram,
// optionally clip it at clipLimit_ and spread the clipped excess back over
// the bins, then write the scaled running sum of the histogram into the
// k-th row of lut_.
// NOTE(review): pixels are read as single uchar values across tileROI.width,
// so src_ is presumably single-channel 8-bit - confirm against the caller.
void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
|
||||
{
|
||||
const int histSize = 256;
|
||||
|
||||
// Row of the LUT for the first tile in this range; advanced by one row
// (lut_step bytes) per loop iteration below.
uchar* tileLut = lut_.ptr(range.start);
|
||||
const size_t lut_step = lut_.step;
|
||||
|
||||
for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
|
||||
{
|
||||
// Split the linear tile index into tile row (ty) and tile column (tx).
const int ty = k / tilesX_;
|
||||
const int tx = k % tilesX_;
|
||||
|
||||
// retrieve tile submatrix
|
||||
|
||||
cv::Rect tileROI;
|
||||
tileROI.x = tx * tileSize_.width;
|
||||
tileROI.y = ty * tileSize_.height;
|
||||
tileROI.width = tileSize_.width;
|
||||
tileROI.height = tileSize_.height;
|
||||
|
||||
const cv::Mat tile = src_(tileROI);
|
||||
|
||||
// calc histogram
|
||||
|
||||
int tileHist[histSize] = {0, };
|
||||
|
||||
// `height` is used as a countdown of remaining rows; ptr steps one tile row (sstep bytes) at a time.
int height = tileROI.height;
|
||||
const size_t sstep = tile.step;
|
||||
for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
|
||||
{
|
||||
int x = 0;
|
||||
// Main loop handles four pixels per iteration.
for (; x <= tileROI.width - 4; x += 4)
|
||||
{
|
||||
int t0 = ptr[x], t1 = ptr[x+1];
|
||||
tileHist[t0]++; tileHist[t1]++;
|
||||
t0 = ptr[x+2]; t1 = ptr[x+3];
|
||||
tileHist[t0]++; tileHist[t1]++;
|
||||
}
|
||||
|
||||
// Tail loop for the remaining (width % 4) pixels of the row.
for (; x < tileROI.width; ++x)
|
||||
tileHist[ptr[x]]++;
|
||||
}
|
||||
|
||||
// clip histogram
|
||||
|
||||
// A non-positive clipLimit_ disables clipping entirely.
if (clipLimit_ > 0)
|
||||
{
|
||||
// how many pixels were clipped
|
||||
int clipped = 0;
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
{
|
||||
if (tileHist[i] > clipLimit_)
|
||||
{
|
||||
clipped += tileHist[i] - clipLimit_;
|
||||
tileHist[i] = clipLimit_;
|
||||
}
|
||||
}
|
||||
|
||||
// redistribute clipped pixels
|
||||
// Every bin receives an equal share (redistBatch); the remainder that
// does not divide evenly is handed out one count each to the first
// `residual` bins.
int redistBatch = clipped / histSize;
|
||||
int residual = clipped - redistBatch * histSize;
|
||||
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
tileHist[i] += redistBatch;
|
||||
|
||||
for (int i = 0; i < residual; ++i)
|
||||
tileHist[i]++;
|
||||
}
|
||||
|
||||
// calc Lut
|
||||
|
||||
// Running sum of the histogram, scaled by lutScale_ and saturated to uchar.
int sum = 0;
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
{
|
||||
sum += tileHist[i];
|
||||
tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
|
||||
src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
|
||||
{
|
||||
}
|
||||
|
||||
void operator ()(const cv::Range& range) const;
|
||||
|
||||
private:
|
||||
cv::Mat src_;
|
||||
mutable cv::Mat dst_;
|
||||
cv::Mat lut_;
|
||||
|
||||
cv::Size tileSize_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
};
|
||||
|
||||
// Writes rows [range.start, range.end) of the output image. For each pixel the
// two nearest tile rows and two nearest tile columns are located, and the four
// corresponding LUT entries for the pixel's grey level are blended bilinearly.
void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
{
    const size_t lutRowStride = lut_.step;
    const int lastTileX = tilesX_ - 1;
    const int lastTileY = tilesY_ - 1;

    for (int row = range.start; row < range.end; ++row)
    {
        const uchar* in = src_.ptr<uchar>(row);
        uchar* out = dst_.ptr<uchar>(row);

        // vertical position measured in tiles, relative to tile centres
        const float tileY = (static_cast<float>(row) / tileSize_.height) - 0.5f;
        const int upper = cvFloor(tileY);

        // the weight is taken from the unclamped index; clamping happens afterwards
        const float wy = tileY - upper;

        const int tyTop = std::max(upper, 0);
        const int tyBottom = std::min(upper + 1, lastTileY);

        const uchar* lutTop = lut_.ptr(tyTop * tilesX_);
        const uchar* lutBottom = lut_.ptr(tyBottom * tilesX_);

        for (int col = 0; col < src_.cols; ++col)
        {
            // horizontal position measured in tiles, relative to tile centres
            const float tileX = (static_cast<float>(col) / tileSize_.width) - 0.5f;
            const int left = cvFloor(tileX);

            const float wx = tileX - left;

            const int txLeft = std::max(left, 0);
            const int txRight = std::min(left + 1, lastTileX);

            const int level = in[col];

            const size_t idxLeft = txLeft * lutRowStride + level;
            const size_t idxRight = txRight * lutRowStride + level;

            // accumulate in the same order as before to keep results bit-identical
            float blended = 0;
            blended += lutTop[idxLeft] * ((1.0f - wx) * (1.0f - wy));
            blended += lutTop[idxRight] * ((wx) * (1.0f - wy));
            blended += lutBottom[idxLeft] * ((1.0f - wx) * (wy));
            blended += lutBottom[idxRight] * ((wx) * (wy));

            out[col] = cv::saturate_cast<uchar>(blended);
        }
    }
}
|
||||
|
||||
class CLAHE_Impl : public cv::CLAHE
|
||||
{
|
||||
public:
|
||||
CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
|
||||
|
||||
cv::AlgorithmInfo* info() const;
|
||||
|
||||
void apply(cv::InputArray src, cv::OutputArray dst);
|
||||
|
||||
void setClipLimit(double clipLimit);
|
||||
double getClipLimit() const;
|
||||
|
||||
void setTilesGridSize(cv::Size tileGridSize);
|
||||
cv::Size getTilesGridSize() const;
|
||||
|
||||
void collectGarbage();
|
||||
|
||||
private:
|
||||
double clipLimit_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
|
||||
cv::Mat srcExt_;
|
||||
cv::Mat lut_;
|
||||
};
|
||||
|
||||
// Stores the parameters; the work buffers stay empty until apply() is called.
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY)
    : clipLimit_(clipLimit),
      tilesX_(tilesX),
      tilesY_(tilesY)
{
}
|
||||
|
||||
CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
|
||||
obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
|
||||
obj.info()->addParam(obj, "tilesX", obj.tilesX_);
|
||||
obj.info()->addParam(obj, "tilesY", obj.tilesY_))
|
||||
|
||||
// Applies contrast-limited adaptive histogram equalization.
//
// _src must be CV_8UC1; _dst is (re)allocated with the same size and type.
// The image is split into tilesX_ x tilesY_ tiles, a clipped-histogram LUT is
// computed per tile, and the output is interpolated between neighbouring LUTs.
// Raises (via CV_Assert) if the input type is wrong or the tile grid is not
// strictly positive. An empty input yields an empty output.
void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
{
    cv::Mat src = _src.getMat();

    CV_Assert( src.type() == CV_8UC1 );
    // the grid dimensions are used as divisors below; zero would be undefined behaviour
    CV_Assert( tilesX_ > 0 && tilesY_ > 0 );

    _dst.create( src.size(), src.type() );
    cv::Mat dst = _dst.getMat();

    // nothing to equalize; also avoids a zero tile area in the scale factor below
    if (src.empty())
        return;

    const int histSize = 256;

    lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);

    cv::Size tileSize;
    cv::Mat srcForLut;

    if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
    {
        tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
        srcForLut = src;
    }
    else
    {
        // Pad on the bottom/right so both dimensions become multiples of the grid.
        // A dimension that is already a multiple is extended by one full
        // tilesX_/tilesY_, which keeps it divisible.
        cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);

        tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
        srcForLut = srcExt_;
    }

    const int tileSizeTotal = tileSize.area();
    const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;

    // convert the relative clip limit into an absolute per-bin pixel count (at least 1)
    int clipLimit = 0;
    if (clipLimit_ > 0.0)
    {
        clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
        clipLimit = std::max(clipLimit, 1);
    }

    // stage 1: one LUT per tile, computed on the (possibly padded) image
    CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
    cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);

    // stage 2: interpolate between tile LUTs for every pixel of the original image
    CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
    cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
}
|
||||
|
||||
void CLAHE_Impl::setClipLimit(double clipLimit)
|
||||
{
|
||||
clipLimit_ = clipLimit;
|
||||
}
|
||||
|
||||
double CLAHE_Impl::getClipLimit() const
|
||||
{
|
||||
return clipLimit_;
|
||||
}
|
||||
|
||||
// Stores the tile grid: width is the number of tiles per row, height the
// number of tiles per column.
void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
{
    this->tilesX_ = tileGridSize.width;
    this->tilesY_ = tileGridSize.height;
}
|
||||
|
||||
// Returns the tile grid as (tiles per row, tiles per column).
cv::Size CLAHE_Impl::getTilesGridSize() const
{
    const cv::Size grid(tilesX_, tilesY_);
    return grid;
}
|
||||
|
||||
void CLAHE_Impl::collectGarbage()
|
||||
{
|
||||
srcExt_.release();
|
||||
lut_.release();
|
||||
}
|
||||
}
|
||||
|
||||
// Factory for the CLAHE implementation: tileGridSize.width / .height become
// the number of tiles per row / per column.
cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
{
    const int tilesX = tileGridSize.width;
    const int tilesY = tileGridSize.height;

    return cv::Ptr<cv::CLAHE>(new CLAHE_Impl(clipLimit, tilesX, tilesY));
}
|
@ -1815,7 +1815,7 @@ const int ITUR_BT_601_CGV = -385875;
|
||||
const int ITUR_BT_601_CBV = -74448;
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
struct YUV420sp2RGB888Invoker
|
||||
struct YUV420sp2RGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *muv;
|
||||
@ -1824,10 +1824,10 @@ struct YUV420sp2RGB888Invoker
|
||||
YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
//R = 1.164(Y - 16) + 1.596(V - 128)
|
||||
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
|
||||
@ -1884,7 +1884,7 @@ struct YUV420sp2RGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
struct YUV420sp2RGBA8888Invoker
|
||||
struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *muv;
|
||||
@ -1893,10 +1893,10 @@ struct YUV420sp2RGBA8888Invoker
|
||||
YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
//R = 1.164(Y - 16) + 1.596(V - 128)
|
||||
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
|
||||
@ -1957,7 +1957,7 @@ struct YUV420sp2RGBA8888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx>
|
||||
struct YUV420p2RGB888Invoker
|
||||
struct YUV420p2RGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *mu, *mv;
|
||||
@ -1967,19 +1967,19 @@ struct YUV420p2RGB888Invoker
|
||||
YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
|
||||
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
const int rangeBegin = range.begin() * 2;
|
||||
const int rangeEnd = range.end() * 2;
|
||||
const int rangeBegin = range.start * 2;
|
||||
const int rangeEnd = range.end * 2;
|
||||
|
||||
size_t uvsteps[2] = {width/2, stride - width/2};
|
||||
int usIdx = ustepIdx, vsIdx = vstepIdx;
|
||||
|
||||
const uchar* y1 = my1 + rangeBegin * stride;
|
||||
const uchar* u1 = mu + (range.begin() / 2) * stride;
|
||||
const uchar* v1 = mv + (range.begin() / 2) * stride;
|
||||
const uchar* u1 = mu + (range.start / 2) * stride;
|
||||
const uchar* v1 = mv + (range.start / 2) * stride;
|
||||
|
||||
if(range.begin() % 2 == 1)
|
||||
if(range.start % 2 == 1)
|
||||
{
|
||||
u1 += uvsteps[(usIdx++) & 1];
|
||||
v1 += uvsteps[(vsIdx++) & 1];
|
||||
@ -2025,7 +2025,7 @@ struct YUV420p2RGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx>
|
||||
struct YUV420p2RGBA8888Invoker
|
||||
struct YUV420p2RGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *mu, *mv;
|
||||
@ -2035,19 +2035,19 @@ struct YUV420p2RGBA8888Invoker
|
||||
YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
|
||||
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
size_t uvsteps[2] = {width/2, stride - width/2};
|
||||
int usIdx = ustepIdx, vsIdx = vstepIdx;
|
||||
|
||||
const uchar* y1 = my1 + rangeBegin * stride;
|
||||
const uchar* u1 = mu + (range.begin() / 2) * stride;
|
||||
const uchar* v1 = mv + (range.begin() / 2) * stride;
|
||||
const uchar* u1 = mu + (range.start / 2) * stride;
|
||||
const uchar* v1 = mv + (range.start / 2) * stride;
|
||||
|
||||
if(range.begin() % 2 == 1)
|
||||
if(range.start % 2 == 1)
|
||||
{
|
||||
u1 += uvsteps[(usIdx++) & 1];
|
||||
v1 += uvsteps[(vsIdx++) & 1];
|
||||
@ -2102,48 +2102,40 @@ template<int bIdx, int uIdx>
|
||||
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
{
|
||||
YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
{
|
||||
YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx>
|
||||
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
|
||||
{
|
||||
YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx>
|
||||
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
|
||||
{
|
||||
YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
|
||||
@ -2227,7 +2219,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
|
||||
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
struct YUV422toRGB888Invoker
|
||||
struct YUV422toRGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* src;
|
||||
@ -2236,10 +2228,10 @@ struct YUV422toRGB888Invoker
|
||||
YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
|
||||
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin();
|
||||
int rangeEnd = range.end();
|
||||
int rangeBegin = range.start;
|
||||
int rangeEnd = range.end;
|
||||
|
||||
const int uidx = 1 - yIdx + uIdx * 2;
|
||||
const int vidx = (2 + uidx) % 4;
|
||||
@ -2273,7 +2265,7 @@ struct YUV422toRGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
struct YUV422toRGBA8888Invoker
|
||||
struct YUV422toRGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* src;
|
||||
@ -2282,10 +2274,10 @@ struct YUV422toRGBA8888Invoker
|
||||
YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
|
||||
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin();
|
||||
int rangeEnd = range.end();
|
||||
int rangeBegin = range.start;
|
||||
int rangeEnd = range.end;
|
||||
|
||||
const int uidx = 1 - yIdx + uIdx * 2;
|
||||
const int vidx = (2 + uidx) % 4;
|
||||
@ -2326,24 +2318,20 @@ template<int bIdx, int uIdx, int yIdx>
|
||||
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
|
||||
{
|
||||
YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows), converter);
|
||||
parallel_for_(Range(0, _dst.rows), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows));
|
||||
converter(Range(0, _dst.rows));
|
||||
}
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
|
||||
{
|
||||
YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows), converter);
|
||||
parallel_for_(Range(0, _dst.rows), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows));
|
||||
converter(Range(0, _dst.rows));
|
||||
}
|
||||
|
||||
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
|
||||
|
@ -442,7 +442,7 @@ static void getDistanceTransformMask( int maskType, float *metrics )
|
||||
}
|
||||
}
|
||||
|
||||
struct DTColumnInvoker
|
||||
struct DTColumnInvoker : ParallelLoopBody
|
||||
{
|
||||
DTColumnInvoker( const Mat* _src, Mat* _dst, const int* _sat_tab, const float* _sqr_tab)
|
||||
{
|
||||
@ -452,9 +452,9 @@ struct DTColumnInvoker
|
||||
sqr_tab = _sqr_tab;
|
||||
}
|
||||
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
int i, i1 = range.begin(), i2 = range.end();
|
||||
int i, i1 = range.start, i2 = range.end;
|
||||
int m = src->rows;
|
||||
size_t sstep = src->step, dstep = dst->step/sizeof(float);
|
||||
AutoBuffer<int> _d(m);
|
||||
@ -489,7 +489,7 @@ struct DTColumnInvoker
|
||||
};
|
||||
|
||||
|
||||
struct DTRowInvoker
|
||||
struct DTRowInvoker : ParallelLoopBody
|
||||
{
|
||||
DTRowInvoker( Mat* _dst, const float* _sqr_tab, const float* _inv_tab )
|
||||
{
|
||||
@ -498,10 +498,10 @@ struct DTRowInvoker
|
||||
inv_tab = _inv_tab;
|
||||
}
|
||||
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
const float inf = 1e15f;
|
||||
int i, i1 = range.begin(), i2 = range.end();
|
||||
int i, i1 = range.start, i2 = range.end;
|
||||
int n = dst->cols;
|
||||
AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
|
||||
float* f = (float*)(uchar*)_buf;
|
||||
@ -578,7 +578,7 @@ trueDistTrans( const Mat& src, Mat& dst )
|
||||
for( ; i <= m*3; i++ )
|
||||
sat_tab[i] = i - shift;
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab));
|
||||
cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab));
|
||||
|
||||
// stage 2: compute modified distance transform for each row
|
||||
float* inv_tab = sqr_tab + n;
|
||||
@ -590,7 +590,7 @@ trueDistTrans( const Mat& src, Mat& dst )
|
||||
sqr_tab[i] = (float)(i*i);
|
||||
}
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(&dst, sqr_tab, inv_tab));
|
||||
cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(&dst, sqr_tab, inv_tab));
|
||||
}
|
||||
|
||||
|
||||
@ -664,7 +664,7 @@ distanceATS_L1_8u( const Mat& src, Mat& dst )
|
||||
// do right edge
|
||||
a = lut[dbase[width-1+dststep]];
|
||||
dbase[width-1] = (uchar)(MIN(a, dbase[width-1]));
|
||||
|
||||
|
||||
for( x = width - 2; x >= 0; x-- )
|
||||
{
|
||||
int b = dbase[x+dststep];
|
||||
|
@ -2985,29 +2985,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask,
|
||||
}
|
||||
}
|
||||
|
||||
class EqualizeHistCalcHist_Invoker
|
||||
class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
enum {HIST_SZ = 256};
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
typedef tbb::mutex* MutextPtr;
|
||||
#else
|
||||
typedef void* MutextPtr;
|
||||
#endif
|
||||
|
||||
EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock)
|
||||
EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
|
||||
: src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
|
||||
{ }
|
||||
|
||||
void operator()( const cv::BlockedRange& rowRange ) const
|
||||
void operator()( const cv::Range& rowRange ) const
|
||||
{
|
||||
int localHistogram[HIST_SZ] = {0, };
|
||||
|
||||
const size_t sstep = src_.step;
|
||||
|
||||
int width = src_.cols;
|
||||
int height = rowRange.end() - rowRange.begin();
|
||||
int height = rowRange.end - rowRange.start;
|
||||
|
||||
if (src_.isContinuous())
|
||||
{
|
||||
@ -3015,7 +3009,7 @@ public:
|
||||
height = 1;
|
||||
}
|
||||
|
||||
for (const uchar* ptr = src_.ptr<uchar>(rowRange.begin()); height--; ptr += sstep)
|
||||
for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
|
||||
{
|
||||
int x = 0;
|
||||
for (; x <= width - 4; x += 4)
|
||||
@ -3030,9 +3024,7 @@ public:
|
||||
localHistogram[ptr[x]]++;
|
||||
}
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex::scoped_lock lock(*histogramLock_);
|
||||
#endif
|
||||
cv::AutoLock lock(*histogramLock_);
|
||||
|
||||
for( int i = 0; i < HIST_SZ; i++ )
|
||||
globalHistogram_[i] += localHistogram[i];
|
||||
@ -3040,12 +3032,7 @@ public:
|
||||
|
||||
static bool isWorthParallel( const cv::Mat& src )
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
return ( src.total() >= 640*480 );
|
||||
#else
|
||||
(void)src;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
@ -3053,10 +3040,10 @@ private:
|
||||
|
||||
cv::Mat& src_;
|
||||
int* globalHistogram_;
|
||||
MutextPtr histogramLock_;
|
||||
cv::Mutex* histogramLock_;
|
||||
};
|
||||
|
||||
class EqualizeHistLut_Invoker
|
||||
class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
|
||||
@ -3065,13 +3052,13 @@ public:
|
||||
lut_(lut)
|
||||
{ }
|
||||
|
||||
void operator()( const cv::BlockedRange& rowRange ) const
|
||||
void operator()( const cv::Range& rowRange ) const
|
||||
{
|
||||
const size_t sstep = src_.step;
|
||||
const size_t dstep = dst_.step;
|
||||
|
||||
int width = src_.cols;
|
||||
int height = rowRange.end() - rowRange.begin();
|
||||
int height = rowRange.end - rowRange.start;
|
||||
int* lut = lut_;
|
||||
|
||||
if (src_.isContinuous() && dst_.isContinuous())
|
||||
@ -3080,8 +3067,8 @@ public:
|
||||
height = 1;
|
||||
}
|
||||
|
||||
const uchar* sptr = src_.ptr<uchar>(rowRange.begin());
|
||||
uchar* dptr = dst_.ptr<uchar>(rowRange.begin());
|
||||
const uchar* sptr = src_.ptr<uchar>(rowRange.start);
|
||||
uchar* dptr = dst_.ptr<uchar>(rowRange.start);
|
||||
|
||||
for (; height--; sptr += sstep, dptr += dstep)
|
||||
{
|
||||
@ -3110,12 +3097,7 @@ public:
|
||||
|
||||
static bool isWorthParallel( const cv::Mat& src )
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
return ( src.total() >= 640*480 );
|
||||
#else
|
||||
(void)src;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
@ -3142,23 +3124,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
|
||||
if(src.empty())
|
||||
return;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex histogramLockInstance;
|
||||
EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance;
|
||||
#else
|
||||
EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0;
|
||||
#endif
|
||||
Mutex histogramLockInstance;
|
||||
|
||||
const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
|
||||
int hist[hist_sz] = {0,};
|
||||
int lut[hist_sz];
|
||||
|
||||
EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock);
|
||||
EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
|
||||
EqualizeHistLut_Invoker lutBody(src, dst, lut);
|
||||
cv::BlockedRange heightRange(0, src.rows);
|
||||
cv::Range heightRange(0, src.rows);
|
||||
|
||||
if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
|
||||
parallel_for(heightRange, calcBody);
|
||||
parallel_for_(heightRange, calcBody);
|
||||
else
|
||||
calcBody(heightRange);
|
||||
|
||||
@ -3182,303 +3159,11 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
|
||||
}
|
||||
|
||||
if(EqualizeHistLut_Invoker::isWorthParallel(src))
|
||||
parallel_for(heightRange, lutBody);
|
||||
parallel_for_(heightRange, lutBody);
|
||||
else
|
||||
lutBody(heightRange);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// CLAHE
|
||||
|
||||
namespace
|
||||
{
|
||||
class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) :
|
||||
src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale)
|
||||
{
|
||||
}
|
||||
|
||||
void operator ()(const cv::Range& range) const;
|
||||
|
||||
private:
|
||||
cv::Mat src_;
|
||||
mutable cv::Mat lut_;
|
||||
|
||||
cv::Size tileSize_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
int clipLimit_;
|
||||
float lutScale_;
|
||||
};
|
||||
|
||||
void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const
|
||||
{
|
||||
const int histSize = 256;
|
||||
|
||||
uchar* tileLut = lut_.ptr(range.start);
|
||||
const size_t lut_step = lut_.step;
|
||||
|
||||
for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
|
||||
{
|
||||
const int ty = k / tilesX_;
|
||||
const int tx = k % tilesX_;
|
||||
|
||||
// retrieve tile submatrix
|
||||
|
||||
cv::Rect tileROI;
|
||||
tileROI.x = tx * tileSize_.width;
|
||||
tileROI.y = ty * tileSize_.height;
|
||||
tileROI.width = tileSize_.width;
|
||||
tileROI.height = tileSize_.height;
|
||||
|
||||
const cv::Mat tile = src_(tileROI);
|
||||
|
||||
// calc histogram
|
||||
|
||||
int tileHist[histSize] = {0, };
|
||||
|
||||
int height = tileROI.height;
|
||||
const size_t sstep = tile.step;
|
||||
for (const uchar* ptr = tile.ptr<uchar>(0); height--; ptr += sstep)
|
||||
{
|
||||
int x = 0;
|
||||
for (; x <= tileROI.width - 4; x += 4)
|
||||
{
|
||||
int t0 = ptr[x], t1 = ptr[x+1];
|
||||
tileHist[t0]++; tileHist[t1]++;
|
||||
t0 = ptr[x+2]; t1 = ptr[x+3];
|
||||
tileHist[t0]++; tileHist[t1]++;
|
||||
}
|
||||
|
||||
for (; x < tileROI.width; ++x)
|
||||
tileHist[ptr[x]]++;
|
||||
}
|
||||
|
||||
// clip histogram
|
||||
|
||||
if (clipLimit_ > 0)
|
||||
{
|
||||
// how many pixels were clipped
|
||||
int clipped = 0;
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
{
|
||||
if (tileHist[i] > clipLimit_)
|
||||
{
|
||||
clipped += tileHist[i] - clipLimit_;
|
||||
tileHist[i] = clipLimit_;
|
||||
}
|
||||
}
|
||||
|
||||
// redistribute clipped pixels
|
||||
int redistBatch = clipped / histSize;
|
||||
int residual = clipped - redistBatch * histSize;
|
||||
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
tileHist[i] += redistBatch;
|
||||
|
||||
for (int i = 0; i < residual; ++i)
|
||||
tileHist[i]++;
|
||||
}
|
||||
|
||||
// calc Lut
|
||||
|
||||
int sum = 0;
|
||||
for (int i = 0; i < histSize; ++i)
|
||||
{
|
||||
sum += tileHist[i];
|
||||
tileLut[i] = cv::saturate_cast<uchar>(sum * lutScale_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) :
|
||||
src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
|
||||
{
|
||||
}
|
||||
|
||||
void operator ()(const cv::Range& range) const;
|
||||
|
||||
private:
|
||||
cv::Mat src_;
|
||||
mutable cv::Mat dst_;
|
||||
cv::Mat lut_;
|
||||
|
||||
cv::Size tileSize_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
};
|
||||
|
||||
void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const
|
||||
{
|
||||
const size_t lut_step = lut_.step;
|
||||
|
||||
for (int y = range.start; y < range.end; ++y)
|
||||
{
|
||||
const uchar* srcRow = src_.ptr<uchar>(y);
|
||||
uchar* dstRow = dst_.ptr<uchar>(y);
|
||||
|
||||
const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
|
||||
|
||||
int ty1 = cvFloor(tyf);
|
||||
int ty2 = ty1 + 1;
|
||||
|
||||
const float ya = tyf - ty1;
|
||||
|
||||
ty1 = std::max(ty1, 0);
|
||||
ty2 = std::min(ty2, tilesY_ - 1);
|
||||
|
||||
const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_);
|
||||
const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_);
|
||||
|
||||
for (int x = 0; x < src_.cols; ++x)
|
||||
{
|
||||
const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
|
||||
|
||||
int tx1 = cvFloor(txf);
|
||||
int tx2 = tx1 + 1;
|
||||
|
||||
const float xa = txf - tx1;
|
||||
|
||||
tx1 = std::max(tx1, 0);
|
||||
tx2 = std::min(tx2, tilesX_ - 1);
|
||||
|
||||
const int srcVal = srcRow[x];
|
||||
|
||||
const size_t ind1 = tx1 * lut_step + srcVal;
|
||||
const size_t ind2 = tx2 * lut_step + srcVal;
|
||||
|
||||
float res = 0;
|
||||
|
||||
res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
|
||||
res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
|
||||
res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
|
||||
res += lutPlane2[ind2] * ((xa) * (ya));
|
||||
|
||||
dstRow[x] = cv::saturate_cast<uchar>(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class CLAHE_Impl : public cv::CLAHE
|
||||
{
|
||||
public:
|
||||
CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
|
||||
|
||||
cv::AlgorithmInfo* info() const;
|
||||
|
||||
void apply(cv::InputArray src, cv::OutputArray dst);
|
||||
|
||||
void setClipLimit(double clipLimit);
|
||||
double getClipLimit() const;
|
||||
|
||||
void setTilesGridSize(cv::Size tileGridSize);
|
||||
cv::Size getTilesGridSize() const;
|
||||
|
||||
void collectGarbage();
|
||||
|
||||
private:
|
||||
double clipLimit_;
|
||||
int tilesX_;
|
||||
int tilesY_;
|
||||
|
||||
cv::Mat srcExt_;
|
||||
cv::Mat lut_;
|
||||
};
|
||||
|
||||
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
|
||||
clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
|
||||
{
|
||||
}
|
||||
|
||||
CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE",
|
||||
obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
|
||||
obj.info()->addParam(obj, "tilesX", obj.tilesX_);
|
||||
obj.info()->addParam(obj, "tilesY", obj.tilesY_))
|
||||
|
||||
void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
|
||||
{
|
||||
cv::Mat src = _src.getMat();
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
_dst.create( src.size(), src.type() );
|
||||
cv::Mat dst = _dst.getMat();
|
||||
|
||||
const int histSize = 256;
|
||||
|
||||
lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
|
||||
|
||||
cv::Size tileSize;
|
||||
cv::Mat srcForLut;
|
||||
|
||||
if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
|
||||
{
|
||||
tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
|
||||
srcForLut = src;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
|
||||
|
||||
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
|
||||
srcForLut = srcExt_;
|
||||
}
|
||||
|
||||
const int tileSizeTotal = tileSize.area();
|
||||
const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
|
||||
|
||||
int clipLimit = 0;
|
||||
if (clipLimit_ > 0.0)
|
||||
{
|
||||
clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
|
||||
clipLimit = std::max(clipLimit, 1);
|
||||
}
|
||||
|
||||
CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
|
||||
cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
|
||||
|
||||
CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_);
|
||||
cv::parallel_for_(cv::Range(0, src.rows), interpolationBody);
|
||||
}
|
||||
|
||||
void CLAHE_Impl::setClipLimit(double clipLimit)
|
||||
{
|
||||
clipLimit_ = clipLimit;
|
||||
}
|
||||
|
||||
double CLAHE_Impl::getClipLimit() const
|
||||
{
|
||||
return clipLimit_;
|
||||
}
|
||||
|
||||
// Stores the tile grid: width becomes the number of tiles along X,
// height the number of tiles along Y.
void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
{
    const int gridCols = tileGridSize.width;
    const int gridRows = tileGridSize.height;

    tilesX_ = gridCols;
    tilesY_ = gridRows;
}
|
||||
|
||||
// Returns the tile grid as a size: (tiles along X, tiles along Y).
cv::Size CLAHE_Impl::getTilesGridSize() const
{
    cv::Size grid(tilesX_, tilesY_);
    return grid;
}
|
||||
|
||||
void CLAHE_Impl::collectGarbage()
|
||||
{
|
||||
srcExt_.release();
|
||||
lut_.release();
|
||||
}
|
||||
}
|
||||
|
||||
// Factory: builds a CLAHE_Impl from the clip limit and the tile grid
// (width -> tiles along X, height -> tiles along Y).
cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
{
    const int tilesX = tileGridSize.width;
    const int tilesY = tileGridSize.height;

    return new CLAHE_Impl(clipLimit, tilesX, tilesY);
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/* Implementation of RTTI and Generic Functions for CvHistogram */
|
||||
|
@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
|
||||
namespace cv
|
||||
{
|
||||
|
||||
class MorphologyRunner
|
||||
class MorphologyRunner : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
|
||||
@ -1102,14 +1102,14 @@ public:
|
||||
columnBorderType = _columnBorderType;
|
||||
}
|
||||
|
||||
void operator () ( const BlockedRange& range ) const
|
||||
void operator () ( const Range& range ) const
|
||||
{
|
||||
int row0 = std::min(cvRound(range.begin() * src.rows / nStripes), src.rows);
|
||||
int row1 = std::min(cvRound(range.end() * src.rows / nStripes), src.rows);
|
||||
int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows);
|
||||
int row1 = std::min(cvRound(range.end * src.rows / nStripes), src.rows);
|
||||
|
||||
/*if(0)
|
||||
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
|
||||
src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
|
||||
src.rows, src.cols, range.start, range.end, row0, row1);*/
|
||||
|
||||
Mat srcStripe = src.rowRange(row0, row1);
|
||||
Mat dstStripe = dst.rowRange(row0, row1);
|
||||
@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
|
||||
}
|
||||
|
||||
int nStripes = 1;
|
||||
#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
|
||||
#if defined HAVE_TEGRA_OPTIMIZATION
|
||||
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
|
||||
(borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
|
||||
src.rows >= 64 ) //NOTE: just heuristics
|
||||
nStripes = 4;
|
||||
#endif
|
||||
|
||||
parallel_for(BlockedRange(0, nStripes),
|
||||
MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
|
||||
parallel_for_(Range(0, nStripes),
|
||||
MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
|
||||
|
||||
//Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
|
||||
// kernel, anchor, borderType, borderType, borderValue );
|
||||
|
@ -40,10 +40,6 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
#include <tbb/tbb.h>
|
||||
#endif
|
||||
|
||||
CvANN_MLP_TrainParams::CvANN_MLP_TrainParams()
|
||||
{
|
||||
term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 );
|
||||
@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw )
|
||||
return iter;
|
||||
}
|
||||
|
||||
struct rprop_loop {
|
||||
struct rprop_loop : cv::ParallelLoopBody {
|
||||
rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0,
|
||||
int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count,
|
||||
CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz)
|
||||
@ -1063,7 +1059,7 @@ struct rprop_loop {
|
||||
int buf_sz;
|
||||
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
double* buf_ptr;
|
||||
double** x = 0;
|
||||
@ -1084,7 +1080,7 @@ struct rprop_loop {
|
||||
buf_ptr += (df[i] - x[i])*2;
|
||||
}
|
||||
|
||||
for(int si = range.begin(); si < range.end(); si++ )
|
||||
for(int si = range.start; si < range.end; si++ )
|
||||
{
|
||||
if (si % dcount0 != 0) continue;
|
||||
int n1, n2, k;
|
||||
@ -1170,36 +1166,33 @@ struct rprop_loop {
|
||||
}
|
||||
|
||||
// backward pass, update dEdw
|
||||
#ifdef HAVE_TBB
|
||||
static tbb::spin_mutex mutex;
|
||||
tbb::spin_mutex::scoped_lock lock;
|
||||
#endif
|
||||
static cv::Mutex mutex;
|
||||
|
||||
for(int i = l_count-1; i > 0; i-- )
|
||||
{
|
||||
n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i];
|
||||
cvInitMatHeader( &_df, dcount, n2, CV_64F, df[i] );
|
||||
cvMul( grad1, &_df, grad1 );
|
||||
#ifdef HAVE_TBB
|
||||
lock.acquire(mutex);
|
||||
#endif
|
||||
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
|
||||
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
|
||||
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
|
||||
|
||||
// update bias part of dEdw
|
||||
for( k = 0; k < dcount; k++ )
|
||||
{
|
||||
double* dst = _dEdw.data.db + n1*n2;
|
||||
const double* src = grad1->data.db + k*n2;
|
||||
for(int j = 0; j < n2; j++ )
|
||||
dst[j] += src[j];
|
||||
{
|
||||
cv::AutoLock lock(mutex);
|
||||
cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) );
|
||||
cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] );
|
||||
cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T );
|
||||
|
||||
// update bias part of dEdw
|
||||
for( k = 0; k < dcount; k++ )
|
||||
{
|
||||
double* dst = _dEdw.data.db + n1*n2;
|
||||
const double* src = grad1->data.db + k*n2;
|
||||
for(int j = 0; j < n2; j++ )
|
||||
dst[j] += src[j];
|
||||
}
|
||||
|
||||
if (i > 1)
|
||||
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
|
||||
}
|
||||
|
||||
if (i > 1)
|
||||
cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] );
|
||||
#ifdef HAVE_TBB
|
||||
lock.release();
|
||||
#endif
|
||||
cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db );
|
||||
if( i > 1 )
|
||||
cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T );
|
||||
@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw )
|
||||
double E = 0;
|
||||
|
||||
// first, iterate through all the samples and compute dEdw
|
||||
cv::parallel_for(cv::BlockedRange(0, count),
|
||||
cv::parallel_for_(cv::Range(0, count),
|
||||
rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes,
|
||||
ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz)
|
||||
);
|
||||
|
@ -884,7 +884,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing,
|
||||
}
|
||||
|
||||
|
||||
class Tree_predictor
|
||||
class Tree_predictor : public cv::ParallelLoopBody
|
||||
{
|
||||
private:
|
||||
pCvSeq* weak;
|
||||
@ -894,9 +894,7 @@ private:
|
||||
const CvMat* missing;
|
||||
const float shrinkage;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
static tbb::spin_mutex SumMutex;
|
||||
#endif
|
||||
static cv::Mutex SumMutex;
|
||||
|
||||
|
||||
public:
|
||||
@ -915,14 +913,11 @@ public:
|
||||
Tree_predictor& operator=( const Tree_predictor& )
|
||||
{ return *this; }
|
||||
|
||||
virtual void operator()(const cv::BlockedRange& range) const
|
||||
virtual void operator()(const cv::Range& range) const
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
tbb::spin_mutex::scoped_lock lock;
|
||||
#endif
|
||||
CvSeqReader reader;
|
||||
int begin = range.begin();
|
||||
int end = range.end();
|
||||
int begin = range.start;
|
||||
int end = range.end;
|
||||
|
||||
int weak_count = end - begin;
|
||||
CvDTree* tree;
|
||||
@ -940,13 +935,11 @@ public:
|
||||
tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value);
|
||||
}
|
||||
}
|
||||
#ifdef HAVE_TBB
|
||||
lock.acquire(SumMutex);
|
||||
sum[i] += tmp_sum;
|
||||
lock.release();
|
||||
#else
|
||||
sum[i] += tmp_sum;
|
||||
#endif
|
||||
|
||||
{
|
||||
cv::AutoLock lock(SumMutex);
|
||||
sum[i] += tmp_sum;
|
||||
}
|
||||
}
|
||||
} // Tree_predictor::operator()
|
||||
|
||||
@ -954,11 +947,7 @@ public:
|
||||
|
||||
}; // class Tree_predictor
|
||||
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::spin_mutex Tree_predictor::SumMutex;
|
||||
#endif
|
||||
|
||||
cv::Mutex Tree_predictor::SumMutex;
|
||||
|
||||
|
||||
float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
|
||||
@ -976,12 +965,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing,
|
||||
Tree_predictor predictor = Tree_predictor(weak_seq, class_count,
|
||||
params.shrinkage, _sample, _missing, sum);
|
||||
|
||||
//#ifdef HAVE_TBB
|
||||
// tbb::parallel_for(cv::BlockedRange(begin, end), predictor,
|
||||
// tbb::auto_partitioner());
|
||||
//#else
|
||||
cv::parallel_for(cv::BlockedRange(begin, end), predictor);
|
||||
//#endif
|
||||
cv::parallel_for_(cv::Range(begin, end), predictor);
|
||||
|
||||
for (int i=0; i<class_count; ++i)
|
||||
sum[i] = sum[i] /** params.shrinkage*/ + base_value;
|
||||
@ -1210,7 +1194,7 @@ void CvGBTrees::read( CvFileStorage* fs, CvFileNode* node )
|
||||
|
||||
//===========================================================================
|
||||
|
||||
class Sample_predictor
|
||||
class Sample_predictor : public cv::ParallelLoopBody
|
||||
{
|
||||
private:
|
||||
const CvGBTrees* gbt;
|
||||
@ -1240,10 +1224,10 @@ public:
|
||||
{}
|
||||
|
||||
|
||||
virtual void operator()(const cv::BlockedRange& range) const
|
||||
virtual void operator()(const cv::Range& range) const
|
||||
{
|
||||
int begin = range.begin();
|
||||
int end = range.end();
|
||||
int begin = range.start;
|
||||
int end = range.end;
|
||||
|
||||
CvMat x;
|
||||
CvMat miss;
|
||||
@ -1299,11 +1283,7 @@ CvGBTrees::calc_error( CvMLData* _data, int type, std::vector<float> *resp )
|
||||
Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(),
|
||||
_data->get_missing(), _sample_idx);
|
||||
|
||||
//#ifdef HAVE_TBB
|
||||
// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner());
|
||||
//#else
|
||||
cv::parallel_for(cv::BlockedRange(0,n), predictor);
|
||||
//#endif
|
||||
cv::parallel_for_(cv::Range(0,n), predictor);
|
||||
|
||||
int* sidx = _sample_idx ? _sample_idx->data.i : 0;
|
||||
int r_step = CV_IS_MAT_CONT(response->type) ?
|
||||
|
@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end,
|
||||
return result;
|
||||
}
|
||||
|
||||
struct P1 {
|
||||
struct P1 : cv::ParallelLoopBody {
|
||||
P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors,
|
||||
int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result)
|
||||
{
|
||||
@ -333,10 +333,10 @@ struct P1 {
|
||||
float* result;
|
||||
int buf_sz;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
cv::AutoBuffer<float> buf(buf_sz);
|
||||
for(int i = range.begin(); i < range.end(); i += 1 )
|
||||
for(int i = range.start; i < range.end; i += 1 )
|
||||
{
|
||||
float* neighbor_responses = &buf[0];
|
||||
float* dist = neighbor_responses + 1*k;
|
||||
@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results,
|
||||
int k1 = get_sample_count();
|
||||
k1 = MIN( k1, k );
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
|
||||
_results, _neighbor_responses, _dist, &result)
|
||||
cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1,
|
||||
_results, _neighbor_responses, _dist, &result)
|
||||
);
|
||||
|
||||
return result;
|
||||
|
@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res
|
||||
return result;
|
||||
}
|
||||
|
||||
struct predict_body {
|
||||
struct predict_body : cv::ParallelLoopBody {
|
||||
predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg,
|
||||
const CvMat* _samples, const int* _vidx, CvMat* _cls_labels,
|
||||
CvMat* _results, float* _value, int _var_count1
|
||||
@ -307,7 +307,7 @@ struct predict_body {
|
||||
float* value;
|
||||
int var_count1;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
|
||||
int cls = -1;
|
||||
@ -324,7 +324,7 @@ struct predict_body {
|
||||
cv::AutoBuffer<double> buffer(nclasses + var_count1);
|
||||
CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] );
|
||||
|
||||
for(int k = range.begin(); k < range.end(); k += 1 )
|
||||
for(int k = range.start; k < range.end; k += 1 )
|
||||
{
|
||||
int ival;
|
||||
double opt = FLT_MAX;
|
||||
@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c
|
||||
|
||||
const int* vidx = var_idx ? var_idx->data.i : 0;
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
|
||||
vidx, cls_labels, results, &value, var_count
|
||||
));
|
||||
cv::parallel_for_(cv::Range(0, samples->rows),
|
||||
predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples,
|
||||
vidx, cls_labels, results, &value, var_count));
|
||||
|
||||
return value;
|
||||
}
|
||||
|
@ -2192,7 +2192,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const
|
||||
return result;
|
||||
}
|
||||
|
||||
struct predict_body_svm {
|
||||
struct predict_body_svm : ParallelLoopBody {
|
||||
predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results)
|
||||
{
|
||||
pointer = _pointer;
|
||||
@ -2206,9 +2206,9 @@ struct predict_body_svm {
|
||||
const CvMat* samples;
|
||||
CvMat* results;
|
||||
|
||||
void operator()( const cv::BlockedRange& range ) const
|
||||
void operator()( const cv::Range& range ) const
|
||||
{
|
||||
for(int i = range.begin(); i < range.end(); i++ )
|
||||
for(int i = range.start; i < range.end; i++ )
|
||||
{
|
||||
CvMat sample;
|
||||
cvGetRow( samples, &sample, i );
|
||||
@ -2224,7 +2224,7 @@ struct predict_body_svm {
|
||||
float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const
|
||||
{
|
||||
float result = 0;
|
||||
cv::parallel_for(cv::BlockedRange(0, samples->rows),
|
||||
cv::parallel_for_(cv::Range(0, samples->rows),
|
||||
predict_body_svm(this, &result, samples, results)
|
||||
);
|
||||
return result;
|
||||
|
@ -258,7 +258,7 @@ interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt )
|
||||
}
|
||||
|
||||
// Multi-threaded construction of the scale-space pyramid
|
||||
struct SURFBuildInvoker
|
||||
struct SURFBuildInvoker : ParallelLoopBody
|
||||
{
|
||||
SURFBuildInvoker( const Mat& _sum, const std::vector<int>& _sizes,
|
||||
const std::vector<int>& _sampleSteps,
|
||||
@ -271,9 +271,9 @@ struct SURFBuildInvoker
|
||||
traces = &_traces;
|
||||
}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
for( int i=range.begin(); i<range.end(); i++ )
|
||||
for( int i=range.start; i<range.end; i++ )
|
||||
calcLayerDetAndTrace( *sum, (*sizes)[i], (*sampleSteps)[i], (*dets)[i], (*traces)[i] );
|
||||
}
|
||||
|
||||
@ -285,7 +285,7 @@ struct SURFBuildInvoker
|
||||
};
|
||||
|
||||
// Multi-threaded search of the scale-space pyramid for keypoints
|
||||
struct SURFFindInvoker
|
||||
struct SURFFindInvoker : ParallelLoopBody
|
||||
{
|
||||
SURFFindInvoker( const Mat& _sum, const Mat& _mask_sum,
|
||||
const std::vector<Mat>& _dets, const std::vector<Mat>& _traces,
|
||||
@ -310,9 +310,9 @@ struct SURFFindInvoker
|
||||
const std::vector<int>& sizes, std::vector<KeyPoint>& keypoints,
|
||||
int octave, int layer, float hessianThreshold, int sampleStep );
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
for( int i=range.begin(); i<range.end(); i++ )
|
||||
for( int i=range.start; i<range.end; i++ )
|
||||
{
|
||||
int layer = (*middleIndices)[i];
|
||||
int octave = i / nOctaveLayers;
|
||||
@ -333,14 +333,10 @@ struct SURFFindInvoker
|
||||
int nOctaveLayers;
|
||||
float hessianThreshold;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
static tbb::mutex findMaximaInLayer_m;
|
||||
#endif
|
||||
static Mutex findMaximaInLayer_m;
|
||||
};
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex SURFFindInvoker::findMaximaInLayer_m;
|
||||
#endif
|
||||
Mutex SURFFindInvoker::findMaximaInLayer_m;
|
||||
|
||||
|
||||
/*
|
||||
@ -437,9 +433,7 @@ void SURFFindInvoker::findMaximaInLayer( const Mat& sum, const Mat& mask_sum,
|
||||
if( interp_ok )
|
||||
{
|
||||
/*printf( "KeyPoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex::scoped_lock lock(findMaximaInLayer_m);
|
||||
#endif
|
||||
cv::AutoLock lock(findMaximaInLayer_m);
|
||||
keypoints.push_back(kpt);
|
||||
}
|
||||
}
|
||||
@ -505,20 +499,20 @@ static void fastHessianDetector( const Mat& sum, const Mat& mask_sum, std::vecto
|
||||
}
|
||||
|
||||
// Calculate hessian determinant and trace samples in each layer
|
||||
parallel_for( BlockedRange(0, nTotalLayers),
|
||||
SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
|
||||
parallel_for_( Range(0, nTotalLayers),
|
||||
SURFBuildInvoker(sum, sizes, sampleSteps, dets, traces) );
|
||||
|
||||
// Find maxima in the determinant of the hessian
|
||||
parallel_for( BlockedRange(0, nMiddleLayers),
|
||||
SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
|
||||
sampleSteps, middleIndices, keypoints,
|
||||
nOctaveLayers, hessianThreshold) );
|
||||
parallel_for_( Range(0, nMiddleLayers),
|
||||
SURFFindInvoker(sum, mask_sum, dets, traces, sizes,
|
||||
sampleSteps, middleIndices, keypoints,
|
||||
nOctaveLayers, hessianThreshold) );
|
||||
|
||||
std::sort(keypoints.begin(), keypoints.end(), KeypointGreater());
|
||||
}
|
||||
|
||||
|
||||
struct SURFInvoker
|
||||
struct SURFInvoker : ParallelLoopBody
|
||||
{
|
||||
enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 };
|
||||
|
||||
@ -566,7 +560,7 @@ struct SURFInvoker
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
/* X and Y gradient wavelet data */
|
||||
const int NX=2, NY=2;
|
||||
@ -584,7 +578,7 @@ struct SURFInvoker
|
||||
|
||||
int dsize = extended ? 128 : 64;
|
||||
|
||||
int k, k1 = range.begin(), k2 = range.end();
|
||||
int k, k1 = range.start, k2 = range.end;
|
||||
float maxSize = 0;
|
||||
for( k = k1; k < k2; k++ )
|
||||
{
|
||||
@ -952,7 +946,7 @@ void SURF::operator()(InputArray _img, InputArray _mask,
|
||||
|
||||
// we call SURFInvoker in any case, even if we do not need descriptors,
|
||||
// since it computes orientation of each feature.
|
||||
parallel_for(BlockedRange(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
|
||||
parallel_for_(Range(0, N), SURFInvoker(img, sum, keypoints, descriptors, extended, upright) );
|
||||
|
||||
// remove keypoints that were marked for deletion
|
||||
for( i = j = 0; i < N; i++ )
|
||||
|
@ -1164,15 +1164,10 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector<Rect>& o
|
||||
|
||||
int stripCount, stripSize;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
const int PTS_PER_THREAD = 1000;
|
||||
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
|
||||
stripCount = std::min(std::max(stripCount, 1), 100);
|
||||
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
|
||||
#else
|
||||
stripCount = 1;
|
||||
stripSize = processingRectSize.height;
|
||||
#endif
|
||||
|
||||
if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
|
||||
rejectLevels, levelWeights, outputRejectLevels ) )
|
||||
|
@ -582,7 +582,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
|
||||
// For each component perform searching
|
||||
for (i = 0; i < kComponents; i++)
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
|
||||
b[i], maxXBorder, maxYBorder, scoreThreshold,
|
||||
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
|
||||
@ -598,13 +597,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H,
|
||||
free(partsDisplacementArr);
|
||||
return LATENT_SVM_SEARCH_OBJECT_FAILED;
|
||||
}
|
||||
#else
|
||||
(void)numThreads;
|
||||
searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i],
|
||||
b[i], maxXBorder, maxYBorder, scoreThreshold,
|
||||
&(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]),
|
||||
&(scoreArr[i]), &(partsDisplacementArr[i]));
|
||||
#endif
|
||||
estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i],
|
||||
filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i]));
|
||||
componentIndex += (kPartFilters[i] + 1);
|
||||
|
@ -121,8 +121,9 @@ namespace cv
|
||||
CV_EXPORTS void setBinpath(const char *path);
|
||||
|
||||
//The two functions below let other OpenCL programs use the ocl module's cl_context and cl_command_queue
|
||||
//returns cl_context *
|
||||
CV_EXPORTS void* getoclContext();
|
||||
|
||||
//returns cl_command_queue *
|
||||
CV_EXPORTS void* getoclCommandQueue();
|
||||
|
||||
//explicit call clFinish. The global command queue will be used.
|
||||
@ -460,6 +461,7 @@ namespace cv
|
||||
// support all C1 types
|
||||
|
||||
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
|
||||
CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf);
|
||||
|
||||
//! finds global minimum and maximum array elements and returns their values with locations
|
||||
// support all C1 types
|
||||
@ -808,7 +810,11 @@ namespace cv
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
|
||||
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
|
||||
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||
CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
|
||||
int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
|
||||
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
|
||||
CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
|
||||
int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////CascadeClassifier//////////////////////////////////////////////////////////////////
|
||||
@ -826,13 +832,14 @@ namespace cv
|
||||
};
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
|
||||
{
|
||||
public:
|
||||
OclCascadeClassifierBuf() :
|
||||
m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}
|
||||
|
||||
~OclCascadeClassifierBuf() {}
|
||||
~OclCascadeClassifierBuf() { release(); }
|
||||
|
||||
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
|
||||
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
|
||||
@ -864,6 +871,7 @@ namespace cv
|
||||
oclMat gimg1, gsum, gsqsum;
|
||||
void * buffers;
|
||||
};
|
||||
#endif
|
||||
|
||||
/////////////////////////////// Pyramid /////////////////////////////////////
|
||||
CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
|
||||
@ -1388,6 +1396,51 @@ namespace cv
|
||||
explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
|
||||
};
|
||||
|
||||
class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
|
||||
{
|
||||
public:
|
||||
explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
|
||||
int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
|
||||
|
||||
//! return 1 rows matrix with CV_32FC2 type
|
||||
void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
|
||||
//! download points of type Point2f to a vector. the vector's content will be erased
|
||||
void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
|
||||
|
||||
int maxCorners;
|
||||
double qualityLevel;
|
||||
double minDistance;
|
||||
|
||||
int blockSize;
|
||||
bool useHarrisDetector;
|
||||
double harrisK;
|
||||
void releaseMemory()
|
||||
{
|
||||
Dx_.release();
|
||||
Dy_.release();
|
||||
eig_.release();
|
||||
minMaxbuf_.release();
|
||||
tmpCorners_.release();
|
||||
}
|
||||
private:
|
||||
oclMat Dx_;
|
||||
oclMat Dy_;
|
||||
oclMat eig_;
|
||||
oclMat minMaxbuf_;
|
||||
oclMat tmpCorners_;
|
||||
};
|
||||
|
||||
// Copies each argument into the public parameter of the same name
// (argument names carry a trailing underscore).
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
        int blockSize_, bool useHarrisDetector_, double harrisK_)
    : maxCorners(maxCorners_),
      qualityLevel(qualityLevel_),
      minDistance(minDistance_),
      blockSize(blockSize_),
      useHarrisDetector(useHarrisDetector_),
      harrisK(harrisK_)
{
}
|
||||
|
||||
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
|
||||
|
||||
class CV_EXPORTS PyrLKOpticalFlow
|
||||
|
@ -47,7 +47,7 @@
|
||||
#define __OPENCV_OCL_PRIVATE_UTIL__
|
||||
|
||||
#if defined __APPLE__
|
||||
#include <OpenCL/OpenCL.h>
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
@ -121,6 +121,33 @@ namespace cv
|
||||
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
||||
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
||||
|
||||
//Represents an image texture object
|
||||
class CV_EXPORTS TextureCL
|
||||
{
|
||||
public:
|
||||
TextureCL(cl_mem tex, int r, int c, int t)
|
||||
: tex_(tex), rows(r), cols(c), type(t) {}
|
||||
~TextureCL()
|
||||
{
|
||||
openCLFree(tex_);
|
||||
}
|
||||
operator cl_mem()
|
||||
{
|
||||
return tex_;
|
||||
}
|
||||
cl_mem const tex_;
|
||||
const int rows;
|
||||
const int cols;
|
||||
const int type;
|
||||
private:
|
||||
//disable assignment
|
||||
void operator=(const TextureCL&);
|
||||
};
|
||||
// binds an oclMat to an OpenCL image texture and returns a TextureCL object
|
||||
// note:
|
||||
// for faster clamping, there is no buffer padding for the constructed texture
|
||||
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
||||
|
||||
// returns whether the current context supports image2d_t format or not
|
||||
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
||||
|
||||
@ -132,7 +159,7 @@ namespace cv
|
||||
};
|
||||
template<DEVICE_INFO _it, typename _ty>
|
||||
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
||||
//info should have been pre-allocated
|
||||
|
||||
template<>
|
||||
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
||||
template<>
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// Lut ////////////////////////
|
||||
PERFTEST(lut)
|
||||
{
|
||||
Mat src, lut, dst;
|
||||
Mat src, lut, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_lut, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC3};
|
||||
@ -77,11 +77,6 @@ PERFTEST(lut)
|
||||
ocl::LUT(d_src, d_lut, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::LUT(d_src, d_lut, d_dst);
|
||||
GPU_OFF;
|
||||
@ -90,9 +85,10 @@ PERFTEST(lut)
|
||||
d_src.upload(src);
|
||||
d_lut.upload(lut);
|
||||
ocl::LUT(d_src, d_lut, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -101,7 +97,7 @@ PERFTEST(lut)
|
||||
///////////// Exp ////////////////////////
|
||||
PERFTEST(Exp)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
@ -121,11 +117,6 @@ PERFTEST(Exp)
|
||||
ocl::exp(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 2));
|
||||
|
||||
GPU_ON;
|
||||
ocl::exp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -133,15 +124,17 @@ PERFTEST(Exp)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::exp(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 2);
|
||||
}
|
||||
}
|
||||
|
||||
///////////// LOG ////////////////////////
|
||||
PERFTEST(Log)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
@ -161,11 +154,6 @@ PERFTEST(Log)
|
||||
ocl::log(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::log(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -173,15 +161,17 @@ PERFTEST(Log)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::log(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
|
||||
}
|
||||
}
|
||||
|
||||
///////////// Add ////////////////////////
|
||||
PERFTEST(Add)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_32FC1};
|
||||
@ -201,6 +191,7 @@ PERFTEST(Add)
|
||||
CPU_ON;
|
||||
add(src1, src2, dst);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -208,11 +199,6 @@ PERFTEST(Add)
|
||||
ocl::add(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::add(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -221,8 +207,10 @@ PERFTEST(Add)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::add(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -231,7 +219,7 @@ PERFTEST(Add)
|
||||
///////////// Mul ////////////////////////
|
||||
PERFTEST(Mul)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -260,11 +248,6 @@ PERFTEST(Mul)
|
||||
ocl::multiply(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::multiply(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -273,8 +256,10 @@ PERFTEST(Mul)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::multiply(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -283,7 +268,7 @@ PERFTEST(Mul)
|
||||
///////////// Div ////////////////////////
|
||||
PERFTEST(Div)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
|
||||
@ -304,6 +289,7 @@ PERFTEST(Div)
|
||||
CPU_ON;
|
||||
divide(src1, src2, dst);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -311,11 +297,6 @@ PERFTEST(Div)
|
||||
ocl::divide(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::divide(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -324,8 +305,10 @@ PERFTEST(Div)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::divide(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -334,7 +317,7 @@ PERFTEST(Div)
|
||||
///////////// Absdiff ////////////////////////
|
||||
PERFTEST(Absdiff)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -355,6 +338,7 @@ PERFTEST(Absdiff)
|
||||
CPU_ON;
|
||||
absdiff(src1, src2, dst);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -362,11 +346,6 @@ PERFTEST(Absdiff)
|
||||
ocl::absdiff(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::absdiff(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -375,8 +354,10 @@ PERFTEST(Absdiff)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::absdiff(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -385,7 +366,7 @@ PERFTEST(Absdiff)
|
||||
///////////// CartToPolar ////////////////////////
|
||||
PERFTEST(CartToPolar)
|
||||
{
|
||||
Mat src1, src2, dst, dst1;
|
||||
Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
|
||||
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -408,6 +389,7 @@ PERFTEST(CartToPolar)
|
||||
CPU_ON;
|
||||
cartToPolar(src1, src2, dst, dst1, 1);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -415,14 +397,6 @@ PERFTEST(CartToPolar)
|
||||
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
cv::Mat ocl_mat_dst1;
|
||||
d_dst1.download(ocl_mat_dst1);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
GPU_OFF;
|
||||
@ -431,9 +405,15 @@ PERFTEST(CartToPolar)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
d_dst.download(dst);
|
||||
d_dst1.download(dst1);
|
||||
d_dst.download(ocl_dst);
|
||||
d_dst1.download(ocl_dst1);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
double diff1 = checkNorm(ocl_dst1, dst1);
|
||||
double diff2 = checkNorm(ocl_dst, dst);
|
||||
double max_diff = max(diff1, diff2);
|
||||
TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -442,7 +422,7 @@ PERFTEST(CartToPolar)
|
||||
///////////// PolarToCart ////////////////////////
|
||||
PERFTEST(PolarToCart)
|
||||
{
|
||||
Mat src1, src2, dst, dst1;
|
||||
Mat src1, src2, dst, dst1, ocl_dst, ocl_dst1;
|
||||
ocl::oclMat d_src1, d_src2, d_dst, d_dst1;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -472,14 +452,6 @@ PERFTEST(PolarToCart)
|
||||
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
cv::Mat ocl_mat_dst1;
|
||||
d_dst1.download(ocl_mat_dst1);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst1, dst1, 0.5)&&ExpectedMatNear(ocl_mat_dst, dst, 0.5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
GPU_OFF;
|
||||
@ -488,9 +460,15 @@ PERFTEST(PolarToCart)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1);
|
||||
d_dst.download(dst);
|
||||
d_dst1.download(dst1);
|
||||
d_dst.download(ocl_dst);
|
||||
d_dst1.download(ocl_dst1);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
double diff1 = checkNorm(ocl_dst1, dst1);
|
||||
double diff2 = checkNorm(ocl_dst, dst);
|
||||
double max_diff = max(diff1, diff2);
|
||||
TestSystem::instance().setAccurate(max_diff<=.5?1:0, max_diff);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -499,7 +477,7 @@ PERFTEST(PolarToCart)
|
||||
///////////// Magnitude ////////////////////////
|
||||
PERFTEST(magnitude)
|
||||
{
|
||||
Mat x, y, mag;
|
||||
Mat x, y, mag, ocl_mag;
|
||||
ocl::oclMat d_x, d_y, d_mag;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -526,11 +504,6 @@ PERFTEST(magnitude)
|
||||
ocl::magnitude(d_x, d_y, d_mag);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_mag.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, mag, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::magnitude(d_x, d_y, d_mag);
|
||||
GPU_OFF;
|
||||
@ -539,8 +512,10 @@ PERFTEST(magnitude)
|
||||
d_x.upload(x);
|
||||
d_y.upload(y);
|
||||
ocl::magnitude(d_x, d_y, d_mag);
|
||||
d_mag.download(mag);
|
||||
d_mag.download(ocl_mag);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_mag, mag, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
@ -549,7 +524,7 @@ PERFTEST(magnitude)
|
||||
///////////// Transpose ////////////////////////
|
||||
PERFTEST(Transpose)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -575,11 +550,6 @@ PERFTEST(Transpose)
|
||||
ocl::transpose(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::transpose(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -587,8 +557,10 @@ PERFTEST(Transpose)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::transpose(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
@ -597,7 +569,7 @@ PERFTEST(Transpose)
|
||||
///////////// Flip ////////////////////////
|
||||
PERFTEST(Flip)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -623,11 +595,6 @@ PERFTEST(Flip)
|
||||
ocl::flip(d_src, d_dst, 0);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::flip(d_src, d_dst, 0);
|
||||
GPU_OFF;
|
||||
@ -635,8 +602,10 @@ PERFTEST(Flip)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::flip(d_src, d_dst, 0);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
@ -671,7 +640,10 @@ PERFTEST(minMax)
|
||||
ocl::minMax(d_src, &min_val_, &max_val_);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(EeceptDoubleEQ<double>(max_val_, max_val)&&EeceptDoubleEQ<double>(min_val_, min_val));
|
||||
if(EeceptDoubleEQ<double>(max_val_, max_val) && EeceptDoubleEQ<double>(min_val_, min_val))
|
||||
TestSystem::instance().setAccurate(1, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val)));
|
||||
|
||||
GPU_ON;
|
||||
ocl::minMax(d_src, &min_val, &max_val);
|
||||
@ -724,8 +696,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<unsigned char>(min_loc_);
|
||||
maxlocVal = src.at<unsigned char>(max_loc);
|
||||
maxlocVal_ = src.at<unsigned char>(max_loc_);
|
||||
error0 = ::abs(src.at<unsigned char>(min_loc_) - src.at<unsigned char>(min_loc));
|
||||
error1 = ::abs(src.at<unsigned char>(max_loc_) - src.at<unsigned char>(max_loc));
|
||||
}
|
||||
if(src.depth() == 1)
|
||||
{
|
||||
@ -733,8 +703,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<signed char>(min_loc_);
|
||||
maxlocVal = src.at<signed char>(max_loc);
|
||||
maxlocVal_ = src.at<signed char>(max_loc_);
|
||||
error0 = ::abs(src.at<signed char>(min_loc_) - src.at<signed char>(min_loc));
|
||||
error1 = ::abs(src.at<signed char>(max_loc_) - src.at<signed char>(max_loc));
|
||||
}
|
||||
if(src.depth() == 2)
|
||||
{
|
||||
@ -742,8 +710,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<unsigned short>(min_loc_);
|
||||
maxlocVal = src.at<unsigned short>(max_loc);
|
||||
maxlocVal_ = src.at<unsigned short>(max_loc_);
|
||||
error0 = ::abs(src.at<unsigned short>(min_loc_) - src.at<unsigned short>(min_loc));
|
||||
error1 = ::abs(src.at<unsigned short>(max_loc_) - src.at<unsigned short>(max_loc));
|
||||
}
|
||||
if(src.depth() == 3)
|
||||
{
|
||||
@ -751,8 +717,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<signed short>(min_loc_);
|
||||
maxlocVal = src.at<signed short>(max_loc);
|
||||
maxlocVal_ = src.at<signed short>(max_loc_);
|
||||
error0 = ::abs(src.at<signed short>(min_loc_) - src.at<signed short>(min_loc));
|
||||
error1 = ::abs(src.at<signed short>(max_loc_) - src.at<signed short>(max_loc));
|
||||
}
|
||||
if(src.depth() == 4)
|
||||
{
|
||||
@ -760,8 +724,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<int>(min_loc_);
|
||||
maxlocVal = src.at<int>(max_loc);
|
||||
maxlocVal_ = src.at<int>(max_loc_);
|
||||
error0 = ::abs(src.at<int>(min_loc_) - src.at<int>(min_loc));
|
||||
error1 = ::abs(src.at<int>(max_loc_) - src.at<int>(max_loc));
|
||||
}
|
||||
if(src.depth() == 5)
|
||||
{
|
||||
@ -769,8 +731,6 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<float>(min_loc_);
|
||||
maxlocVal = src.at<float>(max_loc);
|
||||
maxlocVal_ = src.at<float>(max_loc_);
|
||||
error0 = ::abs(src.at<float>(min_loc_) - src.at<float>(min_loc));
|
||||
error1 = ::abs(src.at<float>(max_loc_) - src.at<float>(max_loc));
|
||||
}
|
||||
if(src.depth() == 6)
|
||||
{
|
||||
@ -778,16 +738,16 @@ PERFTEST(minMaxLoc)
|
||||
minlocVal_ = src.at<double>(min_loc_);
|
||||
maxlocVal = src.at<double>(max_loc);
|
||||
maxlocVal_ = src.at<double>(max_loc_);
|
||||
error0 = ::abs(src.at<double>(min_loc_) - src.at<double>(min_loc));
|
||||
error1 = ::abs(src.at<double>(max_loc_) - src.at<double>(max_loc));
|
||||
}
|
||||
|
||||
TestSystem::instance().setAccurate(EeceptDoubleEQ<double>(error1, 0.0)
|
||||
&&EeceptDoubleEQ<double>(error0, 0.0)
|
||||
&&EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
|
||||
error0 = ::abs(minlocVal_ - minlocVal);
|
||||
error1 = ::abs(maxlocVal_ - maxlocVal);
|
||||
if( EeceptDoubleEQ<double>(maxlocVal_, maxlocVal)
|
||||
&&EeceptDoubleEQ<double>(minlocVal_, minlocVal)
|
||||
&&EeceptDoubleEQ<double>(max_val_, max_val)
|
||||
&&EeceptDoubleEQ<double>(min_val_, min_val));
|
||||
&&EeceptDoubleEQ<double>(min_val_, min_val))
|
||||
TestSystem::instance().setAccurate(1, 0.);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, max(error0, error1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
|
||||
@ -831,11 +791,13 @@ PERFTEST(Sum)
|
||||
gpures = ocl::sum(d_src);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExceptDoubleNear(cpures[3], gpures[3], 0.1)
|
||||
&&ExceptDoubleNear(cpures[2], gpures[2], 0.1)
|
||||
&&ExceptDoubleNear(cpures[1], gpures[1], 0.1)
|
||||
&&ExceptDoubleNear(cpures[0], gpures[0], 0.1));
|
||||
|
||||
vector<double> diffs(4);
|
||||
diffs[3] = fabs(cpures[3] - gpures[3]);
|
||||
diffs[2] = fabs(cpures[2] - gpures[2]);
|
||||
diffs[1] = fabs(cpures[1] - gpures[1]);
|
||||
diffs[0] = fabs(cpures[0] - gpures[0]);
|
||||
double max_diff = *max_element(diffs.begin(), diffs.end());
|
||||
TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff);
|
||||
|
||||
GPU_ON;
|
||||
gpures = ocl::sum(d_src);
|
||||
@ -879,7 +841,11 @@ PERFTEST(countNonZero)
|
||||
gpures = ocl::countNonZero(d_src);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate((EeceptDoubleEQ<double>((double)cpures, (double)gpures)));
|
||||
int diff = abs(cpures - gpures);
|
||||
if(diff == 0)
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, diff);
|
||||
|
||||
GPU_ON;
|
||||
ocl::countNonZero(d_src);
|
||||
@ -897,7 +863,7 @@ PERFTEST(countNonZero)
|
||||
///////////// Phase ////////////////////////
|
||||
PERFTEST(Phase)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -913,12 +879,12 @@ PERFTEST(Phase)
|
||||
gen(src2, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
phase(src1, src2, dst, 1);
|
||||
|
||||
CPU_ON;
|
||||
phase(src1, src2, dst, 1);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -926,11 +892,6 @@ PERFTEST(Phase)
|
||||
ocl::phase(d_src1, d_src2, d_dst, 1);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-2));
|
||||
|
||||
GPU_ON;
|
||||
ocl::phase(d_src1, d_src2, d_dst, 1);
|
||||
GPU_OFF;
|
||||
@ -939,8 +900,10 @@ PERFTEST(Phase)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::phase(d_src1, d_src2, d_dst, 1);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2);
|
||||
}
|
||||
|
||||
}
|
||||
@ -949,7 +912,7 @@ PERFTEST(Phase)
|
||||
///////////// bitwise_and////////////////////////
|
||||
PERFTEST(bitwise_and)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_32SC1};
|
||||
@ -965,7 +928,6 @@ PERFTEST(bitwise_and)
|
||||
gen(src2, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
bitwise_and(src1, src2, dst);
|
||||
|
||||
CPU_ON;
|
||||
@ -978,11 +940,6 @@ PERFTEST(bitwise_and)
|
||||
ocl::bitwise_and(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::bitwise_and(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -991,8 +948,10 @@ PERFTEST(bitwise_and)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::bitwise_and(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1001,7 +960,7 @@ PERFTEST(bitwise_and)
|
||||
///////////// bitwise_not////////////////////////
|
||||
PERFTEST(bitwise_not)
|
||||
{
|
||||
Mat src1, dst;
|
||||
Mat src1, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_32SC1};
|
||||
@ -1016,7 +975,6 @@ PERFTEST(bitwise_not)
|
||||
gen(src1, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
bitwise_not(src1, dst);
|
||||
|
||||
CPU_ON;
|
||||
@ -1028,11 +986,6 @@ PERFTEST(bitwise_not)
|
||||
ocl::bitwise_not(d_src1, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::bitwise_not(d_src1, d_dst);
|
||||
GPU_OFF;
|
||||
@ -1040,8 +993,10 @@ PERFTEST(bitwise_not)
|
||||
GPU_FULL_ON;
|
||||
d_src1.upload(src1);
|
||||
ocl::bitwise_not(d_src1, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1050,7 +1005,7 @@ PERFTEST(bitwise_not)
|
||||
///////////// compare////////////////////////
|
||||
PERFTEST(compare)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int CMP_EQ = 0;
|
||||
@ -1067,12 +1022,12 @@ PERFTEST(compare)
|
||||
gen(src2, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
compare(src1, src2, dst, CMP_EQ);
|
||||
|
||||
CPU_ON;
|
||||
compare(src1, src2, dst, CMP_EQ);
|
||||
CPU_OFF;
|
||||
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
@ -1080,11 +1035,6 @@ PERFTEST(compare)
|
||||
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
|
||||
GPU_OFF;
|
||||
@ -1093,8 +1043,10 @@ PERFTEST(compare)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::compare(d_src1, d_src2, d_dst, CMP_EQ);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1103,7 +1055,7 @@ PERFTEST(compare)
|
||||
///////////// pow ////////////////////////
|
||||
PERFTEST(pow)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -1129,11 +1081,6 @@ PERFTEST(pow)
|
||||
ocl::pow(d_src, -2.0, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::pow(d_src, -2.0, d_dst);
|
||||
GPU_OFF;
|
||||
@ -1141,8 +1088,10 @@ PERFTEST(pow)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::pow(d_src, -2.0, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1151,7 +1100,7 @@ PERFTEST(pow)
|
||||
///////////// MagnitudeSqr////////////////////////
|
||||
PERFTEST(MagnitudeSqr)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
int all_type[] = {CV_32FC1};
|
||||
@ -1167,53 +1116,36 @@ PERFTEST(MagnitudeSqr)
|
||||
gen(src2, size, size, all_type[t], 0, 256);
|
||||
gen(dst, size, size, all_type[t], 0, 256);
|
||||
|
||||
|
||||
CPU_ON;
|
||||
for (int i = 0; i < src1.rows; ++i)
|
||||
|
||||
for (int j = 0; j < src1.cols; ++j)
|
||||
{
|
||||
float val1 = src1.at<float>(i, j);
|
||||
float val2 = src2.at<float>(i, j);
|
||||
|
||||
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
|
||||
|
||||
}
|
||||
CPU_OFF;
|
||||
|
||||
CPU_ON;
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
for (int i = 0; i < src1.rows; ++i)
|
||||
for (int j = 0; j < src1.cols; ++j)
|
||||
{
|
||||
float val1 = src1.at<float>(i, j);
|
||||
float val2 = src2.at<float>(i, j);
|
||||
WARMUP_ON;
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
|
||||
GPU_ON;
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
|
||||
}
|
||||
GPU_FULL_ON;
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
CPU_OFF;
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::magnitudeSqr(d_src1, d_src2, d_dst);
|
||||
d_dst.download(dst);
|
||||
GPU_FULL_OFF;
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1222,7 +1154,7 @@ PERFTEST(MagnitudeSqr)
|
||||
///////////// AddWeighted////////////////////////
|
||||
PERFTEST(AddWeighted)
|
||||
{
|
||||
Mat src1, src2, dst;
|
||||
Mat src1, src2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_dst;
|
||||
|
||||
double alpha = 2.0, beta = 1.0, gama = 3.0;
|
||||
@ -1252,11 +1184,6 @@ PERFTEST(AddWeighted)
|
||||
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat_dst;
|
||||
d_dst.download(ocl_mat_dst);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat_dst, dst, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
|
||||
GPU_OFF;
|
||||
@ -1265,8 +1192,10 @@ PERFTEST(AddWeighted)
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -71,7 +71,7 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
|
||||
}
|
||||
PERFTEST(blend)
|
||||
{
|
||||
Mat src1, src2, weights1, weights2, dst;
|
||||
Mat src1, src2, weights1, weights2, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -103,10 +103,6 @@ PERFTEST(blend)
|
||||
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_dst.download(ocl_mat);
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 1.f));
|
||||
|
||||
GPU_ON;
|
||||
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
|
||||
GPU_OFF;
|
||||
@ -117,8 +113,10 @@ PERFTEST(blend)
|
||||
d_weights1.upload(weights1);
|
||||
d_weights2.upload(weights2);
|
||||
ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
|
||||
}
|
||||
}
|
||||
}
|
@ -88,9 +88,6 @@ PERFTEST(BruteForceMatcher)
|
||||
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
WARMUP_OFF;
|
||||
|
||||
d_matcher.match(d_query, d_train, d_matches[0]);
|
||||
TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
|
||||
|
||||
GPU_ON;
|
||||
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
|
||||
GPU_OFF;
|
||||
@ -98,9 +95,15 @@ PERFTEST(BruteForceMatcher)
|
||||
GPU_FULL_ON;
|
||||
d_query.upload(query);
|
||||
d_train.upload(train);
|
||||
d_matcher.match(d_query, d_train, matches[0]);
|
||||
d_matcher.match(d_query, d_train, d_matches[0]);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
int diff = abs((int)d_matches[0].size() - (int)matches[0].size());
|
||||
if(diff == 0)
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, diff);
|
||||
|
||||
SUBTEST << size << "; knnMatch";
|
||||
|
||||
matcher.knnMatch(query, train, matches, 2);
|
||||
@ -123,7 +126,11 @@ PERFTEST(BruteForceMatcher)
|
||||
d_matcher.knnMatch(d_query, d_train, d_matches, 2);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
|
||||
diff = abs((int)d_matches[0].size() - (int)matches[0].size());
|
||||
if(diff == 0)
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, diff);
|
||||
|
||||
SUBTEST << size << "; radiusMatch";
|
||||
|
||||
@ -151,6 +158,10 @@ PERFTEST(BruteForceMatcher)
|
||||
d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
|
||||
diff = abs((int)d_matches[0].size() - (int)matches[0].size());
|
||||
if(diff == 0)
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, diff);
|
||||
}
|
||||
}
|
@ -57,7 +57,7 @@ PERFTEST(Canny)
|
||||
|
||||
SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1";
|
||||
|
||||
Mat edges(img.size(), CV_8UC1);
|
||||
Mat edges(img.size(), CV_8UC1), ocl_edges;
|
||||
|
||||
CPU_ON;
|
||||
Canny(img, edges, 50.0, 100.0);
|
||||
@ -71,8 +71,6 @@ PERFTEST(Canny)
|
||||
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExceptedMatSimilar(edges, d_edges, 2e-2));
|
||||
|
||||
GPU_ON;
|
||||
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
|
||||
GPU_OFF;
|
||||
@ -80,6 +78,8 @@ PERFTEST(Canny)
|
||||
GPU_FULL_ON;
|
||||
d_img.upload(img);
|
||||
ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
|
||||
d_edges.download(edges);
|
||||
d_edges.download(ocl_edges);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
|
||||
}
|
@ -48,7 +48,7 @@
|
||||
///////////// cvtColor////////////////////////
|
||||
PERFTEST(cvtColor)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC4};
|
||||
@ -73,10 +73,6 @@ PERFTEST(cvtColor)
|
||||
ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_dst.download(ocl_mat);
|
||||
TestSystem::instance().setAccurate(ExceptedMatSimilar(dst, ocl_mat, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
|
||||
GPU_OFF;
|
||||
@ -84,8 +80,10 @@ PERFTEST(cvtColor)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::cvtColor(d_src, d_dst, COLOR_RGBA2GRAY, 4);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5);
|
||||
}
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// columnSum////////////////////////
|
||||
PERFTEST(columnSum)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
@ -63,23 +63,16 @@ PERFTEST(columnSum)
|
||||
dst.at<float>(0, j) = src.at<float>(0, j);
|
||||
|
||||
for (int i = 1; i < src.rows; ++i)
|
||||
{for (int j = 0; j < src.cols; ++j)
|
||||
{
|
||||
for (int j = 0; j < src.cols; ++j)
|
||||
dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
|
||||
}
|
||||
}
|
||||
|
||||
CPU_OFF;
|
||||
|
||||
d_src.upload(src);
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::columnSum(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_dst.download(ocl_mat);
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 5e-1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::columnSum(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -87,7 +80,9 @@ PERFTEST(columnSum)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::columnSum(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1);
|
||||
}
|
||||
}
|
@ -48,7 +48,7 @@
|
||||
///////////// dft ////////////////////////
|
||||
PERFTEST(dft)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_32FC2};
|
||||
@ -74,8 +74,6 @@ PERFTEST(dft)
|
||||
ocl::dft(d_src, d_dst, Size(size, size));
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), src.size().area() * 1e-4));
|
||||
|
||||
GPU_ON;
|
||||
ocl::dft(d_src, d_dst, Size(size, size));
|
||||
GPU_OFF;
|
||||
@ -83,8 +81,10 @@ PERFTEST(dft)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::dft(d_src, d_dst, Size(size, size));
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// Blur////////////////////////
|
||||
PERFTEST(Blur)
|
||||
{
|
||||
Mat src1, dst;
|
||||
Mat src1, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_dst;
|
||||
|
||||
Size ksize = Size(3, 3);
|
||||
@ -65,7 +65,6 @@ PERFTEST(Blur)
|
||||
gen(src1, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
blur(src1, dst, ksize, Point(-1, -1), bordertype);
|
||||
|
||||
CPU_ON;
|
||||
@ -78,8 +77,6 @@ PERFTEST(Blur)
|
||||
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
|
||||
GPU_OFF;
|
||||
@ -87,8 +84,10 @@ PERFTEST(Blur)
|
||||
GPU_FULL_ON;
|
||||
d_src1.upload(src1);
|
||||
ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -96,7 +95,7 @@ PERFTEST(Blur)
|
||||
///////////// Laplacian////////////////////////
|
||||
PERFTEST(Laplacian)
|
||||
{
|
||||
Mat src1, dst;
|
||||
Mat src1, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_dst;
|
||||
|
||||
int ksize = 3;
|
||||
@ -112,7 +111,6 @@ PERFTEST(Laplacian)
|
||||
gen(src1, size, size, all_type[j], 0, 256);
|
||||
gen(dst, size, size, all_type[j], 0, 256);
|
||||
|
||||
|
||||
Laplacian(src1, dst, -1, ksize, 1);
|
||||
|
||||
CPU_ON;
|
||||
@ -125,8 +123,6 @@ PERFTEST(Laplacian)
|
||||
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
|
||||
GPU_OFF;
|
||||
@ -134,8 +130,10 @@ PERFTEST(Laplacian)
|
||||
GPU_FULL_ON;
|
||||
d_src1.upload(src1);
|
||||
ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
@ -144,7 +142,7 @@ PERFTEST(Laplacian)
|
||||
///////////// Erode ////////////////////
|
||||
PERFTEST(Erode)
|
||||
{
|
||||
Mat src, dst, ker;
|
||||
Mat src, dst, ker, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
|
||||
@ -171,8 +169,6 @@ PERFTEST(Erode)
|
||||
ocl::erode(d_src, d_dst, ker);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
|
||||
|
||||
GPU_ON;
|
||||
ocl::erode(d_src, d_dst, ker);
|
||||
GPU_OFF;
|
||||
@ -180,8 +176,10 @@ PERFTEST(Erode)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::erode(d_src, d_dst, ker);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
@ -190,7 +188,7 @@ PERFTEST(Erode)
|
||||
///////////// Sobel ////////////////////////
|
||||
PERFTEST(Sobel)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int dx = 1;
|
||||
@ -218,8 +216,6 @@ PERFTEST(Sobel)
|
||||
ocl::Sobel(d_src, d_dst, -1, dx, dy);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::Sobel(d_src, d_dst, -1, dx, dy);
|
||||
GPU_OFF;
|
||||
@ -227,8 +223,10 @@ PERFTEST(Sobel)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::Sobel(d_src, d_dst, -1, dx, dy);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -236,7 +234,7 @@ PERFTEST(Sobel)
|
||||
///////////// Scharr ////////////////////////
|
||||
PERFTEST(Scharr)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int dx = 1;
|
||||
@ -264,8 +262,6 @@ PERFTEST(Scharr)
|
||||
ocl::Scharr(d_src, d_dst, -1, dx, dy);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::Scharr(d_src, d_dst, -1, dx, dy);
|
||||
GPU_OFF;
|
||||
@ -273,8 +269,10 @@ PERFTEST(Scharr)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::Scharr(d_src, d_dst, -1, dx, dy);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -283,7 +281,7 @@ PERFTEST(Scharr)
|
||||
///////////// GaussianBlur ////////////////////////
|
||||
PERFTEST(GaussianBlur)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
|
||||
|
||||
@ -311,9 +309,6 @@ PERFTEST(GaussianBlur)
|
||||
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
|
||||
GPU_OFF;
|
||||
@ -321,8 +316,10 @@ PERFTEST(GaussianBlur)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -349,7 +346,7 @@ PERFTEST(filter2D)
|
||||
Mat kernel;
|
||||
gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
|
||||
|
||||
Mat dst(src);
|
||||
Mat dst, ocl_dst;
|
||||
dst.setTo(0);
|
||||
cv::filter2D(src, dst, -1, kernel);
|
||||
|
||||
@ -357,17 +354,12 @@ PERFTEST(filter2D)
|
||||
cv::filter2D(src, dst, -1, kernel);
|
||||
CPU_OFF;
|
||||
|
||||
ocl::oclMat d_src(src);
|
||||
ocl::oclMat d_dst(d_src);
|
||||
d_dst.setTo(0);
|
||||
ocl::oclMat d_src(src), d_dst;
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::filter2D(d_src, d_dst, -1, kernel);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::filter2D(d_src, d_dst, -1, kernel);
|
||||
GPU_OFF;
|
||||
@ -375,8 +367,10 @@ PERFTEST(filter2D)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::filter2D(d_src, d_dst, -1, kernel);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// gemm ////////////////////////
|
||||
PERFTEST(gemm)
|
||||
{
|
||||
Mat src1, src2, src3, dst;
|
||||
Mat src1, src2, src3, dst, ocl_dst;
|
||||
ocl::oclMat d_src1, d_src2, d_src3, d_dst;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
@ -72,7 +72,6 @@ PERFTEST(gemm)
|
||||
WARMUP_ON;
|
||||
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
|
||||
WARMUP_OFF;
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, src1.cols * src1.rows * 1e-4));
|
||||
|
||||
GPU_ON;
|
||||
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
|
||||
@ -83,7 +82,9 @@ PERFTEST(gemm)
|
||||
d_src2.upload(src2);
|
||||
d_src3.upload(src3);
|
||||
ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
|
||||
}
|
||||
}
|
@ -125,8 +125,10 @@ PERFTEST(Haar)
|
||||
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
|
||||
WARMUP_OFF;
|
||||
|
||||
//Testing whether the expected is equal to the actual.
|
||||
TestSystem::instance().setAccurate(ExpectedEQ<vector<Rect>::size_type, vector<Rect>::size_type>(faces.size(), oclfaces.size()));
|
||||
if(faces.size() == oclfaces.size())
|
||||
TestSystem::instance().setAccurate(1, 0);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size()));
|
||||
|
||||
faces.clear();
|
||||
|
||||
|
@ -146,10 +146,8 @@ PERFTEST(HOG)
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
ocl_mat = cv::Mat(d_comp);
|
||||
ocl_mat.convertTo(ocl_mat, cv::Mat(comp).type());
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat, cv::Mat(comp), 3));
|
||||
cv::Mat gpu_rst(d_comp), cpu_rst(comp);
|
||||
TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3);
|
||||
|
||||
GPU_ON;
|
||||
ocl_hog.detectMultiScale(d_src, found_locations);
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// equalizeHist ////////////////////////
|
||||
PERFTEST(equalizeHist)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
int all_type[] = {CV_8UC1};
|
||||
std::string type_name[] = {"CV_8UC1"};
|
||||
|
||||
@ -75,9 +75,6 @@ PERFTEST(equalizeHist)
|
||||
ocl::equalizeHist(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.1));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::equalizeHist(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -85,8 +82,10 @@ PERFTEST(equalizeHist)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::equalizeHist(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1);
|
||||
}
|
||||
|
||||
}
|
||||
@ -94,7 +93,7 @@ PERFTEST(equalizeHist)
|
||||
/////////// CopyMakeBorder //////////////////////
|
||||
PERFTEST(CopyMakeBorder)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_dst;
|
||||
|
||||
int bordertype = BORDER_CONSTANT;
|
||||
@ -122,9 +121,6 @@ PERFTEST(CopyMakeBorder)
|
||||
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
|
||||
GPU_OFF;
|
||||
@ -132,8 +128,10 @@ PERFTEST(CopyMakeBorder)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -141,7 +139,7 @@ PERFTEST(CopyMakeBorder)
|
||||
///////////// cornerMinEigenVal ////////////////////////
|
||||
PERFTEST(cornerMinEigenVal)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_dst;
|
||||
|
||||
int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
|
||||
@ -155,7 +153,6 @@ PERFTEST(cornerMinEigenVal)
|
||||
{
|
||||
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
|
||||
|
||||
|
||||
gen(src, size, size, all_type[j], 0, 256);
|
||||
|
||||
cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
|
||||
@ -170,9 +167,6 @@ PERFTEST(cornerMinEigenVal)
|
||||
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
|
||||
GPU_OFF;
|
||||
@ -180,8 +174,10 @@ PERFTEST(cornerMinEigenVal)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -189,7 +185,7 @@ PERFTEST(cornerMinEigenVal)
|
||||
///////////// cornerHarris ////////////////////////
|
||||
PERFTEST(cornerHarris)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_32FC1};
|
||||
@ -215,8 +211,6 @@ PERFTEST(cornerHarris)
|
||||
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
|
||||
GPU_OFF;
|
||||
@ -224,8 +218,10 @@ PERFTEST(cornerHarris)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
|
||||
@ -234,7 +230,7 @@ PERFTEST(cornerHarris)
|
||||
///////////// integral ////////////////////////
|
||||
PERFTEST(integral)
|
||||
{
|
||||
Mat src, sum;
|
||||
Mat src, sum, ocl_sum;
|
||||
ocl::oclMat d_src, d_sum, d_buf;
|
||||
|
||||
int all_type[] = {CV_8UC1};
|
||||
@ -260,12 +256,6 @@ PERFTEST(integral)
|
||||
ocl::integral(d_src, d_sum);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_sum.download(ocl_mat);
|
||||
if(sum.type() == ocl_mat.type()) //we won't test accuracy when cpu function overlow
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(sum, ocl_mat, 0.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::integral(d_src, d_sum);
|
||||
GPU_OFF;
|
||||
@ -273,8 +263,12 @@ PERFTEST(integral)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::integral(d_src, d_sum);
|
||||
d_sum.download(sum);
|
||||
d_sum.download(ocl_sum);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
if(sum.type() == ocl_sum.type()) //we won't test accuracy when cpu function overlow
|
||||
TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -282,7 +276,7 @@ PERFTEST(integral)
|
||||
///////////// WarpAffine ////////////////////////
|
||||
PERFTEST(WarpAffine)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
static const double coeffs[2][3] =
|
||||
@ -319,8 +313,6 @@ PERFTEST(WarpAffine)
|
||||
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
|
||||
GPU_OFF;
|
||||
@ -328,8 +320,10 @@ PERFTEST(WarpAffine)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -337,7 +331,7 @@ PERFTEST(WarpAffine)
|
||||
///////////// WarpPerspective ////////////////////////
|
||||
PERFTEST(WarpPerspective)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
static const double coeffs[3][3] =
|
||||
@ -374,8 +368,6 @@ PERFTEST(WarpPerspective)
|
||||
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
|
||||
GPU_OFF;
|
||||
@ -383,8 +375,10 @@ PERFTEST(WarpPerspective)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -393,7 +387,7 @@ PERFTEST(WarpPerspective)
|
||||
///////////// resize ////////////////////////
|
||||
PERFTEST(resize)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
|
||||
@ -420,9 +414,6 @@ PERFTEST(resize)
|
||||
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
|
||||
GPU_OFF;
|
||||
@ -430,8 +421,10 @@ PERFTEST(resize)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -456,8 +449,6 @@ PERFTEST(resize)
|
||||
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
|
||||
GPU_OFF;
|
||||
@ -465,8 +456,10 @@ PERFTEST(resize)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -474,10 +467,9 @@ PERFTEST(resize)
|
||||
///////////// threshold////////////////////////
|
||||
PERFTEST(threshold)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
{
|
||||
SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY";
|
||||
@ -496,9 +488,6 @@ PERFTEST(threshold)
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
|
||||
GPU_OFF;
|
||||
@ -506,9 +495,10 @@ PERFTEST(threshold)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
@ -529,8 +519,6 @@ PERFTEST(threshold)
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
|
||||
GPU_OFF;
|
||||
@ -538,8 +526,10 @@ PERFTEST(threshold)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0);
|
||||
}
|
||||
}
|
||||
///////////// meanShiftFiltering////////////////////////
|
||||
@ -726,7 +716,7 @@ void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::T
|
||||
PERFTEST(meanShiftFiltering)
|
||||
{
|
||||
int sp = 5, sr = 6;
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
@ -753,11 +743,6 @@ PERFTEST(meanShiftFiltering)
|
||||
ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
|
||||
WARMUP_OFF;
|
||||
|
||||
cv::Mat ocl_mat;
|
||||
d_dst.download(ocl_mat);
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
|
||||
GPU_OFF;
|
||||
@ -765,8 +750,10 @@ PERFTEST(meanShiftFiltering)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
|
||||
}
|
||||
}
|
||||
///////////// meanShiftProc////////////////////////
|
||||
@ -1010,8 +997,9 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp,
|
||||
}
|
||||
PERFTEST(meanShiftProc)
|
||||
{
|
||||
Mat src, dst, dstCoor_roi;
|
||||
ocl::oclMat d_src, d_dst, d_dstCoor_roi;
|
||||
Mat src;
|
||||
vector<Mat> dst(2), ocl_dst(2);
|
||||
ocl::oclMat d_src, d_dst, d_dstCoor;
|
||||
|
||||
TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
|
||||
|
||||
@ -1020,42 +1008,41 @@ PERFTEST(meanShiftProc)
|
||||
SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 ";
|
||||
|
||||
gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
|
||||
gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
|
||||
gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
|
||||
gen(dst[0], size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
|
||||
gen(dst[1], size, size, CV_16SC2, Scalar::all(0), Scalar::all(256));
|
||||
|
||||
meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
|
||||
meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
|
||||
|
||||
CPU_ON;
|
||||
meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit);
|
||||
meanShiftProc_(src, dst[0], dst[1], 5, 6, crit);
|
||||
CPU_OFF;
|
||||
|
||||
d_src.upload(src);
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dstCoor_roi, cv::Mat(d_dstCoor_roi), 0.0)
|
||||
&&ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
|
||||
d_dst.download(dst);
|
||||
d_dstCoor_roi.download(dstCoor_roi);
|
||||
ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit);
|
||||
d_dst.download(ocl_dst[0]);
|
||||
d_dstCoor.download(ocl_dst[1]);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
vector<double> eps(2, 0.);
|
||||
TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
|
||||
}
|
||||
}
|
||||
|
||||
///////////// remap////////////////////////
|
||||
PERFTEST(remap)
|
||||
{
|
||||
Mat src, dst, xmap, ymap;
|
||||
Mat src, dst, xmap, ymap, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -1088,7 +1075,6 @@ PERFTEST(remap)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
remap(src, dst, xmap, ymap, interpolation, borderMode);
|
||||
|
||||
CPU_ON;
|
||||
@ -1104,12 +1090,6 @@ PERFTEST(remap)
|
||||
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
|
||||
WARMUP_OFF;
|
||||
|
||||
if(interpolation == 0)
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
|
||||
else
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 2.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
|
||||
GPU_OFF;
|
||||
@ -1117,8 +1097,10 @@ PERFTEST(remap)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -56,11 +56,9 @@
|
||||
PERFTEST(matchTemplate)
|
||||
{
|
||||
//InitMatchTemplate();
|
||||
|
||||
Mat src, templ, dst;
|
||||
Mat src, templ, dst, ocl_dst;
|
||||
int templ_size = 5;
|
||||
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
{
|
||||
int all_type[] = {CV_32FC1, CV_32FC4};
|
||||
@ -82,16 +80,12 @@ PERFTEST(matchTemplate)
|
||||
matchTemplate(src, templ, dst, TM_CCORR);
|
||||
CPU_OFF;
|
||||
|
||||
ocl::oclMat d_src(src), d_templ, d_dst;
|
||||
|
||||
d_templ.upload(templ);
|
||||
ocl::oclMat d_src(src), d_templ(templ), d_dst;
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
|
||||
GPU_OFF;
|
||||
@ -100,8 +94,10 @@ PERFTEST(matchTemplate)
|
||||
d_src.upload(src);
|
||||
d_templ.upload(templ);
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,8 +127,6 @@ PERFTEST(matchTemplate)
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));
|
||||
|
||||
GPU_ON;
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
|
||||
GPU_OFF;
|
||||
@ -141,8 +135,10 @@ PERFTEST(matchTemplate)
|
||||
d_src.upload(src);
|
||||
d_templ.upload(templ);
|
||||
ocl::matchTemplate(d_src, d_templ, d_dst, TM_CCORR_NORMED);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@
|
||||
///////////// ConvertTo////////////////////////
|
||||
PERFTEST(ConvertTo)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -77,9 +77,6 @@ PERFTEST(ConvertTo)
|
||||
d_src.convertTo(d_dst, CV_32FC1);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
d_src.convertTo(d_dst, CV_32FC1);
|
||||
GPU_OFF;
|
||||
@ -87,8 +84,10 @@ PERFTEST(ConvertTo)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
d_src.convertTo(d_dst, CV_32FC1);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -96,7 +95,7 @@ PERFTEST(ConvertTo)
|
||||
///////////// copyTo////////////////////////
|
||||
PERFTEST(copyTo)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
ocl::oclMat d_src, d_dst;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
@ -125,9 +124,6 @@ PERFTEST(copyTo)
|
||||
d_src.copyTo(d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
d_src.copyTo(d_dst);
|
||||
GPU_OFF;
|
||||
@ -135,8 +131,10 @@ PERFTEST(copyTo)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
d_src.copyTo(d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -144,9 +142,9 @@ PERFTEST(copyTo)
|
||||
///////////// setTo////////////////////////
|
||||
PERFTEST(setTo)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, ocl_src;
|
||||
Scalar val(1, 2, 3, 4);
|
||||
ocl::oclMat d_src, d_dst;
|
||||
ocl::oclMat d_src;
|
||||
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
|
||||
@ -171,10 +169,10 @@ PERFTEST(setTo)
|
||||
d_src.setTo(val);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_src), 1.0));
|
||||
d_src.download(ocl_src);
|
||||
TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0);
|
||||
|
||||
|
||||
GPU_ON;
|
||||
GPU_ON;;
|
||||
d_src.setTo(val);
|
||||
GPU_OFF;
|
||||
|
||||
|
@ -48,39 +48,40 @@
|
||||
///////////// norm////////////////////////
|
||||
PERFTEST(norm)
|
||||
{
|
||||
Mat src, buf;
|
||||
ocl::oclMat d_src, d_buf;
|
||||
|
||||
Mat src1, src2, ocl_src1;
|
||||
ocl::oclMat d_src1, d_src2;
|
||||
|
||||
for (int size = Min_Size; size <= Max_Size; size *= Multiple)
|
||||
{
|
||||
SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF";
|
||||
|
||||
gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
|
||||
gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
|
||||
gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
|
||||
gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1));
|
||||
|
||||
norm(src, NORM_INF);
|
||||
norm(src1, src2, NORM_INF);
|
||||
|
||||
CPU_ON;
|
||||
norm(src, NORM_INF);
|
||||
norm(src1, src2, NORM_INF);
|
||||
CPU_OFF;
|
||||
|
||||
d_src.upload(src);
|
||||
d_buf.upload(buf);
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::norm(d_src, d_buf, NORM_INF);
|
||||
ocl::norm(d_src1, d_src2, NORM_INF);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_buf), .5));
|
||||
d_src1.download(ocl_src1);
|
||||
TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);
|
||||
|
||||
GPU_ON;
|
||||
ocl::norm(d_src, d_buf, NORM_INF);
|
||||
ocl::norm(d_src1, d_src2, NORM_INF);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::norm(d_src, d_buf, NORM_INF);
|
||||
d_src1.upload(src1);
|
||||
d_src2.upload(src2);
|
||||
ocl::norm(d_src1, d_src2, NORM_INF);
|
||||
GPU_FULL_OFF;
|
||||
}
|
||||
}
|
@ -82,8 +82,8 @@ PERFTEST(PyrLKOpticalFlow)
|
||||
SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points";
|
||||
else
|
||||
SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points";
|
||||
Mat nextPts_cpu;
|
||||
Mat status_cpu;
|
||||
Mat ocl_nextPts;
|
||||
Mat ocl_status;
|
||||
|
||||
vector<Point2f> pts;
|
||||
goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0);
|
||||
@ -116,12 +116,6 @@ PERFTEST(PyrLKOpticalFlow)
|
||||
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
|
||||
WARMUP_OFF;
|
||||
|
||||
std::vector<cv::Point2f> ocl_nextPts(d_nextPts.cols);
|
||||
std::vector<unsigned char> ocl_status(d_status.cols);
|
||||
TestSystem::instance().setAccurate(AssertEQ<size_t>(nextPts.size(), ocl_nextPts.size()));
|
||||
TestSystem::instance().setAccurate(AssertEQ<size_t>(status.size(), ocl_status.size()));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
|
||||
GPU_OFF;
|
||||
@ -133,17 +127,102 @@ PERFTEST(PyrLKOpticalFlow)
|
||||
d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
|
||||
|
||||
if (!d_nextPts.empty())
|
||||
{
|
||||
d_nextPts.download(nextPts_cpu);
|
||||
}
|
||||
d_nextPts.download(ocl_nextPts);
|
||||
|
||||
if (!d_status.empty())
|
||||
{
|
||||
d_status.download(status_cpu);
|
||||
}
|
||||
|
||||
d_status.download(ocl_status);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
size_t mismatch = 0;
|
||||
for (int i = 0; i < (int)nextPts.size(); ++i)
|
||||
{
|
||||
if(status[i] != ocl_status.at<unsigned char>(0, i)){
|
||||
mismatch++;
|
||||
continue;
|
||||
}
|
||||
if(status[i]){
|
||||
Point2f gpu_rst = ocl_nextPts.at<Point2f>(0, i);
|
||||
Point2f cpu_rst = nextPts[i];
|
||||
if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.)
|
||||
mismatch++;
|
||||
}
|
||||
}
|
||||
double ratio = (double)mismatch / (double)nextPts.size();
|
||||
if(ratio < .02)
|
||||
TestSystem::instance().setAccurate(1, ratio);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0, ratio);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Performance test for dense Dual TV-L1 optical flow: runs the CPU
// implementation (cv::createOptFlow_DualTVL1) and the OpenCL implementation
// (cv::ocl::OpticalFlowDual_TVL1_OCL) on the same pair of grayscale frames,
// then compares the OpenCL x/y flow planes against the CPU result.
// CPU_ON/CPU_OFF, WARMUP_ON/WARMUP_OFF, GPU_ON/GPU_OFF, GPU_FULL_ON/GPU_FULL_OFF
// and SUBTEST are macros defined elsewhere -- presumably they bracket the
// measured regions and name the sub-test; confirm against the perf harness.
PERFTEST(tvl1flow)
|
||||
{
|
||||
// Input frames are read from the current working directory as single-channel images.
cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE);
|
||||
// NOTE(review): if this is the standard assert, it is compiled out under NDEBUG,
// so a missing image would go undetected in release builds.
assert(!frame0.empty());
|
||||
|
||||
cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE);
|
||||
assert(!frame1.empty());
|
||||
|
||||
cv::ocl::OpticalFlowDual_TVL1_OCL d_alg;
|
||||
// Device-side outputs: one float plane per flow component.
cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1);
|
||||
cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1);
|
||||
|
||||
cv::Ptr<cv::DenseOpticalFlow> alg = cv::createOptFlow_DualTVL1();
|
||||
cv::Mat flow;
|
||||
|
||||
|
||||
SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<<frame1.cols<<'x'<<frame1.rows<<"; rubberwhale2.png";
|
||||
|
||||
// One CPU run outside the CPU_ON/CPU_OFF region, followed by the bracketed run.
alg->calc(frame0, frame1, flow);
|
||||
|
||||
CPU_ON;
|
||||
alg->calc(frame0, frame1, flow);
|
||||
CPU_OFF;
|
||||
|
||||
// Split the CPU flow into two planes; gold[0] is later compared with flowx
// and gold[1] with flowy.
cv::Mat gold[2];
|
||||
cv::split(flow, gold);
|
||||
|
||||
// NOTE(review): d0/d1 are constructed as CV_32FC1 and then filled via upload()
// from frames loaded with IMREAD_GRAYSCALE; whether upload() re-creates the
// buffer with the frame's own type is not visible here -- confirm.
cv::ocl::oclMat d0(frame0.size(), CV_32FC1);
|
||||
d0.upload(frame0);
|
||||
cv::ocl::oclMat d1(frame1.size(), CV_32FC1);
|
||||
d1.upload(frame1);
|
||||
|
||||
WARMUP_ON;
|
||||
d_alg(d0, d1, d_flowx, d_flowy);
|
||||
WARMUP_OFF;
|
||||
/*
|
||||
double diff1 = 0.0, diff2 = 0.0;
|
||||
if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) == 1
|
||||
&&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1)
|
||||
TestSystem::instance().setAccurate(1);
|
||||
else
|
||||
TestSystem::instance().setAccurate(0);
|
||||
|
||||
TestSystem::instance().setDiff(diff1);
|
||||
TestSystem::instance().setDiff(diff2);
|
||||
*/
|
||||
|
||||
|
||||
// Device-only region: inputs are already uploaded; collectGarbage() is called
// inside the bracketed region, so its cost is part of whatever GPU_ON/GPU_OFF measure.
GPU_ON;
|
||||
d_alg(d0, d1, d_flowx, d_flowy);
|
||||
d_alg.collectGarbage();
|
||||
GPU_OFF;
|
||||
|
||||
|
||||
cv::Mat flowx, flowy;
|
||||
|
||||
// Full region: upload of both frames, the algorithm, collectGarbage() and the
// download of both flow planes all sit between GPU_FULL_ON and GPU_FULL_OFF.
GPU_FULL_ON;
|
||||
d0.upload(frame0);
|
||||
d1.upload(frame1);
|
||||
d_alg(d0, d1, d_flowx, d_flowy);
|
||||
d_alg.collectGarbage();
|
||||
d_flowx.download(flowx);
|
||||
d_flowy.download(flowy);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
// Accuracy check of each downloaded plane against the CPU reference with a
// 3e-3 threshold ("Excepted" is the identifier's actual spelling here).
TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3);
|
||||
TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3);
|
||||
}
|
@ -48,7 +48,7 @@
|
||||
///////////// pyrDown //////////////////////
|
||||
PERFTEST(pyrDown)
|
||||
{
|
||||
Mat src, dst;
|
||||
Mat src, dst, ocl_dst;
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
|
||||
|
||||
@ -73,9 +73,6 @@ PERFTEST(pyrDown)
|
||||
ocl::pyrDown(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), dst.depth() == CV_32F ? 1e-4f : 1.0f));
|
||||
|
||||
|
||||
GPU_ON;
|
||||
ocl::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
@ -83,8 +80,53 @@ PERFTEST(pyrDown)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::pyrDown(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////// pyrUp ////////////////////////
|
||||
// Performance test for pyrUp: runs cv::pyrUp and cv::ocl::pyrUp on square
// 8-bit images (1 and 4 channels, side 500..2000 doubling each step) and
// reports how far the downloaded OpenCL result is from the CPU result.
PERFTEST(pyrUp)
{
    Mat src, dst, ocl_dst;
    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
    const size_t type_count = sizeof(all_type) / sizeof(all_type[0]);

    for (int side = 500; side <= 2000; side *= 2)
    {
        for (size_t t = 0; t < type_count; t++)
        {
            SUBTEST << side << 'x' << side << "; " << type_name[t] ;

            gen(src, side, side, all_type[t], 0, 256);

            // One call outside the CPU_ON/CPU_OFF bracket; dst is the CPU
            // result that the OpenCL output is compared against below.
            pyrUp(src, dst);

            CPU_ON;
            pyrUp(src, dst);
            CPU_OFF;

            ocl::oclMat d_src(src);
            ocl::oclMat d_dst;

            WARMUP_ON;
            ocl::pyrUp(d_src, d_dst);
            WARMUP_OFF;

            GPU_ON;
            ocl::pyrUp(d_src, d_dst);
            GPU_OFF;

            // Upload + kernel + download; the result goes into ocl_dst so the
            // CPU result held in dst is not overwritten.
            GPU_FULL_ON;
            d_src.upload(src);
            ocl::pyrUp(d_src, d_dst);
            d_dst.download(ocl_dst);
            GPU_FULL_OFF;

            const double tolerance = (src.depth() == CV_32F ? 1e-4f : 1.0);
            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, tolerance);
        }
    }
}
|
@ -1,89 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
|
||||
///////////// pyrUp ////////////////////////
|
||||
PERFTEST(pyrUp)
|
||||
{
|
||||
Mat src, dst;
|
||||
int all_type[] = {CV_8UC1, CV_8UC4};
|
||||
std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
|
||||
|
||||
for (int size = 500; size <= 2000; size *= 2)
|
||||
{
|
||||
for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++)
|
||||
{
|
||||
SUBTEST << size << 'x' << size << "; " << type_name[j] ;
|
||||
|
||||
gen(src, size, size, all_type[j], 0, 256);
|
||||
|
||||
pyrUp(src, dst);
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
ocl::oclMat d_src(src);
|
||||
ocl::oclMat d_dst;
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::pyrUp(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), (src.depth() == CV_32F ? 1e-4f : 1.0)));
|
||||
|
||||
GPU_ON;
|
||||
ocl::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::pyrUp(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
GPU_FULL_OFF;
|
||||
}
|
||||
}
|
||||
}
|
@ -48,7 +48,7 @@
|
||||
///////////// Merge////////////////////////
|
||||
PERFTEST(Merge)
|
||||
{
|
||||
Mat dst;
|
||||
Mat dst, ocl_dst;
|
||||
ocl::oclMat d_dst;
|
||||
|
||||
int channels = 4;
|
||||
@ -85,22 +85,20 @@ PERFTEST(Merge)
|
||||
ocl::merge(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(dst), cv::Mat(d_dst), 0.0));
|
||||
|
||||
GPU_ON;
|
||||
ocl::merge(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
|
||||
GPU_FULL_ON;
|
||||
|
||||
for (int i = 0; i < channels; ++i)
|
||||
{
|
||||
d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i));
|
||||
d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i));
|
||||
}
|
||||
|
||||
ocl::merge(d_src, d_dst);
|
||||
d_dst.download(dst);
|
||||
d_dst.download(ocl_dst);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0);
|
||||
}
|
||||
|
||||
}
|
||||
@ -122,7 +120,7 @@ PERFTEST(Split)
|
||||
|
||||
Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4));
|
||||
|
||||
std::vector<cv::Mat> dst;
|
||||
std::vector<cv::Mat> dst, ocl_dst(4);
|
||||
|
||||
split(src, dst);
|
||||
|
||||
@ -135,22 +133,7 @@ PERFTEST(Split)
|
||||
|
||||
WARMUP_ON;
|
||||
ocl::split(d_src, d_dst);
|
||||
WARMUP_OFF;
|
||||
|
||||
if(d_dst.size() == dst.size())
|
||||
{
|
||||
TestSystem::instance().setAccurate(1);
|
||||
for(size_t i = 0; i < dst.size(); i++)
|
||||
{
|
||||
if(ExpectedMatNear(dst[i], cv::Mat(d_dst[i]), 0.0) == 0)
|
||||
{
|
||||
TestSystem::instance().setAccurate(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}else
|
||||
TestSystem::instance().setAccurate(0);
|
||||
|
||||
WARMUP_OFF;
|
||||
|
||||
GPU_ON;
|
||||
ocl::split(d_src, d_dst);
|
||||
@ -159,7 +142,12 @@ PERFTEST(Split)
|
||||
GPU_FULL_ON;
|
||||
d_src.upload(src);
|
||||
ocl::split(d_src, d_dst);
|
||||
for(size_t i = 0; i < dst.size(); i++)
|
||||
d_dst[i].download(ocl_dst[i]);
|
||||
GPU_FULL_OFF;
|
||||
|
||||
vector<double> eps(4, 0.);
|
||||
TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -114,7 +114,6 @@ void TestSystem::finishCurrentSubtest()
|
||||
return;
|
||||
}
|
||||
|
||||
int is_accurate = is_accurate_;
|
||||
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||
double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;
|
||||
@ -171,8 +170,8 @@ void TestSystem::finishCurrentSubtest()
|
||||
deviation = std::sqrt(sum / gpu_times_.size());
|
||||
}
|
||||
|
||||
printMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
|
||||
writeMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
|
||||
printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
|
||||
writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
|
||||
|
||||
num_subtests_called_++;
|
||||
resetCurrentSubtest();
|
||||
@ -219,7 +218,7 @@ void TestSystem::writeHeading()
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(record_, "NAME,DESCRIPTION,ACCURACY,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
|
||||
fprintf(record_, "NAME,DESCRIPTION,ACCURACY,DIFFERENCE,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
|
||||
|
||||
fflush(record_);
|
||||
}
|
||||
@ -392,7 +391,7 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time,
|
||||
#endif
|
||||
}
|
||||
|
||||
void TestSystem::writeMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
|
||||
void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
|
||||
{
|
||||
if (!record_)
|
||||
{
|
||||
@ -402,21 +401,24 @@ void TestSystem::writeMetrics(int is_accurate, double cpu_time, double gpu_time,
|
||||
|
||||
string _is_accurate_;
|
||||
|
||||
if(is_accurate == 1)
|
||||
if(is_accurate_ == 1)
|
||||
_is_accurate_ = "Pass";
|
||||
else if(is_accurate == 0)
|
||||
else if(is_accurate_ == 0)
|
||||
_is_accurate_ = "Fail";
|
||||
else if(is_accurate == -1)
|
||||
else if(is_accurate_ == -1)
|
||||
_is_accurate_ = " ";
|
||||
else
|
||||
{
|
||||
std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
|
||||
std::cout<<"is_accurate errer: "<<is_accurate_<<"\n";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
fprintf(record_, "%s,%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
|
||||
fprintf(record_, "%s,%s,%s,%.2f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n",
|
||||
itname_changed_ ? itname_.c_str() : "",
|
||||
cur_subtest_description_.str().c_str(),
|
||||
_is_accurate_.c_str(), cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
|
||||
_is_accurate_.c_str(),
|
||||
accurate_diff_,
|
||||
cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
|
||||
gpu_min, gpu_max, std_dev);
|
||||
|
||||
if (itname_changed_)
|
||||
@ -469,134 +471,6 @@ void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
|
||||
RNG rng(0);
|
||||
rng.fill(mat, RNG::UNIFORM, low, high);
|
||||
}
|
||||
#if 0
|
||||
void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n)
|
||||
{
|
||||
assert(n > 0&&n <= cols * rows);
|
||||
assert(type == CV_8UC1||type == CV_8UC3||type == CV_8UC4
|
||||
||type == CV_32FC1||type == CV_32FC3||type == CV_32FC4);
|
||||
|
||||
RNG rng;
|
||||
//generate random position without duplication
|
||||
std::vector<int> pos;
|
||||
for(int i = 0; i < cols * rows; i++)
|
||||
{
|
||||
pos.push_back(i);
|
||||
}
|
||||
|
||||
for(int i = 0; i < cols * rows; i++)
|
||||
{
|
||||
int temp = i + rng.uniform(0, cols * rows - 1 - i);
|
||||
int temp1 = pos[temp];
|
||||
pos[temp]= pos[i];
|
||||
pos[i] = temp1;
|
||||
}
|
||||
|
||||
std::vector<int> selected_pos;
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
selected_pos.push_back(pos[i]);
|
||||
}
|
||||
|
||||
pos.clear();
|
||||
//end of generating random y without duplication
|
||||
|
||||
if(type == CV_8UC1)
|
||||
{
|
||||
typedef struct coorStruct_
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
uchar xy;
|
||||
}coorStruct;
|
||||
|
||||
coorStruct coor_struct;
|
||||
|
||||
std::vector<coorStruct> coor;
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
coor_struct.x = -1;
|
||||
coor_struct.y = -1;
|
||||
coor_struct.xy = (uchar)rng.uniform(low, high);
|
||||
coor.push_back(coor_struct);
|
||||
}
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
coor[i].y = selected_pos[i]/cols;
|
||||
coor[i].x = selected_pos[i]%cols;
|
||||
}
|
||||
selected_pos.clear();
|
||||
|
||||
mat.create(rows, cols, type);
|
||||
mat.setTo(0);
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
mat.at<unsigned char>(coor[i].y, coor[i].x) = coor[i].xy;
|
||||
}
|
||||
}
|
||||
|
||||
if(type == CV_8UC4 || type == CV_8UC3)
|
||||
{
|
||||
mat.create(rows, cols, type);
|
||||
mat.setTo(0);
|
||||
|
||||
typedef struct Coor
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
|
||||
uchar r;
|
||||
uchar g;
|
||||
uchar b;
|
||||
uchar alpha;
|
||||
}coor;
|
||||
|
||||
std::vector<coor> coor_vect;
|
||||
|
||||
coor xy_coor;
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
xy_coor.r = (uchar)rng.uniform(low, high);
|
||||
xy_coor.g = (uchar)rng.uniform(low, high);
|
||||
xy_coor.b = (uchar)rng.uniform(low, high);
|
||||
if(type == CV_8UC4)
|
||||
xy_coor.alpha = (uchar)rng.uniform(low, high);
|
||||
|
||||
coor_vect.push_back(xy_coor);
|
||||
}
|
||||
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
coor_vect[i].y = selected_pos[i]/((int)mat.step1()/mat.elemSize());
|
||||
coor_vect[i].x = selected_pos[i]%((int)mat.step1()/mat.elemSize());
|
||||
//printf("coor_vect[%d] = (%d, %d)\n", i, coor_vect[i].y, coor_vect[i].x);
|
||||
}
|
||||
|
||||
if(type == CV_8UC4)
|
||||
{
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x) = coor_vect[i].r;
|
||||
mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 1) = coor_vect[i].g;
|
||||
mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 2) = coor_vect[i].b;
|
||||
mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 3) = coor_vect[i].alpha;
|
||||
}
|
||||
}else if(type == CV_8UC3)
|
||||
{
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x) = coor_vect[i].r;
|
||||
mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 1) = coor_vect[i].g;
|
||||
mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 2) = coor_vect[i].b;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
string abspath(const string &relpath)
|
||||
{
|
||||
@ -619,31 +493,3 @@ double checkSimilarity(const Mat &m1, const Mat &m2)
|
||||
matchTemplate(m1, m2, diff, TM_CCORR_NORMED);
|
||||
return std::abs(diff.at<float>(0, 0) - 1.f);
|
||||
}
|
||||
|
||||
|
||||
int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps)
|
||||
{
|
||||
assert(dst.type() == cpu_dst.type());
|
||||
assert(dst.size() == cpu_dst.size());
|
||||
if(checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) < eps ||checkNorm(cv::Mat(dst), cv::Mat(cpu_dst)) == eps)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ExceptDoubleNear(double val1, double val2, double abs_error)
|
||||
{
|
||||
const double diff = fabs(val1 - val2);
|
||||
if (diff <= abs_error)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps)
|
||||
{
|
||||
assert(dst.type() == cpu_dst.type());
|
||||
assert(dst.size() == cpu_dst.size());
|
||||
if(checkSimilarity(cv::Mat(cpu_dst), cv::Mat(dst)) <= eps)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
@ -322,9 +322,46 @@ public:
|
||||
itname_changed_ = true;
|
||||
}
|
||||
|
||||
void setAccurate(int is_accurate = -1)
|
||||
void setAccurate(int accurate, double diff)
|
||||
{
|
||||
is_accurate_ = is_accurate;
|
||||
is_accurate_ = accurate;
|
||||
accurate_diff_ = diff;
|
||||
}
|
||||
|
||||
// Compares every Mat in dst with its counterpart in cpu_dst and records the
// outcome for the current subtest:
//   - accurate_diff_ is raised to the largest checkNorm() value seen (it is
//     never lowered here, so a larger value already stored is kept);
//   - is_accurate_ ends up 1 only if each pair's difference is <= eps[i].
// dst, cpu_dst and eps must all have the same number of elements.
void ExpectMatsNear(vector<Mat>& dst, vector<Mat>& cpu_dst, vector<double>& eps)
{
    assert(dst.size() == cpu_dst.size());
    assert(cpu_dst.size() == eps.size());

    // assert() is compiled out when NDEBUG is defined; without this guard a
    // length mismatch would index cpu_dst / eps out of range in the loop.
    // Treat the mismatch as an accuracy failure instead.
    if (dst.size() != cpu_dst.size() || cpu_dst.size() != eps.size())
    {
        is_accurate_ = 0;
        return;
    }

    is_accurate_ = 1;
    for(size_t i=0; i<dst.size(); i++)
    {
        const double cur_diff = checkNorm(dst[i], cpu_dst[i]);
        accurate_diff_ = max(accurate_diff_, cur_diff);
        if(cur_diff > eps[i])
            is_accurate_ = 0;
    }
}
|
||||
|
||||
// Stores checkNorm(dst, cpu_dst) as the subtest's accuracy difference and
// marks the subtest accurate (1) when that value is at most eps, otherwise
// inaccurate (0). Both matrices are asserted to share type and size.
void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
{
    assert(dst.type() == cpu_dst.type());
    assert(dst.size() == cpu_dst.size());

    const double norm_diff = checkNorm(dst, cpu_dst);
    accurate_diff_ = norm_diff;
    is_accurate_ = (norm_diff <= eps) ? 1 : 0;
}
|
||||
|
||||
// Stores checkSimilarity(cpu_dst, dst) as the subtest's accuracy difference
// and marks the subtest accurate (1) when that value is at most eps,
// otherwise inaccurate (0). Both matrices are asserted to share type and size.
void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps)
{
    assert(dst.type() == cpu_dst.type());
    assert(dst.size() == cpu_dst.size());

    const double similarity_gap = checkSimilarity(cpu_dst, dst);
    accurate_diff_ = similarity_gap;
    is_accurate_ = (similarity_gap <= eps) ? 1 : 0;
}
|
||||
|
||||
std::stringstream &getCurSubtestDescription()
|
||||
@ -342,7 +379,7 @@ private:
|
||||
num_iters_(10), cpu_num_iters_(2),
|
||||
gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
|
||||
record_(0), recordname_("performance"), itname_changed_(true),
|
||||
is_accurate_(-1)
|
||||
is_accurate_(-1), accurate_diff_(0.)
|
||||
{
|
||||
cpu_times_.reserve(num_iters_);
|
||||
gpu_times_.reserve(num_iters_);
|
||||
@ -363,6 +400,7 @@ private:
|
||||
gpu_times_.clear();
|
||||
gpu_full_times_.clear();
|
||||
is_accurate_ = -1;
|
||||
accurate_diff_ = 0.;
|
||||
}
|
||||
|
||||
double meanTime(const std::vector<int64> &samples);
|
||||
@ -373,7 +411,7 @@ private:
|
||||
|
||||
void writeHeading();
|
||||
void writeSummary();
|
||||
void writeMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
|
||||
void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
|
||||
double speedup = 0.0f, double fullspeedup = 0.0f,
|
||||
double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);
|
||||
|
||||
@ -425,6 +463,7 @@ private:
|
||||
bool itname_changed_;
|
||||
|
||||
int is_accurate_;
|
||||
double accurate_diff_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -412,11 +412,11 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, String kernelN
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
|
||||
float f_scalar = (float)scalar;
|
||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
||||
args.push_back( std::make_pair( sizeof(cl_double), (void *)&scalar ));
|
||||
else
|
||||
{
|
||||
float f_scalar = (float)scalar;
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&f_scalar));
|
||||
}
|
||||
|
||||
@ -783,45 +783,55 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
||||
template <typename T> void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
||||
const oclMat &mask, oclMat &buf)
|
||||
{
|
||||
size_t groupnum = src.clCxt->computeUnits();
|
||||
CV_Assert(groupnum != 0);
|
||||
groupnum = groupnum * 2;
|
||||
int vlen = 8;
|
||||
int dbsize = groupnum * 2 * vlen * sizeof(T) ;
|
||||
Context *clCxt = src.clCxt;
|
||||
cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
|
||||
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
|
||||
|
||||
ensureSizeIsEnough(1, dbsize, CV_8UC1, buf);
|
||||
|
||||
cl_mem buf_data = reinterpret_cast<cl_mem>(buf.data);
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax");
|
||||
arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax");
|
||||
}
|
||||
else
|
||||
{
|
||||
arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask");
|
||||
arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask");
|
||||
}
|
||||
T *p = new T[groupnum * vlen * 2];
|
||||
memset(p, 0, dbsize);
|
||||
openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
|
||||
if(minVal != NULL){
|
||||
|
||||
Mat matbuf = Mat(buf);
|
||||
T *p = matbuf.ptr<T>();
|
||||
if(minVal != NULL)
|
||||
{
|
||||
*minVal = std::numeric_limits<double>::max();
|
||||
for(int i = 0; i < vlen * (int)groupnum; i++)
|
||||
{
|
||||
*minVal = *minVal < p[i] ? *minVal : p[i];
|
||||
}
|
||||
}
|
||||
if(maxVal != NULL){
|
||||
if(maxVal != NULL)
|
||||
{
|
||||
*maxVal = -std::numeric_limits<double>::max();
|
||||
for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++)
|
||||
{
|
||||
*maxVal = *maxVal > p[i] ? *maxVal : p[i];
|
||||
}
|
||||
}
|
||||
delete[] p;
|
||||
openCLFree(dstBuffer);
|
||||
}
|
||||
|
||||
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
|
||||
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf);
|
||||
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
|
||||
{
|
||||
oclMat buf;
|
||||
minMax_buf(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf)
|
||||
{
|
||||
CV_Assert(src.oclchannels() == 1);
|
||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
@ -841,7 +851,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
|
||||
};
|
||||
minMaxFunc func;
|
||||
func = functab[src.depth()];
|
||||
func(src, minVal, maxVal, mask);
|
||||
func(src, minVal, maxVal, mask, buf);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
@ -1688,10 +1698,11 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, String ker
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
|
||||
T scalar;
|
||||
if(_scalar != NULL)
|
||||
{
|
||||
double scalar1 = *((double *)_scalar);
|
||||
T scalar = (T)scalar1;
|
||||
scalar = (T)scalar1;
|
||||
args.push_back( std::make_pair( sizeof(T), (void *)&scalar ));
|
||||
}
|
||||
|
||||
@ -2308,9 +2319,9 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, String
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
float pf = p;
|
||||
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
|
||||
{
|
||||
float pf = p;
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&pf ));
|
||||
}
|
||||
else
|
||||
|
@ -244,11 +244,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM
|
||||
{
|
||||
const oclMat zeroMask;
|
||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if (query.cols <= 64)
|
||||
{
|
||||
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
|
||||
}
|
||||
else if (query.cols <= 128)
|
||||
else if (query.cols <= 128 && !is_cpu)
|
||||
{
|
||||
matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx, distance, distType);
|
||||
}
|
||||
@ -263,11 +264,12 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co
|
||||
{
|
||||
const oclMat zeroMask;
|
||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if (query.cols <= 64)
|
||||
{
|
||||
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
|
||||
}
|
||||
else if (query.cols <= 128)
|
||||
else if (query.cols <= 128 && !is_cpu)
|
||||
{
|
||||
matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
|
||||
}
|
||||
@ -283,11 +285,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD
|
||||
{
|
||||
const oclMat zeroMask;
|
||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if (query.cols <= 64)
|
||||
{
|
||||
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
|
||||
}
|
||||
else if (query.cols <= 128)
|
||||
else if (query.cols <= 128 && !is_cpu)
|
||||
{
|
||||
matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
|
||||
}
|
||||
@ -465,11 +468,12 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con
|
||||
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||
{
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if (query.cols <= 64)
|
||||
{
|
||||
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
|
||||
}
|
||||
else if (query.cols <= 128)
|
||||
else if (query.cols <= 128 && !is_cpu)
|
||||
{
|
||||
knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType);
|
||||
}
|
||||
|
@ -239,7 +239,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b
|
||||
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||
@ -269,12 +269,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
const char * build_options = L2Grad ? "-D L2GRAD":"";
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
|
||||
{
|
||||
@ -297,12 +293,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
char build_options [15] = "";
|
||||
if(L2Grad)
|
||||
{
|
||||
strcat(build_options, "-D L2GRAD");
|
||||
}
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
const char * build_options = L2Grad ? "-D L2GRAD":"";
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
|
||||
}
|
||||
|
||||
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
|
||||
@ -333,7 +325,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro
|
||||
String kernelName = "calcMap";
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
|
||||
@ -353,7 +345,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
||||
@ -383,7 +375,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
|
||||
std::swap(st1, st2);
|
||||
}
|
||||
@ -408,5 +400,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
|
||||
size_t globalThreads[3] = {cols, rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
@ -356,8 +356,7 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
||||
char compile_option[128];
|
||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
|
||||
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
|
||||
rectKernel?"-D RECTKERNEL":"",
|
||||
s);
|
||||
s, rectKernel?"-D RECTKERNEL":"");
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
|
349
modules/ocl/src/gfft.cpp
Normal file
349
modules/ocl/src/gfft.cpp
Normal file
@ -0,0 +1,349 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
// Selects the sorting path used by GoodFeaturesToTrackDetector_OCL::operator():
// when true the corners are ordered with Sorter<CPU_STL> (std::sort); when false
// Sorter<BITONIC> is used if the corner count is a power of two and
// Sorter<SELECTION> otherwise.
static bool use_cpu_sorter = true;
|
||||
|
||||
namespace cv
{
    namespace ocl
    {
        // OpenCL kernel string for this file. Only declared here (extern);
        // its address is handed to openCLExecuteKernel by the callers below.
        extern const char *imgproc_gfft;
    }
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Tag used to pick a Sorter<> specialization:
//   CPU_STL   - std::sort on a cv::Mat copy of the corner list
//   BITONIC   - "sortCorners_bitonicSort" kernel
//   SELECTION - "sortCorners_selectionSortLocal" followed by "sortCorners_selectionSortFinal"
enum SortMethod { CPU_STL, BITONIC, SELECTION };
|
||||
|
||||
// Local work size (dimension 0) used when launching the sort kernels; the
// selection sorter also sizes its local buffer as GROUP_SIZE cl_float2 elements.
const int GROUP_SIZE = 256;
|
||||
|
||||
// Primary template, intentionally empty: each SortMethod value gets an explicit
// specialization below that supplies
//   typedef ... EigType;
//   static void sortCorners_caller(const EigType&, oclMat&, const int);
template<SortMethod method>
struct Sorter
{
};
|
||||
|
||||
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
|
||||
template<>
|
||||
struct Sorter<CPU_STL>
|
||||
{
|
||||
typedef oclMat EigType;
|
||||
static cv::Mutex cs;
|
||||
static Mat mat_eig;
|
||||
|
||||
//prototype
|
||||
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
|
||||
{
|
||||
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
|
||||
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
|
||||
return v1 > v2;
|
||||
}
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
{
|
||||
cv::AutoLock lock(cs);
|
||||
//temporarily use STL's sort function
|
||||
Mat mat_corners = corners;
|
||||
mat_eig = eig_tex;
|
||||
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
|
||||
corners = mat_corners;
|
||||
}
|
||||
};
|
||||
cv::Mutex Sorter<CPU_STL>::cs;
|
||||
cv::Mat Sorter<CPU_STL>::mat_eig;
|
||||
|
||||
template<>
|
||||
struct Sorter<BITONIC>
|
||||
{
|
||||
typedef TextureCL EigType;
|
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
{
|
||||
Context * cxt = Context::getContext();
|
||||
size_t globalThreads[3] = {count / 2, 1, 1};
|
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||
|
||||
// 2^numStages should be equal to count or the output is invalid
|
||||
int numStages = 0;
|
||||
for(int i = count; i > 1; i >>= 1)
|
||||
{
|
||||
++numStages;
|
||||
}
|
||||
const int argc = 5;
|
||||
std::vector< std::pair<size_t, const void *> > args(argc);
|
||||
String kernelname = "sortCorners_bitonicSort";
|
||||
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
|
||||
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
|
||||
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
|
||||
for(int stage = 0; stage < numStages; ++stage)
|
||||
{
|
||||
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
|
||||
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
|
||||
{
|
||||
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
|
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Sorter<SELECTION>
|
||||
{
|
||||
typedef TextureCL EigType;
|
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
{
|
||||
Context * cxt = Context::getContext();
|
||||
|
||||
size_t globalThreads[3] = {count, 1, 1};
|
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
//local
|
||||
String kernelname = "sortCorners_selectionSortLocal";
|
||||
int lds_size = GROUP_SIZE * sizeof(cl_float2);
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
|
||||
args.push_back( std::make_pair( lds_size, (void*)NULL) );
|
||||
|
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
//final
|
||||
kernelname = "sortCorners_selectionSortFinal";
|
||||
args.pop_back();
|
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
};
|
||||
|
||||
int findCorners_caller(
|
||||
const TextureCL& eig,
|
||||
const float threshold,
|
||||
const oclMat& mask,
|
||||
oclMat& corners,
|
||||
const int max_count)
|
||||
{
|
||||
std::vector<int> k;
|
||||
Context * cxt = Context::getContext();
|
||||
|
||||
std::vector< std::pair<size_t, const void*> > args;
|
||||
String kernelname = "findCorners";
|
||||
|
||||
const int mask_strip = mask.step / mask.elemSize1();
|
||||
|
||||
oclMat g_counter(1, 1, CV_32SC1);
|
||||
g_counter.setTo(0);
|
||||
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
|
||||
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
|
||||
|
||||
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
|
||||
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
|
||||
openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt);
|
||||
return std::min(Mat(g_counter).at<int>(0), max_count);
|
||||
}
|
||||
}//unnamed namespace
|
||||
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
||||
{
|
||||
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
|
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
|
||||
|
||||
CV_DbgAssert(support_image2d());
|
||||
|
||||
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
||||
|
||||
if (useHarrisDetector)
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||
else
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
||||
|
||||
double maxVal = 0;
|
||||
minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_);
|
||||
|
||||
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
||||
|
||||
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
|
||||
int total = findCorners_caller(
|
||||
*eig_tex,
|
||||
static_cast<float>(maxVal * qualityLevel),
|
||||
mask,
|
||||
tmpCorners_,
|
||||
tmpCorners_.cols);
|
||||
|
||||
if (total == 0)
|
||||
{
|
||||
corners.release();
|
||||
return;
|
||||
}
|
||||
if(use_cpu_sorter)
|
||||
{
|
||||
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
//if total is power of 2
|
||||
if(((total - 1) & (total)) == 0)
|
||||
{
|
||||
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
}
|
||||
|
||||
if (minDistance < 1)
|
||||
{
|
||||
corners = tmpCorners_(Rect(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<Point2f> tmp(total);
|
||||
downloadPoints(tmpCorners_, tmp);
|
||||
|
||||
std::vector<Point2f> tmp2;
|
||||
tmp2.reserve(total);
|
||||
|
||||
const int cell_size = cvRound(minDistance);
|
||||
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
||||
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
||||
|
||||
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
|
||||
|
||||
for (int i = 0; i < total; ++i)
|
||||
{
|
||||
Point2f p = tmp[i];
|
||||
|
||||
bool good = true;
|
||||
|
||||
int x_cell = static_cast<int>(p.x / cell_size);
|
||||
int y_cell = static_cast<int>(p.y / cell_size);
|
||||
|
||||
int x1 = x_cell - 1;
|
||||
int y1 = y_cell - 1;
|
||||
int x2 = x_cell + 1;
|
||||
int y2 = y_cell + 1;
|
||||
|
||||
// boundary check
|
||||
x1 = std::max(0, x1);
|
||||
y1 = std::max(0, y1);
|
||||
x2 = std::min(grid_width - 1, x2);
|
||||
y2 = std::min(grid_height - 1, y2);
|
||||
|
||||
for (int yy = y1; yy <= y2; yy++)
|
||||
{
|
||||
for (int xx = x1; xx <= x2; xx++)
|
||||
{
|
||||
std::vector<Point2f>& m = grid[yy * grid_width + xx];
|
||||
|
||||
if (!m.empty())
|
||||
{
|
||||
for(size_t j = 0; j < m.size(); j++)
|
||||
{
|
||||
float dx = p.x - m[j].x;
|
||||
float dy = p.y - m[j].y;
|
||||
|
||||
if (dx * dx + dy * dy < minDistance * minDistance)
|
||||
{
|
||||
good = false;
|
||||
goto break_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break_out:
|
||||
|
||||
if(good)
|
||||
{
|
||||
grid[y_cell * grid_width + x_cell].push_back(p);
|
||||
|
||||
tmp2.push_back(p);
|
||||
|
||||
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
|
||||
}
|
||||
}
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
|
||||
{
|
||||
CV_DbgAssert(points.type() == CV_32FC2);
|
||||
points_v.resize(points.cols);
|
||||
openCLSafeCall(clEnqueueReadBuffer(
|
||||
*reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
|
||||
reinterpret_cast<cl_mem>(points.data),
|
||||
CL_TRUE,
|
||||
0,
|
||||
points.cols * sizeof(Point2f),
|
||||
&points_v[0],
|
||||
0,
|
||||
NULL,
|
||||
NULL));
|
||||
}
|
@ -136,47 +136,22 @@ struct CvHidHaarClassifierCascade
|
||||
};
|
||||
typedef struct
|
||||
{
|
||||
//int rows;
|
||||
//int ystep;
|
||||
int width_height;
|
||||
//int height;
|
||||
int grpnumperline_totalgrp;
|
||||
//int totalgrp;
|
||||
int imgoff;
|
||||
float factor;
|
||||
} detect_piramid_info;
|
||||
|
||||
#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__
|
||||
#ifdef WIN32
|
||||
#define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT))
|
||||
typedef _ALIGNED_ON(128) struct GpuHidHaarFeature
|
||||
{
|
||||
_ALIGNED_ON(32) struct
|
||||
{
|
||||
_ALIGNED_ON(4) int p0 ;
|
||||
_ALIGNED_ON(4) int p1 ;
|
||||
_ALIGNED_ON(4) int p2 ;
|
||||
_ALIGNED_ON(4) int p3 ;
|
||||
_ALIGNED_ON(4) float weight ;
|
||||
}
|
||||
/*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ;
|
||||
}
|
||||
GpuHidHaarFeature;
|
||||
|
||||
|
||||
typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode
|
||||
{
|
||||
_ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4];
|
||||
//_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ;
|
||||
//_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ;
|
||||
//_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ;
|
||||
/*_ALIGNED_ON(16)*/
|
||||
float weight[CV_HAAR_FEATURE_MAX] ;
|
||||
/*_ALIGNED_ON(4)*/
|
||||
float threshold ;
|
||||
_ALIGNED_ON(8) float alpha[2] ;
|
||||
_ALIGNED_ON(16) float alpha[3] ;
|
||||
_ALIGNED_ON(4) int left ;
|
||||
_ALIGNED_ON(4) int right ;
|
||||
// GpuHidHaarFeature feature __attribute__((aligned (128)));
|
||||
}
|
||||
GpuHidHaarTreeNode;
|
||||
|
||||
@ -184,7 +159,6 @@ GpuHidHaarTreeNode;
|
||||
typedef _ALIGNED_ON(32) struct GpuHidHaarClassifier
|
||||
{
|
||||
_ALIGNED_ON(4) int count;
|
||||
//CvHaarFeature* orig_feature;
|
||||
_ALIGNED_ON(8) GpuHidHaarTreeNode *node ;
|
||||
_ALIGNED_ON(8) float *alpha ;
|
||||
}
|
||||
@ -219,32 +193,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade
|
||||
_ALIGNED_ON(4) int p2 ;
|
||||
_ALIGNED_ON(4) int p3 ;
|
||||
_ALIGNED_ON(4) float inv_window_area ;
|
||||
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
|
||||
} GpuHidHaarClassifierCascade;
|
||||
#else
|
||||
#define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) ))
|
||||
|
||||
typedef struct _ALIGNED_ON(128) GpuHidHaarFeature
|
||||
{
|
||||
struct _ALIGNED_ON(32)
|
||||
{
|
||||
int p0 _ALIGNED_ON(4);
|
||||
int p1 _ALIGNED_ON(4);
|
||||
int p2 _ALIGNED_ON(4);
|
||||
int p3 _ALIGNED_ON(4);
|
||||
float weight _ALIGNED_ON(4);
|
||||
}
|
||||
rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32);
|
||||
}
|
||||
GpuHidHaarFeature;
|
||||
|
||||
|
||||
typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode
|
||||
{
|
||||
int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64);
|
||||
float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16);
|
||||
float threshold;// _ALIGNED_ON(4);
|
||||
float alpha[2] _ALIGNED_ON(8);
|
||||
float alpha[3] _ALIGNED_ON(16);
|
||||
int left _ALIGNED_ON(4);
|
||||
int right _ALIGNED_ON(4);
|
||||
}
|
||||
@ -287,7 +245,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade
|
||||
int p2 _ALIGNED_ON(4);
|
||||
int p3 _ALIGNED_ON(4);
|
||||
float inv_window_area _ALIGNED_ON(4);
|
||||
// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8)));
|
||||
} GpuHidHaarClassifierCascade;
|
||||
#endif
|
||||
|
||||
@ -295,36 +252,6 @@ const int icv_object_win_border = 1;
|
||||
const float icv_stage_threshold_bias = 0.0001f;
|
||||
double globaltime = 0;
|
||||
|
||||
|
||||
// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count )
|
||||
// {
|
||||
// CvHaarClassifierCascade *cascade = 0;
|
||||
|
||||
// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier);
|
||||
|
||||
// if( stage_count <= 0 )
|
||||
// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" );
|
||||
|
||||
// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size );
|
||||
// memset( cascade, 0, block_size );
|
||||
|
||||
// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1);
|
||||
// cascade->flags = CV_HAAR_MAGIC_VAL;
|
||||
// cascade->count = stage_count;
|
||||
|
||||
// return cascade;
|
||||
// }
|
||||
|
||||
//static int globalcounter = 0;
|
||||
|
||||
// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade )
|
||||
// {
|
||||
// if( _cascade && *_cascade )
|
||||
// {
|
||||
// cvFree( _cascade );
|
||||
// }
|
||||
// }
|
||||
|
||||
/* create more efficient internal representation of haar classifier cascade */
|
||||
static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
|
||||
{
|
||||
@ -440,24 +367,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
|
||||
hid_stage_classifier->two_rects = 1;
|
||||
haar_classifier_ptr += stage_classifier->count;
|
||||
|
||||
/*
|
||||
hid_stage_classifier->parent = (stage_classifier->parent == -1)
|
||||
? NULL : stage_classifier_ptr + stage_classifier->parent;
|
||||
hid_stage_classifier->next = (stage_classifier->next == -1)
|
||||
? NULL : stage_classifier_ptr + stage_classifier->next;
|
||||
hid_stage_classifier->child = (stage_classifier->child == -1)
|
||||
? NULL : stage_classifier_ptr + stage_classifier->child;
|
||||
|
||||
out->is_tree |= hid_stage_classifier->next != NULL;
|
||||
*/
|
||||
|
||||
for( j = 0; j < stage_classifier->count; j++ )
|
||||
{
|
||||
CvHaarClassifier *classifier = stage_classifier->classifier + j;
|
||||
GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j;
|
||||
int node_count = classifier->count;
|
||||
|
||||
// float* alpha_ptr = (float*)(haar_node_ptr + node_count);
|
||||
float *alpha_ptr = &haar_node_ptr->alpha[0];
|
||||
|
||||
hid_classifier->count = node_count;
|
||||
@ -484,16 +399,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
|
||||
node->p[2][3] = 0;
|
||||
node->weight[2] = 0;
|
||||
}
|
||||
// memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) );
|
||||
else
|
||||
hid_stage_classifier->two_rects = 0;
|
||||
|
||||
memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
|
||||
haar_node_ptr = haar_node_ptr + 1;
|
||||
}
|
||||
|
||||
memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0]));
|
||||
haar_node_ptr = haar_node_ptr + 1;
|
||||
// (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*));
|
||||
// (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1);
|
||||
|
||||
out->is_stump_based &= node_count == 1;
|
||||
}
|
||||
}
|
||||
@ -506,25 +417,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl
|
||||
|
||||
|
||||
#define sum_elem_ptr(sum,row,col) \
|
||||
((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
|
||||
((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype)))
|
||||
|
||||
#define sqsum_elem_ptr(sqsum,row,col) \
|
||||
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
|
||||
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype)))
|
||||
|
||||
#define calc_sum(rect,offset) \
|
||||
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
|
||||
((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset])
|
||||
|
||||
|
||||
static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade,
|
||||
/* const CvArr* _sum,
|
||||
const CvArr* _sqsum,
|
||||
const CvArr* _tilted_sum,*/
|
||||
double scale,
|
||||
int step)
|
||||
{
|
||||
// CvMat sum_stub, *sum = (CvMat*)_sum;
|
||||
// CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum;
|
||||
// CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum;
|
||||
GpuHidHaarClassifierCascade *cascade;
|
||||
int coi0 = 0, coi1 = 0;
|
||||
int i;
|
||||
@ -540,61 +445,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
|
||||
if( scale <= 0 )
|
||||
CV_Error( CV_StsOutOfRange, "Scale must be positive" );
|
||||
|
||||
// sum = cvGetMat( sum, &sum_stub, &coi0 );
|
||||
// sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 );
|
||||
|
||||
if( coi0 || coi1 )
|
||||
CV_Error( CV_BadCOI, "COI is not supported" );
|
||||
|
||||
// if( !CV_ARE_SIZES_EQ( sum, sqsum ))
|
||||
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
|
||||
|
||||
// if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 ||
|
||||
// CV_MAT_TYPE(sum->type) != CV_32SC1 )
|
||||
// CV_Error( CV_StsUnsupportedFormat,
|
||||
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
|
||||
|
||||
if( !_cascade->hid_cascade )
|
||||
gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total);
|
||||
|
||||
cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade;
|
||||
stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1);
|
||||
|
||||
if( cascade->has_tilted_features )
|
||||
{
|
||||
// tilted = cvGetMat( tilted, &tilted_stub, &coi1 );
|
||||
|
||||
// if( CV_MAT_TYPE(tilted->type) != CV_32SC1 )
|
||||
// CV_Error( CV_StsUnsupportedFormat,
|
||||
// "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" );
|
||||
|
||||
// if( sum->step != tilted->step )
|
||||
// CV_Error( CV_StsUnmatchedSizes,
|
||||
// "Sum and tilted_sum must have the same stride (step, widthStep)" );
|
||||
|
||||
// if( !CV_ARE_SIZES_EQ( sum, tilted ))
|
||||
// CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" );
|
||||
// cascade->tilted = *tilted;
|
||||
}
|
||||
|
||||
_cascade->scale = scale;
|
||||
_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale );
|
||||
_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale );
|
||||
|
||||
//cascade->sum = *sum;
|
||||
//cascade->sqsum = *sqsum;
|
||||
|
||||
equRect.x = equRect.y = cvRound(scale);
|
||||
equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale);
|
||||
equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale);
|
||||
weight_scale = 1. / (equRect.width * equRect.height);
|
||||
cascade->inv_window_area = weight_scale;
|
||||
|
||||
// cascade->pq0 = equRect.y * step + equRect.x;
|
||||
// cascade->pq1 = equRect.y * step + equRect.x + equRect.width ;
|
||||
// cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x;
|
||||
// cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ;
|
||||
|
||||
cascade->pq0 = equRect.x;
|
||||
cascade->pq1 = equRect.y;
|
||||
cascade->pq2 = equRect.x + equRect.width;
|
||||
@ -617,10 +486,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
|
||||
{
|
||||
CvHaarFeature *feature =
|
||||
&_cascade->stage_classifier[i].classifier[j].haar_feature[l];
|
||||
/* GpuHidHaarClassifier* classifier =
|
||||
cascade->stage_classifier[i].classifier + j; */
|
||||
//GpuHidHaarFeature* hidfeature =
|
||||
// &cascade->stage_classifier[i].classifier[j].node[l].feature;
|
||||
GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l];
|
||||
double sum0 = 0, area0 = 0;
|
||||
CvRect r[3];
|
||||
@ -635,8 +500,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
|
||||
/* align blocks */
|
||||
for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ )
|
||||
{
|
||||
//if( !hidfeature->rect[k].p0 )
|
||||
// break;
|
||||
if(!hidnode->p[k][0])
|
||||
break;
|
||||
r[k] = feature->rect[k].r;
|
||||
@ -716,15 +579,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
|
||||
|
||||
if( !feature->tilted )
|
||||
{
|
||||
/* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x;
|
||||
hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width;
|
||||
hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x;
|
||||
hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width;
|
||||
*/
|
||||
/*hidnode->p0[k] = tr.y * step + tr.x;
|
||||
hidnode->p1[k] = tr.y * step + tr.x + tr.width;
|
||||
hidnode->p2[k] = (tr.y + tr.height) * step + tr.x;
|
||||
hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/
|
||||
hidnode->p[k][0] = tr.x;
|
||||
hidnode->p[k][1] = tr.y;
|
||||
hidnode->p[k][2] = tr.x + tr.width;
|
||||
@ -732,37 +586,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc
|
||||
}
|
||||
else
|
||||
{
|
||||
/* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width;
|
||||
hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height;
|
||||
hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x;
|
||||
hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height;
|
||||
*/
|
||||
|
||||
hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width;
|
||||
hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height;
|
||||
hidnode->p[k][0] = tr.y * step + tr.x;
|
||||
hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height;
|
||||
}
|
||||
|
||||
//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio);
|
||||
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
|
||||
if( k == 0 )
|
||||
area0 = tr.width * tr.height;
|
||||
else
|
||||
//sum0 += hidfeature->rect[k].weight * tr.width * tr.height;
|
||||
sum0 += hidnode->weight[k] * tr.width * tr.height;
|
||||
}
|
||||
|
||||
// hidfeature->rect[0].weight = (float)(-sum0/area0);
|
||||
hidnode->weight[0] = (float)(-sum0 / area0);
|
||||
} /* l */
|
||||
} /* j */
|
||||
}
|
||||
}
|
||||
|
||||
static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
|
||||
/*double scale=0.0,*/
|
||||
/*int step*/)
|
||||
static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade)
|
||||
{
|
||||
GpuHidHaarClassifierCascade *cascade;
|
||||
int i;
|
||||
@ -816,11 +657,7 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
|
||||
if(!hidnode->p[k][0])
|
||||
break;
|
||||
r[k] = feature->rect[k].r;
|
||||
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
|
||||
// base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
|
||||
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
|
||||
// base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
|
||||
}
|
||||
}
|
||||
|
||||
nr = k;
|
||||
for( k = 0; k < nr; k++ )
|
||||
@ -838,7 +675,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade
|
||||
hidnode->p[k][3] = tr.height;
|
||||
hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio);
|
||||
}
|
||||
//hidnode->weight[0]=(float)(-sum0/area0);
|
||||
} /* l */
|
||||
} /* j */
|
||||
}
|
||||
@ -851,7 +687,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
|
||||
const double GROUP_EPS = 0.2;
|
||||
CvSeq *result_seq = 0;
|
||||
cv::Ptr<CvMemStorage> temp_storage;
|
||||
|
||||
cv::ConcurrentRectVector allCandidates;
|
||||
std::vector<cv::Rect> rectList;
|
||||
@ -909,6 +744,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
|
||||
CV_Error(CV_StsError, "Image too small");
|
||||
|
||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
CvSize winSize0 = cascade->orig_window_size;
|
||||
@ -951,7 +787,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
|
||||
size_t blocksize = 8;
|
||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||
size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0],
|
||||
size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
|
||||
localThreads[1], 1
|
||||
};
|
||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||
@ -996,7 +832,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
|
||||
|
||||
stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count);
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
|
||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
@ -1043,7 +878,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
args.push_back ( std::make_pair(sizeof(cl_int4) , (void *)&pq ));
|
||||
args.push_back ( std::make_pair(sizeof(cl_float) , (void *)&correction ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
||||
const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
|
||||
|
||||
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
|
||||
@ -1058,6 +895,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(nodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(candidatebuffer));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1115,7 +953,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
|
||||
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
|
||||
nodenum * sizeof(GpuHidHaarTreeNode));
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0,
|
||||
nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
@ -1157,7 +994,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
args1.push_back ( std::make_pair(sizeof(cl_int) , (void *)&startnodenum ));
|
||||
|
||||
size_t globalThreads2[3] = {nodenum, 1, 1};
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||
}
|
||||
|
||||
@ -1193,7 +1029,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
||||
args.push_back ( std::make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
|
||||
args.push_back ( std::make_pair(sizeof(cl_int) , (void *)&nodenum ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
|
||||
|
||||
candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status);
|
||||
|
||||
@ -1281,7 +1117,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
int blocksize = 8;
|
||||
int grp_per_CU = 12;
|
||||
size_t localThreads[3] = { blocksize, blocksize, 1 };
|
||||
size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0],
|
||||
size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
|
||||
localThreads[1],
|
||||
1 };
|
||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||
@ -1297,8 +1133,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
GpuHidHaarClassifierCascade *gcascade;
|
||||
GpuHidHaarStageClassifier *stage;
|
||||
GpuHidHaarClassifier *classifier;
|
||||
GpuHidHaarTreeNode *node;
|
||||
|
||||
if( CV_MAT_DEPTH(gimg.type()) != CV_8U )
|
||||
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
|
||||
@ -1311,7 +1145,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
}
|
||||
|
||||
int *candidate;
|
||||
|
||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
int indexy = 0;
|
||||
@ -1337,19 +1171,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
|
||||
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count,
|
||||
stage, 0, NULL, NULL));
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
@ -1386,17 +1207,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
|
||||
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
|
||||
const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options);
|
||||
|
||||
candidate = (int *)malloc(4 * sizeof(int) * outputsz);
|
||||
memset(candidate, 0, 4 * sizeof(int) * outputsz);
|
||||
|
||||
openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
{
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
{
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
|
||||
}
|
||||
}
|
||||
free((void *)candidate);
|
||||
candidate = NULL;
|
||||
}
|
||||
@ -1404,6 +1231,132 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
{
|
||||
cv::ocl::integral(gimg, gsum, gsqsum);
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
|
||||
|
||||
int step = gsum.step / 4;
|
||||
int startnode = 0;
|
||||
int splitstage = 3;
|
||||
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
|
||||
|
||||
const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0";
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options);
|
||||
|
||||
candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
{
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
}
|
||||
clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
|
||||
}
|
||||
rectList.resize(allCandidates.size());
|
||||
if(!allCandidates.empty())
|
||||
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
|
||||
|
||||
if( minNeighbors != 0 || findBiggestObject )
|
||||
groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
|
||||
else
|
||||
rweights.resize(rectList.size(), 0);
|
||||
|
||||
GenResult(faces, rectList, rweights);
|
||||
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
|
||||
double scaleFactor, int flags,
|
||||
const int outputsz, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize)
|
||||
{
|
||||
if(initialized)
|
||||
{
|
||||
return; // we only allow one time initialization
|
||||
}
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
|
||||
if( !CV_IS_HAAR_CLASSIFIER(cascade) )
|
||||
CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
|
||||
|
||||
if( scaleFactor <= 1 )
|
||||
CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
|
||||
|
||||
if( cols < minSize.width || rows < minSize.height )
|
||||
CV_Error(CV_StsError, "Image too small");
|
||||
|
||||
int datasize=0;
|
||||
int totalclassifier=0;
|
||||
|
||||
if( !cascade->hid_cascade )
|
||||
{
|
||||
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
|
||||
}
|
||||
|
||||
if( maxSize.height == 0 || maxSize.width == 0 )
|
||||
{
|
||||
maxSize.height = rows;
|
||||
maxSize.width = cols;
|
||||
}
|
||||
|
||||
findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||
if( findBiggestObject )
|
||||
flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
|
||||
|
||||
CreateBaseBufs(datasize, totalclassifier, flags, outputsz);
|
||||
CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
|
||||
|
||||
m_scaleFactor = scaleFactor;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
m_flags = flags;
|
||||
m_minSize = minSize;
|
||||
m_maxSize = maxSize;
|
||||
|
||||
// initialize nodes
|
||||
GpuHidHaarClassifierCascade *gcascade;
|
||||
GpuHidHaarStageClassifier *stage;
|
||||
GpuHidHaarClassifier *classifier;
|
||||
GpuHidHaarTreeNode *node;
|
||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
|
||||
stage = (GpuHidHaarStageClassifier *)(gcascade + 1);
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 );
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0,
|
||||
sizeof(GpuHidHaarStageClassifier) * gcascade->count,
|
||||
stage, 0, NULL, NULL));
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
}
|
||||
else
|
||||
{
|
||||
gpuSetHaarClassifierCascade(cascade);
|
||||
|
||||
gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
|
||||
@ -1411,15 +1364,12 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
classifier = (GpuHidHaarClassifier *)(stage + gcascade->count);
|
||||
node = (GpuHidHaarTreeNode *)(classifier->node);
|
||||
|
||||
cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue();
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0,
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
m_nodenum * sizeof(GpuHidHaarTreeNode),
|
||||
node, 0, NULL, NULL));
|
||||
|
||||
cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount);
|
||||
float *correction = (float *)malloc(sizeof(float) * m_loopcount);
|
||||
int startstage = 0;
|
||||
int endstage = gcascade->count;
|
||||
double factor;
|
||||
for(int i = 0; i < m_loopcount; i++)
|
||||
{
|
||||
@ -1445,105 +1395,15 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
||||
|
||||
size_t globalThreads2[3] = {m_nodenum, 1, 1};
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||
openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
|
||||
}
|
||||
|
||||
int step = gsum.step / 4;
|
||||
int startnode = 0;
|
||||
int splitstage = 3;
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL));
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL));
|
||||
|
||||
vector<pair<size_t, const void *> > args;
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&step ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer ));
|
||||
args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum ));
|
||||
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL);
|
||||
|
||||
for(int i = 0; i < outputsz; i++)
|
||||
{
|
||||
if(candidate[4 * i + 2] != 0)
|
||||
allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1],
|
||||
candidate[4 * i + 2], candidate[4 * i + 3]));
|
||||
}
|
||||
|
||||
free(p);
|
||||
free(correction);
|
||||
clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0);
|
||||
}
|
||||
|
||||
rectList.resize(allCandidates.size());
|
||||
if(!allCandidates.empty())
|
||||
std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
|
||||
|
||||
if( minNeighbors != 0 || findBiggestObject )
|
||||
groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
|
||||
else
|
||||
rweights.resize(rectList.size(), 0);
|
||||
|
||||
GenResult(faces, rectList, rweights);
|
||||
}
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
|
||||
double scaleFactor, int flags,
|
||||
const int outputsz, const size_t localThreads[],
|
||||
CvSize minSize, CvSize maxSize)
|
||||
{
|
||||
CvHaarClassifierCascade *cascade = oldCascade;
|
||||
|
||||
if( !CV_IS_HAAR_CLASSIFIER(cascade) )
|
||||
CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
|
||||
|
||||
if( scaleFactor <= 1 )
|
||||
CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
|
||||
|
||||
if( cols < minSize.width || rows < minSize.height )
|
||||
CV_Error(CV_StsError, "Image too small");
|
||||
|
||||
int datasize=0;
|
||||
int totalclassifier=0;
|
||||
|
||||
if( !cascade->hid_cascade )
|
||||
gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier);
|
||||
|
||||
if( maxSize.height == 0 || maxSize.width == 0 )
|
||||
{
|
||||
maxSize.height = rows;
|
||||
maxSize.width = cols;
|
||||
}
|
||||
|
||||
findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
|
||||
if( findBiggestObject )
|
||||
flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING);
|
||||
|
||||
CreateBaseBufs(datasize, totalclassifier, flags, outputsz);
|
||||
CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize);
|
||||
|
||||
m_scaleFactor = scaleFactor;
|
||||
m_rows = rows;
|
||||
m_cols = cols;
|
||||
m_flags = flags;
|
||||
m_minSize = minSize;
|
||||
m_maxSize = maxSize;
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
@ -1642,6 +1502,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
|
||||
CvSize sz;
|
||||
CvSize winSize0 = oldCascade->orig_window_size;
|
||||
detect_piramid_info *scaleinfo;
|
||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
||||
if (flags & CV_HAAR_SCALE_IMAGE)
|
||||
{
|
||||
for(factor = 1.f;; factor *= scaleFactor)
|
||||
@ -1743,7 +1604,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
|
||||
((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount);
|
||||
}
|
||||
|
||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0,
|
||||
sizeof(detect_piramid_info)*loopcount,
|
||||
scaleinfo, 0, NULL, NULL));
|
||||
free(scaleinfo);
|
||||
@ -1755,7 +1616,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
|
||||
const std::vector<cv::Rect> &rectList,
|
||||
const std::vector<int> &rweights)
|
||||
{
|
||||
CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) );
|
||||
MemStorage tempStorage(cvCreateMemStorage(0));
|
||||
CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage );
|
||||
|
||||
if( findBiggestObject && rectList.size() )
|
||||
{
|
||||
@ -1791,168 +1653,32 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector<cv::Rect>& f
|
||||
|
||||
void cv::ocl::OclCascadeClassifierBuf::release()
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
|
||||
|
||||
if( (m_flags & CV_HAAR_SCALE_IMAGE) )
|
||||
if(initialized)
|
||||
{
|
||||
cvFree(&oldCascade->hid_cascade);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
|
||||
}
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer));
|
||||
|
||||
free(buffers);
|
||||
buffers = NULL;
|
||||
if( (m_flags & CV_HAAR_SCALE_IMAGE) )
|
||||
{
|
||||
cvFree(&oldCascade->hid_cascade);
|
||||
}
|
||||
else
|
||||
{
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer));
|
||||
openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer));
|
||||
}
|
||||
|
||||
free(buffers);
|
||||
buffers = NULL;
|
||||
initialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef _MAX_PATH
|
||||
#define _MAX_PATH 1024
|
||||
#endif
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* Persistence functions *
|
||||
\****************************************************************************************/
|
||||
|
||||
/* field names */
|
||||
|
||||
#define ICV_HAAR_SIZE_NAME "size"
|
||||
#define ICV_HAAR_STAGES_NAME "stages"
|
||||
#define ICV_HAAR_TREES_NAME "trees"
|
||||
#define ICV_HAAR_FEATURE_NAME "feature"
|
||||
#define ICV_HAAR_RECTS_NAME "rects"
|
||||
#define ICV_HAAR_TILTED_NAME "tilted"
|
||||
#define ICV_HAAR_THRESHOLD_NAME "threshold"
|
||||
#define ICV_HAAR_LEFT_NODE_NAME "left_node"
|
||||
#define ICV_HAAR_LEFT_VAL_NAME "left_val"
|
||||
#define ICV_HAAR_RIGHT_NODE_NAME "right_node"
|
||||
#define ICV_HAAR_RIGHT_VAL_NAME "right_val"
|
||||
#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold"
|
||||
#define ICV_HAAR_PARENT_NAME "parent"
|
||||
#define ICV_HAAR_NEXT_NAME "next"
|
||||
|
||||
// Placeholder for the per-window cascade evaluation. It takes no arguments
// (the cascade / point / start-stage parameters are commented out in the
// signature) and unconditionally returns 1.
static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
// Functor invoked with a BlockedRange of strips. For every strip in the range
// it steps through window origins (x, y) and appends a rectangle, scaled by
// `factor`, to `vec` whenever gpuRunHaarClassifierCascade() returns a
// positive value. norm1, mask1 and equRect are stored by the constructor but
// are not read by operator().
struct gpuHaarDetectObjects_ScaleImage_Invoker
|
||||
{
|
||||
// Copies the arguments into the members; `_vec` is kept by pointer, not copied.
gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
|
||||
int _stripSize, double _factor,
|
||||
const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
|
||||
Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
|
||||
{
|
||||
cascade = _cascade;
|
||||
stripSize = _stripSize;
|
||||
factor = _factor;
|
||||
sum1 = _sum1;
|
||||
sqsum1 = _sqsum1;
|
||||
norm1 = _norm1;
|
||||
mask1 = _mask1;
|
||||
equRect = _equRect;
|
||||
vec = &_vec;
|
||||
}
|
||||
|
|
||||
// Processes rows [range.begin() * stripSize, range.end() * stripSize), with
// the upper bound capped at sum1.rows - 1 - winSize0.height.
void operator()( const BlockedRange &range ) const
|
||||
{
|
||||
Size winSize0 = cascade->orig_window_size;
|
||||
// Window size after scaling the cascade's original window by `factor`.
Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * factor));
|
||||
int y1 = range.begin() * stripSize, y2 = std::min(range.end() * stripSize, sum1.rows - 1 - winSize0.height);
|
||||
Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1);
|
||||
// The same step is used for x and y: 1 when factor > 2, otherwise 2.
int x, y, ystep = factor > 2 ? 1 : 2;
|
||||
|
|
||||
for( y = y1; y < y2; y += ystep )
|
||||
for( x = 0; x < ssz.width; x += ystep )
|
||||
{
|
||||
// The call passes no arguments; the original ones are commented out.
if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 )
|
||||
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
|
||||
winSize.width, winSize.height));
|
||||
}
|
||||
}
|
||||
|
|
||||
const CvHaarClassifierCascade *cascade;
|
||||
int stripSize;
|
||||
double factor;
|
||||
Mat sum1, sqsum1, *norm1, *mask1;
|
||||
Rect equRect;
|
||||
ConcurrentRectVector *vec;
|
||||
};
|
||||
|
||||
|
||||
// Functor invoked with a BlockedRange of row indices. Each (ix, iy) index is
// converted to pixel coordinates by multiplying with `ystep`; a rectangle of
// size `winsize` is appended to `vec` when gpuRunHaarClassifierCascade()
// returns a positive value. When p[0] is non-null, positions whose
// four-pointer sums fall below fixed thresholds are skipped beforehand.
struct gpuHaarDetectObjects_ScaleCascade_Invoker
|
||||
{
|
||||
// Copies the arguments into the members; `_p`, `_pq` and `_vec` are kept as
// pointers to caller-owned data.
gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
|
||||
Size _winsize, const Range &_xrange, double _ystep,
|
||||
size_t _sumstep, const int **_p, const int **_pq,
|
||||
ConcurrentRectVector &_vec )
|
||||
{
|
||||
cascade = _cascade;
|
||||
winsize = _winsize;
|
||||
xrange = _xrange;
|
||||
ystep = _ystep;
|
||||
sumstep = _sumstep;
|
||||
p = _p;
|
||||
pq = _pq;
|
||||
vec = &_vec;
|
||||
}
|
||||
|
|
||||
// Scans rows [range.begin(), range.end()) and columns [xrange.start, xrange.end).
void operator()( const BlockedRange &range ) const
|
||||
{
|
||||
int iy, startY = range.begin(), endY = range.end();
|
||||
// Four pointers from each of p and pq, combined below as p0 - p1 - p2 + p3.
const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3];
|
||||
const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3];
|
||||
// Pruning is enabled only when the first pointer is non-null.
bool doCannyPruning = p0 != 0;
|
||||
// Row stride in int elements (sumstep is presumably in bytes -- TODO confirm).
int sstep = (int)(sumstep / sizeof(p0[0]));
|
||||
|
|
||||
for( iy = startY; iy < endY; iy++ )
|
||||
{
|
||||
// ixstep is reassigned inside the loop body, so the column stride varies.
int ix, y = cvRound(iy * ystep), ixstep = 1;
|
||||
for( ix = xrange.start; ix < xrange.end; ix += ixstep )
|
||||
{
|
||||
int x = cvRound(ix * ystep); // it should really be ystep, not ixstep
|
||||
|
|
||||
if( doCannyPruning )
|
||||
{
|
||||
int offset = y * sstep + x;
|
||||
int s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
|
||||
int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
|
||||
// Skip this position and advance by 2 when either sum is below its threshold.
if( s < 100 || sq < 20 )
|
||||
{
|
||||
ixstep = 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
||||
// The call passes no arguments; the original ones are commented out.
int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */);
|
||||
if( result > 0 )
|
||||
vec->push_back(Rect(x, y, winsize.width, winsize.height));
|
||||
// Advance by 1 after a non-zero result, by 2 after a zero result.
ixstep = result != 0 ? 1 : 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
||||
const CvHaarClassifierCascade *cascade;
|
||||
double ystep;
|
||||
size_t sumstep;
|
||||
Size winsize;
|
||||
Range xrange;
|
||||
const int **p;
|
||||
const int **pq;
|
||||
ConcurrentRectVector *vec;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -270,7 +270,7 @@ namespace cv
|
||||
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||
|
||||
|
||||
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
if(map1.channels() == 2)
|
||||
{
|
||||
@ -292,7 +292,7 @@ namespace cv
|
||||
args.push_back( std::make_pair(sizeof(cl_int), (void *)&cols));
|
||||
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
|
||||
|
||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
||||
{
|
||||
args.push_back( std::make_pair(sizeof(cl_double4), (void *)&borderValue));
|
||||
}
|
||||
@ -326,7 +326,6 @@ namespace cv
|
||||
}
|
||||
else
|
||||
{
|
||||
float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
|
||||
args.push_back( std::make_pair(sizeof(cl_float4), (void *)&borderFloat));
|
||||
}
|
||||
}
|
||||
@ -1210,30 +1209,41 @@ namespace cv
|
||||
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
|
||||
double k, int borderType)
|
||||
{
|
||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(Error::GpuNotSupported, "select device don't support double");
|
||||
}
|
||||
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
||||
oclMat Dx, Dy;
|
||||
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
||||
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
||||
dst.create(src.size(), CV_32F);
|
||||
corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), Dx, Dy, dst, borderType);
|
||||
oclMat dx, dy;
|
||||
cornerHarris_dxdy(src, dst, dx, dy, blockSize, ksize, k, borderType);
|
||||
}
|
||||
|
||||
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
|
||||
void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
|
||||
double k, int borderType)
|
||||
{
|
||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(Error::GpuNotSupported, "select device don't support double");
|
||||
}
|
||||
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
||||
oclMat Dx, Dy;
|
||||
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
||||
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
||||
extractCovData(src, dx, dy, blockSize, ksize, borderType);
|
||||
dst.create(src.size(), CV_32F);
|
||||
corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType);
|
||||
corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast<float>(k), dx, dy, dst, borderType);
|
||||
}
|
||||
|
||||
// Convenience overload: declares local dx/dy matrices and forwards every
// argument to cornerMinEigenVal_dxdy(). The local dx/dy go out of scope on
// return, so callers of this overload do not receive them.
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
|
||||
{
|
||||
oclMat dx, dy;
|
||||
cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType);
|
||||
}
|
||||
|
||||
// Runs the "calcMinEigenVal" kernel on src and writes the result to dst,
// which is (re)created as CV_32F with the size of src. dx and dy are handed
// to extractCovData() and then to corner_ocl(); they are caller-provided so
// the caller can keep them afterwards (presumably they receive the
// derivative data -- TODO confirm against extractCovData).
//
// Raises Error::GpuNotSupported via CV_Error when src is CV_64F and the
// context does not report CL_DOUBLE support. Asserts that src is at least
// blockSize / 2 in both dimensions and that borderType is one of
// BORDER_CONSTANT, BORDER_REFLECT101, BORDER_REPLICATE or BORDER_REFLECT.
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
|
||||
{
|
||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
||||
{
|
||||
CV_Error(Error::GpuNotSupported, "select device don't support double");
|
||||
}
|
||||
CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
|
||||
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
|
||||
extractCovData(src, dx, dy, blockSize, ksize, borderType);
|
||||
dst.create(src.size(), CV_32F);
|
||||
// The fourth argument (0) occupies the slot where the Harris variant passes k.
corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType);
|
||||
}
|
||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||
static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps)
|
||||
|
@ -43,9 +43,28 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#include "precomp.hpp"
|
||||
|
||||
// Helper macros for switching a single GCC warning off and on again around a
// piece of code: GCC_DIAG_OFF(name) / GCC_DIAG_ON(name), where `name` is the
// warning without its "-W" prefix (e.g. deprecated-declarations).
// The macros are only defined when __GNUC__ is defined, so uses must be
// guarded by #ifdef __GNUC__ as well.
#ifdef __GNUC__
|
||||
// (major * 100 + minor) >= 402, i.e. GCC 4.2 or newer: emit diagnostic pragmas.
#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
|
||||
#define GCC_DIAG_STR(s) #s
|
||||
#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
|
||||
# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
|
||||
# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
|
||||
// GCC 4.6 or newer: save the diagnostic state with push and restore it with pop.
# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
|
||||
# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
|
||||
GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
|
||||
# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
|
||||
// Older than 4.6: no push/pop is used; ON sets the diagnostic back to "warning".
# else
|
||||
# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
|
||||
# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
|
||||
# endif
|
||||
// Older than 4.2: both macros expand to nothing.
#else
|
||||
# define GCC_DIAG_OFF(x)
|
||||
# define GCC_DIAG_ON(x)
|
||||
#endif
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace cv
|
||||
@ -121,6 +140,9 @@ namespace cv
|
||||
build_options, finish_mode);
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
GCC_DIAG_OFF(deprecated-declarations)
|
||||
#endif
|
||||
cl_mem bindTexture(const oclMat &mat)
|
||||
{
|
||||
cl_mem texture;
|
||||
@ -156,7 +178,7 @@ namespace cv
|
||||
format.image_channel_order = CL_RGBA;
|
||||
break;
|
||||
default:
|
||||
CV_Error(-1, "Image forma is not supported");
|
||||
CV_Error(-1, "Image format is not supported");
|
||||
break;
|
||||
}
|
||||
#ifdef CL_VERSION_1_2
|
||||
@ -180,10 +202,6 @@ namespace cv
|
||||
else
|
||||
#endif
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
texture = clCreateImage2D(
|
||||
(cl_context)mat.clCxt->oclContext(),
|
||||
CL_MEM_READ_WRITE,
|
||||
@ -193,9 +211,6 @@ namespace cv
|
||||
0,
|
||||
NULL,
|
||||
&err);
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
size_t origin[] = { 0, 0, 0 };
|
||||
size_t region[] = { mat.cols, mat.rows, 1 };
|
||||
@ -225,6 +240,14 @@ namespace cv
|
||||
openCLSafeCall(err);
|
||||
return texture;
|
||||
}
|
||||
#ifdef __GNUC__
|
||||
GCC_DIAG_ON(deprecated-declarations)
|
||||
#endif
|
||||
|
||||
// Calls bindTexture(mat) and wraps the returned cl_mem in a newly allocated
// TextureCL, constructed with mat.rows, mat.cols and mat.type(). The object
// is returned inside a Ptr<TextureCL>.
Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
|
||||
{
|
||||
return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
|
||||
}
|
||||
void releaseTexture(cl_mem& texture)
|
||||
{
|
||||
openCLFree(texture);
|
||||
|
@ -127,7 +127,7 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
#define dst_align ((dst_offset / 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
||||
@ -165,7 +165,7 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 3)
|
||||
#define dst_align ((dst_offset / 2) & 3)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
|
||||
@ -335,7 +335,7 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step,
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -375,7 +375,7 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
@ -507,7 +507,7 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
@ -126,7 +126,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
@ -164,7 +164,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
@ -288,7 +288,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste
|
||||
#ifdef dst_align
|
||||
#undef dst_align
|
||||
#endif
|
||||
#define dst_align ((dst_offset >> 1) & 1)
|
||||
#define dst_align ((dst_offset / 2) & 1)
|
||||
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
|
||||
int mask_index = mad24(y, mask_step, x + mask_offset - dst_align);
|
||||
|
||||
|
@ -120,7 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
|
||||
int gidy = get_global_id(1);
|
||||
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
|
||||
|
||||
if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
|
||||
if(gidx+3<cols && gidy<rows && ((dst_offset_in_pixel&3)==0))
|
||||
{
|
||||
*(__global uchar4*)&dst[out_addr] = res;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
// Wang Weiyan, wangweiyanster@gmail.com
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
@ -45,27 +46,16 @@
|
||||
typedef int sumtype;
|
||||
typedef float sqsumtype;
|
||||
|
||||
// One Haar feature: an array of CV_HAAR_FEATURE_MAX rectangles, each made of
// four int fields (p0..p3) and a float weight. Each rectangle entry is aligned
// to 32 bytes and the whole struct to 128 bytes.
// NOTE(review): the layout presumably has to match a host-side struct of the
// same name -- confirm before changing any alignment attribute.
typedef struct __attribute__((aligned (128))) GpuHidHaarFeature
|
||||
{
|
||||
struct __attribute__((aligned (32)))
|
||||
{
|
||||
int p0 __attribute__((aligned (4)));
|
||||
int p1 __attribute__((aligned (4)));
|
||||
int p2 __attribute__((aligned (4)));
|
||||
int p3 __attribute__((aligned (4)));
|
||||
float weight __attribute__((aligned (4)));
|
||||
}
|
||||
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32)));
|
||||
}
|
||||
GpuHidHaarFeature;
|
||||
|
||||
#ifndef STUMP_BASED
|
||||
#define STUMP_BASED 1
|
||||
#endif
|
||||
|
||||
typedef struct __attribute__((aligned (128) )) GpuHidHaarTreeNode
|
||||
{
|
||||
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned (64)));
|
||||
float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
|
||||
float threshold /*__attribute__((aligned (4)))*/;
|
||||
float alpha[2] __attribute__((aligned (8)));
|
||||
float weight[CV_HAAR_FEATURE_MAX];
|
||||
float threshold;
|
||||
float alpha[3] __attribute__((aligned (16)));
|
||||
int left __attribute__((aligned (4)));
|
||||
int right __attribute__((aligned (4)));
|
||||
}
|
||||
@ -111,7 +101,6 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
|
||||
float inv_window_area __attribute__((aligned (4)));
|
||||
} GpuHidHaarClassifierCascade;
|
||||
|
||||
|
||||
__kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(
|
||||
global GpuHidHaarStageClassifier * stagecascadeptr,
|
||||
global int4 * info,
|
||||
@ -234,7 +223,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
float stage_sum = 0.f;
|
||||
int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop);
|
||||
float stagethreshold = as_float(stageinfo.y);
|
||||
for(int nodeloop = 0; nodeloop < stageinfo.x; nodeloop++ )
|
||||
for(int nodeloop = 0; nodeloop < stageinfo.x; )
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter);
|
||||
|
||||
@ -242,7 +231,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
|
||||
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
info1.x +=lcl_off;
|
||||
@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
bool passThres = classsum >= nodethreshold;
|
||||
#if STUMP_BASED
|
||||
stage_sum += passThres ? alpha3.y : alpha3.x;
|
||||
nodecounter++;
|
||||
nodeloop++;
|
||||
#else
|
||||
bool isRootNode = (nodecounter & 1) == 0;
|
||||
if(isRootNode)
|
||||
{
|
||||
if( (passThres && currentnodeptr->right) ||
|
||||
(!passThres && currentnodeptr->left))
|
||||
{
|
||||
nodecounter ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
stage_sum += alpha3.x;
|
||||
nodecounter += 2;
|
||||
nodeloop ++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stage_sum += passThres ? alpha3.z : alpha3.y;
|
||||
nodecounter ++;
|
||||
nodeloop ++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
result = (stage_sum >= stagethreshold);
|
||||
@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
|
||||
if(lcl_compute_win_id < queuecount)
|
||||
{
|
||||
|
||||
int tempnodecounter = lcl_compute_id;
|
||||
float part_sum = 0.f;
|
||||
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x; lcl_loop++)
|
||||
const int stump_factor = STUMP_BASED ? 1 : 2;
|
||||
int root_offset = 0;
|
||||
for(int lcl_loop=0; lcl_loop<lcl_loops && tempnodecounter<stageinfo.x;)
|
||||
{
|
||||
__global GpuHidHaarTreeNode* currentnodeptr = (nodeptr + nodecounter + tempnodecounter);
|
||||
__global GpuHidHaarTreeNode* currentnodeptr =
|
||||
nodeptr + (nodecounter + tempnodecounter) * stump_factor + root_offset;
|
||||
|
||||
int4 info1 = *(__global int4*)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4*)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0]));
|
||||
float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
info1.x +=queue_pixel;
|
||||
@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] -
|
||||
lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z;
|
||||
|
||||
part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
tempnodecounter +=lcl_compute_win;
|
||||
bool passThres = classsum >= nodethreshold;
|
||||
#if STUMP_BASED
|
||||
part_sum += passThres ? alpha3.y : alpha3.x;
|
||||
tempnodecounter += lcl_compute_win;
|
||||
lcl_loop++;
|
||||
#else
|
||||
if(root_offset == 0)
|
||||
{
|
||||
if( (passThres && currentnodeptr->right) ||
|
||||
(!passThres && currentnodeptr->left))
|
||||
{
|
||||
root_offset = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
part_sum += alpha3.x;
|
||||
tempnodecounter += lcl_compute_win;
|
||||
lcl_loop++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
part_sum += passThres ? alpha3.z : alpha3.y;
|
||||
tempnodecounter += lcl_compute_win;
|
||||
lcl_loop++;
|
||||
root_offset = 0;
|
||||
}
|
||||
#endif
|
||||
}//end for(int lcl_loop=0;lcl_loop<lcl_loops;lcl_loop++)
|
||||
partialsum[lcl_id]=part_sum;
|
||||
}
|
||||
@ -377,155 +421,3 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
|
||||
}//end for(int grploop=grpidx;grploop<totalgrp;grploop+=grpnumx)
|
||||
}//end for(int scalei = 0; scalei <loopcount; scalei++)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
if(stagecascade->two_rects)
|
||||
{
|
||||
#pragma unroll
|
||||
for( n = 0; n < stagecascade->count; n++ )
|
||||
{
|
||||
t1 = *(node + counter);
|
||||
t = t1.threshold * variance_norm_factor;
|
||||
classsum = calc_sum1(t1,p_offset,0) * t1.weight[0];
|
||||
|
||||
classsum += calc_sum1(t1, p_offset,1) * t1.weight[1];
|
||||
stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0];
|
||||
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma unroll
|
||||
for( n = 0; n < stagecascade->count; n++ )
|
||||
{
|
||||
t = node[counter].threshold*variance_norm_factor;
|
||||
classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0];
|
||||
classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1];
|
||||
|
||||
if( node[counter].p0[2] )
|
||||
classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2];
|
||||
|
||||
stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify
|
||||
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
/*
|
||||
__kernel void gpuRunHaarClassifierCascade_ScaleWindow(
|
||||
constant GpuHidHaarClassifierCascade * _cascade,
|
||||
global GpuHidHaarStageClassifier * stagecascadeptr,
|
||||
//global GpuHidHaarClassifier * classifierptr,
|
||||
global GpuHidHaarTreeNode * nodeptr,
|
||||
global int * sum,
|
||||
global float * sqsum,
|
||||
global int * _candidate,
|
||||
int pixel_step,
|
||||
int cols,
|
||||
int rows,
|
||||
int start_stage,
|
||||
int end_stage,
|
||||
//int counts,
|
||||
int nodenum,
|
||||
int ystep,
|
||||
int detect_width,
|
||||
//int detect_height,
|
||||
int loopcount,
|
||||
int outputstep)
|
||||
//float scalefactor)
|
||||
{
|
||||
unsigned int x1 = get_global_id(0);
|
||||
unsigned int y1 = get_global_id(1);
|
||||
int p_offset;
|
||||
int m, n;
|
||||
int result;
|
||||
int counter;
|
||||
float mean, variance_norm_factor;
|
||||
for(int i=0;i<loopcount;i++)
|
||||
{
|
||||
constant GpuHidHaarClassifierCascade * cascade = _cascade + i;
|
||||
global int * candidate = _candidate + i*outputstep;
|
||||
int window_width = cascade->p1 - cascade->p0;
|
||||
int window_height = window_width;
|
||||
result = 1;
|
||||
counter = 0;
|
||||
unsigned int x = mul24(x1,ystep);
|
||||
unsigned int y = mul24(y1,ystep);
|
||||
if((x < cols - window_width - 1) && (y < rows - window_height -1))
|
||||
{
|
||||
global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage;
|
||||
//global GpuHidHaarClassifier *classifier = classifierptr;
|
||||
global GpuHidHaarTreeNode *node = nodeptr + nodenum*i;
|
||||
|
||||
p_offset = mad24(y, pixel_step, x);// modify
|
||||
|
||||
mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) -
|
||||
*(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3))
|
||||
*cascade->inv_window_area;
|
||||
|
||||
variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) -
|
||||
*(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset);
|
||||
variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify
|
||||
|
||||
// if( cascade->is_stump_based )
|
||||
//{
|
||||
for( m = start_stage; m < end_stage; m++ )
|
||||
{
|
||||
float stage_sum = 0.f;
|
||||
float t, classsum;
|
||||
GpuHidHaarTreeNode t1;
|
||||
|
||||
//#pragma unroll
|
||||
for( n = 0; n < stagecascade->count; n++ )
|
||||
{
|
||||
t1 = *(node + counter);
|
||||
t = t1.threshold * variance_norm_factor;
|
||||
classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1];
|
||||
|
||||
if((t1.p0[2]) && (!stagecascade->two_rects))
|
||||
classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2];
|
||||
|
||||
stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (stage_sum < stagecascade->threshold)
|
||||
{
|
||||
result = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
stagecascade++;
|
||||
|
||||
}
|
||||
if(result)
|
||||
{
|
||||
candidate[4 * (y1 * detect_width + x1)] = x;
|
||||
candidate[4 * (y1 * detect_width + x1) + 1] = y;
|
||||
candidate[4 * (y1 * detect_width + x1)+2] = window_width;
|
||||
candidate[4 * (y1 * detect_width + x1) + 3] = window_height;
|
||||
}
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
@ -17,7 +17,7 @@
|
||||
// @Authors
|
||||
// Wu Xinglong, wxl370@126.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
@ -49,25 +49,13 @@
|
||||
#define CV_HAAR_FEATURE_MAX 3
|
||||
typedef int sumtype;
|
||||
typedef float sqsumtype;
|
||||
// One Haar feature: an array of CV_HAAR_FEATURE_MAX rectangles, each made of
// four int fields (p0..p3) and a float weight. Each rectangle entry is aligned
// to 32 bytes and the whole struct to 128 bytes.
// NOTE(review): the layout presumably has to match a host-side struct of the
// same name -- confirm before changing any alignment attribute.
typedef struct __attribute__((aligned(128))) GpuHidHaarFeature
|
||||
{
|
||||
struct __attribute__((aligned(32)))
|
||||
{
|
||||
int p0 __attribute__((aligned(4)));
|
||||
int p1 __attribute__((aligned(4)));
|
||||
int p2 __attribute__((aligned(4)));
|
||||
int p3 __attribute__((aligned(4)));
|
||||
float weight __attribute__((aligned(4)));
|
||||
}
|
||||
rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32)));
|
||||
}
|
||||
GpuHidHaarFeature;
|
||||
|
||||
typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode
|
||||
{
|
||||
int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64)));
|
||||
float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/;
|
||||
float threshold /*__attribute__((aligned (4)))*/;
|
||||
float alpha[2] __attribute__((aligned(8)));
|
||||
float alpha[3] __attribute__((aligned(16)));
|
||||
int left __attribute__((aligned(4)));
|
||||
int right __attribute__((aligned(4)));
|
||||
}
|
||||
@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
const int p_offset = mad24(y, step, x);
|
||||
cascadeinfo.x += p_offset;
|
||||
cascadeinfo.z += p_offset;
|
||||
mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
|
||||
mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
|
||||
- sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
|
||||
+ sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)])
|
||||
* correction_t;
|
||||
variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
|
||||
variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)]
|
||||
- sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] -
|
||||
sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)]
|
||||
+ sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)];
|
||||
variance_norm_factor = variance_norm_factor * correction_t - mean * mean;
|
||||
variance_norm_factor = variance_norm_factor >= 0.f ? sqrt(variance_norm_factor) : 1.f;
|
||||
bool result = true;
|
||||
nodecounter = startnode + nodecount * scalei;
|
||||
|
||||
for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++)
|
||||
{
|
||||
float stage_sum = 0.f;
|
||||
int stagecount = stagecascadeptr[stageloop].count;
|
||||
for (int nodeloop = 0; nodeloop < stagecount; nodeloop++)
|
||||
for (int nodeloop = 0; nodeloop < stagecount;)
|
||||
{
|
||||
__global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter);
|
||||
int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0]));
|
||||
int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0]));
|
||||
int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0]));
|
||||
float4 w = *(__global float4 *)(&(currentnodeptr->weight[0]));
|
||||
float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0]));
|
||||
float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0]));
|
||||
float nodethreshold = w.w * variance_norm_factor;
|
||||
|
||||
info1.x += p_offset;
|
||||
info1.z += p_offset;
|
||||
info2.x += p_offset;
|
||||
info2.z += p_offset;
|
||||
float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
|
||||
classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
|
||||
info3.x += p_offset;
|
||||
info3.z += p_offset;
|
||||
classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
|
||||
stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x;
|
||||
float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)]
|
||||
- sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)]
|
||||
+ sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x;
|
||||
classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)]
|
||||
- sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)]
|
||||
+ sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y;
|
||||
classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)]
|
||||
- sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
|
||||
sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
|
||||
+ sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
|
||||
|
||||
bool passThres = classsum >= nodethreshold;
|
||||
|
||||
#if STUMP_BASED
|
||||
stage_sum += passThres ? alpha3.y : alpha3.x;
|
||||
nodecounter++;
|
||||
nodeloop++;
|
||||
#else
|
||||
bool isRootNode = (nodecounter & 1) == 0;
|
||||
if(isRootNode)
|
||||
{
|
||||
if( (passThres && currentnodeptr->right) ||
|
||||
(!passThres && currentnodeptr->left))
|
||||
{
|
||||
nodecounter ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
stage_sum += alpha3.x;
|
||||
nodecounter += 2;
|
||||
nodeloop ++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
stage_sum += (passThres ? alpha3.z : alpha3.y);
|
||||
nodecounter ++;
|
||||
nodeloop ++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold);
|
||||
result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
|
||||
int queueindex = atomic_inc(lclcount);
|
||||
lcloutindex[queueindex] = (y << 16) | x;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
int queuecount = lclcount[0];
|
||||
|
||||
@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
|
||||
newnode[counter].threshold = t1.threshold;
|
||||
newnode[counter].alpha[0] = t1.alpha[0];
|
||||
newnode[counter].alpha[1] = t1.alpha[1];
|
||||
newnode[counter].alpha[2] = t1.alpha[2];
|
||||
}
|
||||
|
||||
|
@ -297,6 +297,9 @@ calcMap
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
mag += mag_offset;
|
||||
map += map_offset;
|
||||
|
||||
__local float smem[18][18];
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
@ -389,7 +392,7 @@ edgesHysteresisLocal
|
||||
(
|
||||
__global int * map,
|
||||
__global ushort2 * st,
|
||||
volatile __global unsigned int * counter,
|
||||
__global unsigned int * counter,
|
||||
int rows,
|
||||
int cols,
|
||||
int map_step,
|
||||
@ -399,6 +402,8 @@ edgesHysteresisLocal
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
map += map_offset;
|
||||
|
||||
__local int smem[18][18];
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
@ -416,12 +421,12 @@ edgesHysteresisLocal
|
||||
if(ly < 14)
|
||||
{
|
||||
smem[ly][lx] =
|
||||
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset];
|
||||
map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step];
|
||||
}
|
||||
if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols)
|
||||
{
|
||||
smem[ly + 14][lx] =
|
||||
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset];
|
||||
map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step];
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
@ -482,14 +487,17 @@ edgesHysteresisLocal
|
||||
__constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
|
||||
__constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
|
||||
|
||||
|
||||
#define stack_size 512
|
||||
__kernel
|
||||
void edgesHysteresisGlobal
|
||||
void
|
||||
__attribute__((reqd_work_group_size(128,1,1)))
|
||||
edgesHysteresisGlobal
|
||||
(
|
||||
__global int * map,
|
||||
__global ushort2 * st1,
|
||||
__global ushort2 * st2,
|
||||
volatile __global int * counter,
|
||||
__global int * counter,
|
||||
int rows,
|
||||
int cols,
|
||||
int count,
|
||||
@ -501,6 +509,8 @@ void edgesHysteresisGlobal
|
||||
map_step /= sizeof(*map);
|
||||
map_offset /= sizeof(*map);
|
||||
|
||||
map += map_offset;
|
||||
|
||||
int gidx = get_global_id(0);
|
||||
int gidy = get_global_id(1);
|
||||
|
||||
@ -510,7 +520,7 @@ void edgesHysteresisGlobal
|
||||
int grp_idx = get_group_id(0);
|
||||
int grp_idy = get_group_id(1);
|
||||
|
||||
volatile __local unsigned int s_counter;
|
||||
__local unsigned int s_counter;
|
||||
__local unsigned int s_ind;
|
||||
|
||||
__local ushort2 s_st[stack_size];
|
||||
@ -564,9 +574,9 @@ void edgesHysteresisGlobal
|
||||
pos.x += c_dx[lidx & 7];
|
||||
pos.y += c_dy[lidx & 7];
|
||||
|
||||
if (map[pos.x + map_offset + pos.y * map_step] == 1)
|
||||
if (map[pos.x + pos.y * map_step] == 1)
|
||||
{
|
||||
map[pos.x + map_offset + pos.y * map_step] = 2;
|
||||
map[pos.x + pos.y * map_step] = 2;
|
||||
|
||||
ind = atomic_inc(&s_counter);
|
||||
|
||||
@ -621,6 +631,6 @@ void getEdges
|
||||
|
||||
if(gidy < rows && gidx < cols)
|
||||
{
|
||||
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1));
|
||||
dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1));
|
||||
}
|
||||
}
|
||||
|
276
modules/ocl/src/opencl/imgproc_gfft.cl
Normal file
276
modules/ocl/src/opencl/imgproc_gfft.cl
Normal file
@ -0,0 +1,276 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other oclMaterials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef WITH_MASK
|
||||
#define WITH_MASK 0
|
||||
#endif
|
||||
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
|
||||
|
||||
// Fetches the first channel of _eig at the integer pixel (_x, _y) through the
// file-level sampler (unnormalized coordinates, clamp-to-edge, nearest filter).
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
{
    const int2 coord = (int2)(_x, _y);
    const float4 texel = read_imagef(_eig, sampler, coord);
    return texel.x;
}
|
||||
|
||||
// Fetches the first channel of _eig at the floating-point coordinate pt through
// the file-level sampler (unnormalized coordinates, clamp-to-edge, nearest filter).
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
{
    const float4 texel = read_imagef(_eig, sampler, pt);
    return texel.x;
}
|
||||
|
||||
// Collects corner candidates from the eig image.
//
// One work-item per pixel (dimension 0 = column, dimension 1 = row). A pixel is
// appended to `corners` as (column, row) when
//   * it is not on the one-pixel image border,
//   * its mask byte is non-zero (only when compiled with WITH_MASK != 0),
//   * its eig value is strictly above `threshold`, and
//   * no value in its 3x3 neighbourhood is larger.
//
// g_counter is incremented for every accepted pixel, including those that no
// longer fit, so it can end up greater than max_count; only the first max_count
// slots of `corners` are ever written. The order of the output is whatever
// order the atomic increments happen in.
__kernel
void findCorners
(
    image2d_t eig,
    __global const char * mask,
    __global float2 * corners,
    const int mask_strip,// in pixels
    const float threshold,
    const int rows,
    const int cols,
    const int max_count,
    __global int * g_counter
)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Border pixels (and work-items outside the image) are never candidates.
    if (row <= 0 || row >= rows - 1 || col <= 0 || col >= cols - 1)
        return;

#if WITH_MASK
    // A zero mask byte excludes the pixel.
    if (mask[row * mask_strip + col] == 0)
        return;
#endif

    const float center = ELEM_INT2(eig, col, row);

    // Written as a negated '>' so that an unordered (NaN) value is rejected too.
    if (!(center > threshold))
        return;

    // Largest value among the centre and its eight neighbours, visited row by row.
    float neighbourhoodMax = center;
    for (int dy = -1; dy <= 1; ++dy)
    {
        for (int dx = -1; dx <= 1; ++dx)
        {
            if (dx == 0 && dy == 0)
                continue;
            neighbourhoodMax = fmax(ELEM_INT2(eig, col + dx, row + dy), neighbourhoodMax);
        }
    }

    // Only local maxima survive.
    if (center != neighbourhoodMax)
        return;

    const int slot = atomic_inc(g_counter);
    if (slot < max_count)
        corners[slot] = (float2)(col, row);
}
|
||||
|
||||
//bitonic sort
|
||||
// One compare-exchange pass of a bitonic sort over corners[0 .. count-1].
//
// Each of the first count/2 work-items handles one pair of points that are
// pairDistance = 2^(stage - passOfStage) apart and orders them by the eig value
// sampled at each point. sortOrder == 0 puts the larger value on the left
// (descending); sortOrder == 1 puts it on the right.
//
// A pair whose partner index falls outside [0, count) is skipped. The previous
// code clamped indices to `count`, which is itself one past the last element,
// so it could read and write outside the range being sorted.
// NOTE(review): presumably the host only runs this with count equal to a power
// of two (otherwise skipped pairs leave the range unsorted) -- confirm.
__kernel
void sortCorners_bitonicSort
(
    image2d_t eig,
    __global float2 * corners,
    const int count,
    const int stage,
    const int passOfStage
)
{
    const int threadId = get_global_id(0);
    if(threadId >= count / 2)
    {
        return;
    }

    const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 1 : 0; // 0 is descent

    const int pairDistance = 1 << (stage - passOfStage);
    const int blockWidth   = 2 * pairDistance;

    const int leftId  = (threadId % pairDistance) + (threadId / pairDistance) * blockWidth;
    const int rightId = leftId + pairDistance;

    // Never touch elements beyond the sorted range.
    if(leftId >= count || rightId >= count)
    {
        return;
    }

    const float2 leftPt  = corners[leftId];
    const float2 rightPt = corners[rightId];

    const float leftVal  = ELEM_FLT2(eig, leftPt);
    const float rightVal = ELEM_FLT2(eig, rightPt);

    const bool compareResult = leftVal > rightVal;

    const float2 greater = compareResult ? leftPt : rightPt;
    const float2 lesser  = compareResult ? rightPt : leftPt;

    corners[leftId]  = sortOrder ? lesser  : greater;
    corners[rightId] = sortOrder ? greater : lesser;
}
|
||||
|
||||
// Selection sort for gfft, first stage.
// The kernel is ported from the Bolt library:
// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
//
// Each work-group sorts its own block of `corners` in place, in descending
// order of the eig value sampled at each point. A block holds get_local_size(0)
// points, except the last one, which holds whatever is left of `count`.
// Every work-item computes the rank of its own point by scanning the whole
// block, i.e. O(n) work per work-item for a block of n points.
//
// `scratch` must provide room for get_local_size(0) float2 values.
//
// Points with equal eig values are ranked by their original position in the
// block, so every work-item writes exactly one distinct slot. (Previously all
// tied work-items overwrote the same range of slots with their own point, so
// distinct points could be lost or duplicated.)
// NOTE(review): assumes no eig value is NaN; unordered values are treated as
// ties and would make the ranks inconsistent.
__kernel
void sortCorners_selectionSortLocal
(
    image2d_t eig,
    __global float2 * corners,
    const int count,
    __local float2 * scratch
)
{
    const int i           = get_local_id(0);   // index in workgroup
    const int numOfGroups = get_num_groups(0); // number of workgroups
    const int groupID     = get_group_id(0);
    const int wg          = get_local_size(0); // workgroup size = block size

    // number of elements to be processed by this work group
    const int n = (groupID == (numOfGroups - 1)) ? (count - wg * (numOfGroups - 1)) : wg;

    corners += groupID * wg;

    // Stage the block in local memory. Work-items beyond the block load nothing
    // (the old clamp min(i, n) made them read one element past the block).
    float2 pt1 = (float2)(0.0f, 0.0f);
    if(i < n)
    {
        pt1 = corners[i];
        scratch[i] = pt1;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if(i >= n)
    {
        return;
    }

    const float val1 = ELEM_FLT2(eig, pt1);

    // Rank = number of points that must precede this one in descending order.
    int pos = 0;
    for (int j = 0; j < n; ++j)
    {
        const float val2 = ELEM_FLT2(eig, scratch[j]);
        if(val2 > val1)
        {
            pos++;
        }
        else if(!(val1 > val2) && j < i)
        {
            // equal values: the earlier element keeps the earlier slot
            pos++;
        }
    }

    // All reads of this block went through scratch before the barrier, so the
    // in-place write cannot disturb another work-item of this group.
    corners[pos] = pt1;
}
|
||||
// Selection sort for gfft, final stage: merges the per-work-group blocks.
//
// Every work-item computes the global rank of its own point among all `count`
// points (descending by the eig value sampled at the point) and stores the
// point at that rank. The scan of each block stops at the first value smaller
// than the work-item's own, so each block of get_local_size(0) points must
// already be sorted in descending order (presumably by
// sortCorners_selectionSortLocal -- confirm against the host code).
//
// Points with equal eig values are ranked by their current global index, so
// every work-item targets exactly one distinct slot. (Previously all tied
// work-items overwrote the same range of slots with their own point, so
// distinct points could be lost or duplicated.)
//
// NOTE(review): the kernel reads and writes `corners` in place, and there is
// no synchronization between work-groups, so one group may overwrite entries
// another group has not scanned yet. Fixing that needs a separate output
// buffer, i.e. an interface change on the host side.
__kernel
void sortCorners_selectionSortFinal
(
    image2d_t eig,
    __global float2 * corners,
    const int count
)
{
    const int i           = get_local_id(0);   // index in workgroup
    const int numOfGroups = get_num_groups(0); // number of workgroups
    const int groupID     = get_group_id(0);
    const int wg          = get_local_size(0); // workgroup size = block size
    const int self        = groupID * wg + i;  // global index of this work-item's point
    const int remainder   = count - wg * (numOfGroups - 1); // size of the last block

    if(self >= count)
        return;

    const float2 pt1  = corners[self];
    const float  val1 = ELEM_FLT2(eig, pt1);

    int pos = 0;
    for(int g = 0; g < numOfGroups; g++)
    {
        const int base = g * wg;
        const int len  = (g == numOfGroups - 1) ? remainder : wg;

        for(int k = 0; k < len; k++)
        {
            const float val2 = ELEM_FLT2(eig, corners[base + k]);

            // The block is sorted in descending order: nothing further on in it
            // can precede this point.
            if(val1 > val2)
                break;

            // Strictly greater values always precede; equal values precede only
            // when they come from a lower global index.
            if(val2 > val1 || base + k < self)
                pos++;
        }
    }

    corners[pos] = pt1;
}
|
||||
|
@ -143,7 +143,7 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa
|
||||
int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3);
|
||||
float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart);
|
||||
int4 con = dpos >= 0 && dpos < dst_cols;
|
||||
ddata = convert_float4(con) != 0 ? ddata : dVal;
|
||||
ddata = convert_float4(con) != (float4)(0) ? ddata : dVal;
|
||||
if(dstart < dst_cols)
|
||||
{
|
||||
*(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata;
|
||||
|
@ -46,145 +46,10 @@
|
||||
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
|
||||
// Vertical pass of the Scharr derivative.
//
// For every element (x, y) with y < rows and x < cols * cn this reads the
// source value in the row above, the same row and the row below, then writes
//   dx_buf[y][x] = (above + below) * 3 + centre * 10   (vertical smoothing)
//   dy_buf[y][x] = below - above                       (vertical difference)
// At the top edge the row above is taken from row 1, at the bottom edge the
// row below is taken from row rows - 2; a single-row image uses row 0 for both.
__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Work-items outside the image have nothing to do.
    if (!(y < rows && x < cols * cn))
        return;

    int rowAbove;
    if (y > 0)
        rowAbove = y - 1;
    else
        rowAbove = rows > 1 ? 1 : 0;

    int rowBelow;
    if (y < rows - 1)
        rowBelow = y + 1;
    else
        rowBelow = rows > 1 ? rows - 2 : 0;

    const uchar above  = (src + rowAbove * srcStep)[x];
    const uchar centre = (src + y * srcStep)[x];
    const uchar below  = (src + rowBelow * srcStep)[x];

    __global short* dxRow = (__global short*)((__global char*)dx_buf + y * dx_bufStep / 2);
    __global short* dyRow = (__global short*)((__global char*)dy_buf + y * dy_bufStep / 2);

    dxRow[x] = (above + below) * 3 + centre * 10;
    dyRow[x] = below - above;
}
|
||||
|
||||
// Vertical pass of the Scharr derivative (C4 variant).
//
// For every element (x, y) with y < rows and x < cols * cn this reads the
// source value in the row above, the same row and the row below, then writes
//   dx_buf[y][x] = (above + below) * 3 + centre * 10   (vertical smoothing)
//   dy_buf[y][x] = below - above                       (vertical difference)
// At the top edge the row above is taken from row 1, at the bottom edge the
// row below is taken from row rows - 2.
//
// A single-row image (rows == 1) uses row 0 for both neighbours, exactly as
// calcSharrDeriv_vertical_C1_D0 does. Without that guard the kernel would read
// row 1 and row -1, both outside the image.
__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (y < rows && x < cols * cn)
    {
        // Neighbour rows, mirrored at the borders and clamped for rows == 1.
        const int rowAbove = y > 0 ? y - 1 : (rows > 1 ? 1 : 0);
        const int rowBelow = y < rows - 1 ? y + 1 : (rows > 1 ? rows - 2 : 0);

        const uchar src_val0 = (src + rowAbove * srcStep)[x];
        const uchar src_val1 = (src + y * srcStep)[x];
        const uchar src_val2 = (src + rowBelow * srcStep)[x];

        ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10;
        ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0;
    }
}
|
||||
|
||||
// Horizontal pass of the Scharr derivative.
//
// Combines the intermediate buffers written by the vertical pass:
//   dIdx[y][x] = dx_buf[y][right] - dx_buf[y][left]
//   dIdy[y][x] = (dy_buf[y][right] + dy_buf[y][left]) * 3 + dy_buf[y][x] * 10
// where left/right are the same channel of the neighbouring pixels (x -/+ cn).
// Past the right edge the neighbour is taken from pixel cols - 2; before the
// left edge it is taken from x + cn.
//
// NOTE(review): the input rows are addressed as `buf + y * step` in short
// elements while the output rows use a byte offset of `y * step / 2`; confirm
// the step units against the host code.
__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    const int rowLen = cols * cn;

    // Work-items outside the image have nothing to do.
    if (!(y < rows && x < rowLen))
        return;

    __global const short* dxIn = dx_buf + y * dx_bufStep;
    __global const short* dyIn = dy_buf + y * dy_bufStep;

    int right;
    if (x + cn < rowLen)
        right = x + cn;
    else
        right = (cols - 2) * cn + x + cn - rowLen;

    int left;
    if (x - cn >= 0)
        left = x - cn;
    else
        left = cn + x;

    __global short* dxOut = (__global short*)((__global char*)dIdx + y * dIdxStep / 2);
    __global short* dyOut = (__global short*)((__global char*)dIdy + y * dIdyStep / 2);

    dxOut[x] = dxIn[right] - dxIn[left];
    dyOut[x] = (dyIn[right] + dyIn[left]) * 3 + dyIn[x] * 10;
}
|
||||
|
||||
// Horizontal pass of the Scharr derivative (C4 variant).
//
// Combines the intermediate buffers written by the vertical pass:
//   dIdx[y][x] = dx_buf[y][right] - dx_buf[y][left]
//   dIdy[y][x] = (dy_buf[y][right] + dy_buf[y][left]) * 3 + dy_buf[y][x] * 10
// where left/right are the same channel of the neighbouring pixels (x -/+ cn).
// Past the right edge the neighbour is taken from pixel cols - 2; before the
// left edge it is taken from x + cn.
//
// NOTE(review): the input rows are addressed as `buf + y * step` in short
// elements while the output rows use a byte offset of `y * step / 2`; confirm
// the step units against the host code.
__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    const int rowLen = cols * cn;

    // Work-items outside the image have nothing to do.
    if (!(y < rows && x < rowLen))
        return;

    __global const short* dxIn = dx_buf + y * dx_bufStep;
    __global const short* dyIn = dy_buf + y * dy_bufStep;

    int right;
    if (x + cn < rowLen)
        right = x + cn;
    else
        right = (cols - 2) * cn + x + cn - rowLen;

    int left;
    if (x - cn >= 0)
        left = x - cn;
    else
        left = cn + x;

    __global short* dxOut = (__global short*)((__global char*)dIdx + y * dIdxStep / 2);
    __global short* dyOut = (__global short*)((__global char*)dIdy + y * dIdyStep / 2);

    dxOut[x] = dxIn[right] - dxIn[left];
    dyOut[x] = (dyIn[right] + dyIn[left]) * 3 + dyIn[x] * 10;
}
|
||||
|
||||
// Number of fractional bits of the fixed-point bilinear weights: the
// linearFilter_* helpers scale their weights by (1 << W_BITS).
#define W_BITS 14
|
||||
// Shift base passed to CV_DESCALE by the linearFilter_* helpers
// (W_BITS1 or W_BITS1 - 5); currently the same value as W_BITS.
#define W_BITS1 14
|
||||
|
||||
// Rounding right shift: adds half of 2^n to x, then shifts right by n bits.
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||
|
||||
// Bilinear sample of an 8-bit image using fixed-point weights.
//
// `pt` is split into an integer base (rounded toward negative infinity) and a
// fractional part; the four neighbours at offsets (x, y), (x + cn, y),
// (x, y + 1) and (x + cn, y + 1) from that base are blended with weights
// scaled by 2^W_BITS. The weighted sum is descaled by W_BITS1 - 5 bits with
// rounding.
//
// src     - image data
// srcStep - distance between consecutive rows, in elements of `src`
// cn      - number of interleaved channels
// pt      - sub-pixel position of the patch origin
// x, y    - element offset inside the patch
int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y)
{
    // Integer part of the sample position.
    int2 base;
    base.x = convert_int_sat_rtn(pt.x);
    base.y = convert_int_sat_rtn(pt.y);

    // Fractional part, used as interpolation factors.
    const float fx = pt.x - base.x;
    const float fy = pt.y - base.y;

    const int w00 = convert_int_sat_rte((1.0f - fx) * (1.0f - fy) * (1 << W_BITS));
    const int w01 = convert_int_sat_rte(fx * (1.0f - fy) * (1 << W_BITS));
    const int w10 = convert_int_sat_rte((1.0f - fx) * fy * (1 << W_BITS));
    // The last weight is the remainder, so the four weights sum to 2^W_BITS.
    const int w11 = (1 << W_BITS) - w00 - w01 - w10;

    __global const uchar* upper = src + (base.y + y) * srcStep + base.x * cn;
    __global const uchar* lower = src + (base.y + y + 1) * srcStep + base.x * cn;

    const int blended = upper[x] * w00 + upper[x + cn] * w01 + lower[x] * w10 + lower[x + cn] * w11;
    return CV_DESCALE(blended, W_BITS1 - 5);
}
|
||||
|
||||
// Bilinear sample of a 16-bit signed image using fixed-point weights.
//
// `pt` is split into an integer base (rounded toward negative infinity) and a
// fractional part; the four neighbours at offsets (x, y), (x + cn, y),
// (x, y + 1) and (x + cn, y + 1) from that base are blended with weights
// scaled by 2^W_BITS. The weighted sum is descaled by W_BITS1 bits with
// rounding.
//
// src     - image data
// srcStep - distance between consecutive rows, in elements of `src`
// cn      - number of interleaved channels
// pt      - sub-pixel position of the patch origin
// x, y    - element offset inside the patch
int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y)
{
    // Integer part of the sample position.
    int2 base;
    base.x = convert_int_sat_rtn(pt.x);
    base.y = convert_int_sat_rtn(pt.y);

    // Fractional part, used as interpolation factors.
    const float fx = pt.x - base.x;
    const float fy = pt.y - base.y;

    const int w00 = convert_int_sat_rte((1.0f - fx) * (1.0f - fy) * (1 << W_BITS));
    const int w01 = convert_int_sat_rte(fx * (1.0f - fy) * (1 << W_BITS));
    const int w10 = convert_int_sat_rte((1.0f - fx) * fy * (1 << W_BITS));
    // The last weight is the remainder, so the four weights sum to 2^W_BITS.
    const int w11 = (1 << W_BITS) - w00 - w01 - w10;

    __global const short* upper = src + (base.y + y) * srcStep + base.x * cn;
    __global const short* lower = src + (base.y + y + 1) * srcStep + base.x * cn;

    const int blended = upper[x] * w00 + upper[x + cn] * w01 + lower[x] * w10 + lower[x + cn] * w11;
    return CV_DESCALE(blended, W_BITS1);
}
|
||||
|
||||
// Bilinear sample of a 32-bit float image.
//
// `pt` is split into an integer base (rounded toward negative infinity) and a
// fractional part; the four neighbours at offsets (x, y), (x + cn, y),
// (x, y + 1) and (x + cn, y + 1) from that base are blended with weights
// scaled by 2^W_BITS.
//
// The previous implementation ended in `return <sum>, W_BITS1 - 5;`. The comma
// operator discarded the interpolated sum, so the function always returned the
// constant W_BITS1 - 5. The sum is now actually returned, descaled by
// W_BITS1 - 5 bits.
// NOTE(review): the descale mirrors linearFilter_uchar (without the integer
// rounding term); confirm that callers expect this scaling.
//
// src     - image data
// srcStep - row stride; divided by 4 before being applied to the float pointer
// cn      - number of interleaved channels
// pt      - sub-pixel position of the patch origin
// x, y    - element offset inside the patch (truncated to int when indexing)
float linearFilter_float(__global const float* src, int srcStep, int cn, float2 pt, float x, float y)
{
    int2 ipt;
    ipt.x = convert_int_sat_rtn(pt.x);
    ipt.y = convert_int_sat_rtn(pt.y);

    float a = pt.x - ipt.x;
    float b = pt.y - ipt.y;

    float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS));
    float iw01 = (a * (1.0f - b) * (1 << W_BITS));
    float iw10 = ((1.0f - a) * b * (1 << W_BITS));
    // The last weight is the remainder, so the four weights sum to 2^W_BITS.
    float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;

    __global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn;
    __global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn;

    const int ix = (int)x;
    const float sum = src_row[ix] * iw00 + src_row[ix + cn] * iw01 + src_row1[ix] * iw10 + src_row1[ix + cn] * iw11;

    // Floating-point counterpart of CV_DESCALE(sum, W_BITS1 - 5).
    return sum / (float)(1 << (W_BITS1 - 5));
}
|
||||
|
||||
// Size constant for the reduce1/2/3 helpers below: their `#if BUFFER > 128`
// and `#if BUFFER > 64` guards enable the wider reduction steps, and the CPU
// variants store the final sum at index BUFFER.
#define BUFFER 64
|
||||
|
||||
// Fallback used when the build options do not define WAVE_SIZE. The non-CPU
// reduce helpers insert extra barriers when WAVE_SIZE is below 32 / 16.
#ifndef WAVE_SIZE
|
||||
#define WAVE_SIZE 1
|
||||
#endif
|
||||
#ifdef CPU
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
{
|
||||
@ -193,71 +58,51 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
smem3[tid] = val3 += smem3[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
smem3[tid] = val3 += smem3[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 32];
|
||||
smem2[tid] = val2 += smem2[tid + 32];
|
||||
smem3[tid] = val3 += smem3[tid + 32];
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 16];
|
||||
smem2[tid] = val2 += smem2[tid + 16];
|
||||
smem3[tid] = val3 += smem3[tid + 16];
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 8];
|
||||
smem2[tid] = val2 += smem2[tid + 8];
|
||||
smem3[tid] = val3 += smem3[tid + 8];
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 4];
|
||||
smem2[tid] = val2 += smem2[tid + 4];
|
||||
smem3[tid] = val3 += smem3[tid + 4];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 2];
|
||||
smem2[tid] = val2 += smem2[tid + 2];
|
||||
smem3[tid] = val3 += smem3[tid + 2];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
smem3[tid] += smem3[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = val1 += smem1[tid + 1];
|
||||
smem2[BUFFER] = val2 += smem2[tid + 1];
|
||||
smem3[BUFFER] = val3 += smem3[tid + 1];
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
|
||||
smem3[BUFFER] = smem3[tid] + smem3[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
@ -268,63 +113,45 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 128]);
|
||||
smem2[tid] = (val2 += smem2[tid + 128]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 64]);
|
||||
smem2[tid] = (val2 += smem2[tid + 64]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 32]);
|
||||
smem2[tid] = (val2 += smem2[tid + 32]);
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 16]);
|
||||
smem2[tid] = (val2 += smem2[tid + 16]);
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 8]);
|
||||
smem2[tid] = (val2 += smem2[tid + 8]);
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 4]);
|
||||
smem2[tid] = (val2 += smem2[tid + 4]);
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 2]);
|
||||
smem2[tid] = (val2 += smem2[tid + 2]);
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = (val1 += smem1[tid + 1]);
|
||||
smem2[BUFFER] = (val2 += smem2[tid + 1]);
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
smem2[BUFFER] = smem2[tid] + smem2[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
@ -334,205 +161,146 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 128]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 64]);
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 32]);
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 16)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 16]);
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 8)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 8]);
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 4)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 4]);
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 2)
|
||||
{
|
||||
smem1[tid] = (val1 += smem1[tid + 2]);
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (tid < 1)
|
||||
{
|
||||
smem1[BUFFER] = (val1 += smem1[tid + 1]);
|
||||
smem1[BUFFER] = smem1[tid] + smem1[tid + 1];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
#else
|
||||
void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid)
|
||||
void reduce3(float val1, float val2, float val3,
|
||||
__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
smem3[tid] = val3;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
smem3[tid] = val3 += smem3[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
smem3[tid] = val3 += smem3[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
volatile __local float* vmem3 = smem3;
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
smem3[tid] += smem3[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
smem3[tid] += smem3[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
smem3[tid] += smem3[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
vmem3[tid] = val3 += vmem3[tid + 32];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
smem3[tid] += smem3[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
vmem3[tid] = val3 += vmem3[tid + 16];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
smem3[tid] += smem3[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
vmem3[tid] = val3 += vmem3[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
vmem3[tid] = val3 += vmem3[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
vmem3[tid] = val3 += vmem3[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
vmem3[tid] = val3 += vmem3[tid + 1];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
smem3[tid] += smem3[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid)
|
||||
void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
smem2[tid] = val2;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
smem2[tid] = val2 += smem2[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
smem2[tid] = val2 += smem2[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
volatile __local float* vmem2 = smem2;
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
smem2[tid] += smem2[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
smem2[tid] += smem2[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem2[tid] += smem2[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem2[tid] = val2 += vmem2[tid + 32];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem2[tid] += smem2[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem2[tid] = val2 += vmem2[tid + 16];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem2[tid] += smem2[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem2[tid] = val2 += vmem2[tid + 8];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem2[tid] = val2 += vmem2[tid + 4];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem2[tid] = val2 += vmem2[tid + 2];
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
vmem2[tid] = val2 += vmem2[tid + 1];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
smem2[tid] += smem2[tid + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void reduce1(float val1, __local float* smem1, int tid)
|
||||
void reduce1(float val1, __local volatile float* smem1, int tid)
|
||||
{
|
||||
smem1[tid] = val1;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#if BUFFER > 128
|
||||
if (tid < 128)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 128];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
#if BUFFER > 64
|
||||
if (tid < 64)
|
||||
{
|
||||
smem1[tid] = val1 += smem1[tid + 64];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
if (tid < 32)
|
||||
{
|
||||
volatile __local float* vmem1 = smem1;
|
||||
|
||||
vmem1[tid] = val1 += vmem1[tid + 32];
|
||||
vmem1[tid] = val1 += vmem1[tid + 16];
|
||||
vmem1[tid] = val1 += vmem1[tid + 8];
|
||||
vmem1[tid] = val1 += vmem1[tid + 4];
|
||||
vmem1[tid] = val1 += vmem1[tid + 2];
|
||||
vmem1[tid] = val1 += vmem1[tid + 1];
|
||||
smem1[tid] += smem1[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 16];
|
||||
#if WAVE_SIZE <16
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 8) {
|
||||
#endif
|
||||
smem1[tid] += smem1[tid + 8];
|
||||
smem1[tid] += smem1[tid + 4];
|
||||
smem1[tid] += smem1[tid + 2];
|
||||
smem1[tid] += smem1[tid + 1];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// 2^-20 as a float. NOTE(review): the code that uses SCALE is not visible
// here - confirm what it scales.
#define SCALE (1.0f / (1 << 20))
|
||||
// NOTE(review): usage not visible here - presumably a convergence/validity
// threshold for the tracker; confirm.
#define THRESHOLD 0.01f
|
||||
// NOTE(review): usage not visible here - presumably the patch side length in
// pixels; confirm.
#define DIMENSION 21
|
||||
|
||||
// Image read mode: unnormalized (pixel) coordinates, out-of-range coordinates
// clamped to the image edge, linear filtering between neighbouring texels.
|
||||
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
|
||||
|
@ -61,6 +61,8 @@
|
||||
#include <exception>
|
||||
#include <stdio.h>
|
||||
|
||||
#undef OPENCV_NOSTL
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
#include "opencv2/ocl.hpp"
|
||||
@ -74,6 +76,7 @@
|
||||
|
||||
#if defined (HAVE_OPENCL)
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#include "opencv2/ocl/private/util.hpp"
|
||||
#include "safe_call.hpp"
|
||||
|
||||
|
@ -15,8 +15,8 @@
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
// Nathan, liujun@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
@ -54,35 +54,20 @@ namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char *pyrlk;
|
||||
extern const char *pyrlk_no_image;
|
||||
extern const char *operator_setTo;
|
||||
extern const char *operator_convertTo;
|
||||
extern const char *operator_copyToM;
|
||||
extern const char *arithm_mul;
|
||||
extern const char *pyr_down;
|
||||
}
|
||||
}
|
||||
|
||||
// Three unsigned extents (x, y, z); plain aggregate.
struct dim3
{
    unsigned int x;
    unsigned int y;
    unsigned int z;
};
|
||||
|
||||
// Pair of floats (x, y); plain aggregate.
struct float2
{
    float x;
    float y;
};
|
||||
|
||||
// Pair of ints (x, y); plain aggregate.
struct int2
{
    int x;
    int y;
};
|
||||
|
||||
namespace
|
||||
{
|
||||
void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
|
||||
static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
|
||||
{
|
||||
winSize.width *= cn;
|
||||
|
||||
@ -102,12 +87,6 @@ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe
|
||||
|
||||
block.z = patch.z = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Number of `grain`-sized chunks needed to cover `total` items, i.e. the
// quotient total / grain rounded up (for positive operands).
inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////// ConvertTo ////////////////////////////////
|
||||
@ -448,89 +427,6 @@ static void copyTo(const oclMat &src, oclMat &m )
|
||||
src.data, src.step, src.cols * src.elemSize(), src.rows, src.offset);
|
||||
}
|
||||
|
||||
// static void copyTo(const oclMat &src, oclMat &mat, const oclMat &mask)
|
||||
// {
|
||||
// if (mask.empty())
|
||||
// {
|
||||
// copyTo(src, mat);
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// mat.create(src.size(), src.type());
|
||||
// copy_to_with_mask_cus(src, mat, mask, "copy_to_with_mask");
|
||||
// }
|
||||
// }
|
||||
|
||||
static void arithmetic_run(const oclMat &src1, oclMat &dst, String kernelName, const char **kernelString, void *_scalar)
|
||||
{
|
||||
if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
||||
{
|
||||
CV_Error(Error::GpuNotSupported, "Selected device don't support double\r\n");
|
||||
return;
|
||||
}
|
||||
|
||||
//dst.create(src1.size(), src1.type());
|
||||
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
|
||||
// src1.rows == src2.rows && src2.rows == dst.rows);
|
||||
CV_Assert(src1.cols == dst.cols &&
|
||||
src1.rows == dst.rows);
|
||||
|
||||
CV_Assert(src1.type() == dst.type());
|
||||
CV_Assert(src1.depth() != CV_8S);
|
||||
|
||||
Context *clCxt = src1.clCxt;
|
||||
//int channels = dst.channels();
|
||||
//int depth = dst.depth();
|
||||
|
||||
//int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1},
|
||||
// {4, 0, 4, 4, 1, 1, 1}
|
||||
//};
|
||||
|
||||
//size_t vector_length = vector_lengths[channels-1][depth];
|
||||
//int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
|
||||
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
|
||||
|
||||
size_t localThreads[3] = { 16, 16, 1 };
|
||||
//size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
|
||||
// divUp(dst.rows, localThreads[1]) * localThreads[1],
|
||||
// 1
|
||||
// };
|
||||
size_t globalThreads[3] = { src1.cols,
|
||||
src1.rows,
|
||||
1
|
||||
};
|
||||
|
||||
int dst_step1 = dst.cols * dst.elemSize();
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src1.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.offset ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src2.data ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.step ));
|
||||
//args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||
|
||||
//if(_scalar != NULL)
|
||||
//{
|
||||
float scalar1 = *((float *)_scalar);
|
||||
args.push_back( std::make_pair( sizeof(float), (float *)&scalar1 ));
|
||||
//}
|
||||
|
||||
openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH);
|
||||
}
|
||||
|
||||
static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar)
|
||||
{
|
||||
arithmetic_run(src1, dst, "arithm_muls", &arithm_mul, (void *)(&scalar));
|
||||
}
|
||||
|
||||
static void pyrdown_run_cus(const oclMat &src, const oclMat &dst)
|
||||
{
|
||||
|
||||
@ -576,15 +472,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 };
|
||||
size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 8 : 32, 1};
|
||||
int cn = I.oclchannels();
|
||||
char calcErr;
|
||||
if (level == 0)
|
||||
{
|
||||
calcErr = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
calcErr = 0;
|
||||
}
|
||||
char calcErr = level==0?1:0;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
|
||||
@ -614,7 +502,16 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
|
||||
if(isImageSupported)
|
||||
{
|
||||
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
|
||||
std::stringstream idxStr;
|
||||
idxStr << kernelName.c_str() << "_C" << I.oclchannels() << "_D" << I.depth();
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str().c_str());
|
||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
|
||||
static char opt[16] = {0};
|
||||
sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
|
||||
|
||||
openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), opt, CLFLUSH);
|
||||
releaseTexture(ITex);
|
||||
releaseTexture(JTex);
|
||||
}
|
||||
@ -656,9 +553,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
|
||||
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
|
||||
oclMat temp2 = nextPts.reshape(1);
|
||||
//oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
|
||||
multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
|
||||
//::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
|
||||
multiply(1.0f/(1<<maxLevel)/2.0f, temp1, temp2);
|
||||
|
||||
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
|
||||
//status.setTo(Scalar::all(1));
|
||||
@ -675,7 +570,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
//ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, err);
|
||||
|
||||
// build the image pyramids.
|
||||
|
||||
prevPyr_.resize(maxLevel + 1);
|
||||
nextPyr_.resize(maxLevel + 1);
|
||||
|
||||
@ -703,7 +597,6 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next
|
||||
}
|
||||
|
||||
// dI/dx ~ Ix, dI/dy ~ Iy
|
||||
|
||||
for (int level = maxLevel; level >= 0; level--)
|
||||
{
|
||||
lkSparse_run(prevPyr_[level], nextPyr_[level],
|
||||
|
@ -47,7 +47,7 @@
|
||||
#define __OPENCV_OPENCL_SAFE_CALL_HPP__
|
||||
|
||||
#if defined __APPLE__
|
||||
#include <OpenCL/OpenCL.h>
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
@ -73,7 +73,6 @@ TEST_P(Canny, Accuracy)
|
||||
double low_thresh = 50.0;
|
||||
double high_thresh = 100.0;
|
||||
|
||||
cv::resize(img, img, cv::Size(512, 384));
|
||||
cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
|
||||
|
||||
cv::ocl::oclMat edges;
|
||||
|
@ -55,6 +55,12 @@ using namespace testing;
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
extern string workdir;
|
||||
|
||||
namespace
|
||||
{
|
||||
IMPLEMENT_PARAM_CLASS(CascadeName, std::string);
|
||||
CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml"));
|
||||
CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml"));
|
||||
struct getRect
|
||||
{
|
||||
Rect operator ()(const CvAvgComp &e) const
|
||||
@ -62,23 +68,24 @@ struct getRect
|
||||
return e.rect;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(Haar, double, int)
|
||||
PARAM_TEST_CASE(Haar, double, int, CascadeName)
|
||||
{
|
||||
cv::ocl::OclCascadeClassifier cascade, nestedCascade;
|
||||
cv::ocl::OclCascadeClassifierBuf cascadebuf;
|
||||
cv::CascadeClassifier cpucascade, cpunestedCascade;
|
||||
|
||||
double scale;
|
||||
int flags;
|
||||
std::string cascadeName;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
scale = GET_PARAM(0);
|
||||
flags = GET_PARAM(1);
|
||||
string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml";
|
||||
cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(2));
|
||||
|
||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName )))
|
||||
if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) )
|
||||
{
|
||||
cout << "ERROR: Could not load classifier cascade" << endl;
|
||||
return;
|
||||
@ -115,7 +122,7 @@ TEST_P(Haar, FaceDetect)
|
||||
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
|
||||
oclfaces.resize(vecAvgComp.size());
|
||||
std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
|
||||
|
||||
|
||||
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
@ -136,7 +143,6 @@ TEST_P(Haar, FaceDetectUseBuf)
|
||||
vector<Rect> faces, oclfaces;
|
||||
|
||||
Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
|
||||
MemStorage storage(cvCreateMemStorage(0));
|
||||
cvtColor( img, gray, CV_BGR2GRAY );
|
||||
resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
|
||||
equalizeHist( smallImg, smallImg );
|
||||
@ -144,19 +150,31 @@ TEST_P(Haar, FaceDetectUseBuf)
|
||||
cv::ocl::oclMat image;
|
||||
image.upload(smallImg);
|
||||
|
||||
cv::ocl::OclCascadeClassifierBuf cascadebuf;
|
||||
if( !cascadebuf.load( cascadeName ) )
|
||||
{
|
||||
cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl;
|
||||
return;
|
||||
}
|
||||
cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
cascadebuf.release();
|
||||
|
||||
cpucascade.detectMultiScale( smallImg, faces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30), Size(0, 0) );
|
||||
EXPECT_EQ(faces.size(), oclfaces.size());
|
||||
|
||||
// intentionally run ocl facedetect again and check if it still works after the first run
|
||||
cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3,
|
||||
flags,
|
||||
Size(30, 30));
|
||||
cascadebuf.release();
|
||||
EXPECT_EQ(faces.size(), oclfaces.size());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(FaceDetect, Haar,
|
||||
Combine(Values(1.0),
|
||||
Values(CV_HAAR_SCALE_IMAGE, 0)));
|
||||
Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2)));
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -55,6 +55,83 @@ using namespace testing;
|
||||
using namespace std;
|
||||
|
||||
extern string workdir;
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// GoodFeaturesToTrack
|
||||
namespace
|
||||
{
|
||||
IMPLEMENT_PARAM_CLASS(MinDistance, double)
|
||||
}
|
||||
PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance)
|
||||
{
|
||||
double minDistance;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
minDistance = GET_PARAM(0);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(GoodFeaturesToTrack, Accuracy)
|
||||
{
|
||||
cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(frame.empty());
|
||||
|
||||
int maxCorners = 1000;
|
||||
double qualityLevel = 0.01;
|
||||
|
||||
cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
|
||||
|
||||
cv::ocl::oclMat d_pts;
|
||||
detector(oclMat(frame), d_pts);
|
||||
|
||||
ASSERT_FALSE(d_pts.empty());
|
||||
|
||||
std::vector<cv::Point2f> pts(d_pts.cols);
|
||||
|
||||
detector.downloadPoints(d_pts, pts);
|
||||
|
||||
std::vector<cv::Point2f> pts_gold;
|
||||
cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance);
|
||||
|
||||
ASSERT_EQ(pts_gold.size(), pts.size());
|
||||
|
||||
size_t mistmatch = 0;
|
||||
for (size_t i = 0; i < pts.size(); ++i)
|
||||
{
|
||||
cv::Point2i a = pts_gold[i];
|
||||
cv::Point2i b = pts[i];
|
||||
|
||||
bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1;
|
||||
|
||||
if (!eq)
|
||||
++mistmatch;
|
||||
}
|
||||
|
||||
double bad_ratio = static_cast<double>(mistmatch) / pts.size();
|
||||
|
||||
ASSERT_LE(bad_ratio, 0.01);
|
||||
}
|
||||
|
||||
TEST_P(GoodFeaturesToTrack, EmptyCorners)
|
||||
{
|
||||
int maxCorners = 1000;
|
||||
double qualityLevel = 0.01;
|
||||
|
||||
cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
|
||||
|
||||
cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0));
|
||||
cv::ocl::oclMat corners(1, maxCorners, CV_32FC2);
|
||||
|
||||
detector(src, corners);
|
||||
|
||||
ASSERT_TRUE(corners.empty());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
|
||||
testing::Values(MinDistance(0.0), MinDistance(3.0)));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
PARAM_TEST_CASE(TVL1, bool)
|
||||
{
|
||||
|
@ -59,17 +59,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h,
|
||||
|
||||
switch (src.type()) {
|
||||
case CV_8U:
|
||||
parallel_for(cv::BlockedRange(0, src.rows),
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<uchar>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for(cv::BlockedRange(0, src.rows),
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec2b>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for(cv::BlockedRange(0, src.rows),
|
||||
parallel_for_(cv::Range(0, src.rows),
|
||||
FastNlMeansDenoisingInvoker<cv::Vec3b>(
|
||||
src, dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
@ -159,19 +159,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds
|
||||
|
||||
switch (srcImgs[0].type()) {
|
||||
case CV_8U:
|
||||
parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<uchar>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC2:
|
||||
parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
break;
|
||||
case CV_8UC3:
|
||||
parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
|
||||
parallel_for_(cv::Range(0, srcImgs[0].rows),
|
||||
FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
|
||||
srcImgs, imgToDenoiseIndex, temporalWindowSize,
|
||||
dst, templateWindowSize, searchWindowSize, h));
|
||||
|
@ -51,12 +51,12 @@
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
struct FastNlMeansDenoisingInvoker {
|
||||
struct FastNlMeansDenoisingInvoker : ParallelLoopBody {
|
||||
public:
|
||||
FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst,
|
||||
int template_window_size, int search_window_size, const float h);
|
||||
|
||||
void operator() (const BlockedRange& range) const;
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
private:
|
||||
void operator= (const FastNlMeansDenoisingInvoker&);
|
||||
@ -152,9 +152,9 @@ FastNlMeansDenoisingInvoker<T>::FastNlMeansDenoisingInvoker(
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
|
||||
int row_from = range.begin();
|
||||
int row_to = range.end() - 1;
|
||||
void FastNlMeansDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
|
||||
Array2d<int> dist_sums(search_window_size_, search_window_size_);
|
||||
|
||||
|
@ -51,13 +51,13 @@
|
||||
using namespace cv;
|
||||
|
||||
template <typename T>
|
||||
struct FastNlMeansMultiDenoisingInvoker {
|
||||
struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody {
|
||||
public:
|
||||
FastNlMeansMultiDenoisingInvoker(
|
||||
const std::vector<Mat>& srcImgs, int imgToDenoiseIndex, int temporalWindowSize,
|
||||
Mat& dst, int template_window_size, int search_window_size, const float h);
|
||||
|
||||
void operator() (const BlockedRange& range) const;
|
||||
void operator() (const Range& range) const;
|
||||
|
||||
private:
|
||||
void operator= (const FastNlMeansMultiDenoisingInvoker&);
|
||||
@ -171,9 +171,9 @@ FastNlMeansMultiDenoisingInvoker<T>::FastNlMeansMultiDenoisingInvoker(
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const BlockedRange& range) const {
|
||||
int row_from = range.begin();
|
||||
int row_to = range.end() - 1;
|
||||
void FastNlMeansMultiDenoisingInvoker<T>::operator() (const Range& range) const {
|
||||
int row_from = range.start;
|
||||
int row_to = range.end - 1;
|
||||
|
||||
Array3d<int> dist_sums(temporal_window_size_, search_window_size_, search_window_size_);
|
||||
|
||||
|
@ -43,8 +43,9 @@
|
||||
#ifndef __OPENCV_PRECOMP_H__
|
||||
#define __OPENCV_PRECOMP_H__
|
||||
|
||||
#include "opencv2/photo.hpp"
|
||||
#include "opencv2/core/private.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/photo.hpp"
|
||||
|
||||
#ifdef HAVE_TEGRA_OPTIMIZATION
|
||||
#include "opencv2/photo/photo_tegra.hpp"
|
||||
|
@ -65,7 +65,7 @@ struct DistIdxPair
|
||||
};
|
||||
|
||||
|
||||
struct MatchPairsBody
|
||||
struct MatchPairsBody : ParallelLoopBody
|
||||
{
|
||||
MatchPairsBody(const MatchPairsBody& other)
|
||||
: matcher(other.matcher), features(other.features),
|
||||
@ -76,10 +76,10 @@ struct MatchPairsBody
|
||||
: matcher(_matcher), features(_features),
|
||||
pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {}
|
||||
|
||||
void operator ()(const BlockedRange &r) const
|
||||
void operator ()(const Range &r) const
|
||||
{
|
||||
const int num_images = static_cast<int>(features.size());
|
||||
for (int i = r.begin(); i < r.end(); ++i)
|
||||
for (int i = r.start; i < r.end; ++i)
|
||||
{
|
||||
int from = near_pairs[i].first;
|
||||
int to = near_pairs[i].second;
|
||||
@ -525,9 +525,9 @@ void FeaturesMatcher::operator ()(const std::vector<ImageFeatures> &features, st
|
||||
MatchPairsBody body(*this, features, pairwise_matches, near_pairs);
|
||||
|
||||
if (is_thread_safe_)
|
||||
parallel_for(BlockedRange(0, static_cast<int>(near_pairs.size())), body);
|
||||
parallel_for_(Range(0, static_cast<int>(near_pairs.size())), body);
|
||||
else
|
||||
body(BlockedRange(0, static_cast<int>(near_pairs.size())));
|
||||
body(Range(0, static_cast<int>(near_pairs.size())));
|
||||
LOGLN_CHAT("");
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/core/utility.hpp"
|
||||
#include "opencv2/stitching.hpp"
|
||||
#include "opencv2/stitching/detail/autocalib.hpp"
|
||||
#include "opencv2/stitching/detail/blenders.hpp"
|
||||
|
@ -702,14 +702,14 @@ void BackgroundSubtractorMOG2Impl::apply(InputArray _image, OutputArray _fgmask,
|
||||
|
||||
parallel_for_(Range(0, image.rows),
|
||||
MOG2Invoker(image, fgmask,
|
||||
(GMM*)bgmodel.data,
|
||||
(float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
|
||||
bgmodelUsedModes.data, nmixtures, (float)learningRate,
|
||||
(float)varThreshold,
|
||||
backgroundRatio, varThresholdGen,
|
||||
fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
|
||||
bShadowDetection, nShadowDetection),
|
||||
image.total()/(double)(1 << 16));
|
||||
(GMM*)bgmodel.data,
|
||||
(float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols),
|
||||
bgmodelUsedModes.data, nmixtures, (float)learningRate,
|
||||
(float)varThreshold,
|
||||
backgroundRatio, varThresholdGen,
|
||||
fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau,
|
||||
bShadowDetection, nShadowDetection),
|
||||
image.total()/(double)(1 << 16));
|
||||
}
|
||||
|
||||
void BackgroundSubtractorMOG2Impl::getBackgroundImage(OutputArray backgroundImage) const
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user