diff --git a/.gitattributes b/.gitattributes index af704cdf0c..cd4359ba34 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,7 +33,7 @@ CMakeLists.txt text whitespace=tabwidth=2 *.png binary -*.jepg binary +*.jpeg binary *.jpg binary *.exr binary *.ico binary diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index af1581349e..03183d1c2a 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -1,12 +1,30 @@ #Cross compile TBB from source project(tbb) -# 4.1 update 2 - works fine -set(tbb_ver "tbb41_20130116oss") -set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130116oss_src.tgz") -set(tbb_md5 "3809790e1001a1b32d59c9fee590ee85") +if (WIN32 AND NOT ARM) + message(FATAL_ERROR "BUILD_TBB option supports Windows on ARM only!\nUse regular official TBB build instead of the BUILD_TBB option!") +endif() + +# 4.1 update 4 - works fine +set(tbb_ver "tbb41_20130613oss") +set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130613oss_src.tgz") +set(tbb_md5 "108c8c1e481b0aaea61878289eb28b6a") set(tbb_version_file "version_string.ver") -ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) +ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow -Wunused-parameter) + +# 4.1 update 3 dev - works fine +#set(tbb_ver "tbb41_20130401oss") +#set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130401oss_src.tgz") +#set(tbb_md5 "f2f591a0d2ca8f801e221ce7d9ea84bb") +#set(tbb_version_file "version_string.ver") +#ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) + +# 4.1 update 2 - works fine +#set(tbb_ver "tbb41_20130116oss") +#set(tbb_url "http://threadingbuildingblocks.org/sites/default/files/software_releases/source/tbb41_20130116oss_src.tgz") +#set(tbb_md5 "3809790e1001a1b32d59c9fee590ee85") +#set(tbb_version_file "version_string.ver") +#ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) # 4.1 update 1 - works fine 
#set(tbb_ver "tbb41_20121003oss") @@ -107,7 +125,7 @@ if(NOT EXISTS "${tbb_src_dir}") RESULT_VARIABLE tbb_untar_RESULT) if(NOT tbb_untar_RESULT EQUAL 0 OR NOT EXISTS "${tbb_src_dir}") - message(FATAL_ERROR "Failed to unpack TBB sources") + message(FATAL_ERROR "Failed to unpack TBB sources from ${tbb_tarball} to ${tbb_src_dir} with error ${tbb_untar_RESULT}") endif() endif() @@ -123,13 +141,22 @@ file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h") list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp") if (WIN32) - add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 - -D__TBB_BUILD=1 - -D_UNICODE - -DUNICODE - -DWINAPI_FAMILY=WINAPI_FAMILY_APP - -DDO_ITT_NOTIFY=0 + add_definitions(/D__TBB_DYNAMIC_LOAD_ENABLED=0 + /D__TBB_BUILD=1 + /DTBB_NO_LEGACY=1 + /D_UNICODE + /DUNICODE + /DWINAPI_FAMILY=WINAPI_FAMILY_APP + /DDO_ITT_NOTIFY=0 + /DUSE_WINTHREAD ) # defines were copied from windows.cl.inc + + if (ARM) + add_definitions(/D_WIN32_WINNT=0x0602 + /D__TBB_WIN32_USE_CL_BUILTINS + ) + endif() + set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /APPCONTAINER") else() add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 #required @@ -173,7 +200,23 @@ endif() set(TBB_SOURCE_FILES ${TBB_SOURCE_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/${tbb_version_file}") add_library(tbb ${TBB_SOURCE_FILES}) -target_link_libraries(tbb c m dl) + +if (WIN32) + if (ARM) + set(platform_macro /D_M_ARM=1) + endif() + + add_custom_command(TARGET tbb + PRE_BUILD + COMMAND ${CMAKE_C_COMPILER} /nologo /TC /EP ${tbb_src_dir}\\src\\tbb\\win32-tbb-export.def /DTBB_NO_LEGACY=1 /D_CRT_SECURE_NO_DEPRECATE /D__TBB_BUILD=1 ${platform_macro} /I${tbb_src_dir}\\src /I${tbb_src_dir}\\include > "${tbb_src_dir}\\src\\tbb\\tbb.def" + WORKING_DIRECTORY ${tbb_src_dir}\\src\\tbb + COMMENT "Generating tbb.def file" VERBATIM + ) + + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEF:${tbb_src_dir}/src/tbb/tbb.def /DLL /MAP /fixed:no /INCREMENTAL:NO") +else() + target_link_libraries(tbb c m dl) +endif() 
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations) string(REPLACE "-Werror=non-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") @@ -182,6 +225,7 @@ set_target_properties(tbb PROPERTIES OUTPUT_NAME tbb DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH} + RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH} ) if(ENABLE_SOLUTION_FOLDERS) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93549c9430..f464b2263c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,19 @@ if(UNIX AND NOT ANDROID) endif() endif() +# Add these standard paths to the search paths for FIND_PATH +# to find include files from these locations first +if(MINGW) + if(EXISTS /mingw) + list(APPEND CMAKE_INCLUDE_PATH /mingw) + endif() + if(EXISTS /mingw32) + list(APPEND CMAKE_INCLUDE_PATH /mingw32) + endif() + if(EXISTS /mingw64) + list(APPEND CMAKE_INCLUDE_PATH /mingw64) + endif() +endif() # ---------------------------------------------------------------------------- # OpenCV cmake options @@ -110,7 +123,7 @@ endif() # Optional 3rd party components # =================================================== -OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O" ON IF IOS) OCV_OPTION(WITH_CARBON "Use Carbon for UI instead of Cocoa" OFF IF APPLE ) OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" ON IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) @@ -286,6 +299,10 @@ set(OPENCV_CONFIG_FILE_INCLUDE_DIR "${CMAKE_BINARY_DIR}/" CACHE PATH "Where to c add_definitions(-DHAVE_CVCONFIG_H) ocv_include_directories(${OPENCV_CONFIG_FILE_INCLUDE_DIR}) +# ---------------------------------------------------------------------------- +# Path for additional modules +# ---------------------------------------------------------------------------- 
+set(OPENCV_EXTRA_MODULES_PATH "" CACHE PATH "Where to look for additional OpenCV modules") # ---------------------------------------------------------------------------- # Autodetect if we are in a GIT repository @@ -402,7 +419,7 @@ if(ANDROID) if(NOT ANDROID_TOOLS_Pkg_Revision GREATER 13) message(WARNING "OpenCV requires Android SDK tools revision 14 or newer. Otherwise tests and samples will no be compiled.") endif() -elseif(ANT_EXECUTABLE) +else() find_package(JNI) endif() @@ -456,15 +473,15 @@ if(BUILD_EXAMPLES OR BUILD_ANDROID_EXAMPLES OR INSTALL_PYTHON_EXAMPLES) endif() if(ANDROID) - add_subdirectory(android/service) + add_subdirectory(platforms/android/service) endif() if(BUILD_ANDROID_PACKAGE) - add_subdirectory(android/package) + add_subdirectory(platforms/android/package) endif() if (ANDROID) - add_subdirectory(android/libinfo) + add_subdirectory(platforms/android/libinfo) endif() # ---------------------------------------------------------------------------- @@ -830,7 +847,7 @@ status(" ant:" ANT_EXECUTABLE THEN "${ANT_EXECUTABLE} (ver ${A if(NOT ANDROID) status(" JNI:" JNI_INCLUDE_DIRS THEN "${JNI_INCLUDE_DIRS}" ELSE NO) endif() -status(" Java tests:" BUILD_TESTS AND (NOT ANDROID OR CAN_BUILD_ANDROID_PROJECTS) THEN YES ELSE NO) +status(" Java tests:" BUILD_TESTS AND (CAN_BUILD_ANDROID_PROJECTS OR HAVE_opencv_java) THEN YES ELSE NO) # ========================== documentation ========================== if(BUILD_DOCS) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 8fc54b1b8e..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,11 +0,0 @@ -We greatly appreciate your support and contributions and they are always welcomed! - -Github pull requests are the convenient way to contribute to OpenCV project. 
Good pull requests have all of these attributes: - -* Are scoped to one specific issue -* Include a test to demonstrate the correctness -* Update the docs if relevant -* Match the [coding style guidelines](http://code.opencv.org/projects/opencv/wiki/CodingStyleGuide) -* Don't messed by "oops" commits - -You can find more detailes about contributing process on http://opencv.org/contribute.html \ No newline at end of file diff --git a/README b/README index 9dd45a230b..0799dff89f 100644 --- a/README +++ b/README @@ -4,3 +4,14 @@ Homepage: http://opencv.org Online docs: http://docs.opencv.org Q&A forum: http://answers.opencv.org Dev zone: http://code.opencv.org + +Please read before starting work on a pull request: + http://code.opencv.org/projects/opencv/wiki/How_to_contribute + +Summary of guidelines: + +* One pull request per issue; +* Choose the right base branch; +* Include tests and documentation; +* Clean up "oops" commits before submitting; +* Follow the coding style guide. diff --git a/android/scripts/build.cmd b/android/scripts/build.cmd deleted file mode 100644 index 3e0f1666b6..0000000000 --- a/android/scripts/build.cmd +++ /dev/null @@ -1,90 +0,0 @@ -@ECHO OFF - -:: enable command extensions -VERIFY BADVALUE 2>NUL -SETLOCAL ENABLEEXTENSIONS || (ECHO Unable to enable command extensions. & EXIT \B) - -:: build environment -SET SOURCE_DIR=%cd% -IF EXIST .\android.toolchain.cmake (SET BUILD_OPENCV=1) ELSE (SET BUILD_OPENCV=0) -IF EXIST .\jni\nul (SET BUILD_JAVA_PART=1) ELSE (SET BUILD_JAVA_PART=0) - -:: load configuration -PUSHD %~dp0 -SET SCRIPTS_DIR=%cd% -IF EXIST .\wincfg.cmd CALL .\wincfg.cmd -POPD - -:: inherit old names -IF NOT DEFINED CMAKE SET CMAKE=%CMAKE_EXE% -IF NOT DEFINED MAKE SET MAKE=%MAKE_EXE% - -:: defaults -IF NOT DEFINED BUILD_DIR SET BUILD_DIR=build -IF NOT DEFINED ANDROID_ABI SET ANDROID_ABI=armeabi-v7a -SET OPENCV_BUILD_DIR=%SCRIPTS_DIR%\..\%BUILD_DIR% - -:: check that all required variables defined -PUSHD . 
-IF NOT DEFINED ANDROID_NDK (ECHO. & ECHO You should set an environment variable ANDROID_NDK to the full path to your copy of Android NDK & GOTO end) -(CD "%ANDROID_NDK%") || (ECHO. & ECHO Directory "%ANDROID_NDK%" specified by ANDROID_NDK variable does not exist & GOTO end) - -IF NOT EXIST "%CMAKE%" (ECHO. & ECHO You should set an environment variable CMAKE to the full path to cmake executable & GOTO end) -IF NOT EXIST "%MAKE%" (ECHO. & ECHO You should set an environment variable MAKE to the full path to native port of make executable & GOTO end) - -IF NOT %BUILD_JAVA_PART%==1 GOTO required_variables_checked - -IF NOT DEFINED ANDROID_SDK (ECHO. & ECHO You should set an environment variable ANDROID_SDK to the full path to your copy of Android SDK & GOTO end) -(CD "%ANDROID_SDK%" 2>NUL) || (ECHO. & ECHO Directory "%ANDROID_SDK%" specified by ANDROID_SDK variable does not exist & GOTO end) - -IF NOT DEFINED ANT_DIR (ECHO. & ECHO You should set an environment variable ANT_DIR to the full path to Apache Ant root & GOTO end) -(CD "%ANT_DIR%" 2>NUL) || (ECHO. & ECHO Directory "%ANT_DIR%" specified by ANT_DIR variable does not exist & GOTO end) - -IF NOT DEFINED JAVA_HOME (ECHO. & ECHO You should set an environment variable JAVA_HOME to the full path to JDK & GOTO end) -(CD "%JAVA_HOME%" 2>NUL) || (ECHO. & ECHO Directory "%JAVA_HOME%" specified by JAVA_HOME variable does not exist & GOTO end) - -:required_variables_checked -POPD - -:: check for ninja -echo "%MAKE%"|findstr /i ninja >nul: -IF %errorlevel%==1 (SET BUILD_WITH_NINJA=0) ELSE (SET BUILD_WITH_NINJA=1) -IF %BUILD_WITH_NINJA%==1 (SET CMAKE_GENERATOR=Ninja) ELSE (SET CMAKE_GENERATOR=MinGW Makefiles) - -:: create build dir -IF DEFINED REBUILD rmdir /S /Q "%BUILD_DIR%" 2>NUL -MKDIR "%BUILD_DIR%" 2>NUL -PUSHD "%BUILD_DIR%" || (ECHO. & ECHO Directory "%BUILD_DIR%" is not found & GOTO end) - -:: run cmake -ECHO. & ECHO Runnning cmake... -ECHO ANDROID_ABI=%ANDROID_ABI% -ECHO. 
-IF NOT %BUILD_OPENCV%==1 GOTO other-cmake -:opencv-cmake -("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DCMAKE_TOOLCHAIN_FILE="%SOURCE_DIR%"\android.toolchain.cmake -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%\..") && GOTO cmakefin -ECHO. & ECHO cmake failed & GOTO end -:other-cmake -("%CMAKE%" -G"%CMAKE_GENERATOR%" -DANDROID_ABI="%ANDROID_ABI%" -DOpenCV_DIR="%OPENCV_BUILD_DIR%" -DCMAKE_TOOLCHAIN_FILE="%OPENCV_BUILD_DIR%\..\android.toolchain.cmake" -DCMAKE_MAKE_PROGRAM="%MAKE%" %* "%SOURCE_DIR%") && GOTO cmakefin -ECHO. & ECHO cmake failed & GOTO end -:cmakefin - -:: run make -ECHO. & ECHO Building native libs... -IF %BUILD_WITH_NINJA%==0 ("%MAKE%" -j %NUMBER_OF_PROCESSORS% VERBOSE=%VERBOSE%) || (ECHO. & ECHO make failed & GOTO end) -IF %BUILD_WITH_NINJA%==1 ("%MAKE%") || (ECHO. & ECHO ninja failed & GOTO end) - -IF NOT %BUILD_JAVA_PART%==1 GOTO end -POPD && PUSHD %SOURCE_DIR% - -:: configure java part -ECHO. & ECHO Updating Android project... -(CALL "%ANDROID_SDK%\tools\android" update project --name %PROJECT_NAME% --path .) || (ECHO. & ECHO failed to update android project & GOTO end) - -:: compile java part -ECHO. & ECHO Compiling Android project... -(CALL "%ANT_DIR%\bin\ant" debug) || (ECHO. & ECHO failed to compile android project & GOTO end) - -:end -POPD -ENDLOCAL diff --git a/android/scripts/cmake_android.cmd b/android/scripts/cmake_android.cmd deleted file mode 100644 index 212c04b47e..0000000000 --- a/android/scripts/cmake_android.cmd +++ /dev/null @@ -1,5 +0,0 @@ -@ECHO OFF - -PUSHD %~dp0.. -CALL .\scripts\build.cmd %* -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -POPD \ No newline at end of file diff --git a/android/scripts/cmake_android_armeabi.sh b/android/scripts/cmake_android_armeabi.sh deleted file mode 100755 index 9c711d8855..0000000000 --- a/android/scripts/cmake_android_armeabi.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. 
- -mkdir -p build_armeabi -cd build_armeabi - -cmake -DANDROID_ABI=armeabi -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/cmake_android_mips.sh b/android/scripts/cmake_android_mips.sh deleted file mode 100755 index 17d2ff937e..0000000000 --- a/android/scripts/cmake_android_mips.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_mips -cd build_mips - -cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/cmake_android_neon.sh b/android/scripts/cmake_android_neon.sh deleted file mode 100755 index 5e85605b56..0000000000 --- a/android/scripts/cmake_android_neon.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_neon -cd build_neon - -cmake -DANDROID_ABI="armeabi-v7a with NEON" -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. - diff --git a/android/scripts/cmake_android_service.sh b/android/scripts/cmake_android_service.sh deleted file mode 100755 index 0dbd482520..0000000000 --- a/android/scripts/cmake_android_service.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_service -cd build_service - -cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../.. diff --git a/android/scripts/cmake_android_x86.sh b/android/scripts/cmake_android_x86.sh deleted file mode 100755 index a01df2e668..0000000000 --- a/android/scripts/cmake_android_x86.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -cd `dirname $0`/.. - -mkdir -p build_x86 -cd build_x86 - -cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. 
- diff --git a/android/scripts/wincfg.cmd.tmpl b/android/scripts/wincfg.cmd.tmpl deleted file mode 100644 index 166a5e7b02..0000000000 --- a/android/scripts/wincfg.cmd.tmpl +++ /dev/null @@ -1,30 +0,0 @@ -:: variables required for OpenCV build :: -:: Note: all pathes should be specified without tailing slashes! -SET ANDROID_NDK=C:\full\path\to\your\copy\of\android\NDK\android-ndk-r7b -SET CMAKE_EXE=C:\full\path\to\cmake\utility\cmake.exe -SET MAKE_EXE=%ANDROID_NDK%\prebuilt\windows\bin\make.exe - -:: variables required for android-opencv build :: -SET ANDROID_SDK=C:\full\path\to\your\copy\of\android\SDK\android-sdk-windows -SET ANT_DIR=C:\full\path\to\ant\directory\apache-ant-1.8.2 -SET JAVA_HOME=C:\full\path\to\JDK\jdk1.6.0_25 - -:: configuration options :: -:::: general ARM-V7 settings -SET ANDROID_ABI=armeabi-v7a -SET BUILD_DIR=build - -:::: uncomment following lines to compile for old emulator or old device -::SET ANDROID_ABI=armeabi -::SET BUILD_DIR=build_armeabi - -:::: uncomment following lines to compile for ARM-V7 with NEON support -::SET ANDROID_ABI=armeabi-v7a with NEON -::SET BUILD_DIR=build_neon - -:::: uncomment following lines to compile for x86 -::SET ANDROID_ABI=x86 -::SET BUILD_DIR=build_x86 - -:::: other options -::SET ANDROID_NATIVE_API_LEVEL=8 &:: android-3 is enough for native part of OpenCV but android-8 is required for Java API diff --git a/android/service/doc/Makefile b/android/service/doc/Makefile deleted file mode 100644 index b8e7bba113..0000000000 --- a/android/service/doc/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
- -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." 
- -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/OpenCVEngine.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/OpenCVEngine.qhc" - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
diff --git a/apps/traincascade/boost.cpp b/apps/traincascade/boost.cpp index 2d29f338b0..4f91d5a29d 100644 --- a/apps/traincascade/boost.cpp +++ b/apps/traincascade/boost.cpp @@ -766,7 +766,7 @@ float CvCascadeBoostTrainData::getVarValue( int vi, int si ) } -struct FeatureIdxOnlyPrecalc +struct FeatureIdxOnlyPrecalc : ParallelLoopBody { FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u ) { @@ -776,11 +776,11 @@ struct FeatureIdxOnlyPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { cv::AutoBuffer valCache(sample_count); float* valCachePtr = (float*)valCache; - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -803,7 +803,7 @@ struct FeatureIdxOnlyPrecalc bool is_buf_16u; }; -struct FeatureValAndIdxPrecalc +struct FeatureValAndIdxPrecalc : ParallelLoopBody { FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u ) { @@ -814,9 +814,9 @@ struct FeatureValAndIdxPrecalc idst = _buf->data.i; is_buf_16u = _is_buf_16u; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) { for( int si = 0; si < sample_count; si++ ) { @@ -840,7 +840,7 @@ struct FeatureValAndIdxPrecalc bool is_buf_16u; }; -struct FeatureValOnlyPrecalc +struct FeatureValOnlyPrecalc : ParallelLoopBody { FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count ) { @@ -848,9 +848,9 @@ struct FeatureValOnlyPrecalc valCache = _valCache; sample_count = _sample_count; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) 
const { - for ( int fi = range.begin(); fi < range.end(); fi++) + for ( int fi = range.start; fi < range.end; fi++) for( int si = 0; si < sample_count; si++ ) valCache->at(fi,si) = (*featureEvaluator)( fi, si ); } @@ -864,12 +864,12 @@ void CvCascadeBoostTrainData::precalculate() int minNum = MIN( numPrecalcVal, numPrecalcIdx); double proctime = -TIME( 0 ); - parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx), - FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(0, minNum), - FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); - parallel_for( BlockedRange(minNum, numPrecalcVal), - FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); + parallel_for_( Range(numPrecalcVal, numPrecalcIdx), + FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(0, minNum), + FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) ); + parallel_for_( Range(minNum, numPrecalcVal), + FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) ); cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl; } diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index aeed112ae0..7a91b188ae 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -47,6 +47,9 @@ macro(add_extra_compiler_option option) endif() endmacro() +# OpenCV fails some tests when 'char' is 'unsigned' by default +add_extra_compiler_option(-fsigned-char) + if(MINGW) # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838 # here we are trying to workaround the problem diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index f3d101ab21..8db667762e 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -26,6 +26,15 @@ if(CUDA_FOUND) set(HAVE_CUBLAS 1) endif() + if(${CUDA_VERSION} VERSION_LESS "5.5") + 
find_cuda_helper_libs(npp) + else() + find_cuda_helper_libs(nppc) + find_cuda_helper_libs(nppi) + find_cuda_helper_libs(npps) + set(CUDA_npp_LIBRARY ${CUDA_nppc_LIBRARY} ${CUDA_nppi_LIBRARY} ${CUDA_npps_LIBRARY}) + endif() + if(WITH_NVCUVID) find_cuda_helper_libs(nvcuvid) set(HAVE_NVCUVID 1) @@ -136,8 +145,6 @@ if(CUDA_FOUND) mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR) - find_cuda_helper_libs(npp) - macro(ocv_cuda_compile VAR) foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var}_backup_in_cuda_compile_ "${${var}}") diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index 014066bc7e..2c96274a8c 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -44,12 +44,18 @@ if(OPENCL_FOUND) set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) - if (X86_64) + if(WIN32 AND X86_64) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) - elseif (X86) + elseif(WIN32) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) endif() + if(X86_64 AND UNIX) + set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64) + elseif(X86 AND UNIX) + set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32) + endif() + if(WITH_OPENCLAMDFFT) find_path(CLAMDFFT_ROOT_DIR NAMES include/clAmdFft.h @@ -80,7 +86,7 @@ if(OPENCL_FOUND) if(WITH_OPENCLAMDBLAS) find_path(CLAMDBLAS_ROOT_DIR NAMES include/clAmdBlas.h - PATHS ENV CLAMDFFT_PATH ENV ProgramFiles + PATHS ENV CLAMDBLAS_PATH ENV ProgramFiles PATH_SUFFIXES clAmdBlas AMD/clAmdBlas DOC "AMD FFT root directory" NO_DEFAULT_PATH) diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index 2ea864c16f..d685d23feb 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -24,7 +24,6 @@ if(WITH_QT) if(Qt5Core_FOUND AND Qt5Gui_FOUND AND Qt5Widgets_FOUND AND Qt5Test_FOUND AND Qt5Concurrent_FOUND) set(HAVE_QT5 ON) set(HAVE_QT ON) - add_definitions(-DHAVE_QT) find_package(Qt5OpenGL) if(Qt5OpenGL_FOUND) 
set(QT_QTOPENGL_FOUND ON) @@ -33,10 +32,9 @@ if(WITH_QT) endif() if(NOT HAVE_QT) - find_package(Qt4) + find_package(Qt4 REQUIRED QtCore QtGui QtTest) if(QT4_FOUND) set(HAVE_QT TRUE) - add_definitions(-DHAVE_QT) # We need to define the macro this way, using cvconfig.h does not work endif() endif() endif() @@ -61,7 +59,6 @@ if(WITH_OPENGL) list(APPEND OPENCV_LINKER_LIBS ${OPENGL_LIBRARIES}) if(QT_QTOPENGL_FOUND) set(HAVE_QT_OPENGL TRUE) - add_definitions(-DHAVE_QT_OPENGL) else() ocv_include_directories(${OPENGL_INCLUDE_DIR}) endif() diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index fbb47d4861..0ca4828fe6 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -81,10 +81,33 @@ endif(WITH_GIGEAPI) # --- Dc1394 --- ocv_clear_vars(HAVE_DC1394 HAVE_DC1394_2) if(WITH_1394) - CHECK_MODULE(libdc1394-2 HAVE_DC1394_2) - if(NOT HAVE_DC1394_2) - CHECK_MODULE(libdc1394 HAVE_DC1394) - endif() + if(WIN32 AND MINGW) + find_path(CMU1394_INCLUDE_PATH "/1394common.h" + PATH_SUFFIXES include + DOC "The path to cmu1394 headers") + find_path(DC1394_2_INCLUDE_PATH "/dc1394/dc1394.h" + PATH_SUFFIXES include + DOC "The path to DC1394 2.x headers") + if(CMU1394_INCLUDE_PATH AND DC1394_2_INCLUDE_PATH) + set(CMU1394_LIB_DIR "${CMU1394_INCLUDE_PATH}/../lib" CACHE PATH "Full path of CMU1394 library directory") + set(DC1394_2_LIB_DIR "${DC1394_2_INCLUDE_PATH}/../lib" CACHE PATH "Full path of DC1394 2.x library directory") + if(EXISTS "${CMU1394_LIB_DIR}/lib1394camera.a" AND EXISTS "${DC1394_2_LIB_DIR}/libdc1394.a") + set(HAVE_DC1394_2 TRUE) + endif() + endif() + if(HAVE_DC1394_2) + ocv_parse_pkg("libdc1394-2" "${DC1394_2_LIB_DIR}/pkgconfig" "") + ocv_include_directories(${DC1394_2_INCLUDE_PATH}) + set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} + "${DC1394_2_LIB_DIR}/libdc1394.a" + "${CMU1394_LIB_DIR}/lib1394camera.a") + endif(HAVE_DC1394_2) + else(WIN32 AND MINGW) + CHECK_MODULE(libdc1394-2 HAVE_DC1394_2) + if(NOT HAVE_DC1394_2) + 
CHECK_MODULE(libdc1394 HAVE_DC1394) + endif() + endif(WIN32 AND MINGW) endif(WITH_1394) # --- xine --- @@ -197,7 +220,7 @@ endif(WITH_MSMF) # --- Extra HighGUI libs on Windows --- if(WIN32) - list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 vfw32) + list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 setupapi ws2_32 vfw32) if(MINGW64) list(APPEND HIGHGUI_LIBRARIES avifil32 avicap32 winmm msvfw32) list(REMOVE_ITEM HIGHGUI_LIBRARIES vfw32) diff --git a/cmake/OpenCVFindXimea.cmake b/cmake/OpenCVFindXimea.cmake index 5600275f47..27e2a78ad4 100644 --- a/cmake/OpenCVFindXimea.cmake +++ b/cmake/OpenCVFindXimea.cmake @@ -9,6 +9,7 @@ # # Created: 5 Aug 2011 by Marian Zajko (marian.zajko@ximea.com) # Updated: 25 June 2012 by Igor Kuzmin (parafin@ximea.com) +# Updated: 22 October 2012 by Marian Zajko (marian.zajko@ximea.com) # set(XIMEA_FOUND) @@ -18,11 +19,15 @@ set(XIMEA_LIBRARY_DIR) if(WIN32) # Try to find the XIMEA API path in registry. GET_FILENAME_COMPONENT(XIMEA_PATH "[HKEY_CURRENT_USER\\Software\\XIMEA\\CamSupport\\API;Path]" ABSOLUTE) - - if(EXISTS XIMEA_PATH) + + if(EXISTS "${XIMEA_PATH}") set(XIMEA_FOUND 1) # set LIB folders - set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86") + if(CMAKE_CL_64) + set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x64") + else() + set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86") + endif() else() set(XIMEA_FOUND 0) endif() @@ -38,5 +43,4 @@ endif() mark_as_advanced(FORCE XIMEA_FOUND) mark_as_advanced(FORCE XIMEA_PATH) -mark_as_advanced(FORCE XIMEA_LIBRARY_DIR) - +mark_as_advanced(FORCE XIMEA_LIBRARY_DIR) \ No newline at end of file diff --git a/cmake/OpenCVGenConfig.cmake b/cmake/OpenCVGenConfig.cmake index 705ccc8df1..c99cae7883 100644 --- a/cmake/OpenCVGenConfig.cmake +++ b/cmake/OpenCVGenConfig.cmake @@ -162,7 +162,7 @@ if(UNIX) endif() if(ANDROID) - install(FILES "${OpenCV_SOURCE_DIR}/android/android.toolchain.cmake" DESTINATION ${OPENCV_CONFIG_INSTALL_PATH}/) + install(FILES "${OpenCV_SOURCE_DIR}/platforms/android/android.toolchain.cmake" DESTINATION 
${OPENCV_CONFIG_INSTALL_PATH}/) endif() # -------------------------------------------------------------------------------------------- diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 8312845fe0..81340bd0eb 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -303,7 +303,7 @@ macro(ocv_glob_modules) # collect modules set(OPENCV_INITIAL_PASS ON) foreach(__path ${ARGN}) - ocv_get_real_path(__path "${__path}") + get_filename_component(__path "${__path}" ABSOLUTE) list(FIND __directories_observed "${__path}" __pathIdx) if(__pathIdx GREATER -1) @@ -315,7 +315,7 @@ macro(ocv_glob_modules) if(__ocvmodules) list(SORT __ocvmodules) foreach(mod ${__ocvmodules}) - ocv_get_real_path(__modpath "${__path}/${mod}") + get_filename_component(__modpath "${__path}/${mod}" ABSOLUTE) if(EXISTS "${__modpath}/CMakeLists.txt") list(FIND __directories_observed "${__modpath}" __pathIdx) @@ -470,7 +470,8 @@ endmacro() # ocv_create_module() # ocv_create_module(SKIP_LINK) macro(ocv_create_module) - add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}) + add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES} + "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp") if(NOT "${ARGN}" STREQUAL "SKIP_LINK") target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN}) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index db24c99708..59366eb03b 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -411,16 +411,6 @@ macro(ocv_regex_escape var regex) endmacro() -# get absolute path with symlinks resolved -macro(ocv_get_real_path VAR PATHSTR) - if(CMAKE_VERSION VERSION_LESS 2.8) - get_filename_component(${VAR} "${PATHSTR}" ABSOLUTE) - else() - 
get_filename_component(${VAR} "${PATHSTR}" REALPATH) - endif() -endmacro() - - # convert list of paths to full paths macro(ocv_convert_to_full_paths VAR) if(${VAR}) @@ -511,6 +501,13 @@ macro(ocv_parse_header2 LIBNAME HDR_PATH VARNAME) endif() endmacro() +# read single version info from the pkg file +macro(ocv_parse_pkg LIBNAME PKG_PATH SCOPE) + if(EXISTS "${PKG_PATH}/${LIBNAME}.pc") + file(STRINGS "${PKG_PATH}/${LIBNAME}.pc" line_to_parse REGEX "^Version:[ \t]+[0-9.]*.*$" LIMIT_COUNT 1) + STRING(REGEX REPLACE ".*Version: ([^ ]+).*" "\\1" ALIASOF_${LIBNAME}_VERSION "${line_to_parse}" ) + endif() +endmacro() ################################################################################################ # short command to setup source group diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake index db46af4b6d..f12730988d 100644 --- a/cmake/templates/cvconfig.h.cmake +++ b/cmake/templates/cvconfig.h.cmake @@ -228,3 +228,9 @@ /* Clp support */ #cmakedefine HAVE_CLP + +/* Qt support */ +#cmakedefine HAVE_QT + +/* Qt OpenGL support */ +#cmakedefine HAVE_QT_OPENGL diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 0f2695fc9a..70f4809d22 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -53,8 +53,8 @@ if(BUILD_DOCS AND HAVE_SPHINX) endif() endforeach() - file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/android/service/doc/*.rst") - file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/android/service/doc/*.jpg") + file(GLOB_RECURSE _OPENCV_FILES_REF "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.rst") + file(GLOB_RECURSE _OPENCV_FILES_REF_PICT "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.png" "${OpenCV_SOURCE_DIR}/platforms/android/service/doc/*.jpg") list(APPEND OPENCV_FILES_REF ${_OPENCV_FILES_REF}) list(APPEND OPENCV_FILES_REF_PICT ${_OPENCV_FILES_REF_PICT}) diff --git a/doc/conf.py b/doc/conf.py index 4c7a15c891..f3f7aec58a 100755 --- 
a/doc/conf.py +++ b/doc/conf.py @@ -239,7 +239,7 @@ latex_documents = [ u'', 'manual'), ('doc/tutorials/tutorials', 'opencv_tutorials.tex', u'The OpenCV Tutorials', u'', 'manual'), - ('android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual', + ('platforms/android/refman', 'opencv2manager.tex', u'The OpenCV Manager Manual', u'', 'manual'), ] diff --git a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst index 9196c87d6a..6637e2590c 100644 --- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst +++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst @@ -12,8 +12,8 @@ For the distortion OpenCV takes into account the radial and tangential factors. .. math:: - x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k^3 r^6) \\ - y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k^3 r^6) + x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\ + y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) So for an old pixel point at :math:`(x,y)` coordinate in the input image, for a corrected output image its position will be :math:`(x_{corrected} y_{corrected})` . The presence of the radial distortion manifests in form of the "barrel" or "fish-eye" effect. diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst index 47eafedbc7..54d28890ab 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst @@ -85,7 +85,7 @@ This tutorial code's is shown lines below. You can also download it from `here < std::vector< DMatch > good_matches; for( int i = 0; i < descriptors_1.rows; i++ ) - { if( matches[i].distance < 2*min_dist ) + { if( matches[i].distance <= 2*min_dist ) { good_matches.push_back( matches[i]); } } @@ -127,6 +127,3 @@ Result .. 
image:: images/Feature_FlannMatcher_Keypoints_Result.jpg :align: center :height: 250pt - - - diff --git a/doc/tutorials/introduction/ios_install/ios_install.rst b/doc/tutorials/introduction/ios_install/ios_install.rst index ace657b21c..8d117a0b42 100644 --- a/doc/tutorials/introduction/ios_install/ios_install.rst +++ b/doc/tutorials/introduction/ios_install/ios_install.rst @@ -37,7 +37,7 @@ Building OpenCV from Source, using CMake and Command Line .. code-block:: bash cd ~/ - python opencv/ios/build_framework.py ios + python opencv/platforms/ios/build_framework.py ios If everything's fine, a few minutes later you will get ~//ios/opencv2.framework. You can add this framework to your Xcode projects. diff --git a/index.rst b/index.rst index 909bf908b8..5f50b66d0f 100644 --- a/index.rst +++ b/index.rst @@ -10,7 +10,7 @@ Welcome to opencv documentation! :maxdepth: 2 modules/refman.rst - android/refman.rst + platforms/android/refman.rst doc/user_guide/user_guide.rst doc/tutorials/tutorials.rst diff --git a/ios/configure-device_xcode.sh b/ios/configure-device_xcode.sh deleted file mode 100755 index 8c28a3e909..0000000000 --- a/ios/configure-device_xcode.sh +++ /dev/null @@ -1 +0,0 @@ -cmake -GXcode -DCMAKE_TOOLCHAIN_FILE=../opencv/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake -DCMAKE_INSTALL_PREFIX=../OpenCV_iPhoneOS ../opencv diff --git a/ios/configure-simulator_xcode.sh b/ios/configure-simulator_xcode.sh deleted file mode 100755 index 50e00261db..0000000000 --- a/ios/configure-simulator_xcode.sh +++ /dev/null @@ -1 +0,0 @@ -cmake -GXcode -DCMAKE_TOOLCHAIN_FILE=../opencv/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake -DCMAKE_INSTALL_PREFIX=../OpenCV_iPhoneSimulator ../opencv diff --git a/ios/readme.txt b/ios/readme.txt deleted file mode 100644 index 1441b241b7..0000000000 --- a/ios/readme.txt +++ /dev/null @@ -1,15 +0,0 @@ -Assuming that your build directory is on the same level that opencv source, -From the build directory run - 
../opencv/ios/configure-device_xcode.sh -or - ../opencv/ios/configure-simulator_xcode.sh - -Then from the same folder invoke - -xcodebuild -sdk iphoneos -configuration Release -target ALL_BUILD -xcodebuild -sdk iphoneos -configuration Release -target install install - -or - -xcodebuild -sdk iphonesimulator -configuration Release -target ALL_BUILD -xcodebuild -sdk iphonesimulator -configuration Release -target install install \ No newline at end of file diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt index 4a6ed6d11e..3e1ad708e6 100644 --- a/modules/CMakeLists.txt +++ b/modules/CMakeLists.txt @@ -2,4 +2,4 @@ if(NOT OPENCV_MODULES_PATH) set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}") endif() -ocv_glob_modules(${OPENCV_MODULES_PATH}) +ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) diff --git a/modules/androidcamera/CMakeLists.txt b/modules/androidcamera/CMakeLists.txt index d54dd5d208..8ac8ced88e 100644 --- a/modules/androidcamera/CMakeLists.txt +++ b/modules/androidcamera/CMakeLists.txt @@ -6,7 +6,7 @@ set(the_description "Auxiliary module for Android native camera support") set(OPENCV_MODULE_TYPE STATIC) ocv_define_module(androidcamera INTERNAL opencv_core log dl) -ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/android/service/engine/jni/include") +ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/camera_wrapper" "${OpenCV_SOURCE_DIR}/platforms/android/service/engine/jni/include") # Android source tree for native camera SET (ANDROID_SOURCE_TREE "ANDROID_SOURCE_TREE-NOTFOUND" CACHE PATH diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp index 0d1cc46915..f213a114f4 100644 --- a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp @@ -639,9 +639,9 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, double param1=3., double 
param2=0.99); //! finds coordinates of epipolar lines corresponding the specified points -CV_EXPORTS void computeCorrespondEpilines( InputArray points, - int whichImage, InputArray F, - OutputArray lines ); +CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, + int whichImage, InputArray F, + OutputArray lines ); CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, InputArray projPoints1, InputArray projPoints2, diff --git a/modules/calib3d/src/solvepnp.cpp b/modules/calib3d/src/solvepnp.cpp index 25988be48a..3d2c0c2c47 100644 --- a/modules/calib3d/src/solvepnp.cpp +++ b/modules/calib3d/src/solvepnp.cpp @@ -115,31 +115,6 @@ namespace cv transform(points, modif_points, transformation); } - class Mutex - { - public: - Mutex() { - } - void lock() - { -#ifdef HAVE_TBB - resultsMutex.lock(); -#endif - } - - void unlock() - { -#ifdef HAVE_TBB - resultsMutex.unlock(); -#endif - } - - private: -#ifdef HAVE_TBB - tbb::mutex resultsMutex; -#endif - }; - struct CameraParameters { void init(Mat _intrinsics, Mat _distCoeffs) diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 32514276b5..623883df74 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -699,7 +699,7 @@ struct PrefilterInvoker }; -struct FindStereoCorrespInvoker +struct FindStereoCorrespInvoker : ParallelLoopBody { FindStereoCorrespInvoker( const Mat& _left, const Mat& _right, Mat& _disp, CvStereoBMState* _state, @@ -713,12 +713,12 @@ struct FindStereoCorrespInvoker validDisparityRect = _validDisparityRect; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { int cols = left->cols, rows = left->rows; - int _row0 = min(cvRound(range.begin() * rows / nstripes), rows); - int _row1 = min(cvRound(range.end() * rows / nstripes), rows); - uchar *ptr = state->slidingSumBuf->data.ptr + range.begin() * stripeBufSize; + int _row0 = min(cvRound(range.start * rows / 
nstripes), rows); + int _row1 = min(cvRound(range.end * rows / nstripes), rows); + uchar *ptr = state->slidingSumBuf->data.ptr + range.start * stripeBufSize; int FILTERED = (state->minDisparity - 1)*16; Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0); @@ -871,14 +871,10 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat const bool useShorts = false; #endif -#ifdef HAVE_TBB const double SAD_overhead_coeff = 10.0; double N0 = 8000000 / (useShorts ? 1 : 4); // approx tbb's min number instructions reasonable for one thread double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height); int nstripes = cvCeil(height / maxStripeSize); -#else - const int nstripes = 1; -#endif int bufSize = max(bufSize0 * nstripes, max(bufSize1 * 2, bufSize2)); @@ -898,9 +894,9 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat state->minDisparity, state->numberOfDisparities, state->SADWindowSize); - parallel_for(BlockedRange(0, nstripes), - FindStereoCorrespInvoker(left, right, disp, state, nstripes, - bufSize0, useShorts, validDisparityRect)); + parallel_for_(Range(0, nstripes), + FindStereoCorrespInvoker(left, right, disp, state, nstripes, + bufSize0, useShorts, validDisparityRect)); if( state->speckleRange >= 0 && state->speckleWindowSize > 0 ) { diff --git a/modules/contrib/src/inputoutput.cpp b/modules/contrib/src/inputoutput.cpp index d10d884c83..a711f242ad 100644 --- a/modules/contrib/src/inputoutput.cpp +++ b/modules/contrib/src/inputoutput.cpp @@ -1,7 +1,7 @@ #include "opencv2/contrib/contrib.hpp" -#ifdef WIN32 +#if defined(WIN32) || defined(_WIN32) #include #include #else diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst index ca9f5e21a2..3705879228 100644 --- a/modules/core/doc/basic_structures.rst +++ b/modules/core/doc/basic_structures.rst @@ -489,6 +489,9 @@ Various Ptr constructors. .. 
ocv:function:: Ptr::Ptr(_Tp* _obj) .. ocv:function:: Ptr::Ptr(const Ptr& ptr) + :param _obj: Object for copy. + :param ptr: Object for copy. + Ptr::~Ptr --------- The Ptr destructor. @@ -501,6 +504,8 @@ Assignment operator. .. ocv:function:: Ptr& Ptr::operator = (const Ptr& ptr) + :param ptr: Object for assignment. + Decrements own reference counter (with ``release()``) and increments ptr's reference counter. Ptr::addref @@ -1465,6 +1470,7 @@ Adds elements to the bottom of the matrix. .. ocv:function:: void Mat::push_back( const Mat& m ) :param elem: Added element(s). + :param m: Added line(s). The methods add one or more elements to the bottom of the matrix. They emulate the corresponding method of the STL vector class. When ``elem`` is ``Mat`` , its type and the number of columns must be the same as in the container matrix. @@ -1691,7 +1697,7 @@ Returns the depth of a matrix element. .. ocv:function:: int Mat::depth() const -The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed 3-channel array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values: +The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed element array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values: * ``CV_8U`` - 8-bit unsigned integers ( ``0..255`` ) @@ -2160,7 +2166,6 @@ Various SparseMat constructors. :param dims: Array dimensionality. :param _sizes: Sparce matrix size on all dementions. :param _type: Sparse matrix data type. - :param try1d: if try1d is true and matrix is a single-column matrix (Nx1), then the sparse matrix will be 1-dimensional. SparseMat::~SparseMat --------------------- @@ -2175,6 +2180,8 @@ Provides sparse matrix assignment operators. .. ocv:function:: SparseMat& SparseMat::operator = (const SparseMat& m) .. 
ocv:function:: SparseMat& SparseMat::operator = (const Mat& m) + :param m: Matrix for assignment. + The last variant is equivalent to the corresponding constructor with try1d=false. @@ -2202,6 +2209,10 @@ Convert sparse matrix with possible type change and scaling. .. ocv:function:: void SparseMat::convertTo( SparseMat& m, int rtype, double alpha=1 ) const .. ocv:function:: void SparseMat::convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const + :param m: Destination matrix. + :param rtype: Destination matrix type. + :param alpha: Conversion multiplier. + The first version converts arbitrary sparse matrix to dense matrix and multiplies all the matrix elements by the specified scalar. The second versiob converts sparse matrix to dense matrix with optional type conversion and scaling. When rtype=-1, the destination element type will be the same as the sparse matrix element type. @@ -2294,7 +2305,7 @@ The method returns the number of matrix channels. SparseMat::size --------------- -Returns the array of sizes or matrix size by i dimention and 0 if the matrix is not allocated. +Returns the array of sizes or matrix size by i dimension and 0 if the matrix is not allocated. .. ocv:function:: const int* SparseMat::size() const .. ocv:function:: int SparseMat::size(int i) const @@ -2322,6 +2333,11 @@ Compute element hash value from the element indices. .. ocv:function:: size_t SparseMat::hash(int i0, int i1, int i2) const .. ocv:function:: size_t SparseMat::hash(const int* idx) const + :param i0: The first dimension index. + :param i1: The second dimension index. + :param i2: The third dimension index. + :param idx: Array of element indices for multidimensional matrices. + SparseMat::ptr -------------- Low-level element-access functions, special variants for 1D, 2D, 3D cases, and the generic one for n-D case. @@ -2331,6 +2347,12 @@ Low-level element-access functions, special variants for 1D, 2D, 3D cases, and t ..
ocv:function:: uchar* SparseMat::ptr(int i0, int i1, int i2, bool createMissing, size_t* hashval=0) .. ocv:function:: uchar* SparseMat::ptr(const int* idx, bool createMissing, size_t* hashval=0) + :param i0: The first dimension index. + :param i1: The second dimension index. + :param i2: The third dimension index. + :param idx: Array of element indices for multidimensional matrices. + :param createMissing: Create new element with 0 value if it does not exist in SparseMat. + Return pointer to the matrix element. If the element is there (it is non-zero), the pointer to it is returned. If it is not there and ``createMissing=false``, NULL pointer is returned. If it is not there and ``createMissing=true``, the new elementis created and initialized with 0. Pointer to it is returned. If the optional hashval pointer is not ``NULL``, @@ -2344,6 +2366,11 @@ Erase the specified matrix element. When there is no such an element, the method .. ocv:function:: void SparseMat::erase(int i0, int i1, int i2, size_t* hashval=0) .. ocv:function:: void SparseMat::erase(const int* idx, size_t* hashval=0) + :param i0: The first dimension index. + :param i1: The second dimension index. + :param i2: The third dimension index. + :param idx: Array of element indices for multidimensional matrices. + SparseMat\_ ----------- .. ocv:class:: SparseMat_ diff --git a/modules/core/doc/clustering.rst b/modules/core/doc/clustering.rst index 46130bc8fd..f58e99ce2c 100644 --- a/modules/core/doc/clustering.rst +++ b/modules/core/doc/clustering.rst @@ -17,12 +17,18 @@ Finds centers of clusters and groups input samples around the clusters. :param samples: Floating-point matrix of input samples, one row per sample. + :param data: Data for clustering. + :param cluster_count: Number of clusters to split the set by. + :param K: Number of clusters to split the set by. + :param labels: Input/output integer array that stores the cluster indices for every sample.
:param criteria: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy. The accuracy is specified as ``criteria.epsilon``. As soon as each of the cluster centers moves by less than ``criteria.epsilon`` on some iteration, the algorithm stops. + :param termcrit: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy. + :param attempts: Flag to specify the number of times the algorithm is executed using different initial labellings. The algorithm returns the labels that yield the best compactness (see the last function parameter). :param rng: CvRNG state initialized by RNG(). @@ -37,6 +43,8 @@ Finds centers of clusters and groups input samples around the clusters. :param centers: Output matrix of the cluster centers, one row per each cluster center. + :param _centers: Output matrix of the cluster centers, one row per each cluster center. + :param compactness: The returned value that is described below. The function ``kmeans`` implements a k-means algorithm that finds the diff --git a/modules/core/doc/drawing_functions.rst b/modules/core/doc/drawing_functions.rst index 24328f9a54..342301db97 100644 --- a/modules/core/doc/drawing_functions.rst +++ b/modules/core/doc/drawing_functions.rst @@ -234,6 +234,8 @@ Calculates the width and height of a text string. :param text: Input text string. + :param text_string: Input text string in C format. + :param fontFace: Font to use. See the :ocv:func:`putText` for details. :param fontScale: Font scale. See the :ocv:func:`putText` for details. @@ -242,6 +244,12 @@ Calculates the width and height of a text string. :param baseLine: Output parameter - y-coordinate of the baseline relative to the bottom-most text point. + :param baseline: Output parameter - y-coordinate of the baseline relative to the bottom-most text point. + + :param font: Font description in terms of old C API. 
+ + :param text_size: Output parameter - The size of a box that contains the specified text. + The function ``getTextSize`` calculates and returns the size of a box that contains the specified text. That is, the following code renders some text, the tight box surrounding it, and the baseline: :: diff --git a/modules/core/doc/operations_on_arrays.rst b/modules/core/doc/operations_on_arrays.rst index d338444760..bd55993afe 100644 --- a/modules/core/doc/operations_on_arrays.rst +++ b/modules/core/doc/operations_on_arrays.rst @@ -1062,6 +1062,8 @@ Returns the determinant of a square floating-point matrix. :param mtx: input matrix that must have ``CV_32FC1`` or ``CV_64FC1`` type and square size. + :param mat: input matrix that must have ``CV_32FC1`` or ``CV_64FC1`` type and square size. + The function ``determinant`` calculates and returns the determinant of the specified matrix. For small matrices ( ``mtx.cols=mtx.rows<=3`` ), the direct method is used. For larger matrices, the function uses LU factorization with partial pivoting. diff --git a/modules/core/doc/utility_and_system_functions_and_macros.rst b/modules/core/doc/utility_and_system_functions_and_macros.rst index 54198b058a..41cf7e1b72 100644 --- a/modules/core/doc/utility_and_system_functions_and_macros.rst +++ b/modules/core/doc/utility_and_system_functions_and_macros.rst @@ -173,6 +173,8 @@ Checks a condition at runtime and throws exception if it fails .. ocv:function:: CV_Assert(expr) + :param expr: Expression for check. + The macros ``CV_Assert`` (and ``CV_DbgAssert``) evaluate the specified expression. If it is 0, the macros raise an error (see :ocv:func:`error` ). The macro ``CV_Assert`` checks the condition in both Debug and Release configurations while ``CV_DbgAssert`` is only retained in the Debug configuration. @@ -188,8 +190,14 @@ Signals an error and raises an exception. :param status: Error code. Normally, it is a negative value. The list of pre-defined error codes can be found in ``cxerror.h`` . 
+ :param func_name: The function name where error occurs. + :param err_msg: Text of the error message. + :param file_name: The file name where error occurs. + + :param line: The line number where error occurs. + :param args: ``printf`` -like formatted error message in parentheses. The function and the helper macros ``CV_Error`` and ``CV_Error_``: :: @@ -249,6 +257,7 @@ Allocates an aligned memory buffer. .. ocv:cfunction:: void* cvAlloc( size_t size ) :param size: Allocated buffer size. + :param bufSize: Allocated buffer size. The function allocates the buffer of the specified size and returns it. When the buffer size is 16 bytes or more, the returned buffer is aligned to 16 bytes. diff --git a/modules/core/doc/xml_yaml_persistence.rst b/modules/core/doc/xml_yaml_persistence.rst index c7d55d01f5..28bae24508 100644 --- a/modules/core/doc/xml_yaml_persistence.rst +++ b/modules/core/doc/xml_yaml_persistence.rst @@ -181,6 +181,17 @@ Opens a file. .. ocv:function:: bool FileStorage::open(const string& filename, int flags, const string& encoding=string()) + :param filename: Name of the file to open or the text string to read the data from. + Extension of the file (``.xml`` or ``.yml``/``.yaml``) determines its format (XML or YAML respectively). + Also you can append ``.gz`` to work with compressed files, for example ``myHugeMatrix.xml.gz``. + If both ``FileStorage::WRITE`` and ``FileStorage::MEMORY`` flags are specified, ``filename`` + is used just to specify the output file format (e.g. ``mydata.xml``, ``.yml`` etc.). + + :param flags: Mode of operation. See FileStorage constructor for more details. + + :param encoding: Encoding of the file. Note that UTF-16 XML encoding is not supported currently and you should use 8-bit encoding instead of it. + + See description of parameters in :ocv:func:`FileStorage::FileStorage`. The method calls :ocv:func:`FileStorage::release` before opening the file.
diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 1c8e0e2cac..bc1a68fb77 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -1322,7 +1322,8 @@ public: EXPR = 6 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT, OPENGL_TEXTURE = 8 << KIND_SHIFT, - GPU_MAT = 9 << KIND_SHIFT + GPU_MAT = 9 << KIND_SHIFT, + OCL_MAT =10 << KIND_SHIFT }; _InputArray(); @@ -3409,8 +3410,6 @@ public: //! converts dense 2d matrix to the sparse form /*! \param m the input matrix - \param try1d if true and m is a single-column matrix (Nx1), - then the sparse matrix will be 1-dimensional. */ explicit SparseMat(const Mat& m); //! converts old-style sparse matrix to the new-style. All the data is copied @@ -4813,6 +4812,9 @@ public: ~AutoLock() { mutex->unlock(); } protected: Mutex* mutex; +private: + AutoLock(const AutoLock&); + AutoLock& operator = (const AutoLock&); }; } diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 5335fa01f8..606c62f8f5 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -50,6 +50,9 @@ #include +#include "opencv2/core/core.hpp" +#include "opencv2/core/types_c.h" + #if defined WIN32 || defined _WIN32 # ifndef WIN32 # define WIN32 @@ -251,6 +254,10 @@ namespace cv body(range); } #endif + + // Returns a static string if there is a parallel framework, + // NULL otherwise. 
+ CV_EXPORTS const char* currentParallelFramework(); } //namespace cv #define CV_INIT_ALGORITHM(classname, algname, memberinit) \ diff --git a/modules/core/perf/perf_reduce.cpp b/modules/core/perf/perf_reduce.cpp index 93d3a14166..7b74b0e7e3 100644 --- a/modules/core/perf/perf_reduce.cpp +++ b/modules/core/perf/perf_reduce.cpp @@ -34,7 +34,8 @@ PERF_TEST_P(Size_MatType_ROp, reduceR, declare.in(src, WARMUP_RNG).out(vec); declare.time(100); - TEST_CYCLE() reduce(src, vec, 0, reduceOp, ddepth); + int runs = 15; + TEST_CYCLE_MULTIRUN(runs) reduce(src, vec, 0, reduceOp, ddepth); SANITY_CHECK(vec, 1); } @@ -65,4 +66,3 @@ PERF_TEST_P(Size_MatType_ROp, reduceC, SANITY_CHECK(vec, 1); } - diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 5988363d3c..05a0c55524 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2855,9 +2855,9 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp if( _mean.data ) { - CV_Assert( _mean.size() == mean_sz ); + CV_Assert( _mean.size() == mean_sz ); _mean.convertTo(mean, ctype); - covar_flags |= CV_COVAR_USE_AVG; + covar_flags |= CV_COVAR_USE_AVG; } calcCovarMatrix( data, covar, mean, covar_flags, ctype ); @@ -2901,6 +2901,36 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp return *this; } +template +int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance) +{ + CV_DbgAssert( eigenvalues.type() == DataType::type ); + + Mat g(eigenvalues.size(), DataType::type); + + for(int ig = 0; ig < g.rows; ig++) + { + g.at(ig, 0) = 0; + for(int im = 0; im <= ig; im++) + { + g.at(ig,0) += eigenvalues.at(im,0); + } + } + + int L; + + for(L = 0; L < eigenvalues.rows; L++) + { + double energy = g.at(L, 0) / g.at(g.rows - 1, 0); + if(energy > retainedVariance) + break; + } + + L = std::max(2, L); + + return L; +} + PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double retainedVariance) { Mat data = 
_data.getMat(), _mean = __mean.getMat(); @@ -2977,26 +3007,11 @@ PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double reta } // compute the cumulative energy content for each eigenvector - Mat g(eigenvalues.size(), ctype); - - for(int ig = 0; ig < g.rows; ig++) - { - g.at(ig,0) = 0; - for(int im = 0; im <= ig; im++) - { - g.at(ig,0) += eigenvalues.at(im,0); - } - } - int L; - for(L = 0; L < eigenvalues.rows; L++) - { - double energy = g.at(L, 0) / g.at(g.rows - 1, 0); - if(energy > retainedVariance) - break; - } - - L = std::max(2, L); + if (ctype == CV_32F) + L = computeCumulativeEnergy(eigenvalues, retainedVariance); + else + L = computeCumulativeEnergy(eigenvalues, retainedVariance); // use clone() to physically copy the data and thus deallocate the original matrices eigenvalues = eigenvalues.rowRange(0,L).clone(); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 7acb0e0dbd..5a3600b9b3 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -980,6 +980,11 @@ Mat _InputArray::getMat(int i) const return !v.empty() ? 
Mat(size(i), t, (void*)&v[0]) : Mat(); } + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1062,6 +1067,11 @@ void _InputArray::getMatVector(vector& mv) const return; } + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1189,6 +1199,11 @@ Size _InputArray::size(int i) const return tex->size(); } + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) { @@ -1303,6 +1318,11 @@ bool _InputArray::empty() const if( k == OPENGL_TEXTURE ) return ((const ogl::Texture2D*)obj)->empty(); + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) return ((const gpu::GpuMat*)obj)->empty(); @@ -1523,6 +1543,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all return; } + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + if( k == NONE ) { CV_Error(CV_StsNullPtr, "create() called for the missing output array" ); @@ -1634,6 +1659,11 @@ void _OutputArray::release() const return; } + if( k == OCL_MAT ) + { + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 0b2a845ac1..0a9ed09871 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -110,8 +110,16 @@ #endif #endif -#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY - #define HAVE_PARALLEL_FRAMEWORK +#if 
defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 +# define CV_PARALLEL_FRAMEWORK "tbb" +#elif defined HAVE_CSTRIPES +# define CV_PARALLEL_FRAMEWORK "cstripes" +#elif defined HAVE_OPENMP +# define CV_PARALLEL_FRAMEWORK "openmp" +#elif defined HAVE_GCD +# define CV_PARALLEL_FRAMEWORK "gcd" +#elif defined HAVE_CONCURRENCY +# define CV_PARALLEL_FRAMEWORK "ms-concurrency" #endif namespace cv @@ -121,7 +129,7 @@ namespace cv namespace { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK class ParallelLoopBodyWrapper { public: @@ -218,7 +226,7 @@ public: static SchedPtr pplScheduler; #endif -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK } //namespace @@ -226,7 +234,7 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads != 0) { @@ -281,7 +289,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, } else -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK { (void)nstripes; body(range); @@ -290,7 +298,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, int cv::getNumThreads(void) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads == 0) return 1; @@ -333,7 +341,7 @@ int cv::getNumThreads(void) void cv::setNumThreads( int threads ) { (void)threads; -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK numThreads = threads; #endif @@ -480,6 +488,14 @@ int cv::getNumberOfCPUs(void) #endif } +const char* cv::currentParallelFramework() { +#ifdef CV_PARALLEL_FRAMEWORK + return CV_PARALLEL_FRAMEWORK; +#else + return NULL; +#endif +} + CV_IMPL void cvSetNumThreads(int nt) { cv::setNumThreads(nt); diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst index 
8596ae43db..d7e5eb4c29 100644 --- a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst +++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst @@ -217,7 +217,7 @@ For each query descriptor, finds the training descriptors not farther than the s :param compactResult: Parameter used when the mask (or masks) is not empty. If ``compactResult`` is false, the ``matches`` vector has the same size as ``queryDescriptors`` rows. If ``compactResult`` is true, the ``matches`` vector does not contain matches for fully masked-out query descriptors. - :param maxDistance: Threshold for the distance between matched descriptors. + :param maxDistance: Threshold for the distance between matched descriptors. Here, distance means metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured in pixels). For each query descriptor, the methods find such training descriptors that the distance between the query descriptor and the training descriptor is equal or smaller than ``maxDistance``. Found matches are returned in the distance increasing order. 
diff --git a/modules/features2d/src/detectors.cpp b/modules/features2d/src/detectors.cpp index 2efd5a652a..a1e389a435 100644 --- a/modules/features2d/src/detectors.cpp +++ b/modules/features2d/src/detectors.cpp @@ -214,7 +214,7 @@ static void keepStrongest( int N, vector& keypoints ) } namespace { -class GridAdaptedFeatureDetectorInvoker +class GridAdaptedFeatureDetectorInvoker : public ParallelLoopBody { private: int gridRows_, gridCols_; @@ -223,29 +223,24 @@ private: const Mat& image_; const Mat& mask_; const Ptr& detector_; -#ifdef HAVE_TBB - tbb::mutex* kptLock_; -#endif + Mutex* kptLock_; GridAdaptedFeatureDetectorInvoker& operator=(const GridAdaptedFeatureDetectorInvoker&); // to quiet MSVC public: - GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, vector& keypoints, int maxPerCell, int gridRows, int gridCols -#ifdef HAVE_TBB - , tbb::mutex* kptLock -#endif - ) : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), - keypoints_(keypoints), image_(image), mask_(mask), detector_(detector) -#ifdef HAVE_TBB - , kptLock_(kptLock) -#endif + GridAdaptedFeatureDetectorInvoker(const Ptr& detector, const Mat& image, const Mat& mask, + vector& keypoints, int maxPerCell, int gridRows, int gridCols, + cv::Mutex* kptLock) + : gridRows_(gridRows), gridCols_(gridCols), maxPerCell_(maxPerCell), + keypoints_(keypoints), image_(image), mask_(mask), detector_(detector), + kptLock_(kptLock) { } - void operator() (const BlockedRange& range) const + void operator() (const Range& range) const { - for (int i = range.begin(); i < range.end(); ++i) + for (int i = range.start; i < range.end; ++i) { int celly = i / gridCols_; int cellx = i - celly * gridCols_; @@ -270,9 +265,8 @@ public: it->pt.x += col_range.start; it->pt.y += row_range.start; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock join_keypoints(*kptLock_); -#endif + + cv::AutoLock join_keypoints(*kptLock_); keypoints_.insert( keypoints_.end(), sub_keypoints.begin(), 
sub_keypoints.end() ); } } @@ -289,13 +283,9 @@ void GridAdaptedFeatureDetector::detectImpl( const Mat& image, vector& keypoints.reserve(maxTotalKeypoints); int maxPerCell = maxTotalKeypoints / (gridRows * gridCols); -#ifdef HAVE_TBB - tbb::mutex kptLock; - cv::parallel_for(cv::BlockedRange(0, gridRows * gridCols), + cv::Mutex kptLock; + cv::parallel_for_(cv::Range(0, gridRows * gridCols), GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols, &kptLock)); -#else - GridAdaptedFeatureDetectorInvoker(detector, image, mask, keypoints, maxPerCell, gridRows, gridCols)(cv::BlockedRange(0, gridRows * gridCols)); -#endif } /* diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 5509226419..0062944bab 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -45,16 +45,16 @@ if(HAVE_CUDA) set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY}) if(WITH_NVCUVID) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY}) - endif() + set(cuda_link_libs ${cuda_link_libs} ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY}) - if(WIN32) - find_cuda_helper_libs(nvcuvenc) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY}) - endif() + if(WIN32) + find_cuda_helper_libs(nvcuvenc) + set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY}) + endif() - if(WITH_FFMPEG) - set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES}) + if(WITH_FFMPEG) + set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES}) + endif() endif() else() set(lib_cuda "") diff --git a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp index d02027f244..5b422849bd 100644 --- a/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp +++ b/modules/gpu/include/opencv2/gpu/device/detail/color_detail.hpp @@ -120,11 +120,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2RGB() - : 
unary_function::vec_type, typename TypeVec::vec_type>(){} - - __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_) - :unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} }; template <> struct RGB2RGB : unary_function @@ -141,8 +138,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2RGB():unary_function(){} - __device__ __forceinline__ RGB2RGB(const RGB2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB() {} + __host__ __device__ __forceinline__ RGB2RGB(const RGB2RGB&) {} }; } @@ -203,8 +200,8 @@ namespace cv { namespace gpu { namespace device return RGB2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ RGB2RGB5x5():unary_function(){} - __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} }; template struct RGB2RGB5x5<4, bidx,green_bits> : unary_function @@ -214,8 +211,8 @@ namespace cv { namespace gpu { namespace device return RGB2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ RGB2RGB5x5():unary_function(){} - __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2RGB5x5() {} + __host__ __device__ __forceinline__ RGB2RGB5x5(const RGB2RGB5x5&) {} }; } @@ -282,8 +279,8 @@ namespace cv { namespace gpu { namespace device RGB5x52RGBConverter::cvt(src, dst); return dst; } - __device__ __forceinline__ RGB5x52RGB():unary_function(){} - __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} }; @@ -295,8 +292,8 @@ namespace cv { namespace gpu { namespace device 
RGB5x52RGBConverter::cvt(src, dst); return dst; } - __device__ __forceinline__ RGB5x52RGB():unary_function(){} - __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB5x52RGB() {} + __host__ __device__ __forceinline__ RGB5x52RGB(const RGB5x52RGB&) {} }; } @@ -325,9 +322,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Gray2RGB():unary_function::vec_type>(){} - __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_) - : unary_function::vec_type>(){} + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} }; template <> struct Gray2RGB : unary_function @@ -342,8 +338,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Gray2RGB():unary_function(){} - __device__ __forceinline__ Gray2RGB(const Gray2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ Gray2RGB() {} + __host__ __device__ __forceinline__ Gray2RGB(const Gray2RGB&) {} }; } @@ -384,8 +380,8 @@ namespace cv { namespace gpu { namespace device return Gray2RGB5x5Converter::cvt(src); } - __device__ __forceinline__ Gray2RGB5x5():unary_function(){} - __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5& other_):unary_function(){} + __host__ __device__ __forceinline__ Gray2RGB5x5() {} + __host__ __device__ __forceinline__ Gray2RGB5x5(const Gray2RGB5x5&) {} }; } @@ -426,8 +422,8 @@ namespace cv { namespace gpu { namespace device { return RGB5x52GrayConverter::cvt(src); } - __device__ __forceinline__ RGB5x52Gray() : unary_function(){} - __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB5x52Gray() {} + __host__ __device__ __forceinline__ RGB5x52Gray(const RGB5x52Gray&) {} }; } @@ -467,9 +463,8 @@ namespace cv { namespace gpu { namespace device { return RGB2GrayConvert(&src.x); } - __device__ 
__forceinline__ RGB2Gray() : unary_function::vec_type, T>(){} - __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) - : unary_function::vec_type, T>(){} + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} }; template struct RGB2Gray : unary_function @@ -478,8 +473,8 @@ namespace cv { namespace gpu { namespace device { return RGB2GrayConvert(src); } - __device__ __forceinline__ RGB2Gray() : unary_function(){} - __device__ __forceinline__ RGB2Gray(const RGB2Gray& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2Gray() {} + __host__ __device__ __forceinline__ RGB2Gray(const RGB2Gray&) {} }; } @@ -529,10 +524,8 @@ namespace cv { namespace gpu { namespace device RGB2YUVConvert(&src.x, dst); return dst; } - __device__ __forceinline__ RGB2YUV() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2YUV(const RGB2YUV& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2YUV() {} + __host__ __device__ __forceinline__ RGB2YUV(const RGB2YUV&) {} }; } @@ -609,10 +602,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ YUV2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} }; template struct YUV2RGB : unary_function @@ -621,8 +612,8 @@ namespace cv { namespace gpu { namespace device { return YUV2RGBConvert(src); } - __device__ __forceinline__ YUV2RGB() : unary_function(){} - __device__ __forceinline__ YUV2RGB(const YUV2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ YUV2RGB() {} + __host__ __device__ __forceinline__ YUV2RGB(const YUV2RGB&) {} }; } @@ -689,10 +680,8 @@ 
namespace cv { namespace gpu { namespace device RGB2YCrCbConvert(&src.x, dst); return dst; } - __device__ __forceinline__ RGB2YCrCb() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} }; template struct RGB2YCrCb : unary_function @@ -702,8 +691,8 @@ namespace cv { namespace gpu { namespace device return RGB2YCrCbConvert(src); } - __device__ __forceinline__ RGB2YCrCb() : unary_function(){} - __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2YCrCb() {} + __host__ __device__ __forceinline__ RGB2YCrCb(const RGB2YCrCb&) {} }; } @@ -771,10 +760,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ YCrCb2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} }; template struct YCrCb2RGB : unary_function @@ -783,8 +770,8 @@ namespace cv { namespace gpu { namespace device { return YCrCb2RGBConvert(src); } - __device__ __forceinline__ YCrCb2RGB() : unary_function(){} - __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ YCrCb2RGB() {} + __host__ __device__ __forceinline__ YCrCb2RGB(const YCrCb2RGB&) {} }; } @@ -849,10 +836,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2XYZ() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) - : unary_function::vec_type, 
typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} }; template struct RGB2XYZ : unary_function @@ -861,8 +846,8 @@ namespace cv { namespace gpu { namespace device { return RGB2XYZConvert(src); } - __device__ __forceinline__ RGB2XYZ() : unary_function(){} - __device__ __forceinline__ RGB2XYZ(const RGB2XYZ& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2XYZ() {} + __host__ __device__ __forceinline__ RGB2XYZ(const RGB2XYZ&) {} }; } @@ -926,10 +911,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ XYZ2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} }; template struct XYZ2RGB : unary_function @@ -938,8 +921,8 @@ namespace cv { namespace gpu { namespace device { return XYZ2RGBConvert(src); } - __device__ __forceinline__ XYZ2RGB() : unary_function(){} - __device__ __forceinline__ XYZ2RGB(const XYZ2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ XYZ2RGB() {} + __host__ __device__ __forceinline__ XYZ2RGB(const XYZ2RGB&) {} }; } @@ -1066,10 +1049,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2HSV() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} }; template struct RGB2HSV : unary_function @@ -1078,8 +1059,8 @@ namespace cv { namespace gpu { namespace device { return RGB2HSVConvert(src); } - __device__ __forceinline__ 
RGB2HSV():unary_function(){} - __device__ __forceinline__ RGB2HSV(const RGB2HSV& other_):unary_function(){} + __host__ __device__ __forceinline__ RGB2HSV() {} + __host__ __device__ __forceinline__ RGB2HSV(const RGB2HSV&) {} }; } @@ -1208,10 +1189,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ HSV2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} }; template struct HSV2RGB : unary_function @@ -1220,8 +1199,8 @@ namespace cv { namespace gpu { namespace device { return HSV2RGBConvert(src); } - __device__ __forceinline__ HSV2RGB():unary_function(){} - __device__ __forceinline__ HSV2RGB(const HSV2RGB& other_):unary_function(){} + __host__ __device__ __forceinline__ HSV2RGB() {} + __host__ __device__ __forceinline__ HSV2RGB(const HSV2RGB&) {} }; } @@ -1343,10 +1322,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2HLS() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} }; template struct RGB2HLS : unary_function @@ -1355,8 +1332,8 @@ namespace cv { namespace gpu { namespace device { return RGB2HLSConvert(src); } - __device__ __forceinline__ RGB2HLS() : unary_function(){} - __device__ __forceinline__ RGB2HLS(const RGB2HLS& other_) : unary_function(){} + __host__ __device__ __forceinline__ RGB2HLS() {} + __host__ __device__ __forceinline__ RGB2HLS(const RGB2HLS&) {} }; } @@ -1485,10 +1462,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ 
__forceinline__ HLS2RGB() - : unary_function::vec_type, typename TypeVec::vec_type>(){} - __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) - : unary_function::vec_type, typename TypeVec::vec_type>(){} + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} }; template struct HLS2RGB : unary_function @@ -1497,8 +1472,8 @@ namespace cv { namespace gpu { namespace device { return HLS2RGBConvert(src); } - __device__ __forceinline__ HLS2RGB() : unary_function(){} - __device__ __forceinline__ HLS2RGB(const HLS2RGB& other_) : unary_function(){} + __host__ __device__ __forceinline__ HLS2RGB() {} + __host__ __device__ __forceinline__ HLS2RGB(const HLS2RGB&) {} }; } @@ -1651,8 +1626,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2Lab() {} - __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {} + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} }; template struct RGB2Lab @@ -1666,8 +1641,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2Lab() {} - __device__ __forceinline__ RGB2Lab(const RGB2Lab& other_) {} + __host__ __device__ __forceinline__ RGB2Lab() {} + __host__ __device__ __forceinline__ RGB2Lab(const RGB2Lab&) {} }; } @@ -1764,8 +1739,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Lab2RGB() {} - __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {} + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const Lab2RGB&) {} }; template struct Lab2RGB @@ -1779,8 +1754,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Lab2RGB() {} - __device__ __forceinline__ Lab2RGB(const Lab2RGB& other_) {} + __host__ __device__ __forceinline__ Lab2RGB() {} + __host__ __device__ __forceinline__ Lab2RGB(const 
Lab2RGB&) {} }; } @@ -1863,8 +1838,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2Luv() {} - __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {} + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} }; template struct RGB2Luv @@ -1878,8 +1853,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ RGB2Luv() {} - __device__ __forceinline__ RGB2Luv(const RGB2Luv& other_) {} + __host__ __device__ __forceinline__ RGB2Luv() {} + __host__ __device__ __forceinline__ RGB2Luv(const RGB2Luv&) {} }; } @@ -1964,8 +1939,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Luv2RGB() {} - __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {} + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} }; template struct Luv2RGB @@ -1979,8 +1954,8 @@ namespace cv { namespace gpu { namespace device return dst; } - __device__ __forceinline__ Luv2RGB() {} - __device__ __forceinline__ Luv2RGB(const Luv2RGB& other_) {} + __host__ __device__ __forceinline__ Luv2RGB() {} + __host__ __device__ __forceinline__ Luv2RGB(const Luv2RGB&) {} }; } diff --git a/modules/gpu/include/opencv2/gpu/device/functional.hpp b/modules/gpu/include/opencv2/gpu/device/functional.hpp index 6064e8e99c..db264735e3 100644 --- a/modules/gpu/include/opencv2/gpu/device/functional.hpp +++ b/modules/gpu/include/opencv2/gpu/device/functional.hpp @@ -63,8 +63,8 @@ namespace cv { namespace gpu { namespace device { return a + b; } - __device__ __forceinline__ plus(const plus& other):binary_function(){} - __device__ __forceinline__ plus():binary_function(){} + __host__ __device__ __forceinline__ plus() {} + __host__ __device__ __forceinline__ plus(const plus&) {} }; template struct minus : binary_function @@ -74,8 +74,8 @@ namespace cv { namespace gpu { 
namespace device { return a - b; } - __device__ __forceinline__ minus(const minus& other):binary_function(){} - __device__ __forceinline__ minus():binary_function(){} + __host__ __device__ __forceinline__ minus() {} + __host__ __device__ __forceinline__ minus(const minus&) {} }; template struct multiplies : binary_function @@ -85,8 +85,8 @@ namespace cv { namespace gpu { namespace device { return a * b; } - __device__ __forceinline__ multiplies(const multiplies& other):binary_function(){} - __device__ __forceinline__ multiplies():binary_function(){} + __host__ __device__ __forceinline__ multiplies() {} + __host__ __device__ __forceinline__ multiplies(const multiplies&) {} }; template struct divides : binary_function @@ -96,8 +96,8 @@ namespace cv { namespace gpu { namespace device { return a / b; } - __device__ __forceinline__ divides(const divides& other):binary_function(){} - __device__ __forceinline__ divides():binary_function(){} + __host__ __device__ __forceinline__ divides() {} + __host__ __device__ __forceinline__ divides(const divides&) {} }; template struct modulus : binary_function @@ -107,8 +107,8 @@ namespace cv { namespace gpu { namespace device { return a % b; } - __device__ __forceinline__ modulus(const modulus& other):binary_function(){} - __device__ __forceinline__ modulus():binary_function(){} + __host__ __device__ __forceinline__ modulus() {} + __host__ __device__ __forceinline__ modulus(const modulus&) {} }; template struct negate : unary_function @@ -117,8 +117,8 @@ namespace cv { namespace gpu { namespace device { return -a; } - __device__ __forceinline__ negate(const negate& other):unary_function(){} - __device__ __forceinline__ negate():unary_function(){} + __host__ __device__ __forceinline__ negate() {} + __host__ __device__ __forceinline__ negate(const negate&) {} }; // Comparison Operations @@ -129,8 +129,8 @@ namespace cv { namespace gpu { namespace device { return a == b; } - __device__ __forceinline__ equal_to(const equal_to& 
other):binary_function(){} - __device__ __forceinline__ equal_to():binary_function(){} + __host__ __device__ __forceinline__ equal_to() {} + __host__ __device__ __forceinline__ equal_to(const equal_to&) {} }; template struct not_equal_to : binary_function @@ -140,8 +140,8 @@ namespace cv { namespace gpu { namespace device { return a != b; } - __device__ __forceinline__ not_equal_to(const not_equal_to& other):binary_function(){} - __device__ __forceinline__ not_equal_to():binary_function(){} + __host__ __device__ __forceinline__ not_equal_to() {} + __host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {} }; template struct greater : binary_function @@ -151,8 +151,8 @@ namespace cv { namespace gpu { namespace device { return a > b; } - __device__ __forceinline__ greater(const greater& other):binary_function(){} - __device__ __forceinline__ greater():binary_function(){} + __host__ __device__ __forceinline__ greater() {} + __host__ __device__ __forceinline__ greater(const greater&) {} }; template struct less : binary_function @@ -162,8 +162,8 @@ namespace cv { namespace gpu { namespace device { return a < b; } - __device__ __forceinline__ less(const less& other):binary_function(){} - __device__ __forceinline__ less():binary_function(){} + __host__ __device__ __forceinline__ less() {} + __host__ __device__ __forceinline__ less(const less&) {} }; template struct greater_equal : binary_function @@ -173,8 +173,8 @@ namespace cv { namespace gpu { namespace device { return a >= b; } - __device__ __forceinline__ greater_equal(const greater_equal& other):binary_function(){} - __device__ __forceinline__ greater_equal():binary_function(){} + __host__ __device__ __forceinline__ greater_equal() {} + __host__ __device__ __forceinline__ greater_equal(const greater_equal&) {} }; template struct less_equal : binary_function @@ -184,8 +184,8 @@ namespace cv { namespace gpu { namespace device { return a <= b; } - __device__ __forceinline__ less_equal(const less_equal& 
other):binary_function(){} - __device__ __forceinline__ less_equal():binary_function(){} + __host__ __device__ __forceinline__ less_equal() {} + __host__ __device__ __forceinline__ less_equal(const less_equal&) {} }; // Logical Operations @@ -196,8 +196,8 @@ namespace cv { namespace gpu { namespace device { return a && b; } - __device__ __forceinline__ logical_and(const logical_and& other):binary_function(){} - __device__ __forceinline__ logical_and():binary_function(){} + __host__ __device__ __forceinline__ logical_and() {} + __host__ __device__ __forceinline__ logical_and(const logical_and&) {} }; template struct logical_or : binary_function @@ -207,8 +207,8 @@ namespace cv { namespace gpu { namespace device { return a || b; } - __device__ __forceinline__ logical_or(const logical_or& other):binary_function(){} - __device__ __forceinline__ logical_or():binary_function(){} + __host__ __device__ __forceinline__ logical_or() {} + __host__ __device__ __forceinline__ logical_or(const logical_or&) {} }; template struct logical_not : unary_function @@ -217,8 +217,8 @@ namespace cv { namespace gpu { namespace device { return !a; } - __device__ __forceinline__ logical_not(const logical_not& other):unary_function(){} - __device__ __forceinline__ logical_not():unary_function(){} + __host__ __device__ __forceinline__ logical_not() {} + __host__ __device__ __forceinline__ logical_not(const logical_not&) {} }; // Bitwise Operations @@ -229,8 +229,8 @@ namespace cv { namespace gpu { namespace device { return a & b; } - __device__ __forceinline__ bit_and(const bit_and& other):binary_function(){} - __device__ __forceinline__ bit_and():binary_function(){} + __host__ __device__ __forceinline__ bit_and() {} + __host__ __device__ __forceinline__ bit_and(const bit_and&) {} }; template struct bit_or : binary_function @@ -240,8 +240,8 @@ namespace cv { namespace gpu { namespace device { return a | b; } - __device__ __forceinline__ bit_or(const bit_or& other):binary_function(){} - 
__device__ __forceinline__ bit_or():binary_function(){} + __host__ __device__ __forceinline__ bit_or() {} + __host__ __device__ __forceinline__ bit_or(const bit_or&) {} }; template struct bit_xor : binary_function @@ -251,8 +251,8 @@ namespace cv { namespace gpu { namespace device { return a ^ b; } - __device__ __forceinline__ bit_xor(const bit_xor& other):binary_function(){} - __device__ __forceinline__ bit_xor():binary_function(){} + __host__ __device__ __forceinline__ bit_xor() {} + __host__ __device__ __forceinline__ bit_xor(const bit_xor&) {} }; template struct bit_not : unary_function @@ -261,8 +261,8 @@ namespace cv { namespace gpu { namespace device { return ~v; } - __device__ __forceinline__ bit_not(const bit_not& other):unary_function(){} - __device__ __forceinline__ bit_not():unary_function(){} + __host__ __device__ __forceinline__ bit_not() {} + __host__ __device__ __forceinline__ bit_not(const bit_not&) {} }; // Generalized Identity Operations @@ -272,8 +272,8 @@ namespace cv { namespace gpu { namespace device { return x; } - __device__ __forceinline__ identity(const identity& other):unary_function(){} - __device__ __forceinline__ identity():unary_function(){} + __host__ __device__ __forceinline__ identity() {} + __host__ __device__ __forceinline__ identity(const identity&) {} }; template struct project1st : binary_function @@ -282,8 +282,8 @@ namespace cv { namespace gpu { namespace device { return lhs; } - __device__ __forceinline__ project1st(const project1st& other):binary_function(){} - __device__ __forceinline__ project1st():binary_function(){} + __host__ __device__ __forceinline__ project1st() {} + __host__ __device__ __forceinline__ project1st(const project1st&) {} }; template struct project2nd : binary_function @@ -292,8 +292,8 @@ namespace cv { namespace gpu { namespace device { return rhs; } - __device__ __forceinline__ project2nd(const project2nd& other):binary_function(){} - __device__ __forceinline__ project2nd():binary_function(){} + 
__host__ __device__ __forceinline__ project2nd() {} + __host__ __device__ __forceinline__ project2nd(const project2nd&) {} }; // Min/Max Operations @@ -302,8 +302,8 @@ namespace cv { namespace gpu { namespace device template <> struct name : binary_function \ { \ __device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \ - __device__ __forceinline__ name() {}\ - __device__ __forceinline__ name(const name&) {}\ + __host__ __device__ __forceinline__ name() {}\ + __host__ __device__ __forceinline__ name(const name&) {}\ }; template struct maximum : binary_function @@ -312,8 +312,8 @@ namespace cv { namespace gpu { namespace device { return max(lhs, rhs); } - __device__ __forceinline__ maximum() {} - __device__ __forceinline__ maximum(const maximum&) {} + __host__ __device__ __forceinline__ maximum() {} + __host__ __device__ __forceinline__ maximum(const maximum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max) @@ -332,8 +332,8 @@ namespace cv { namespace gpu { namespace device { return min(lhs, rhs); } - __device__ __forceinline__ minimum() {} - __device__ __forceinline__ minimum(const minimum&) {} + __host__ __device__ __forceinline__ minimum() {} + __host__ __device__ __forceinline__ minimum(const minimum&) {} }; OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min) @@ -349,7 +349,6 @@ namespace cv { namespace gpu { namespace device #undef OPENCV_GPU_IMPLEMENT_MINMAX // Math functions -///bound========================================= template struct abs_func : unary_function { @@ -358,8 +357,8 @@ namespace cv { namespace gpu { namespace device return abs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -368,8 +367,8 @@ namespace cv { namespace gpu { namespace device return x; } - __device__ 
__forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -378,8 +377,8 @@ namespace cv { namespace gpu { namespace device return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -388,8 +387,8 @@ namespace cv { namespace gpu { namespace device return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -398,8 +397,8 @@ namespace cv { namespace gpu { namespace device return x; } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -408,8 +407,8 @@ namespace cv { namespace gpu { namespace device return ::abs((int)x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -418,8 +417,8 @@ namespace cv { namespace gpu { namespace device return x; } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; 
template <> struct abs_func : unary_function { @@ -428,8 +427,8 @@ namespace cv { namespace gpu { namespace device return ::abs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -438,8 +437,8 @@ namespace cv { namespace gpu { namespace device return ::fabsf(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; template <> struct abs_func : unary_function { @@ -448,8 +447,8 @@ namespace cv { namespace gpu { namespace device return ::fabs(x); } - __device__ __forceinline__ abs_func() {} - __device__ __forceinline__ abs_func(const abs_func&) {} + __host__ __device__ __forceinline__ abs_func() {} + __host__ __device__ __forceinline__ abs_func(const abs_func&) {} }; #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(name, func) \ @@ -459,8 +458,8 @@ namespace cv { namespace gpu { namespace device { \ return func ## f(v); \ } \ - __device__ __forceinline__ name ## _func() {} \ - __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; \ template <> struct name ## _func : unary_function \ { \ @@ -468,8 +467,8 @@ namespace cv { namespace gpu { namespace device { \ return func(v); \ } \ - __device__ __forceinline__ name ## _func() {} \ - __device__ __forceinline__ name ## _func(const name ## _func&) {} \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(name, func) \ @@ -479,6 +478,8 @@ namespace cv { 
namespace gpu { namespace device { \ return func ## f(v1, v2); \ } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; \ template <> struct name ## _func : binary_function \ { \ @@ -486,6 +487,8 @@ namespace cv { namespace gpu { namespace device { \ return func(v1, v2); \ } \ + __host__ __device__ __forceinline__ name ## _func() {} \ + __host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \ }; OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt) @@ -522,8 +525,8 @@ namespace cv { namespace gpu { namespace device { return src1 * src1 + src2 * src2; } - __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func& other) : binary_function(){} - __device__ __forceinline__ hypot_sqr_func() : binary_function(){} + __host__ __device__ __forceinline__ hypot_sqr_func() {} + __host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {} }; // Saturate Cast Functor @@ -533,8 +536,8 @@ namespace cv { namespace gpu { namespace device { return saturate_cast(v); } - __device__ __forceinline__ saturate_cast_func(const saturate_cast_func& other):unary_function(){} - __device__ __forceinline__ saturate_cast_func():unary_function(){} + __host__ __device__ __forceinline__ saturate_cast_func() {} + __host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {} }; // Threshold Functors @@ -547,10 +550,9 @@ namespace cv { namespace gpu { namespace device return (src > thresh) * maxVal; } - __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) - : unary_function(), thresh(other.thresh), maxVal(other.maxVal){} - - __device__ __forceinline__ thresh_binary_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_binary_func() {} + __host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} const T thresh; const T maxVal; @@ -565,10 
+567,9 @@ namespace cv { namespace gpu { namespace device return (src <= thresh) * maxVal; } - __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) - : unary_function(), thresh(other.thresh), maxVal(other.maxVal){} - - __device__ __forceinline__ thresh_binary_inv_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_binary_inv_func() {} + __host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other) + : thresh(other.thresh), maxVal(other.maxVal) {} const T thresh; const T maxVal; @@ -583,10 +584,9 @@ namespace cv { namespace gpu { namespace device return minimum()(src, thresh); } - __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) - : unary_function(), thresh(other.thresh){} - - __device__ __forceinline__ thresh_trunc_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_trunc_func() {} + __host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other) + : thresh(other.thresh) {} const T thresh; }; @@ -599,10 +599,10 @@ namespace cv { namespace gpu { namespace device { return (src > thresh) * src; } - __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) - : unary_function(), thresh(other.thresh){} - __device__ __forceinline__ thresh_to_zero_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_to_zero_func() {} + __host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other) + : thresh(other.thresh) {} const T thresh; }; @@ -615,14 +615,14 @@ namespace cv { namespace gpu { namespace device { return (src <= thresh) * src; } - __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) - : unary_function(), thresh(other.thresh){} - __device__ __forceinline__ thresh_to_zero_inv_func():unary_function(){} + __host__ __device__ __forceinline__ thresh_to_zero_inv_func() {} + __host__ __device__ __forceinline__ 
thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other) + : thresh(other.thresh) {} const T thresh; }; -//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============> + // Function Object Adaptors template struct unary_negate : unary_function { @@ -633,8 +633,8 @@ namespace cv { namespace gpu { namespace device return !pred(x); } - __device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function(){} - __device__ __forceinline__ unary_negate() : unary_function(){} + __host__ __device__ __forceinline__ unary_negate() {} + __host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {} const Predicate pred; }; @@ -653,11 +653,9 @@ namespace cv { namespace gpu { namespace device { return !pred(x,y); } - __device__ __forceinline__ binary_negate(const binary_negate& other) - : binary_function(){} - __device__ __forceinline__ binary_negate() : - binary_function(){} + __host__ __device__ __forceinline__ binary_negate() {} + __host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {} const Predicate pred; }; @@ -676,8 +674,8 @@ namespace cv { namespace gpu { namespace device return op(arg1, a); } - __device__ __forceinline__ binder1st(const binder1st& other) : - unary_function(){} + __host__ __device__ __forceinline__ binder1st() {} + __host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {} const Op op; const typename Op::first_argument_type arg1; @@ -697,8 +695,8 @@ namespace cv { namespace gpu { namespace device return op(a, arg2); } - __device__ __forceinline__ binder2nd(const binder2nd& other) : - unary_function(), op(other.op), arg2(other.arg2){} + __host__ __device__ __forceinline__ binder2nd() {} + __host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {} const Op op; const typename Op::second_argument_type arg2; diff --git 
a/modules/gpu/include/opencv2/gpu/device/limits.hpp b/modules/gpu/include/opencv2/gpu/device/limits.hpp index b040f199d6..595978006c 100644 --- a/modules/gpu/include/opencv2/gpu/device/limits.hpp +++ b/modules/gpu/include/opencv2/gpu/device/limits.hpp @@ -43,193 +43,80 @@ #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__ #define __OPENCV_GPU_LIMITS_GPU_HPP__ -#include +#include +#include #include "common.hpp" namespace cv { namespace gpu { namespace device { - template struct numeric_limits - { - typedef T type; - __device__ __forceinline__ static type min() { return type(); }; - __device__ __forceinline__ static type max() { return type(); }; - __device__ __forceinline__ static type epsilon() { return type(); } - __device__ __forceinline__ static type round_error() { return type(); } - __device__ __forceinline__ static type denorm_min() { return type(); } - __device__ __forceinline__ static type infinity() { return type(); } - __device__ __forceinline__ static type quiet_NaN() { return type(); } - __device__ __forceinline__ static type signaling_NaN() { return T(); } - static const bool is_signed; - }; - template<> struct numeric_limits - { - typedef bool type; - __device__ __forceinline__ static type min() { return false; }; - __device__ __forceinline__ static type max() { return true; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template struct numeric_limits; - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return CHAR_MIN; }; - __device__ __forceinline__ static type max() { return CHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type 
round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (char)-1 == -1; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static bool min() { return false; } + __device__ __forceinline__ static bool max() { return true; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return SCHAR_MIN; }; - __device__ __forceinline__ static type max() { return SCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (signed char)-1 == -1; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static signed char min() { return SCHAR_MIN; } + __device__ __forceinline__ static signed char max() { return SCHAR_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned char type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned char min() { return 0; } 
+ __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef short type; - __device__ __forceinline__ static type min() { return SHRT_MIN; }; - __device__ __forceinline__ static type max() { return SHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static short min() { return SHRT_MIN; } + __device__ __forceinline__ static short max() { return SHRT_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned short type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return USHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned short min() { return 0; } + __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef int type; - __device__ __forceinline__ static type min() { return INT_MIN; }; - __device__ __forceinline__ static type max() { return INT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static 
type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static int min() { return INT_MIN; } + __device__ __forceinline__ static int max() { return INT_MAX; } + static const bool is_signed = true; +}; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned int min() { return 0; } + __device__ __forceinline__ static unsigned int max() { return UINT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef unsigned int type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UINT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static float min() { return FLT_MIN; } + __device__ __forceinline__ static float max() { return FLT_MAX; } + __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef long type; - __device__ __forceinline__ static type min() { return LONG_MIN; }; - __device__ __forceinline__ static type max() { return LONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - 
__device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static double min() { return DBL_MIN; } + __device__ __forceinline__ static double max() { return DBL_MAX; } + __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned long type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return ULONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; - - template<> struct numeric_limits - { - typedef float type; - __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; - __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; - __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; }; - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; - - template<> struct numeric_limits - { - typedef double type; - __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; }; - __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; }; - __device__ __forceinline__ static type epsilon(); - __device__ 
__forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; }}} // namespace cv { namespace gpu { namespace device { #endif // __OPENCV_GPU_LIMITS_GPU_HPP__ diff --git a/modules/gpu/include/opencv2/gpu/device/utility.hpp b/modules/gpu/include/opencv2/gpu/device/utility.hpp index 83eaaa21ce..85e81acf08 100644 --- a/modules/gpu/include/opencv2/gpu/device/utility.hpp +++ b/modules/gpu/include/opencv2/gpu/device/utility.hpp @@ -124,8 +124,8 @@ namespace cv { namespace gpu { namespace device struct WithOutMask { - __device__ __forceinline__ WithOutMask(){} - __device__ __forceinline__ WithOutMask(const WithOutMask& mask){} + __host__ __device__ __forceinline__ WithOutMask(){} + __host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){} __device__ __forceinline__ void next() const { diff --git a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp index 1c46dc0c33..a6cb43a2fa 100644 --- a/modules/gpu/include/opencv2/gpu/device/vec_math.hpp +++ b/modules/gpu/include/opencv2/gpu/device/vec_math.hpp @@ -43,288 +43,880 @@ #ifndef __OPENCV_GPU_VECMATH_HPP__ #define __OPENCV_GPU_VECMATH_HPP__ -#include "saturate_cast.hpp" #include "vec_traits.hpp" -#include "functional.hpp" +#include "saturate_cast.hpp" namespace cv { namespace gpu { namespace device { - namespace vec_math_detail - { - template struct SatCastHelper; - template struct SatCastHelper<1, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x)); - } - }; - template struct SatCastHelper<2, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename 
VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); - } - }; - template struct SatCastHelper<3, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); - } - }; - template struct SatCastHelper<4, VecD> - { - template static __device__ __forceinline__ VecD cast(const VecS& v) - { - typedef typename VecTraits::elem_type D; - return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); - } - }; - template static __device__ __forceinline__ VecD saturate_cast_caller(const VecS& v) +// saturate_cast + +namespace vec_math_detail +{ + template struct SatCastHelper; + template struct SatCastHelper<1, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - return SatCastHelper::cn, VecD>::cast(v); + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x)); } - } - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float1& v) {return 
vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ 
_Tp saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - - template static __device__ __forceinline__ _Tp saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - template static __device__ __forceinline__ _Tp saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_caller<_Tp>(v);} - -#define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a) \ - { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a) \ - { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a) \ - { \ - func f; \ - return 
VecTraits::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \ - } - - namespace vec_math_detail + }; + template struct SatCastHelper<2, VecD> { - template struct BinOpTraits + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef int argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y)); + } + }; + template struct SatCastHelper<3, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef T argument_type; - }; - template struct BinOpTraits + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z)); + } + }; + template struct SatCastHelper<4, VecD> + { + template static __device__ __forceinline__ VecD cast(const VecS& v) { - typedef double argument_type; - }; - template struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef float argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; - template <> struct BinOpTraits - { - typedef double argument_type; - }; + typedef typename VecTraits::elem_type D; + return VecTraits::make(saturate_cast(v.x), saturate_cast(v.y), saturate_cast(v.z), saturate_cast(v.w)); + } + }; + + template static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v) + { + return SatCastHelper::cn, VecD>::cast(v); + } +} + +template static __device__ __forceinline__ T saturate_cast(const uchar1& v) 
{return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ 
__forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper(v);} + +template static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper(v);} +template static __device__ __forceinline__ T saturate_cast(const double4& v) {return 
vec_math_detail::saturate_cast_helper(v);} + +// unary operators + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \ + { \ + return VecTraits::make(op (a.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \ + { \ + return VecTraits::make(op (a.x), op (a.y), op (a.z), op (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \ - __device__ __forceinline__ TypeVec::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \ +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP + +// unary functions + +#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, 
output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \ { \ - func f; \ - return VecTraits::result_type, 1>::vec_type>::make(f(a.x, b.x)); \ + return VecTraits::make(func (a.x)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \ + return VecTraits::make(func (a.x), func (a.y)); \ } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \ { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z)); \ } \ - __device__ __forceinline__ TypeVec::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \ { \ - func f; \ - return VecTraits::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \ - { \ - func f; \ - return 
VecTraits::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \ - } \ - __device__ __forceinline__ TypeVec::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \ - { \ - func f; \ - return VecTraits::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \ - } \ - template \ - __device__ __forceinline__ typename TypeVec::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \ - { \ - func::argument_type> f; \ - return VecTraits::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \ + return VecTraits::make(func (a.x), func (a.y), func (a.z), func (a.w)); \ } -#define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, 
operator !=, not_equal_to) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, abs, abs_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double) -#define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_OP(type) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \ - OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \ - OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int) - OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint) - OPENCV_GPU_IMPLEMENT_VEC_OP(float) - OPENCV_GPU_IMPLEMENT_VEC_OP(double) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float) 
+CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, 
float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double) + +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float) +CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC + +// binary operators (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(a.x op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int) 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar) 
+CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, 
double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP + +// binary operators (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 operator op(const input_type 
## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s); \ + } \ + __device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(s op b.x); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s); \ + } \ + __device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s); \ + } \ + __device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(a.x op s, a.y op s, a.z op s, a.w op s); \ + } \ + __device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(s op b.x, s op b.y, s op b.z, s op b.w); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP + +// binary function (vec & vec) + +#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func (a.x, b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y)); \ + } \ + __device__ 
__forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \ + } + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double) + +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, 
float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float) +CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double) + +#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC + +// binary function (vec & scalar) + +#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \ + __device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, 
(output_type) b.z)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \ + { \ + return VecTraits::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \ + } \ + __device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \ + { \ + return VecTraits::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \ + } + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float) 
+CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double) + +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, 
double) +CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double) + +#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC - #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP - #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP - #undef OPENCV_GPU_IMPLEMENT_VEC_OP - #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP }}} // namespace cv { namespace gpu { namespace device #endif // __OPENCV_GPU_VECMATH_HPP__ diff --git a/modules/gpu/perf/perf_filters.cpp b/modules/gpu/perf/perf_filters.cpp index 40d88aad45..adfc294f6d 100644 --- a/modules/gpu/perf/perf_filters.cpp +++ b/modules/gpu/perf/perf_filters.cpp @@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur, TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize)); - GPU_SANITY_CHECK(dst); + GPU_SANITY_CHECK(dst, 1); } else { diff --git a/modules/gpu/perf/perf_main.cpp b/modules/gpu/perf/perf_main.cpp index a7ac1ccce8..53a19ca412 100644 --- a/modules/gpu/perf/perf_main.cpp +++ b/modules/gpu/perf/perf_main.cpp @@ -44,4 +44,11 @@ using namespace perf; -CV_PERF_TEST_MAIN(gpu, printCudaInfo()) +static const char * impls[] = { +#ifdef HAVE_CUDA + "cuda", +#endif + "plain" +}; + +CV_PERF_TEST_MAIN_WITH_IMPLS(gpu, impls, printCudaInfo()) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index 1ab01a75be..672d657b21 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -103,7 +103,7 @@ PERF_TEST_P(ImagePair, Video_InterpolateFrames, TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf); - GPU_SANITY_CHECK(newFrame); + GPU_SANITY_CHECK(newFrame, 1e-4); } else { @@ -142,7 +142,7 @@ PERF_TEST_P(ImagePair, Video_CreateOpticalFlowNeedleMap, TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors); - GPU_SANITY_CHECK(vertex); + GPU_SANITY_CHECK(vertex, 1e-6); GPU_SANITY_CHECK(colors); } else @@ -219,8 +219,8 @@ PERF_TEST_P(ImagePair, Video_BroxOpticalFlow, TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v); - 
GPU_SANITY_CHECK(u); - GPU_SANITY_CHECK(v); + GPU_SANITY_CHECK(u, 1e-1); + GPU_SANITY_CHECK(v, 1e-1); } else { diff --git a/modules/gpu/src/calib3d.cpp b/modules/gpu/src/calib3d.cpp index e83213f90f..b84f09d0ab 100644 --- a/modules/gpu/src/calib3d.cpp +++ b/modules/gpu/src/calib3d.cpp @@ -151,7 +151,7 @@ namespace } // Computes rotation, translation pair for small subsets if the input data - class TransformHypothesesGenerator + class TransformHypothesesGenerator : public ParallelLoopBody { public: TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_, @@ -161,7 +161,7 @@ namespace num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_), transl_vectors(transl_vectors_) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { // Input data for generation of the current hypothesis vector subset_indices(subset_size); @@ -173,7 +173,7 @@ namespace Mat rot_mat(3, 3, CV_64F); Mat transl_vec(1, 3, CV_64F); - for (int iter = range.begin(); iter < range.end(); ++iter) + for (int iter = range.start; iter < range.end; ++iter) { selectRandom(subset_size, num_points, subset_indices); for (int i = 0; i < subset_size; ++i) @@ -239,7 +239,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam // Generate set of hypotheses using small subsets of the input data TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat, num_points, subset_size, rot_matrices, transl_vectors); - parallel_for(BlockedRange(0, num_iters), body); + parallel_for_(Range(0, num_iters), body); // Compute scores (i.e. 
number of inliers) for each hypothesis GpuMat d_object(object); diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 814a96bc0a..7b95b69091 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -406,7 +406,7 @@ public: GpuMat dclassified(1, 1, CV_32S); cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) ); - PyrLavel level(0, 1.0f, image.size(), NxM, minObjectSize); + PyrLavel level(0, scaleFactor, image.size(), NxM, minObjectSize); while (level.isFeasible(maxObjectSize)) { diff --git a/modules/gpu/src/cuda/calib3d.cu b/modules/gpu/src/cuda/calib3d.cu index 0fd482c41a..f29471f025 100644 --- a/modules/gpu/src/cuda/calib3d.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -67,8 +67,8 @@ namespace cv { namespace gpu { namespace device crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y, crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z); } - __device__ __forceinline__ TransformOp() {} - __device__ __forceinline__ TransformOp(const TransformOp&) {} + __host__ __device__ __forceinline__ TransformOp() {} + __host__ __device__ __forceinline__ TransformOp(const TransformOp&) {} }; void call(const PtrStepSz src, const float* rot, @@ -106,8 +106,8 @@ namespace cv { namespace gpu { namespace device (cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z, (cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z); } - __device__ __forceinline__ ProjectOp() {} - __device__ __forceinline__ ProjectOp(const ProjectOp&) {} + __host__ __device__ __forceinline__ ProjectOp() {} + __host__ __device__ __forceinline__ ProjectOp(const ProjectOp&) {} }; void call(const PtrStepSz src, const float* rot, diff --git a/modules/gpu/src/cuda/canny.cu b/modules/gpu/src/cuda/canny.cu index 1afcddc9c9..aab922f22c 100644 --- a/modules/gpu/src/cuda/canny.cu +++ b/modules/gpu/src/cuda/canny.cu @@ -62,8 +62,8 @@ namespace canny return ::abs(x) + ::abs(y); } - __device__ 
__forceinline__ L1() {} - __device__ __forceinline__ L1(const L1&) {} + __host__ __device__ __forceinline__ L1() {} + __host__ __device__ __forceinline__ L1(const L1&) {} }; struct L2 : binary_function { @@ -72,8 +72,8 @@ namespace canny return ::sqrtf(x * x + y * y); } - __device__ __forceinline__ L2() {} - __device__ __forceinline__ L2(const L2&) {} + __host__ __device__ __forceinline__ L2() {} + __host__ __device__ __forceinline__ L2(const L2&) {} }; } @@ -470,8 +470,8 @@ namespace canny return (uchar)(-(e >> 1)); } - __device__ __forceinline__ GetEdges() {} - __device__ __forceinline__ GetEdges(const GetEdges&) {} + __host__ __device__ __forceinline__ GetEdges() {} + __host__ __device__ __forceinline__ GetEdges(const GetEdges&) {} }; } diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 7f3d4ae338..c4d79bd80b 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z; @@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace device template __device__ __forceinline__ bool operator() (const I& a, const I& b) const { - I d = a - b; + I d = saturate_cast(a - b); return lo.x <= d.x && d.x <= hi.x && lo.y <= d.y && d.y <= hi.y && lo.z <= d.z && d.z <= hi.z && diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index e9397e534f..876d4ad3c4 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -162,8 +162,8 @@ namespace arithm return vadd4(a, b); } - __device__ __forceinline__ VAdd4() {} - __device__ __forceinline__ VAdd4(const VAdd4& other) {} + __host__ __device__ __forceinline__ VAdd4() {} + __host__ 
__device__ __forceinline__ VAdd4(const VAdd4&) {} }; //////////////////////////////////// @@ -175,8 +175,8 @@ namespace arithm return vadd2(a, b); } - __device__ __forceinline__ VAdd2() {} - __device__ __forceinline__ VAdd2(const VAdd2& other) {} + __host__ __device__ __forceinline__ VAdd2() {} + __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {} }; //////////////////////////////////// @@ -188,8 +188,8 @@ namespace arithm return saturate_cast(a + b); } - __device__ __forceinline__ AddMat() {} - __device__ __forceinline__ AddMat(const AddMat& other) {} + __host__ __device__ __forceinline__ AddMat() {} + __host__ __device__ __forceinline__ AddMat(const AddMat&) {} }; } @@ -397,8 +397,8 @@ namespace arithm return vsub4(a, b); } - __device__ __forceinline__ VSub4() {} - __device__ __forceinline__ VSub4(const VSub4& other) {} + __host__ __device__ __forceinline__ VSub4() {} + __host__ __device__ __forceinline__ VSub4(const VSub4&) {} }; //////////////////////////////////// @@ -410,8 +410,8 @@ namespace arithm return vsub2(a, b); } - __device__ __forceinline__ VSub2() {} - __device__ __forceinline__ VSub2(const VSub2& other) {} + __host__ __device__ __forceinline__ VSub2() {} + __host__ __device__ __forceinline__ VSub2(const VSub2&) {} }; //////////////////////////////////// @@ -423,8 +423,8 @@ namespace arithm return saturate_cast(a - b); } - __device__ __forceinline__ SubMat() {} - __device__ __forceinline__ SubMat(const SubMat& other) {} + __host__ __device__ __forceinline__ SubMat() {} + __host__ __device__ __forceinline__ SubMat(const SubMat&) {} }; } @@ -617,8 +617,8 @@ namespace arithm return res; } - __device__ __forceinline__ Mul_8uc4_32f() {} - __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_8uc4_32f() {} + __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {} }; struct Mul_16sc4_32f : binary_function @@ -629,8 +629,8 @@ namespace arithm saturate_cast(a.z * b), 
saturate_cast(a.w * b)); } - __device__ __forceinline__ Mul_16sc4_32f() {} - __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {} + __host__ __device__ __forceinline__ Mul_16sc4_32f() {} + __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {} }; template struct Mul : binary_function @@ -640,8 +640,8 @@ namespace arithm return saturate_cast(a * b); } - __device__ __forceinline__ Mul() {} - __device__ __forceinline__ Mul(const Mul& other) {} + __host__ __device__ __forceinline__ Mul() {} + __host__ __device__ __forceinline__ Mul(const Mul&) {} }; template struct MulScale : binary_function @@ -888,8 +888,8 @@ namespace arithm return b != 0 ? saturate_cast(a / b) : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -898,8 +898,8 @@ namespace arithm return b != 0 ? static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct Div : binary_function { @@ -908,8 +908,8 @@ namespace arithm return b != 0 ? 
static_cast(a) / b : 0; } - __device__ __forceinline__ Div() {} - __device__ __forceinline__ Div(const Div& other) {} + __host__ __device__ __forceinline__ Div() {} + __host__ __device__ __forceinline__ Div(const Div&) {} }; template struct DivScale : binary_function @@ -1196,8 +1196,8 @@ namespace arithm return vabsdiff4(a, b); } - __device__ __forceinline__ VAbsDiff4() {} - __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {} + __host__ __device__ __forceinline__ VAbsDiff4() {} + __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {} }; //////////////////////////////////// @@ -1209,8 +1209,8 @@ namespace arithm return vabsdiff2(a, b); } - __device__ __forceinline__ VAbsDiff2() {} - __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {} + __host__ __device__ __forceinline__ VAbsDiff2() {} + __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {} }; //////////////////////////////////// @@ -1235,8 +1235,8 @@ namespace arithm return saturate_cast(_abs(a - b)); } - __device__ __forceinline__ AbsDiffMat() {} - __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {} + __host__ __device__ __forceinline__ AbsDiffMat() {} + __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {} }; } @@ -1370,8 +1370,8 @@ namespace arithm return saturate_cast(x * x); } - __device__ __forceinline__ Sqr() {} - __device__ __forceinline__ Sqr(const Sqr& other) {} + __host__ __device__ __forceinline__ Sqr() {} + __host__ __device__ __forceinline__ Sqr(const Sqr&) {} }; } @@ -1466,8 +1466,8 @@ namespace arithm return saturate_cast(f(x)); } - __device__ __forceinline__ Exp() {} - __device__ __forceinline__ Exp(const Exp& other) {} + __host__ __device__ __forceinline__ Exp() {} + __host__ __device__ __forceinline__ Exp(const Exp&) {} }; } @@ -1507,8 +1507,8 @@ namespace arithm return vcmpeq4(a, b); } - __device__ __forceinline__ VCmpEq4() {} - __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {} + __host__ __device__ 
__forceinline__ VCmpEq4() {} + __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {} }; struct VCmpNe4 : binary_function { @@ -1517,8 +1517,8 @@ namespace arithm return vcmpne4(a, b); } - __device__ __forceinline__ VCmpNe4() {} - __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {} + __host__ __device__ __forceinline__ VCmpNe4() {} + __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {} }; struct VCmpLt4 : binary_function { @@ -1527,8 +1527,8 @@ namespace arithm return vcmplt4(a, b); } - __device__ __forceinline__ VCmpLt4() {} - __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {} + __host__ __device__ __forceinline__ VCmpLt4() {} + __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {} }; struct VCmpLe4 : binary_function { @@ -1537,8 +1537,8 @@ namespace arithm return vcmple4(a, b); } - __device__ __forceinline__ VCmpLe4() {} - __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {} + __host__ __device__ __forceinline__ VCmpLe4() {} + __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {} }; //////////////////////////////////// @@ -2008,8 +2008,8 @@ namespace arithm return vmin4(a, b); } - __device__ __forceinline__ VMin4() {} - __device__ __forceinline__ VMin4(const VMin4& other) {} + __host__ __device__ __forceinline__ VMin4() {} + __host__ __device__ __forceinline__ VMin4(const VMin4&) {} }; //////////////////////////////////// @@ -2021,8 +2021,8 @@ namespace arithm return vmin2(a, b); } - __device__ __forceinline__ VMin2() {} - __device__ __forceinline__ VMin2(const VMin2& other) {} + __host__ __device__ __forceinline__ VMin2() {} + __host__ __device__ __forceinline__ VMin2(const VMin2&) {} }; } @@ -2100,8 +2100,8 @@ namespace arithm return vmax4(a, b); } - __device__ __forceinline__ VMax4() {} - __device__ __forceinline__ VMax4(const VMax4& other) {} + __host__ __device__ __forceinline__ VMax4() {} + __host__ __device__ __forceinline__ VMax4(const VMax4&) {} }; //////////////////////////////////// @@ -2113,8 
+2113,8 @@ namespace arithm return vmax2(a, b); } - __device__ __forceinline__ VMax2() {} - __device__ __forceinline__ VMax2(const VMax2& other) {} + __host__ __device__ __forceinline__ VMax2() {} + __host__ __device__ __forceinline__ VMax2(const VMax2&) {} }; } diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu index faec89b95c..59eba26081 100644 --- a/modules/gpu/src/cuda/hough.cu +++ b/modules/gpu/src/cuda/hough.cu @@ -48,6 +48,7 @@ #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/emulation.hpp" #include "opencv2/gpu/device/vec_math.hpp" +#include "opencv2/gpu/device/functional.hpp" #include "opencv2/gpu/device/limits.hpp" #include "opencv2/gpu/device/dynamic_smem.hpp" @@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace device const int ind = ::atomicAdd(r_sizes + n, 1); if (ind < maxSize) - r_table(n, ind) = p - templCenter; + r_table(n, ind) = saturate_cast(p - templCenter); } void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount, @@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace device for (int j = 0; j < r_row_size; ++j) { - short2 c = p - r_row[j]; + int2 c = p - r_row[j]; c.x = __float2int_rn(c.x * idp); c.y = __float2int_rn(c.y * idp); diff --git a/modules/gpu/src/error.cpp b/modules/gpu/src/error.cpp index c155aa83bf..7f5d5f38d5 100644 --- a/modules/gpu/src/error.cpp +++ b/modules/gpu/src/error.cpp @@ -81,48 +81,90 @@ namespace const ErrorEntry npp_errors [] = { - error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), - error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), - error_entry( NPP_RESIZE_NO_OPERATION_ERROR ), - #if defined (_MSC_VER) error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ), #endif +#if NPP_VERSION < 5500 error_entry( NPP_BAD_ARG_ERROR ), - error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), - error_entry( NPP_TEXTURE_BIND_ERROR ), error_entry( NPP_COEFF_ERROR ), error_entry( NPP_RECT_ERROR ), error_entry( NPP_QUAD_ERROR ), + error_entry( 
NPP_MEMFREE_ERR ), + error_entry( NPP_MEMSET_ERR ), + error_entry( NPP_MEM_ALLOC_ERR ), + error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_MIRROR_FLIP_ERR ), + error_entry( NPP_INVALID_INPUT ), + error_entry( NPP_POINTER_ERROR ), + error_entry( NPP_WARNING ), + error_entry( NPP_ODD_ROI_WARNING ), +#else + error_entry( NPP_INVALID_HOST_POINTER_ERROR ), + error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ), + error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ), + error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ), + error_entry( NPP_MEMFREE_ERROR ), + error_entry( NPP_MEMSET_ERROR ), + error_entry( NPP_QUALITY_INDEX_ERROR ), + error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_CHANNEL_ORDER_ERROR ), + error_entry( NPP_ZERO_MASK_VALUE_ERROR ), + error_entry( NPP_QUADRANGLE_ERROR ), + error_entry( NPP_RECTANGLE_ERROR ), + error_entry( NPP_COEFFICIENT_ERROR ), + error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ), + error_entry( NPP_COI_ERROR ), + error_entry( NPP_DIVISOR_ERROR ), + error_entry( NPP_CHANNEL_ERROR ), + error_entry( NPP_STRIDE_ERROR ), + error_entry( NPP_ANCHOR_ERROR ), + error_entry( NPP_MASK_SIZE_ERROR ), + error_entry( NPP_MIRROR_FLIP_ERROR ), + error_entry( NPP_MOMENT_00_ZERO_ERROR ), + error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ), + error_entry( NPP_THRESHOLD_ERROR ), + error_entry( NPP_CONTEXT_MATCH_ERROR ), + error_entry( NPP_FFT_FLAG_ERROR ), + error_entry( NPP_FFT_ORDER_ERROR ), + error_entry( NPP_SCALE_RANGE_ERROR ), + error_entry( NPP_DATA_TYPE_ERROR ), + error_entry( NPP_OUT_OFF_RANGE_ERROR ), + error_entry( NPP_DIVIDE_BY_ZERO_ERROR ), + error_entry( NPP_MEMORY_ALLOCATION_ERR ), + error_entry( NPP_RANGE_ERROR ), + error_entry( NPP_BAD_ARGUMENT_ERROR ), + error_entry( NPP_NO_MEMORY_ERROR ), + error_entry( NPP_ERROR_RESERVED ), + error_entry( NPP_NO_OPERATION_WARNING ), + error_entry( NPP_DIVIDE_BY_ZERO_WARNING ), + error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ), +#endif + + error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ), + 
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ), + error_entry( NPP_RESIZE_NO_OPERATION_ERROR ), + error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ), + error_entry( NPP_TEXTURE_BIND_ERROR ), error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ), error_entry( NPP_NOT_EVEN_STEP_ERROR ), error_entry( NPP_INTERPOLATION_ERROR ), error_entry( NPP_RESIZE_FACTOR_ERROR ), error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ), - error_entry( NPP_MEMFREE_ERR ), - error_entry( NPP_MEMSET_ERR ), error_entry( NPP_MEMCPY_ERROR ), - error_entry( NPP_MEM_ALLOC_ERR ), - error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ), - error_entry( NPP_MIRROR_FLIP_ERR ), - error_entry( NPP_INVALID_INPUT ), error_entry( NPP_ALIGNMENT_ERROR ), error_entry( NPP_STEP_ERROR ), error_entry( NPP_SIZE_ERROR ), - error_entry( NPP_POINTER_ERROR ), error_entry( NPP_NULL_POINTER_ERROR ), error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ), error_entry( NPP_NOT_IMPLEMENTED_ERROR ), error_entry( NPP_ERROR ), error_entry( NPP_NO_ERROR ), error_entry( NPP_SUCCESS ), - error_entry( NPP_WARNING ), error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ), error_entry( NPP_MISALIGNED_DST_ROI_WARNING ), error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ), - error_entry( NPP_DOUBLE_SIZE_WARNING ), - error_entry( NPP_ODD_ROI_WARNING ) + error_entry( NPP_DOUBLE_SIZE_WARNING ) }; const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]); diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp index 761abb525f..056e5ef701 100644 --- a/modules/gpu/src/matrix_reductions.cpp +++ b/modules/gpu/src/matrix_reductions.cpp @@ -187,10 +187,20 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); - typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, - NppiSize oSizeROI, Npp64f* 
pRetVal); +#if CUDA_VERSION < 5050 + typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); - static const npp_norm_diff_func_t npp_norm_diff_func[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; + static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; +#else + typedef NppStatus (*func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, + NppiSize oSizeROI, Npp64f* pRetVal, Npp8u * pDeviceBuffer); + + typedef NppStatus (*buf_size_func_t)(NppiSize oSizeROI, int* hpBufferSize); + + static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; + + static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; +#endif NppiSize sz; sz.width = src1.cols; @@ -202,7 +212,16 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) DeviceBuffer dbuf; - nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf) ); +#if CUDA_VERSION < 5050 + nppSafeCall( funcs[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf) ); +#else + int bufSize; + buf_size_funcs[funcIdx](sz, &bufSize); + + GpuMat buf(1, bufSize, CV_8UC1); + + nppSafeCall( funcs[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf, buf.data) ); +#endif cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index 0394dba186..80e1da7953 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -130,7 +130,7 @@ typedef int Ncv32s; typedef unsigned int Ncv32u; typedef short Ncv16s; typedef unsigned short Ncv16u; -typedef char Ncv8s; 
+typedef signed char Ncv8s; typedef unsigned char Ncv8u; typedef float Ncv32f; typedef double Ncv64f; diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp index ec2f16ebb7..c1e06b434e 100644 --- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp +++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp @@ -51,7 +51,7 @@ template inline __host__ __device__ TBase _pixMaxVal(); template<> static inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} template<> static inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} template<> static inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} -template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return CHAR_MAX;} +template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} template<> static inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} template<> static inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} template<> static inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} @@ -61,7 +61,7 @@ template inline __host__ __device__ TBase _pixMinVal(); template<> static inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} template<> static inline __host__ __device__ Ncv16u _pixMinVal() {return 0;} template<> static inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return CHAR_MIN;} +template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} template<> static inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} template<> static inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} template<> static inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index f219089321..06d5386405 100644 --- a/modules/gpu/src/precomp.hpp +++ 
b/modules/gpu/src/precomp.hpp @@ -116,11 +116,13 @@ #define CUDART_MINIMUM_REQUIRED_VERSION 4010 #define NPP_MINIMUM_REQUIRED_VERSION 4100 + #define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD) + #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION) #error "Insufficient Cuda Runtime library version, please update it." #endif - #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION) + #if (NPP_VERSION < NPP_MINIMUM_REQUIRED_VERSION) #error "Insufficient NPP version, please update it." #endif diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index 1bc952c7a1..b622ad8ea9 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -352,7 +352,7 @@ GPU_TEST_P(Add_Scalar, WithOutMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::add(mat, val, dst_gold, cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -383,7 +383,7 @@ GPU_TEST_P(Add_Scalar, WithMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::add(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -567,7 +567,7 @@ GPU_TEST_P(Subtract_Scalar, WithOutMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 1.0); } } @@ -598,7 +598,7 @@ GPU_TEST_P(Subtract_Scalar, WithMask) cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::subtract(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); } } @@ -2148,7 +2148,7 @@ GPU_TEST_P(Min, Scalar) cv::Mat dst_gold = cv::min(src, val); - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } } @@ -2231,7 +2231,7 @@ GPU_TEST_P(Max, Scalar) cv::Mat dst_gold = cv::max(src, val); - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5); } } diff --git a/modules/gpu/test/test_optflow.cpp b/modules/gpu/test/test_optflow.cpp index 9e30b92087..53b93a096b 100644 --- a/modules/gpu/test/test_optflow.cpp +++ b/modules/gpu/test/test_optflow.cpp @@ -102,8 +102,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression) for (int i = 0; i < v_gold.rows; ++i) f.read(v_gold.ptr(i), v_gold.cols * sizeof(float)); - EXPECT_MAT_NEAR(u_gold, u, 0); - EXPECT_MAT_NEAR(v_gold, v, 0); + EXPECT_MAT_SIMILAR(u_gold, u, 1e-3); + EXPECT_MAT_SIMILAR(v_gold, v, 1e-3); #else std::ofstream f(fname.c_str(), std::ios_base::binary); diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 4c60867af3..05ab99a78c 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -95,14 +95,10 @@ elseif(HAVE_QT) endif() include(${QT_USE_FILE}) - if(QT_INCLUDE_DIR) - ocv_include_directories(${QT_INCLUDE_DIR}) - endif() - QT4_ADD_RESOURCES(_RCC_OUTFILES src/window_QT.qrc) QT4_WRAP_CPP(_MOC_OUTFILES src/window_QT.h) - list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES} ${QT_QTTEST_LIBRARY}) + list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) list(APPEND highgui_srcs src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) ocv_check_flag_support(CXX -Wno-missing-declarations 
_have_flag) if(${_have_flag}) @@ -183,7 +179,11 @@ if(HAVE_XIMEA) if(XIMEA_LIBRARY_DIR) link_directories(${XIMEA_LIBRARY_DIR}) endif() - list(APPEND HIGHGUI_LIBRARIES m3api) + if(CMAKE_CL_64) + list(APPEND HIGHGUI_LIBRARIES m3apiX64) + else() + list(APPEND HIGHGUI_LIBRARIES m3api) + endif() endif(HAVE_XIMEA) if(HAVE_FFMPEG) diff --git a/modules/highgui/include/opencv2/highgui/cap_ios.h b/modules/highgui/include/opencv2/highgui/cap_ios.h index 5bd5fe3c67..db3928f13b 100644 --- a/modules/highgui/include/opencv2/highgui/cap_ios.h +++ b/modules/highgui/include/opencv2/highgui/cap_ios.h @@ -1,6 +1,4 @@ -/* - * cap_ios.h - * For iOS video I/O +/* For iOS video I/O * by Eduard Feicho on 29/07/12 * Copyright 2012. All rights reserved. * @@ -90,6 +88,12 @@ - (void)createVideoPreviewLayer; - (void)updateOrientation; +- (void)lockFocus; +- (void)unlockFocus; +- (void)lockExposure; +- (void)unlockExposure; +- (void)lockBalance; +- (void)unlockBalance; @end @@ -116,6 +120,7 @@ BOOL grayscaleMode; BOOL recordVideo; + BOOL rotateVideo; AVAssetWriterInput* recordAssetWriterInput; AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; AVAssetWriter* recordAssetWriter; @@ -128,6 +133,7 @@ @property (nonatomic, assign) BOOL grayscaleMode; @property (nonatomic, assign) BOOL recordVideo; +@property (nonatomic, assign) BOOL rotateVideo; @property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput; @property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; @property (nonatomic, retain) AVAssetWriter* recordAssetWriter; diff --git a/modules/highgui/include/opencv2/highgui/highgui_c.h b/modules/highgui/include/opencv2/highgui/highgui_c.h index 12be9867a2..9204ee81f4 100644 --- a/modules/highgui/include/opencv2/highgui/highgui_c.h +++ b/modules/highgui/include/opencv2/highgui/highgui_c.h @@ -558,9 +558,11 @@ CVAPI(int) cvGetCaptureDomain( CvCapture* capture); /* "black box" video file writer structure */ typedef struct CvVideoWriter 
CvVideoWriter; +#define CV_FOURCC_MACRO(c1, c2, c3, c4) (((c1) & 255) + (((c2) & 255) << 8) + (((c3) & 255) << 16) + (((c4) & 255) << 24)) + CV_INLINE int CV_FOURCC(char c1, char c2, char c3, char c4) { - return (c1 & 255) + ((c2 & 255) << 8) + ((c3 & 255) << 16) + ((c4 & 255) << 24); + return CV_FOURCC_MACRO(c1, c2, c3, c4); } #define CV_FOURCC_PROMPT -1 /* Open Codec Selection Dialog (Windows only) */ diff --git a/modules/highgui/perf/perf_input.cpp b/modules/highgui/perf/perf_input.cpp index 0c1e8e0a73..414c85365f 100644 --- a/modules/highgui/perf/perf_input.cpp +++ b/modules/highgui/perf/perf_input.cpp @@ -11,11 +11,21 @@ using std::tr1::get; typedef perf::TestBaseWithParam VideoCapture_Reading; +#if defined(HAVE_MSMF) +// MPEG2 is not supported by Media Foundation yet +// http://social.msdn.microsoft.com/Forums/en-US/mediafoundationdevelopment/thread/39a36231-8c01-40af-9af5-3c105d684429 +PERF_TEST_P(VideoCapture_Reading, ReadFile, testing::Values( "highgui/video/big_buck_bunny.avi", + "highgui/video/big_buck_bunny.mov", + "highgui/video/big_buck_bunny.mp4", + "highgui/video/big_buck_bunny.wmv" ) ) + +#else PERF_TEST_P(VideoCapture_Reading, ReadFile, testing::Values( "highgui/video/big_buck_bunny.avi", "highgui/video/big_buck_bunny.mov", "highgui/video/big_buck_bunny.mp4", "highgui/video/big_buck_bunny.mpg", "highgui/video/big_buck_bunny.wmv" ) ) +#endif { string filename = getDataPath(GetParam()); diff --git a/modules/highgui/perf/perf_output.cpp b/modules/highgui/perf/perf_output.cpp index 6428bb4f03..2adfe89655 100644 --- a/modules/highgui/perf/perf_output.cpp +++ b/modules/highgui/perf/perf_output.cpp @@ -22,10 +22,16 @@ PERF_TEST_P(VideoWriter_Writing, WriteFrame, { string filename = getDataPath(get<0>(GetParam())); bool isColor = get<1>(GetParam()); + Mat image = imread(filename, 1); +#if defined(HAVE_MSMF) && !defined(HAVE_VFW) && !defined(HAVE_FFMPEG) // VFW has greater priority + VideoWriter writer(cv::tempfile(".wmv"), CV_FOURCC('W', 'M', 'V', '3'), + 
25, cv::Size(image.cols, image.rows), isColor); +#else + VideoWriter writer(cv::tempfile(".avi"), CV_FOURCC('X', 'V', 'I', 'D'), + 25, cv::Size(image.cols, image.rows), isColor); +#endif - VideoWriter writer(cv::tempfile(".avi"), CV_FOURCC('X', 'V', 'I', 'D'), 25, cv::Size(640, 480), isColor); - - TEST_CYCLE() { Mat image = imread(filename, 1); writer << image; } + TEST_CYCLE() { image = imread(filename, 1); writer << image; } bool dummy = writer.isOpened(); SANITY_CHECK(dummy); diff --git a/modules/highgui/perf/perf_precomp.hpp b/modules/highgui/perf/perf_precomp.hpp index 529187d3b2..d6b28b6d23 100644 --- a/modules/highgui/perf/perf_precomp.hpp +++ b/modules/highgui/perf/perf_precomp.hpp @@ -21,6 +21,7 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ defined(HAVE_FFMPEG) || \ + defined(HAVE_MSMF) || \ defined(HAVE_VFW) /*defined(HAVE_OPENNI) too specialized */ \ @@ -34,6 +35,7 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ defined(HAVE_FFMPEG) || \ + defined(HAVE_MSMF) || \ defined(HAVE_VFW) # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 2c3b3a94c3..cc92da3d0c 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -117,6 +117,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) #ifdef HAVE_DSHOW CV_CAP_DSHOW, #endif +#ifdef HAVE_MSMF + CV_CAP_MSMF, +#endif #if 1 CV_CAP_IEEE1394, // identical to CV_CAP_DC1394 #endif @@ -196,13 +199,6 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) switch (domains[i]) { -#ifdef HAVE_MSMF - case CV_CAP_MSMF: - capture = cvCreateCameraCapture_MSMF (index); - if (capture) - return capture; - break; -#endif #ifdef HAVE_DSHOW case CV_CAP_DSHOW: capture = cvCreateCameraCapture_DShow (index); @@ -210,7 +206,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) return capture; break; #endif - +#ifdef HAVE_MSMF + case CV_CAP_MSMF: + capture = cvCreateCameraCapture_MSMF (index); + if 
(capture) + return capture; + break; +#endif #ifdef HAVE_TYZX case CV_CAP_STEREO: capture = cvCreateCameraCapture_TYZX (index); @@ -218,14 +220,12 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) return capture; break; #endif - - case CV_CAP_VFW: #ifdef HAVE_VFW + case CV_CAP_VFW: capture = cvCreateCameraCapture_VFW (index); if (capture) return capture; #endif - #if defined HAVE_LIBV4L || defined HAVE_CAMV4L || defined HAVE_CAMV4L2 || defined HAVE_VIDEOIO capture = cvCreateCameraCapture_V4L (index); if (capture) @@ -358,6 +358,16 @@ CV_IMPL CvCapture * cvCreateFileCapture (const char * filename) if (! result) result = cvCreateFileCapture_FFMPEG_proxy (filename); +#ifdef HAVE_VFW + if (! result) + result = cvCreateFileCapture_VFW (filename); +#endif + +#ifdef HAVE_MSMF + if (! result) + result = cvCreateFileCapture_MSMF (filename); +#endif + #ifdef HAVE_XINE if (! result) result = cvCreateFileCapture_XINE (filename); @@ -406,6 +416,16 @@ CV_IMPL CvVideoWriter* cvCreateVideoWriter( const char* filename, int fourcc, if(!result) result = cvCreateVideoWriter_FFMPEG_proxy (filename, fourcc, fps, frameSize, is_color); +#ifdef HAVE_VFW + if(!result) + result = cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, is_color); +#endif + +#ifdef HAVE_MSMF + if (!result) + result = cvCreateVideoWriter_MSMF(filename, fourcc, fps, frameSize, is_color); +#endif + /* #ifdef HAVE_XINE if(!result) result = cvCreateVideoWriter_XINE(filename, fourcc, fps, frameSize, is_color); diff --git a/modules/highgui/src/cap_dc1394_v2.cpp b/modules/highgui/src/cap_dc1394_v2.cpp index f197337cd1..0d5f898186 100644 --- a/modules/highgui/src/cap_dc1394_v2.cpp +++ b/modules/highgui/src/cap_dc1394_v2.cpp @@ -45,7 +45,16 @@ #include #include -#include +#ifdef WIN32 + // On Windows, we have no sys/select.h, but we need to pick up + // select() which is in winsock2. 
+ #ifndef __SYS_SELECT_H__ + #define __SYS_SELECT_H__ 1 + #include + #endif +#else + #include +#endif /*WIN32*/ #include #include #include diff --git a/modules/highgui/src/cap_dshow.cpp b/modules/highgui/src/cap_dshow.cpp index 21fb947b15..b7cfbd94b3 100644 --- a/modules/highgui/src/cap_dshow.cpp +++ b/modules/highgui/src/cap_dshow.cpp @@ -3195,8 +3195,10 @@ IplImage* CvCaptureCAM_DShow::retrieveFrame(int) frame = cvCreateImage( cvSize(w,h), 8, 3 ); } - VI.getPixels( index, (uchar*)frame->imageData, false, true ); - return frame; + if (VI.getPixels( index, (uchar*)frame->imageData, false, true )) + return frame; + else + return NULL; } double CvCaptureCAM_DShow::getProperty( int property_id ) diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp index 669ebda125..bf73c0810f 100644 --- a/modules/highgui/src/cap_ffmpeg.cpp +++ b/modules/highgui/src/cap_ffmpeg.cpp @@ -209,11 +209,7 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename) if( result->open( filename )) return result; delete result; -#ifdef HAVE_VFW - return cvCreateFileCapture_VFW(filename); -#else return 0; -#endif } class CvVideoWriter_FFMPEG_proxy : @@ -263,9 +259,5 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc if( result->open( filename, fourcc, fps, frameSize, isColor != 0 )) return result; delete result; -#ifdef HAVE_VFW - return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); - #else return 0; -#endif } diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index b6a7d944fa..38e1c12e68 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -2,6 +2,7 @@ * cap_ios_abstract_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -405,4 +406,89 @@ } } +- (void)lockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked focus configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeContinuousAutoFocus; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autofocus configuration %@", [error localizedDescription]); + } + } +} + +- (void)lockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeContinuousAutoExposure; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]); + } + } +} 
+ +- (void)lockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked white balance configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for auto white balance configuration %@", [error localizedDescription]); + } + } +} + @end + diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm index 1f9ea14bf8..ac85f79ee5 100644 --- a/modules/highgui/src/cap_ios_video_camera.mm +++ b/modules/highgui/src/cap_ios_video_camera.mm @@ -2,6 +2,7 @@ * cap_ios_video_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -30,7 +31,6 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" - #import @@ -70,6 +70,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; @synthesize videoDataOutput; @synthesize recordVideo; +@synthesize rotateVideo; //@synthesize videoFileOutput; @synthesize recordAssetWriterInput; @synthesize recordPixelBufferAdaptor; @@ -85,6 +86,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; if (self) { self.useAVCaptureVideoPreviewLayer = NO; self.recordVideo = NO; + self.rotateVideo = NO; } return self; } @@ -269,13 +271,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } - - - #pragma mark - Private Interface - - - (void)createVideoDataOutput; { // Make a video data output @@ -389,6 +386,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; [self.parentView.layer addSublayer:self.customPreviewLayer]; } +- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image +{ + + CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image)); + NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys: + [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey, + [NSNumber numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey, + nil]; + CVPixelBufferRef pxbuffer = NULL; + CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width, + frameSize.height, kCVPixelFormatType_32ARGB, (CFDictionaryRef) CFBridgingRetain(options), + &pxbuffer); + NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL); + + CVPixelBufferLockBaseAddress(pxbuffer, 0); + void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer); + + + CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB(); + CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width, + frameSize.height, 8, 4*frameSize.width, 
rgbColorSpace, + kCGImageAlphaPremultipliedFirst); + + CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image), + CGImageGetHeight(image)), image); + CGColorSpaceRelease(rgbColorSpace); + CGContextRelease(context); + + CVPixelBufferUnlockBaseAddress(pxbuffer, 0); + + return pxbuffer; +} #pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate @@ -522,7 +551,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } if (self.recordAssetWriterInput.readyForMoreMediaData) { - if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer + CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage]; + if (! [self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer withPresentationTime:lastSampleTime] ) { NSLog(@"Video Writing Error"); } @@ -543,9 +573,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; - (void)updateOrientation; { - NSLog(@"rotate.."); - self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); - [self layoutPreviewLayer]; + if (self.rotateVideo == YES) + { + NSLog(@"rotate.."); + self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); + [self layoutPreviewLayer]; + } } @@ -583,3 +616,4 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } @end + diff --git a/modules/highgui/src/cap_msmf.cpp b/modules/highgui/src/cap_msmf.cpp index 52b780463a..09f65b7e87 100644 --- a/modules/highgui/src/cap_msmf.cpp +++ b/modules/highgui/src/cap_msmf.cpp @@ -53,7 +53,8 @@ #include #include #include -#include "Strsafe.h" +#include +#include #include #include #include @@ -61,18 +62,27 @@ #include #include #include + #pragma warning(disable:4503) #pragma comment(lib, "mfplat") #pragma comment(lib, "mf") #pragma comment(lib, "mfuuid") #pragma comment(lib, "Strmiids") +#pragma comment(lib, "Mfreadwrite") #pragma 
comment(lib, "MinCore_Downlevel") + +// for ComPtr usage +#include +using namespace Microsoft::WRL; + struct IMFMediaType; struct IMFActivate; struct IMFMediaSource; struct IMFAttributes; + namespace { + template void SafeRelease(T **ppT) { if (*ppT) @@ -81,7 +91,8 @@ template void SafeRelease(T **ppT) *ppT = NULL; } } - /// Class for printing info into consol + +/// Class for printing info into consol class DebugPrintOut { public: @@ -93,6 +104,7 @@ public: private: DebugPrintOut(void); }; + // Structure for collecting info about types of video, which are supported by current video device struct MediaType { @@ -101,14 +113,14 @@ struct MediaType unsigned int width; unsigned int MF_MT_YUV_MATRIX; unsigned int MF_MT_VIDEO_LIGHTING; - unsigned int MF_MT_DEFAULT_STRIDE; + int MF_MT_DEFAULT_STRIDE; // stride is negative if image is bottom-up unsigned int MF_MT_VIDEO_CHROMA_SITING; GUID MF_MT_AM_FORMAT_TYPE; wchar_t *pMF_MT_AM_FORMAT_TYPEName; unsigned int MF_MT_FIXED_SIZE_SAMPLES; unsigned int MF_MT_VIDEO_NOMINAL_RANGE; - unsigned int MF_MT_FRAME_RATE; - unsigned int MF_MT_FRAME_RATE_low; + unsigned int MF_MT_FRAME_RATE_NUMERATOR; + unsigned int MF_MT_FRAME_RATE_DENOMINATOR; unsigned int MF_MT_PIXEL_ASPECT_RATIO; unsigned int MF_MT_PIXEL_ASPECT_RATIO_low; unsigned int MF_MT_ALL_SAMPLES_INDEPENDENT; @@ -127,6 +139,7 @@ struct MediaType ~MediaType(); void Clear(); }; + /// Class for parsing info from IMFMediaType into the local MediaType class FormatReader { @@ -136,9 +149,10 @@ public: private: FormatReader(void); }; + DWORD WINAPI MainThreadFunction( LPVOID lpParam ); typedef void(*emergensyStopEventCallback)(int, void *); -typedef unsigned char BYTE; + class RawImage { public: @@ -156,6 +170,7 @@ private: unsigned char *ri_pixels; RawImage(unsigned int size); }; + // Class for grabbing image from video stream class ImageGrabber : public IMFSampleGrabberSinkCallback { @@ -163,13 +178,21 @@ public: ~ImageGrabber(void); HRESULT initImageGrabber(IMFMediaSource *pSource, 
GUID VideoFormat); HRESULT startGrabbing(void); + void pauseGrabbing(); + void resumeGrabbing(); void stopGrabbing(); RawImage *getRawImage(); // Function of creation of the instance of the class - static HRESULT CreateInstance(ImageGrabber **ppIG,unsigned int deviceID); + static HRESULT CreateInstance(ImageGrabber **ppIG, unsigned int deviceID, bool synchronous = false); + + const HANDLE ig_hFrameReady; + const HANDLE ig_hFrameGrabbed; + const HANDLE ig_hFinish; + private: bool ig_RIE; bool ig_Close; + bool ig_Synchronous; long m_cRef; unsigned int ig_DeviceID; IMFMediaSource *ig_pSource; @@ -178,19 +201,11 @@ private: RawImage *ig_RIFirst; RawImage *ig_RISecond; RawImage *ig_RIOut; - ImageGrabber(unsigned int deviceID); + ImageGrabber(unsigned int deviceID, bool synchronous); HRESULT CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo); - HRESULT AddSourceNode( - IMFTopology *pTopology, - IMFMediaSource *pSource, - IMFPresentationDescriptor *pPD, - IMFStreamDescriptor *pSD, - IMFTopologyNode **ppNode); - HRESULT AddOutputNode( - IMFTopology *pTopology, - IMFActivate *pActivate, - DWORD dwId, - IMFTopologyNode **ppNode); + HRESULT AddSourceNode(IMFTopology *pTopology, IMFMediaSource *pSource, + IMFPresentationDescriptor *pPD, IMFStreamDescriptor *pSD, IMFTopologyNode **ppNode); + HRESULT AddOutputNode(IMFTopology *pTopology, IMFActivate *pActivate, DWORD dwId, IMFTopologyNode **ppNode); // IUnknown methods STDMETHODIMP QueryInterface(REFIID iid, void** ppv); STDMETHODIMP_(ULONG) AddRef(); @@ -208,13 +223,14 @@ private: DWORD dwSampleSize); STDMETHODIMP OnShutdown(); }; + /// Class for controlling of thread of the grabbing raw data from video device class ImageGrabberThread { friend DWORD WINAPI MainThreadFunction( LPVOID lpParam ); public: ~ImageGrabberThread(void); - static HRESULT CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID); + static HRESULT CreateInstance(ImageGrabberThread 
**ppIGT, IMFMediaSource *pSource, unsigned int deviceID, bool synchronious = false); void start(); void stop(); void setEmergencyStopEvent(void *userData, void(*func)(int, void *)); @@ -222,7 +238,7 @@ public: protected: virtual void run(); private: - ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID); + ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID, bool synchronious); HANDLE igt_Handle; DWORD igt_ThreadIdArray; ImageGrabber *igt_pImageGrabber; @@ -231,6 +247,7 @@ private: bool igt_stop; unsigned int igt_DeviceID; }; + // Structure for collecting info about one parametr of current video device struct Parametr { @@ -242,6 +259,7 @@ struct Parametr long Flag; Parametr(); }; + // Structure for collecting info about 17 parametrs of current video device struct CamParametrs { @@ -263,11 +281,13 @@ struct CamParametrs Parametr Iris; Parametr Focus; }; + typedef std::wstring String; typedef std::vector vectorNum; typedef std::map SUBTYPEMap; typedef std::map FrameRateMap; typedef void(*emergensyStopEventCallback)(int, void *); + /// Class for controlling of video device class videoDevice { @@ -311,7 +331,7 @@ private: IMFMediaSource *vd_pSource; emergensyStopEventCallback vd_func; void *vd_userData; - long enumerateCaptureFormats(IMFMediaSource *pSource); + HRESULT enumerateCaptureFormats(IMFMediaSource *pSource); long setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex); void buildLibraryofTypes(); int findType(unsigned int size, unsigned int frameRate = 0); @@ -319,6 +339,7 @@ private: long initDevice(); long checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice); }; + /// Class for managing of list of video devices class videoDevices { @@ -334,6 +355,7 @@ private: std::vector vds_Devices; videoDevices(void); }; + // Class for creating of Media Foundation context class Media_Foundation { @@ -344,6 +366,7 @@ public: private: Media_Foundation(void); }; + /// The only visiable class for controlling of video 
devices in format singelton class videoInput { @@ -393,23 +416,27 @@ public: bool isFrameNew(int deviceID); // Writing of Raw Data pixels from video device with deviceID with correction of RedAndBlue flipping flipRedAndBlue and vertical flipping flipImage bool getPixels(int deviceID, unsigned char * pixels, bool flipRedAndBlue = false, bool flipImage = false); + static void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip); private: bool accessToDevices; videoInput(void); - void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip); void updateListOfDevices(); }; + DebugPrintOut::DebugPrintOut(void):verbose(true) { } + DebugPrintOut::~DebugPrintOut(void) { } + DebugPrintOut& DebugPrintOut::getInstance() { static DebugPrintOut instance; return instance; } + void DebugPrintOut::printOut(const wchar_t *format, ...) { if(verbose) @@ -430,14 +457,17 @@ void DebugPrintOut::printOut(const wchar_t *format, ...) 
va_end (args); } } + void DebugPrintOut::setVerbose(bool state) { verbose = state; } + LPCWSTR GetGUIDNameConstNew(const GUID& guid); HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz); HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index); HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out); + unsigned int *GetParametr(GUID guid, MediaType &out) { if(guid == MF_MT_YUV_MATRIX) @@ -445,7 +475,7 @@ unsigned int *GetParametr(GUID guid, MediaType &out) if(guid == MF_MT_VIDEO_LIGHTING) return &(out.MF_MT_VIDEO_LIGHTING); if(guid == MF_MT_DEFAULT_STRIDE) - return &(out.MF_MT_DEFAULT_STRIDE); + return (unsigned int*)&(out.MF_MT_DEFAULT_STRIDE); if(guid == MF_MT_VIDEO_CHROMA_SITING) return &(out.MF_MT_VIDEO_CHROMA_SITING); if(guid == MF_MT_VIDEO_NOMINAL_RANGE) @@ -462,6 +492,7 @@ unsigned int *GetParametr(GUID guid, MediaType &out) return &(out.MF_MT_INTERLACE_MODE); return NULL; } + HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index, MediaType &out) { WCHAR *pGuidName = NULL; @@ -548,6 +579,7 @@ done: PropVariantClear(&var); return hr; } + HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz) { HRESULT hr = S_OK; @@ -589,14 +621,17 @@ done: } return hr; } + void LogUINT32AsUINT64New(const PROPVARIANT& var, UINT32 &uHigh, UINT32 &uLow) { Unpack2UINT32AsUINT64(var.uhVal.QuadPart, &uHigh, &uLow); } + float OffsetToFloatNew(const MFOffset& offset) { return offset.value + (static_cast(offset.fract) / 65536.0f); } + HRESULT LogVideoAreaNew(const PROPVARIANT& var) { if (var.caub.cElems < sizeof(MFVideoArea)) @@ -605,8 +640,13 @@ HRESULT LogVideoAreaNew(const PROPVARIANT& var) } return S_OK; } + HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out) { + if (guid == MF_MT_DEFAULT_STRIDE) + { + out.MF_MT_DEFAULT_STRIDE = var.intVal; + } else if (guid == MF_MT_FRAME_SIZE) { UINT32 uHigh = 0, uLow = 0; @@ -620,8 +660,8 @@ HRESULT SpecialCaseAttributeValueNew(GUID guid, 
const PROPVARIANT& var, MediaTyp { UINT32 uHigh = 0, uLow = 0; LogUINT32AsUINT64New(var, uHigh, uLow); - out.MF_MT_FRAME_RATE = uHigh; - out.MF_MT_FRAME_RATE_low = uLow; + out.MF_MT_FRAME_RATE_NUMERATOR = uHigh; + out.MF_MT_FRAME_RATE_DENOMINATOR = uLow; } else if (guid == MF_MT_FRAME_RATE_RANGE_MAX) @@ -653,9 +693,11 @@ HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaTyp } return S_OK; } + #ifndef IF_EQUAL_RETURN #define IF_EQUAL_RETURN(param, val) if(val == param) return L#val #endif + LPCWSTR GetGUIDNameConstNew(const GUID& guid) { IF_EQUAL_RETURN(guid, MF_MT_MAJOR_TYPE); @@ -800,9 +842,11 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid) IF_EQUAL_RETURN(guid, MFAudioFormat_ADTS); // WAVE_FORMAT_MPEG_ADTS_AAC return NULL; } + FormatReader::FormatReader(void) { } + MediaType FormatReader::Read(IMFMediaType *pType) { UINT32 count = 0; @@ -833,32 +877,57 @@ MediaType FormatReader::Read(IMFMediaType *pType) } return out; } + FormatReader::~FormatReader(void) { } + #define CHECK_HR(x) if (FAILED(x)) { goto done; } -ImageGrabber::ImageGrabber(unsigned int deviceID): m_cRef(1), ig_DeviceID(deviceID), ig_pSource(NULL), ig_pSession(NULL), ig_pTopology(NULL), ig_RIE(true), ig_Close(false) -{ -} + +ImageGrabber::ImageGrabber(unsigned int deviceID, bool synchronous): + m_cRef(1), + ig_DeviceID(deviceID), + ig_pSource(NULL), + ig_pSession(NULL), + ig_pTopology(NULL), + ig_RIE(true), + ig_Close(false), + ig_Synchronous(synchronous), + ig_hFrameReady(synchronous ? CreateEvent(NULL, FALSE, FALSE, NULL): 0), + ig_hFrameGrabbed(synchronous ? 
CreateEvent(NULL, FALSE, TRUE, NULL): 0), + ig_hFinish(CreateEvent(NULL, TRUE, FALSE, NULL)) +{} + ImageGrabber::~ImageGrabber(void) { if (ig_pSession) { ig_pSession->Shutdown(); } - //SafeRelease(&ig_pSession); - //SafeRelease(&ig_pTopology); + + CloseHandle(ig_hFinish); + + if (ig_Synchronous) + { + CloseHandle(ig_hFrameReady); + CloseHandle(ig_hFrameGrabbed); + } + + SafeRelease(&ig_pSession); + SafeRelease(&ig_pTopology); DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class \n", ig_DeviceID); + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class\n", ig_DeviceID); } + HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat) { - IMFActivate *pSinkActivate = NULL; - IMFMediaType *pType = NULL; - IMFPresentationDescriptor *pPD = NULL; - IMFStreamDescriptor *pSD = NULL; - IMFMediaTypeHandler *pHandler = NULL; - IMFMediaType *pCurrentType = NULL; + ComPtr pSinkActivate = NULL; + ComPtr pType = NULL; + ComPtr pPD = NULL; + ComPtr pSD = NULL; + ComPtr pHandler = NULL; + ComPtr pCurrentType = NULL; HRESULT hr = S_OK; MediaType MT; // Clean up. 
@@ -871,30 +940,32 @@ HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat ig_pSource = pSource; hr = pSource->CreatePresentationDescriptor(&pPD); if (FAILED(hr)) + { goto err; + } BOOL fSelected; hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); - if (FAILED(hr)) + if (FAILED(hr)) { goto err; + } hr = pSD->GetMediaTypeHandler(&pHandler); - if (FAILED(hr)) + if (FAILED(hr)) { goto err; + } DWORD cTypes = 0; hr = pHandler->GetMediaTypeCount(&cTypes); - if (FAILED(hr)) + if (FAILED(hr)) { goto err; + } if(cTypes > 0) { hr = pHandler->GetCurrentMediaType(&pCurrentType); - if (FAILED(hr)) + if (FAILED(hr)) { goto err; - MT = FormatReader::Read(pCurrentType); + } + MT = FormatReader::Read(pCurrentType.Get()); } err: - SafeRelease(&pPD); - SafeRelease(&pSD); - SafeRelease(&pHandler); - SafeRelease(&pCurrentType); unsigned int sizeRawImage = 0; if(VideoFormat == MFVideoFormat_RGB24) { @@ -910,17 +981,17 @@ err: // Configure the media type that the Sample Grabber will receive. // Setting the major and subtype is usually enough for the topology loader // to resolve the topology. - CHECK_HR(hr = MFCreateMediaType(&pType)); + CHECK_HR(hr = MFCreateMediaType(pType.GetAddressOf())); CHECK_HR(hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)); CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, VideoFormat)); // Create the sample grabber sink. - CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType, this, &pSinkActivate)); + CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType.Get(), this, pSinkActivate.GetAddressOf())); // To run as fast as possible, set this attribute (requires Windows 7): CHECK_HR(hr = pSinkActivate->SetUINT32(MF_SAMPLEGRABBERSINK_IGNORE_CLOCK, TRUE)); // Create the Media Session. CHECK_HR(hr = MFCreateMediaSession(NULL, &ig_pSession)); // Create the topology. 
- CHECK_HR(hr = CreateTopology(pSource, pSinkActivate, &ig_pTopology)); + CHECK_HR(hr = CreateTopology(pSource, pSinkActivate.Get(), &ig_pTopology)); done: // Clean up. if (FAILED(hr)) @@ -932,10 +1003,10 @@ done: SafeRelease(&ig_pSession); SafeRelease(&ig_pTopology); } - SafeRelease(&pSinkActivate); - SafeRelease(&pType); + return hr; } + void ImageGrabber::stopGrabbing() { if(ig_pSession) @@ -943,16 +1014,17 @@ void ImageGrabber::stopGrabbing() DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Stopping of of grabbing of images\n", ig_DeviceID); } + HRESULT ImageGrabber::startGrabbing(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - IMFMediaEvent *pEvent = NULL; + ComPtr pEvent = NULL; PROPVARIANT var; PropVariantInit(&var); HRESULT hr = S_OK; - CHECK_HR(hr = ig_pSession->SetTopology(0, ig_pTopology)); - CHECK_HR(hr = ig_pSession->Start(&GUID_NULL, &var)); + hr = ig_pSession->SetTopology(0, ig_pTopology); DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Start Grabbing of the images\n", ig_DeviceID); + hr = ig_pSession->Start(&GUID_NULL, &var); for(;;) { HRESULT hrStatus = S_OK; @@ -992,27 +1064,41 @@ HRESULT ImageGrabber::startGrabbing(void) DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEVideoCaptureDeviceRemoved \n", ig_DeviceID); break; } - SafeRelease(&pEvent); + if ((met == MEError) || (met == MENonFatalError)) + { + pEvent->GetStatus(&hrStatus); + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEError | MENonFatalError: %u\n", ig_DeviceID, hrStatus); + break; + } } DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Finish startGrabbing \n", ig_DeviceID); + done: - SafeRelease(&pEvent); - SafeRelease(&ig_pSession); - SafeRelease(&ig_pTopology); + SetEvent(ig_hFinish); + return hr; } + +void ImageGrabber::pauseGrabbing() +{ +} + +void ImageGrabber::resumeGrabbing() +{ +} + HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo) { - IMFTopology *pTopology = 
NULL; - IMFPresentationDescriptor *pPD = NULL; - IMFStreamDescriptor *pSD = NULL; - IMFMediaTypeHandler *pHandler = NULL; - IMFTopologyNode *pNode1 = NULL; - IMFTopologyNode *pNode2 = NULL; + IMFTopology* pTopology = NULL; + ComPtr pPD = NULL; + ComPtr pSD = NULL; + ComPtr pHandler = NULL; + ComPtr pNode1 = NULL; + ComPtr pNode2 = NULL; HRESULT hr = S_OK; DWORD cStreams = 0; CHECK_HR(hr = MFCreateTopology(&pTopology)); - CHECK_HR(hr = pSource->CreatePresentationDescriptor(&pPD)); + CHECK_HR(hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf())); CHECK_HR(hr = pPD->GetStreamDescriptorCount(&cStreams)); for (DWORD i = 0; i < cStreams; i++) { @@ -1024,29 +1110,23 @@ HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSink CHECK_HR(hr = pHandler->GetMajorType(&majorType)); if (majorType == MFMediaType_Video && fSelected) { - CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD, pSD, &pNode1)); - CHECK_HR(hr = AddOutputNode(pTopology, pSinkActivate, 0, &pNode2)); - CHECK_HR(hr = pNode1->ConnectOutput(0, pNode2, 0)); + CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD.Get(), pSD.Get(), pNode1.GetAddressOf())); + CHECK_HR(hr = AddOutputNode(pTopology, pSinkActivate, 0, pNode2.GetAddressOf())); + CHECK_HR(hr = pNode1->ConnectOutput(0, pNode2.Get(), 0)); break; } else { CHECK_HR(hr = pPD->DeselectStream(i)); } - SafeRelease(&pSD); - SafeRelease(&pHandler); } *ppTopo = pTopology; (*ppTopo)->AddRef(); + done: - SafeRelease(&pTopology); - SafeRelease(&pNode1); - SafeRelease(&pNode2); - SafeRelease(&pPD); - SafeRelease(&pSD); - SafeRelease(&pHandler); return hr; } + HRESULT ImageGrabber::AddSourceNode( IMFTopology *pTopology, // Topology. IMFMediaSource *pSource, // Media source. @@ -1054,43 +1134,45 @@ HRESULT ImageGrabber::AddSourceNode( IMFStreamDescriptor *pSD, // Stream descriptor. IMFTopologyNode **ppNode) // Receives the node pointer. 
{ - IMFTopologyNode *pNode = NULL; + ComPtr pNode = NULL; HRESULT hr = S_OK; - CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, &pNode)); + CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, pNode.GetAddressOf())); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_SOURCE, pSource)); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_PRESENTATION_DESCRIPTOR, pPD)); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_STREAM_DESCRIPTOR, pSD)); - CHECK_HR(hr = pTopology->AddNode(pNode)); + CHECK_HR(hr = pTopology->AddNode(pNode.Get())); // Return the pointer to the caller. - *ppNode = pNode; + *ppNode = pNode.Get(); (*ppNode)->AddRef(); + done: - SafeRelease(&pNode); return hr; } + HRESULT ImageGrabber::AddOutputNode( IMFTopology *pTopology, // Topology. IMFActivate *pActivate, // Media sink activation object. DWORD dwId, // Identifier of the stream sink. IMFTopologyNode **ppNode) // Receives the node pointer. { - IMFTopologyNode *pNode = NULL; + ComPtr pNode = NULL; HRESULT hr = S_OK; - CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, &pNode)); + CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, pNode.GetAddressOf())); CHECK_HR(hr = pNode->SetObject(pActivate)); CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_STREAMID, dwId)); CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_NOSHUTDOWN_ON_REMOVE, FALSE)); - CHECK_HR(hr = pTopology->AddNode(pNode)); + CHECK_HR(hr = pTopology->AddNode(pNode.Get())); // Return the pointer to the caller. 
- *ppNode = pNode; + *ppNode = pNode.Get(); (*ppNode)->AddRef(); + done: - SafeRelease(&pNode); return hr; } -HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID) + +HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID, bool synchronious) { - *ppIG = new (std::nothrow) ImageGrabber(deviceID); + *ppIG = new (std::nothrow) ImageGrabber(deviceID, synchronious); if (ppIG == NULL) { return E_OUTOFMEMORY; @@ -1099,6 +1181,7 @@ HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID) DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Creating instance of ImageGrabber\n", deviceID); return S_OK; } + STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv) { HRESULT hr = E_NOINTERFACE; @@ -1119,10 +1202,12 @@ STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv) } return hr; } + STDMETHODIMP_(ULONG) ImageGrabber::AddRef() { return InterlockedIncrement(&m_cRef); } + STDMETHODIMP_(ULONG) ImageGrabber::Release() { ULONG cRef = InterlockedDecrement(&m_cRef); @@ -1132,38 +1217,45 @@ STDMETHODIMP_(ULONG) ImageGrabber::Release() } return cRef; } + STDMETHODIMP ImageGrabber::OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset) { (void)hnsSystemTime; (void)llClockStartOffset; return S_OK; } + STDMETHODIMP ImageGrabber::OnClockStop(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } + STDMETHODIMP ImageGrabber::OnClockPause(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } + STDMETHODIMP ImageGrabber::OnClockRestart(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } + STDMETHODIMP ImageGrabber::OnClockSetRate(MFTIME hnsSystemTime, float flRate) { (void)flRate; (void)hnsSystemTime; return S_OK; } + STDMETHODIMP ImageGrabber::OnSetPresentationClock(IMFPresentationClock* pClock) { (void)pClock; return S_OK; } + STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags, LONGLONG llSampleTime, LONGLONG llSampleDuration, 
const BYTE * pSampleBuffer, DWORD dwSampleSize) @@ -1173,6 +1265,16 @@ STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwS (void)dwSampleFlags; (void)llSampleDuration; (void)dwSampleSize; + + HANDLE tmp[] = {ig_hFinish, ig_hFrameGrabbed, NULL}; + + DWORD status = WaitForMultipleObjects(2, tmp, FALSE, INFINITE); + if (status == WAIT_OBJECT_0) + { + printf("OnProcessFrame called after ig_hFinish event\n"); + return S_OK; + } + if(ig_RIE) { ig_RIFirst->fastCopy(pSampleBuffer); @@ -1183,27 +1285,41 @@ STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwS ig_RISecond->fastCopy(pSampleBuffer); ig_RIOut = ig_RISecond; } - ig_RIE = !ig_RIE; + + if (ig_Synchronous) + { + SetEvent(ig_hFrameReady); + } + else + { + ig_RIE = !ig_RIE; + } + return S_OK; } + STDMETHODIMP ImageGrabber::OnShutdown() { + SetEvent(ig_hFinish); return S_OK; } + RawImage *ImageGrabber::getRawImage() { return ig_RIOut; } + DWORD WINAPI MainThreadFunction( LPVOID lpParam ) { ImageGrabberThread *pIGT = (ImageGrabberThread *)lpParam; pIGT->run(); return 0; } -HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID) + +HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID, bool synchronious) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID); + *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID, synchronious); if (ppIGT == NULL) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Memory cannot be allocated\n", deviceID); @@ -1213,10 +1329,14 @@ HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaS DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Creating of the instance of ImageGrabberThread\n", deviceID); return S_OK; } -ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID): 
igt_Handle(NULL), igt_stop(false) + +ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID, bool synchronious): + igt_func(NULL), + igt_Handle(NULL), + igt_stop(false) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID); + HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID, synchronious); igt_DeviceID = deviceID; if(SUCCEEDED(hr)) { @@ -1235,6 +1355,7 @@ ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int dev DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i There is a problem with creation of the instance of the ImageGrabber class\n", deviceID); } } + void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int, void *)) { if(func) @@ -1243,12 +1364,16 @@ void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int, igt_userData = userData; } } + ImageGrabberThread::~ImageGrabberThread(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Destroing ImageGrabberThread\n", igt_DeviceID); + if (igt_Handle) + WaitForSingleObject(igt_Handle, INFINITE); delete igt_pImageGrabber; } + void ImageGrabberThread::stop() { igt_stop = true; @@ -1257,16 +1382,18 @@ void ImageGrabberThread::stop() igt_pImageGrabber->stopGrabbing(); } } + void ImageGrabberThread::start() { igt_Handle = CreateThread( - NULL, // default security attributes - 0, // use default stack size - MainThreadFunction, // thread function name - this, // argument to thread function - 0, // use default creation flags + NULL, // default security attributes + 0, // use default stack size + MainThreadFunction, // thread function name + this, // argument to thread function + 0, // use default creation flags &igt_ThreadIdArray); // returns the thread identifier } + void ImageGrabberThread::run() { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -1294,10 +1421,12 @@ 
void ImageGrabberThread::run() else DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Finish thread\n", igt_DeviceID); } + ImageGrabber *ImageGrabberThread::getImageGrabber() { return igt_pImageGrabber; } + Media_Foundation::Media_Foundation(void) { HRESULT hr = MFStartup(MF_VERSION); @@ -1307,6 +1436,7 @@ Media_Foundation::Media_Foundation(void) DPO->printOut(L"MEDIA FOUNDATION: It cannot be created!!!\n"); } } + Media_Foundation::~Media_Foundation(void) { HRESULT hr = MFShutdown(); @@ -1316,12 +1446,13 @@ Media_Foundation::~Media_Foundation(void) DPO->printOut(L"MEDIA FOUNDATION: Resources cannot be released\n"); } } + bool Media_Foundation::buildListOfDevices() { HRESULT hr = S_OK; - IMFAttributes *pAttributes = NULL; + ComPtr pAttributes = NULL; CoInitialize(NULL); - hr = MFCreateAttributes(&pAttributes, 1); + hr = MFCreateAttributes(pAttributes.GetAddressOf(), 1); if (SUCCEEDED(hr)) { hr = pAttributes->SetGUID( @@ -1332,40 +1463,46 @@ bool Media_Foundation::buildListOfDevices() if (SUCCEEDED(hr)) { videoDevices *vDs = &videoDevices::getInstance(); - hr = vDs->initDevices(pAttributes); + hr = vDs->initDevices(pAttributes.Get()); } else { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"MEDIA FOUNDATION: The access to the video cameras denied\n"); } - SafeRelease(&pAttributes); + return (SUCCEEDED(hr)); } + Media_Foundation& Media_Foundation::getInstance() { static Media_Foundation instance; return instance; } + RawImage::RawImage(unsigned int size): ri_new(false), ri_pixels(NULL) { ri_size = size; ri_pixels = new unsigned char[size]; memset((void *)ri_pixels,0,ri_size); } + bool RawImage::isNew() { return ri_new; } + unsigned int RawImage::getSize() { return ri_size; } + RawImage::~RawImage(void) { delete []ri_pixels; ri_pixels = NULL; } + long RawImage::CreateInstance(RawImage **ppRImage,unsigned int size) { *ppRImage = new (std::nothrow) RawImage(size); @@ -1375,25 +1512,30 @@ long RawImage::CreateInstance(RawImage **ppRImage,unsigned int 
size) } return S_OK; } + void RawImage::setCopy(const BYTE * pSampleBuffer) { memcpy(ri_pixels, pSampleBuffer, ri_size); ri_new = true; } + void RawImage::fastCopy(const BYTE * pSampleBuffer) { memcpy(ri_pixels, pSampleBuffer, ri_size); ri_new = true; } + unsigned char * RawImage::getpPixels() { ri_new = false; return ri_pixels; } + videoDevice::videoDevice(void): vd_IsSetuped(false), vd_LockOut(OpenLock), vd_pFriendlyName(NULL), vd_Width(0), vd_Height(0), vd_pSource(NULL), vd_func(NULL), vd_userData(NULL) { } + void videoDevice::setParametrs(CamParametrs parametrs) { if(vd_IsSetuped) @@ -1428,6 +1570,7 @@ void videoDevice::setParametrs(CamParametrs parametrs) } } } + CamParametrs videoDevice::getParametrs() { CamParametrs out; @@ -1472,6 +1615,7 @@ CamParametrs videoDevice::getParametrs() } return out; } + long videoDevice::resetDevice(IMFActivate *pActivate) { HRESULT hr = -1; @@ -1503,6 +1647,7 @@ long videoDevice::resetDevice(IMFActivate *pActivate) } return hr; } + long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num) { HRESULT hr = -1; @@ -1510,6 +1655,7 @@ long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num) hr = resetDevice(pActivate); return hr; } + long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) { HRESULT hr = S_OK; @@ -1568,14 +1714,15 @@ long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) } return hr; } + long videoDevice::initDevice() { HRESULT hr = -1; - IMFAttributes *pAttributes = NULL; - IMFActivate * vd_pActivate= NULL; + ComPtr pAttributes = NULL; + IMFActivate *vd_pActivate = NULL; DebugPrintOut *DPO = &DebugPrintOut::getInstance(); CoInitialize(NULL); - hr = MFCreateAttributes(&pAttributes, 1); + hr = MFCreateAttributes(pAttributes.GetAddressOf(), 1); if (SUCCEEDED(hr)) { hr = pAttributes->SetGUID( @@ -1585,7 +1732,7 @@ long videoDevice::initDevice() } if (SUCCEEDED(hr)) { - hr = checkDevice(pAttributes, &vd_pActivate); + hr = 
checkDevice(pAttributes.Get(), &vd_pActivate); if (SUCCEEDED(hr) && vd_pActivate) { SafeRelease(&vd_pSource); @@ -1607,9 +1754,10 @@ long videoDevice::initDevice() { DPO->printOut(L"VIDEODEVICE %i: The attribute of video cameras cannot be getting \n", vd_CurrentNumber); } - SafeRelease(&pAttributes); + return hr; } + MediaType videoDevice::getFormat(unsigned int id) { if(id < vd_CurrentFormats.size()) @@ -1713,6 +1861,7 @@ int videoDevice::findType(unsigned int size, unsigned int frameRate) return 0; return VN[0]; } + void videoDevice::buildLibraryofTypes() { unsigned int size; @@ -1722,7 +1871,7 @@ void videoDevice::buildLibraryofTypes() for(; i != vd_CurrentFormats.end(); i++) { size = (*i).MF_MT_FRAME_SIZE; - framerate = (*i).MF_MT_FRAME_RATE; + framerate = (*i).MF_MT_FRAME_RATE_NUMERATOR; FrameRateMap FRM = vd_CaptureFormats[size]; SUBTYPEMap STM = FRM[framerate]; String subType((*i).pMF_MT_SUBTYPEName); @@ -1734,45 +1883,45 @@ void videoDevice::buildLibraryofTypes() count++; } } + long videoDevice::setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex) { - IMFPresentationDescriptor *pPD = NULL; - IMFStreamDescriptor *pSD = NULL; - IMFMediaTypeHandler *pHandler = NULL; - IMFMediaType *pType = NULL; - HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); + ComPtr pPD = NULL; + ComPtr pSD = NULL; + ComPtr pHandler = NULL; + ComPtr pType = NULL; + HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf()); if (FAILED(hr)) { goto done; } BOOL fSelected; - hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); + hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf()); if (FAILED(hr)) { goto done; } - hr = pSD->GetMediaTypeHandler(&pHandler); + hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf()); if (FAILED(hr)) { goto done; } - hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, &pType); + hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, pType.GetAddressOf()); if (FAILED(hr)) { goto done; } - hr = 
pHandler->SetCurrentMediaType(pType); + hr = pHandler->SetCurrentMediaType(pType.Get()); + done: - SafeRelease(&pPD); - SafeRelease(&pSD); - SafeRelease(&pHandler); - SafeRelease(&pType); return hr; } + bool videoDevice::isDeviceSetup() { return vd_IsSetuped; } + RawImage * videoDevice::getRawImageOut() { if(!vd_IsSetuped) return NULL; @@ -1785,6 +1934,7 @@ RawImage * videoDevice::getRawImageOut() } return NULL; } + bool videoDevice::isFrameNew() { if(!vd_IsSetuped) return false; @@ -1809,16 +1959,19 @@ bool videoDevice::isFrameNew() } return false; } + bool videoDevice::isDeviceMediaSource() { if(vd_LockOut == MediaSourceLock) return true; return false; } + bool videoDevice::isDeviceRawDataSource() { if(vd_LockOut == RawDataLock) return true; return false; } + bool videoDevice::setupDevice(unsigned int id) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -1849,15 +2002,18 @@ bool videoDevice::setupDevice(unsigned int id) return false; } } + bool videoDevice::setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate) { unsigned int id = findType(w * h, idealFramerate); return setupDevice(id); } + wchar_t *videoDevice::getName() { return vd_pFriendlyName; } + videoDevice::~videoDevice(void) { closeDevice(); @@ -1865,24 +2021,25 @@ videoDevice::~videoDevice(void) if(vd_pFriendlyName) CoTaskMemFree(vd_pFriendlyName); } -long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource) + +HRESULT videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource) { - IMFPresentationDescriptor *pPD = NULL; - IMFStreamDescriptor *pSD = NULL; - IMFMediaTypeHandler *pHandler = NULL; - IMFMediaType *pType = NULL; - HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); + ComPtr pPD = NULL; + ComPtr pSD = NULL; + ComPtr pHandler = NULL; + ComPtr pType = NULL; + HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf()); if (FAILED(hr)) { goto done; } BOOL fSelected; - hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); + hr = 
pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf()); if (FAILED(hr)) { goto done; } - hr = pSD->GetMediaTypeHandler(&pHandler); + hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf()); if (FAILED(hr)) { goto done; @@ -1895,24 +2052,22 @@ long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource) } for (DWORD i = 0; i < cTypes; i++) { - hr = pHandler->GetMediaTypeByIndex(i, &pType); + hr = pHandler->GetMediaTypeByIndex(i, pType.GetAddressOf()); if (FAILED(hr)) { goto done; } - MediaType MT = FormatReader::Read(pType); + MediaType MT = FormatReader::Read(pType.Get()); vd_CurrentFormats.push_back(MT); - SafeRelease(&pType); } + done: - SafeRelease(&pPD); - SafeRelease(&pSD); - SafeRelease(&pHandler); - SafeRelease(&pType); return hr; } + videoDevices::videoDevices(void): count(0) {} + void videoDevices::clearDevices() { std::vector::iterator i = vds_Devices.begin(); @@ -1920,10 +2075,12 @@ void videoDevices::clearDevices() delete (*i); vds_Devices.clear(); } + videoDevices::~videoDevices(void) { clearDevices(); } + videoDevice * videoDevices::getDevice(unsigned int i) { if(i >= vds_Devices.size()) @@ -1936,6 +2093,7 @@ videoDevice * videoDevices::getDevice(unsigned int i) } return vds_Devices[i]; } + long videoDevices::initDevices(IMFAttributes *pAttributes) { HRESULT hr = S_OK; @@ -1965,15 +2123,18 @@ long videoDevices::initDevices(IMFAttributes *pAttributes) } return hr; } + size_t videoDevices::getCount() { return vds_Devices.size(); } + videoDevices& videoDevices::getInstance() { static videoDevices instance; return instance; } + Parametr::Parametr() { CurrentValue = 0; @@ -1983,6 +2144,7 @@ Parametr::Parametr() Default = 0; Flag = 0; } + MediaType::MediaType() { pMF_MT_AM_FORMAT_TYPEName = NULL; @@ -1990,10 +2152,12 @@ MediaType::MediaType() pMF_MT_SUBTYPEName = NULL; Clear(); } + MediaType::~MediaType() { Clear(); } + void MediaType::Clear() { MF_MT_FRAME_SIZE = 0; @@ -2005,8 +2169,8 @@ void MediaType::Clear() MF_MT_VIDEO_CHROMA_SITING = 
0; MF_MT_FIXED_SIZE_SAMPLES = 0; MF_MT_VIDEO_NOMINAL_RANGE = 0; - MF_MT_FRAME_RATE = 0; - MF_MT_FRAME_RATE_low = 0; + MF_MT_FRAME_RATE_NUMERATOR = 0; + MF_MT_FRAME_RATE_DENOMINATOR = 0; MF_MT_PIXEL_ASPECT_RATIO = 0; MF_MT_PIXEL_ASPECT_RATIO_low = 0; MF_MT_ALL_SAMPLES_INDEPENDENT = 0; @@ -2021,6 +2185,7 @@ void MediaType::Clear() memset(&MF_MT_AM_FORMAT_TYPE, 0, sizeof(GUID)); memset(&MF_MT_SUBTYPE, 0, sizeof(GUID)); } + videoInput::videoInput(void): accessToDevices(false) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2029,6 +2194,7 @@ videoInput::videoInput(void): accessToDevices(false) if(!accessToDevices) DPO->printOut(L"INITIALIZATION: Ther is not any suitable video device\n"); } + void videoInput::updateListOfDevices() { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2037,11 +2203,13 @@ void videoInput::updateListOfDevices() if(!accessToDevices) DPO->printOut(L"UPDATING: Ther is not any suitable video device\n"); } + videoInput::~videoInput(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"\n***** CLOSE VIDEOINPUT LIBRARY - 2013 *****\n\n"); } + IMFMediaSource *videoInput::getMediaSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2063,6 +2231,7 @@ IMFMediaSource *videoInput::getMediaSource(int deviceID) } return NULL; } + bool videoInput::setupDevice(int deviceID, unsigned int id) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2089,6 +2258,7 @@ bool videoInput::setupDevice(int deviceID, unsigned int id) } return false; } + bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2115,6 +2285,7 @@ bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsig } return false; } + MediaType videoInput::getFormat(int deviceID, unsigned int id) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2136,6 +2307,7 @@ MediaType videoInput::getFormat(int 
deviceID, unsigned int id) } return MediaType(); } + bool videoInput::isDeviceSetup(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2157,6 +2329,7 @@ bool videoInput::isDeviceSetup(int deviceID) } return false; } + bool videoInput::isDeviceMediaSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2178,6 +2351,7 @@ bool videoInput::isDeviceMediaSource(int deviceID) } return false; } + bool videoInput::isDeviceRawDataSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2202,6 +2376,7 @@ bool videoInput::isDeviceRawDataSource(int deviceID) } return false; } + bool videoInput::isFrameNew(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2230,6 +2405,7 @@ bool videoInput::isFrameNew(int deviceID) } return false; } + unsigned int videoInput::getCountFormats(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2251,12 +2427,14 @@ unsigned int videoInput::getCountFormats(int deviceID) } return 0; } + void videoInput::closeAllDevices() { videoDevices *VDS = &videoDevices::getInstance(); for(unsigned int i = 0; i < VDS->getCount(); i++) closeDevice(i); } + void videoInput::setParametrs(int deviceID, CamParametrs parametrs) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2277,6 +2455,7 @@ void videoInput::setParametrs(int deviceID, CamParametrs parametrs) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } + CamParametrs videoInput::getParametrs(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2299,6 +2478,7 @@ CamParametrs videoInput::getParametrs(int deviceID) } return out; } + void videoInput::closeDevice(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2319,6 +2499,7 @@ void videoInput::closeDevice(int deviceID) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } + unsigned int videoInput::getWidth(int deviceID) { DebugPrintOut *DPO = 
&DebugPrintOut::getInstance(); @@ -2340,6 +2521,7 @@ unsigned int videoInput::getWidth(int deviceID) } return 0; } + unsigned int videoInput::getHeight(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2361,6 +2543,7 @@ unsigned int videoInput::getHeight(int deviceID) } return 0; } + wchar_t *videoInput::getNameVideoDevice(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2382,6 +2565,7 @@ wchar_t *videoInput::getNameVideoDevice(int deviceID) } return L"Empty"; } + unsigned int videoInput::listDevices(bool silent) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2405,20 +2589,24 @@ unsigned int videoInput::listDevices(bool silent) } return out; } + videoInput& videoInput::getInstance() { static videoInput instance; return instance; } + bool videoInput::isDevicesAcceable() { return accessToDevices; } + void videoInput::setVerbose(bool state) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->setVerbose(state); } + void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *)) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); @@ -2442,6 +2630,7 @@ void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } + bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRedAndBlue, bool flipImage) { bool success = false; @@ -2491,6 +2680,7 @@ bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRed } return success; } + void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip) { @@ -2553,6 +2743,7 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigne } } } + /******* Capturing video from camera via Microsoft Media Foundation **********/ class CvCaptureCAM_MSMF : public CvCapture { @@ -2568,33 +2759,35 @@ 
public: virtual int getCaptureDomain() { return CV_CAP_MSMF; } // Return the type of the capture object: CV_CAP_VFW, etc... protected: void init(); - int index, width, height,fourcc; - int widthSet, heightSet; + int index, width, height, fourcc; IplImage* frame; videoInput VI; }; + struct SuppressVideoInputMessages { SuppressVideoInputMessages() { videoInput::setVerbose(true); } }; + static SuppressVideoInputMessages do_it; + CvCaptureCAM_MSMF::CvCaptureCAM_MSMF(): index(-1), width(-1), height(-1), fourcc(-1), - widthSet(-1), - heightSet(-1), - frame(0), + frame(NULL), VI(videoInput::getInstance()) { CoInitialize(0); } + CvCaptureCAM_MSMF::~CvCaptureCAM_MSMF() { close(); CoUninitialize(); } + void CvCaptureCAM_MSMF::close() { if( index >= 0 ) @@ -2603,8 +2796,9 @@ void CvCaptureCAM_MSMF::close() index = -1; cvReleaseImage(&frame); } - widthSet = heightSet = width = height = -1; + width = height = -1; } + // Initialize camera input bool CvCaptureCAM_MSMF::open( int _index ) { @@ -2621,10 +2815,14 @@ bool CvCaptureCAM_MSMF::open( int _index ) index = try_index; return true; } + bool CvCaptureCAM_MSMF::grabFrame() { - return true; + while (VI.isDeviceSetup(index) && !VI.isFrameNew(index)) + Sleep(1); + return VI.isDeviceSetup(index); } + IplImage* CvCaptureCAM_MSMF::retrieveFrame(int) { if( !frame || (int)VI.getWidth(index) != frame->width || (int)VI.getHeight(index) != frame->height ) @@ -2637,6 +2835,7 @@ IplImage* CvCaptureCAM_MSMF::retrieveFrame(int) VI.getPixels( index, (uchar*)frame->imageData, false, true ); return frame; } + double CvCaptureCAM_MSMF::getProperty( int property_id ) { // image format proprrties @@ -2646,50 +2845,7 @@ double CvCaptureCAM_MSMF::getProperty( int property_id ) return VI.getWidth(index); case CV_CAP_PROP_FRAME_HEIGHT: return VI.getHeight(index); - case CV_CAP_PROP_FOURCC: - // FIXME: implement method in VideoInput back end - //return VI.getFourcc(index); - ; - case CV_CAP_PROP_FPS: - // FIXME: implement method in VideoInput back end 
- //return VI.getFPS(index); - ; } - // video filter properties - switch( property_id ) - { - case CV_CAP_PROP_BRIGHTNESS: - case CV_CAP_PROP_CONTRAST: - case CV_CAP_PROP_HUE: - case CV_CAP_PROP_SATURATION: - case CV_CAP_PROP_SHARPNESS: - case CV_CAP_PROP_GAMMA: - case CV_CAP_PROP_MONOCROME: - case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: - case CV_CAP_PROP_BACKLIGHT: - case CV_CAP_PROP_GAIN: - // FIXME: implement method in VideoInput back end - // if ( VI.getVideoSettingFilter(index, VI.getVideoPropertyFromCV(property_id), min_value, - // max_value, stepping_delta, current_value, flags,defaultValue) ) - // return (double)current_value; - return 0.; - } - // camera properties - switch( property_id ) - { - case CV_CAP_PROP_PAN: - case CV_CAP_PROP_TILT: - case CV_CAP_PROP_ROLL: - case CV_CAP_PROP_ZOOM: - case CV_CAP_PROP_EXPOSURE: - case CV_CAP_PROP_IRIS: - case CV_CAP_PROP_FOCUS: - // FIXME: implement method in VideoInput back end - // if (VI.getVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),min_value, - // max_value,stepping_delta,current_value,flags,defaultValue) ) return (double)current_value; - return 0.; - } - // unknown parameter or value not available return -1; } bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) @@ -2706,91 +2862,272 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) height = cvRound(value); handled = true; break; - case CV_CAP_PROP_FOURCC: - fourcc = (int)(unsigned long)(value); - if ( fourcc == -1 ) { - // following cvCreateVideo usage will pop up caprturepindialog here if fourcc=-1 - // TODO - how to create a capture pin dialog - } - handled = true; - break; - case CV_CAP_PROP_FPS: - // FIXME: implement method in VideoInput back end - // int fps = cvRound(value); - // if (fps != VI.getFPS(index)) - // { - // VI.stopDevice(index); - // VI.setIdealFramerate(index,fps); - // if (widthSet > 0 && heightSet > 0) - // VI.setupDevice(index, widthSet, heightSet); - // else - // 
VI.setupDevice(index); - // } - // return VI.isDeviceSetup(index); - ; } + if ( handled ) { - // a stream setting if( width > 0 && height > 0 ) { - if( width != (int)VI.getWidth(index) || height != (int)VI.getHeight(index) )//|| fourcc != VI.getFourcc(index) ) + if( width != (int)VI.getWidth(index) || height != (int)VI.getHeight(index) && VI.isDeviceSetup(index))//|| fourcc != VI.getFourcc(index) ) { - // FIXME: implement method in VideoInput back end - // int fps = static_cast(VI.getFPS(index)); - // VI.stopDevice(index); - // VI.setIdealFramerate(index, fps); - // VI.setupDeviceFourcc(index, width, height, fourcc); + VI.closeDevice(index); + VI.setupDevice(index, width, height); } - bool success = VI.isDeviceSetup(index); - if (success) - { - widthSet = width; - heightSet = height; - width = height = fourcc = -1; - } - return success; + return VI.isDeviceSetup(index); } return true; } - // show video/camera filter dialog - // FIXME: implement method in VideoInput back end - // if ( property_id == CV_CAP_PROP_SETTINGS ) { - // VI.showSettingsWindow(index); - // return true; - // } - //video Filter properties - switch( property_id ) - { - case CV_CAP_PROP_BRIGHTNESS: - case CV_CAP_PROP_CONTRAST: - case CV_CAP_PROP_HUE: - case CV_CAP_PROP_SATURATION: - case CV_CAP_PROP_SHARPNESS: - case CV_CAP_PROP_GAMMA: - case CV_CAP_PROP_MONOCROME: - case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: - case CV_CAP_PROP_BACKLIGHT: - case CV_CAP_PROP_GAIN: - // FIXME: implement method in VideoInput back end - //return VI.setVideoSettingFilter(index,VI.getVideoPropertyFromCV(property_id),(long)value); - ; - } - //camera properties - switch( property_id ) - { - case CV_CAP_PROP_PAN: - case CV_CAP_PROP_TILT: - case CV_CAP_PROP_ROLL: - case CV_CAP_PROP_ZOOM: - case CV_CAP_PROP_EXPOSURE: - case CV_CAP_PROP_IRIS: - case CV_CAP_PROP_FOCUS: - // FIXME: implement method in VideoInput back end - //return VI.setVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),(long)value); - ; - } + 
return false; } + +class CvCaptureFile_MSMF : public CvCapture +{ +public: + CvCaptureFile_MSMF(); + virtual ~CvCaptureFile_MSMF(); + + virtual bool open( const char* filename ); + virtual void close(); + + virtual double getProperty(int); + virtual bool setProperty(int, double); + virtual bool grabFrame(); + virtual IplImage* retrieveFrame(int); + virtual int getCaptureDomain() { return CV_CAP_MSMF; } +protected: + ImageGrabberThread* grabberThread; + IMFMediaSource* videoFileSource; + std::vector captureFormats; + int captureFormatIndex; + IplImage* frame; + bool isOpened; + + HRESULT enumerateCaptureFormats(IMFMediaSource *pSource); + HRESULT getSourceDuration(IMFMediaSource *pSource, MFTIME *pDuration); +}; + +CvCaptureFile_MSMF::CvCaptureFile_MSMF(): + grabberThread(NULL), + videoFileSource(NULL), + captureFormatIndex(0), + frame(NULL), + isOpened(false) +{ + MFStartup(MF_VERSION); +} + +CvCaptureFile_MSMF::~CvCaptureFile_MSMF() +{ + close(); + MFShutdown(); +} + +bool CvCaptureFile_MSMF::open(const char* filename) +{ + if (!filename) + return false; + + wchar_t* unicodeFileName = new wchar_t[strlen(filename)+1]; + MultiByteToWideChar(CP_ACP, 0, filename, -1, unicodeFileName, strlen(filename)+1); + + HRESULT hr = S_OK; + + MF_OBJECT_TYPE ObjectType = MF_OBJECT_INVALID; + + ComPtr pSourceResolver = NULL; + IUnknown* pUnkSource = NULL; + + hr = MFCreateSourceResolver(pSourceResolver.GetAddressOf()); + + if (SUCCEEDED(hr)) + { + hr = pSourceResolver->CreateObjectFromURL( + unicodeFileName, + MF_RESOLUTION_MEDIASOURCE, + NULL, // Optional property store. + &ObjectType, + &pUnkSource + ); + } + + // Get the IMFMediaSource from the IUnknown pointer. 
+ if (SUCCEEDED(hr)) + { + hr = pUnkSource->QueryInterface(IID_PPV_ARGS(&videoFileSource)); + } + + SafeRelease(&pUnkSource); + + if (SUCCEEDED(hr)) + { + hr = enumerateCaptureFormats(videoFileSource); + } + + if (SUCCEEDED(hr)) + { + hr = ImageGrabberThread::CreateInstance(&grabberThread, videoFileSource, (unsigned int)-2, true); + } + + if (SUCCEEDED(hr)) + { + grabberThread->start(); + } + + isOpened = SUCCEEDED(hr); + + return isOpened; +} + +void CvCaptureFile_MSMF::close() +{ + if (grabberThread) + { + isOpened = false; + SetEvent(grabberThread->getImageGrabber()->ig_hFinish); + grabberThread->stop(); + delete grabberThread; + } + + if (videoFileSource) + { + videoFileSource->Shutdown(); + } +} + +bool CvCaptureFile_MSMF::setProperty(int property_id, double value) +{ + // image capture properties + // FIXME: implement method in VideoInput back end + (void) property_id; + (void) value; + return false; +} + +double CvCaptureFile_MSMF::getProperty(int property_id) +{ + // image format proprrties + switch( property_id ) + { + case CV_CAP_PROP_FRAME_WIDTH: + return captureFormats[captureFormatIndex].width; + case CV_CAP_PROP_FRAME_HEIGHT: + return captureFormats[captureFormatIndex].height; + case CV_CAP_PROP_FRAME_COUNT: + { + MFTIME duration; + getSourceDuration(this->videoFileSource, &duration); + double fps = ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_NUMERATOR) / + ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_DENOMINATOR); + return (double)floor(((double)duration/1e7)*fps+0.5); + } + case CV_CAP_PROP_FOURCC: + return captureFormats[captureFormatIndex].MF_MT_SUBTYPE.Data1; + case CV_CAP_PROP_FPS: + return ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_NUMERATOR) / + ((double)captureFormats[captureFormatIndex].MF_MT_FRAME_RATE_DENOMINATOR); + } + + return -1; +} + +bool CvCaptureFile_MSMF::grabFrame() +{ + DWORD waitResult = (DWORD)-1; + if (isOpened) + { + 
SetEvent(grabberThread->getImageGrabber()->ig_hFrameGrabbed); + HANDLE tmp[] = {grabberThread->getImageGrabber()->ig_hFrameReady, grabberThread->getImageGrabber()->ig_hFinish, 0}; + waitResult = WaitForMultipleObjects(2, tmp, FALSE, INFINITE); + } + + return isOpened && grabberThread->getImageGrabber()->getRawImage()->isNew() && (waitResult == WAIT_OBJECT_0); +} + +IplImage* CvCaptureFile_MSMF::retrieveFrame(int) +{ + unsigned int width = captureFormats[captureFormatIndex].width; + unsigned int height = captureFormats[captureFormatIndex].height; + unsigned int bytes = 3; + if( !frame || (int)width != frame->width || (int)height != frame->height ) + { + if (frame) + cvReleaseImage( &frame ); + frame = cvCreateImage( cvSize(width,height), 8, 3 ); + } + + RawImage *RIOut = grabberThread->getImageGrabber()->getRawImage(); + unsigned int size = bytes * width * height; + + bool verticalFlip = captureFormats[captureFormatIndex].MF_MT_DEFAULT_STRIDE < 0; + + if(RIOut && size == RIOut->getSize()) + { + videoInput::processPixels(RIOut->getpPixels(), (unsigned char*)frame->imageData, width, + height, bytes, false, verticalFlip); + } + + return frame; +} + +HRESULT CvCaptureFile_MSMF::enumerateCaptureFormats(IMFMediaSource *pSource) +{ + ComPtr pPD = NULL; + ComPtr pSD = NULL; + ComPtr pHandler = NULL; + ComPtr pType = NULL; + HRESULT hr = pSource->CreatePresentationDescriptor(pPD.GetAddressOf()); + if (FAILED(hr)) + { + goto done; + } + + BOOL fSelected; + hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, pSD.GetAddressOf()); + if (FAILED(hr)) + { + goto done; + } + hr = pSD->GetMediaTypeHandler(pHandler.GetAddressOf()); + if (FAILED(hr)) + { + goto done; + } + DWORD cTypes = 0; + hr = pHandler->GetMediaTypeCount(&cTypes); + if (FAILED(hr)) + { + goto done; + } + for (DWORD i = 0; i < cTypes; i++) + { + hr = pHandler->GetMediaTypeByIndex(i, pType.GetAddressOf()); + if (FAILED(hr)) + { + goto done; + } + MediaType MT = FormatReader::Read(pType.Get()); + 
captureFormats.push_back(MT); + } + +done: + return hr; +} + +HRESULT CvCaptureFile_MSMF::getSourceDuration(IMFMediaSource *pSource, MFTIME *pDuration) +{ + *pDuration = 0; + + IMFPresentationDescriptor *pPD = NULL; + + HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); + if (SUCCEEDED(hr)) + { + hr = pPD->GetUINT64(MF_PD_DURATION, (UINT64*)pDuration); + pPD->Release(); + } + return hr; +} + CvCapture* cvCreateCameraCapture_MSMF( int index ) { CvCaptureCAM_MSMF* capture = new CvCaptureCAM_MSMF; @@ -2807,4 +3144,392 @@ CvCapture* cvCreateCameraCapture_MSMF( int index ) delete capture; return 0; } + +CvCapture* cvCreateFileCapture_MSMF (const char* filename) +{ + CvCaptureFile_MSMF* capture = new CvCaptureFile_MSMF; + try + { + if( capture->open(filename) ) + return capture; + else + { + delete capture; + return NULL; + } + } + catch(...) + { + delete capture; + throw; + } +} + +// +// +// Media Foundation-based Video Writer +// +// + +class CvVideoWriter_MSMF : public CvVideoWriter +{ +public: + CvVideoWriter_MSMF(); + virtual ~CvVideoWriter_MSMF(); + virtual bool open(const char* filename, int fourcc, + double fps, CvSize frameSize, bool isColor); + virtual void close(); + virtual bool writeFrame(const IplImage* img); + +private: + UINT32 videoWidth; + UINT32 videoHeight; + double fps; + UINT32 bitRate; + UINT32 frameSize; + GUID encodingFormat; + GUID inputFormat; + + DWORD streamIndex; + ComPtr sinkWriter; + + bool initiated; + + LONGLONG rtStart; + UINT64 rtDuration; + + HRESULT InitializeSinkWriter(const char* filename); + static const GUID FourCC2GUID(int fourcc); + HRESULT WriteFrame(DWORD *videoFrameBuffer, const LONGLONG& rtStart, const LONGLONG& rtDuration); +}; + +CvVideoWriter_MSMF::CvVideoWriter_MSMF(): + initiated(false) +{ +} + +CvVideoWriter_MSMF::~CvVideoWriter_MSMF() +{ + close(); +} + +const GUID CvVideoWriter_MSMF::FourCC2GUID(int fourcc) +{ + switch(fourcc) + { + case CV_FOURCC_MACRO('d', 'v', '2', '5'): + return MFVideoFormat_DV25; 
break; + case CV_FOURCC_MACRO('d', 'v', '5', '0'): + return MFVideoFormat_DV50; break; + case CV_FOURCC_MACRO('d', 'v', 'c', ' '): + return MFVideoFormat_DVC; break; + case CV_FOURCC_MACRO('d', 'v', 'h', '1'): + return MFVideoFormat_DVH1; break; + case CV_FOURCC_MACRO('d', 'v', 'h', 'd'): + return MFVideoFormat_DVHD; break; + case CV_FOURCC_MACRO('d', 'v', 's', 'd'): + return MFVideoFormat_DVSD; break; + case CV_FOURCC_MACRO('d', 'v', 's', 'l'): + return MFVideoFormat_DVSL; break; + case CV_FOURCC_MACRO('H', '2', '6', '3'): + return MFVideoFormat_H263; break; + case CV_FOURCC_MACRO('H', '2', '6', '4'): + return MFVideoFormat_H264; break; + case CV_FOURCC_MACRO('M', '4', 'S', '2'): + return MFVideoFormat_M4S2; break; + case CV_FOURCC_MACRO('M', 'J', 'P', 'G'): + return MFVideoFormat_MJPG; break; + case CV_FOURCC_MACRO('M', 'P', '4', '3'): + return MFVideoFormat_MP43; break; + case CV_FOURCC_MACRO('M', 'P', '4', 'S'): + return MFVideoFormat_MP4S; break; + case CV_FOURCC_MACRO('M', 'P', '4', 'V'): + return MFVideoFormat_MP4V; break; + case CV_FOURCC_MACRO('M', 'P', 'G', '1'): + return MFVideoFormat_MPG1; break; + case CV_FOURCC_MACRO('M', 'S', 'S', '1'): + return MFVideoFormat_MSS1; break; + case CV_FOURCC_MACRO('M', 'S', 'S', '2'): + return MFVideoFormat_MSS2; break; + case CV_FOURCC_MACRO('W', 'M', 'V', '1'): + return MFVideoFormat_WMV1; break; + case CV_FOURCC_MACRO('W', 'M', 'V', '2'): + return MFVideoFormat_WMV2; break; + case CV_FOURCC_MACRO('W', 'M', 'V', '3'): + return MFVideoFormat_WMV3; break; + case CV_FOURCC_MACRO('W', 'V', 'C', '1'): + return MFVideoFormat_WVC1; break; + default: + return MFVideoFormat_H264; + } +} + +bool CvVideoWriter_MSMF::open( const char* filename, int fourcc, + double _fps, CvSize frameSize, bool /*isColor*/ ) +{ + videoWidth = frameSize.width; + videoHeight = frameSize.height; + fps = _fps; + bitRate = (UINT32)fps*videoWidth*videoHeight; // 1-bit per pixel + encodingFormat = FourCC2GUID(fourcc); + inputFormat = MFVideoFormat_RGB32; 
+ + HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); + if (SUCCEEDED(hr)) + { + hr = MFStartup(MF_VERSION); + if (SUCCEEDED(hr)) + { + hr = InitializeSinkWriter(filename); + if (SUCCEEDED(hr)) + { + initiated = true; + rtStart = 0; + MFFrameRateToAverageTimePerFrame((UINT32)fps, 1, &rtDuration); + } + } + } + + return SUCCEEDED(hr); +} + +void CvVideoWriter_MSMF::close() +{ + if (!initiated) + { + return; + } + + initiated = false; + sinkWriter->Finalize(); + MFShutdown(); +} + +bool CvVideoWriter_MSMF::writeFrame(const IplImage* img) +{ + if (!img) + return false; + + int length = img->width * img->height * 4; + DWORD* target = new DWORD[length]; + + for (int rowIdx = 0; rowIdx < img->height; rowIdx++) + { + char* rowStart = img->imageData + rowIdx*img->widthStep; + for (int colIdx = 0; colIdx < img->width; colIdx++) + { + BYTE b = rowStart[colIdx * img->nChannels + 0]; + BYTE g = rowStart[colIdx * img->nChannels + 1]; + BYTE r = rowStart[colIdx * img->nChannels + 2]; + + target[rowIdx*img->width+colIdx] = (r << 16) + (g << 8) + b; + } + } + + // Send frame to the sink writer. + HRESULT hr = WriteFrame(target, rtStart, rtDuration); + if (FAILED(hr)) + { + delete[] target; + return false; + } + rtStart += rtDuration; + + delete[] target; + + return true; +} + +HRESULT CvVideoWriter_MSMF::InitializeSinkWriter(const char* filename) +{ + ComPtr spAttr; + ComPtr mediaTypeOut; + ComPtr mediaTypeIn; + ComPtr spByteStream; + + MFCreateAttributes(&spAttr, 10); + spAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true); + + wchar_t* unicodeFileName = new wchar_t[strlen(filename)+1]; + MultiByteToWideChar(CP_ACP, 0, filename, -1, unicodeFileName, strlen(filename)+1); + + HRESULT hr = MFCreateSinkWriterFromURL(unicodeFileName, NULL, spAttr.Get(), &sinkWriter); + + delete[] unicodeFileName; + + // Set the output media type. 
+ if (SUCCEEDED(hr)) + { + hr = MFCreateMediaType(&mediaTypeOut); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeOut->SetGUID(MF_MT_SUBTYPE, encodingFormat); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, bitRate); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeSize(mediaTypeOut.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_FRAME_RATE, (UINT32)fps, 1); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeRatio(mediaTypeOut.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); + } + + if (SUCCEEDED(hr)) + { + hr = sinkWriter->AddStream(mediaTypeOut.Get(), &streamIndex); + } + + // Set the input media type. + if (SUCCEEDED(hr)) + { + hr = MFCreateMediaType(&mediaTypeIn); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeIn->SetGUID(MF_MT_SUBTYPE, inputFormat); + } + if (SUCCEEDED(hr)) + { + hr = mediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeSize(mediaTypeIn.Get(), MF_MT_FRAME_SIZE, videoWidth, videoHeight); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_FRAME_RATE, (UINT32)fps, 1); + } + if (SUCCEEDED(hr)) + { + hr = MFSetAttributeRatio(mediaTypeIn.Get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1); + } + + if (SUCCEEDED(hr)) + { + hr = sinkWriter->SetInputMediaType(streamIndex, mediaTypeIn.Get(), NULL); + } + + // Tell the sink writer to start accepting data. 
+ if (SUCCEEDED(hr)) + { + hr = sinkWriter->BeginWriting(); + } + + return hr; +} + +HRESULT CvVideoWriter_MSMF::WriteFrame(DWORD *videoFrameBuffer, const LONGLONG& Start, const LONGLONG& Duration) +{ + ComPtr sample; + ComPtr buffer; + + const LONG cbWidth = 4 * videoWidth; + const DWORD cbBuffer = cbWidth * videoHeight; + + BYTE *pData = NULL; + + // Create a new memory buffer. + HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &buffer); + + // Lock the buffer and copy the video frame to the buffer. + if (SUCCEEDED(hr)) + { + hr = buffer->Lock(&pData, NULL, NULL); + } + + if (SUCCEEDED(hr)) + { +#if defined(_M_ARM) + hr = MFCopyImage( + pData, // Destination buffer. + -cbWidth, // Destination stride. + (BYTE*)videoFrameBuffer, // First row in source image. + cbWidth, // Source stride. + cbWidth, // Image width in bytes. + videoHeight // Image height in pixels. + ); +#else + hr = MFCopyImage( + pData, // Destination buffer. + cbWidth, // Destination stride. + (BYTE*)videoFrameBuffer, // First row in source image. + cbWidth, // Source stride. + cbWidth, // Image width in bytes. + videoHeight // Image height in pixels. + ); +#endif + } + + if (buffer) + { + buffer->Unlock(); + } + + // Set the data length of the buffer. + if (SUCCEEDED(hr)) + { + hr = buffer->SetCurrentLength(cbBuffer); + } + + // Create a media sample and add the buffer to the sample. + if (SUCCEEDED(hr)) + { + hr = MFCreateSample(&sample); + } + if (SUCCEEDED(hr)) + { + hr = sample->AddBuffer(buffer.Get()); + } + + // Set the time stamp and the duration. + if (SUCCEEDED(hr)) + { + hr = sample->SetSampleTime(Start); + } + if (SUCCEEDED(hr)) + { + hr = sample->SetSampleDuration(Duration); + } + + // Send the sample to the Sink Writer. 
+ if (SUCCEEDED(hr)) + { + hr = sinkWriter->WriteSample(streamIndex, sample.Get()); + } + + return hr; +} + +CvVideoWriter* cvCreateVideoWriter_MSMF( const char* filename, int fourcc, + double fps, CvSize frameSize, int isColor ) +{ + CvVideoWriter_MSMF* writer = new CvVideoWriter_MSMF; + if( writer->open( filename, fourcc, fps, frameSize, isColor != 0 )) + return writer; + delete writer; + return NULL; +} + #endif \ No newline at end of file diff --git a/modules/highgui/src/cap_vfw.cpp b/modules/highgui/src/cap_vfw.cpp index d419a48912..d845953f8e 100644 --- a/modules/highgui/src/cap_vfw.cpp +++ b/modules/highgui/src/cap_vfw.cpp @@ -613,8 +613,10 @@ bool CvVideoWriter_VFW::open( const char* filename, int _fourcc, double _fps, Cv close(); return false; } + return true; } - return true; + else + return false; } diff --git a/modules/highgui/src/cap_ximea.cpp b/modules/highgui/src/cap_ximea.cpp index dbb8f58683..5acf2c09d1 100644 --- a/modules/highgui/src/cap_ximea.cpp +++ b/modules/highgui/src/cap_ximea.cpp @@ -20,25 +20,24 @@ public: virtual IplImage* retrieveFrame(int); virtual int getCaptureDomain() { return CV_CAP_XIAPI; } // Return the type of the capture object: CV_CAP_VFW, etc... 
-protected: +private: void init(); void errMsg(const char* msg, int errNum); + void resetCvImage(); + int getBpp(); IplImage* frame; HANDLE hmv; DWORD numDevices; - XI_IMG image; - int width; - int height; - int format; int timeout; + XI_IMG image; }; /**********************************************************************************/ CvCapture* cvCreateCameraCapture_XIMEA( int index ) { - CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA; + CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA; if( capture->open( index )) return capture; @@ -79,18 +78,19 @@ bool CvCaptureCAM_XIMEA::open( int wIndex ) // always use auto white ballance mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1); if(mvret != XI_OK) goto error; + + // default image format RGB24 + mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24); + if(mvret != XI_OK) goto error; + int width = 0; mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width); if(mvret != XI_OK) goto error; + int height = 0; mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height); if(mvret != XI_OK) goto error; - // default image format RGB24 - format = XI_RGB24; - mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, format); - if(mvret != XI_OK) goto error; - // allocate frame buffer for RGB24 image frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3); @@ -103,10 +103,10 @@ bool CvCaptureCAM_XIMEA::open( int wIndex ) errMsg("StartAcquisition XI_DEVICE failed", mvret); goto error; } - return true; error: + errMsg("Open XI_DEVICE failed", mvret); xiCloseDevice(hmv); hmv = NULL; return false; @@ -116,18 +116,19 @@ error: void CvCaptureCAM_XIMEA::close() { - if(hmv) - { - xiStopAcquisition(hmv); - xiCloseDevice(hmv); - hmv = NULL; - } + if(frame) + cvReleaseImage(&frame); + + xiStopAcquisition(hmv); + xiCloseDevice(hmv); + hmv = NULL; } /**********************************************************************************/ bool CvCaptureCAM_XIMEA::grabFrame() { + memset(&image, 0, sizeof(XI_IMG)); image.size = sizeof(XI_IMG); 
int mvret = xiGetImage( hmv, timeout, &image); @@ -151,31 +152,18 @@ bool CvCaptureCAM_XIMEA::grabFrame() IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int) { // update cvImage after format has changed - if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format) - { - cvReleaseImage(&frame); - switch( image.frm) - { - case XI_MONO8 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break; - case XI_MONO16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break; - case XI_RGB24 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break; - case XI_RGB32 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break; - default : - return frame; - } - // update global image format - format = image.frm; - width = image.width; - height = image.height; - } - + resetCvImage(); + // copy pixel data switch( image.frm) { - case XI_MONO8 : memcpy( frame->imageData, image.bp, image.width*image.height); break; - case XI_MONO16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break; - case XI_RGB24 : memcpy( frame->imageData, image.bp, image.width*image.height*3); break; - case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(DWORD)); break; + case XI_MONO8 : + case XI_RAW8 : memcpy( frame->imageData, image.bp, image.width*image.height); break; + case XI_MONO16 : + case XI_RAW16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break; + case XI_RGB24 : + case XI_RGB_PLANAR : memcpy( frame->imageData, image.bp, image.width*image.height*3); break; + case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*4); break; default: break; } return frame; @@ -183,6 +171,35 @@ IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int) /**********************************************************************************/ +void 
CvCaptureCAM_XIMEA::resetCvImage() +{ + int width = 0, height = 0, format = 0; + xiGetParamInt( hmv, XI_PRM_WIDTH, &width); + xiGetParamInt( hmv, XI_PRM_HEIGHT, &height); + xiGetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, &format); + + if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format) + { + if(frame) cvReleaseImage(&frame); + frame = NULL; + + switch( image.frm) + { + case XI_MONO8 : + case XI_RAW8 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break; + case XI_MONO16 : + case XI_RAW16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break; + case XI_RGB24 : + case XI_RGB_PLANAR : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break; + case XI_RGB32 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break; + default : + return; + } + } + cvZero(frame); +} +/**********************************************************************************/ + double CvCaptureCAM_XIMEA::getProperty( int property_id ) { if(hmv == NULL) @@ -238,20 +255,14 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value ) switch(property_id) { // OCV parameters - case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); - if(mvret == XI_OK) width = ival; - break; - case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); - if(mvret == XI_OK) height = ival; - break; + case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); break; + case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); break; case CV_CAP_PROP_FPS : mvret = xiSetParamFloat( hmv, XI_PRM_FRAMERATE, fval); break; case CV_CAP_PROP_GAIN : mvret = xiSetParamFloat( hmv, XI_PRM_GAIN, fval); break; case CV_CAP_PROP_EXPOSURE : mvret = xiSetParamInt( hmv, XI_PRM_EXPOSURE, ival); break; // XIMEA camera properties case CV_CAP_PROP_XI_DOWNSAMPLING : mvret = xiSetParamInt( 
hmv, XI_PRM_DOWNSAMPLING, ival); break; - case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); - if(mvret == XI_OK) format = ival; - break; + case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); break; case CV_CAP_PROP_XI_OFFSET_X : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_X, ival); break; case CV_CAP_PROP_XI_OFFSET_Y : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_Y, ival); break; case CV_CAP_PROP_XI_TRG_SOURCE : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOURCE, ival); break; @@ -288,7 +299,7 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value ) void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum) { #if defined WIN32 || defined _WIN32 - char buf[512]; + char buf[512]=""; sprintf( buf, "%s : %d\n", msg, errNum); OutputDebugString(buf); #else @@ -296,4 +307,22 @@ void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum) #endif } +/**********************************************************************************/ + +int CvCaptureCAM_XIMEA::getBpp() +{ + switch( image.frm) + { + case XI_MONO8 : + case XI_RAW8 : return 1; + case XI_MONO16 : + case XI_RAW16 : return 2; + case XI_RGB24 : + case XI_RGB_PLANAR : return 3; + case XI_RGB32 : return 4; + default : + return 0; + } +} + /**********************************************************************************/ \ No newline at end of file diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index aa327d6d7c..dcd4afdc01 100644 --- a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -119,6 +119,9 @@ CvVideoWriter* cvCreateVideoWriter_VFW( const char* filename, int fourcc, double fps, CvSize frameSize, int is_color ); CvCapture* cvCreateCameraCapture_DShow( int index ); CvCapture* cvCreateCameraCapture_MSMF( int index ); +CvCapture* cvCreateFileCapture_MSMF (const char* filename); +CvVideoWriter* cvCreateVideoWriter_MSMF( const char* filename, int fourcc, + double 
fps, CvSize frameSize, int is_color ); CvCapture* cvCreateCameraCapture_OpenNI( int index ); CvCapture* cvCreateFileCapture_OpenNI( const char* filename ); CvCapture* cvCreateCameraCapture_Android( int index ); diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 6d29534643..1e47bf6ee3 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -256,12 +256,17 @@ namespace void cv::imshow( const string& winname, InputArray _img ) { + const Size size = _img.size(); #ifndef HAVE_OPENGL - Mat img = _img.getMat(); - CvMat c_img = img; - cvShowImage(winname.c_str(), &c_img); + CV_Assert(size.width>0 && size.height>0); + { + Mat img = _img.getMat(); + CvMat c_img = img; + cvShowImage(winname.c_str(), &c_img); + } #else const double useGl = getWindowProperty(winname, WND_PROP_OPENGL); + CV_Assert(size.width>0 && size.height>0); if (useGl <= 0) { @@ -275,7 +280,6 @@ void cv::imshow( const string& winname, InputArray _img ) if (autoSize > 0) { - Size size = _img.size(); resizeWindow(winname, size.width, size.height); } diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 50f2b9e787..64d57ab269 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -38,6 +38,7 @@ //--------------------Google Code 2010 -- Yannick Verdie--------------------// +#include "precomp.hpp" #if defined(HAVE_QT) @@ -2473,35 +2474,33 @@ void DefaultViewPort::saveView() if (!fileName.isEmpty()) //save the picture { QString extension = fileName.right(3); - - // (no need anymore) create the image resized to receive the 'screenshot' - // image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888); - - QPainter saveimage(&image2Draw_qt_resized); - this->render(&saveimage); + + // Create a new pixmap to render the viewport into + QPixmap viewportPixmap(viewport()->size()); + viewport()->render(&viewportPixmap); // Save it.. 
if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "PNG"); + viewportPixmap.save(fileName, "PNG"); return; } if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPG"); + viewportPixmap.save(fileName, "JPG"); return; } if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "BMP"); + viewportPixmap.save(fileName, "BMP"); return; } if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPEG"); + viewportPixmap.save(fileName, "JPEG"); return; } @@ -2651,17 +2650,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt) //Now disable matrixWorld for overlay display myPainter.setWorldMatrixEnabled(false); + //overlay pixel values if zoomed in far enough + if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region && + param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region) + { + drawImgRegion(&myPainter); + } + //in mode zoom/panning if (param_matrixWorld.m11() > 1) { - if (param_matrixWorld.m11() >= threshold_zoom_img_region) - { - if (centralWidget->param_flags == CV_WINDOW_NORMAL) - startDisplayInfo("WARNING: The values displayed are the resized image's values. 
If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000); - - drawImgRegion(&myPainter); - } - drawViewOverview(&myPainter); } @@ -2887,22 +2885,24 @@ void DefaultViewPort::drawStatusBar() //accept only CV_8UC1 and CV_8UC8 image for now void DefaultViewPort::drawImgRegion(QPainter *painter) { - if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3) return; - qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11(); + double pixel_width = param_matrixWorld.m11()*ratioX; + double pixel_height = param_matrixWorld.m11()*ratioY; + + qreal offsetX = param_matrixWorld.dx()/pixel_width; offsetX = offsetX - floor(offsetX); - qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11(); + qreal offsetY = param_matrixWorld.dy()/pixel_height; offsetY = offsetY - floor(offsetY); QSize view = size(); QVarLengthArray linesX; - for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() ) + for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width ) linesX.append(QLineF(_x, 0, _x, view.height())); QVarLengthArray linesY; - for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() ) + for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height ) linesY.append(QLineF(0, _y, view.width(), _y)); @@ -2910,27 +2910,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) int original_font_size = f.pointSize(); //change font size //f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); - f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); + f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5); painter->setFont(f); - QString val; - QRgb rgbValue; - QPointF point1;//sorry, I do not know how to name it - QPointF point2;//idem - for (int j=-1;j= 0 && point2.y() >= 0) - rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y())); + QRgb rgbValue; + if 
(image2Draw_qt.valid(point_in_image)) + rgbValue = image2Draw_qt.pixel(point_in_image); else rgbValue = qRgb(0,0,0); @@ -2943,29 +2941,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2), Qt::AlignCenter, val); */ + QString val; val = tr("%1").arg(qRed(rgbValue)); painter->setPen(QPen(Qt::red, 1)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qGreen(rgbValue)); painter->setPen(QPen(Qt::green, 1)); - painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qBlue(rgbValue)); painter->setPen(QPen(Qt::blue, 1)); - painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); } if (nbChannelOriginImage==CV_8UC1) { - - val = tr("%1").arg(qRed(rgbValue)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()), + QString val = tr("%1").arg(qRed(rgbValue)); + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height), Qt::AlignCenter, val); } } diff --git a/modules/highgui/src/window_QT.h b/modules/highgui/src/window_QT.h index 089997f514..a96a8c6e69 100644 --- a/modules/highgui/src/window_QT.h +++ b/modules/highgui/src/window_QT.h @@ -522,7 +522,6 @@ private: CvMat* image2Draw_mat; QImage image2Draw_qt; - QImage image2Draw_qt_resized; int nbChannelOriginImage; //for mouse callback diff --git a/modules/highgui/test/test_precomp.hpp 
b/modules/highgui/test/test_precomp.hpp index 0d0bd80228..be06c0643a 100644 --- a/modules/highgui/test/test_precomp.hpp +++ b/modules/highgui/test/test_precomp.hpp @@ -47,7 +47,8 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ /*defined(HAVE_OPENNI) || too specialized */ \ - defined(HAVE_FFMPEG) + defined(HAVE_FFMPEG) || \ + defined(HAVE_MSMF) # define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 @@ -57,7 +58,8 @@ defined(HAVE_GSTREAMER) || \ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_FFMPEG) + defined(HAVE_FFMPEG) || \ + defined(HAVE_MSMF) # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0 diff --git a/modules/highgui/test/test_video_io.cpp b/modules/highgui/test/test_video_io.cpp index b0c2e53ba5..5d4de7ecb0 100644 --- a/modules/highgui/test/test_video_io.cpp +++ b/modules/highgui/test/test_video_io.cpp @@ -54,6 +54,35 @@ string fourccToString(int fourcc) return format("%c%c%c%c", fourcc & 255, (fourcc >> 8) & 255, (fourcc >> 16) & 255, (fourcc >> 24) & 255); } +#ifdef HAVE_MSMF +const VideoFormat g_specific_fmt_list[] = +{ + /*VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', '2', '5')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', '5', '0')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'c', ' ')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'h', '1')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 'h', 'd')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 's', 'd')), + VideoFormat("wmv", CV_FOURCC_MACRO('d', 'v', 's', 'l')), + VideoFormat("wmv", CV_FOURCC_MACRO('H', '2', '6', '3')), + VideoFormat("wmv", CV_FOURCC_MACRO('M', '4', 'S', '2')), + VideoFormat("avi", CV_FOURCC_MACRO('M', 'J', 'P', 'G')), + VideoFormat("mp4", CV_FOURCC_MACRO('M', 'P', '4', 'S')), + VideoFormat("mp4", CV_FOURCC_MACRO('M', 'P', '4', 'V')), + VideoFormat("wmv", CV_FOURCC_MACRO('M', 'P', '4', '3')), + VideoFormat("wmv", CV_FOURCC_MACRO('M', 'P', 'G', '1')), + 
VideoFormat("wmv", CV_FOURCC_MACRO('M', 'S', 'S', '1')), + VideoFormat("wmv", CV_FOURCC_MACRO('M', 'S', 'S', '2')),*/ +#if !defined(_M_ARM) + VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '1')), + VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '2')), +#endif + VideoFormat("wmv", CV_FOURCC_MACRO('W', 'M', 'V', '3')), + VideoFormat("avi", CV_FOURCC_MACRO('H', '2', '6', '4')), + //VideoFormat("wmv", CV_FOURCC_MACRO('W', 'V', 'C', '1')), + VideoFormat() +}; +#else const VideoFormat g_specific_fmt_list[] = { VideoFormat("avi", CV_FOURCC('X', 'V', 'I', 'D')), @@ -63,17 +92,17 @@ const VideoFormat g_specific_fmt_list[] = VideoFormat("mkv", CV_FOURCC('X', 'V', 'I', 'D')), VideoFormat("mkv", CV_FOURCC('M', 'P', 'E', 'G')), VideoFormat("mkv", CV_FOURCC('M', 'J', 'P', 'G')), - VideoFormat("mov", CV_FOURCC('m', 'p', '4', 'v')), VideoFormat() }; +#endif } class CV_HighGuiTest : public cvtest::BaseTest { protected: - void ImageTest(const string& dir); + void ImageTest (const string& dir); void VideoTest (const string& dir, const cvtest::VideoFormat& fmt); void SpecificImageTest (const string& dir); void SpecificVideoTest (const string& dir, const cvtest::VideoFormat& fmt); @@ -242,19 +271,19 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt for(;;) { - IplImage * img = cvQueryFrame( cap ); + IplImage* img = cvQueryFrame( cap ); if (!img) break; frames.push_back(Mat(img).clone()); - if (writer == 0) + if (writer == NULL) { writer = cvCreateVideoWriter(tmp_name.c_str(), fmt.fourcc, 24, cvGetSize(img)); - if (writer == 0) + if (writer == NULL) { - ts->printf(ts->LOG, "can't create writer (with fourcc : %d)\n", + ts->printf(ts->LOG, "can't create writer (with fourcc : %s)\n", cvtest::fourccToString(fmt.fourcc).c_str()); cvReleaseCapture( &cap ); ts->set_failed_test_info(ts->FAIL_MISMATCH); @@ -290,15 +319,22 @@ void CV_HighGuiTest::VideoTest(const string& dir, const cvtest::VideoFormat& fmt double psnr = PSNR(img1, img); if (psnr < 
thresDbell) { - printf("Too low psnr = %gdb\n", psnr); - // imwrite("img.png", img); - // imwrite("img1.png", img1); + ts->printf(ts->LOG, "Too low frame %d psnr = %gdb\n", i, psnr); ts->set_failed_test_info(ts->FAIL_MISMATCH); + + //imwrite("original.png", img); + //imwrite("after_test.png", img1); + //Mat diff; + //absdiff(img, img1, diff); + //imwrite("diff.png", diff); + break; } } + printf("Before saved release for %s\n", tmp_name.c_str()); cvReleaseCapture( &saved ); + printf("After release\n"); ts->printf(ts->LOG, "end test function : ImagesVideo \n"); } diff --git a/modules/imgproc/doc/miscellaneous_transformations.rst b/modules/imgproc/doc/miscellaneous_transformations.rst index 4ebf6d5ee5..9fd8df517a 100644 --- a/modules/imgproc/doc/miscellaneous_transformations.rst +++ b/modules/imgproc/doc/miscellaneous_transformations.rst @@ -116,6 +116,7 @@ If you use ``cvtColor`` with 8-bit images, the conversion will have some informa The function can do the following transformations: * + RGB :math:`\leftrightarrow` GRAY ( ``CV_BGR2GRAY, CV_RGB2GRAY, CV_GRAY2BGR, CV_GRAY2RGB`` ) Transformations within RGB space like adding/removing the alpha channel, reversing the channel order, conversion to/from 16-bit RGB color (R5:G6:B5 or R5:G5:B5), as well as conversion to/from grayscale using: .. math:: @@ -765,7 +766,7 @@ Runs the GrabCut algorithm. * **GC_PR_BGD** defines a possible background pixel. - * **GC_PR_BGD** defines a possible foreground pixel. + * **GC_PR_FGD** defines a possible foreground pixel. :param rect: ROI containing a segmented object. The pixels outside of the ROI are marked as "obvious background". The parameter is only used when ``mode==GC_INIT_WITH_RECT`` . 
diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp index 9b87afe99c..601beb8996 100644 --- a/modules/imgproc/perf/perf_cvt_color.cpp +++ b/modules/imgproc/perf/perf_cvt_color.cpp @@ -258,7 +258,8 @@ PERF_TEST_P(Size_CvtMode, cvtColor8u, declare.time(100); declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn); + int runs = sz.width <= 320 ? 70 : 5; + TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn); SANITY_CHECK(dst, 1); } @@ -334,7 +335,8 @@ PERF_TEST_P(Size_CvtMode3, cvtColorRGB2YUV420p, declare.time(100); declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn); + int runs = (sz.width <= 640) ? 10 : 1; + TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn); SANITY_CHECK(dst, 1); } diff --git a/modules/imgproc/perf/perf_morph.cpp b/modules/imgproc/perf/perf_morph.cpp index 9aadeaff52..d3dbba38fb 100644 --- a/modules/imgproc/perf/perf_morph.cpp +++ b/modules/imgproc/perf/perf_morph.cpp @@ -19,7 +19,8 @@ PERF_TEST_P(Size_MatType, erode, TYPICAL_MATS_MORPH) declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() erode(src, dst, noArray()); + int runs = (sz.width <= 320) ? 15 : 1; + TEST_CYCLE_MULTIRUN(runs) erode(src, dst, noArray()); SANITY_CHECK(dst); } diff --git a/modules/imgproc/perf/perf_remap.cpp b/modules/imgproc/perf/perf_remap.cpp index 334c5ff960..92c6007a2b 100644 --- a/modules/imgproc/perf/perf_remap.cpp +++ b/modules/imgproc/perf/perf_remap.cpp @@ -63,7 +63,8 @@ PERF_TEST_P( TestRemap, Remap, declare.in(src, WARMUP_RNG).out(dst).time(20); - TEST_CYCLE() remap(src, dst, map1, map2, inter_type); + int runs = (sz.width <= 640) ? 
3 : 1; + TEST_CYCLE_MULTIRUN(runs) remap(src, dst, map1, map2, inter_type); SANITY_CHECK(dst); } diff --git a/modules/imgproc/perf/perf_resize.cpp b/modules/imgproc/perf/perf_resize.cpp index 7aef05ee52..ea959a627a 100644 --- a/modules/imgproc/perf/perf_resize.cpp +++ b/modules/imgproc/perf/perf_resize.cpp @@ -85,7 +85,8 @@ PERF_TEST_P(MatInfo_Size_Scale, ResizeAreaFast, declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() resize(src, dst, dst.size(), 0, 0, INTER_AREA); + int runs = 15; + TEST_CYCLE_MULTIRUN(runs) resize(src, dst, dst.size(), 0, 0, INTER_AREA); //difference equal to 1 is allowed because of different possible rounding modes: round-to-nearest vs bankers' rounding SANITY_CHECK(dst, 1); diff --git a/modules/imgproc/perf/perf_threshold.cpp b/modules/imgproc/perf/perf_threshold.cpp index 61255e2283..9ccafd6b54 100644 --- a/modules/imgproc/perf/perf_threshold.cpp +++ b/modules/imgproc/perf/perf_threshold.cpp @@ -32,7 +32,7 @@ PERF_TEST_P(Size_MatType_ThreshType, threshold, declare.in(src, WARMUP_RNG).out(dst); - int runs = (sz.width <= 640) ? 8 : 1; + int runs = (sz.width <= 640) ? 40 : 1; TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, thresh, maxval, threshType); SANITY_CHECK(dst); @@ -51,7 +51,8 @@ PERF_TEST_P(Size_Only, threshold_otsu, testing::Values(TYPICAL_MAT_SIZES)) declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() threshold(src, dst, 0, maxval, THRESH_BINARY|THRESH_OTSU); + int runs = 15; + TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, 0, maxval, THRESH_BINARY|THRESH_OTSU); SANITY_CHECK(dst); } diff --git a/modules/imgproc/src/clahe.cpp b/modules/imgproc/src/clahe.cpp new file mode 100644 index 0000000000..4ce479713e --- /dev/null +++ b/modules/imgproc/src/clahe.cpp @@ -0,0 +1,334 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, NVIDIA Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the copyright holders or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +// ---------------------------------------------------------------------- +// CLAHE + +namespace +{ + class CLAHE_CalcLut_Body : public cv::ParallelLoopBody + { + public: + CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : + src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + int clipLimit_; + float lutScale_; + }; + + void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const + { + const int histSize = 256; + + uchar* tileLut = lut_.ptr(range.start); + const size_t lut_step = lut_.step; + + for (int k = range.start; k < range.end; ++k, tileLut += lut_step) + { + const int ty = k / tilesX_; + const int tx = k % tilesX_; + + // retrieve tile submatrix + + cv::Rect tileROI; + tileROI.x = tx * tileSize_.width; + tileROI.y = ty * tileSize_.height; + tileROI.width = tileSize_.width; + tileROI.height = tileSize_.height; + + const cv::Mat tile = src_(tileROI); + + // calc histogram + + int tileHist[histSize] = {0, }; + + int height = tileROI.height; + const size_t sstep = tile.step; + for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) + { + int x = 0; + for (; x <= tileROI.width - 4; x += 4) + { + int t0 = ptr[x], t1 = ptr[x+1]; + tileHist[t0]++; tileHist[t1]++; + t0 = ptr[x+2]; t1 = ptr[x+3]; + tileHist[t0]++; tileHist[t1]++; + } + + for (; x < tileROI.width; ++x) + tileHist[ptr[x]]++; + } + + // clip histogram + + if (clipLimit_ > 0) + { + // how many pixels were clipped + int clipped = 0; + for (int i = 0; i < histSize; ++i) + { + if (tileHist[i] > clipLimit_) + { + clipped += tileHist[i] - clipLimit_; + tileHist[i] = clipLimit_; + } + } + + // redistribute clipped pixels + int redistBatch = clipped / 
histSize; + int residual = clipped - redistBatch * histSize; + + for (int i = 0; i < histSize; ++i) + tileHist[i] += redistBatch; + + for (int i = 0; i < residual; ++i) + tileHist[i]++; + } + + // calc Lut + + int sum = 0; + for (int i = 0; i < histSize; ++i) + { + sum += tileHist[i]; + tileLut[i] = cv::saturate_cast(sum * lutScale_); + } + } + } + + class CLAHE_Interpolation_Body : public cv::ParallelLoopBody + { + public: + CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY) : + src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) + { + } + + void operator ()(const cv::Range& range) const; + + private: + cv::Mat src_; + mutable cv::Mat dst_; + cv::Mat lut_; + + cv::Size tileSize_; + int tilesX_; + int tilesY_; + }; + + void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const + { + const size_t lut_step = lut_.step; + + for (int y = range.start; y < range.end; ++y) + { + const uchar* srcRow = src_.ptr(y); + uchar* dstRow = dst_.ptr(y); + + const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; + + int ty1 = cvFloor(tyf); + int ty2 = ty1 + 1; + + const float ya = tyf - ty1; + + ty1 = std::max(ty1, 0); + ty2 = std::min(ty2, tilesY_ - 1); + + const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); + const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); + + for (int x = 0; x < src_.cols; ++x) + { + const float txf = (static_cast(x) / tileSize_.width) - 0.5f; + + int tx1 = cvFloor(txf); + int tx2 = tx1 + 1; + + const float xa = txf - tx1; + + tx1 = std::max(tx1, 0); + tx2 = std::min(tx2, tilesX_ - 1); + + const int srcVal = srcRow[x]; + + const size_t ind1 = tx1 * lut_step + srcVal; + const size_t ind2 = tx2 * lut_step + srcVal; + + float res = 0; + + res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); + res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); + res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); + res += lutPlane2[ind2] * ((xa) * (ya)); + + 
dstRow[x] = cv::saturate_cast(res); + } + } + } + + class CLAHE_Impl : public cv::CLAHE + { + public: + CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); + + cv::AlgorithmInfo* info() const; + + void apply(cv::InputArray src, cv::OutputArray dst); + + void setClipLimit(double clipLimit); + double getClipLimit() const; + + void setTilesGridSize(cv::Size tileGridSize); + cv::Size getTilesGridSize() const; + + void collectGarbage(); + + private: + double clipLimit_; + int tilesX_; + int tilesY_; + + cv::Mat srcExt_; + cv::Mat lut_; + }; + + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : + clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + { + } + + CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", + obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); + obj.info()->addParam(obj, "tilesX", obj.tilesX_); + obj.info()->addParam(obj, "tilesY", obj.tilesY_)) + + void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) + { + cv::Mat src = _src.getMat(); + + CV_Assert( src.type() == CV_8UC1 ); + + _dst.create( src.size(), src.type() ); + cv::Mat dst = _dst.getMat(); + + const int histSize = 256; + + lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); + + cv::Size tileSize; + cv::Mat srcForLut; + + if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) + { + tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); + srcForLut = src; + } + else + { + cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); + + tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + srcForLut = srcExt_; + } + + const int tileSizeTotal = tileSize.area(); + const float lutScale = static_cast(histSize - 1) / tileSizeTotal; + + int clipLimit = 0; + if (clipLimit_ > 0.0) + { + clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); + clipLimit = std::max(clipLimit, 1); + } + + CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, 
clipLimit, lutScale); + cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); + + CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); + cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); + } + + void CLAHE_Impl::setClipLimit(double clipLimit) + { + clipLimit_ = clipLimit; + } + + double CLAHE_Impl::getClipLimit() const + { + return clipLimit_; + } + + void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) + { + tilesX_ = tileGridSize.width; + tilesY_ = tileGridSize.height; + } + + cv::Size CLAHE_Impl::getTilesGridSize() const + { + return cv::Size(tilesX_, tilesY_); + } + + void CLAHE_Impl::collectGarbage() + { + srcExt_.release(); + lut_.release(); + } +} + +cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) +{ + return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); +} diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 3799d435e3..41ca2db9c0 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -2755,7 +2755,7 @@ const int ITUR_BT_601_CGV = -385875; const int ITUR_BT_601_CBV = -74448; template -struct YUV420sp2RGB888Invoker +struct YUV420sp2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *muv; @@ -2764,10 +2764,10 @@ struct YUV420sp2RGB888Invoker YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -2824,7 +2824,7 @@ struct YUV420sp2RGB888Invoker }; template -struct YUV420sp2RGBA8888Invoker +struct YUV420sp2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, 
*muv; @@ -2833,10 +2833,10 @@ struct YUV420sp2RGBA8888Invoker YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv) : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; //R = 1.164(Y - 16) + 1.596(V - 128) //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) @@ -2897,7 +2897,7 @@ struct YUV420sp2RGBA8888Invoker }; template -struct YUV420p2RGB888Invoker +struct YUV420p2RGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -2907,19 +2907,19 @@ struct YUV420p2RGB888Invoker YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - const int rangeBegin = range.begin() * 2; - const int rangeEnd = range.end() * 2; + const int rangeBegin = range.start * 2; + const int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + (range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -2965,7 +2965,7 @@ struct YUV420p2RGB888Invoker }; template -struct YUV420p2RGBA8888Invoker +struct YUV420p2RGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* my1, *mu, *mv; @@ -2975,19 +2975,19 @@ struct 
YUV420p2RGBA8888Invoker YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx) : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin() * 2; - int rangeEnd = range.end() * 2; + int rangeBegin = range.start * 2; + int rangeEnd = range.end * 2; size_t uvsteps[2] = {width/2, stride - width/2}; int usIdx = ustepIdx, vsIdx = vstepIdx; const uchar* y1 = my1 + rangeBegin * stride; - const uchar* u1 = mu + (range.begin() / 2) * stride; - const uchar* v1 = mv + (range.begin() / 2) * stride; + const uchar* u1 = mu + (range.start / 2) * stride; + const uchar* v1 = mv + (range.start / 2) * stride; - if(range.begin() % 2 == 1) + if(range.start % 2 == 1) { u1 += uvsteps[(usIdx++) & 1]; v1 += uvsteps[(vsIdx++) & 1]; @@ -3042,48 +3042,40 @@ template inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGB888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv) { YUV420sp2RGBA8888Invoker converter(&_dst, _stride, _y1, _uv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int 
ustepIdx, int vstepIdx) { YUV420p2RGB888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } template inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) { YUV420p2RGBA8888Invoker converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows/2), converter); + parallel_for_(Range(0, _dst.rows/2), converter); else -#endif - converter(BlockedRange(0, _dst.rows/2)); + converter(Range(0, _dst.rows/2)); } ///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// @@ -3167,7 +3159,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) ///////////////////////////////////// YUV422 -> RGB ///////////////////////////////////// template -struct YUV422toRGB888Invoker +struct YUV422toRGB888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -3176,10 +3168,10 @@ struct YUV422toRGB888Invoker YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -3213,7 +3205,7 @@ struct YUV422toRGB888Invoker }; template -struct YUV422toRGBA8888Invoker +struct YUV422toRGBA8888Invoker : ParallelLoopBody { Mat* dst; const uchar* src; @@ -3222,10 +3214,10 @@ struct YUV422toRGBA8888Invoker 
YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv) : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {} - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int rangeBegin = range.begin(); - int rangeEnd = range.end(); + int rangeBegin = range.start; + int rangeEnd = range.end; const int uidx = 1 - yIdx + uIdx * 2; const int vidx = (2 + uidx) % 4; @@ -3266,24 +3258,20 @@ template inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGB888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } template inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv) { YUV422toRGBA8888Invoker converter(&_dst, _stride, _yuv); -#ifdef HAVE_TBB if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) - parallel_for(BlockedRange(0, _dst.rows), converter); + parallel_for_(Range(0, _dst.rows), converter); else -#endif - converter(BlockedRange(0, _dst.rows)); + converter(Range(0, _dst.rows)); } /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) ////////////// diff --git a/modules/imgproc/src/distransform.cpp b/modules/imgproc/src/distransform.cpp index 89d3a550f4..d3e6f90242 100644 --- a/modules/imgproc/src/distransform.cpp +++ b/modules/imgproc/src/distransform.cpp @@ -443,7 +443,7 @@ icvGetDistanceTransformMask( int maskType, float *metrics ) namespace cv { -struct DTColumnInvoker +struct DTColumnInvoker : ParallelLoopBody { DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab) { @@ -453,9 +453,9 @@ struct DTColumnInvoker sqr_tab = _sqr_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { - int i, i1 = 
range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int m = src->rows; size_t sstep = src->step, dstep = dst->step/sizeof(float); AutoBuffer _d(m); @@ -490,7 +490,7 @@ struct DTColumnInvoker }; -struct DTRowInvoker +struct DTRowInvoker : ParallelLoopBody { DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab ) { @@ -499,10 +499,10 @@ struct DTRowInvoker inv_tab = _inv_tab; } - void operator()( const BlockedRange& range ) const + void operator()( const Range& range ) const { const float inf = 1e15f; - int i, i1 = range.begin(), i2 = range.end(); + int i, i1 = range.start, i2 = range.end; int n = dst->cols; AutoBuffer _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int)); float* f = (float*)(uchar*)_buf; @@ -586,7 +586,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst ) for( ; i <= m*3; i++ ) sat_tab[i] = i - shift; - cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab)); + cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab)); // stage 2: compute modified distance transform for each row float* inv_tab = sqr_tab + n; @@ -598,7 +598,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst ) sqr_tab[i] = (float)(i*i); } - cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab)); + cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab)); } diff --git a/modules/imgproc/src/histogram.cpp b/modules/imgproc/src/histogram.cpp index 22dd9beb1f..bfcdee515f 100644 --- a/modules/imgproc/src/histogram.cpp +++ b/modules/imgproc/src/histogram.cpp @@ -2986,29 +2986,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask, } } -class EqualizeHistCalcHist_Invoker +class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody { public: enum {HIST_SZ = 256}; -#ifdef HAVE_TBB - typedef tbb::mutex* MutextPtr; -#else - typedef void* MutextPtr; -#endif - - EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr 
histogramLock) + EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock) : src_(src), globalHistogram_(histogram), histogramLock_(histogramLock) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { int localHistogram[HIST_SZ] = {0, }; const size_t sstep = src_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; if (src_.isContinuous()) { @@ -3016,7 +3010,7 @@ public: height = 1; } - for (const uchar* ptr = src_.ptr(rowRange.begin()); height--; ptr += sstep) + for (const uchar* ptr = src_.ptr(rowRange.start); height--; ptr += sstep) { int x = 0; for (; x <= width - 4; x += 4) @@ -3031,9 +3025,7 @@ public: localHistogram[ptr[x]]++; } -#ifdef HAVE_TBB - tbb::mutex::scoped_lock lock(*histogramLock_); -#endif + cv::AutoLock lock(*histogramLock_); for( int i = 0; i < HIST_SZ; i++ ) globalHistogram_[i] += localHistogram[i]; @@ -3041,12 +3033,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3054,10 +3041,10 @@ private: cv::Mat& src_; int* globalHistogram_; - MutextPtr histogramLock_; + cv::Mutex* histogramLock_; }; -class EqualizeHistLut_Invoker +class EqualizeHistLut_Invoker : public cv::ParallelLoopBody { public: EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut ) @@ -3066,13 +3053,13 @@ public: lut_(lut) { } - void operator()( const cv::BlockedRange& rowRange ) const + void operator()( const cv::Range& rowRange ) const { const size_t sstep = src_.step; const size_t dstep = dst_.step; int width = src_.cols; - int height = rowRange.end() - rowRange.begin(); + int height = rowRange.end - rowRange.start; int* lut = lut_; if (src_.isContinuous() && dst_.isContinuous()) @@ -3081,8 +3068,8 @@ public: height = 1; } - const uchar* sptr = src_.ptr(rowRange.begin()); - uchar* 
dptr = dst_.ptr(rowRange.begin()); + const uchar* sptr = src_.ptr(rowRange.start); + uchar* dptr = dst_.ptr(rowRange.start); for (; height--; sptr += sstep, dptr += dstep) { @@ -3111,12 +3098,7 @@ public: static bool isWorthParallel( const cv::Mat& src ) { -#ifdef HAVE_TBB return ( src.total() >= 640*480 ); -#else - (void)src; - return false; -#endif } private: @@ -3143,23 +3125,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) if(src.empty()) return; -#ifdef HAVE_TBB - tbb::mutex histogramLockInstance; - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance; -#else - EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0; -#endif + Mutex histogramLockInstance; const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ; int hist[hist_sz] = {0,}; int lut[hist_sz]; - EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock); + EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance); EqualizeHistLut_Invoker lutBody(src, dst, lut); - cv::BlockedRange heightRange(0, src.rows); + cv::Range heightRange(0, src.rows); if(EqualizeHistCalcHist_Invoker::isWorthParallel(src)) - parallel_for(heightRange, calcBody); + parallel_for_(heightRange, calcBody); else calcBody(heightRange); @@ -3183,303 +3160,11 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst ) } if(EqualizeHistLut_Invoker::isWorthParallel(src)) - parallel_for(heightRange, lutBody); + parallel_for_(heightRange, lutBody); else lutBody(heightRange); } -// ---------------------------------------------------------------------- -// CLAHE - -namespace -{ - class CLAHE_CalcLut_Body : public cv::ParallelLoopBody - { - public: - CLAHE_CalcLut_Body(const cv::Mat& src, cv::Mat& lut, cv::Size tileSize, int tilesX, int tilesY, int clipLimit, float lutScale) : - src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY), clipLimit_(clipLimit), lutScale_(lutScale) - { - } - - void operator ()(const cv::Range& range) const; - - private: - 
cv::Mat src_; - mutable cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - int clipLimit_; - float lutScale_; - }; - - void CLAHE_CalcLut_Body::operator ()(const cv::Range& range) const - { - const int histSize = 256; - - uchar* tileLut = lut_.ptr(range.start); - const size_t lut_step = lut_.step; - - for (int k = range.start; k < range.end; ++k, tileLut += lut_step) - { - const int ty = k / tilesX_; - const int tx = k % tilesX_; - - // retrieve tile submatrix - - cv::Rect tileROI; - tileROI.x = tx * tileSize_.width; - tileROI.y = ty * tileSize_.height; - tileROI.width = tileSize_.width; - tileROI.height = tileSize_.height; - - const cv::Mat tile = src_(tileROI); - - // calc histogram - - int tileHist[histSize] = {0, }; - - int height = tileROI.height; - const size_t sstep = tile.step; - for (const uchar* ptr = tile.ptr(0); height--; ptr += sstep) - { - int x = 0; - for (; x <= tileROI.width - 4; x += 4) - { - int t0 = ptr[x], t1 = ptr[x+1]; - tileHist[t0]++; tileHist[t1]++; - t0 = ptr[x+2]; t1 = ptr[x+3]; - tileHist[t0]++; tileHist[t1]++; - } - - for (; x < tileROI.width; ++x) - tileHist[ptr[x]]++; - } - - // clip histogram - - if (clipLimit_ > 0) - { - // how many pixels were clipped - int clipped = 0; - for (int i = 0; i < histSize; ++i) - { - if (tileHist[i] > clipLimit_) - { - clipped += tileHist[i] - clipLimit_; - tileHist[i] = clipLimit_; - } - } - - // redistribute clipped pixels - int redistBatch = clipped / histSize; - int residual = clipped - redistBatch * histSize; - - for (int i = 0; i < histSize; ++i) - tileHist[i] += redistBatch; - - for (int i = 0; i < residual; ++i) - tileHist[i]++; - } - - // calc Lut - - int sum = 0; - for (int i = 0; i < histSize; ++i) - { - sum += tileHist[i]; - tileLut[i] = cv::saturate_cast(sum * lutScale_); - } - } - } - - class CLAHE_Interpolation_Body : public cv::ParallelLoopBody - { - public: - CLAHE_Interpolation_Body(const cv::Mat& src, cv::Mat& dst, const cv::Mat& lut, cv::Size tileSize, int tilesX, 
int tilesY) : - src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY) - { - } - - void operator ()(const cv::Range& range) const; - - private: - cv::Mat src_; - mutable cv::Mat dst_; - cv::Mat lut_; - - cv::Size tileSize_; - int tilesX_; - int tilesY_; - }; - - void CLAHE_Interpolation_Body::operator ()(const cv::Range& range) const - { - const size_t lut_step = lut_.step; - - for (int y = range.start; y < range.end; ++y) - { - const uchar* srcRow = src_.ptr(y); - uchar* dstRow = dst_.ptr(y); - - const float tyf = (static_cast(y) / tileSize_.height) - 0.5f; - - int ty1 = cvFloor(tyf); - int ty2 = ty1 + 1; - - const float ya = tyf - ty1; - - ty1 = std::max(ty1, 0); - ty2 = std::min(ty2, tilesY_ - 1); - - const uchar* lutPlane1 = lut_.ptr(ty1 * tilesX_); - const uchar* lutPlane2 = lut_.ptr(ty2 * tilesX_); - - for (int x = 0; x < src_.cols; ++x) - { - const float txf = (static_cast(x) / tileSize_.width) - 0.5f; - - int tx1 = cvFloor(txf); - int tx2 = tx1 + 1; - - const float xa = txf - tx1; - - tx1 = std::max(tx1, 0); - tx2 = std::min(tx2, tilesX_ - 1); - - const int srcVal = srcRow[x]; - - const size_t ind1 = tx1 * lut_step + srcVal; - const size_t ind2 = tx2 * lut_step + srcVal; - - float res = 0; - - res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya)); - res += lutPlane1[ind2] * ((xa) * (1.0f - ya)); - res += lutPlane2[ind1] * ((1.0f - xa) * (ya)); - res += lutPlane2[ind2] * ((xa) * (ya)); - - dstRow[x] = cv::saturate_cast(res); - } - } - } - - class CLAHE_Impl : public cv::CLAHE - { - public: - CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); - - cv::AlgorithmInfo* info() const; - - void apply(cv::InputArray src, cv::OutputArray dst); - - void setClipLimit(double clipLimit); - double getClipLimit() const; - - void setTilesGridSize(cv::Size tileGridSize); - cv::Size getTilesGridSize() const; - - void collectGarbage(); - - private: - double clipLimit_; - int tilesX_; - int tilesY_; - - cv::Mat srcExt_; - cv::Mat 
lut_; - }; - - CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : - clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) - { - } - - CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE", - obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); - obj.info()->addParam(obj, "tilesX", obj.tilesX_); - obj.info()->addParam(obj, "tilesY", obj.tilesY_)) - - void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst) - { - cv::Mat src = _src.getMat(); - - CV_Assert( src.type() == CV_8UC1 ); - - _dst.create( src.size(), src.type() ); - cv::Mat dst = _dst.getMat(); - - const int histSize = 256; - - lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1); - - cv::Size tileSize; - cv::Mat srcForLut; - - if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) - { - tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); - srcForLut = src; - } - else - { - cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101); - - tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); - srcForLut = srcExt_; - } - - const int tileSizeTotal = tileSize.area(); - const float lutScale = static_cast(histSize - 1) / tileSizeTotal; - - int clipLimit = 0; - if (clipLimit_ > 0.0) - { - clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); - clipLimit = std::max(clipLimit, 1); - } - - CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale); - cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody); - - CLAHE_Interpolation_Body interpolationBody(src, dst, lut_, tileSize, tilesX_, tilesY_); - cv::parallel_for_(cv::Range(0, src.rows), interpolationBody); - } - - void CLAHE_Impl::setClipLimit(double clipLimit) - { - clipLimit_ = clipLimit; - } - - double CLAHE_Impl::getClipLimit() const - { - return clipLimit_; - } - - void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) - { - tilesX_ = tileGridSize.width; - tilesY_ = tileGridSize.height; - } - - 
cv::Size CLAHE_Impl::getTilesGridSize() const - { - return cv::Size(tilesX_, tilesY_); - } - - void CLAHE_Impl::collectGarbage() - { - srcExt_.release(); - lut_.release(); - } -} - -cv::Ptr cv::createCLAHE(double clipLimit, cv::Size tileGridSize) -{ - return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); -} - // ---------------------------------------------------------------------- /* Implementation of RTTI and Generic Functions for CvHistogram */ diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index a63e08ff01..53d2347ec4 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor) namespace cv { -class MorphologyRunner +class MorphologyRunner : public ParallelLoopBody { public: MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations, @@ -1102,14 +1102,14 @@ public: columnBorderType = _columnBorderType; } - void operator () ( const BlockedRange& range ) const + void operator () ( const Range& range ) const { - int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows); - int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows); + int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows); + int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows); /*if(0) printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n", - src.rows, src.cols, range.begin(), range.end(), row0, row1);*/ + src.rows, src.cols, range.start, range.end, row0, row1);*/ Mat srcStripe = src.rowRange(row0, row1); Mat dstStripe = dst.rowRange(row0, row1); @@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, } int nStripes = 1; -#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION +#if defined HAVE_TEGRA_OPTIMIZATION if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing (borderType & BORDER_ISOLATED) == 
0 && //TODO: check border types src.rows >= 64 ) //NOTE: just heuristics nStripes = 4; #endif - parallel_for(BlockedRange(0, nStripes), - MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); + parallel_for_(Range(0, nStripes), + MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue)); //Ptr f = createMorphologyFilter(op, src.type(), // kernel, anchor, borderType, borderType, borderValue ); diff --git a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java index 8bcaf58a05..db806b6fc9 100644 --- a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java +++ b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java @@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase { public void testValidateDisparityMatMatIntIntInt() { fail("Not yet implemented"); } + + public void testComputeCorrespondEpilines() + { + Mat fundamental = new Mat(3, 3, CvType.CV_64F); + fundamental.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0); + MatOfPoint2f left = new MatOfPoint2f(); + left.alloc(1); + left.put(0, 0, 2, 3); //add(new Point(x, y)); + Mat lines = new Mat(); + Mat truth = new Mat(1, 1, CvType.CV_32FC3); + truth.put(0, 0, -0.70735186, 0.70686162, -0.70588124); + Calib3d.computeCorrespondEpilines(left, 1, fundamental, lines); + assertMatEqual(truth, lines, EPS); + } } diff --git a/modules/java/generator/src/java/android+CameraBridgeViewBase.java b/modules/java/generator/src/java/android+CameraBridgeViewBase.java index 6c5c3294ff..c0c9f5bde7 100644 --- a/modules/java/generator/src/java/android+CameraBridgeViewBase.java +++ b/modules/java/generator/src/java/android+CameraBridgeViewBase.java @@ -81,6 +81,14 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac styledAttrs.recycle(); } + /** + * Sets the 
camera index + * @param cameraIndex new camera index + */ + public void setCameraIndex(int cameraIndex) { + this.mCameraIndex = cameraIndex; + } + public interface CvCameraViewListener { /** * This method is invoked when camera preview has started. After this method is invoked diff --git a/modules/java/generator/src/java/core+MatOfByte.java b/modules/java/generator/src/java/core+MatOfByte.java index 0ebdb66733..b3fe5691ee 100644 --- a/modules/java/generator/src/java/core+MatOfByte.java +++ b/modules/java/generator/src/java/core+MatOfByte.java @@ -14,7 +14,7 @@ public class MatOfByte extends Mat { protected MatOfByte(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfByte extends Mat { public MatOfByte(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfDouble.java b/modules/java/generator/src/java/core+MatOfDouble.java index cca5251105..4eb7cbc280 100644 --- a/modules/java/generator/src/java/core+MatOfDouble.java +++ b/modules/java/generator/src/java/core+MatOfDouble.java @@ -14,7 +14,7 @@ public class MatOfDouble extends Mat { protected MatOfDouble(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -25,7 +25,7 @@ public class MatOfDouble extends Mat { public MatOfDouble(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfFloat.java b/modules/java/generator/src/java/core+MatOfFloat.java index ce73b6f638..96bbeab9fb 100644 --- a/modules/java/generator/src/java/core+MatOfFloat.java +++ b/modules/java/generator/src/java/core+MatOfFloat.java @@ -14,7 +14,7 @@ public class MatOfFloat extends Mat { protected MatOfFloat(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfFloat extends Mat { public MatOfFloat(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfFloat4.java b/modules/java/generator/src/java/core+MatOfFloat4.java index 8a3e51014f..aaa97b7990 100644 --- a/modules/java/generator/src/java/core+MatOfFloat4.java +++ b/modules/java/generator/src/java/core+MatOfFloat4.java @@ -14,7 +14,7 @@ public class MatOfFloat4 extends Mat { protected MatOfFloat4(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -25,7 +25,7 @@ public class MatOfFloat4 extends Mat { public MatOfFloat4(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfFloat6.java b/modules/java/generator/src/java/core+MatOfFloat6.java index 1e23101a72..68e6249b6d 100644 --- a/modules/java/generator/src/java/core+MatOfFloat6.java +++ b/modules/java/generator/src/java/core+MatOfFloat6.java @@ -14,7 +14,7 @@ public class MatOfFloat6 extends Mat { protected MatOfFloat6(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfFloat6 extends Mat { public MatOfFloat6(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfInt.java b/modules/java/generator/src/java/core+MatOfInt.java index 80c5b3a5c2..33e5124e4f 100644 --- a/modules/java/generator/src/java/core+MatOfInt.java +++ b/modules/java/generator/src/java/core+MatOfInt.java @@ -15,7 +15,7 @@ public class MatOfInt extends Mat { protected MatOfInt(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -26,7 +26,7 @@ public class MatOfInt extends Mat { public MatOfInt(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfInt4.java b/modules/java/generator/src/java/core+MatOfInt4.java index 60277103cc..c924233a6c 100644 --- a/modules/java/generator/src/java/core+MatOfInt4.java +++ b/modules/java/generator/src/java/core+MatOfInt4.java @@ -15,7 +15,7 @@ public class MatOfInt4 extends Mat { protected MatOfInt4(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -26,7 +26,7 @@ public class MatOfInt4 extends Mat { public MatOfInt4(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfKeyPoint.java b/modules/java/generator/src/java/core+MatOfKeyPoint.java index b91fedcee8..b402fe1245 100644 --- a/modules/java/generator/src/java/core+MatOfKeyPoint.java +++ b/modules/java/generator/src/java/core+MatOfKeyPoint.java @@ -16,7 +16,7 @@ public class MatOfKeyPoint extends Mat { protected MatOfKeyPoint(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -27,7 +27,7 @@ public class MatOfKeyPoint extends Mat { public MatOfKeyPoint(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint.java b/modules/java/generator/src/java/core+MatOfPoint.java index 23eeed0ebb..6d23ed1162 100644 --- a/modules/java/generator/src/java/core+MatOfPoint.java +++ b/modules/java/generator/src/java/core+MatOfPoint.java @@ -14,7 +14,7 @@ public class MatOfPoint extends Mat { protected MatOfPoint(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint extends Mat { public MatOfPoint(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint2f.java b/modules/java/generator/src/java/core+MatOfPoint2f.java index ba4be4ac5e..0c6960730b 100644 --- a/modules/java/generator/src/java/core+MatOfPoint2f.java +++ b/modules/java/generator/src/java/core+MatOfPoint2f.java @@ -14,7 +14,7 @@ public class MatOfPoint2f extends Mat { protected MatOfPoint2f(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -25,7 +25,7 @@ public class MatOfPoint2f extends Mat { public MatOfPoint2f(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint3.java b/modules/java/generator/src/java/core+MatOfPoint3.java index 16e21301ef..0c8374f250 100644 --- a/modules/java/generator/src/java/core+MatOfPoint3.java +++ b/modules/java/generator/src/java/core+MatOfPoint3.java @@ -14,7 +14,7 @@ public class MatOfPoint3 extends Mat { protected MatOfPoint3(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint3 extends Mat { public MatOfPoint3(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint3f.java b/modules/java/generator/src/java/core+MatOfPoint3f.java index 97e2a95702..b0d50d4500 100644 --- a/modules/java/generator/src/java/core+MatOfPoint3f.java +++ b/modules/java/generator/src/java/core+MatOfPoint3f.java @@ -14,7 +14,7 @@ public class MatOfPoint3f extends Mat { protected MatOfPoint3f(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} @@ -25,7 +25,7 @@ public class MatOfPoint3f extends Mat { public MatOfPoint3f(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfRect.java b/modules/java/generator/src/java/core+MatOfRect.java index 2e58bfe897..3844d9dfbf 100644 --- a/modules/java/generator/src/java/core+MatOfRect.java +++ b/modules/java/generator/src/java/core+MatOfRect.java @@ -15,7 +15,7 @@ public class MatOfRect extends Mat { protected MatOfRect(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -26,7 +26,7 @@ public class MatOfRect extends Mat { public MatOfRect(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/ml/src/ann_mlp.cpp b/modules/ml/src/ann_mlp.cpp index 438872ae8c..7323ab57a7 100644 --- a/modules/ml/src/ann_mlp.cpp +++ b/modules/ml/src/ann_mlp.cpp @@ -40,10 +40,6 @@ #include "precomp.hpp" -#ifdef HAVE_TBB -#include -#endif - CvANN_MLP_TrainParams::CvANN_MLP_TrainParams() { term_crit = cvTermCriteria( CV_TERMCRIT_ITER + CV_TERMCRIT_EPS, 1000, 0.01 ); @@ -255,7 +251,7 @@ void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func, buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2; CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F )); - CV_CALL( weights = (double**)cvAlloc( (l_count+1)*sizeof(weights[0]) )); + CV_CALL( weights = (double**)cvAlloc( (l_count+2)*sizeof(weights[0]) )); weights[0] = wbuf->data.db; weights[1] = weights[0] + l_dst[0]*2; @@ -1022,7 +1018,7 @@ int CvANN_MLP::train_backprop( CvVectors x0, CvVectors u, const double* sw ) return iter; } -struct rprop_loop { +struct rprop_loop : cv::ParallelLoopBody { rprop_loop(const CvANN_MLP* _point, double**& _weights, int& _count, int& _ivcount, CvVectors* _x0, int& _l_count, CvMat*& _layer_sizes, int& _ovcount, int& _max_count, CvVectors* _u, const double*& _sw, double& _inv_count, CvMat*& _dEdw, int& _dcount0, double* _E, int _buf_sz) @@ -1063,7 +1059,7 @@ struct rprop_loop { int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { double* buf_ptr; double** x = 0; @@ -1084,7 +1080,7 @@ struct rprop_loop { buf_ptr += (df[i] - x[i])*2; } - for(int si = range.begin(); si < range.end(); si++ ) + for(int si = range.start; si < range.end; si++ ) { if (si % dcount0 != 0) continue; int n1, n2, k; @@ -1170,36 +1166,33 @@ struct rprop_loop { } // backward pass, update dEdw - #ifdef HAVE_TBB - static tbb::spin_mutex mutex; - tbb::spin_mutex::scoped_lock lock; - #endif + static cv::Mutex mutex; + for(int i = l_count-1; i > 0; i-- ) { n1 = layer_sizes->data.i[i-1]; n2 = layer_sizes->data.i[i]; cvInitMatHeader( &_df, dcount, n2, 
CV_64F, df[i] ); cvMul( grad1, &_df, grad1 ); - #ifdef HAVE_TBB - lock.acquire(mutex); - #endif - cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); - cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); - cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); - // update bias part of dEdw - for( k = 0; k < dcount; k++ ) - { - double* dst = _dEdw.data.db + n1*n2; - const double* src = grad1->data.db + k*n2; - for(int j = 0; j < n2; j++ ) - dst[j] += src[j]; + { + cv::AutoLock lock(mutex); + cvInitMatHeader( &_dEdw, n1, n2, CV_64F, dEdw->data.db+(weights[i]-weights[0]) ); + cvInitMatHeader( x1, dcount, n1, CV_64F, x[i-1] ); + cvGEMM( x1, grad1, 1, &_dEdw, 1, &_dEdw, CV_GEMM_A_T ); + + // update bias part of dEdw + for( k = 0; k < dcount; k++ ) + { + double* dst = _dEdw.data.db + n1*n2; + const double* src = grad1->data.db + k*n2; + for(int j = 0; j < n2; j++ ) + dst[j] += src[j]; + } + + if (i > 1) + cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); } - if (i > 1) - cvInitMatHeader( &_w, n1, n2, CV_64F, weights[i] ); - #ifdef HAVE_TBB - lock.release(); - #endif cvInitMatHeader( grad2, dcount, n1, CV_64F, grad2->data.db ); if( i > 1 ) cvGEMM( grad1, &_w, 1, 0, 0, grad2, CV_GEMM_B_T ); @@ -1297,7 +1290,7 @@ int CvANN_MLP::train_rprop( CvVectors x0, CvVectors u, const double* sw ) double E = 0; // first, iterate through all the samples and compute dEdw - cv::parallel_for(cv::BlockedRange(0, count), + cv::parallel_for_(cv::Range(0, count), rprop_loop(this, weights, count, ivcount, &x0, l_count, layer_sizes, ovcount, max_count, &u, sw, inv_count, dEdw, dcount0, &E, buf_sz) ); diff --git a/modules/ml/src/gbt.cpp b/modules/ml/src/gbt.cpp index 6671a3495b..b52ffbe5a3 100644 --- a/modules/ml/src/gbt.cpp +++ b/modules/ml/src/gbt.cpp @@ -900,7 +900,7 @@ float CvGBTrees::predict_serial( const CvMat* _sample, const CvMat* _missing, } -class Tree_predictor +class Tree_predictor : public cv::ParallelLoopBody { private: pCvSeq* weak; @@ -910,9 +910,7 
@@ private: const CvMat* missing; const float shrinkage; -#ifdef HAVE_TBB - static tbb::spin_mutex SumMutex; -#endif + static cv::Mutex SumMutex; public: @@ -931,14 +929,11 @@ public: Tree_predictor& operator=( const Tree_predictor& ) { return *this; } - virtual void operator()(const cv::BlockedRange& range) const + virtual void operator()(const cv::Range& range) const { -#ifdef HAVE_TBB - tbb::spin_mutex::scoped_lock lock; -#endif CvSeqReader reader; - int begin = range.begin(); - int end = range.end(); + int begin = range.start; + int end = range.end; int weak_count = end - begin; CvDTree* tree; @@ -956,13 +951,11 @@ public: tmp_sum += shrinkage*(float)(tree->predict(sample, missing)->value); } } -#ifdef HAVE_TBB - lock.acquire(SumMutex); - sum[i] += tmp_sum; - lock.release(); -#else - sum[i] += tmp_sum; -#endif + + { + cv::AutoLock lock(SumMutex); + sum[i] += tmp_sum; + } } } // Tree_predictor::operator() @@ -970,11 +963,7 @@ public: }; // class Tree_predictor - -#ifdef HAVE_TBB -tbb::spin_mutex Tree_predictor::SumMutex; -#endif - +cv::Mutex Tree_predictor::SumMutex; float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, @@ -992,12 +981,7 @@ float CvGBTrees::predict( const CvMat* _sample, const CvMat* _missing, Tree_predictor predictor = Tree_predictor(weak_seq, class_count, params.shrinkage, _sample, _missing, sum); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(begin, end), predictor, -// tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(begin, end), predictor); -//#endif + cv::parallel_for_(cv::Range(begin, end), predictor); for (int i=0; i *resp ) Sample_predictor predictor = Sample_predictor(this, pred_resp, _data->get_values(), _data->get_missing(), _sample_idx); -//#ifdef HAVE_TBB -// tbb::parallel_for(cv::BlockedRange(0,n), predictor, tbb::auto_partitioner()); -//#else - cv::parallel_for(cv::BlockedRange(0,n), predictor); -//#endif + cv::parallel_for_(cv::Range(0,n), predictor); int* sidx = _sample_idx ? 
_sample_idx->data.i : 0; int r_step = CV_IS_MAT_CONT(response->type) ? diff --git a/modules/ml/src/knearest.cpp b/modules/ml/src/knearest.cpp index 3c2f9ebada..6b6f5e6afa 100644 --- a/modules/ml/src/knearest.cpp +++ b/modules/ml/src/knearest.cpp @@ -306,7 +306,7 @@ float CvKNearest::write_results( int k, int k1, int start, int end, return result; } -struct P1 { +struct P1 : cv::ParallelLoopBody { P1(const CvKNearest* _pointer, int _buf_sz, int _k, const CvMat* __samples, const float** __neighbors, int _k1, CvMat* __results, CvMat* __neighbor_responses, CvMat* __dist, float* _result) { @@ -333,10 +333,10 @@ struct P1 { float* result; int buf_sz; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { cv::AutoBuffer buf(buf_sz); - for(int i = range.begin(); i < range.end(); i += 1 ) + for(int i = range.start; i < range.end; i += 1 ) { float* neighbor_responses = &buf[0]; float* dist = neighbor_responses + 1*k; @@ -410,8 +410,8 @@ float CvKNearest::find_nearest( const CvMat* _samples, int k, CvMat* _results, int k1 = get_sample_count(); k1 = MIN( k1, k ); - cv::parallel_for(cv::BlockedRange(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, - _results, _neighbor_responses, _dist, &result) + cv::parallel_for_(cv::Range(0, count), P1(this, buf_sz, k, _samples, _neighbors, k1, + _results, _neighbor_responses, _dist, &result) ); return result; diff --git a/modules/ml/src/nbayes.cpp b/modules/ml/src/nbayes.cpp index 15146d6f4e..f1f7a24ec0 100644 --- a/modules/ml/src/nbayes.cpp +++ b/modules/ml/src/nbayes.cpp @@ -277,7 +277,7 @@ bool CvNormalBayesClassifier::train( const CvMat* _train_data, const CvMat* _res return result; } -struct predict_body { +struct predict_body : cv::ParallelLoopBody { predict_body(CvMat* _c, CvMat** _cov_rotate_mats, CvMat** _inv_eigen_values, CvMat** _avg, const CvMat* _samples, const int* _vidx, CvMat* _cls_labels, CvMat* _results, float* _value, int _var_count1 @@ -307,7 +307,7 @@ struct 
predict_body { float* value; int var_count1; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { int cls = -1; @@ -324,7 +324,7 @@ struct predict_body { cv::AutoBuffer buffer(nclasses + var_count1); CvMat diff = cvMat( 1, var_count1, CV_64FC1, &buffer[0] ); - for(int k = range.begin(); k < range.end(); k += 1 ) + for(int k = range.start; k < range.end; k += 1 ) { int ival; double opt = FLT_MAX; @@ -397,9 +397,9 @@ float CvNormalBayesClassifier::predict( const CvMat* samples, CvMat* results ) c const int* vidx = var_idx ? var_idx->data.i : 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, - vidx, cls_labels, results, &value, var_count - )); + cv::parallel_for_(cv::Range(0, samples->rows), + predict_body(c, cov_rotate_mats, inv_eigen_values, avg, samples, + vidx, cls_labels, results, &value, var_count)); return value; } diff --git a/modules/ml/src/svm.cpp b/modules/ml/src/svm.cpp index 9752848b9a..2e1b2e3565 100644 --- a/modules/ml/src/svm.cpp +++ b/modules/ml/src/svm.cpp @@ -2143,7 +2143,7 @@ float CvSVM::predict( const CvMat* sample, bool returnDFVal ) const return result; } -struct predict_body_svm { +struct predict_body_svm : ParallelLoopBody { predict_body_svm(const CvSVM* _pointer, float* _result, const CvMat* _samples, CvMat* _results) { pointer = _pointer; @@ -2157,9 +2157,9 @@ struct predict_body_svm { const CvMat* samples; CvMat* results; - void operator()( const cv::BlockedRange& range ) const + void operator()( const cv::Range& range ) const { - for(int i = range.begin(); i < range.end(); i++ ) + for(int i = range.start; i < range.end; i++ ) { CvMat sample; cvGetRow( samples, &sample, i ); @@ -2175,7 +2175,7 @@ struct predict_body_svm { float CvSVM::predict(const CvMat* samples, CV_OUT CvMat* results) const { float result = 0; - cv::parallel_for(cv::BlockedRange(0, samples->rows), + cv::parallel_for_(cv::Range(0, 
samples->rows), predict_body_svm(this, &result, samples, results) ); return result; diff --git a/modules/nonfree/doc/background_subtraction.rst b/modules/nonfree/doc/background_subtraction.rst deleted file mode 100644 index 11603ca566..0000000000 --- a/modules/nonfree/doc/background_subtraction.rst +++ /dev/null @@ -1,79 +0,0 @@ -Background Subtraction -====================== - -.. highlight:: cpp - - - -gpu::VIBE_GPU -------------- -.. ocv:class:: gpu::VIBE_GPU - -Class used for background/foreground segmentation. :: - - class VIBE_GPU - { - public: - explicit VIBE_GPU(unsigned long rngSeed = 1234567); - - void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()); - - void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()); - - void release(); - - ... - }; - -The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [VIBE2011]_. - - - -gpu::VIBE_GPU::VIBE_GPU ------------------------ -The constructor. - -.. ocv:function:: gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed = 1234567) - - :param rngSeed: Value used to initiate a random sequence. - -Default constructor sets all parameters to default values. - - - -gpu::VIBE_GPU::initialize -------------------------- -Initialize background model and allocates all inner buffers. - -.. ocv:function:: void gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()) - - :param firstFrame: First frame from video sequence. - - :param stream: Stream for the asynchronous version. - - - -gpu::VIBE_GPU::operator() -------------------------- -Updates the background model and returns the foreground mask - -.. ocv:function:: void gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()) - - :param frame: Next video frame. 
- - :param fgmask: The output foreground mask as an 8-bit binary image. - - :param stream: Stream for the asynchronous version. - - - -gpu::VIBE_GPU::release ----------------------- -Releases all inner buffer's memory. - -.. ocv:function:: void gpu::VIBE_GPU::release() - - - - -.. [VIBE2011] O. Barnich and M. Van D Roogenbroeck. *ViBe: A universal background subtraction algorithm for video sequences*. IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011 diff --git a/modules/nonfree/doc/nonfree.rst b/modules/nonfree/doc/nonfree.rst index f8fa1d6eba..e524ea82f8 100644 --- a/modules/nonfree/doc/nonfree.rst +++ b/modules/nonfree/doc/nonfree.rst @@ -8,4 +8,3 @@ The module contains algorithms that may be patented in some countries or have so :maxdepth: 2 feature_detection - background_subtraction diff --git a/modules/nonfree/include/opencv2/nonfree/gpu.hpp b/modules/nonfree/include/opencv2/nonfree/gpu.hpp index c8a24e01ec..3cb0b47621 100644 --- a/modules/nonfree/include/opencv2/nonfree/gpu.hpp +++ b/modules/nonfree/include/opencv2/nonfree/gpu.hpp @@ -125,41 +125,6 @@ public: GpuMat maxPosBuffer; }; -/*! - * The class implements the following algorithm: - * "ViBe: A universal background subtraction algorithm for video sequences" - * O. Barnich and M. Van D Roogenbroeck - * IEEE Transactions on Image Processing, 20(6) :1709-1724, June 2011 - */ -class CV_EXPORTS VIBE_GPU -{ -public: - //! the default constructor - explicit VIBE_GPU(unsigned long rngSeed = 1234567); - - //! re-initiaization method - void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null()); - - //! the update operator - void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null()); - - //! 
releases all inner buffers - void release(); - - int nbSamples; // number of samples per pixel - int reqMatches; // #_min - int radius; // R - int subsamplingFactor; // amount of random subsampling - -private: - Size frameSize_; - - unsigned long rngSeed_; - GpuMat randStates_; - - GpuMat samples_; -}; - } // namespace gpu } // namespace cv diff --git a/modules/nonfree/perf/perf_gpu.cpp b/modules/nonfree/perf/perf_gpu.cpp index aa8516b1c4..9f451deaba 100644 --- a/modules/nonfree/perf/perf_gpu.cpp +++ b/modules/nonfree/perf/perf_gpu.cpp @@ -50,18 +50,6 @@ using namespace std; using namespace testing; using namespace perf; -#if defined(HAVE_XINE) || \ - defined(HAVE_GSTREAMER) || \ - defined(HAVE_QUICKTIME) || \ - defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_FFMPEG) || \ - defined(WIN32) /* assume that we have ffmpeg */ - -# define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 -#else -# define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 -#endif - ////////////////////////////////////////////////////////////////////// // SURF @@ -108,75 +96,4 @@ PERF_TEST_P(Image, GPU_SURF, } } -////////////////////////////////////////////////////// -// VIBE - -#if BUILD_WITH_VIDEO_INPUT_SUPPORT - -DEF_PARAM_TEST(Video_Cn, string, int); - -PERF_TEST_P(Video_Cn, GPU_VIBE, - Combine(Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"), - GPU_CHANNELS_1_3_4)) -{ - const string inputFile = perf::TestBase::getDataPath(GET_PARAM(0)); - const int cn = GET_PARAM(1); - - cv::VideoCapture cap(inputFile); - ASSERT_TRUE(cap.isOpened()); - - cv::Mat frame; - cap >> frame; - ASSERT_FALSE(frame.empty()); - - if (cn != 3) - { - cv::Mat temp; - if (cn == 1) - cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); - else - cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); - cv::swap(temp, frame); - } - - if (PERF_RUN_GPU()) - { - cv::gpu::GpuMat d_frame(frame); - cv::gpu::VIBE_GPU vibe; - cv::gpu::GpuMat foreground; - - vibe(d_frame, foreground); - - for (int i = 0; i < 10; ++i) - { - cap >> frame; - ASSERT_FALSE(frame.empty()); 
- - if (cn != 3) - { - cv::Mat temp; - if (cn == 1) - cv::cvtColor(frame, temp, cv::COLOR_BGR2GRAY); - else - cv::cvtColor(frame, temp, cv::COLOR_BGR2BGRA); - cv::swap(temp, frame); - } - - d_frame.upload(frame); - - startTimer(); next(); - vibe(d_frame, foreground); - stopTimer(); - } - - GPU_SANITY_CHECK(foreground); - } - else - { - FAIL_NO_CPU(); - } -} - -#endif - #endif diff --git a/modules/nonfree/perf/perf_main.cpp b/modules/nonfree/perf/perf_main.cpp index de1242149e..d5f4a1a512 100644 --- a/modules/nonfree/perf/perf_main.cpp +++ b/modules/nonfree/perf/perf_main.cpp @@ -1,4 +1,11 @@ #include "perf_precomp.hpp" #include "opencv2/ts/gpu_perf.hpp" -CV_PERF_TEST_MAIN(nonfree, perf::printCudaInfo()) +static const char * impls[] = { +#ifdef HAVE_CUDA + "cuda", +#endif + "plain" +}; + +CV_PERF_TEST_MAIN_WITH_IMPLS(nonfree, impls, perf::printCudaInfo()) diff --git a/modules/nonfree/src/cuda/vibe.cu b/modules/nonfree/src/cuda/vibe.cu deleted file mode 100644 index ba678abae2..0000000000 --- a/modules/nonfree/src/cuda/vibe.cu +++ /dev/null @@ -1,271 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "opencv2/opencv_modules.hpp" - -#ifdef HAVE_OPENCV_GPU - -#include "opencv2/gpu/device/common.hpp" - -namespace cv { namespace gpu { namespace device -{ - namespace vibe - { - void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor); - - void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - - void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - } -}}} - -namespace cv { namespace gpu { namespace device -{ - namespace vibe - { - __constant__ int c_nbSamples; - __constant__ int c_reqMatches; - __constant__ int c_radius; - __constant__ int c_subsamplingFactor; - - void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor) - { - cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) ); - cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) ); - } - - __device__ __forceinline__ uint nextRand(uint& state) - { - const unsigned int CV_RNG_COEFF = 4164903690U; - state = state * CV_RNG_COEFF + (state >> 16); - return state; - } - - __constant__ int c_xoff[9] = {-1, 0, 1, -1, 1, -1, 0, 1, 0}; - __constant__ int c_yoff[9] = {-1, -1, -1, 0, 0, 1, 1, 1, 0}; - - __device__ __forceinline__ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8) - { - int idx = nextRand(randState) % count; - - return make_int2(x + c_xoff[idx], y + c_yoff[idx]); - } - - __device__ __forceinline__ uchar cvt(uchar val) - { - return val; - } - __device__ __forceinline__ uchar4 cvt(const uchar3& val) - { - return make_uchar4(val.x, val.y, val.z, 0); - } - __device__ __forceinline__ uchar4 cvt(const uchar4& val) - { - return val; - } - - template - __global__ void init(const PtrStepSz frame, 
PtrStep samples, PtrStep randStates) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= frame.cols || y >= frame.rows) - return; - - uint localState = randStates(y, x); - - for (int k = 0; k < c_nbSamples; ++k) - { - int2 np = chooseRandomNeighbor(x, y, localState, 9); - - np.x = ::max(0, ::min(np.x, frame.cols - 1)); - np.y = ::max(0, ::min(np.y, frame.rows - 1)); - - SrcT pix = frame(np.y, np.x); - - samples(k * frame.rows + y, x) = cvt(pix); - } - - randStates(y, x) = localState; - } - - template - void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(init, cudaFuncCachePreferL1) ); - - init<<>>((PtrStepSz) frame, (PtrStepSz) samples, randStates); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - static const func_t funcs[] = - { - 0, init_caller, 0, init_caller, init_caller - }; - - funcs[cn](frame, samples, randStates, stream); - } - - __device__ __forceinline__ int calcDist(uchar a, uchar b) - { - return ::abs(a - b); - } - __device__ __forceinline__ int calcDist(const uchar3& a, const uchar4& b) - { - return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3; - } - __device__ __forceinline__ int calcDist(const uchar4& a, const uchar4& b) - { - return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3; - } - - template - __global__ void update(const PtrStepSz frame, PtrStepb fgmask, PtrStep samples, PtrStep randStates) - { - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + 
threadIdx.y; - - if (x >= frame.cols || y >= frame.rows) - return; - - uint localState = randStates(y, x); - - SrcT imgPix = frame(y, x); - - // comparison with the model - - int count = 0; - for (int k = 0; (count < c_reqMatches) && (k < c_nbSamples); ++k) - { - SampleT samplePix = samples(k * frame.rows + y, x); - - int distance = calcDist(imgPix, samplePix); - - if (distance < c_radius) - ++count; - } - - // pixel classification according to reqMatches - - fgmask(y, x) = (uchar) (-(count < c_reqMatches)); - - if (count >= c_reqMatches) - { - // the pixel belongs to the background - - // gets a random number between 0 and subsamplingFactor-1 - int randomNumber = nextRand(localState) % c_subsamplingFactor; - - // update of the current pixel model - if (randomNumber == 0) - { - // random subsampling - - int k = nextRand(localState) % c_nbSamples; - - samples(k * frame.rows + y, x) = cvt(imgPix); - } - - // update of a neighboring pixel model - randomNumber = nextRand(localState) % c_subsamplingFactor; - - if (randomNumber == 0) - { - // random subsampling - - // chooses a neighboring pixel randomly - int2 np = chooseRandomNeighbor(x, y, localState); - - np.x = ::max(0, ::min(np.x, frame.cols - 1)); - np.y = ::max(0, ::min(np.y, frame.rows - 1)); - - // chooses the value to be replaced randomly - int k = nextRand(localState) % c_nbSamples; - - samples(k * frame.rows + np.y, np.x) = cvt(imgPix); - } - } - - randStates(y, x) = localState; - } - - template - void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) - { - dim3 block(32, 8); - dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y)); - - cudaSafeCall( cudaFuncSetCacheConfig(update, cudaFuncCachePreferL1) ); - - update<<>>((PtrStepSz) frame, fgmask, (PtrStepSz) samples, randStates); - cudaSafeCall( cudaGetLastError() ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - void update_gpu(PtrStepSzb frame, int cn, 
PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream) - { - typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - static const func_t funcs[] = - { - 0, update_caller, 0, update_caller, update_caller - }; - - funcs[cn](frame, fgmask, samples, randStates, stream); - } - } -}}} - -#endif /* HAVE_OPENCV_GPU */ diff --git a/modules/nonfree/src/sift.cpp b/modules/nonfree/src/sift.cpp index 58ebd31016..5a7fd89407 100644 --- a/modules/nonfree/src/sift.cpp +++ b/modules/nonfree/src/sift.cpp @@ -774,9 +774,6 @@ void SIFT::operator()(InputArray _image, InputArray _mask, findScaleSpaceExtrema(gpyr, dogpyr, keypoints); KeyPointsFilter::removeDuplicated( keypoints ); - if( !mask.empty() ) - KeyPointsFilter::runByPixelsMask( keypoints, mask ); - if( nfeatures > 0 ) KeyPointsFilter::retainBest(keypoints, nfeatures); //t = (double)getTickCount() - t; @@ -791,6 +788,9 @@ void SIFT::operator()(InputArray _image, InputArray _mask, kpt.pt *= scale; kpt.size *= scale; } + + if( !mask.empty() ) + KeyPointsFilter::runByPixelsMask( keypoints, mask ); } else { diff --git a/modules/nonfree/src/surf.cpp b/modules/nonfree/src/surf.cpp index bb6d53e4b9..2fc459fb61 100644 --- a/modules/nonfree/src/surf.cpp +++ b/modules/nonfree/src/surf.cpp @@ -258,7 +258,7 @@ interpolateKeypoint( float N9[3][9], int dx, int dy, int ds, KeyPoint& kpt ) } // Multi-threaded construction of the scale-space pyramid -struct SURFBuildInvoker +struct SURFBuildInvoker : ParallelLoopBody { SURFBuildInvoker( const Mat& _sum, const vector& _sizes, const vector& _sampleSteps, @@ -271,9 +271,9 @@ struct SURFBuildInvoker traces = &_traces; } - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i& _dets, const vector& _traces, @@ -310,9 +310,9 @@ struct SURFFindInvoker const vector& sizes, vector& keypoints, int octave, int layer, float hessianThreshold, 
int sampleStep ); - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - for( int i=range.begin(); i > &args, int channels, int depth) { - char * pSURF_OPTIONS = SURF_OPTIONS; - static bool OPTION_INIT = false; - if(!OPTION_INIT) + char optBuf [100] = {0}; + char * optBufPtr = optBuf; + if( !use_image2d ) { - if( !USE_IMAGE2d ) - { - strcat(pSURF_OPTIONS, noImage2dOption); - pSURF_OPTIONS += strlen(noImage2dOption); - } - - size_t wave_size = 0; - queryDeviceInfo(WAVEFRONT_SIZE, &wave_size); - std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast(wave_size)); - OPTION_INIT = true; + strcat(optBufPtr, noImage2dOption); + optBufPtr += strlen(noImage2dOption); } - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS); + cl_kernel kernel; + kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); + size_t wave_size = queryDeviceInfo(kernel); + CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS); + sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast(wave_size)); + openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr); } } } @@ -161,22 +158,12 @@ public: counters.setTo(Scalar::all(0)); integral(img, surf_.sum); - if(support_image2d()) + use_image2d = support_image2d(); + if(use_image2d) { - try - { - bindImgTex(img, imgTex); - bindImgTex(surf_.sum, sumTex); - USE_IMAGE2d = true; - } - catch (const cv::Exception& e) - { - USE_IMAGE2d = false; - if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217) - { - throw e; - } - } + bindImgTex(img, imgTex); + bindImgTex(surf_.sum, sumTex); + finish(); } maskSumTex = 0; diff --git a/modules/nonfree/src/vibe_gpu.cpp b/modules/nonfree/src/vibe_gpu.cpp deleted file mode 100644 index e34862765d..0000000000 --- a/modules/nonfree/src/vibe_gpu.cpp +++ /dev/null @@ -1,141 +0,0 @@ 
-/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -#if defined(HAVE_OPENCV_GPU) - -#if !defined HAVE_CUDA || defined(CUDA_DISABLER) - -cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long) { throw_nogpu(); } -void cv::gpu::VIBE_GPU::initialize(const GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::VIBE_GPU::operator()(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::VIBE_GPU::release() {} - -#else - -namespace cv { namespace gpu { namespace device -{ - namespace vibe - { - void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor); - - void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - - void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream); - } -}}} - -namespace -{ - const int defaultNbSamples = 20; - const int defaultReqMatches = 2; - const int defaultRadius = 20; - const int defaultSubsamplingFactor = 16; -} - -cv::gpu::VIBE_GPU::VIBE_GPU(unsigned long rngSeed) : - frameSize_(0, 0), rngSeed_(rngSeed) -{ - nbSamples = defaultNbSamples; - reqMatches = defaultReqMatches; - radius = defaultRadius; - subsamplingFactor = defaultSubsamplingFactor; -} - -void cv::gpu::VIBE_GPU::initialize(const GpuMat& firstFrame, Stream& s) -{ - using namespace cv::gpu::device::vibe; - - CV_Assert(firstFrame.type() == CV_8UC1 || firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4); - - 
cudaStream_t stream = StreamAccessor::getStream(s); - - loadConstants(nbSamples, reqMatches, radius, subsamplingFactor); - - frameSize_ = firstFrame.size(); - - if (randStates_.size() != frameSize_) - { - cv::RNG rng(rngSeed_); - cv::Mat h_randStates(frameSize_, CV_8UC4); - rng.fill(h_randStates, cv::RNG::UNIFORM, 0, 255); - randStates_.upload(h_randStates); - } - - int ch = firstFrame.channels(); - int sample_ch = ch == 1 ? 1 : 4; - - samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch)); - - init_gpu(firstFrame, ch, samples_, randStates_, stream); -} - -void cv::gpu::VIBE_GPU::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& s) -{ - using namespace cv::gpu::device::vibe; - - CV_Assert(frame.depth() == CV_8U); - - int ch = frame.channels(); - int sample_ch = ch == 1 ? 1 : 4; - - if (frame.size() != frameSize_ || sample_ch != samples_.channels()) - initialize(frame); - - fgmask.create(frameSize_, CV_8UC1); - - update_gpu(frame, ch, fgmask, samples_, randStates_, StreamAccessor::getStream(s)); -} - -void cv::gpu::VIBE_GPU::release() -{ - frameSize_ = Size(0, 0); - - randStates_.release(); - - samples_.release(); -} - -#endif - -#endif // defined(HAVE_OPENCV_GPU) diff --git a/modules/nonfree/test/test_features2d.cpp b/modules/nonfree/test/test_features2d.cpp index 001d628aaa..4cce77b9d5 100644 --- a/modules/nonfree/test/test_features2d.cpp +++ b/modules/nonfree/test/test_features2d.cpp @@ -1146,3 +1146,76 @@ protected: TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); } TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); } +class FeatureDetectorUsingMaskTest : public cvtest::BaseTest +{ +public: + FeatureDetectorUsingMaskTest(const Ptr& featureDetector) : + featureDetector_(featureDetector) + { + CV_Assert(!featureDetector_.empty()); + } + +protected: + + void run(int) + { + const int nStepX = 2; + const int nStepY = 2; + + 
const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png"; + + Mat image = imread(imageFilename); + if(image.empty()) + { + ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str()); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA); + return; + } + + Mat mask(image.size(), CV_8U); + + const int stepX = image.size().width / nStepX; + const int stepY = image.size().height / nStepY; + + vector keyPoints; + vector points; + for(int i=0; idetect(image, keyPoints, mask); + KeyPoint::convert(keyPoints, points); + + for(size_t k=0; kprintf(cvtest::TS::LOG, "The feature point is outside of the mask."); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT); + return; + } + } + } + + ts->set_failed_test_info( cvtest::TS::OK ); + } + + Ptr featureDetector_; +}; + +TEST(Features2d_SIFT_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SIFT")); + test.safe_run(); +} + +TEST(DISABLED_Features2d_SURF_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SURF")); + test.safe_run(); +} + diff --git a/modules/nonfree/test/test_gpu.cpp b/modules/nonfree/test/test_gpu.cpp index 30aec352cd..3f63eeddf2 100644 --- a/modules/nonfree/test/test_gpu.cpp +++ b/modules/nonfree/test/test_gpu.cpp @@ -191,42 +191,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine( testing::Values(SURF_Extended(false), SURF_Extended(true)), testing::Values(SURF_Upright(false), SURF_Upright(true)))); -////////////////////////////////////////////////////// -// VIBE - -PARAM_TEST_CASE(VIBE, cv::Size, MatType, UseRoi) -{ -}; - -GPU_TEST_P(VIBE, Accuracy) -{ - const cv::Size size = GET_PARAM(0); - const int type = GET_PARAM(1); - const bool useRoi = GET_PARAM(2); - - const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); - - cv::Mat frame = randomMat(size, type, 0.0, 100); - cv::gpu::GpuMat d_frame = loadMat(frame, useRoi); - - cv::gpu::VIBE_GPU vibe; 
- cv::gpu::GpuMat d_fgmask = createMat(size, CV_8UC1, useRoi); - vibe.initialize(d_frame); - - for (int i = 0; i < 20; ++i) - vibe(d_frame, d_fgmask); - - frame = randomMat(size, type, 160, 255); - d_frame = loadMat(frame, useRoi); - vibe(d_frame, d_fgmask); - - // now fgmask should be entirely foreground - ASSERT_MAT_NEAR(fullfg, d_fgmask, 0); -} - -INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine( - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)), - WHOLE_SUBMAT)); - #endif diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 46a232ed6a..9e78dce243 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -1141,7 +1141,7 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) ); Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) ); - Size processingRectSize( scaledImageSize.width - originalWindowSize.width + 1, scaledImageSize.height - originalWindowSize.height + 1 ); + Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height ); if( processingRectSize.width <= 0 || processingRectSize.height <= 0 ) break; @@ -1165,15 +1165,10 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object int stripCount, stripSize; - #ifdef HAVE_TBB const int PTS_PER_THREAD = 1000; stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD; stripCount = std::min(std::max(stripCount, 1), 100); stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep; - #else - stripCount = 1; - stripSize = processingRectSize.height; - #endif if( !detectSingleScale( scaledImage, stripCount, processingRectSize, 
stripSize, yStep, factor, candidates, rejectLevels, levelWeights, outputRejectLevels ) ) diff --git a/modules/objdetect/src/latentsvm.cpp b/modules/objdetect/src/latentsvm.cpp index 521f0fdf56..5a45965e77 100644 --- a/modules/objdetect/src/latentsvm.cpp +++ b/modules/objdetect/src/latentsvm.cpp @@ -582,7 +582,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, // For each component perform searching for (i = 0; i < kComponents; i++) { -#ifdef HAVE_TBB int error = searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], b[i], maxXBorder, maxYBorder, scoreThreshold, &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), @@ -598,13 +597,6 @@ int searchObjectThresholdSomeComponents(const CvLSVMFeaturePyramid *H, free(partsDisplacementArr); return LATENT_SVM_SEARCH_OBJECT_FAILED; } -#else - (void)numThreads; - searchObjectThreshold(H, &(filters[componentIndex]), kPartFilters[i], - b[i], maxXBorder, maxYBorder, scoreThreshold, - &(pointsArr[i]), &(levelsArr[i]), &(kPointsArr[i]), - &(scoreArr[i]), &(partsDisplacementArr[i])); -#endif estimateBoxes(pointsArr[i], levelsArr[i], kPointsArr[i], filters[componentIndex]->sizeX, filters[componentIndex]->sizeY, &(oppPointsArr[i])); componentIndex += (kPartFilters[i] + 1); diff --git a/modules/ocl/CMakeLists.txt b/modules/ocl/CMakeLists.txt index a7cd3a0715..05b28b83fe 100644 --- a/modules/ocl/CMakeLists.txt +++ b/modules/ocl/CMakeLists.txt @@ -3,5 +3,5 @@ if(NOT HAVE_OPENCL) endif() set(the_description "OpenCL-accelerated Computer Vision") -ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video) +ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 6e34d27881..8bd1c9f112 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ 
b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -118,12 +118,10 @@ namespace cv //the devnum is the index of the selected device in DeviceName vector of INfo CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); - //optional function, if you want save opencl binary kernel to the file, set its path - CV_EXPORTS void setBinpath(const char *path); - //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue + //returns cl_context * CV_EXPORTS void* getoclContext(); - + //returns cl_command_queue * CV_EXPORTS void* getoclCommandQueue(); //explicit call clFinish. The global command queue will be used. @@ -133,6 +131,9 @@ namespace cv //getDevice also need to be called before this function CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); + //returns true when global OpenCL context is initialized + CV_EXPORTS bool initialized(); + //////////////////////////////// Error handling //////////////////////// CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); @@ -143,7 +144,7 @@ namespace cv protected: Context(); friend class auto_ptr; - + friend bool initialized(); private: static auto_ptr clCxt; static int val; @@ -180,6 +181,29 @@ namespace cv bool finish = true, bool measureKernelTime = false, bool cleanUp = true); + //! Enable or disable OpenCL program binary caching onto local disk + // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the + // compiled OpenCL program to be cached to the path automatically as "path/*.clb" + // binary file, which will be reused when the OpenCV executable is started again. + // + // Caching mode is controlled by the following enums + // Notes + // 1. the feature is by default enabled when OpenCV is built in release mode. + // 2. 
the CACHE_DEBUG / CACHE_RELEASE flags only effectively work with MSVC compiler; + // for GNU compilers, the function always treats the build as release mode (enabled by default). + enum + { + CACHE_NONE = 0, // do not cache OpenCL binary + CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) + CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) + CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary + CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated. + }; + CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); + + //! set where binary cache to be saved to + CV_EXPORTS void setBinpath(const char *path); + class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat @@ -224,6 +248,11 @@ namespace cv operator Mat() const; void download(cv::Mat &m) const; + //! convert to _InputArray + operator _InputArray(); + + //! convert to _OutputArray + operator _OutputArray(); //! returns a new oclMatrix header for the specified row oclMat row(int y) const; @@ -363,6 +392,9 @@ namespace cv int wholecols; }; + // convert InputArray/OutputArray to oclMat references + CV_EXPORTS oclMat& getOclMatRef(InputArray src); + CV_EXPORTS oclMat& getOclMatRef(OutputArray src); ///////////////////// mat split and merge ///////////////////////////////// //! Compose a multi-channel array from several single-channel arrays @@ -407,6 +439,9 @@ namespace cv //! computes element-wise product of the two arrays (c = a * b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); + //! 
multiplies matrix to a number (dst = scalar * src) + // supports CV_32FC1 only + CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst); //! computes element-wise quotient of the two arrays (c = a / b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); @@ -458,6 +493,7 @@ namespace cv // support all C1 types CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); + CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf); //! finds global minimum and maximum array elements and returns their values with locations // support all C1 types @@ -478,6 +514,10 @@ namespace cv CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist); //! only 8UC1 and 256 bins is supported now CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst); + + //! only 8UC1 is supported now + CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + //! bilateralFilter // supports 8UC1 8UC4 CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT); @@ -684,6 +724,8 @@ namespace cv } //! 
applies non-separable 2D linear filter to the image + // Note, at the moment this function only works when anchor point is in the kernel center + // and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); @@ -786,7 +828,11 @@ namespace cv CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum); CV_EXPORTS void integral(const oclMat &src, oclMat &sum); CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT); CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); + CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy, + int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////CascadeClassifier////////////////////////////////////////////////////////////////// @@ -808,7 +854,7 @@ namespace cv OclCascadeClassifierBuf() : m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {} - ~OclCascadeClassifierBuf() {} + ~OclCascadeClassifierBuf() { release(); } void detectMultiScale(oclMat &image, CV_OUT std::vector& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, @@ -866,7 +912,6 @@ namespace cv std::vector image_sqsums; }; - //! 
computes the proximity map for the raster template and the image where the template is searched for // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 @@ -877,71 +922,36 @@ namespace cv // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf); - - ///////////////////////////////////////////// Canny ///////////////////////////////////////////// - struct CV_EXPORTS CannyBuf; - - - //! compute edges of the input image using Canny operator - // Support CV_8UC1 only - CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); - - struct CV_EXPORTS CannyBuf - { - CannyBuf() : counter(NULL) {} - ~CannyBuf() { release(); } - explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL) - { - create(image_size, apperture_size); - } - CannyBuf(const oclMat &dx_, const oclMat &dy_); - - void create(const Size &image_size, int apperture_size = 3); - - - void release(); - - - oclMat dx, dy; - oclMat dx_buf, dy_buf; - oclMat edgeBuf; - oclMat trackBuf1, trackBuf2; - void *counter; - Ptr filterDX, filterDY; - }; ///////////////////////////////////////// clAmdFft related ///////////////////////////////////////// @@ -966,159 +976,69 @@ namespace cv const oclMat &src3, double beta, 
oclMat &dst, int flags = 0); //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// - struct CV_EXPORTS HOGDescriptor - { - enum { DEFAULT_WIN_SIGMA = -1 }; - enum { DEFAULT_NLEVELS = 64 }; - enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; - - - HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16), - Size block_stride = Size(8, 8), Size cell_size = Size(8, 8), - int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA, - double threshold_L2hys = 0.2, bool gamma_correction = true, - int nlevels = DEFAULT_NLEVELS); - - size_t getDescriptorSize() const; - size_t getBlockHistogramSize() const; - - - void setSVMDetector(const vector &detector); - - - static vector getDefaultPeopleDetector(); - static vector getPeopleDetector48x96(); - static vector getPeopleDetector64x128(); - - - void detect(const oclMat &img, vector &found_locations, - double hit_threshold = 0, Size win_stride = Size(), - Size padding = Size()); - - - void detectMultiScale(const oclMat &img, vector &found_locations, - double hit_threshold = 0, Size win_stride = Size(), - Size padding = Size(), double scale0 = 1.05, - int group_threshold = 2); - - - void getDescriptors(const oclMat &img, Size win_stride, - oclMat &descriptors, - int descr_format = DESCR_FORMAT_COL_BY_COL); - - - Size win_size; - Size block_size; - Size block_stride; - Size cell_size; int nbins; - double win_sigma; - double threshold_L2hys; - bool gamma_correction; - int nlevels; - - protected: - // initialize buffers; only need to do once in case of multiscale detection - void init_buffer(const oclMat &img, Size win_stride); - - - void computeBlockHistograms(const oclMat &img); - void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle); - - - double getWinSigma() const; - bool checkDetectorSize() const; - - static int numPartsWithin(int size, int part_size, int stride); - static Size numPartsWithin(Size size, Size part_size, Size stride); - - // 
Coefficients of the separating plane - float free_coef; - oclMat detector; - - - // Results of the last classification step - oclMat labels; - Mat labels_host; - - - // Results of the last histogram evaluation step - oclMat block_hists; - - - // Gradients conputation results - oclMat grad, qangle; - - - // scaled image - oclMat image_scale; - - - // effect size of input image (might be different from original size after scaling) - Size effect_size; - }; @@ -1126,13 +1046,11 @@ namespace cv /****************************************************************************************\ * Distance * \****************************************************************************************/ - template struct CV_EXPORTS Accumulator { typedef T Type; }; - template<> struct Accumulator { typedef float Type; @@ -1206,469 +1124,276 @@ namespace cv { public: enum DistType {L1Dist = 0, L2Dist, HammingDist}; - explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist); - - - // Add descriptors to train descriptor collection - void add(const std::vector &descCollection); - - - // Get train descriptors collection - const std::vector &getTrainDescriptors() const; - - - // Clear train descriptors collection - void clear(); - - - // Return true if there are not train descriptors in collection - bool empty() const; - - // Return true if the matcher supports mask in match methods - bool isMaskSupported() const; - - // Find one best match for each query descriptor - void matchSingle(const oclMat &query, const oclMat &train, - oclMat &trainIdx, oclMat &distance, - const oclMat &mask = oclMat()); - - // Download trainIdx and distance and convert it to CPU vector with DMatch - static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector &matches); - // Convert trainIdx and distance to vector with DMatch - static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector &matches); - - // Find one best match for each query descriptor - void match(const 
oclMat &query, const oclMat &train, std::vector &matches, const oclMat &mask = oclMat()); - - // Make gpu collection of trains and masks in suitable format for matchCollection function - void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector &masks = std::vector()); - // Find one best match from train collection for each query descriptor - void matchCollection(const oclMat &query, const oclMat &trainCollection, - oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, - const oclMat &masks = oclMat()); - - // Download trainIdx, imgIdx and distance and convert it to vector with DMatch - static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector &matches); - // Convert trainIdx, imgIdx and distance to vector with DMatch - static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector &matches); - - // Find one best match from train collection for each query descriptor. - void match(const oclMat &query, std::vector &matches, const std::vector &masks = std::vector()); - - // Find k best matches for each query descriptor (in increasing order of distances) - void knnMatchSingle(const oclMat &query, const oclMat &train, - oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, - const oclMat &mask = oclMat()); - - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. 
- static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, - std::vector< std::vector > &matches, bool compactResult = false); // Convert trainIdx and distance to vector with DMatch - static void knnMatchConvert(const Mat &trainIdx, const Mat &distance, - std::vector< std::vector > &matches, bool compactResult = false); - - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const oclMat &query, const oclMat &train, - std::vector< std::vector > &matches, int k, const oclMat &mask = oclMat(), - bool compactResult = false); - - // Find k best matches from train collection for each query descriptor (in increasing order of distances) - void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, - oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, - const oclMat &maskCollection = oclMat()); - - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, - std::vector< std::vector > &matches, bool compactResult = false); // Convert trainIdx and distance to vector with DMatch - static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, - std::vector< std::vector > &matches, bool compactResult = false); - - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. 
If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const oclMat &query, std::vector< std::vector > &matches, int k, - const std::vector &masks = std::vector(), bool compactResult = false); - - // Find best matches for each query descriptor which have distance less than maxDistance. - // nMatches.at(0, queryIdx) will contain matches count for queryIdx. - // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, - // because it didn't have enough memory. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. - void radiusMatchSingle(const oclMat &query, const oclMat &train, - oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, - const oclMat &mask = oclMat()); - - // Download trainIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, - std::vector< std::vector > &matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. 
- static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches, - std::vector< std::vector > &matches, bool compactResult = false); - - - // Find best matches for each query descriptor which have distance less than maxDistance - // in increasing order of distances). - void radiusMatch(const oclMat &query, const oclMat &train, - std::vector< std::vector > &matches, float maxDistance, - const oclMat &mask = oclMat(), bool compactResult = false); - - - // Find best matches for each query descriptor which have distance less than maxDistance. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. - void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance, - const std::vector &masks = std::vector()); - - - // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches, - std::vector< std::vector > &matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. 
- static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches, - std::vector< std::vector > &matches, bool compactResult = false); - - - // Find best matches from train collection for each query descriptor which have distance less than - // maxDistance (in increasing order of distances). - void radiusMatch(const oclMat &query, std::vector< std::vector > &matches, float maxDistance, - const std::vector &masks = std::vector(), bool compactResult = false); - - - DistType distType; - - - private: - std::vector trainDescCollection; - }; - - template - class CV_EXPORTS BruteForceMatcher_OCL; - - template - class CV_EXPORTS BruteForceMatcher_OCL< L1 > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {} - explicit BruteForceMatcher_OCL(L1 /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} - }; template - class CV_EXPORTS BruteForceMatcher_OCL< L2 > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {} - explicit BruteForceMatcher_OCL(L2 /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} - }; template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {} - explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} - }; + class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base + { + public: + explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {} + }; + class CV_EXPORTS GoodFeaturesToTrackDetector_OCL + { + public: + explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, + int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04); + + //! 
return 1 rows matrix with CV_32FC2 type + void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat()); + //! download points of type Point2f to a vector. the vector's content will be erased + void downloadPoints(const oclMat &points, vector &points_v); + + int maxCorners; + double qualityLevel; + double minDistance; + + int blockSize; + bool useHarrisDetector; + double harrisK; + void releaseMemory() + { + Dx_.release(); + Dy_.release(); + eig_.release(); + minMaxbuf_.release(); + tmpCorners_.release(); + } + private: + oclMat Dx_; + oclMat Dy_; + oclMat eig_; + oclMat minMaxbuf_; + oclMat tmpCorners_; + }; + + inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, + int blockSize_, bool useHarrisDetector_, double harrisK_) + { + maxCorners = maxCorners_; + qualityLevel = qualityLevel_; + minDistance = minDistance_; + blockSize = blockSize_; + useHarrisDetector = useHarrisDetector_; + harrisK = harrisK_; + } /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// - class CV_EXPORTS PyrLKOpticalFlow - { - public: - PyrLKOpticalFlow() - { - winSize = Size(21, 21); - maxLevel = 3; - iters = 30; - derivLambda = 0.5; - useInitialFlow = false; - minEigThreshold = 1e-4f; - getMinEigenVals = false; - isDeviceArch11_ = false; - } - - void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, - oclMat &status, oclMat *err = 0); - - - void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); - - - Size winSize; - int maxLevel; - int iters; - double derivLambda; - bool useInitialFlow; - float minEigThreshold; - bool getMinEigenVals; - - - void releaseMemory() - { - dx_calcBuf_.release(); - dy_calcBuf_.release(); - - prevPyr_.clear(); - nextPyr_.clear(); - - dx_buf_.release(); - dy_buf_.release(); - } - - - private: - void calcSharrDeriv(const oclMat &src, oclMat &dx, 
oclMat &dy); - - - void buildImagePyramid(const oclMat &img0, vector &pyr, bool withBorder); - - oclMat dx_calcBuf_; - oclMat dy_calcBuf_; - - vector prevPyr_; - vector nextPyr_; - - oclMat dx_buf_; - oclMat dy_buf_; - - - oclMat uPyr_[2]; - oclMat vPyr_[2]; - - - bool isDeviceArch11_; - }; //////////////// build warping maps //////////////////// //! builds plane warping maps @@ -1739,6 +1464,7 @@ namespace cv private: oclMat minSSD, leBuf, riBuf; }; + class CV_EXPORTS StereoBeliefPropagation { public: @@ -1769,6 +1495,7 @@ namespace cv std::vector datas; oclMat out; }; + class CV_EXPORTS StereoConstantSpaceBP { public: @@ -1807,6 +1534,94 @@ namespace cv oclMat temp; oclMat out; }; + + // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method + // + // see reference: + // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". + // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". + class CV_EXPORTS OpticalFlowDual_TVL1_OCL + { + public: + OpticalFlowDual_TVL1_OCL(); + + void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); + + void collectGarbage(); + + /** + * Time step of the numerical scheme. + */ + double tau; + + /** + * Weight parameter for the data term, attachment parameter. + * This is the most relevant parameter, which determines the smoothness of the output. + * The smaller this parameter is, the smoother the solutions we obtain. + * It depends on the range of motions of the images, so its value should be adapted to each image sequence. + */ + double lambda; + + /** + * Weight parameter for (u - v)^2, tightness parameter. + * It serves as a link between the attachment and the regularization terms. + * In theory, it should have a small value in order to maintain both parts in correspondence. + * The method is stable for a large range of values of this parameter. 
+ */ + double theta; + + /** + * Number of scales used to create the pyramid of images. + */ + int nscales; + + /** + * Number of warpings per scale. + * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. + * This is a parameter that assures the stability of the method. + * It also affects the running time, so it is a compromise between speed and accuracy. + */ + int warps; + + /** + * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. + * A small value will yield more accurate solutions at the expense of a slower convergence. + */ + double epsilon; + + /** + * Stopping criterion iterations number used in the numerical scheme. + */ + int iterations; + + bool useInitialFlow; + + private: + void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2); + + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + oclMat I1x_buf; + oclMat I1y_buf; + + oclMat I1w_buf; + oclMat I1wx_buf; + oclMat I1wy_buf; + + oclMat grad_buf; + oclMat rho_c_buf; + + oclMat p11_buf; + oclMat p12_buf; + oclMat p21_buf; + oclMat p22_buf; + + oclMat diff_buf; + oclMat norm_buf; + }; } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp index 081d2343dc..634f2f2b15 100644 --- a/modules/ocl/include/opencv2/ocl/private/util.hpp +++ b/modules/ocl/include/opencv2/ocl/private/util.hpp @@ -49,7 +49,7 @@ #include "opencv2/ocl/ocl.hpp" #if defined __APPLE__ -#include +#include #else #include #endif @@ -120,6 +120,33 @@ namespace cv cl_mem CV_EXPORTS bindTexture(const oclMat &mat); void CV_EXPORTS releaseTexture(cl_mem& texture); + //Represents an image texture object + class CV_EXPORTS TextureCL + { + public: + TextureCL(cl_mem tex, int r, int c, int t) + : tex_(tex), rows(r), cols(c), type(t) {} + ~TextureCL() + { + openCLFree(tex_); + } + operator cl_mem() 
+ { + return tex_; + } + cl_mem const tex_; + const int rows; + const int cols; + const int type; + private: + //disable assignment + void operator=(const TextureCL&); + }; + // bind oclMat to OpenCL image textures and retunrs an TextureCL object + // note: + // for faster clamping, there is no buffer padding for the constructed texture + Ptr CV_EXPORTS bindTexturePtr(const oclMat &mat); + // returns whether the current context supports image2d_t format or not bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); @@ -128,11 +155,17 @@ namespace cv enum DEVICE_INFO { WAVEFRONT_SIZE, //in AMD speak - WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak IS_CPU_DEVICE //check if the device is CPU }; - //info should have been pre-allocated - void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info); + template + _ty queryDeviceInfo(cl_kernel kernel = NULL); + + template<> + int CV_EXPORTS queryDeviceInfo(cl_kernel kernel); + template<> + size_t CV_EXPORTS queryDeviceInfo(cl_kernel kernel); + template<> + bool CV_EXPORTS queryDeviceInfo(cl_kernel kernel); }//namespace ocl diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index 2da17755eb..bd2a4ec4b6 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -44,41 +44,21 @@ int main(int argc, const char *argv[]) { - vector oclinfo; - int num_devices = getDevice(oclinfo); - - if (num_devices < 1) - { - cerr << "no device found\n"; - return -1; - } - - int devidx = 0; - - for (size_t i = 0; i < oclinfo.size(); i++) - { - for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) - { - printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); - } - } - - redirectError(cvErrorCallback); - const char *keys = "{ h | help | false | print help message }" "{ f | filter | | filter for test }" "{ w | workdir | | set working directory }" "{ l | list | false | show all tests }" "{ d | device | 0 | device id }" + "{ c | cpu_ocl | false | use cpu as ocl device}" "{ i | 
iters | 10 | iteration count }" "{ m | warmup | 1 | gpu warm up iteration count}" - "{ t | xtop | 1.1 | xfactor top boundary}" - "{ b | xbottom | 0.9 | xfactor bottom boundary}" + "{ t | xtop | 1.1 | xfactor top boundary}" + "{ b | xbottom | 0.9 | xfactor bottom boundary}" "{ v | verify | false | only run gpu once to verify if problems occur}"; + redirectError(cvErrorCallback); CommandLineParser cmd(argc, argv, keys); - if (cmd.get("help")) { cout << "Avaible options:" << endl; @@ -86,14 +66,40 @@ int main(int argc, const char *argv[]) return 0; } - int device = cmd.get("device"); + // get ocl devices + bool use_cpu = cmd.get("c"); + vector oclinfo; + int num_devices = 0; + if(use_cpu) + num_devices = getDevice(oclinfo, ocl::CVCL_DEVICE_TYPE_CPU); + else + num_devices = getDevice(oclinfo); + if (num_devices < 1) + { + cerr << "no device found\n"; + return -1; + } + // show device info + int devidx = 0; + for (size_t i = 0; i < oclinfo.size(); i++) + { + for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) + { + cout << "device " << devidx++ << ": " << oclinfo[i].DeviceName[j] << endl; + } + } + + int device = cmd.get("device"); if (device < 0 || device >= num_devices) { cerr << "Invalid device ID" << endl; return -1; } + // set this to overwrite binary cache every time the test starts + ocl::setBinaryDiskCache(ocl::CACHE_UPDATE); + if (cmd.get("verify")) { TestSystem::instance().setNumIters(1); @@ -102,7 +108,6 @@ int main(int argc, const char *argv[]) } devidx = 0; - for (size_t i = 0; i < oclinfo.size(); i++) { for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++) @@ -111,7 +116,7 @@ int main(int argc, const char *argv[]) { ocl::setDevice(oclinfo[i], (int)j); TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); - printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); + cout << "use " << devidx << ": " <(max_val_, max_val) && EeceptDoubleEQ(min_val_, min_val)) + TestSystem::instance().setAccurate(1, 
max(fabs(max_val_-max_val), fabs(min_val_-min_val))); + else + TestSystem::instance().setAccurate(0, max(fabs(max_val_-max_val), fabs(min_val_-min_val))); + GPU_ON; ocl::minMax(d_src, &min_val, &max_val); - ; GPU_OFF; GPU_FULL_ON; @@ -633,13 +655,15 @@ TEST(minMax) } ///////////// minMaxLoc //////////////////////// -TEST(minMaxLoc) +PERFTEST(minMaxLoc) { Mat src; ocl::oclMat d_src; - double min_val, max_val; + double min_val = 0.0, max_val = 0.0; + double min_val_ = 0.0, max_val_ = 0.0; Point min_loc, max_loc; + Point min_loc_, max_loc_; int all_type[] = {CV_8UC1, CV_32FC1}; std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; @@ -657,12 +681,71 @@ TEST(minMaxLoc) d_src.upload(src); WARMUP_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + ocl::minMaxLoc(d_src, &min_val_, &max_val_, &min_loc_, &max_loc_); WARMUP_OFF; + double error0 = 0., error1 = 0., minlocVal = 0., minlocVal_ = 0., maxlocVal = 0., maxlocVal_ = 0.; + if(src.depth() == 0) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 1) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 2) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 3) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 4) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 5) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + if(src.depth() == 6) + { + minlocVal = src.at(min_loc); + minlocVal_ = src.at(min_loc_); + 
maxlocVal = src.at(max_loc); + maxlocVal_ = src.at(max_loc_); + } + error0 = ::abs(minlocVal_ - minlocVal); + error1 = ::abs(maxlocVal_ - maxlocVal); + if( EeceptDoubleEQ(maxlocVal_, maxlocVal) + &&EeceptDoubleEQ(minlocVal_, minlocVal) + &&EeceptDoubleEQ(max_val_, max_val) + &&EeceptDoubleEQ(min_val_, min_val)) + TestSystem::instance().setAccurate(1, 0.); + else + TestSystem::instance().setAccurate(0, max(error0, error1)); + GPU_ON; ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - ; GPU_OFF; GPU_FULL_ON; @@ -675,7 +758,7 @@ TEST(minMaxLoc) } ///////////// Sum //////////////////////// -TEST(Sum) +PERFTEST(Sum) { Mat src; Scalar cpures, gpures; @@ -690,7 +773,7 @@ TEST(Sum) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 256); + gen(src, size, size, all_type[j], 0, 60); cpures = sum(src); @@ -703,9 +786,16 @@ TEST(Sum) gpures = ocl::sum(d_src); WARMUP_OFF; + vector diffs(4); + diffs[3] = fabs(cpures[3] - gpures[3]); + diffs[2] = fabs(cpures[2] - gpures[2]); + diffs[1] = fabs(cpures[1] - gpures[1]); + diffs[0] = fabs(cpures[0] - gpures[0]); + double max_diff = *max_element(diffs.begin(), diffs.end()); + TestSystem::instance().setAccurate(max_diff<0.1?1:0, max_diff); + GPU_ON; gpures = ocl::sum(d_src); - ; GPU_OFF; GPU_FULL_ON; @@ -718,7 +808,7 @@ TEST(Sum) } ///////////// countNonZero //////////////////////// -TEST(countNonZero) +PERFTEST(countNonZero) { Mat src; ocl::oclMat d_src; @@ -736,18 +826,24 @@ TEST(countNonZero) countNonZero(src); + int cpures = 0, gpures = 0; CPU_ON; - countNonZero(src); + cpures = countNonZero(src); CPU_OFF; d_src.upload(src); WARMUP_ON; - ocl::countNonZero(d_src); + gpures = ocl::countNonZero(d_src); WARMUP_OFF; + int diff = abs(cpures - gpures); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); + GPU_ON; ocl::countNonZero(d_src); - ; GPU_OFF; GPU_FULL_ON; @@ -760,9 +856,9 @@ TEST(countNonZero) } ///////////// 
Phase //////////////////////// -TEST(Phase) +PERFTEST(Phase) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_32FC1}; @@ -778,12 +874,12 @@ TEST(Phase) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - phase(src1, src2, dst, 1); CPU_ON; phase(src1, src2, dst, 1); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -793,24 +889,25 @@ TEST(Phase) GPU_ON; ocl::phase(d_src1, d_src2, d_dst, 1); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::phase(d_src1, d_src2, d_dst, 1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-2); } } } ///////////// bitwise_and//////////////////////// -TEST(bitwise_and) +PERFTEST(bitwise_and) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_8UC1, CV_32SC1}; @@ -826,7 +923,6 @@ TEST(bitwise_and) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - bitwise_and(src1, src2, dst); CPU_ON; @@ -841,120 +937,25 @@ TEST(bitwise_and) GPU_ON; ocl::bitwise_and(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::bitwise_and(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; - } - } -} - -///////////// bitwise_or//////////////////////// -TEST(bitwise_or) -{ - Mat src1, src2, dst; - ocl::oclMat d_src1, d_src2, d_dst; - - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = Min_Size; size <= Max_Size; size *= Multiple) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_or(src1, src2, dst); - 
- CPU_ON; - bitwise_or(src1, src2, dst); - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - ; - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_or(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; - } - - } -} - -///////////// bitwise_xor//////////////////////// -TEST(bitwise_xor) -{ - Mat src1, src2, dst; - ocl::oclMat d_src1, d_src2, d_dst; - - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = Min_Size; size <= Max_Size; size *= Multiple) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_xor(src1, src2, dst); - - CPU_ON; - bitwise_xor(src1, src2, dst); - CPU_OFF; - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - ; - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_xor(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } } ///////////// bitwise_not//////////////////////// -TEST(bitwise_not) +PERFTEST(bitwise_not) { - Mat src1, dst; + Mat src1, dst, ocl_dst; ocl::oclMat d_src1, d_dst; int all_type[] = {CV_8UC1, CV_32SC1}; @@ -969,7 +970,6 @@ TEST(bitwise_not) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - bitwise_not(src1, dst); CPU_ON; @@ -983,23 +983,24 @@ TEST(bitwise_not) GPU_ON; ocl::bitwise_not(d_src1, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); ocl::bitwise_not(d_src1, d_dst); - d_dst.download(dst); + 
d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } } ///////////// compare//////////////////////// -TEST(compare) +PERFTEST(compare) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int CMP_EQ = 0; @@ -1016,12 +1017,12 @@ TEST(compare) gen(src2, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - compare(src1, src2, dst, CMP_EQ); CPU_ON; compare(src1, src2, dst, CMP_EQ); CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -1031,24 +1032,25 @@ TEST(compare) GPU_ON; ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 0.0); } } } ///////////// pow //////////////////////// -TEST(pow) +PERFTEST(pow) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_32FC1}; @@ -1060,8 +1062,7 @@ TEST(pow) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 100); - gen(dst, size, size, all_type[j], 0, 100); + gen(src, size, size, all_type[j], 5, 16); pow(src, -2.0, dst); @@ -1077,23 +1078,24 @@ TEST(pow) GPU_ON; ocl::pow(d_src, -2.0, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::pow(d_src, -2.0, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } } ///////////// MagnitudeSqr//////////////////////// -TEST(MagnitudeSqr) +PERFTEST(MagnitudeSqr) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; int all_type[] = {CV_32FC1}; @@ -1109,31 +1111,17 @@ TEST(MagnitudeSqr) gen(src2, size, size, all_type[t], 0, 256); gen(dst, size, size, all_type[t], 0, 256); - - for (int i = 0; i < src1.rows; ++i) - - for (int j = 0; j < 
src1.cols; ++j) - { - float val1 = src1.at(i, j); - float val2 = src2.at(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - CPU_ON; - for (int i = 0; i < src1.rows; ++i) for (int j = 0; j < src1.cols; ++j) { float val1 = src1.at(i, j); float val2 = src2.at(i, j); - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } - CPU_OFF; + d_src1.upload(src1); d_src2.upload(src2); @@ -1143,24 +1131,25 @@ TEST(MagnitudeSqr) GPU_ON; ocl::magnitudeSqr(d_src1, d_src2, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::magnitudeSqr(d_src1, d_src2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } } ///////////// AddWeighted//////////////////////// -TEST(AddWeighted) +PERFTEST(AddWeighted) { - Mat src1, src2, dst; + Mat src1, src2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_dst; double alpha = 2.0, beta = 1.0, gama = 3.0; @@ -1192,15 +1181,16 @@ TEST(AddWeighted) GPU_ON; ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); d_src2.upload(src2); ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index 00034700b4..8ebb6482ba 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -68,9 +69,9 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we } } } -TEST(blend) +PERFTEST(blend) { - Mat src1, src2, weights1, 
weights2, dst; + Mat src1, src2, weights1, weights2, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -104,7 +105,6 @@ TEST(blend) GPU_ON; ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -113,8 +113,10 @@ TEST(blend) d_weights1.upload(weights1); d_weights2.upload(weights2); ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f); } } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp index 6562f91e43..406b46a324 100644 --- a/modules/ocl/perf/perf_brute_force_matcher.cpp +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" //////////////////// BruteForceMatch ///////////////// -TEST(BruteForceMatcher) +PERFTEST(BruteForceMatcher) { Mat trainIdx_cpu; Mat distance_cpu; @@ -66,6 +67,7 @@ TEST(BruteForceMatcher) gen(train, size, desc_len, CV_32F, 0, 1); // Output vector< vector > matches(2); + vector< vector > d_matches(2); // Init GPU matcher ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); @@ -88,15 +90,20 @@ TEST(BruteForceMatcher) GPU_ON; d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - ; GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.match(d_query, d_train, matches[0]); + d_matcher.match(d_query, d_train, d_matches[0]); GPU_FULL_OFF; + int diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + 
TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); + SUBTEST << size << "; knnMatch"; matcher.knnMatch(query, train, matches, 2); @@ -111,15 +118,20 @@ TEST(BruteForceMatcher) GPU_ON; d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - ; GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.knnMatch(d_query, d_train, matches, 2); + d_matcher.knnMatch(d_query, d_train, d_matches, 2); GPU_FULL_OFF; + diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); + SUBTEST << size << "; radiusMatch"; float max_distance = 2.0f; @@ -138,13 +150,18 @@ TEST(BruteForceMatcher) GPU_ON; d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - ; GPU_OFF; GPU_FULL_ON; d_query.upload(query); d_train.upload(train); - d_matcher.radiusMatch(d_query, d_train, matches, max_distance); + d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance); GPU_FULL_OFF; + + diff = abs((int)d_matches[0].size() - (int)matches[0].size()); + if(diff == 0) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, diff); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_calib3d.cpp similarity index 66% rename from modules/ocl/perf/perf_columnsum.cpp rename to modules/ocl/perf/perf_calib3d.cpp index d2e3b45e53..f998ddf0f3 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_calib3d.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -42,47 +43,59 @@ // the use of this software, even if advised of the possibility 
of such damage. // //M*/ + #include "precomp.hpp" - -///////////// columnSum//////////////////////// -TEST(columnSum) +///////////// StereoMatchBM //////////////////////// +PERFTEST(StereoMatchBM) { - Mat src, dst; - ocl::oclMat d_src, d_dst; + Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE); + Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE); + Mat disp,dst; + ocl::oclMat d_left, d_right,d_disp; + int n_disp= 128; + int winSize =19; - for (int size = Min_Size; size <= Max_Size; size *= Multiple) - { - SUBTEST << size << 'x' << size << "; CV_32FC1"; + SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg "; - gen(src, size, size, CV_32FC1, 0, 256); + StereoBM bm(0, n_disp, winSize); + bm(left_image, right_image, dst); - CPU_ON; - dst.create(src.size(), src.type()); + CPU_ON; + bm(left_image, right_image, dst); + CPU_OFF; - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - dst.at(i, j) = src.at(i, j) += src.at(i - 1, j); - } - } + d_left.upload(left_image); + d_right.upload(right_image); - CPU_OFF; + ocl::StereoBM_OCL d_bm(0, n_disp, winSize); - d_src.upload(src); - WARMUP_ON; - ocl::columnSum(d_src, d_dst); - WARMUP_OFF; + WARMUP_ON; + d_bm(d_left, d_right, d_disp); + WARMUP_OFF; - GPU_ON; - ocl::columnSum(d_src, d_dst); - ; - GPU_OFF; + cv::Mat ocl_mat; + d_disp.download(ocl_mat); + ocl_mat.convertTo(ocl_mat, dst.type()); - GPU_FULL_ON; - d_src.upload(src); - ocl::columnSum(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; - } -} \ No newline at end of file + GPU_ON; + d_bm(d_left, d_right, d_disp); + GPU_OFF; + + GPU_FULL_ON; + d_left.upload(left_image); + d_right.upload(right_image); + d_bm(d_left, d_right, d_disp); + d_disp.download(disp); + GPU_FULL_OFF; + + TestSystem::instance().setAccurate(-1, 0.); +} + + + + + + + + + \ No newline at end of file diff --git a/modules/ocl/perf/perf_canny.cpp 
b/modules/ocl/perf/perf_canny.cpp index 428e036d0c..cb23d7ad28 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,7 @@ #include "precomp.hpp" ///////////// Canny //////////////////////// -TEST(Canny) +PERFTEST(Canny) { Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); @@ -56,7 +57,7 @@ TEST(Canny) SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; - Mat edges(img.size(), CV_8UC1); + Mat edges(img.size(), CV_8UC1), ocl_edges; CPU_ON; Canny(img, edges, 50.0, 100.0); @@ -72,12 +73,13 @@ TEST(Canny) GPU_ON; ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - ; GPU_OFF; GPU_FULL_ON; d_img.upload(img); ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - d_edges.download(edges); + d_edges.download(ocl_edges); GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2); } \ No newline at end of file diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp index e32a1839d8..daf1cfdc9c 100644 --- a/modules/ocl/perf/perf_color.cpp +++ b/modules/ocl/perf/perf_color.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// cvtColor//////////////////////// -TEST(cvtColor) +PERFTEST(cvtColor) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC4}; @@ -74,14 +75,15 @@ TEST(cvtColor) GPU_ON; ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - ; GPU_OFF; GPU_FULL_ON; 
d_src.upload(src); ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(dst, ocl_dst, 1e-5); } diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index 50be2546ee..6e0be3f19d 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,13 +46,13 @@ #include "precomp.hpp" ///////////// dft //////////////////////// -TEST(dft) +PERFTEST(dft) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; - int all_type[] = {CV_32FC1, CV_32FC2}; - std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; + int all_type[] = {CV_32FC2}; + std::string type_name[] = {"CV_32FC2"}; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { @@ -75,14 +76,15 @@ TEST(dft) GPU_ON; ocl::dft(d_src, d_dst, Size(size, size)); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::dft(d_src, d_dst, Size(size, size)); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, src.size().area() * 1e-4); } } diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index e9646c77e2..e988ce09d6 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// Blur//////////////////////// -TEST(Blur) +PERFTEST(Blur) { - Mat src1, dst; + Mat src1, dst, 
ocl_dst; ocl::oclMat d_src1, d_dst; Size ksize = Size(3, 3); @@ -64,7 +65,6 @@ TEST(Blur) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - blur(src1, dst, ksize, Point(-1, -1), bordertype); CPU_ON; @@ -79,22 +79,23 @@ TEST(Blur) GPU_ON; ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } } ///////////// Laplacian//////////////////////// -TEST(Laplacian) +PERFTEST(Laplacian) { - Mat src1, dst; + Mat src1, dst, ocl_dst; ocl::oclMat d_src1, d_dst; int ksize = 3; @@ -110,7 +111,6 @@ TEST(Laplacian) gen(src1, size, size, all_type[j], 0, 256); gen(dst, size, size, all_type[j], 0, 256); - Laplacian(src1, dst, -1, ksize, 1); CPU_ON; @@ -125,23 +125,24 @@ TEST(Laplacian) GPU_ON; ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - ; GPU_OFF; GPU_FULL_ON; d_src1.upload(src1); ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } } ///////////// Erode //////////////////// -TEST(Erode) +PERFTEST(Erode) { - Mat src, dst, ker; + Mat src, dst, ker, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; @@ -170,23 +171,24 @@ TEST(Erode) GPU_ON; ocl::erode(d_src, d_dst, ker); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::erode(d_src, d_dst, ker); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } } } ///////////// Sobel //////////////////////// -TEST(Sobel) +PERFTEST(Sobel) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int dx = 1; @@ -216,22 +218,23 @@ TEST(Sobel) GPU_ON; ocl::Sobel(d_src, d_dst, -1, dx, dy); - ; GPU_OFF; GPU_FULL_ON; 
d_src.upload(src); ocl::Sobel(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } } ///////////// Scharr //////////////////////// -TEST(Scharr) +PERFTEST(Scharr) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int dx = 1; @@ -261,25 +264,27 @@ TEST(Scharr) GPU_ON; ocl::Scharr(d_src, d_dst, -1, dx, dy); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::Scharr(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1); } } } ///////////// GaussianBlur //////////////////////// -TEST(GaussianBlur) +PERFTEST(GaussianBlur) { - Mat src, dst; + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; + const int ksize = 7; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { @@ -289,37 +294,37 @@ TEST(GaussianBlur) gen(src, size, size, all_type[j], 0, 256); - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_ON; - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_OFF; ocl::oclMat d_src(src); - ocl::oclMat d_dst(src.size(), src.type()); - ocl::oclMat d_buf; + ocl::oclMat d_dst; WARMUP_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); WARMUP_OFF; GPU_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - ; + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - d_dst.download(dst); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1.0); } } } ///////////// filter2D//////////////////////// -TEST(filter2D) 
+PERFTEST(filter2D) { Mat src; @@ -332,38 +337,38 @@ TEST(filter2D) { gen(src, size, size, all_type[j], 0, 256); - for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) - { - SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; + const int ksize = 3; - Mat kernel; - gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); + SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; - Mat dst; - cv::filter2D(src, dst, -1, kernel); + Mat kernel; + gen(kernel, ksize, ksize, CV_32SC1, -3.0, 3.0); - CPU_ON; - cv::filter2D(src, dst, -1, kernel); - CPU_OFF; + Mat dst, ocl_dst; - ocl::oclMat d_src(src); - ocl::oclMat d_dst; + cv::filter2D(src, dst, -1, kernel); - WARMUP_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - WARMUP_OFF; + CPU_ON; + cv::filter2D(src, dst, -1, kernel); + CPU_OFF; - GPU_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - ; - GPU_OFF; + ocl::oclMat d_src(src), d_dst; - GPU_FULL_ON; - d_src.upload(src); - ocl::filter2D(d_src, d_dst, -1, kernel); - d_dst.download(dst); - GPU_FULL_OFF; - } + WARMUP_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + WARMUP_OFF; + + GPU_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::filter2D(d_src, d_dst, -1, kernel); + d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, 1e-5); } diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index 930ecb0464..f197c5f5a0 100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// gemm //////////////////////// -TEST(gemm) +PERFTEST(gemm) { - Mat src1, src2, src3, dst; + Mat 
src1, src2, src3, dst, ocl_dst; ocl::oclMat d_src1, d_src2, d_src3, d_dst; for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -74,7 +75,6 @@ TEST(gemm) GPU_ON; ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - ; GPU_OFF; GPU_FULL_ON; @@ -82,7 +82,9 @@ TEST(gemm) d_src2.upload(src2); d_src3.upload(src3); ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4); } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index 5a909ace4e..72f01dc935 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -82,7 +83,7 @@ public: } } -TEST(Haar) +PERFTEST(Haar) { Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); @@ -106,6 +107,8 @@ TEST(Haar) 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); CPU_OFF; + + vector oclfaces; ocl::CascadeClassifier_GPU faceCascade; if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) @@ -115,24 +118,26 @@ TEST(Haar) ocl::oclMat d_img(img); - faces.clear(); - WARMUP_ON; - faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); WARMUP_OFF; + if(faces.size() == oclfaces.size()) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, abs((int)faces.size() - (int)oclfaces.size())); + faces.clear(); GPU_ON; - faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - ; GPU_OFF; GPU_FULL_ON; d_img.upload(img); - 
faceCascade.detectMultiScale(d_img, faces, + faceCascade.detectMultiScale(d_img, oclfaces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); GPU_FULL_OFF; } \ No newline at end of file diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index b74077ff40..7daa61396c 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,7 +46,8 @@ #include "precomp.hpp" ///////////// HOG//////////////////////// -TEST(HOG) + +PERFTEST(HOG) { Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); @@ -54,12 +56,12 @@ TEST(HOG) throw runtime_error("can't open road.png"); } - cv::HOGDescriptor hog; hog.setSVMDetector(hog.getDefaultPeopleDetector()); std::vector found_locations; + std::vector d_found_locations; - SUBTEST << 768 << 'x' << 576 << "; road.png"; + SUBTEST << src.cols << 'x' << src.rows << "; road.png"; hog.detectMultiScale(src, found_locations); @@ -73,12 +75,16 @@ TEST(HOG) d_src.upload(src); WARMUP_ON; - ocl_hog.detectMultiScale(d_src, found_locations); + ocl_hog.detectMultiScale(d_src, d_found_locations); WARMUP_OFF; + + if(d_found_locations.size() == found_locations.size()) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, abs((int)found_locations.size() - (int)d_found_locations.size())); GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 756f69556f..ade5019147 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use 
in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// equalizeHist //////////////////////// -TEST(equalizeHist) +PERFTEST(equalizeHist) { - Mat src, dst; + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1}; std::string type_name[] = {"CV_8UC1"}; @@ -76,22 +77,23 @@ TEST(equalizeHist) GPU_ON; ocl::equalizeHist(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::equalizeHist(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.1); } } } /////////// CopyMakeBorder ////////////////////// -TEST(CopyMakeBorder) +PERFTEST(CopyMakeBorder) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_dst; int bordertype = BORDER_CONSTANT; @@ -121,22 +123,23 @@ TEST(CopyMakeBorder) GPU_ON; ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } } ///////////// cornerMinEigenVal //////////////////////// -TEST(cornerMinEigenVal) +PERFTEST(cornerMinEigenVal) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_dst; int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); @@ -150,7 +153,6 @@ TEST(cornerMinEigenVal) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 0, 256); cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); @@ -167,22 +169,23 @@ TEST(cornerMinEigenVal) GPU_ON; ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + 
TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } } ///////////// cornerHarris //////////////////////// -TEST(cornerHarris) +PERFTEST(cornerHarris) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_32FC1}; @@ -210,23 +213,24 @@ TEST(cornerHarris) GPU_ON; ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } } ///////////// integral //////////////////////// -TEST(integral) +PERFTEST(integral) { - Mat src, sum; + Mat src, sum, ocl_sum; ocl::oclMat d_src, d_sum, d_buf; int all_type[] = {CV_8UC1}; @@ -254,28 +258,31 @@ TEST(integral) GPU_ON; ocl::integral(d_src, d_sum); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::integral(d_src, d_sum); - d_sum.download(sum); + d_sum.download(ocl_sum); GPU_FULL_OFF; + + if(sum.type() == ocl_sum.type()) //we won't test accuracy when cpu function overlow + TestSystem::instance().ExpectedMatNear(sum, ocl_sum, 0.0); + } } } ///////////// WarpAffine //////////////////////// -TEST(WarpAffine) +PERFTEST(WarpAffine) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; static const double coeffs[2][3] = { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} + {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, + {sin(CV_PI / 6), cos(CV_PI / 6), -100.0} }; Mat M(2, 3, CV_64F, (void *)coeffs); int interpolation = INTER_NEAREST; @@ -308,32 +315,33 @@ TEST(WarpAffine) GPU_ON; ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } } ///////////// WarpPerspective 
//////////////////////// -TEST(WarpPerspective) +PERFTEST(WarpPerspective) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; static const double coeffs[3][3] = { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0}, + {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, + {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}, {0.0, 0.0, 1.0} }; Mat M(3, 3, CV_64F, (void *)coeffs); - int interpolation = INTER_NEAREST; + int interpolation = INTER_LINEAR; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -362,23 +370,24 @@ TEST(WarpPerspective) GPU_ON; ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } } ///////////// resize //////////////////////// -TEST(resize) +PERFTEST(resize) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; @@ -407,14 +416,15 @@ TEST(resize) GPU_ON; ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } @@ -441,25 +451,25 @@ TEST(resize) GPU_ON; ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } } ///////////// threshold//////////////////////// -TEST(threshold) +PERFTEST(threshold) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; - for (int size = Min_Size; size <= Max_Size; size *= Multiple) { SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; @@ -480,15 +490,15 @@ 
TEST(threshold) GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } for (int size = Min_Size; size <= Max_Size; size *= Multiple) @@ -511,57 +521,18 @@ TEST(threshold) GPU_ON; ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); } } ///////////// meanShiftFiltering//////////////////////// -TEST(meanShiftFiltering) -{ - int sp = 10, sr = 10; - Mat src, dst; - - ocl::oclMat d_src, d_dst; - - for (int size = Min_Size; size <= Max_Size; size *= Multiple) - { - SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; - - gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); - - pyrMeanShiftFiltering(src, dst, sp, sr); - - CPU_ON; - pyrMeanShiftFiltering(src, dst, sp, sr); - CPU_OFF; - - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - - d_src.upload(src); - - WARMUP_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - WARMUP_OFF; - - GPU_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - ; - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - d_dst.download(dst); - GPU_FULL_OFF; - } -} -///////////// meanShiftProc//////////////////////// COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) { @@ -575,9 +546,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size c1 = sptr[1]; c2 = sptr[2]; c3 = sptr[3]; - // iterate meanshift procedure - for (iter = 0; iter < maxIter; iter++) + for(iter = 0; iter < maxIter; iter++ ) { int 
count = 0; int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; @@ -589,27 +559,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size int maxy = y0 + sp; //deal with the image boundary - if (minx < 0) - { - minx = 0; - } - - if (miny < 0) - { - miny = 0; - } - - if (maxx >= size.width) - { - maxx = size.width - 1; - } - - if (maxy >= size.height) - { - maxy = size.height - 1; - } - - if (iter == 0) + if(minx < 0) minx = 0; + if(miny < 0) miny = 0; + if(maxx >= size.width) maxx = size.width - 1; + if(maxy >= size.height) maxy = size.height - 1; + if(iter == 0) { pstart = sptr; } @@ -617,22 +571,19 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size { pstart = pstart + revy * sstep + (revx << 2); //point to the new position } - ptr = pstart; ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row - for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) + for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) { int rowCount = 0; int x = minx; #if CV_ENABLE_UNROLLED - - for (; x + 4 <= maxx; x += 4, ptr += 16) + for( ; x + 4 <= maxx; x += 4, ptr += 16) { int t0, t1, t2; t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -640,10 +591,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size sx += x; rowCount++; } - t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -651,10 +600,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size sx += x + 1; rowCount++; } - t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 
255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -662,10 +609,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size sx += x + 2; rowCount++; } - t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -674,14 +619,11 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size rowCount++; } } - #endif - - for (; x <= maxx; x++, ptr += 4) + for(; x <= maxx; x++, ptr += 4) { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -690,20 +632,14 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size rowCount++; } } - - if (rowCount == 0) - { + if(rowCount == 0) continue; - } - count += rowCount; sy += y * rowCount; } - if (count == 0) - { + if( count == 0 ) break; - } int x1 = sx / count; int y1 = sy / count; @@ -712,7 +648,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size s2 = s2 / count; bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); //revise the pointer corresponding to the new (y0,x0) revx = x1 - x0; @@ -724,10 +660,8 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size c1 = s1; c2 = s2; - if (stopFlag) - { + if( stopFlag ) break; - } } //for iter dptr[0] = (uchar)c0; @@ -741,19 +675,101 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size return coor; } +static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, 
cv::TermCriteria crit) +{ + if( src_roi.empty() ) + CV_Error( CV_StsBadArg, "The input image is empty" ); + + if( src_roi.depth() != CV_8U || src_roi.channels() != 4 ) + CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); + + dst_roi.create(src_roi.size(), src_roi.type()); + + CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) ); + CV_Assert( !(dst_roi.step & 0x3) ); + + if( !(crit.type & cv::TermCriteria::MAX_ITER) ) + crit.maxCount = 5; + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + if( !(crit.type & cv::TermCriteria::EPS) ) + eps = 1.f; + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + for(int i = 0; i < 512; i++) + tab[i] = (i - 255) * (i - 255); + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + int sstep = (int)src_roi.step; + int dstep = (int)dst_roi.step; + cv::Size size = src_roi.size(); + + for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2)) + { + for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4) + { + do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); + } + } +} + +PERFTEST(meanShiftFiltering) +{ + int sp = 5, sr = 6; + Mat src, dst, ocl_dst; + + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; + + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + + cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1); + + meanShiftFiltering_(src, dst, sp, sr, crit); + + CPU_ON; + meanShiftFiltering_(src, dst, sp, sr, crit); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); + WARMUP_OFF; + + GPU_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); + d_dst.download(ocl_dst); + 
GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); + } +} + void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { - if (src_roi.empty()) { CV_Error(CV_StsBadArg, "The input image is empty"); } - if (src_roi.depth() != CV_8U || src_roi.channels() != 4) { CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); } + dst_roi.create(src_roi.size(), src_roi.type()); + dstCoor_roi.create(src_roi.size(), CV_16SC2); + CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); CV_Assert(!(dstCoor_roi.step & 0x3)); @@ -798,10 +814,11 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, } } -TEST(meanShiftProc) +PERFTEST(meanShiftProc) { - Mat src, dst, dstCoor_roi; - ocl::oclMat d_src, d_dst, d_dstCoor_roi; + Mat src; + vector dst(2), ocl_dst(2); + ocl::oclMat d_src, d_dst, d_dstCoor; TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); @@ -810,40 +827,39 @@ TEST(meanShiftProc) SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + meanShiftProc_(src, dst[0], dst[1], 5, 6, crit); CPU_ON; - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + meanShiftProc_(src, dst[0], dst[1], 5, 6, crit); CPU_OFF; d_src.upload(src); WARMUP_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); WARMUP_OFF; GPU_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - ; + ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::meanShiftProc(d_src, d_dst, 
d_dstCoor_roi, 5, 6, crit); - d_dst.download(dst); - d_dstCoor_roi.download(dstCoor_roi); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor, 5, 6, crit); + d_dst.download(ocl_dst[0]); + d_dstCoor.download(ocl_dst[1]); GPU_FULL_OFF; + vector eps(2, 0.); + TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps); } } ///////////// remap//////////////////////// -TEST(remap) +PERFTEST(remap) { - Mat src, dst, xmap, ymap; + Mat src, dst, xmap, ymap, ocl_dst; ocl::oclMat d_src, d_dst, d_xmap, d_ymap; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -876,7 +892,6 @@ TEST(remap) } } - remap(src, dst, xmap, ymap, interpolation, borderMode); CPU_ON; @@ -894,15 +909,105 @@ TEST(remap) GPU_ON; ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 2.0); } } -} \ No newline at end of file +} +///////////// CLAHE //////////////////////// +PERFTEST(CLAHE) +{ + Mat src, dst, ocl_dst; + cv::ocl::oclMat d_src, d_dst; + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; + + double clipLimit = 40.0; + + cv::Ptr clahe = cv::createCLAHE(clipLimit); + cv::Ptr d_clahe = cv::ocl::createCLAHE(clipLimit); + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + CPU_ON; + clahe->apply(src, dst); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_clahe->apply(d_src, d_dst); + WARMUP_OFF; + + ocl_dst = d_dst; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); + + GPU_ON; + d_clahe->apply(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_clahe->apply(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } + } +} + +///////////// 
columnSum//////////////////////// +PERFTEST(columnSum) +{ + Mat src, dst, ocl_dst; + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; CV_32FC1"; + + gen(src, size, size, CV_32FC1, 0, 256); + + CPU_ON; + dst.create(src.size(), src.type()); + for (int j = 0; j < src.cols; j++) + dst.at(0, j) = src.at(0, j); + + for (int i = 1; i < src.rows; ++i) + for (int j = 0; j < src.cols; ++j) + dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::columnSum(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::columnSum(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::columnSum(d_src, d_dst); + d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); + } +} diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index 2828efe01a..5da15aaf64 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -52,14 +53,12 @@ // ocl::oclMat d_src(src), d_templ(templ), d_dst; // ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); //} -TEST(matchTemplate) +PERFTEST(matchTemplate) { //InitMatchTemplate(); - - Mat src, templ, dst; + Mat src, templ, dst, ocl_dst; int templ_size = 5; - for (int size = Min_Size; size <= Max_Size; size *= Multiple) { int all_type[] = {CV_32FC1, CV_32FC4}; @@ -81,9 +80,7 @@ TEST(matchTemplate) matchTemplate(src, templ, dst, CV_TM_CCORR); CPU_OFF; - ocl::oclMat d_src(src), d_templ, d_dst; - - d_templ.upload(templ); + ocl::oclMat d_src(src), d_templ(templ), d_dst; WARMUP_ON; ocl::matchTemplate(d_src, d_templ, d_dst, 
CV_TM_CCORR); @@ -91,15 +88,16 @@ TEST(matchTemplate) GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1); } } @@ -131,15 +129,16 @@ TEST(matchTemplate) GPU_ON; ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_templ.upload(templ); ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, templ.rows * templ.cols * 1e-1); } } } diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 495b2b82cf..b724cdbe64 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// ConvertTo//////////////////////// -TEST(ConvertTo) +PERFTEST(ConvertTo) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = {CV_8UC1, CV_8UC4}; @@ -78,22 +79,23 @@ TEST(ConvertTo) GPU_ON; d_src.convertTo(d_dst, CV_32FC1); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_src.convertTo(d_dst, CV_32FC1); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } } ///////////// copyTo//////////////////////// -TEST(copyTo) +PERFTEST(copyTo) { - Mat src, dst; + Mat src, dst, ocl_dst; ocl::oclMat d_src, d_dst; int all_type[] = 
{CV_8UC1, CV_8UC4}; @@ -124,24 +126,25 @@ TEST(copyTo) GPU_ON; d_src.copyTo(d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); d_src.copyTo(d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } } ///////////// setTo//////////////////////// -TEST(setTo) +PERFTEST(setTo) { - Mat src, dst; + Mat src, ocl_src; Scalar val(1, 2, 3, 4); - ocl::oclMat d_src, d_dst; + ocl::oclMat d_src; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -166,9 +169,11 @@ TEST(setTo) d_src.setTo(val); WARMUP_OFF; - GPU_ON; + d_src.download(ocl_src); + TestSystem::instance().ExpectedMatNear(src, ocl_src, 1.0); + + GPU_ON;; d_src.setTo(val); - ; GPU_OFF; GPU_FULL_ON; diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_moments.cpp similarity index 78% rename from modules/ocl/perf/perf_pyrdown.cpp rename to modules/ocl/perf/perf_moments.cpp index 1d1d2dec11..7fa3948dec 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_moments.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -43,45 +44,49 @@ // //M*/ #include "precomp.hpp" - -///////////// pyrDown ////////////////////// -TEST(pyrDown) +///////////// Moments //////////////////////// +PERFTEST(Moments) { - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + Mat src; + bool binaryImage = 0; + + int all_type[] = {CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1}; + std::string type_name[] = {"CV_8UC1", "CV_16SC1", "CV_32FC1", "CV_64FC1"}; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - SUBTEST << size << 'x' << size << "; " << type_name[j] 
; + SUBTEST << size << 'x' << size << "; " << type_name[j]; gen(src, size, size, all_type[j], 0, 256); - pyrDown(src, dst); + cv::Moments CvMom = moments(src, binaryImage); CPU_ON; - pyrDown(src, dst); + moments(src, binaryImage); CPU_OFF; - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - + cv::Moments oclMom; WARMUP_ON; - ocl::pyrDown(d_src, d_dst); + oclMom = ocl::ocl_moments(src, binaryImage); WARMUP_OFF; + Mat gpu_dst, cpu_dst; + HuMoments(CvMom, cpu_dst); + HuMoments(oclMom, gpu_dst); + GPU_ON; - ocl::pyrDown(d_src, d_dst); - ; + ocl::ocl_moments(src, binaryImage); GPU_OFF; GPU_FULL_ON; - d_src.upload(src); - ocl::pyrDown(d_src, d_dst); - d_dst.download(dst); + ocl::ocl_moments(src, binaryImage); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(gpu_dst, cpu_dst, .5); + } + } -} \ No newline at end of file +} diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp index 8b7118a6ea..1d986c8e49 100644 --- a/modules/ocl/perf/perf_norm.cpp +++ b/modules/ocl/perf/perf_norm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,40 +46,42 @@ #include "precomp.hpp" ///////////// norm//////////////////////// -TEST(norm) +PERFTEST(norm) { - Mat src, buf; - ocl::oclMat d_src, d_buf; - + Mat src1, src2, ocl_src1; + ocl::oclMat d_src1, d_src2; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; - gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(src1, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(src2, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - norm(src, NORM_INF); + norm(src1, src2, NORM_INF); CPU_ON; - norm(src, NORM_INF); + norm(src1, src2, 
NORM_INF); CPU_OFF; - d_src.upload(src); - d_buf.upload(buf); + d_src1.upload(src1); + d_src2.upload(src2); WARMUP_ON; - ocl::norm(d_src, d_buf, NORM_INF); + ocl::norm(d_src1, d_src2, NORM_INF); WARMUP_OFF; + d_src1.download(ocl_src1); + TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5); + GPU_ON; - ocl::norm(d_src, d_buf, NORM_INF); - ; + ocl::norm(d_src1, d_src2, NORM_INF); GPU_OFF; GPU_FULL_ON; - d_src.upload(src); - ocl::norm(d_src, d_buf, NORM_INF); + d_src1.upload(src1); + d_src2.upload(src2); + ocl::norm(d_src1, d_src2, NORM_INF); GPU_FULL_OFF; } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrlk.cpp b/modules/ocl/perf/perf_opticalflow.cpp similarity index 61% rename from modules/ocl/perf/perf_pyrlk.cpp rename to modules/ocl/perf/perf_opticalflow.cpp index f7fc22b9d0..97283b206c 100644 --- a/modules/ocl/perf/perf_pyrlk.cpp +++ b/modules/ocl/perf/perf_opticalflow.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,10 +46,10 @@ #include "precomp.hpp" ///////////// PyrLKOpticalFlow //////////////////////// -TEST(PyrLKOpticalFlow) +PERFTEST(PyrLKOpticalFlow) { - std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; - std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; + std::string images1[] = {"rubberwhale1.png", "basketball1.png"}; + std::string images2[] = {"rubberwhale2.png", "basketball2.png"}; for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) { @@ -81,8 +82,8 @@ TEST(PyrLKOpticalFlow) SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; else SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; - Mat nextPts_cpu; - Mat status_cpu; + Mat ocl_nextPts; + Mat ocl_status; vector pts; goodFeaturesToTrack(i == 0 ? 
gray_frame : frame0, pts, points, 0.01, 0.0); @@ -117,7 +118,6 @@ TEST(PyrLKOpticalFlow) GPU_ON; d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - ; GPU_OFF; GPU_FULL_ON; @@ -127,17 +127,102 @@ TEST(PyrLKOpticalFlow) d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); if (!d_nextPts.empty()) - { - d_nextPts.download(nextPts_cpu); - } + d_nextPts.download(ocl_nextPts); if (!d_status.empty()) - { - d_status.download(status_cpu); - } - + d_status.download(ocl_status); GPU_FULL_OFF; + + size_t mismatch = 0; + for (int i = 0; i < (int)nextPts.size(); ++i) + { + if(status[i] != ocl_status.at(0, i)){ + mismatch++; + continue; + } + if(status[i]){ + Point2f gpu_rst = ocl_nextPts.at(0, i); + Point2f cpu_rst = nextPts[i]; + if(fabs(gpu_rst.x - cpu_rst.x) >= 1. || fabs(gpu_rst.y - cpu_rst.y) >= 1.) + mismatch++; + } + } + double ratio = (double)mismatch / (double)nextPts.size(); + if(ratio < .02) + TestSystem::instance().setAccurate(1, ratio); + else + TestSystem::instance().setAccurate(0, ratio); } } } + + +PERFTEST(tvl1flow) +{ + cv::Mat frame0 = imread("rubberwhale1.png", cv::IMREAD_GRAYSCALE); + assert(!frame0.empty()); + + cv::Mat frame1 = imread("rubberwhale2.png", cv::IMREAD_GRAYSCALE); + assert(!frame1.empty()); + + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + cv::ocl::oclMat d_flowx(frame0.size(), CV_32FC1); + cv::ocl::oclMat d_flowy(frame1.size(), CV_32FC1); + + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::Mat flow; + + + SUBTEST << frame0.cols << 'x' << frame0.rows << "; rubberwhale1.png; "<calc(frame0, frame1, flow); + + CPU_ON; + alg->calc(frame0, frame1, flow); + CPU_OFF; + + cv::Mat gold[2]; + cv::split(flow, gold); + + cv::ocl::oclMat d0(frame0.size(), CV_32FC1); + d0.upload(frame0); + cv::ocl::oclMat d1(frame1.size(), CV_32FC1); + d1.upload(frame1); + + WARMUP_ON; + d_alg(d0, d1, d_flowx, d_flowy); + WARMUP_OFF; +/* + double diff1 = 0.0, diff2 = 0.0; + if(ExceptedMatSimilar(gold[0], cv::Mat(d_flowx), 3e-3, diff1) 
== 1 + &&ExceptedMatSimilar(gold[1], cv::Mat(d_flowy), 3e-3, diff2) == 1) + TestSystem::instance().setAccurate(1); + else + TestSystem::instance().setAccurate(0); + + TestSystem::instance().setDiff(diff1); + TestSystem::instance().setDiff(diff2); +*/ + + + GPU_ON; + d_alg(d0, d1, d_flowx, d_flowy); + d_alg.collectGarbage(); + GPU_OFF; + + + cv::Mat flowx, flowy; + + GPU_FULL_ON; + d0.upload(frame0); + d1.upload(frame1); + d_alg(d0, d1, d_flowx, d_flowy); + d_alg.collectGarbage(); + d_flowx.download(flowx); + d_flowy.download(flowy); + GPU_FULL_OFF; + + TestSystem::instance().ExceptedMatSimilar(gold[0], flowx, 3e-3); + TestSystem::instance().ExceptedMatSimilar(gold[1], flowy, 3e-3); +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyramid.cpp similarity index 70% rename from modules/ocl/perf/perf_pyrup.cpp rename to modules/ocl/perf/perf_pyramid.cpp index d3b3003a2e..3b96251e5d 100644 --- a/modules/ocl/perf/perf_pyrup.cpp +++ b/modules/ocl/perf/perf_pyramid.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -44,10 +45,53 @@ //M*/ #include "precomp.hpp" -///////////// pyrUp //////////////////////// -TEST(pyrUp) +///////////// pyrDown ////////////////////// +PERFTEST(pyrDown) { - Mat src, dst; + Mat src, dst, ocl_dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrDown(src, dst); + + CPU_ON; + pyrDown(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrDown(d_src, 
d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrDown(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrDown(d_src, d_dst); + d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, dst.depth() == CV_32F ? 1e-4f : 1.0f); + } + } +} + +///////////// pyrUp //////////////////////// +PERFTEST(pyrUp) +{ + Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; @@ -74,14 +118,15 @@ TEST(pyrUp) GPU_ON; ocl::pyrUp(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::pyrUp(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0)); } } } \ No newline at end of file diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 48ff1ff15a..0fafd14aba 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,9 +46,9 @@ #include "precomp.hpp" ///////////// Merge//////////////////////// -TEST(Merge) +PERFTEST(Merge) { - Mat dst; + Mat dst, ocl_dst; ocl::oclMat d_dst; int channels = 4; @@ -86,26 +87,25 @@ TEST(Merge) GPU_ON; ocl::merge(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; - for (int i = 0; i < channels; ++i) { - d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); + d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); } - ocl::merge(d_src, d_dst); - d_dst.download(dst); + d_dst.download(ocl_dst); GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 0.0); } } } ///////////// Split//////////////////////// -TEST(Split) +PERFTEST(Split) { //int channels = 4; int 
all_type[] = {CV_8UC1, CV_32FC1}; @@ -120,7 +120,7 @@ TEST(Split) Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - std::vector dst; + std::vector dst, ocl_dst(4); split(src, dst); @@ -133,17 +133,21 @@ TEST(Split) WARMUP_ON; ocl::split(d_src, d_dst); - WARMUP_OFF; + WARMUP_OFF; GPU_ON; ocl::split(d_src, d_dst); - ; GPU_OFF; GPU_FULL_ON; d_src.upload(src); ocl::split(d_src, d_dst); + for(size_t i = 0; i < dst.size(); i++) + d_dst[i].download(ocl_dst[i]); GPU_FULL_OFF; + + vector eps(4, 0.); + TestSystem::instance().ExpectMatsNear(dst, ocl_dst, eps); } } diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index e35a071450..dd3b5e4ea1 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -41,6 +41,12 @@ //M*/ #include "precomp.hpp" +#if GTEST_OS_WINDOWS +#ifndef NOMINMAX +#define NOMINMAX +#endif +# include +#endif // This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files // All images needed in this test are in samples/gpu folder. 
@@ -166,7 +172,7 @@ void TestSystem::finishCurrentSubtest() deviation = std::sqrt(sum / gpu_times_.size()); } - printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); + printMetrics(is_accurate_, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); num_subtests_called_++; @@ -184,10 +190,19 @@ double TestSystem::meanTime(const vector &samples) void TestSystem::printHeading() { cout << endl; - cout << setiosflags(ios_base::left); - cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" - << "DESCRIPTION\n"; + cout<< setiosflags(ios_base::left); + +#if 0 + cout<(0, 0) - 1.f); +} + + + diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index c2cf1238ef..97e3d7e5c6 100644 --- a/modules/ocl/perf/precomp.hpp +++ b/modules/ocl/perf/precomp.hpp @@ -50,10 +50,15 @@ #include "opencv2/core/core.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" +#include "opencv2/calib3d/calib3d.hpp" #include "opencv2/video/video.hpp" #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/features2d/features2d.hpp" #include "opencv2/ocl/ocl.hpp" +#include "opencv2/ts/ts.hpp" +#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/ts/ts_gtest.h" + #define Min_Size 1000 #define Max_Size 4000 @@ -64,6 +69,8 @@ using namespace std; using namespace cv; void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); +void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n); + string abspath(const string &relpath); int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *); typedef struct @@ -76,6 +83,50 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, 
cv::TermCriteria crit); + +template +int ExpectedEQ(T1 expected, T2 actual) +{ + if(expected == actual) + return 1; + + return 0; +} + +template +int EeceptDoubleEQ(T1 expected, T1 actual) +{ + testing::internal::Double lhs(expected); + testing::internal::Double rhs(actual); + + if (lhs.AlmostEquals(rhs)) + { + return 1; + } + + return 0; +} + +template +int AssertEQ(T expected, T actual) +{ + if(expected == actual) + { + return 1; + } + return 0; +} + +int ExceptDoubleNear(double val1, double val2, double abs_error); +bool match_rect(cv::Rect r1, cv::Rect r2, int threshold); + +double checkNorm(const cv::Mat &m); +double checkNorm(const cv::Mat &m1, const cv::Mat &m2); +double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); + +int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps); +int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps); + class Runnable { public: @@ -171,6 +222,16 @@ public: return cur_iter_idx_ >= cpu_num_iters_; } + int get_cur_iter_idx() + { + return cur_iter_idx_; + } + + int get_cpu_num_iters() + { + return cpu_num_iters_; + } + bool warmupStop() { return cur_warmup_idx_++ >= gpu_warmup_iters_; @@ -252,6 +313,53 @@ public: itname_changed_ = true; } + void setAccurate(int accurate, double diff) + { + is_accurate_ = accurate; + accurate_diff_ = diff; + } + + void ExpectMatsNear(vector& dst, vector& cpu_dst, vector& eps) + { + assert(dst.size() == cpu_dst.size()); + assert(cpu_dst.size() == eps.size()); + is_accurate_ = 1; + for(size_t i=0; i eps[i]) + is_accurate_ = 0; + } + } + + void ExpectedMatNear(cv::Mat& dst, cv::Mat& cpu_dst, double eps) + { + assert(dst.type() == cpu_dst.type()); + assert(dst.size() == cpu_dst.size()); + accurate_diff_ = checkNorm(dst, cpu_dst); + if(accurate_diff_ <= eps) + is_accurate_ = 1; + else + is_accurate_ = 0; + } + + void ExceptedMatSimilar(cv::Mat& dst, cv::Mat& cpu_dst, double eps) + { + assert(dst.type() == cpu_dst.type()); + assert(dst.size() == cpu_dst.size()); + accurate_diff_ = 
checkSimilarity(cpu_dst, dst); + if(accurate_diff_ <= eps) + is_accurate_ = 1; + else + is_accurate_ = 0; + } + + std::stringstream &getCurSubtestDescription() + { + return cur_subtest_description_; + } + private: TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0), @@ -261,7 +369,8 @@ private: speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false), num_iters_(10), cpu_num_iters_(2), gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), - record_(0), recordname_("performance"), itname_changed_(true) + record_(0), recordname_("performance"), itname_changed_(true), + is_accurate_(-1), accurate_diff_(0.) { cpu_times_.reserve(num_iters_); gpu_times_.reserve(num_iters_); @@ -277,16 +386,19 @@ private: cur_subtest_description_.str(""); cur_subtest_is_empty_ = true; cur_iter_idx_ = 0; + cur_warmup_idx_ = 0; cpu_times_.clear(); gpu_times_.clear(); gpu_full_times_.clear(); + is_accurate_ = -1; + accurate_diff_ = 0.; } double meanTime(const std::vector &samples); void printHeading(); void printSummary(); - void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f); + void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f); void writeHeading(); void writeSummary(); @@ -340,6 +452,9 @@ private: std::string recordname_; std::string itname_; bool itname_changed_; + + int is_accurate_; + double accurate_diff_; }; @@ -353,7 +468,7 @@ struct name##_init: Runnable { \ void name##_init::run() -#define TEST(name) \ +#define PERFTEST(name) \ struct name##_test: Runnable { \ name##_test(): Runnable(#name) { \ TestSystem::instance().addTest(this); \ @@ -375,7 +490,7 @@ struct name##_test: Runnable { \ while (!TestSystem::instance().stop()) { \ TestSystem::instance().gpuOn() #define GPU_OFF \ - ocl::finish(); \ + ocl::finish();\ 
TestSystem::instance().gpuOff(); \ } TestSystem::instance().gpuComplete() @@ -389,5 +504,5 @@ struct name##_test: Runnable { \ #define WARMUP_ON \ while (!TestSystem::instance().warmupStop()) { #define WARMUP_OFF \ - ocl::finish(); \ + ocl::finish();\ } TestSystem::instance().warmupComplete() diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index d679a93480..49a56ceabb 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -22,6 +22,7 @@ // Jiang Liyuan, jlyuan001.good@163.com // Rock Li, Rock.Li@amd.com // Zailong Wu, bullet@yeah.net +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -286,6 +287,7 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub else arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); } + void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { @@ -411,11 +413,11 @@ static void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelN args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); + float f_scalar = (float)scalar; if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) args.push_back( make_pair( sizeof(cl_double), (void *)&scalar )); else { - float f_scalar = (float)scalar; args.push_back( make_pair( sizeof(cl_float), (void *)&f_scalar)); } @@ -468,6 +470,11 @@ void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, cons const char **kernelString = mask.data ? 
&arithm_add_scalar_mask : &arithm_add_scalar; arithmetic_scalar( src1, src2, dst, mask, kernelName, kernelString, -1); } +void cv::ocl::multiply(double scalar, const oclMat &src, oclMat &dst) +{ + string kernelName = "arithm_muls"; + arithmetic_scalar_run( src, dst, kernelName, &arithm_mul, scalar); +} void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE)) @@ -775,45 +782,55 @@ static void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl } } -template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +template void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, + const oclMat &mask, oclMat &buf) { size_t groupnum = src.clCxt->computeUnits(); CV_Assert(groupnum != 0); groupnum = groupnum * 2; int vlen = 8; int dbsize = groupnum * 2 * vlen * sizeof(T) ; - Context *clCxt = src.clCxt; - cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize); - *minVal = std::numeric_limits::max() , *maxVal = -std::numeric_limits::max(); + + ensureSizeIsEnough(1, dbsize, CV_8UC1, buf); + + cl_mem buf_data = reinterpret_cast(buf.data); + if (mask.empty()) { - arithmetic_minMax_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax"); + arithmetic_minMax_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax"); } else { - arithmetic_minMax_mask_run(src, mask, dstBuffer, vlen, groupnum, "arithm_op_minMax_mask"); + arithmetic_minMax_mask_run(src, mask, buf_data, vlen, groupnum, "arithm_op_minMax_mask"); } - T *p = new T[groupnum * vlen * 2]; - memset(p, 0, dbsize); - openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); - if(minVal != NULL){ + + Mat matbuf = Mat(buf); + T *p = matbuf.ptr(); + if(minVal != NULL) + { + *minVal = std::numeric_limits::max(); for(int i = 0; i < vlen * (int)groupnum; i++) { *minVal = *minVal < p[i] ? 
*minVal : p[i]; } } - if(maxVal != NULL){ + if(maxVal != NULL) + { + *maxVal = -std::numeric_limits::max(); for(int i = vlen * (int)groupnum; i < 2 * vlen * (int)groupnum; i++) { *maxVal = *maxVal > p[i] ? *maxVal : p[i]; } } - delete[] p; - openCLFree(dstBuffer); } -typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask); +typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf); void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) +{ + oclMat buf; + minMax_buf(src, minVal, maxVal, mask, buf); +} +void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat &buf) { CV_Assert(src.oclchannels() == 1); if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) @@ -833,7 +850,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc }; minMaxFunc func; func = functab[src.depth()]; - func(src, minVal, maxVal, mask); + func(src, minVal, maxVal, mask, buf); } ////////////////////////////////////////////////////////////////////////////// @@ -1680,10 +1697,11 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); + T scalar; if(_scalar != NULL) { double scalar1 = *((double *)_scalar); - T scalar = (T)scalar1; + scalar = (T)scalar1; args.push_back( make_pair( sizeof(T), (void *)&scalar )); } @@ -2300,9 +2318,9 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); + float pf = p; if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE)) { - float pf = p; args.push_back( make_pair( 
sizeof(cl_float), (void *)&pf )); } else diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index c12fa73064..74da6ddd06 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -245,11 +245,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, train, tempMask, trainIdx, distance, distType); } @@ -264,11 +265,12 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co { const oclMat zeroMask; const oclMat &tempMask = mask.data ? mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType); } @@ -284,11 +286,12 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD { const oclMat zeroMask; const oclMat &tempMask = mask.data ? 
mask : zeroMask; + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { matchUnrolledCached<16, 128>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType); } @@ -466,11 +469,12 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &trainIdx, const oclMat &distance, int distType) { + bool is_cpu = queryDeviceInfo(); if (query.cols <= 64) { knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType); } - else if (query.cols <= 128) + else if (query.cols <= 128 && !is_cpu) { knn_matchUnrolledCached<16, 128>(query, train, mask, trainIdx, distance, distType); } diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index cc7e60e0d9..82bb01bfdc 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -87,7 +87,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE); } } - ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, edgeBuf); + ensureSizeIsEnough(2 * (image_size.height + 2), image_size.width + 2, CV_32FC1, edgeBuf); ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf1); ensureSizeIsEnough(1, image_size.width * image_size.height, CV_16UC2, trackBuf2); @@ -141,13 +141,16 @@ namespace void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh) { using namespace ::cv::ocl::canny; - calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh); + oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2)); + oclMat mapBuf = buf.edgeBuf(Rect(0, 
buf.edgeBuf.rows / 2, buf.edgeBuf.cols, buf.edgeBuf.rows / 2)); - edgesHysteresisLocal_gpu(buf.edgeBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols); + calcMap_gpu(buf.dx, buf.dy, magBuf, mapBuf, dst.rows, dst.cols, low_thresh, high_thresh); - edgesHysteresisGlobal_gpu(buf.edgeBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols); + edgesHysteresisLocal_gpu(mapBuf, buf.trackBuf1, buf.counter, dst.rows, dst.cols); - getEdges_gpu(buf.edgeBuf, dst, dst.rows, dst.cols); + edgesHysteresisGlobal_gpu(mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, dst.rows, dst.cols); + + getEdges_gpu(mapBuf, dst, dst.rows, dst.cols); } } @@ -172,18 +175,20 @@ void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_th buf.create(src.size(), apperture_size); buf.edgeBuf.setTo(Scalar::all(0)); + oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2)); + if (apperture_size == 3) { calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols); - calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); + calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient); } else { buf.filterDX->apply(src, buf.dx); buf.filterDY->apply(src, buf.dy); - calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, src.rows, src.cols, L2gradient); + calcMagnitude_gpu(buf.dx, buf.dy, magBuf, src.rows, src.cols, L2gradient); } CannyCaller(buf, dst, static_cast(low_thresh), static_cast(high_thresh)); } @@ -209,7 +214,10 @@ void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &d buf.dy = dy; buf.create(dx.size(), -1); buf.edgeBuf.setTo(Scalar::all(0)); - calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); + + oclMat magBuf = buf.edgeBuf(Rect(0, 0, buf.edgeBuf.cols, buf.edgeBuf.rows / 2)); + + calcMagnitude_gpu(buf.dx, buf.dy, magBuf, dx.rows, dx.cols, L2gradient); CannyCaller(buf, dst, static_cast(low_thresh), 
static_cast(high_thresh)); } @@ -234,7 +242,7 @@ void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_b size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) @@ -264,12 +272,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - char build_options [15] = ""; - if(L2Grad) - { - strcat(build_options, "-D L2GRAD"); - } - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); + const char * build_options = L2Grad ? "-D L2GRAD":""; + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) { @@ -292,12 +296,8 @@ void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, i size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - char build_options [15] = ""; - if(L2Grad) - { - strcat(build_options, "-D L2GRAD"); - } - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); + const char * build_options = L2Grad ? 
"-D L2GRAD":""; + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh) @@ -328,7 +328,7 @@ void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int ro string kernelName = "calcMap"; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols) @@ -348,7 +348,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) @@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi args.push_back( make_pair( sizeof(cl_int), (void *)&map.step)); args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset)); - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, DISABLE); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL)); std::swap(st1, st2); } @@ -403,5 +403,5 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols) size_t globalThreads[3] = {cols, rows, 1}; size_t localThreads[3] = 
{16, 16, 1}; - openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index cc07209b15..f35a26e332 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -356,8 +356,7 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, char compile_option[128]; sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], - rectKernel?"-D RECTKERNEL":"", - s); + s, rectKernel?"-D RECTKERNEL":""); vector< pair > args; args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); @@ -646,7 +645,11 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); - openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth); + const int buffer_size = 100; + char opt_buffer [buffer_size] = ""; + sprintf(opt_buffer, "-DANCHOR=%d -DANX=%d -DANY=%d", ksize.width, anchor.x, anchor.y); + + openCLExecuteKernel(clCxt, &filtering_laplacian, kernelName, globalThreads, localThreads, args, cn, depth, opt_buffer); } Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Point anchor, int borderType) @@ -657,7 +660,7 @@ Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const oclMat gpu_krnl; int nDivisor; - normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); + normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, false); normalizeAnchor(anchor, ksize); return Ptr(new LinearFilter_GPU(ksize, anchor, gpu_krnl, 
GPUFilter2D_callers[CV_MAT_CN(srcType)], @@ -1173,7 +1176,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel args.push_back(make_pair(sizeof(cl_int), (void *)&ridusy)); args.push_back(make_pair(sizeof(cl_mem), (void *)&mat_kernel.data)); - openCLExecuteKernel2(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option, CLFLUSH); + openCLExecuteKernel(clCxt, &filter_sep_row, kernelName, globalThreads, localThreads, args, channels, src.depth(), compile_option); } Ptr cv::ocl::getLinearRowFilter_GPU(int srcType, int /*bufType*/, const Mat &rowKernel, int anchor, int bordertype) diff --git a/modules/ocl/src/gfft.cpp b/modules/ocl/src/gfft.cpp new file mode 100644 index 0000000000..7fd5e3a174 --- /dev/null +++ b/modules/ocl/src/gfft.cpp @@ -0,0 +1,352 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; + +static bool use_cpu_sorter = true; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *imgproc_gfft; + } +} + +namespace +{ +enum SortMethod +{ + CPU_STL, + BITONIC, + SELECTION +}; + +const int GROUP_SIZE = 256; + +template +struct Sorter +{ + //typedef EigType; +}; + +//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed. 
+template<> +struct Sorter +{ + typedef oclMat EigType; + static cv::Mutex cs; + static Mat mat_eig; + + //prototype + static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2) + { + float v1 = mat_eig.at(cvRound(pt1.s[1]), cvRound(pt1.s[0])); + float v2 = mat_eig.at(cvRound(pt2.s[1]), cvRound(pt2.s[0])); + return v1 > v2; + } + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + cv::AutoLock lock(cs); + //temporarily use STL's sort function + Mat mat_corners = corners; + mat_eig = eig_tex; + std::sort(mat_corners.begin(), mat_corners.begin() + count, clfloat2Gt); + corners = mat_corners; + } +}; +cv::Mutex Sorter::cs; +cv::Mat Sorter::mat_eig; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); + size_t globalThreads[3] = {count / 2, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + // 2^numStages should be equal to count or the output is invalid + int numStages = 0; + for(int i = count; i > 1; i >>= 1) + { + ++numStages; + } + const int argc = 5; + std::vector< std::pair > args(argc); + std::string kernelname = "sortCorners_bitonicSort"; + args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex); + args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data); + args[2] = std::make_pair(sizeof(cl_int), (void *)&count); + for(int stage = 0; stage < numStages; ++stage) + { + args[3] = std::make_pair(sizeof(cl_int), (void *)&stage); + for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage) + { + args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } + } + } +}; + +template<> +struct Sorter +{ + typedef TextureCL EigType; + + static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) + { + Context * cxt = Context::getContext(); 
+ + size_t globalThreads[3] = {count, 1, 1}; + size_t localThreads[3] = {GROUP_SIZE, 1, 1}; + + std::vector< std::pair > args; + //local + std::string kernelname = "sortCorners_selectionSortLocal"; + int lds_size = GROUP_SIZE * sizeof(cl_float2); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) ); + args.push_back( std::make_pair( lds_size, (void*)NULL) ); + + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + + //final + kernelname = "sortCorners_selectionSortFinal"; + args.pop_back(); + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1); + } +}; + +int findCorners_caller( + const TextureCL& eig, + const float threshold, + const oclMat& mask, + oclMat& corners, + const int max_count) +{ + std::vector k; + Context * cxt = Context::getContext(); + + std::vector< std::pair > args; + std::string kernelname = "findCorners"; + + const int mask_strip = mask.step / mask.elemSize1(); + + oclMat g_counter(1, 1, CV_32SC1); + g_counter.setTo(0); + + args.push_back(make_pair( sizeof(cl_mem), (void*)&eig )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&mask.data )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&corners.data )); + args.push_back(make_pair( sizeof(cl_int), (void*)&mask_strip)); + args.push_back(make_pair( sizeof(cl_float), (void*)&threshold )); + args.push_back(make_pair( sizeof(cl_int), (void*)&eig.rows )); + args.push_back(make_pair( sizeof(cl_int), (void*)&eig.cols )); + args.push_back(make_pair( sizeof(cl_int), (void*)&max_count )); + args.push_back(make_pair( sizeof(cl_mem), (void*)&g_counter.data )); + + size_t globalThreads[3] = {eig.cols, eig.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + const char * opt = mask.empty() ? 
"" : "-D WITH_MASK"; + openCLExecuteKernel(cxt, &imgproc_gfft, kernelname, globalThreads, localThreads, args, -1, -1, opt); + return std::min(Mat(g_counter).at(0), max_count); +} +}//unnamed namespace + +void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) +{ + CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0); + CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); + + CV_DbgAssert(support_image2d()); + + ensureSizeIsEnough(image.size(), CV_32F, eig_); + + if (useHarrisDetector) + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); + else + cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); + + double maxVal = 0; + minMax_buf(eig_, 0, &maxVal, oclMat(), minMaxbuf_); + + ensureSizeIsEnough(1, std::max(1000, static_cast(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); + + Ptr eig_tex = bindTexturePtr(eig_); + int total = findCorners_caller( + *eig_tex, + static_cast(maxVal * qualityLevel), + mask, + tmpCorners_, + tmpCorners_.cols); + + if (total == 0) + { + corners.release(); + return; + } + if(use_cpu_sorter) + { + Sorter::sortCorners_caller(eig_, tmpCorners_, total); + } + else + { + //if total is power of 2 + if(((total - 1) & (total)) == 0) + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + else + { + Sorter::sortCorners_caller(*eig_tex, tmpCorners_, total); + } + } + + if (minDistance < 1) + { + Rect roi_range(0, 0, maxCorners > 0 ? 
std::min(maxCorners, total) : total, 1); + tmpCorners_(roi_range).copyTo(corners); + } + else + { + vector tmp(total); + downloadPoints(tmpCorners_, tmp); + + vector tmp2; + tmp2.reserve(total); + + const int cell_size = cvRound(minDistance); + const int grid_width = (image.cols + cell_size - 1) / cell_size; + const int grid_height = (image.rows + cell_size - 1) / cell_size; + + std::vector< std::vector > grid(grid_width * grid_height); + + for (int i = 0; i < total; ++i) + { + Point2f p = tmp[i]; + + bool good = true; + + int x_cell = static_cast(p.x / cell_size); + int y_cell = static_cast(p.y / cell_size); + + int x1 = x_cell - 1; + int y1 = y_cell - 1; + int x2 = x_cell + 1; + int y2 = y_cell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(grid_width - 1, x2); + y2 = std::min(grid_height - 1, y2); + + for (int yy = y1; yy <= y2; yy++) + { + for (int xx = x1; xx <= x2; xx++) + { + vector& m = grid[yy * grid_width + xx]; + + if (!m.empty()) + { + for(size_t j = 0; j < m.size(); j++) + { + float dx = p.x - m[j].x; + float dy = p.y - m[j].y; + + if (dx * dx + dy * dy < minDistance * minDistance) + { + good = false; + goto break_out; + } + } + } + } + } + + break_out: + + if(good) + { + grid[y_cell * grid_width + x_cell].push_back(p); + + tmp2.push_back(p); + + if (maxCorners > 0 && tmp2.size() == static_cast(maxCorners)) + break; + } + } + + corners.upload(Mat(1, static_cast(tmp2.size()), CV_32FC2, &tmp2[0])); + } +} +void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, vector &points_v) +{ + CV_DbgAssert(points.type() == CV_32FC2); + points_v.resize(points.cols); + openCLSafeCall(clEnqueueReadBuffer( + *reinterpret_cast(getoclCommandQueue()), + reinterpret_cast(points.data), + CL_TRUE, + 0, + points.cols * sizeof(Point2f), + &points_v[0], + 0, + NULL, + NULL)); +} + + diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 5afe5423ed..6283ac8d9f 100644 --- 
a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -137,47 +137,22 @@ struct CvHidHaarClassifierCascade }; typedef struct { - //int rows; - //int ystep; int width_height; - //int height; int grpnumperline_totalgrp; - //int totalgrp; int imgoff; float factor; } detect_piramid_info; - -#if defined WIN32 && !defined __MINGW__ && !defined __MINGW32__ +#ifdef _MSC_VER #define _ALIGNED_ON(_ALIGNMENT) __declspec(align(_ALIGNMENT)) -typedef _ALIGNED_ON(128) struct GpuHidHaarFeature -{ - _ALIGNED_ON(32) struct - { - _ALIGNED_ON(4) int p0 ; - _ALIGNED_ON(4) int p1 ; - _ALIGNED_ON(4) int p2 ; - _ALIGNED_ON(4) int p3 ; - _ALIGNED_ON(4) float weight ; - } - /*_ALIGNED_ON(32)*/ rect[CV_HAAR_FEATURE_MAX] ; -} -GpuHidHaarFeature; - typedef _ALIGNED_ON(128) struct GpuHidHaarTreeNode { _ALIGNED_ON(64) int p[CV_HAAR_FEATURE_MAX][4]; - //_ALIGNED_ON(16) int p1[CV_HAAR_FEATURE_MAX] ; - //_ALIGNED_ON(16) int p2[CV_HAAR_FEATURE_MAX] ; - //_ALIGNED_ON(16) int p3[CV_HAAR_FEATURE_MAX] ; - /*_ALIGNED_ON(16)*/ float weight[CV_HAAR_FEATURE_MAX] ; - /*_ALIGNED_ON(4)*/ float threshold ; - _ALIGNED_ON(8) float alpha[2] ; + _ALIGNED_ON(16) float alpha[3] ; _ALIGNED_ON(4) int left ; _ALIGNED_ON(4) int right ; - // GpuHidHaarFeature feature __attribute__((aligned (128))); } GpuHidHaarTreeNode; @@ -185,7 +160,6 @@ GpuHidHaarTreeNode; typedef _ALIGNED_ON(32) struct GpuHidHaarClassifier { _ALIGNED_ON(4) int count; - //CvHaarFeature* orig_feature; _ALIGNED_ON(8) GpuHidHaarTreeNode *node ; _ALIGNED_ON(8) float *alpha ; } @@ -220,32 +194,16 @@ typedef _ALIGNED_ON(64) struct GpuHidHaarClassifierCascade _ALIGNED_ON(4) int p2 ; _ALIGNED_ON(4) int p3 ; _ALIGNED_ON(4) float inv_window_area ; - // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); } GpuHidHaarClassifierCascade; #else #define _ALIGNED_ON(_ALIGNMENT) __attribute__((aligned(_ALIGNMENT) )) -typedef struct _ALIGNED_ON(128) GpuHidHaarFeature -{ - struct _ALIGNED_ON(32) -{ - int p0 _ALIGNED_ON(4); - int p1 
_ALIGNED_ON(4); - int p2 _ALIGNED_ON(4); - int p3 _ALIGNED_ON(4); - float weight _ALIGNED_ON(4); -} -rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32); -} -GpuHidHaarFeature; - - typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode { int p[CV_HAAR_FEATURE_MAX][4] _ALIGNED_ON(64); float weight[CV_HAAR_FEATURE_MAX];// _ALIGNED_ON(16); float threshold;// _ALIGNED_ON(4); - float alpha[2] _ALIGNED_ON(8); + float alpha[3] _ALIGNED_ON(16); int left _ALIGNED_ON(4); int right _ALIGNED_ON(4); } @@ -288,7 +246,6 @@ typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade int p2 _ALIGNED_ON(4); int p3 _ALIGNED_ON(4); float inv_window_area _ALIGNED_ON(4); - // GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); } GpuHidHaarClassifierCascade; #endif @@ -296,36 +253,6 @@ const int icv_object_win_border = 1; const float icv_stage_threshold_bias = 0.0001f; double globaltime = 0; - -// static CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count ) -// { -// CvHaarClassifierCascade *cascade = 0; - -// int block_size = sizeof(*cascade) + stage_count * sizeof(*cascade->stage_classifier); - -// if( stage_count <= 0 ) -// CV_Error( CV_StsOutOfRange, "Number of stages should be positive" ); - -// cascade = (CvHaarClassifierCascade *)cvAlloc( block_size ); -// memset( cascade, 0, block_size ); - -// cascade->stage_classifier = (CvHaarStageClassifier *)(cascade + 1); -// cascade->flags = CV_HAAR_MAGIC_VAL; -// cascade->count = stage_count; - -// return cascade; -// } - -//static int globalcounter = 0; - -// static void gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade ) -// { -// if( _cascade && *_cascade ) -// { -// cvFree( _cascade ); -// } -// } - /* create more efficient internal representation of haar classifier cascade */ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier) { @@ -441,24 +368,12 @@ static GpuHidHaarClassifierCascade * 
gpuCreateHidHaarClassifierCascade( CvHaarCl hid_stage_classifier->two_rects = 1; haar_classifier_ptr += stage_classifier->count; - /* - hid_stage_classifier->parent = (stage_classifier->parent == -1) - ? NULL : stage_classifier_ptr + stage_classifier->parent; - hid_stage_classifier->next = (stage_classifier->next == -1) - ? NULL : stage_classifier_ptr + stage_classifier->next; - hid_stage_classifier->child = (stage_classifier->child == -1) - ? NULL : stage_classifier_ptr + stage_classifier->child; - - out->is_tree |= hid_stage_classifier->next != NULL; - */ - for( j = 0; j < stage_classifier->count; j++ ) { CvHaarClassifier *classifier = stage_classifier->classifier + j; GpuHidHaarClassifier *hid_classifier = hid_stage_classifier->classifier + j; int node_count = classifier->count; - // float* alpha_ptr = (float*)(haar_node_ptr + node_count); float *alpha_ptr = &haar_node_ptr->alpha[0]; hid_classifier->count = node_count; @@ -485,16 +400,12 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl node->p[2][3] = 0; node->weight[2] = 0; } - // memset( &(node->feature.rect[2]), 0, sizeof(node->feature.rect[2]) ); else hid_stage_classifier->two_rects = 0; + + memcpy( node->alpha, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0])); + haar_node_ptr = haar_node_ptr + 1; } - - memcpy( alpha_ptr, classifier->alpha, (node_count + 1)*sizeof(alpha_ptr[0])); - haar_node_ptr = haar_node_ptr + 1; - // (GpuHidHaarTreeNode*)cvAlignPtr(alpha_ptr+node_count+1, sizeof(void*)); - // (GpuHidHaarTreeNode*)(alpha_ptr+node_count+1); - out->is_stump_based &= node_count == 1; } } @@ -507,25 +418,19 @@ static GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarCl #define sum_elem_ptr(sum,row,col) \ - ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype))) + ((sumtype*)CV_MAT_ELEM_PTR_FAST((sum),(row),(col),sizeof(sumtype))) #define sqsum_elem_ptr(sqsum,row,col) \ - 
((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype))) + ((sqsumtype*)CV_MAT_ELEM_PTR_FAST((sqsum),(row),(col),sizeof(sqsumtype))) #define calc_sum(rect,offset) \ - ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) + ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_cascade, - /* const CvArr* _sum, - const CvArr* _sqsum, - const CvArr* _tilted_sum,*/ double scale, int step) { - // CvMat sum_stub, *sum = (CvMat*)_sum; - // CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum; - // CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum; GpuHidHaarClassifierCascade *cascade; int coi0 = 0, coi1 = 0; int i; @@ -541,61 +446,25 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc if( scale <= 0 ) CV_Error( CV_StsOutOfRange, "Scale must be positive" ); - // sum = cvGetMat( sum, &sum_stub, &coi0 ); - // sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 ); - if( coi0 || coi1 ) CV_Error( CV_BadCOI, "COI is not supported" ); - // if( !CV_ARE_SIZES_EQ( sum, sqsum )) - // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - - // if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 || - // CV_MAT_TYPE(sum->type) != CV_32SC1 ) - // CV_Error( CV_StsUnsupportedFormat, - // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - if( !_cascade->hid_cascade ) gpuCreateHidHaarClassifierCascade(_cascade, &datasize, &total); cascade = (GpuHidHaarClassifierCascade *) _cascade->hid_cascade; stage_classifier = (GpuHidHaarStageClassifier *) (cascade + 1); - if( cascade->has_tilted_features ) - { - // tilted = cvGetMat( tilted, &tilted_stub, &coi1 ); - - // if( CV_MAT_TYPE(tilted->type) != CV_32SC1 ) - // CV_Error( CV_StsUnsupportedFormat, - // "Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - - // if( sum->step != tilted->step ) - // CV_Error( 
CV_StsUnmatchedSizes, - // "Sum and tilted_sum must have the same stride (step, widthStep)" ); - - // if( !CV_ARE_SIZES_EQ( sum, tilted )) - // CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - // cascade->tilted = *tilted; - } - _cascade->scale = scale; _cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale ); _cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale ); - //cascade->sum = *sum; - //cascade->sqsum = *sqsum; - equRect.x = equRect.y = cvRound(scale); equRect.width = cvRound((_cascade->orig_window_size.width - 2) * scale); equRect.height = cvRound((_cascade->orig_window_size.height - 2) * scale); weight_scale = 1. / (equRect.width * equRect.height); cascade->inv_window_area = weight_scale; - // cascade->pq0 = equRect.y * step + equRect.x; - // cascade->pq1 = equRect.y * step + equRect.x + equRect.width ; - // cascade->pq2 = (equRect.y + equRect.height)*step + equRect.x; - // cascade->pq3 = (equRect.y + equRect.height)*step + equRect.x + equRect.width ; - cascade->pq0 = equRect.x; cascade->pq1 = equRect.y; cascade->pq2 = equRect.x + equRect.width; @@ -618,10 +487,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc { CvHaarFeature *feature = &_cascade->stage_classifier[i].classifier[j].haar_feature[l]; - /* GpuHidHaarClassifier* classifier = - cascade->stage_classifier[i].classifier + j; */ - //GpuHidHaarFeature* hidfeature = - // &cascade->stage_classifier[i].classifier[j].node[l].feature; GpuHidHaarTreeNode *hidnode = &stage_classifier[i].classifier[j].node[l]; double sum0 = 0, area0 = 0; CvRect r[3]; @@ -636,8 +501,6 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc /* align blocks */ for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) { - //if( !hidfeature->rect[k].p0 ) - // break; if(!hidnode->p[k][0]) break; r[k] = feature->rect[k].r; @@ -717,15 +580,6 @@ static void 
gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc if( !feature->tilted ) { - /* hidfeature->rect[k].p0 = tr.y * sum->cols + tr.x; - hidfeature->rect[k].p1 = tr.y * sum->cols + tr.x + tr.width; - hidfeature->rect[k].p2 = (tr.y + tr.height) * sum->cols + tr.x; - hidfeature->rect[k].p3 = (tr.y + tr.height) * sum->cols + tr.x + tr.width; - */ - /*hidnode->p0[k] = tr.y * step + tr.x; - hidnode->p1[k] = tr.y * step + tr.x + tr.width; - hidnode->p2[k] = (tr.y + tr.height) * step + tr.x; - hidnode->p3[k] = (tr.y + tr.height) * step + tr.x + tr.width;*/ hidnode->p[k][0] = tr.x; hidnode->p[k][1] = tr.y; hidnode->p[k][2] = tr.x + tr.width; @@ -733,37 +587,24 @@ static void gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade *_casc } else { - /* hidfeature->rect[k].p2 = (tr.y + tr.width) * tilted->cols + tr.x + tr.width; - hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * tilted->cols + tr.x + tr.width - tr.height; - hidfeature->rect[k].p0 = tr.y * tilted->cols + tr.x; - hidfeature->rect[k].p1 = (tr.y + tr.height) * tilted->cols + tr.x - tr.height; - */ - hidnode->p[k][2] = (tr.y + tr.width) * step + tr.x + tr.width; hidnode->p[k][3] = (tr.y + tr.width + tr.height) * step + tr.x + tr.width - tr.height; hidnode->p[k][0] = tr.y * step + tr.x; hidnode->p[k][1] = (tr.y + tr.height) * step + tr.x - tr.height; } - - //hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio); hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio); if( k == 0 ) area0 = tr.width * tr.height; else - //sum0 += hidfeature->rect[k].weight * tr.width * tr.height; sum0 += hidnode->weight[k] * tr.width * tr.height; } - - // hidfeature->rect[0].weight = (float)(-sum0/area0); hidnode->weight[0] = (float)(-sum0 / area0); } /* l */ } /* j */ } } -static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade - /*double scale=0.0,*/ - /*int step*/) +static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade) { 
GpuHidHaarClassifierCascade *cascade; int i; @@ -817,11 +658,7 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade if(!hidnode->p[k][0]) break; r[k] = feature->rect[k].r; - // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) ); - // base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) ); - // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) ); - // base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) ); - } + } nr = k; for( k = 0; k < nr; k++ ) @@ -839,7 +676,6 @@ static void gpuSetHaarClassifierCascade( CvHaarClassifierCascade *_cascade hidnode->p[k][3] = tr.height; hidnode->weight[k] = (float)(feature->rect[k].weight * correction_ratio); } - //hidnode->weight[0]=(float)(-sum0/area0); } /* l */ } /* j */ } @@ -852,7 +688,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS const double GROUP_EPS = 0.2; CvSeq *result_seq = 0; - cv::Ptr temp_storage; cv::ConcurrentRectVector allCandidates; std::vector rectList; @@ -910,6 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; @@ -952,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU * gsum.clCxt->computeUnits() *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -997,7 +833,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS gpuSetImagesForHaarClassifierCascade( 
cascade, 1., gsum.step / 4 ); stagebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(GpuHidHaarStageClassifier) * gcascade->count); - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); openCLSafeCall(clEnqueueWriteBuffer(qu, stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); @@ -1044,7 +879,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options); openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); @@ -1059,6 +896,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLSafeCall(clReleaseMemObject(scaleinfobuffer)); openCLSafeCall(clReleaseMemObject(nodebuffer)); openCLSafeCall(clReleaseMemObject(candidatebuffer)); + } else { @@ -1118,7 +956,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); openCLSafeCall(clEnqueueWriteBuffer(qu, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), node, 0, NULL, NULL)); @@ -1160,7 +997,6 @@ CvSeq 
*cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum )); size_t globalThreads2[3] = {nodenum, 1, 1}; - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); } @@ -1195,8 +1031,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer )); args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum )); - - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options); candidate = (int *)clEnqueueMapBuffer(qu, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, &status); @@ -1284,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std int blocksize = 8; int grp_per_CU = 12; size_t localThreads[3] = { blocksize, blocksize, 1 }; - size_t globalThreads[3] = { grp_per_CU * Context::getContext()->computeUnits() * localThreads[0], + size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1300,8 +1136,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std CvHaarClassifierCascade *cascade = oldCascade; GpuHidHaarClassifierCascade *gcascade; GpuHidHaarStageClassifier *stage; - GpuHidHaarClassifier *classifier; - GpuHidHaarTreeNode *node; if( CV_MAT_DEPTH(gimg.type()) != CV_8U ) CV_Error( 
CV_StsUnsupportedFormat, "Only 8-bit images are supported" ); @@ -1314,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std } int *candidate; - + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); if( (flags & CV_HAAR_SCALE_IMAGE) ) { int indexy = 0; @@ -1340,19 +1174,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); stage = (GpuHidHaarStageClassifier *)(gcascade + 1); - classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); - node = (GpuHidHaarTreeNode *)(classifier->node); - - gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); - - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, - sizeof(GpuHidHaarStageClassifier) * gcascade->count, - stage, 0, NULL, NULL)); - - openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, - m_nodenum * sizeof(GpuHidHaarTreeNode), - node, 0, NULL, NULL)); int startstage = 0; int endstage = gcascade->count; @@ -1389,17 +1210,23 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); + const char * build_options = gcascade->is_stump_based ? 
"-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1, build_options); candidate = (int *)malloc(4 * sizeof(int) * outputsz); memset(candidate, 0, 4 * sizeof(int) * outputsz); + openCLReadBuffer( gsum.clCxt, ((OclBuffers *)buffers)->candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); for(int i = 0; i < outputsz; i++) + { if(candidate[4 * i + 2] != 0) + { allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3])); - + } + } free((void *)candidate); candidate = NULL; } @@ -1407,6 +1234,132 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std { cv::ocl::integral(gimg, gsum, gsqsum); + gcascade = (GpuHidHaarClassifierCascade *)cascade->hid_cascade; + + int step = gsum.step / 4; + int startnode = 0; + int splitstage = 3; + + int startstage = 0; + int endstage = gcascade->count; + + vector > args; + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->newnodebuffer )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&step )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage )); + args.push_back ( make_pair(sizeof(cl_int) 
, (void *)&endstage )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer )); + args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer )); + args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum )); + + const char * build_options = gcascade->is_stump_based ? "-D STUMP_BASED=1" : "-D STUMP_BASED=0"; + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1, build_options); + + candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL); + + for(int i = 0; i < outputsz; i++) + { + if(candidate[4 * i + 2] != 0) + allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], + candidate[4 * i + 2], candidate[4 * i + 3])); + } + clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0); + } + rectList.resize(allCandidates.size()); + if(!allCandidates.empty()) + std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin()); + + if( minNeighbors != 0 || findBiggestObject ) + groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS); + else + rweights.resize(rectList.size(), 0); + + GenResult(faces, rectList, rweights); +} + +void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, + double scaleFactor, int flags, + const int outputsz, const size_t localThreads[], + CvSize minSize, CvSize maxSize) +{ + if(initialized) + { + return; // we only allow one time initialization + } + CvHaarClassifierCascade *cascade = oldCascade; + + if( !CV_IS_HAAR_CLASSIFIER(cascade) ) + CV_Error( !cascade ? 
CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" ); + + if( scaleFactor <= 1 ) + CV_Error( CV_StsOutOfRange, "scale factor must be > 1" ); + + if( cols < minSize.width || rows < minSize.height ) + CV_Error(CV_StsError, "Image too small"); + + int datasize=0; + int totalclassifier=0; + + if( !cascade->hid_cascade ) + { + gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier); + } + + if( maxSize.height == 0 || maxSize.width == 0 ) + { + maxSize.height = rows; + maxSize.width = cols; + } + + findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0; + if( findBiggestObject ) + flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING); + + CreateBaseBufs(datasize, totalclassifier, flags, outputsz); + CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize); + + m_scaleFactor = scaleFactor; + m_rows = rows; + m_cols = cols; + m_flags = flags; + m_minSize = minSize; + m_maxSize = maxSize; + + // initialize nodes + GpuHidHaarClassifierCascade *gcascade; + GpuHidHaarStageClassifier *stage; + GpuHidHaarClassifier *classifier; + GpuHidHaarTreeNode *node; + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); + if( (flags & CV_HAAR_SCALE_IMAGE) ) + { + gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade); + stage = (GpuHidHaarStageClassifier *)(gcascade + 1); + classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); + node = (GpuHidHaarTreeNode *)(classifier->node); + + gpuSetImagesForHaarClassifierCascade( cascade, 1., gsum.step / 4 ); + + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, + sizeof(GpuHidHaarStageClassifier) * gcascade->count, + stage, 0, NULL, NULL)); + + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, + m_nodenum * sizeof(GpuHidHaarTreeNode), + node, 0, NULL, NULL)); + } + else + { gpuSetHaarClassifierCascade(cascade); gcascade = (GpuHidHaarClassifierCascade 
*)cascade->hid_cascade; @@ -1414,15 +1367,12 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std classifier = (GpuHidHaarClassifier *)(stage + gcascade->count); node = (GpuHidHaarTreeNode *)(classifier->node); - cl_command_queue qu = (cl_command_queue)gsum.clCxt->oclCommandQueue(); openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->nodebuffer, 1, 0, - m_nodenum * sizeof(GpuHidHaarTreeNode), - node, 0, NULL, NULL)); + m_nodenum * sizeof(GpuHidHaarTreeNode), + node, 0, NULL, NULL)); cl_int4 *p = (cl_int4 *)malloc(sizeof(cl_int4) * m_loopcount); float *correction = (float *)malloc(sizeof(float) * m_loopcount); - int startstage = 0; - int endstage = gcascade->count; double factor; for(int i = 0; i < m_loopcount; i++) { @@ -1448,105 +1398,15 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std size_t globalThreads2[3] = {m_nodenum, 1, 1}; - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); + openCLExecuteKernel(Context::getContext(), &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); } - - int step = gsum.step / 4; - int startnode = 0; - int splitstage = 3; openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->stagebuffer, 1, 0, sizeof(GpuHidHaarStageClassifier)*gcascade->count, stage, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->pbuffer, 1, 0, sizeof(cl_int4)*m_loopcount, p, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->correctionbuffer, 1, 0, sizeof(cl_float)*m_loopcount, correction, 0, NULL, NULL)); - vector > args; - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->stagebuffer )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->scaleinfobuffer )); - args.push_back ( make_pair(sizeof(cl_mem) , (void 
*)&((OclBuffers *)buffers)->newnodebuffer )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsum.data )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&gsqsum.data )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->candidatebuffer )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.rows )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&gsum.cols )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&step )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_loopcount )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&startstage )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&splitstage )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&endstage )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&startnode )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->pbuffer )); - args.push_back ( make_pair(sizeof(cl_mem) , (void *)&((OclBuffers *)buffers)->correctionbuffer )); - args.push_back ( make_pair(sizeof(cl_int) , (void *)&m_nodenum )); - - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); - - candidate = (int *)clEnqueueMapBuffer(qu, ((OclBuffers *)buffers)->candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int) * outputsz, 0, 0, 0, NULL); - - for(int i = 0; i < outputsz; i++) - { - if(candidate[4 * i + 2] != 0) - allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], - candidate[4 * i + 2], candidate[4 * i + 3])); - } - free(p); free(correction); - clEnqueueUnmapMemObject(qu, ((OclBuffers *)buffers)->candidatebuffer, candidate, 0, 0, 0); } - - rectList.resize(allCandidates.size()); - if(!allCandidates.empty()) - std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin()); - - if( minNeighbors != 0 || findBiggestObject ) - groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS); - else - 
rweights.resize(rectList.size(), 0); - - GenResult(faces, rectList, rweights); -} - -void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols, - double scaleFactor, int flags, - const int outputsz, const size_t localThreads[], - CvSize minSize, CvSize maxSize) -{ - CvHaarClassifierCascade *cascade = oldCascade; - - if( !CV_IS_HAAR_CLASSIFIER(cascade) ) - CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" ); - - if( scaleFactor <= 1 ) - CV_Error( CV_StsOutOfRange, "scale factor must be > 1" ); - - if( cols < minSize.width || rows < minSize.height ) - CV_Error(CV_StsError, "Image too small"); - - int datasize=0; - int totalclassifier=0; - - if( !cascade->hid_cascade ) - gpuCreateHidHaarClassifierCascade(cascade, &datasize, &totalclassifier); - - if( maxSize.height == 0 || maxSize.width == 0 ) - { - maxSize.height = rows; - maxSize.width = cols; - } - - findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0; - if( findBiggestObject ) - flags &= ~(CV_HAAR_SCALE_IMAGE | CV_HAAR_DO_CANNY_PRUNING); - - CreateBaseBufs(datasize, totalclassifier, flags, outputsz); - CreateFactorRelatedBufs(rows, cols, flags, scaleFactor, localThreads, minSize, maxSize); - - m_scaleFactor = scaleFactor; - m_rows = rows; - m_cols = cols; - m_flags = flags; - m_minSize = minSize; - m_maxSize = maxSize; - initialized = true; } @@ -1645,6 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( CvSize sz; CvSize winSize0 = oldCascade->orig_window_size; detect_piramid_info *scaleinfo; + cl_command_queue qu = reinterpret_cast(Context::getContext()->oclCommandQueue()); if (flags & CV_HAAR_SCALE_IMAGE) { for(factor = 1.f;; factor *= scaleFactor) @@ -1746,7 +1607,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs( ((OclBuffers *)buffers)->scaleinfobuffer = openCLCreateBuffer(cv::ocl::Context::getContext(), CL_MEM_READ_ONLY, sizeof(detect_piramid_info) * loopcount); } - 
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)cv::ocl::Context::getContext()->oclCommandQueue(), ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0, + openCLSafeCall(clEnqueueWriteBuffer(qu, ((OclBuffers *)buffers)->scaleinfobuffer, 1, 0, sizeof(detect_piramid_info)*loopcount, scaleinfo, 0, NULL, NULL)); free(scaleinfo); @@ -1758,7 +1619,8 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector& f const std::vector &rectList, const std::vector &rweights) { - CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), cvCreateMemStorage(0) ); + MemStorage tempStorage(cvCreateMemStorage(0)); + CvSeq *result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), tempStorage ); if( findBiggestObject && rectList.size() ) { @@ -1794,167 +1656,30 @@ void cv::ocl::OclCascadeClassifierBuf::GenResult(CV_OUT std::vector& f void cv::ocl::OclCascadeClassifierBuf::release() { - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer)); - - if( (m_flags & CV_HAAR_SCALE_IMAGE) ) + if(initialized) { - cvFree(&oldCascade->hid_cascade); - } - else - { - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer)); - openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer)); - } + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->stagebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->scaleinfobuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->nodebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->candidatebuffer)); - free(buffers); - buffers = NULL; + if( (m_flags & CV_HAAR_SCALE_IMAGE) ) + { + cvFree(&oldCascade->hid_cascade); + } + 
else + { + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->newnodebuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->correctionbuffer)); + openCLSafeCall(clReleaseMemObject(((OclBuffers *)buffers)->pbuffer)); + } + + free(buffers); + buffers = NULL; + initialized = false; + } } #ifndef _MAX_PATH #define _MAX_PATH 1024 #endif - - -/****************************************************************************************\ -* Persistence functions * -\****************************************************************************************/ - -/* field names */ - -#define ICV_HAAR_SIZE_NAME "size" -#define ICV_HAAR_STAGES_NAME "stages" -#define ICV_HAAR_TREES_NAME "trees" -#define ICV_HAAR_FEATURE_NAME "feature" -#define ICV_HAAR_RECTS_NAME "rects" -#define ICV_HAAR_TILTED_NAME "tilted" -#define ICV_HAAR_THRESHOLD_NAME "threshold" -#define ICV_HAAR_LEFT_NODE_NAME "left_node" -#define ICV_HAAR_LEFT_VAL_NAME "left_val" -#define ICV_HAAR_RIGHT_NODE_NAME "right_node" -#define ICV_HAAR_RIGHT_VAL_NAME "right_val" -#define ICV_HAAR_STAGE_THRESHOLD_NAME "stage_threshold" -#define ICV_HAAR_PARENT_NAME "parent" -#define ICV_HAAR_NEXT_NAME "next" - -static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */) -{ - return 1; -} - -namespace cv -{ -namespace ocl -{ - -struct gpuHaarDetectObjects_ScaleImage_Invoker -{ - gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade, - int _stripSize, double _factor, - const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, - Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) - { - cascade = _cascade; - stripSize = _stripSize; - factor = _factor; - sum1 = _sum1; - sqsum1 = _sqsum1; - norm1 = _norm1; - mask1 = _mask1; - equRect = _equRect; - vec = &_vec; - } - - void operator()( const BlockedRange &range ) const - { - Size winSize0 = cascade->orig_window_size; - Size winSize(cvRound(winSize0.width * factor), cvRound(winSize0.height * 
factor)); - int y1 = range.begin() * stripSize, y2 = min(range.end() * stripSize, sum1.rows - 1 - winSize0.height); - Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1); - int x, y, ystep = factor > 2 ? 1 : 2; - - for( y = y1; y < y2; y += ystep ) - for( x = 0; x < ssz.width; x += ystep ) - { - if( gpuRunHaarClassifierCascade( /*cascade, cvPoint(x, y), 0*/ ) > 0 ) - vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor), - winSize.width, winSize.height)); - } - } - - const CvHaarClassifierCascade *cascade; - int stripSize; - double factor; - Mat sum1, sqsum1, *norm1, *mask1; - Rect equRect; - ConcurrentRectVector *vec; -}; - - -struct gpuHaarDetectObjects_ScaleCascade_Invoker -{ - gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade, - Size _winsize, const Range &_xrange, double _ystep, - size_t _sumstep, const int **_p, const int **_pq, - ConcurrentRectVector &_vec ) - { - cascade = _cascade; - winsize = _winsize; - xrange = _xrange; - ystep = _ystep; - sumstep = _sumstep; - p = _p; - pq = _pq; - vec = &_vec; - } - - void operator()( const BlockedRange &range ) const - { - int iy, startY = range.begin(), endY = range.end(); - const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3]; - const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3]; - bool doCannyPruning = p0 != 0; - int sstep = (int)(sumstep / sizeof(p0[0])); - - for( iy = startY; iy < endY; iy++ ) - { - int ix, y = cvRound(iy * ystep), ixstep = 1; - for( ix = xrange.start; ix < xrange.end; ix += ixstep ) - { - int x = cvRound(ix * ystep); // it should really be ystep, not ixstep - - if( doCannyPruning ) - { - int offset = y * sstep + x; - int s = p0[offset] - p1[offset] - p2[offset] + p3[offset]; - int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset]; - if( s < 100 || sq < 20 ) - { - ixstep = 2; - continue; - } - } - - int result = gpuRunHaarClassifierCascade(/* cascade, cvPoint(x, y), 0 */); - if( result > 0 ) - vec->push_back(Rect(x, y, 
winsize.width, winsize.height)); - ixstep = result != 0 ? 1 : 2; - } - } - } - - const CvHaarClassifierCascade *cascade; - double ystep; - size_t sumstep; - Size winsize; - Range xrange; - const int **p; - const int **pq; - ConcurrentRectVector *vec; -}; - -} -} diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 7a13324077..3533cce69a 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Wenju He, wenju@multicorewareinc.com +// Wenju He, wenju@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -48,13 +48,107 @@ using namespace cv; using namespace cv::ocl; using namespace std; - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 #define CELLS_PER_BLOCK_Y 2 #define NTHREADS 256 +static oclMat gauss_w_lut; +static bool hog_device_cpu; +/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */ +static const float gaussian_interp_lut[] = +{ + /* gaussian lut */ + 0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f, + 0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f, + 0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f, + 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f, + 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, + 0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, + 0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, + 0.27768996f, 0.20961139f, 
0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f, + 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f, + 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, + 0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, + 0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, + 0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, + 0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f, + 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f, + 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, + 0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f, + 0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f, + 0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f, + 0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, + 0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, + 0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f, + 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f, + 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, + 0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, + 0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, + 0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, + 0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f, + 0.14863673f, 0.20961139f, 
0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f, + 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, + 0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, + 0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, + 0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f, + /* interp_weight lut */ + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f, + 0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, + 0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, + 0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, + 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f, + 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f, + 0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, + 0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, + 0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f, + 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f, + 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 
0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f, + 0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, + 0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, + 0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, + 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f, + 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f, + 0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, + 0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, + 0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, + 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f, + 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f, + 0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, + 0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, + 0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, + 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 0.17578125f, 0.15234375f, + 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f, + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 
0.01171875f, 0.00390625f +}; + namespace cv { namespace ocl @@ -78,38 +172,43 @@ namespace cv int cnblocks_win_x; int cnblocks_win_y; int cblock_hist_size; - int cblock_hist_size_2up; int cdescr_size; int cdescr_width; + int cdescr_height; void set_up_constants(int nbins, int block_stride_x, int block_stride_y, int nblocks_win_x, int nblocks_win_y); void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); + int height, int width, float sigma, const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists); void normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold); + int height, int width, cv::ocl::oclMat &block_hists, + float threshold); void classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, float free_coef, float threshold, cv::ocl::oclMat &labels); - void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); - void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const 
cv::ocl::oclMat &block_hists, + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); - - void resize( const oclMat &src, oclMat &dst, const Size sz); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); } } } @@ -117,8 +216,14 @@ namespace cv using namespace ::cv::ocl::device; -cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, - int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) +static inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} + +cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, + Size cell_size_, int nbins_, double win_sigma_, + double threshold_L2hys_, bool gamma_correction_, int nlevels_) : win_size(win_size_), block_size(block_size_), block_stride(block_stride_), @@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && (win_size.height - block_size.height) % block_stride.height == 0); - CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); + CV_Assert(block_size.width % cell_size.width == 0 && + block_size.height % cell_size.height == 0); CV_Assert(block_stride == cell_size); 
CV_Assert(cell_size == Size(8, 8)); - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block(block_size.width / cell_size.width, + block_size.height / cell_size.height); CV_Assert(cells_per_block == Size(2, 2)); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); - hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); + hog::set_up_constants(nbins, block_stride.width, block_stride.height, + blocks_per_win.width, blocks_per_win.height); effect_size = Size(0, 0); + + if (queryDeviceInfo()) + hog_device_cpu = true; + else + hog_device_cpu = false; } size_t cv::ocl::HOGDescriptor::getDescriptorSize() const @@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block = Size(block_size.width / cell_size.width, + block_size.height / cell_size.height); return (size_t)(nbins * cells_per_block.area()); } @@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const { size_t detector_size = detector.rows * detector.cols; size_t descriptor_size = getDescriptorSize(); - return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; + return detector_size == 0 || detector_size == descriptor_size || + detector_size == descriptor_size + 1; } void cv::ocl::HOGDescriptor::setSVMDetector(const vector &_detector) @@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride) const size_t block_hist_size = getBlockHistogramSize(); const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); - block_hists.create(1, static_cast(block_hist_size * blocks_per_img.area()), CV_32F); + block_hists.create(1, + 
static_cast(block_hist_size * blocks_per_img.area()) + 256, CV_32F); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); labels.create(1, wins_per_img.area(), CV_8U); + + vector v_lut = vector(gaussian_interp_lut, gaussian_interp_lut + + sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0])); + Mat m_lut(v_lut); + gauss_w_lut.upload(m_lut.reshape(1,1)); } void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) @@ -221,29 +342,34 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc switch (img.type()) { case CV_8UC1: - hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; case CV_8UC4: - hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; } } + void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) { - computeGradient(img, grad, qangle); + computeGradient(img, this->grad, this->qangle); - hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - grad, qangle, (float)getWinSigma(), block_hists); + hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists); - hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - block_hists, (float)threshold_L2hys); + hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, block_hists, (float)threshold_L2hys); } -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size 
win_stride, oclMat &descriptors, int descr_format) +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, + oclMat &descriptors, int descr_format) { - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); @@ -253,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); - descriptors.create(wins_per_img.area(), static_cast(blocks_per_win.area() * block_hist_size), CV_32F); + descriptors.create(wins_per_img.area(), + static_cast(blocks_per_win.area() * block_hist_size), CV_32F); switch (descr_format) { case DESCR_FORMAT_ROW_BY_ROW: - hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_rows(win_size.height, win_size.width, + block_stride.height, block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; case DESCR_FORMAT_COL_BY_COL: - hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_cols(win_size.height, win_size.width, + block_stride.height, block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; default: CV_Error(CV_StsBadArg, "Unknown descriptor format"); @@ -271,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, } -void 
cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, double hit_threshold, Size win_stride, Size padding) +void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, + double hit_threshold, Size win_stride, Size padding) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(padding == Size(0, 0)); @@ -283,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); computeBlockHistograms(img); - hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, - detector, (float)free_coef, (float)hit_threshold, labels); + hog::classify_hists(win_size.height, win_size.width, block_stride.height, + block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, detector, + (float)free_coef, (float)hit_threshold, labels); labels.download(labels_host); unsigned char *vec = labels_host.ptr(); @@ -306,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, double hit_threshold, - Size win_stride, Size padding, double scale0, int group_threshold) +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, + double hit_threshold, Size win_stride, Size padding, + double scale0, int group_threshold) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(scale0 > 1); @@ -333,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f if (win_stride == Size()) 
win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); image_scale.create(img.size(), img.type()); @@ -347,16 +481,18 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f } else { - hog::resize( img, image_scale, effect_size); + resize(img, image_scale, effect_size); detect(image_scale, locations, hit_threshold, win_stride, padding); } - Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); + Size scaled_win_size(cvRound(win_size.width * scale), + cvRound(win_size.height * scale)); for (size_t j = 0; j < locations.size(); j++) - all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); + all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, + scaled_win_size)); } found_locations.assign(all_candidates.begin(), all_candidates.end()); - groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); + groupRectangles(found_locations, group_threshold, 0.2); } int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) @@ -364,9 +500,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) return (size - part_size + stride) / stride; } -cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) +cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, + cv::Size stride) { - return Size(numPartsWithin(size.width, part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); + return Size(numPartsWithin(size.width, part_size.width, stride.width), + numPartsWithin(size.height, part_size.height, stride.height)); } std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() 
@@ -1547,8 +1685,9 @@ static int power_2up(unsigned int n) return -1; // Input is too big } -void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, - int nblocks_win_x, int nblocks_win_y) +void cv::ocl::device::hog::set_up_constants(int nbins, + int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y) { cnbins = nbins; cblock_stride_x = block_stride_x; @@ -1559,52 +1698,32 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; cblock_hist_size = block_hist_size; - int block_hist_size_2up = power_2up(block_hist_size); - cblock_hist_size_2up = block_hist_size_2up; - int descr_width = nblocks_win_x * block_hist_size; cdescr_width = descr_width; + cdescr_height = nblocks_win_y; int descr_size = descr_width * nblocks_win_y; cdescr_size = descr_size; } -static inline int divUp(int total, int grain) -{ - return (total + grain - 1) / grain; -} - -static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args) -{ - size_t wave_size = 0; - queryDeviceInfo(WAVEFRONT_SIZE, &wave_size); - if (wave_size <= 16) - { - char build_options[64]; - sprintf(build_options, (wave_size == 16) ? 
"-D WAVE_SIZE_16" : "-D WAVE_SIZE_1"); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options); - } - else - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) +void cv::ocl::device::hog::compute_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, float sigma, + const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, + cv::ocl::oclMat &block_hists) { Context *clCxt = Context::getContext(); - string kernelName = "compute_hists_kernel"; vector< pair > args; + string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" : + "compute_hists_kernel"; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; int blocks_total = img_block_width * img_block_height; - int blocks_in_group = 4; - size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; - size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 }; int grad_quadstep = grad.step >> 2; int qangle_step = qangle.step; @@ -1612,6 +1731,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc // Precompute gaussian spatial window parameter float scale = 1.f / (2.f * sigma * sigma); + int blocks_in_group = 4; + size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; + size_t globalThreads[3] = { + divUp(img_block_width 
* img_block_height, blocks_in_group) * localThreads[0], 2, 1 }; + int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); int smem = (hists_size + final_hists_size) * blocks_in_group; @@ -1627,62 +1751,120 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + if (kernelName.compare("compute_hists_lut_kernel") == 0) + args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); + else + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( smem, (void *)NULL)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold) +void cv::ocl::device::hog::normalize_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, + cv::ocl::oclMat &block_hists, + float threshold) { Context *clCxt = Context::getContext(); - string kernelName = "normalize_hists_kernel"; vector< pair > args; + string kernelName; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; - int nthreads = power_2up(block_hist_size); + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; + int 
nthreads; + size_t globalThreads[3] = { 1, 1, 1 }; + size_t localThreads[3] = { 1, 1, 1 }; + + if ( nbins == 9 ) + { + /* optimized for the case of 9 bins */ + kernelName = "normalize_hists_36_kernel"; + int blocks_in_group = NTHREADS / block_hist_size; + nthreads = blocks_in_group * block_hist_size; + int num_groups = divUp( img_block_width * img_block_height, blocks_in_group); + globalThreads[0] = nthreads * num_groups; + localThreads[0] = nthreads; + } + else + { + kernelName = "normalize_hists_kernel"; + nthreads = power_2up(block_hist_size); + globalThreads[0] = img_block_width * nthreads; + globalThreads[1] = img_block_height; + localThreads[0] = nthreads; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; - size_t localThreads[3] = { nthreads, 1, 1 }; + if ((nthreads < 32) || (nthreads > 512) ) + cv::ocl::error("normalize_hists: histogram's size is too small or too big", + __FILE__, __LINE__, "normalize_hists"); - if ((nthreads < 32) || (nthreads > 512) ) - cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); + args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + } - args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); - openCLExecuteKernel_hog(clCxt, 
&objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, - float threshold, cv::ocl::oclMat &labels) +void cv::ocl::device::hog::classify_hists(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, + float free_coef, float threshold, + cv::ocl::oclMat &labels) { Context *clCxt = Context::getContext(); - string kernelName = "classify_hists_kernel"; vector< pair > args; + int nthreads; + string kernelName; + switch (cdescr_width) + { + case 180: + nthreads = 180; + kernelName = "classify_hists_180_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + case 252: + nthreads = 256; + kernelName = "classify_hists_252_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + default: + nthreads = 256; + kernelName = "classify_hists_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + } + int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + 
win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - - size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; - size_t localThreads[3] = { NTHREADS, 1, 1 }; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; + size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 }; + size_t localThreads[3] = { nthreads, 1, 1 }; args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); @@ -1693,12 +1875,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors) { 
Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_rows_kernel"; @@ -1708,7 +1898,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1724,12 +1915,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_cols_kernel"; @@ -1739,7 +1934,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / 
win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1756,11 +1952,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC1_kernel"; @@ -1785,11 +1986,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) +void 
cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC4_kernel"; @@ -1815,39 +2021,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz) -{ - CV_Assert( (src.channels() == dst.channels()) ); - Context *clCxt = Context::getContext(); - - string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel"; - size_t blkSizeX = 16, blkSizeY = 16; - size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX; - size_t glbSizeY = sz.height % blkSizeY == 0 ? 
sz.height : (sz.height / blkSizeY + 1) * blkSizeY; - size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; - size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; - - float ifx = (float)src.cols / sz.width; - float ify = (float)src.rows / sz.height; - int src_step = static_cast(src.step); - int dst_step = static_cast(dst.step); - - vector< pair > args; - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); - - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); +} \ No newline at end of file diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index ee1e92a712..15c1539c0e 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -25,6 +25,7 @@ // Xu Pang, pangxu010@163.com // Wu Zailong, bullet@yeah.net // Wenju He, wenju@multicorewareinc.com +// Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -80,6 +81,7 @@ namespace cv extern const char *imgproc_calcHarris; extern const char *imgproc_calcMinEigenVal; extern const char *imgproc_convolve; + 
extern const char *imgproc_clahe; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template struct index_and_sizeof; @@ -269,7 +271,7 @@ namespace cv size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; - + float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; vector< pair > args; if(map1.channels() == 2) { @@ -289,9 +291,8 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - - if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) + + if(src.clCxt->supportsFeature(Context::CL_DOUBLE)) { args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } @@ -325,7 +326,6 @@ namespace cv } else { - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); } } @@ -1207,30 +1207,41 @@ namespace cv void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int borderType) { - if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) - { - CV_Error(CV_GpuNotSupported, "select device don't support double"); - } - CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; - CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); - dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), Dx, Dy, dst, borderType); + oclMat dx, dy; + cornerHarris_dxdy(src, dst, 
dx, dy, blockSize, ksize, k, borderType); } - void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) + void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, + double k, int borderType) { if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) { CV_Error(CV_GpuNotSupported, "select device don't support double"); } CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); - oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); - extractCovData(src, Dx, Dy, blockSize, ksize, borderType); + extractCovData(src, dx, dy, blockSize, ksize, borderType); dst.create(src.size(), CV_32F); - corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, Dx, Dy, dst, borderType); + corner_ocl(imgproc_calcHarris, "calcHarris", blockSize, static_cast(k), dx, dy, dst, borderType); + } + + void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) + { + oclMat dx, dy; + cornerMinEigenVal_dxdy(src, dst, dx, dy, blockSize, ksize, borderType); + } + + void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType) + { + if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "select device don't support double"); + } + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); + CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); + extractCovData(src, dx, dy, blockSize, ksize, borderType); + dst.create(src.size(), CV_32F); + corner_ocl(imgproc_calcMinEigenVal, "calcMinEigenVal", blockSize, 0, dx, dy, dst, borderType); } /////////////////////////////////// MeanShiftfiltering 
/////////////////////////////////////////////// static void meanShiftFiltering_gpu(const oclMat &src, oclMat dst, int sp, int sr, int maxIter, float eps) @@ -1502,6 +1513,194 @@ namespace cv openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1); LUT(mat_src, lut, mat_dst); } + + //////////////////////////////////////////////////////////////////////// + // CLAHE + namespace clahe + { + inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain * grain; + } + + static void calcLut(const oclMat &src, oclMat &dst, + const int tilesX, const int tilesY, const cv::Size tileSize, + const int clipLimit, const float lutScale) + { + cl_int2 tile_size; + tile_size.s[0] = tileSize.width; + tile_size.s[1] = tileSize.height; + + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit )); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale )); + + String kernelName = "calcLut"; + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; + bool is_cpu = queryDeviceInfo(); + if (is_cpu) + { + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU"); + } + else + { + cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName); + int wave_size = queryDeviceInfo(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + static char opt[20] = {0}; + sprintf(opt, " -D 
WAVE_SIZE=%d", wave_size); + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); + } + } + + static void transform(const oclMat &src, oclMat &dst, const oclMat &lut, + const int tilesX, const int tilesY, const cv::Size tileSize) + { + cl_int2 tile_size; + tile_size.s[0] = tileSize.width; + tile_size.s[1] = tileSize.height; + + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows )); + args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY )); + + String kernelName = "transform"; + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { divUp(src.cols, localThreads[0]), divUp(src.rows, localThreads[1]), 1 }; + + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1); + } + } + + namespace + { + class CLAHE_Impl : public cv::CLAHE + { + public: + CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); + + cv::AlgorithmInfo* info() const; + + void apply(cv::InputArray src, cv::OutputArray dst); + + void setClipLimit(double clipLimit); + double getClipLimit() const; + + void setTilesGridSize(cv::Size tileGridSize); + cv::Size getTilesGridSize() const; + + void collectGarbage(); + + private: + double clipLimit_; + int tilesX_; + int tilesY_; 
+ + oclMat srcExt_; + oclMat lut_; + }; + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : + clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + { + } + + CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE_OCL", + obj.info()->addParam(obj, "clipLimit", obj.clipLimit_); + obj.info()->addParam(obj, "tilesX", obj.tilesX_); + obj.info()->addParam(obj, "tilesY", obj.tilesY_)) + void CLAHE_Impl::apply(cv::InputArray src_raw, cv::OutputArray dst_raw) + { + oclMat& src = getOclMatRef(src_raw); + oclMat& dst = getOclMatRef(dst_raw); + CV_Assert( src.type() == CV_8UC1 ); + + dst.create( src.size(), src.type() ); + + const int histSize = 256; + + ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_); + + cv::Size tileSize; + oclMat srcForLut; + + if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) + { + tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); + srcForLut = src; + } + else + { + cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar()); + + tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + srcForLut = srcExt_; + } + + const int tileSizeTotal = tileSize.area(); + const float lutScale = static_cast(histSize - 1) / tileSizeTotal; + + int clipLimit = 0; + if (clipLimit_ > 0.0) + { + clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); + clipLimit = std::max(clipLimit, 1); + } + + clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale); + //finish(); + clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize); + } + + void CLAHE_Impl::setClipLimit(double clipLimit) + { + clipLimit_ = clipLimit; + } + + double CLAHE_Impl::getClipLimit() const + { + return clipLimit_; + } + + void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) + { + tilesX_ = tileGridSize.width; + tilesY_ = tileGridSize.height; + } + + cv::Size CLAHE_Impl::getTilesGridSize() const + { + return cv::Size(tilesX_, tilesY_); + 
} + + void CLAHE_Impl::collectGarbage() + { + srcExt_.release(); + lut_.release(); + } + } + + cv::Ptr createCLAHE(double clipLimit, cv::Size tileGridSize) + { + return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); + } + //////////////////////////////////bilateralFilter//////////////////////////////////////////////////// static void oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d, diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 799c49c50c..d4841fcfd4 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -124,7 +124,8 @@ namespace cv cacheSize = 0; } - + // not to be exported to dynamic lib + void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl); struct Info::Impl { cl_platform_id oclplatform; @@ -142,22 +143,12 @@ namespace cv char extra_options[512]; int double_support; int unified_memory; //1 means integrated GPU, otherwise this value is 0 + bool enable_disk_cache; + bool update_disk_cache; string binpath; int refcounter; - Impl() - { - refcounter = 1; - oclplatform = 0; - oclcontext = 0; - clCmdQueue = 0; - devnum = -1; - maxComputeUnits = 0; - maxWorkGroupSize = 0; - memset(extra_options, 0, 512); - double_support = 0; - unified_memory = 0; - } + Impl(); void setDevice(void *ctx, void *q, int devnum); @@ -182,6 +173,25 @@ namespace cv void releaseResources(); }; + Info::Impl::Impl() + :oclplatform(0), + oclcontext(0), + clCmdQueue(0), + devnum(-1), + maxWorkGroupSize(0), + maxDimensions(0), + maxComputeUnits(0), + double_support(0), + unified_memory(0), + enable_disk_cache(false), + update_disk_cache(false), + binpath("./"), + refcounter(1) + { + memset(extra_options, 0, 512); + setBinaryDiskCacheImpl(CACHE_RELEASE, String("./"), this); + } + void Info::Impl::releaseResources() { devnum = -1; @@ -333,6 +343,10 @@ namespace cv oclinfo.push_back(ocltmpinfo); } } + if(devcienums > 0) + { + setDevice(oclinfo[0]); + } return devcienums; 
} @@ -363,64 +377,43 @@ namespace cv clFinish(Context::getContext()->impl->clCmdQueue); } - void queryDeviceInfo(DEVICE_INFO info_type, void* info) + //template specializations of queryDeviceInfo + template<> + bool queryDeviceInfo(cl_kernel) { - static Info::Impl* impl = Context::getContext()->impl; - switch(info_type) - { - case WAVEFRONT_SIZE: - { - bool is_cpu = false; - queryDeviceInfo(IS_CPU_DEVICE, &is_cpu); - if(is_cpu) - { - *(int*)info = 1; - return; - } -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD - try - { - openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], - CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0)); - } - catch(const cv::Exception&) -#elif defined (CL_DEVICE_WARP_SIZE_NV) - const int EXT_LEN = 4096 + 1 ; - char extends_set[EXT_LEN]; - size_t extends_size; - openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size)); - extends_set[EXT_LEN - 1] = 0; - if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos) - { - openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0], - CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0)); - } - else -#endif - { - // if no way left for us to query the warp size, we can get it from kernel group info - static const char * _kernel_string = "__kernel void test_func() {}"; - cl_kernel kernel; - kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func"); - openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], - CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL)); - } + Info::Impl* impl = Context::getContext()->impl; + cl_device_type devicetype; + openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], + CL_DEVICE_TYPE, sizeof(cl_device_type), + &devicetype, NULL)); + return (devicetype == CVCL_DEVICE_TYPE_CPU); + } - } - break; - case IS_CPU_DEVICE: - { - cl_device_type devicetype; - 
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], - CL_DEVICE_TYPE, sizeof(cl_device_type), - &devicetype, NULL)); - *(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU); - } - break; - default: - CV_Error(-1, "Invalid device info type"); - break; + template + static _ty queryWavesize(cl_kernel kernel) + { + size_t info = 0; + Info::Impl* impl = Context::getContext()->impl; + bool is_cpu = queryDeviceInfo(); + if(is_cpu) + { + return 1; } + CV_Assert(kernel != NULL); + openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum], + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL)); + return static_cast<_ty>(info); + } + + template<> + size_t queryDeviceInfo(cl_kernel kernel) + { + return queryWavesize(kernel); + } + template<> + int queryDeviceInfo(cl_kernel kernel) + { + return queryWavesize(kernel); } void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) @@ -511,6 +504,24 @@ namespace cv return openCLGetKernelFromSource(clCxt, source, kernelName, NULL); } + void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl) + { + impl->update_disk_cache = (mode & CACHE_UPDATE) == CACHE_UPDATE; + impl->enable_disk_cache = +#ifdef _DEBUG + (mode & CACHE_DEBUG) == CACHE_DEBUG; +#else + (mode & CACHE_RELEASE) == CACHE_RELEASE; +#endif + if(impl->enable_disk_cache && !path.empty()) + { + impl->binpath = path; + } + } + void setBinaryDiskCache(int mode, cv::String path) + { + setBinaryDiskCacheImpl(mode, path, Context::getContext()->impl); + } void setBinpath(const char *path) { @@ -590,8 +601,8 @@ namespace cv filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb"; } - FILE *fp = fopen(filename.c_str(), "rb"); - if(fp == NULL || clCxt->impl->binpath.size() == 0) //we should generate a binary file for the first time. + FILE *fp = clCxt->impl->enable_disk_cache ? 
fopen(filename.c_str(), "rb") : NULL; + if(fp == NULL || clCxt->impl->update_disk_cache) { if(fp != NULL) fclose(fp); @@ -600,7 +611,7 @@ namespace cv clCxt->impl->oclcontext, 1, source, NULL, &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && clCxt->impl->binpath.size()) + if(status == CL_SUCCESS && clCxt->impl->enable_disk_cache) savetofile(clCxt, program, filename.c_str()); } else @@ -934,6 +945,14 @@ namespace cv int Context::val = 0; static Mutex cs; static volatile int context_tear_down = 0; + + bool initialized() + { + return *((volatile int*)&Context::val) != 0 && + Context::clCxt->impl->clCmdQueue != NULL&& + Context::clCxt->impl->oclcontext != NULL; + } + Context* Context::getContext() { if(*((volatile int*)&val) != 1) @@ -947,8 +966,6 @@ namespace cv clCxt.reset(new Context); std::vector oclinfo; CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); - oclinfo[0].impl->setDevice(0, 0, 0); - clCxt.get()->impl = oclinfo[0].impl->copy(); *((volatile int*)&val) = 1; } @@ -1073,7 +1090,7 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ) Context* cv_ctx = Context::getContext(); if(cv_ctx) { - cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext); + cl_context ctx = cv_ctx->impl->oclcontext; if(ctx) openCLSafeCall(clReleaseContext(ctx)); } diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 268a1fe9b5..1ff963a5cd 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -74,6 +74,7 @@ namespace cv } } + //////////////////////////////////////////////////////////////////////// // convert_C3C4 static void convert_C3C4(const cl_mem &src, oclMat &dst) @@ -227,6 +228,34 @@ void cv::ocl::oclMat::upload(const Mat &m) //download_channels = m.channels(); } +cv::ocl::oclMat::operator cv::_InputArray() +{ + _InputArray newInputArray; + 
newInputArray.flags = cv::_InputArray::OCL_MAT; + newInputArray.obj = reinterpret_cast(this); + return newInputArray; +} + +cv::ocl::oclMat::operator cv::_OutputArray() +{ + _OutputArray newOutputArray; + newOutputArray.flags = cv::_InputArray::OCL_MAT; + newOutputArray.obj = reinterpret_cast(this); + return newOutputArray; +} + +cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + +cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + void cv::ocl::oclMat::download(cv::Mat &m) const { CV_DbgAssert(!this->empty()); @@ -394,7 +423,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be if( rtype < 0 ) rtype = type(); else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels()); + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); //int scn = channels(); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 3bcb8700b7..4292a1f877 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -43,9 +43,28 @@ // //M*/ -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS #include "precomp.hpp" +#ifdef __GNUC__ +#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 +#define GCC_DIAG_STR(s) #s +#define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y) +# define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x) +# define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x) +# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 +# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \ +GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) +# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop) +# else +# define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x)) +# define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x)) +# endif +#else +# define GCC_DIAG_OFF(x) +# define GCC_DIAG_ON(x) +#endif 
+#endif /* __GNUC__ */ + using namespace std; namespace cv @@ -121,13 +140,16 @@ namespace cv build_options, finish_mode); } +#ifdef __GNUC__ + GCC_DIAG_OFF(deprecated-declarations) +#endif cl_mem bindTexture(const oclMat &mat) { cl_mem texture; cl_image_format format; int err; int depth = mat.depth(); - int channels = mat.channels(); + int channels = mat.oclchannels(); switch(depth) { @@ -156,7 +178,7 @@ namespace cv format.image_channel_order = CL_RGBA; break; default: - CV_Error(-1, "Image forma is not supported"); + CV_Error(-1, "Image format is not supported"); break; } #ifdef CL_VERSION_1_2 @@ -180,10 +202,6 @@ namespace cv else #endif { -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif texture = clCreateImage2D( (cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, @@ -193,9 +211,6 @@ namespace cv 0, NULL, &err); -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif } size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; @@ -225,6 +240,14 @@ namespace cv openCLSafeCall(err); return texture; } +#ifdef __GNUC__ + GCC_DIAG_ON(deprecated-declarations) +#endif + + Ptr bindTexturePtr(const oclMat &mat) + { + return Ptr(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type())); + } void releaseTexture(cl_mem& texture) { openCLFree(texture); diff --git a/modules/ocl/src/moments.cpp b/modules/ocl/src/moments.cpp index d6baba207c..cb16fb136d 100644 --- a/modules/ocl/src/moments.cpp +++ b/modules/ocl/src/moments.cpp @@ -16,7 +16,7 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Sen Liu, sen@multicorewareinc.com +// Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -277,8 +277,8 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) blocky = size.height/TILE_SIZE; else blocky = size.height/TILE_SIZE + 1; - cv::ocl::oclMat dst_m(blocky * 10, blockx, CV_64FC1); - cl_mem sum = openCLCreateBuffer(src.clCxt,CL_MEM_READ_WRITE,10*sizeof(double)); + oclMat dst_m(blocky * 10, blockx, CV_64FC1); + oclMat sum(1, 10, CV_64FC1); int tile_width = std::min(size.width,TILE_SIZE); int tile_height = std::min(size.height,TILE_SIZE); size_t localThreads[3] = { tile_height, 1, 1}; @@ -288,19 +288,16 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.width )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&tileSize.height )); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.cols )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&blocky )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&type )); args.push_back( make_pair( sizeof(cl_int) , (void *)&depth )); args.push_back( make_pair( sizeof(cl_int) , (void *)&cn )); args.push_back( make_pair( sizeof(cl_int) , (void *)&coi )); args.push_back( make_pair( sizeof(cl_int) , (void *)&binary )); args.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); - openCLExecuteKernel(dst_m.clCxt, &moments, "CvMoments", globalThreads, localThreads, args, -1, depth); + 
openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, depth); size_t localThreadss[3] = { 128, 1, 1}; size_t globalThreadss[3] = { 128, 1, 1}; @@ -309,25 +306,23 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary ) args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_height )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&tile_width )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&TILE_SIZE )); - args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum )); + args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&sum.data )); args_sum.push_back( make_pair( sizeof(cl_mem) , (void *)&dst_m.data )); args_sum.push_back( make_pair( sizeof(cl_int) , (void *)&dst_m.step )); - openCLExecuteKernel(dst_m.clCxt, &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1); - double* dstsum = new double[10]; - memset(dstsum,0,10*sizeof(double)); - openCLReadBuffer(dst_m.clCxt,sum,(void *)dstsum,10*sizeof(double)); - mom->m00 = dstsum[0]; - mom->m10 = dstsum[1]; - mom->m01 = dstsum[2]; - mom->m20 = dstsum[3]; - mom->m11 = dstsum[4]; - mom->m02 = dstsum[5]; - mom->m30 = dstsum[6]; - mom->m21 = dstsum[7]; - mom->m12 = dstsum[8]; - mom->m03 = dstsum[9]; - delete [] dstsum; - openCLSafeCall(clReleaseMemObject(sum)); + openCLExecuteKernel(Context::getContext(), &moments, "dst_sum", globalThreadss, localThreadss, args_sum, -1, -1); + + Mat dstsum(sum); + mom->m00 = dstsum.at(0, 0); + mom->m10 = dstsum.at(0, 1); + mom->m01 = dstsum.at(0, 2); + mom->m20 = dstsum.at(0, 3); + mom->m11 = dstsum.at(0, 4); + mom->m02 = dstsum.at(0, 5); + mom->m30 = dstsum.at(0, 6); + mom->m21 = dstsum.at(0, 7); + mom->m12 = dstsum.at(0, 8); + mom->m03 = dstsum.at(0, 9); + icvCompleteMomentState( mom ); } diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index 7d4b0a7653..070ced4731 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ 
b/modules/ocl/src/opencl/arithm_add.cl @@ -127,7 +127,7 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 3) +#define dst_align ((dst_offset / 2) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -165,7 +165,7 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 3) +#define dst_align ((dst_offset / 2) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -335,7 +335,7 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step, #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -375,7 +375,7 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -507,7 +507,7 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 
1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index fdf65923cd..3dbd376ecf 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -126,7 +126,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -164,7 +164,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -288,7 +288,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste #ifdef dst_align #undef dst_align #endif -#define dst_align ((dst_offset >> 1) & 1) +#define dst_align ((dst_offset / 2) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); diff --git a/modules/ocl/src/opencl/arithm_mul.cl b/modules/ocl/src/opencl/arithm_mul.cl index e1cc9f6ab4..40988f5fed 100644 --- a/modules/ocl/src/opencl/arithm_mul.cl +++ b/modules/ocl/src/opencl/arithm_mul.cl @@ -277,9 +277,15 @@ __kernel void arithm_mul_D6 (__global double *src1, int src1_step, int src1_offs } #endif +#ifdef DOUBLE_SUPPORT +#define SCALAR_TYPE double +#else +#define SCALAR_TYPE float +#endif + __kernel void 
arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset, __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, float scalar) + int rows, int cols, int dst_step1, SCALAR_TYPE scalar) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/ocl/src/opencl/filtering_laplacian.cl b/modules/ocl/src/opencl/filtering_laplacian.cl index 96a2f51ef4..8535eb1a54 100644 --- a/modules/ocl/src/opencl/filtering_laplacian.cl +++ b/modules/ocl/src/opencl/filtering_laplacian.cl @@ -82,9 +82,9 @@ ////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////Macro for define elements number per thread///////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////// -#define ANCHOR 3 -#define ANX 1 -#define ANY 1 +//#define ANCHOR 3 +//#define ANX 1 +//#define ANY 1 #define ROWS_PER_GROUP 4 #define ROWS_PER_GROUP_BITS 2 @@ -185,7 +185,7 @@ __kernel void filter2D_C1_D0(__global uchar *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -295,7 +295,7 @@ __kernel void filter2D_C1_D5(__global float *src, int src_step, int src_offset_x for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) @@ -410,7 +410,7 @@ __kernel void filter2D_C4_D0(__global uchar4 *src, int src_step, int src_offset_ for(int i = 0; i < ANCHOR; i++) { -#pragma unroll 3 +#pragma unroll for(int j = 0; j < ANCHOR; j++) { if(dst_rows_index < dst_rows_end) diff --git a/modules/ocl/src/opencl/filtering_morph.cl b/modules/ocl/src/opencl/filtering_morph.cl index 49640008f4..e659a59f51 100644 --- a/modules/ocl/src/opencl/filtering_morph.cl +++ b/modules/ocl/src/opencl/filtering_morph.cl @@ -120,7 +120,7 @@ __kernel void 
morph_C1_D0(__global const uchar * restrict src, int gidy = get_global_id(1); int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel); - if(gidx+3p[1][0])); int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0])); + float nodethreshold = w.w * variance_norm_factor; info1.x +=lcl_off; @@ -261,8 +251,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; + bool passThres = classsum >= nodethreshold; +#if STUMP_BASED + stage_sum += passThres ? alpha3.y : alpha3.x; nodecounter++; + nodeloop++; +#else + bool isRootNode = (nodecounter & 1) == 0; + if(isRootNode) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + nodecounter ++; + } + else + { + stage_sum += alpha3.x; + nodecounter += 2; + nodeloop ++; + } + } + else + { + stage_sum += passThres ? 
alpha3.z : alpha3.y; + nodecounter ++; + nodeloop ++; + } +#endif } result = (stage_sum >= stagethreshold); @@ -301,18 +317,20 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa if(lcl_compute_win_id < queuecount) { - int tempnodecounter = lcl_compute_id; float part_sum = 0.f; - for(int lcl_loop=0; lcl_loopp[0][0])); int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3*)(&(currentnodeptr->alpha[0])); float nodethreshold = w.w * variance_norm_factor; info1.x +=queue_pixel; @@ -332,8 +350,34 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - tempnodecounter +=lcl_compute_win; + bool passThres = classsum >= nodethreshold; +#if STUMP_BASED + part_sum += passThres ? alpha3.y : alpha3.x; + tempnodecounter += lcl_compute_win; + lcl_loop++; +#else + if(root_offset == 0) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + root_offset = 1; + } + else + { + part_sum += alpha3.x; + tempnodecounter += lcl_compute_win; + lcl_loop++; + } + } + else + { + part_sum += passThres ? 
alpha3.z : alpha3.y; + tempnodecounter += lcl_compute_win; + lcl_loop++; + root_offset = 0; + } +#endif }//end for(int lcl_loop=0;lcl_looptwo_rects) -{ - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - - classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; - stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - - counter++; - } -} -else -{ - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t = node[counter].threshold*variance_norm_factor; - classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; - classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; - - if( node[counter].p0[2] ) - classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; - - stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify - - counter++; - } -} -*/ -/* -__kernel void gpuRunHaarClassifierCascade_ScaleWindow( - constant GpuHidHaarClassifierCascade * _cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - //global GpuHidHaarClassifier * classifierptr, - global GpuHidHaarTreeNode * nodeptr, - global int * sum, - global float * sqsum, - global int * _candidate, - int pixel_step, - int cols, - int rows, - int start_stage, - int end_stage, - //int counts, - int nodenum, - int ystep, - int detect_width, - //int detect_height, - int loopcount, - int outputstep) - //float scalefactor) -{ -unsigned int x1 = get_global_id(0); -unsigned int y1 = get_global_id(1); -int p_offset; -int m, n; -int result; -int counter; -float mean, variance_norm_factor; -for(int i=0;ip1 - cascade->p0; -int window_height = window_width; -result = 1; -counter = 0; -unsigned int x = mul24(x1,ystep); -unsigned int y = mul24(y1,ystep); -if((x < cols - window_width - 1) && (y < rows - window_height -1)) -{ -global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr 
+cascade->count*i+ start_stage; -//global GpuHidHaarClassifier *classifier = classifierptr; -global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; - -p_offset = mad24(y, pixel_step, x);// modify - -mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - - *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) - *cascade->inv_window_area; - -variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - - *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); -variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; -variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify - -// if( cascade->is_stump_based ) -//{ -for( m = start_stage; m < end_stage; m++ ) -{ -float stage_sum = 0.f; -float t, classsum; -GpuHidHaarTreeNode t1; - -//#pragma unroll -for( n = 0; n < stagecascade->count; n++ ) -{ - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; - - if((t1.p0[2]) && (!stagecascade->two_rects)) - classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; - - stage_sum += classsum >= t ? 
t1.alpha[1] : t1.alpha[0];// modify - counter++; -} - -if (stage_sum < stagecascade->threshold) -{ - result = 0; - break; -} - -stagecascade++; - -} -if(result) -{ - candidate[4 * (y1 * detect_width + x1)] = x; - candidate[4 * (y1 * detect_width + x1) + 1] = y; - candidate[4 * (y1 * detect_width + x1)+2] = window_width; - candidate[4 * (y1 * detect_width + x1) + 3] = window_height; -} -//} -} -} -} -*/ - - - - diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl index 44877f3860..8507972ff2 100644 --- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl +++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl @@ -17,7 +17,7 @@ // @Authors // Wu Xinglong, wxl370@126.com // Sen Liu, swjtuls1987@126.com -// +// Peng Xiao, pengxiao@outlook.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -49,25 +49,13 @@ #define CV_HAAR_FEATURE_MAX 3 typedef int sumtype; typedef float sqsumtype; -typedef struct __attribute__((aligned(128))) GpuHidHaarFeature -{ - struct __attribute__((aligned(32))) -{ - int p0 __attribute__((aligned(4))); - int p1 __attribute__((aligned(4))); - int p2 __attribute__((aligned(4))); - int p3 __attribute__((aligned(4))); - float weight __attribute__((aligned(4))); -} -rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned(32))); -} -GpuHidHaarFeature; + typedef struct __attribute__((aligned(128))) GpuHidHaarTreeNode { int p[CV_HAAR_FEATURE_MAX][4] __attribute__((aligned(64))); float weight[CV_HAAR_FEATURE_MAX] /*__attribute__((aligned (16)))*/; float threshold /*__attribute__((aligned (4)))*/; - float alpha[2] __attribute__((aligned(8))); + float alpha[3] __attribute__((aligned(16))); int left __attribute__((aligned(4))); int right __attribute__((aligned(4))); } @@ -174,45 +162,83 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( const int p_offset = mad24(y, step, x); cascadeinfo.x += 
p_offset; cascadeinfo.z += p_offset; - mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - - sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]) + mean = (sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] + - sum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + + sum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]) * correction_t; - variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - - sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]; + variance_norm_factor = sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.x), 0, max_idx)] + - sqsum[clamp(mad24(cascadeinfo.y, step, cascadeinfo.z), 0, max_idx)] - + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.x), 0, max_idx)] + + sqsum[clamp(mad24(cascadeinfo.w, step, cascadeinfo.z), 0, max_idx)]; variance_norm_factor = variance_norm_factor * correction_t - mean * mean; variance_norm_factor = variance_norm_factor >= 0.f ? 
sqrt(variance_norm_factor) : 1.f; bool result = true; nodecounter = startnode + nodecount * scalei; - for (int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++) { float stage_sum = 0.f; int stagecount = stagecascadeptr[stageloop].count; - for (int nodeloop = 0; nodeloop < stagecount; nodeloop++) + for (int nodeloop = 0; nodeloop < stagecount;) { __global GpuHidHaarTreeNode *currentnodeptr = (nodeptr + nodecounter); int4 info1 = *(__global int4 *)(&(currentnodeptr->p[0][0])); int4 info2 = *(__global int4 *)(&(currentnodeptr->p[1][0])); int4 info3 = *(__global int4 *)(&(currentnodeptr->p[2][0])); float4 w = *(__global float4 *)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2 *)(&(currentnodeptr->alpha[0])); + float3 alpha3 = *(__global float3 *)(&(currentnodeptr->alpha[0])); float nodethreshold = w.w * variance_norm_factor; + info1.x += p_offset; info1.z += p_offset; info2.x += p_offset; info2.z += p_offset; - float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] - - sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x; - classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] - - sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y; info3.x += p_offset; info3.z += p_offset; - classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] - - sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z; - stage_sum += classsum >= nodethreshold ? 
alpha2.y : alpha2.x; + float classsum = (sum[clamp(mad24(info1.y, step, info1.x), 0, max_idx)] + - sum[clamp(mad24(info1.y, step, info1.z), 0, max_idx)] - + sum[clamp(mad24(info1.w, step, info1.x), 0, max_idx)] + + sum[clamp(mad24(info1.w, step, info1.z), 0, max_idx)]) * w.x; + classsum += (sum[clamp(mad24(info2.y, step, info2.x), 0, max_idx)] + - sum[clamp(mad24(info2.y, step, info2.z), 0, max_idx)] - + sum[clamp(mad24(info2.w, step, info2.x), 0, max_idx)] + + sum[clamp(mad24(info2.w, step, info2.z), 0, max_idx)]) * w.y; + classsum += (sum[clamp(mad24(info3.y, step, info3.x), 0, max_idx)] + - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] - + sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)] + + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z; + + bool passThres = classsum >= nodethreshold; + +#if STUMP_BASED + stage_sum += passThres ? alpha3.y : alpha3.x; nodecounter++; + nodeloop++; +#else + bool isRootNode = (nodecounter & 1) == 0; + if(isRootNode) + { + if( (passThres && currentnodeptr->right) || + (!passThres && currentnodeptr->left)) + { + nodecounter ++; + } + else + { + stage_sum += alpha3.x; + nodecounter += 2; + nodeloop ++; + } + } + else + { + stage_sum += (passThres ? 
alpha3.z : alpha3.y); + nodecounter ++; + nodeloop ++; + } +#endif } - result = (bool)(stage_sum >= stagecascadeptr[stageloop].threshold); + result = (int)(stage_sum >= stagecascadeptr[stageloop].threshold); } barrier(CLK_LOCAL_MEM_FENCE); @@ -222,7 +248,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2( int queueindex = atomic_inc(lclcount); lcloutindex[queueindex] = (y << 16) | x; } - barrier(CLK_LOCAL_MEM_FENCE); int queuecount = lclcount[0]; @@ -277,5 +302,6 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH newnode[counter].threshold = t1.threshold; newnode[counter].alpha[0] = t1.alpha[0]; newnode[counter].alpha[1] = t1.alpha[1]; + newnode[counter].alpha[2] = t1.alpha[2]; } diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl index 15742d6c5e..1911a72016 100644 --- a/modules/ocl/src/opencl/imgproc_calcHarris.cl +++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl @@ -130,28 +130,29 @@ __kernel void calcHarris(__global const float *Dx,__global const float *Dy, __gl data[2][i] = dy_data[i] * dy_data[i]; } #else - for(int i=0; i < ksY+1; i++) - { + int clamped_col = min(dst_cols, col); + for(int i=0; i < ksY+1; i++) + { int dx_selected_row; int dx_selected_col; dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows); dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row); - dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols); - dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col); + dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols); + dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col); dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col]; int dy_selected_row; int dy_selected_col; dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows); dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row); - dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols); - 
dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col); + dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols); + dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col); dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col]; data[0][i] = dx_data[i] * dx_data[i]; data[1][i] = dx_data[i] * dy_data[i]; data[2][i] = dy_data[i] * dy_data[i]; - } + } #endif float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0; for(int i=1; i < ksY; i++) diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl index 662fbb07b9..462ec77925 100644 --- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl +++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl @@ -130,28 +130,30 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy, data[2][i] = dy_data[i] * dy_data[i]; } #else - for(int i=0; i < ksY+1; i++) - { + int clamped_col = min(dst_cols, col); + + for(int i=0; i < ksY+1; i++) + { int dx_selected_row; int dx_selected_col; dx_selected_row = ADDR_H(dx_startY+i, 0, dx_whole_rows); dx_selected_row = ADDR_B(dx_startY+i, dx_whole_rows, dx_selected_row); - dx_selected_col = ADDR_L(dx_startX+col, 0, dx_whole_cols); - dx_selected_col = ADDR_R(dx_startX+col, dx_whole_cols, dx_selected_col); + dx_selected_col = ADDR_L(dx_startX+clamped_col, 0, dx_whole_cols); + dx_selected_col = ADDR_R(dx_startX+clamped_col, dx_whole_cols, dx_selected_col); dx_data[i] = Dx[dx_selected_row * (dx_step>>2) + dx_selected_col]; int dy_selected_row; int dy_selected_col; dy_selected_row = ADDR_H(dy_startY+i, 0, dy_whole_rows); dy_selected_row = ADDR_B(dy_startY+i, dy_whole_rows, dy_selected_row); - dy_selected_col = ADDR_L(dy_startX+col, 0, dy_whole_cols); - dy_selected_col = ADDR_R(dy_startX+col, dy_whole_cols, dy_selected_col); + dy_selected_col = ADDR_L(dy_startX+clamped_col, 0, dy_whole_cols); + dy_selected_col = ADDR_R(dy_startX+clamped_col, dy_whole_cols, dy_selected_col); 
dy_data[i] = Dy[dy_selected_row * (dy_step>>2) + dy_selected_col]; data[0][i] = dx_data[i] * dx_data[i]; data[1][i] = dx_data[i] * dy_data[i]; data[2][i] = dy_data[i] * dy_data[i]; - } + } #endif float sum0 = 0.0, sum1 = 0.0, sum2 = 0.0; for(int i=1; i < ksY; i++) diff --git a/modules/ocl/src/opencl/imgproc_canny.cl b/modules/ocl/src/opencl/imgproc_canny.cl index ceaaed1eb6..5402759e3c 100644 --- a/modules/ocl/src/opencl/imgproc_canny.cl +++ b/modules/ocl/src/opencl/imgproc_canny.cl @@ -297,6 +297,9 @@ calcMap map_step /= sizeof(*map); map_offset /= sizeof(*map); + mag += mag_offset; + map += map_offset; + __local float smem[18][18]; int gidx = get_global_id(0); @@ -389,7 +392,7 @@ edgesHysteresisLocal ( __global int * map, __global ushort2 * st, - volatile __global unsigned int * counter, + __global unsigned int * counter, int rows, int cols, int map_step, @@ -399,6 +402,8 @@ edgesHysteresisLocal map_step /= sizeof(*map); map_offset /= sizeof(*map); + map += map_offset; + __local int smem[18][18]; int gidx = get_global_id(0); @@ -416,12 +421,12 @@ edgesHysteresisLocal if(ly < 14) { smem[ly][lx] = - map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step + map_offset]; + map[grp_idx + lx + min(grp_idy + ly, rows - 1) * map_step]; } if(ly < 4 && grp_idy + ly + 14 <= rows && grp_idx + lx <= cols) { smem[ly + 14][lx] = - map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step + map_offset]; + map[grp_idx + lx + min(grp_idy + ly + 14, rows - 1) * map_step]; } barrier(CLK_LOCAL_MEM_FENCE); @@ -482,14 +487,17 @@ edgesHysteresisLocal __constant int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1}; __constant int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1}; + #define stack_size 512 __kernel -void edgesHysteresisGlobal +void +__attribute__((reqd_work_group_size(128,1,1))) +edgesHysteresisGlobal ( __global int * map, __global ushort2 * st1, __global ushort2 * st2, - volatile __global int * counter, + __global int * counter, int rows, int cols, int count, @@ -501,6 +509,8 @@ void 
edgesHysteresisGlobal map_step /= sizeof(*map); map_offset /= sizeof(*map); + map += map_offset; + int gidx = get_global_id(0); int gidy = get_global_id(1); @@ -510,7 +520,7 @@ void edgesHysteresisGlobal int grp_idx = get_group_id(0); int grp_idy = get_group_id(1); - volatile __local unsigned int s_counter; + __local unsigned int s_counter; __local unsigned int s_ind; __local ushort2 s_st[stack_size]; @@ -564,9 +574,9 @@ void edgesHysteresisGlobal pos.x += c_dx[lidx & 7]; pos.y += c_dy[lidx & 7]; - if (map[pos.x + map_offset + pos.y * map_step] == 1) + if (map[pos.x + pos.y * map_step] == 1) { - map[pos.x + map_offset + pos.y * map_step] = 2; + map[pos.x + pos.y * map_step] = 2; ind = atomic_inc(&s_counter); @@ -621,6 +631,6 @@ void getEdges if(gidy < rows && gidx < cols) { - dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step] >> 1)); + dst[gidx + gidy * dst_step] = (uchar)(-(map[gidx + 1 + (gidy + 1) * map_step + map_offset] >> 1)); } } diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl new file mode 100644 index 0000000000..0d010f7a5b --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_clahe.cl @@ -0,0 +1,275 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Sen Liu, swjtuls1987@126.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif + +int calc_lut(__local int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid == 0) + { + for (int i = 1; i < 256; ++i) + { + smem[i] += smem[i - 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + return smem[tid]; +} + +#ifdef CPU +void reduce(volatile __local int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) + { + smem[tid] = val += smem[tid + 128]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) + { + smem[tid] = val += smem[tid + 64]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + smem[tid] += smem[tid + 32]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + smem[tid] += smem[tid + 16]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { + smem[tid] += smem[tid + 8]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 4) + { + smem[tid] += smem[tid + 4]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 2) + { + smem[tid] += smem[tid + 2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 1) + { + smem[256] = smem[tid] + smem[tid + 1]; + } + barrier(CLK_LOCAL_MEM_FENCE); +} +#else +void reduce(__local volatile int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) + { + smem[tid] = val += smem[tid + 128]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) + { + smem[tid] = val += smem[tid + 64]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + smem[tid] += smem[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif + smem[tid] += smem[tid + 16]; +#if WAVE_SIZE < 16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { +#endif + smem[tid] += smem[tid + 8]; + smem[tid] += smem[tid + 4]; + smem[tid] += smem[tid + 2]; + smem[tid] += smem[tid + 1]; + } +} +#endif + +__kernel void calcLut(__global __const uchar * src, __global uchar * lut, + const int srcStep, const int dstStep, + const int2 
tileSize, const int tilesX, + const int clipLimit, const float lutScale) +{ + __local int smem[512]; + + const int tx = get_group_id(0); + const int ty = get_group_id(1); + const unsigned int tid = get_local_id(1) * get_local_size(0) + + get_local_id(0); + + smem[tid] = 0; + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1)) + { + __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i, + srcStep, tx * tileSize.x ); + for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0)) + { + const int data = srcPtr[j]; + atomic_inc(&smem[data]); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + int tHistVal = smem[tid]; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (clipLimit > 0) + { + // clip histogram bar + + int clipped = 0; + if (tHistVal > clipLimit) + { + clipped = tHistVal - clipLimit; + tHistVal = clipLimit; + } + + // find number of overall clipped samples + + reduce(smem, clipped, tid); + barrier(CLK_LOCAL_MEM_FENCE); +#ifdef CPU + clipped = smem[256]; +#else + clipped = smem[0]; +#endif + + // broadcast evaluated value + + __local int totalClipped; + + if (tid == 0) + totalClipped = clipped; + barrier(CLK_LOCAL_MEM_FENCE); + + // redistribute clipped samples evenly + + int redistBatch = totalClipped / 256; + tHistVal += redistBatch; + + int residual = totalClipped - redistBatch * 256; + if (tid < residual) + ++tHistVal; + } + + const int lutVal = calc_lut(smem, tHistVal, tid); + uint ires = (uint)convert_int_rte(lutScale * lutVal); + lut[(ty * tilesX + tx) * dstStep + tid] = + convert_uchar(clamp(ires, (uint)0, (uint)255)); +} + +__kernel void transform(__global __const uchar * src, + __global uchar * dst, + __global uchar * lut, + const int srcStep, const int dstStep, const int lutStep, + const int cols, const int rows, + const int2 tileSize, + const int tilesX, const int tilesY) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x >= cols || y >= rows) + return; + + const 
float tyf = (convert_float(y) / tileSize.y) - 0.5f; + int ty1 = convert_int_rtn(tyf); + int ty2 = ty1 + 1; + const float ya = tyf - ty1; + ty1 = max(ty1, 0); + ty2 = min(ty2, tilesY - 1); + + const float txf = (convert_float(x) / tileSize.x) - 0.5f; + int tx1 = convert_int_rtn(txf); + int tx2 = tx1 + 1; + const float xa = txf - tx1; + tx1 = max(tx1, 0); + tx2 = min(tx2, tilesX - 1); + + const int srcVal = src[mad24(y, srcStep, x)]; + + float res = 0; + + res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya)); + res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya)); + res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya)); + res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya)); + + uint ires = (uint)convert_int_rte(res); + dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255)); +} diff --git a/modules/ocl/src/opencl/imgproc_gfft.cl b/modules/ocl/src/opencl/imgproc_gfft.cl new file mode 100644 index 0000000000..5fa27ffc1b --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_gfft.cl @@ -0,0 +1,276 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef WITH_MASK +#define WITH_MASK 0 +#endif + +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + +inline float ELEM_INT2(image2d_t _eig, int _x, int _y) +{ + return read_imagef(_eig, sampler, (int2)(_x, _y)).x; +} + +inline float ELEM_FLT2(image2d_t _eig, float2 pt) +{ + return read_imagef(_eig, sampler, pt).x; +} + +__kernel + void findCorners + ( + image2d_t eig, + __global const char * mask, + __global float2 * corners, + const int mask_strip,// in pixels + const float threshold, + const int rows, + const int cols, + const int max_count, + __global int * g_counter + ) +{ + const int j = get_global_id(0); + const int i = get_global_id(1); + + if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 +#if WITH_MASK + && mask[i * mask_strip + j] != 0 +#endif + ) + { + const float val = ELEM_INT2(eig, j, i); + + if (val > threshold) + { + float maxVal = val; + + maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal); + + maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal); + maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal); + + if (val == maxVal) + { + const int ind = atomic_inc(g_counter); + + if (ind < max_count) + corners[ind] = (float2)(j, i); + } + } + } +} + +//bitonic sort +__kernel + void sortCorners_bitonicSort + ( + image2d_t eig, + __global float2 * corners, + const int count, + const int stage, + const int passOfStage + ) +{ + const int threadId = get_global_id(0); + if(threadId >= count / 2) + { + return; + } + + const int sortOrder = (((threadId/(1 << stage)) % 2)) == 1 ? 
1 : 0; // 0 is descent + + const int pairDistance = 1 << (stage - passOfStage); + const int blockWidth = 2 * pairDistance; + + const int leftId = min( (threadId % pairDistance) + + (threadId / pairDistance) * blockWidth, count ); + + const int rightId = min( leftId + pairDistance, count ); + + const float2 leftPt = corners[leftId]; + const float2 rightPt = corners[rightId]; + + const float leftVal = ELEM_FLT2(eig, leftPt); + const float rightVal = ELEM_FLT2(eig, rightPt); + + const bool compareResult = leftVal > rightVal; + + float2 greater = compareResult ? leftPt:rightPt; + float2 lesser = compareResult ? rightPt:leftPt; + + corners[leftId] = sortOrder ? lesser : greater; + corners[rightId] = sortOrder ? greater : lesser; +} + +//selection sort for gfft +//kernel is ported from Bolt library: +//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl +// Local sort will firstly sort elements of each workgroup using selection sort +// its performance is O(n) +__kernel + void sortCorners_selectionSortLocal + ( + image2d_t eig, + __global float2 * corners, + const int count, + __local float2 * scratch + ) +{ + int i = get_local_id(0); // index in workgroup + int numOfGroups = get_num_groups(0); // index in workgroup + int groupID = get_group_id(0); + int wg = get_local_size(0); // workgroup size = block size + int n; // number of elements to be processed for this work group + + int offset = groupID * wg; + int same = 0; + corners += offset; + n = (groupID == (numOfGroups-1))? 
(count - wg*(numOfGroups-1)) : wg; + float2 pt1, pt2; + + pt1 = corners[min(i, n)]; + scratch[i] = pt1; + barrier(CLK_LOCAL_MEM_FENCE); + + if(i >= n) + { + return; + } + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + int pos = 0; + for (int j=0;j val1) + pos++;//calculate the rank of this element in this work group + else + { + if(val1 > val2) + continue; + else + { + // val1 and val2 are same + same++; + } + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} +__kernel + void sortCorners_selectionSortFinal + ( + image2d_t eig, + __global float2 * corners, + const int count + ) +{ + const int i = get_local_id(0); // index in workgroup + const int numOfGroups = get_num_groups(0); // index in workgroup + const int groupID = get_group_id(0); + const int wg = get_local_size(0); // workgroup size = block size + int pos = 0, same = 0; + const int offset = get_group_id(0) * wg; + const int remainder = count - wg*(numOfGroups-1); + + if((offset + i ) >= count) + return; + float2 pt1, pt2; + pt1 = corners[groupID*wg + i]; + + float val1 = ELEM_FLT2(eig, pt1); + float val2; + + for(int j=0; j val2) + break; + else + { + //Increment only if the value is not the same. + if( val2 > val1 ) + pos++; + else + same++; + } + } + } + + for(int k=0; k val2) + break; + else + { + //Don't increment if the value is the same. 
+ //Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false + if(val2 > val1) + pos++; + else + same++; + } + } + for (int j=0; j< same; j++) + corners[pos + j] = pt1; +} + diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl index 8ad501f7c1..9162abb7ef 100644 --- a/modules/ocl/src/opencl/imgproc_threshold.cl +++ b/modules/ocl/src/opencl/imgproc_threshold.cl @@ -143,7 +143,7 @@ __kernel void threshold_C1_D5(__global const float * restrict src, __global floa int4 dpos = (int4)(dstart, dstart+1, dstart+2, dstart+3); float4 dVal = *(__global float4*)(dst+dst_offset+gy*dst_step+dstart); int4 con = dpos >= 0 && dpos < dst_cols; - ddata = convert_float4(con) != 0 ? ddata : dVal; + ddata = convert_float4(con) != (float4)(0) ? ddata : dVal; if(dstart < dst_cols) { *(__global float4*)(dst+dst_offset+gy*dst_step+dstart) = ddata; diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl index 2378f4f849..71313017a9 100644 --- a/modules/ocl/src/opencl/moments.cl +++ b/modules/ocl/src/opencl/moments.cl @@ -173,10 +173,10 @@ __kernel void dst_sum(int src_rows, int src_cols, int tile_height, int tile_widt sum[i] = dst_sum[i][0]; } -__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, +__kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_cols, int src_step, __global F* dst_m, int dst_cols, int dst_step, int blocky, - int type, int depth, int cn, int coi, int binary, int TILE_SIZE) + int depth, int cn, int coi, int binary, int TILE_SIZE) { uchar tmp_coi[16]; // get the coi data uchar16 tmp[16]; @@ -192,35 +192,43 @@ __kernel void CvMoments_D0(__global uchar16* src_data, int src_rows, int src_col int x = wgidx*TILE_SIZE; // vector length of uchar int kcn = (cn==2)?2:4; int rstep = min(src_step, TILE_SIZE); - tileSize_height = min(TILE_SIZE, src_rows - y); - 
tileSize_width = min(TILE_SIZE, src_cols - x); + int tileSize_height = min(TILE_SIZE, src_rows - y); + int tileSize_width = min(TILE_SIZE, src_cols - x); + + if ( y+lidy < src_rows ) + { + if( tileSize_width < TILE_SIZE ) + for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ ) + *((__global uchar*)src_data+(y+lidy)*src_step+x+i) = 0; + + if( coi > 0 ) //channel of interest + for(int i = 0; i < tileSize_width; i += VLEN_C) + { + for(int j=0; j 0 ) //channel of interest - for(int i = 0; i < tileSize_width; i += VLEN_C) - { - for(int j=0; j TILE_SIZE && tileSize_width < TILE_SIZE) - for(int i=tileSize_width; i < rstep; i++ ) - *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0; - if( coi > 0 ) - for(int i=0; i < tileSize_width; i+=VLEN_US) - { - for(int j=0; j TILE_SIZE && tileSize_width < TILE_SIZE) + for(int i=tileSize_width; i < rstep && (x+i) < src_cols; i++ ) + *((__global ushort*)src_data+(y+lidy)*src_step/2+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_US) + { + for(int j=0; j= 1; j = j/2 ) { if(lidy < j) for( int i = 0; i < 10; i++ ) lm[i] = lm[i] + m[i][lidy]; - barrier(CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) + { if(lidy >= j/2&&lidy < j) for( int i = 0; i < 10; i++ ) m[i][lidy-j/2] = lm[i]; - barrier(CLK_LOCAL_MEM_FENCE); } + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy == 0&&lidx == 0) { for(int mt = 0; mt < 10; mt++ ) @@ -482,10 +501,10 @@ __kernel void CvMoments_D2(__global ushort8* src_data, int src_rows, int src_col } } -__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, +__kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols, int src_step, __global F* dst_m, int dst_cols, int dst_step, int blocky, - int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) + int depth, int cn, int coi, int binary, const int TILE_SIZE) { short 
tmp_coi[8]; // get the coi data short8 tmp[32]; @@ -500,21 +519,26 @@ __kernel void CvMoments_D3(__global short8* src_data, int src_rows, int src_cols int x = wgidx*TILE_SIZE; // real X index of pixel int kcn = (cn==2)?2:4; int rstep = min(src_step/2, TILE_SIZE); - tileSize_height = min(TILE_SIZE, src_rows - y); - tileSize_width = min(TILE_SIZE, src_cols -x); - if(tileSize_width < TILE_SIZE) - for(int i = tileSize_width; i < rstep; i++ ) - *((__global short*)src_data+(y+lidy)*src_step/2+x+i) = 0; - if( coi > 0 ) - for(int i=0; i < tileSize_width; i+=VLEN_S) - { - for(int j=0; j 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_S) + { + for(int j=0; j 0 ) - for(int i=0; i < tileSize_width; i+=VLEN_F) - { -#pragma unroll - for(int j=0; j<4; j++) + + if ( y+lidy < src_rows ) + { + if(tileSize_width < TILE_SIZE) + for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ ) + *((__global float*)src_data+(y+lidy)*src_step/4+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_F) { - index = yOff+(x+i+j)*kcn+coi-1; - if (index < maxIdx) - tmp_coi[j] = *(src_data+index); - else - tmp_coi[j] = 0; + for(int j=0; j<4; j++) + tmp_coi[j] = *(src_data+(y+lidy)*src_step/4+(x+i+j)*kcn+coi-1); + tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); } - tmp[i/VLEN_F] = (float4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); - } - else - for(int i=0; i < tileSize_width && (yOff+x+i) < maxIdx; i+=VLEN_F) - tmp[i/VLEN_F] = (*(__global float4 *)(src_data+yOff+x+i)); + else + for(int i=0; i < tileSize_width; i+=VLEN_F) + tmp[i/VLEN_F] = (float4)(*(src_data+(y+lidy)*src_step/4+x+i),*(src_data+(y+lidy)*src_step/4+x+i+1),*(src_data+(y+lidy)*src_step/4+x+i+2),*(src_data+(y+lidy)*src_step/4+x+i+3)); + } + float4 zero = (float4)(0); float4 full = (float4)(255); if( binary ) @@ -688,10 +708,9 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols tmp[i/VLEN_F] = (tmp[i/VLEN_F]!=zero)?full:zero; F mom[10]; __local F m[10][128]; - 
if(lidy == 0) + if(lidy < 128) for(int i = 0; i < 10; i ++) - for(int j = 0; j < 128; j ++) - m[i][j] = 0; + m[i][lidy] = 0; barrier(CLK_LOCAL_MEM_FENCE); F lm[10] = {0}; F4 x0 = (F4)(0); @@ -729,185 +748,6 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols m[0][lidy-bheight] = x0.s0; // m00 } - else if(lidy < bheight) - { - lm[9] = ((F)py) * sy; // m03 - lm[8] = ((F)x1.s0) * sy; // m12 - lm[7] = ((F)x2.s0) * lidy; // m21 - lm[6] = x3.s0; // m30 - lm[5] = x0.s0 * sy; // m02 - lm[4] = x1.s0 * lidy; // m11 - lm[3] = x2.s0; // m20 - lm[2] = py; // m01 - lm[1] = x1.s0; // m10 - lm[0] = x0.s0; // m00 - } - barrier(CLK_LOCAL_MEM_FENCE); - for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) - { - if(lidy < j) - for( int i = 0; i < 10; i++ ) - lm[i] = lm[i] + m[i][lidy]; - barrier(CLK_LOCAL_MEM_FENCE); - if(lidy >= j/2&&lidy < j) - for( int i = 0; i < 10; i++ ) - m[i][lidy-j/2] = lm[i]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if(lidy == 0&&lidx == 0) - { - for( int mt = 0; mt < 10; mt++ ) - mom[mt] = (F)lm[mt]; - if(binary) - { - F s = 1./255; - for( int mt = 0; mt < 10; mt++ ) - mom[mt] *= s; - } - - F xm = x * mom[0], ym = y * mom[0]; - - // accumulate moments computed in each tile - dst_step /= sizeof(F); - - int dst_x_off = mad24(wgidy, dst_cols, wgidx); - int dst_off = 0; - int max_dst_index = 10 * blocky * get_global_size(1); - - // + m00 ( = m00' ) - dst_off = mad24(DST_ROW_00 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[0]; - - // + m10 ( = m10' + x*m00' ) - dst_off = mad24(DST_ROW_10 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[1] + xm; - - // + m01 ( = m01' + y*m00' ) - dst_off = mad24(DST_ROW_01 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[2] + ym; - - // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) - dst_off = mad24(DST_ROW_20 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = 
mom[3] + x * (mom[1] * 2 + xm); - - // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) - dst_off = mad24(DST_ROW_11 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[4] + x * (mom[2] + ym) + y * mom[1]; - - // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) - dst_off = mad24(DST_ROW_02 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[5] + y * (mom[2] * 2 + ym); - - // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) - dst_off = mad24(DST_ROW_30 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); - - // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') - dst_off = mad24(DST_ROW_21 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; - - // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') - dst_off = mad24(DST_ROW_12 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; - - // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) - dst_off = mad24(DST_ROW_03 * blocky, dst_step, dst_x_off); - if (dst_off < max_dst_index) - *(dst_m + dst_off) = mom[9] + y * (3. * mom[5] + y * (3. 
* mom[2] + ym)); - } -} - -__kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, int src_step, int tileSize_width, int tileSize_height, - __global F* dst_m, - int dst_cols, int dst_step, int blocky, - int type, int depth, int cn, int coi, int binary, const int TILE_SIZE) -{ - F tmp_coi[4]; // get the coi data - F4 tmp[64]; - int VLEN_D = 4; // length of vetor - int gidy = get_global_id(0); - int gidx = get_global_id(1); - int wgidy = get_group_id(0); - int wgidx = get_group_id(1); - int lidy = get_local_id(0); - int lidx = get_local_id(1); - int y = wgidy*TILE_SIZE; // real Y index of pixel - int x = wgidx*TILE_SIZE; // real X index of pixel - int kcn = (cn==2)?2:4; - int rstep = min(src_step/8, TILE_SIZE); - tileSize_height = min(TILE_SIZE, src_rows - y); - tileSize_width = min(TILE_SIZE, src_cols - x); - - if(tileSize_width < TILE_SIZE) - for(int i = tileSize_width; i < rstep; i++ ) - *((__global F*)src_data+(y+lidy)*src_step/8+x+i) = 0; - if( coi > 0 ) - for(int i=0; i < tileSize_width; i+=VLEN_D) - { - for(int j=0; j<4; j++) - tmp_coi[j] = *(src_data+(y+lidy)*src_step/8+(x+i+j)*kcn+coi-1); - tmp[i/VLEN_D] = (F4)(tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]); - } - else - for(int i=0; i < tileSize_width; i+=VLEN_D) - tmp[i/VLEN_D] = (F4)(*(src_data+(y+lidy)*src_step/8+x+i),*(src_data+(y+lidy)*src_step/8+x+i+1),*(src_data+(y+lidy)*src_step/8+x+i+2),*(src_data+(y+lidy)*src_step/8+x+i+3)); - F4 zero = (F4)(0); - F4 full = (F4)(255); - if( binary ) - for(int i=0; i < tileSize_width; i+=VLEN_D) - tmp[i/VLEN_D] = (tmp[i/VLEN_D]!=zero)?full:zero; - F mom[10]; - __local F m[10][128]; - if(lidy == 0) - for(int i=0; i<10; i++) - for(int j=0; j<128; j++) - m[i][j]=0; - barrier(CLK_LOCAL_MEM_FENCE); - F lm[10] = {0}; - F4 x0 = (F4)(0); - F4 x1 = (F4)(0); - F4 x2 = (F4)(0); - F4 x3 = (F4)(0); - for( int xt = 0 ; xt < tileSize_width; xt+=VLEN_D ) - { - F4 v_xt = (F4)(xt, xt+1, xt+2, xt+3); - F4 p = tmp[xt/VLEN_D]; - F4 xp = v_xt * p, xxp = xp * v_xt; - 
x0 += p; - x1 += xp; - x2 += xxp; - x3 += xxp *v_xt; - } - x0.s0 += x0.s1 + x0.s2 + x0.s3; - x1.s0 += x1.s1 + x1.s2 + x1.s3; - x2.s0 += x2.s1 + x2.s2 + x2.s3; - x3.s0 += x3.s1 + x3.s2 + x3.s3; - - F py = lidy * x0.s0, sy = lidy*lidy; - int bheight = min(tileSize_height, TILE_SIZE/2); - if(bheight >= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) - { - m[9][lidy-bheight] = ((F)py) * sy; // m03 - m[8][lidy-bheight] = ((F)x1.s0) * sy; // m12 - m[7][lidy-bheight] = ((F)x2.s0) * lidy; // m21 - m[6][lidy-bheight] = x3.s0; // m30 - m[5][lidy-bheight] = x0.s0 * sy; // m02 - m[4][lidy-bheight] = x1.s0 * lidy; // m11 - m[3][lidy-bheight] = x2.s0; // m20 - m[2][lidy-bheight] = py; // m01 - m[1][lidy-bheight] = x1.s0; // m10 - m[0][lidy-bheight] = x0.s0; // m00 - } - else if(lidy < bheight) { lm[9] = ((F)py) * sy; // m03 @@ -922,6 +762,164 @@ __kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, in lm[0] = x0.s0; // m00 } barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) + { + if(lidy < j) + for( int i = 0; i < 10; i++ ) + lm[i] = lm[i] + m[i][lidy]; + barrier(CLK_LOCAL_MEM_FENCE); + if(lidy >= j/2&&lidy < j) + for( int i = 0; i < 10; i++ ) + m[i][lidy-j/2] = lm[i]; + barrier(CLK_LOCAL_MEM_FENCE); + } + if(lidy == 0&&lidx == 0) + { + for( int mt = 0; mt < 10; mt++ ) + mom[mt] = (F)lm[mt]; + if(binary) + { + F s = 1./255; + for( int mt = 0; mt < 10; mt++ ) + mom[mt] *= s; + } + + F xm = x * mom[0], ym = y * mom[0]; + + // accumulate moments computed in each tile + dst_step /= sizeof(F); + + // + m00 ( = m00' ) + *(dst_m + mad24(DST_ROW_00 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[0]; + + // + m10 ( = m10' + x*m00' ) + *(dst_m + mad24(DST_ROW_10 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[1] + xm; + + // + m01 ( = m01' + y*m00' ) + *(dst_m + mad24(DST_ROW_01 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[2] + ym; + + // + m20 ( = m20' + 2*x*m10' + x*x*m00' ) + *(dst_m + 
mad24(DST_ROW_20 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[3] + x * (mom[1] * 2 + xm); + + // + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' ) + *(dst_m + mad24(DST_ROW_11 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[4] + x * (mom[2] + ym) + y * mom[1]; + + // + m02 ( = m02' + 2*y*m01' + y*y*m00' ) + *(dst_m + mad24(DST_ROW_02 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[5] + y * (mom[2] * 2 + ym); + + // + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' ) + *(dst_m + mad24(DST_ROW_30 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm)); + + // + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20') + *(dst_m + mad24(DST_ROW_21 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3]; + + // + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02') + *(dst_m + mad24(DST_ROW_12 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5]; + + // + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' ) + *(dst_m + mad24(DST_ROW_03 * blocky, dst_step, mad24(wgidy, dst_cols, wgidx))) = mom[9] + y * (3. * mom[5] + y * (3. 
* mom[2] + ym)); + } +} + +__kernel void CvMoments_D6(__global F* src_data, int src_rows, int src_cols, int src_step, + __global F* dst_m, + int dst_cols, int dst_step, int blocky, + int depth, int cn, int coi, int binary, const int TILE_SIZE) +{ + F tmp_coi[4]; // get the coi data + F4 tmp[64]; + int VLEN_D = 4; // length of vetor + int gidy = get_global_id(0); + int gidx = get_global_id(1); + int wgidy = get_group_id(0); + int wgidx = get_group_id(1); + int lidy = get_local_id(0); + int lidx = get_local_id(1); + int y = wgidy*TILE_SIZE; // real Y index of pixel + int x = wgidx*TILE_SIZE; // real X index of pixel + int kcn = (cn==2)?2:4; + int rstep = min(src_step/8, TILE_SIZE); + int tileSize_height = min(TILE_SIZE, src_rows - y); + int tileSize_width = min(TILE_SIZE, src_cols - x); + + if ( y+lidy < src_rows ) + { + if(tileSize_width < TILE_SIZE) + for(int i = tileSize_width; i < rstep && (x+i) < src_cols; i++ ) + *((__global F*)src_data+(y+lidy)*src_step/8+x+i) = 0; + if( coi > 0 ) + for(int i=0; i < tileSize_width; i+=VLEN_D) + { + for(int j=0; j<4 && ((x+i+j)*kcn+coi-1)= TILE_SIZE/2&&lidy > bheight-1&&lidy < tileSize_height) + { + m[9][lidy-bheight] = ((F)py) * sy; // m03 + m[8][lidy-bheight] = ((F)x1.s0) * sy; // m12 + m[7][lidy-bheight] = ((F)x2.s0) * lidy; // m21 + m[6][lidy-bheight] = x3.s0; // m30 + m[5][lidy-bheight] = x0.s0 * sy; // m02 + m[4][lidy-bheight] = x1.s0 * lidy; // m11 + m[3][lidy-bheight] = x2.s0; // m20 + m[2][lidy-bheight] = py; // m01 + m[1][lidy-bheight] = x1.s0; // m10 + m[0][lidy-bheight] = x0.s0; // m00 + } + else if(lidy < bheight) + { + lm[9] = ((F)py) * sy; // m03 + lm[8] = ((F)x1.s0) * sy; // m12 + lm[7] = ((F)x2.s0) * lidy; // m21 + lm[6] = x3.s0; // m30 + lm[5] = x0.s0 * sy; // m02 + lm[4] = x1.s0 * lidy; // m11 + lm[3] = x2.s0; // m20 + lm[2] = py; // m01 + lm[1] = x1.s0; // m10 + lm[0] = x0.s0; // m00 + } + barrier(CLK_LOCAL_MEM_FENCE); + for( int j = TILE_SIZE/2; j >= 1; j = j/2 ) { if(lidy < j) diff --git 
a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl index 8852facae8..05d538330f 100644 --- a/modules/ocl/src/opencl/objdetect_hog.cl +++ b/modules/ocl/src/opencl/objdetect_hog.cl @@ -43,7 +43,6 @@ // //M*/ - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 @@ -51,6 +50,100 @@ #define NTHREADS 256 #define CV_PI_F 3.1415926535897932384626433832795f +//---------------------------------------------------------------------------- +// Histogram computation +// 12 threads for a cell, 12x4 threads per block +// Use pre-computed gaussian and interp_weight lookup tables if sigma is 4.0f +__kernel void compute_hists_lut_kernel( + const int cblock_stride_x, const int cblock_stride_y, + const int cnbins, const int cblock_hist_size, const int img_block_width, + const int blocks_in_group, const int blocks_total, + const int grad_quadstep, const int qangle_step, + __global const float* grad, __global const uchar* qangle, + __global const float* gauss_w_lut, + __global float* block_hists, __local float* smem) +{ + const int lx = get_local_id(0); + const int lp = lx / 24; /* local group id */ + const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */ + const int gidY = gid / img_block_width; + const int gidX = gid - gidY * img_block_width; + + const int lidX = lx - lp * 24; + const int lidY = get_local_id(1); + + const int cell_x = lidX / 12; + const int cell_y = lidY; + const int cell_thread_x = lidX - cell_x * 12; + + __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * + CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y); + __local float* final_hist = hists + cnbins * + (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12); + + const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x; + const int offset_y = gidY * cblock_stride_y + (cell_y << 2); + + __global const float* grad_ptr = (gid < blocks_total) ? 
+ grad + offset_y * grad_quadstep + (offset_x << 1) : grad; + __global const uchar* qangle_ptr = (gid < blocks_total) ? + qangle + offset_y * qangle_step + (offset_x << 1) : qangle; + + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + + cell_thread_x; + for (int bin_id = 0; bin_id < cnbins; ++bin_id) + hist[bin_id * 48] = 0.f; + + const int dist_x = -4 + cell_thread_x - 4 * cell_x; + const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x); + + const int dist_y_begin = -4 - 4 * lidY; + for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) + { + float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); + uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); + + grad_ptr += grad_quadstep; + qangle_ptr += qangle_step; + + int dist_center_y = dist_y - 4 * (1 - 2 * cell_y); + + int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8); + float gaussian = gauss_w_lut[idx]; + idx = (dist_y + 8) * 16 + (dist_x + 8); + float interp_weight = gauss_w_lut[256+idx]; + + hist[bin.x * 48] += gaussian * interp_weight * vote.x; + hist[bin.y * 48] += gaussian * interp_weight * vote.y; + } + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* hist_ = hist; + for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48) + { + if (cell_thread_x < 6) + hist_[0] += hist_[6]; + barrier(CLK_LOCAL_MEM_FENCE); + if (cell_thread_x < 3) + hist_[0] += hist_[3]; +#ifdef CPU + barrier(CLK_LOCAL_MEM_FENCE); +#endif + if (cell_thread_x == 0) + final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = + hist_[0] + hist_[1] + hist_[2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; + if ((tid < cblock_hist_size) && (gid < blocks_total)) + { + __global float* block_hist = block_hists + + (gidY * img_block_width + gidX) * cblock_hist_size; + block_hist[tid] = final_hist[tid]; + } +} + //---------------------------------------------------------------------------- // Histogram computation // 12 threads for a 
cell, 12x4 threads per block @@ -125,16 +218,14 @@ __kernel void compute_hists_kernel( barrier(CLK_LOCAL_MEM_FENCE); if (cell_thread_x < 3) hist_[0] += hist_[3]; -#ifdef WAVE_SIZE_1 +#ifdef CPU barrier(CLK_LOCAL_MEM_FENCE); #endif if (cell_thread_x == 0) final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; } -#ifdef WAVE_SIZE_1 barrier(CLK_LOCAL_MEM_FENCE); -#endif int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; if ((tid < cblock_hist_size) && (gid < blocks_total)) @@ -145,6 +236,57 @@ __kernel void compute_hists_kernel( } } +//------------------------------------------------------------- +// Normalization of histograms via L2Hys_norm +// optimized for the case of 9 bins +__kernel void normalize_hists_36_kernel(__global float* block_hists, + const float threshold, __local float *squares) +{ + const int tid = get_local_id(0); + const int gid = get_global_id(0); + const int bid = tid / 36; /* block-hist id, (0 - 6) */ + const int boffset = bid * 36; /* block-hist offset in the work-group */ + const int hid = tid - boffset; /* histogram bin id, (0 - 35) */ + + float elem = block_hists[gid]; + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + __local float* smem = squares + boffset; + float sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] = sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + elem = elem / (sqrt(sum) + 3.6f); + elem = min(elem, threshold); + + barrier(CLK_LOCAL_MEM_FENCE); + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] 
= sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + block_hists[gid] = elem / (sqrt(sum) + 1e-3f); +} + //------------------------------------------------------------- // Normalization of histograms via L2Hys_norm // @@ -153,76 +295,50 @@ float reduce_smem(volatile __local float* smem, int size) unsigned int tid = get_local_id(0); float sum = smem[tid]; - if (size >= 512) - { - if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 256) - { - if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 128) - { - if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; - barrier(CLK_LOCAL_MEM_FENCE); - } - + if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); } +#ifdef CPU + if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1]; + barrier(CLK_LOCAL_MEM_FENCE); } +#else if (tid < 32) { if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) - { -#endif if (size >= 32) smem[tid] = sum = sum + smem[tid + 
16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { -#endif if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { -#endif if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; } - - barrier(CLK_LOCAL_MEM_FENCE); - sum = smem[0]; +#endif return sum; } -__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, - __global float* block_hists, const float threshold, __local float *squares) +__kernel void normalize_hists_kernel( + const int nthreads, const int block_hist_size, const int img_block_width, + __global float* block_hists, const float threshold, __local float *squares) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; + __global float* hist = block_hists + (gidY * img_block_width + gidX) * + block_hist_size + tid; float elem = 0.f; if (tid < block_hist_size) @@ -249,25 +365,98 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si //--------------------------------------------------------------------- // Linear SVM based classification -// -__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, - const int img_win_width, const int img_block_width, - const int win_block_stride_x, const int win_block_stride_y, - __global const float * block_hists, __global const float* coefs, - float free_coef, float threshold, __global uchar* labels) +// 48x96 window, 9 bins and default parameters +// 180 threads, each thread 
corresponds to a bin in a row +__kernel void classify_hists_180_kernel( + const int cdescr_width, const int cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; float product = 0.f; - for (int i = tid; i < cdescr_size; i += NTHREADS) + + for (int i = 0; i < cdescr_height; i++) { - int offset_y = i / cdescr_width; - int offset_x = i - offset_y * cdescr_width; - product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; + } + + __local float products[180]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 90) products[tid] = product = product + products[tid + 90]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 45) products[tid] = product = product + products[tid + 45]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if (tid < 13) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); 
+#else + if (tid < 13) + { + smem[tid] = product = product + smem[tid + 32]; + } + if (tid < 16) + { + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + } +#endif + + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 64x128 window, 9 bins and default parameters +// 256 threads, 252 of them are used +__kernel void classify_hists_252_kernel( + const int cdescr_width, const int cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + if (tid < cdescr_width) + { + for (int i = 0; i < cdescr_height; i++) + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; } __local float products[NTHREADS]; @@ -282,67 +471,120 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr if (tid < 64) products[tid] = product = product + products[tid + 64]; barrier(CLK_LOCAL_MEM_FENCE); - volatile __local float* smem = products; + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) 
smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else if (tid < 32) - { + { smem[tid] = product = product + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) - { -#endif smem[tid] = product = product + smem[tid + 16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif smem[tid] = product = product + smem[tid + 8]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { -#endif smem[tid] = product = product + smem[tid + 4]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif smem[tid] = product = product + smem[tid + 2]; -#ifdef WAVE_SIZE_1 } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { #endif - smem[tid] = product = product + smem[tid + 1]; + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 256 threads +__kernel void classify_hists_kernel( + const int cdescr_size, const int cdescr_width, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / 
cdescr_width; + int offset_x = i - offset_y * cdescr_width; + product += coefs[i] * + hist[offset_y * img_block_width * cblock_hist_size + offset_x]; } - if (tid == 0) + __local float products[NTHREADS]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) products[tid] = product = product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 32) + { + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + } +#endif + if (tid == 0){ + smem[tid] = product = product + smem[tid + 1]; labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } } //---------------------------------------------------------------------------- // Extract descriptors -__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, - const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, - __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_rows_kernel( + const int cblock_hist_size, const int descriptors_quadstep, + const int cdescr_size, const int cdescr_width, 
const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -353,19 +595,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in } } -__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, - const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, - const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_cols_kernel( + const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, + const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY 
* win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -376,16 +622,19 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in int y = block_idx / cnblocks_win_x; int x = block_idx - y * cnblocks_win_x; - descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = + hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; } } //---------------------------------------------------------------------------- // Gradients computation -__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - const __global uchar4 * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC4_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + const __global uchar4 * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -426,8 +675,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c barrier(CLK_LOCAL_MEM_FENCE); if (x < width) { - float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); - float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], 
sh_row[tid + 2 + 2 * (NTHREADS + 2)]); + float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], + sh_row[tid + 2 * (NTHREADS + 2)]); + float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], + sh_row[tid + 2 + 2 * (NTHREADS + 2)]); float3 dx; if (correct_gamma == 1) @@ -482,9 +733,11 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c } } -__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - __global const uchar * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC1_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + __global const uchar * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -539,43 +792,4 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang); grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; } -} - -//---------------------------------------------------------------------------- -// Resize - -__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src, - int dst_offset, int src_offset, int dst_step, int src_step, - int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) -{ - int dx = get_global_id(0); - int dy = get_global_id(1); - - int sx = (int)floor(dx*ifx+0.5f); - int sy = (int)floor(dy*ify+0.5f); - sx = min(sx, src_cols-1); - sy = min(sy, src_rows-1); - int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx; - int spos = (src_offset>>2) + sy * (src_step>>2) + sx; - - if(dx 0 ? 
255 : 0); } @@ -142,7 +143,7 @@ __kernel void pyrUp_C1_D0(__global uchar* src,__global uchar* dst, sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][tidx]; if ((x < dstCols) && (y < dstRows)) - dst[x + y * dstStep] = (float)(4.0f * sum); + dst[x + y * dstStep] = convert_uchar_sat_rte(4.0f * sum); } @@ -244,7 +245,7 @@ __kernel void pyrUp_C1_D2(__global ushort* src,__global ushort* dst, sum = sum + 0.0625f * s_dstPatch[2 + tidy + 2][get_local_id(0)]; if ((x < dstCols) && (y < dstRows)) - dst[x + y * dstStep] = (float)(4.0f * sum); + dst[x + y * dstStep] = convert_short_sat_rte(4.0f * sum); } @@ -351,31 +352,6 @@ __kernel void pyrUp_C1_D5(__global float* src,__global float* dst, /////////////////////////////////////////////////////////////////////// ////////////////////////// CV_8UC4 ////////////////////////////////// /////////////////////////////////////////////////////////////////////// -float4 covert_uchar4_to_float4(uchar4 data) -{ - float4 f4Data = {0,0,0,0}; - - f4Data.x = (float)data.x; - f4Data.y = (float)data.y; - f4Data.z = (float)data.z; - f4Data.w = (float)data.w; - - return f4Data; -} - - -uchar4 convert_float4_to_uchar4(float4 data) -{ - uchar4 u4Data; - - u4Data.x = get_valid_uchar(data.x); - u4Data.y = get_valid_uchar(data.y); - u4Data.z = get_valid_uchar(data.z); - u4Data.w = get_valid_uchar(data.w); - - return u4Data; -} - __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, int srcRows,int dstRows,int srcCols,int dstCols, int srcOffset,int dstOffset,int srcStep,int dstStep) @@ -406,15 +382,15 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, srcy = abs(srcy); srcy = min(srcRows -1 ,srcy); - s_srcPatch[tidy][tidx] = covert_uchar4_to_float4(src[srcx + srcy * srcStep]); + s_srcPatch[tidy][tidx] = convert_float4(src[srcx + srcy * srcStep]); } barrier(CLK_LOCAL_MEM_FENCE); float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((tidx & 1) == 0); - const int oddFlag = (int)((tidx & 1) != 0); + const float4 evenFlag 
= (float4)((tidx & 1) == 0); + const float4 oddFlag = (float4)((tidx & 1) != 0); const bool eveny = ((tidy & 1) == 0); float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); @@ -476,38 +452,13 @@ __kernel void pyrUp_C4_D0(__global uchar4* src,__global uchar4* dst, if ((x < dstCols) && (y < dstRows)) { - dst[x + y * dstStep] = convert_float4_to_uchar4(4.0f * sum); + dst[x + y * dstStep] = convert_uchar4_sat_rte(4.0f * sum); } } + /////////////////////////////////////////////////////////////////////// ////////////////////////// CV_16UC4 ////////////////////////////////// /////////////////////////////////////////////////////////////////////// -float4 covert_ushort4_to_float4(ushort4 data) -{ - float4 f4Data = {0,0,0,0}; - - f4Data.x = (float)data.x; - f4Data.y = (float)data.y; - f4Data.z = (float)data.z; - f4Data.w = (float)data.w; - - return f4Data; -} - - -ushort4 convert_float4_to_ushort4(float4 data) -{ - ushort4 u4Data; - - u4Data.x = (float)data.x; - u4Data.y = (float)data.y; - u4Data.z = (float)data.z; - u4Data.w = (float)data.w; - - return u4Data; -} - - __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, int srcRows,int dstRows,int srcCols,int dstCols, int srcOffset,int dstOffset,int srcStep,int dstStep) @@ -535,15 +486,15 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, srcy = abs(srcy); srcy = min(srcRows -1 ,srcy); - s_srcPatch[get_local_id(1)][get_local_id(0)] = covert_ushort4_to_float4(src[srcx + srcy * srcStep]); + s_srcPatch[get_local_id(1)][get_local_id(0)] = convert_float4(src[srcx + srcy * srcStep]); } barrier(CLK_LOCAL_MEM_FENCE); float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((get_local_id(0) & 1) == 0); - const int oddFlag = (int)((get_local_id(0) & 1) != 0); + const float4 evenFlag = (float4)((get_local_id(0) & 1) == 0); + const float4 oddFlag = (float4)((get_local_id(0) & 1) != 0); const bool eveny = ((get_local_id(1) & 1) == 0); const int tidx = get_local_id(0); @@ -570,11 +521,11 @@ 
__kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, if (eveny) { - sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx - 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx - 1) >> 1)]; sum = sum + (evenFlag * co1 ) * s_srcPatch[0][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * co3) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[0][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[0][1 + ((tidx + 2) >> 1)]; } s_dstPatch[get_local_id(1)][get_local_id(0)] = sum; @@ -610,7 +561,7 @@ __kernel void pyrUp_C4_D2(__global ushort4* src,__global ushort4* dst, if ((x < dstCols) && (y < dstRows)) { - dst[x + y * dstStep] = convert_float4_to_ushort4(4.0f * sum); + dst[x + y * dstStep] = convert_ushort4_sat_rte(4.0f * sum); } } @@ -654,8 +605,8 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, float4 sum = (float4)(0,0,0,0); - const int evenFlag = (int)((tidx & 1) == 0); - const int oddFlag = (int)((tidx & 1) != 0); + const float4 evenFlag = (float4)((tidx & 1) == 0); + const float4 oddFlag = (float4)((tidx & 1) != 0); const bool eveny = ((tidy & 1) == 0); float4 co1 = (float4)(0.375f, 0.375f, 0.375f, 0.375f); @@ -681,11 +632,11 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, if (eveny) { - sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)]; - sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)]; + sum = sum + (oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)]; sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)]; - sum = sum + ( oddFlag * co2 ) * 
s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)]; - sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)]; + sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)]; + sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)]; } s_dstPatch[tidy][tidx] = sum; @@ -719,4 +670,4 @@ __kernel void pyrUp_C4_D5(__global float4* src,__global float4* dst, { dst[x + y * dstStep] = 4.0f * sum; } -} \ No newline at end of file +} diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl index 1043b8410b..40a1993952 100644 --- a/modules/ocl/src/opencl/pyrlk.cl +++ b/modules/ocl/src/opencl/pyrlk.cl @@ -46,145 +46,10 @@ //#pragma OPENCL EXTENSION cl_amd_printf : enable -__kernel void calcSharrDeriv_vertical_C1_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - if (y < rows && x < cols * cn) - { - const uchar src_val0 = (src + (y > 0 ? y-1 : rows > 1 ? 1 : 0) * srcStep)[x]; - const uchar src_val1 = (src + y * srcStep)[x]; - const uchar src_val2 = (src + (y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0) * srcStep)[x]; - - ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; - ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; - } -} - -__kernel void calcSharrDeriv_vertical_C4_D0(__global const uchar* src, int srcStep, int rows, int cols, int cn, __global short* dx_buf, int dx_bufStep, __global short* dy_buf, int dy_bufStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - if (y < rows && x < cols * cn) - { - const uchar src_val0 = (src + (y > 0 ? y - 1 : 1) * srcStep)[x]; - const uchar src_val1 = (src + y * srcStep)[x]; - const uchar src_val2 = (src + (y < rows - 1 ? 
y + 1 : rows - 2) * srcStep)[x]; - - ((__global short*)((__global char*)dx_buf + y * dx_bufStep / 2))[x] = (src_val0 + src_val2) * 3 + src_val1 * 10; - ((__global short*)((__global char*)dy_buf + y * dy_bufStep / 2))[x] = src_val2 - src_val0; - } -} - -__kernel void calcSharrDeriv_horizontal_C1_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - const int colsn = cols * cn; - - if (y < rows && x < colsn) - { - __global const short* dx_buf_row = dx_buf + y * dx_bufStep; - __global const short* dy_buf_row = dy_buf + y * dy_bufStep; - - const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; - const int xl = x - cn >= 0 ? x - cn : cn + x; - - ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; - ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; - } -} - -__kernel void calcSharrDeriv_horizontal_C4_D0(int rows, int cols, int cn, __global const short* dx_buf, int dx_bufStep, __global const short* dy_buf, int dy_bufStep, __global short* dIdx, int dIdxStep, __global short* dIdy, int dIdyStep) -{ - const int x = get_global_id(0); - const int y = get_global_id(1); - - const int colsn = cols * cn; - - if (y < rows && x < colsn) - { - __global const short* dx_buf_row = dx_buf + y * dx_bufStep; - __global const short* dy_buf_row = dy_buf + y * dy_bufStep; - - const int xr = x + cn < colsn ? x + cn : (cols - 2) * cn + x + cn - colsn; - const int xl = x - cn >= 0 ? 
x - cn : cn + x; - - ((__global short*)((__global char*)dIdx + y * dIdxStep / 2))[x] = dx_buf_row[xr] - dx_buf_row[xl]; - ((__global short*)((__global char*)dIdy + y * dIdyStep / 2))[x] = (dy_buf_row[xr] + dy_buf_row[xl]) * 3 + dy_buf_row[x] * 10; - } -} - -#define W_BITS 14 -#define W_BITS1 14 - -#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) - -int linearFilter_uchar(__global const uchar* src, int srcStep, int cn, float2 pt, int x, int y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); - int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); - int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const uchar* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; - __global const uchar* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; - - return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1 - 5); -} - -int linearFilter_short(__global const short* src, int srcStep, int cn, float2 pt, int x, int y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - int iw00 = convert_int_sat_rte((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - int iw01 = convert_int_sat_rte(a * (1.0f - b) * (1 << W_BITS)); - int iw10 = convert_int_sat_rte((1.0f - a) * b * (1 << W_BITS)); - int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const short* src_row = src + (ipt.y + y) * srcStep + ipt.x * cn; - __global const short* src_row1 = src + (ipt.y + y + 1) * srcStep + ipt.x * cn; - - return CV_DESCALE(src_row[x] * iw00 + src_row[x + cn] * iw01 + src_row1[x] * iw10 + src_row1[x + cn] * iw11, W_BITS1); -} - -float linearFilter_float(__global const float* src, int srcStep, 
int cn, float2 pt, float x, float y) -{ - int2 ipt; - ipt.x = convert_int_sat_rtn(pt.x); - ipt.y = convert_int_sat_rtn(pt.y); - - float a = pt.x - ipt.x; - float b = pt.y - ipt.y; - - float iw00 = ((1.0f - a) * (1.0f - b) * (1 << W_BITS)); - float iw01 = (a * (1.0f - b) * (1 << W_BITS)); - float iw10 = ((1.0f - a) * b * (1 << W_BITS)); - float iw11 = (1 << W_BITS) - iw00 - iw01 - iw10; - - __global const float* src_row = src + (int)(ipt.y + y) * srcStep / 4 + ipt.x * cn; - __global const float* src_row1 = src + (int)(ipt.y + y + 1) * srcStep / 4 + ipt.x * cn; - - return src_row[(int)x] * iw00 + src_row[(int)x + cn] * iw01 + src_row1[(int)x] * iw10 + src_row1[(int)x + cn] * iw11, W_BITS1 - 5; -} - #define BUFFER 64 - +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif #ifdef CPU void reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) { @@ -193,71 +58,51 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local smem3[tid] = val3; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - smem3[tid] = val3 += smem3[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - smem3[tid] = val3 += smem3[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = val1 += smem1[tid + 32]; - smem2[tid] = val2 += smem2[tid + 32]; - smem3[tid] = val3 += smem3[tid + 32]; + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; + smem3[tid] += smem3[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = val1 += smem1[tid + 16]; - smem2[tid] = val2 += smem2[tid + 16]; - smem3[tid] = val3 += smem3[tid + 16]; + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; + smem3[tid] += smem3[tid + 16]; } 
barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = val1 += smem1[tid + 8]; - smem2[tid] = val2 += smem2[tid + 8]; - smem3[tid] = val3 += smem3[tid + 8]; + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; + smem3[tid] += smem3[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = val1 += smem1[tid + 4]; - smem2[tid] = val2 += smem2[tid + 4]; - smem3[tid] = val3 += smem3[tid + 4]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; + smem3[tid] += smem3[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = val1 += smem1[tid + 2]; - smem2[tid] = val2 += smem2[tid + 2]; - smem3[tid] = val3 += smem3[tid + 2]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; + smem3[tid] += smem3[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = val1 += smem1[tid + 1]; - smem2[BUFFER] = val2 += smem2[tid + 1]; - smem3[BUFFER] = val3 += smem3[tid + 1]; + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; + smem2[BUFFER] = smem2[tid] + smem2[tid + 1]; + smem3[BUFFER] = smem3[tid] + smem3[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } @@ -268,63 +113,45 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l smem2[tid] = val2; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = (val1 += smem1[tid + 128]); - smem2[tid] = (val2 += smem2[tid + 128]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = (val1 += smem1[tid + 64]); - smem2[tid] = (val2 += smem2[tid + 64]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = (val1 += smem1[tid + 32]); - smem2[tid] = (val2 += smem2[tid + 32]); + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = (val1 += smem1[tid + 16]); - smem2[tid] = (val2 += smem2[tid + 16]); + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; } 
barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = (val1 += smem1[tid + 8]); - smem2[tid] = (val2 += smem2[tid + 8]); + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = (val1 += smem1[tid + 4]); - smem2[tid] = (val2 += smem2[tid + 4]); + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = (val1 += smem1[tid + 2]); - smem2[tid] = (val2 += smem2[tid + 2]); + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = (val1 += smem1[tid + 1]); - smem2[BUFFER] = (val2 += smem2[tid + 1]); + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; + smem2[BUFFER] = smem2[tid] + smem2[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } @@ -334,205 +161,146 @@ void reduce1(float val1, volatile __local float* smem1, int tid) smem1[tid] = val1; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = (val1 += smem1[tid + 128]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = (val1 += smem1[tid + 64]); - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - smem1[tid] = (val1 += smem1[tid + 32]); + smem1[tid] += smem1[tid + 32]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { - smem1[tid] = (val1 += smem1[tid + 16]); + smem1[tid] += smem1[tid + 16]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 8) { - smem1[tid] = (val1 += smem1[tid + 8]); + smem1[tid] += smem1[tid + 8]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 4) { - smem1[tid] = (val1 += smem1[tid + 4]); + smem1[tid] += smem1[tid + 4]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 2) { - smem1[tid] = (val1 += smem1[tid + 2]); + smem1[tid] += smem1[tid + 2]; } barrier(CLK_LOCAL_MEM_FENCE); if (tid < 1) { - smem1[BUFFER] = (val1 += smem1[tid + 1]); + smem1[BUFFER] = smem1[tid] + smem1[tid + 1]; } barrier(CLK_LOCAL_MEM_FENCE); } #else -void 
reduce3(float val1, float val2, float val3, __local float* smem1, __local float* smem2, __local float* smem3, int tid) +void reduce3(float val1, float val2, float val3, +__local volatile float* smem1, __local volatile float* smem2, __local volatile float* smem3, int tid) { smem1[tid] = val1; smem2[tid] = val2; smem3[tid] = val3; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - smem3[tid] = val3 += smem3[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - smem3[tid] = val3 += smem3[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - volatile __local float* vmem1 = smem1; - volatile __local float* vmem2 = smem2; - volatile __local float* vmem3 = smem3; + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; + smem3[tid] += smem3[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; + smem3[tid] += smem3[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { +#endif + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; + smem3[tid] += smem3[tid + 8]; - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem2[tid] = val2 += vmem2[tid + 32]; - vmem3[tid] = val3 += vmem3[tid + 32]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; + smem3[tid] += smem3[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem2[tid] = val2 += vmem2[tid + 16]; - vmem3[tid] = val3 += vmem3[tid + 16]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; + smem3[tid] += smem3[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; - vmem3[tid] = val3 += vmem3[tid + 8]; - - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - 
vmem3[tid] = val3 += vmem3[tid + 4]; - - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - vmem3[tid] = val3 += vmem3[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - vmem2[tid] = val2 += vmem2[tid + 1]; - vmem3[tid] = val3 += vmem3[tid + 1]; + smem1[tid] += smem1[tid + 1]; + smem2[tid] += smem2[tid + 1]; + smem3[tid] += smem3[tid + 1]; } } -void reduce2(float val1, float val2, __local float* smem1, __local float* smem2, int tid) +void reduce2(float val1, float val2, __local volatile float* smem1, __local volatile float* smem2, int tid) { smem1[tid] = val1; smem2[tid] = val2; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - smem2[tid] = val2 += smem2[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - smem2[tid] = val2 += smem2[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - volatile __local float* vmem1 = smem1; - volatile __local float* vmem2 = smem2; + smem1[tid] += smem1[tid + 32]; + smem2[tid] += smem2[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif + smem1[tid] += smem1[tid + 16]; + smem2[tid] += smem2[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { +#endif + smem1[tid] += smem1[tid + 8]; + smem2[tid] += smem2[tid + 8]; - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem2[tid] = val2 += vmem2[tid + 32]; + smem1[tid] += smem1[tid + 4]; + smem2[tid] += smem2[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem2[tid] = val2 += vmem2[tid + 16]; + smem1[tid] += smem1[tid + 2]; + smem2[tid] += smem2[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem2[tid] = val2 += vmem2[tid + 8]; - - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem2[tid] = val2 += vmem2[tid + 4]; - - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem2[tid] = val2 += vmem2[tid + 2]; - - vmem1[tid] = val1 += vmem1[tid + 1]; - 
vmem2[tid] = val2 += vmem2[tid + 1]; + smem1[tid] += smem1[tid + 1]; + smem2[tid] += smem2[tid + 1]; } } -void reduce1(float val1, __local float* smem1, int tid) +void reduce1(float val1, __local volatile float* smem1, int tid) { smem1[tid] = val1; barrier(CLK_LOCAL_MEM_FENCE); -#if BUFFER > 128 - if (tid < 128) - { - smem1[tid] = val1 += smem1[tid + 128]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - -#if BUFFER > 64 - if (tid < 64) - { - smem1[tid] = val1 += smem1[tid + 64]; - } - barrier(CLK_LOCAL_MEM_FENCE); -#endif - if (tid < 32) { - volatile __local float* vmem1 = smem1; - - vmem1[tid] = val1 += vmem1[tid + 32]; - vmem1[tid] = val1 += vmem1[tid + 16]; - vmem1[tid] = val1 += vmem1[tid + 8]; - vmem1[tid] = val1 += vmem1[tid + 4]; - vmem1[tid] = val1 += vmem1[tid + 2]; - vmem1[tid] = val1 += vmem1[tid + 1]; + smem1[tid] += smem1[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif + smem1[tid] += smem1[tid + 16]; +#if WAVE_SIZE <16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { +#endif + smem1[tid] += smem1[tid + 8]; + smem1[tid] += smem1[tid + 4]; + smem1[tid] += smem1[tid + 2]; + smem1[tid] += smem1[tid + 1]; } } #endif #define SCALE (1.0f / (1 << 20)) #define THRESHOLD 0.01f -#define DIMENSION 21 // Image read mode __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl index bd86a7f3fb..552874d427 100644 --- a/modules/ocl/src/opencl/stereobm.cl +++ b/modules/ocl/src/opencl/stereobm.cl @@ -162,8 +162,8 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char int y_tex; int x_tex = X - radius; - if (x_tex >= cwidth) - return; + //if (x_tex >= cwidth) + // return; for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP) { diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl new file mode 100644 index 0000000000..e0ff7307b1 
--- /dev/null +++ b/modules/ocl/src/opencl/tvl1flow.cl @@ -0,0 +1,407 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step, +__global float* dx, __global float* dy, int dx_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if((x < src_col)&&(y < src_row)) + { + int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); + int src_x2 = (x - 1) > 0 ? (x -1) : 0; + + //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2]) + //{ + // printf("y = %d\n", y); + // printf("src_x1 = %d\n", src_x1); + // printf("src_x2 = %d\n", src_x2); + //} + dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]); + + int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); + int src_y2 = (y - 1) > 0 ? 
(y - 1) : 0; + dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]); + } + +} + +float bicubicCoeff(float x_) +{ + + float x = fabs(x_); + if (x <= 1.0f) + { + return x * x * (1.5f * x - 2.5f) + 1.0f; + } + else if (x < 2.0f) + { + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + } + else + { + return 0.0f; + } + +} + +__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, + image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //const float u1Val = u1(y, x); + const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + //const float u2Val = u2(y, x); + const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float wx = x + u1Val; + const float wy = y + u2Val; + + const int xmin = ceil(wx - 2.0f); + const int xmax = floor(wx + 2.0f); + + const int ymin = ceil(wy - 2.0f); + const int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + //sum += w * tex2D(tex_I1 , cx, cy); + int2 cood = (int2)(cx, cy); + sum += w * read_imagef(tex_I1, sampleri, cood).x; + //sumx += w * tex2D(tex_I1x, cx, cy); + sumx += w * read_imagef(tex_I1x, sampleri, cood).x; + //sumy += w * tex2D(tex_I1y, cx, cy); + sumy 
+= w * read_imagef(tex_I1y, sampleri, cood).x; + + wsum += w; + } + } + + const float coeff = 1.0f / wsum; + + const float I1wVal = sum * coeff; + const float I1wxVal = sumx * coeff; + const float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + const float Ix2 = I1wxVal * I1wxVal; + const float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + const float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + +float readImage(__global const float *image, const int x, const int y, const int rows, const int cols, const int elemCntPerRow) +{ + int i0 = clamp(x, 0, cols - 1); + int j0 = clamp(y, 0, rows - 1); + int i1 = clamp(x + 1, 0, cols - 1); + int j1 = clamp(y + 1, 0, rows - 1); + + return image[j0 * elemCntPerRow + i0]; +} + +__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, + __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int I1_step, + int I1x_step) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //const float u1Val = u1(y, x); + const float u1Val = u1[y * u1_step + x]; + //const float u2Val = u2(y, x); + const float u2Val = u2[y * u2_step + x]; + + const float wx = x + u1Val; + const float wy = y + u2Val; + + const int xmin = ceil(wx - 2.0f); + const int xmax = floor(wx + 2.0f); + + const int ymin = ceil(wy - 2.0f); + const int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx 
= 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + int2 cood = (int2)(cx, cy); + sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); + sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); + sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); + wsum += w; + } + } + + const float coeff = 1.0f / wsum; + + const float I1wVal = sum * coeff; + const float I1wxVal = sumx * coeff; + const float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + const float Ix2 = I1wxVal * I1wxVal; + const float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + const float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + + +__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, + __global const float* u2, + __global float* p11, int p11_step, + __global float* p12, + __global float* p21, + __global float* p22, + const float taut, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + + //const int x = blockIdx.x * blockDim.x + threadIdx.x; + //const int y = blockIdx.y * blockDim.y + threadIdx.y; + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x < u1_col && y < u1_row) + { + int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_y1 = (y + 1) < (u1_row - 1) ? 
(y + 1) : (u1_row - 1); + const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float g1 = hypot(u1x, u1y); + const float g2 = hypot(u2x, u2y); + + const float ng1 = 1.0f + taut * g1; + const float ng2 = 1.0f + taut * g2; + + p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; + p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; + p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; + p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; + } + +} + +float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) +{ + + if (x > 0 && y > 0) + { + const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; + const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; + return v1x + v2y; + } + else + { + if (y > 0) + return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; + else + { + if (x > 0) + return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; + else + return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; + } + } + +} + +__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, + __global const float* I1wy, /*int I1wy_step,*/ + __global const float* grad, /*int grad_step,*/ + __global const float* rho_c, /*int rho_c_step,*/ + __global const float* p11, /*int p11_step,*/ + __global const float* p12, /*int p12_step,*/ + __global const float* p21, /*int p21_step,*/ + __global const float* p22, /*int 
p22_step,*/ + __global float* u1, int u1_step, + __global float* u2, + __global float* error, const float l_t, const float theta, int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + + //const int x = blockIdx.x * blockDim.x + threadIdx.x; + //const int y = blockIdx.y * blockDim.y + threadIdx.y; + + int x = get_global_id(0); + int y = get_global_id(1); + + + if(x < I1wx_col && y < I1wx_row) + { + const float I1wxVal = I1wx[y * I1wx_step + x]; + const float I1wyVal = I1wy[y * I1wx_step + x]; + const float gradVal = grad[y * I1wx_step + x]; + const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + + // estimate the values of the variable (v1, v2) (thresholding operator TH) + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradVal) + { + d1 = l_t * I1wxVal; + d2 = l_t * I1wyVal; + } + else if (rho > l_t * gradVal) + { + d1 = -l_t * I1wxVal; + d2 = -l_t * I1wyVal; + } + else if (gradVal > 1.192092896e-07f) + { + const float fi = -rho / gradVal; + d1 = fi * I1wxVal; + d2 = fi * I1wyVal; + } + + const float v1 = u1OldVal + d1; + const float v2 = u2OldVal + d2; + + // compute the divergence of the dual variable (p1, p2) + + const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); + const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); + + // estimate the values of the optical flow (u1, u2) + + const float u1NewVal = v1 + theta * div_p1; + const float u2NewVal = v2 + theta * div_p2; + + u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; + u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; + + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } + +} diff --git 
a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index b2a3e41c6f..4f93eac420 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -78,6 +78,7 @@ #if defined (HAVE_OPENCL) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS #include "opencv2/ocl/private/util.hpp" #include "safe_call.hpp" diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index 4a6ce1c790..8e9420480c 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com -// Yao Wang, bitwangyaoyao@gmail.com +// Dachuan Zhao, dachuan@multicorewareinc.com +// Yao Wang, bitwangyaoyao@gmail.com // Nathan, liujun@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, @@ -56,31 +56,16 @@ namespace cv { namespace ocl { -///////////////////////////OpenCL kernel strings/////////////////////////// extern const char *pyrlk; extern const char *pyrlk_no_image; -extern const char *arithm_mul; } } - struct dim3 { unsigned int x, y, z; }; -struct float2 -{ - float x, y; -}; - -struct int2 -{ - int x, y; -}; - -namespace -{ -void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) +static void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) { winSize.width *= cn; @@ -100,45 +85,6 @@ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDe block.z = patch.z = 1; } -} - -static void multiply_cus(const oclMat &src1, oclMat &dst, float scalar) -{ - if(!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F) - { - CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); - return; - } - - CV_Assert(src1.cols == dst.cols && - src1.rows == dst.rows); - - CV_Assert(src1.type() == dst.type()); - CV_Assert(src1.depth() != CV_8S); - - Context *clCxt = 
src1.clCxt; - - size_t localThreads[3] = { 16, 16, 1 }; - size_t globalThreads[3] = { src1.cols, - src1.rows, - 1 - }; - - int dst_step1 = dst.cols * dst.elemSize(); - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - args.push_back( make_pair( sizeof(float), (float *)&scalar )); - - openCLExecuteKernel(clCxt, &arithm_mul, "arithm_muls", globalThreads, localThreads, args, -1, src1.depth()); -} static void lkSparse_run(oclMat &I, oclMat &J, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat& err, bool /*GET_MIN_EIGENVALS*/, int ptcount, @@ -151,15 +97,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, size_t localThreads[3] = { 8, isImageSupported ? 8 : 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, isImageSupported ? 
8 : 32, 1}; int cn = I.oclchannels(); - char calcErr; - if (level == 0) - { - calcErr = 1; - } - else - { - calcErr = 0; - } + char calcErr = level==0?1:0; vector > args; @@ -187,8 +125,7 @@ static void lkSparse_run(oclMat &I, oclMat &J, args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); - bool is_cpu; - queryDeviceInfo(IS_CPU_DEVICE, &is_cpu); + bool is_cpu = queryDeviceInfo(); if (is_cpu) { openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU"); @@ -199,7 +136,17 @@ static void lkSparse_run(oclMat &I, oclMat &J, { if(isImageSupported) { - openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth()); + stringstream idxStr; + idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth(); + cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str()); + int wave_size = queryDeviceInfo(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + static char opt[32] = {0}; + sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + + openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, + args, I.oclchannels(), I.depth(), opt); releaseTexture(ITex); releaseTexture(JTex); } @@ -242,8 +189,7 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &next oclMat temp1 = (useInitialFlow ? 
nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); - multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); - //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); + multiply(1.0f/(1<= 0; level--) { lkSparse_run(prevPyr_[level], nextPyr_[level], diff --git a/modules/ocl/src/safe_call.hpp b/modules/ocl/src/safe_call.hpp index 441495f860..ba36cabd32 100644 --- a/modules/ocl/src/safe_call.hpp +++ b/modules/ocl/src/safe_call.hpp @@ -47,7 +47,7 @@ #define __OPENCV_OPENCL_SAFE_CALL_HPP__ #if defined __APPLE__ -#include +#include #else #include #endif diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp new file mode 100644 index 0000000000..a322f62a4e --- /dev/null +++ b/modules/ocl/src/tvl1flow.cpp @@ -0,0 +1,479 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + + +#include "precomp.hpp" +using namespace std; +using namespace cv; +using namespace cv::ocl; + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char* tvl1flow; + } +} + +cv::ocl::OpticalFlowDual_TVL1_OCL::OpticalFlowDual_TVL1_OCL() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + iterations = 300; + useInitialFlow = false; +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy) +{ + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); + CV_Assert( nscales > 0 ); + + // allocate memory for the pyramid structure + I0s.resize(nscales); + I1s.resize(nscales); + 
u1s.resize(nscales); + u2s.resize(nscales); + //I0s_step == I1s_step + I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); + + + if (!useInitialFlow) + { + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); + } + //u1s_step != u2s_step + u1s[0] = flowx; + u2s[0] = flowy; + + I1x_buf.create(I0.size(), CV_32FC1); + I1y_buf.create(I0.size(), CV_32FC1); + + I1w_buf.create(I0.size(), CV_32FC1); + I1wx_buf.create(I0.size(), CV_32FC1); + I1wy_buf.create(I0.size(), CV_32FC1); + + grad_buf.create(I0.size(), CV_32FC1); + rho_c_buf.create(I0.size(), CV_32FC1); + + p11_buf.create(I0.size(), CV_32FC1); + p12_buf.create(I0.size(), CV_32FC1); + p21_buf.create(I0.size(), CV_32FC1); + p22_buf.create(I0.size(), CV_32FC1); + + diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + ocl::pyrDown(I0s[s - 1], I0s[s]); + ocl::pyrDown(I1s[s - 1], I1s[s]); + + if (I0s[s].cols < 16 || I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + ocl::pyrDown(u1s[s - 1], u1s[s]); + ocl::pyrDown(u2s[s - 1], u2s[s]); + + //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]); + multiply(0.5, u1s[s], u1s[s]); + //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]); + multiply(0.5, u1s[s], u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); + ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); + + // scale the optical flow with the appropriate zoom factor + multiply(2, u1s[s - 1], u1s[s - 1]); + multiply(2, u2s[s - 1], u2s[s - 1]); + + } + +} + +namespace 
ocl_tvl1flow +{ + void centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy); + + void warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, + oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, + oclMat &grad, oclMat &rho); + + void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, + oclMat &rho_c, oclMat &p11, oclMat &p12, + oclMat &p21, oclMat &p22, oclMat &u1, + oclMat &u2, oclMat &error, float l_t, float theta); + + void estimateDualVariables(oclMat &u1, oclMat &u2, + oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2) +{ + using namespace ocl_tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + if (u1.empty()) + { + u1.create(I0.size(), CV_32FC1); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size(), CV_32FC1); + u2.setTo(Scalar::all(0)); + } + + oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + + centeredGradient(I1, I1x, I1y); + + oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); + oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = 
static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); + + double error = numeric_limits::max(); + for (int n = 0; error > scaledEpsilon && n < iterations; ++n) + { + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, + u1, u2, diff, l_t, static_cast(theta)); + + error = ocl::sum(diff)[0]; + + estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); + + } + } + +} + +void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() +{ + I0s.clear(); + I1s.clear(); + u1s.clear(); + u2s.clear(); + + I1x_buf.release(); + I1y_buf.release(); + + I1w_buf.release(); + I1wx_buf.release(); + I1wy_buf.release(); + + grad_buf.release(); + rho_c_buf.release(); + + p11_buf.release(); + p12_buf.release(); + p21_buf.release(); + p22_buf.release(); + + diff_buf.release(); + norm_buf.release(); +} + +void ocl_tvl1flow::centeredGradient(const oclMat &src, oclMat &dx, oclMat &dy) +{ + Context *clCxt = src.clCxt; + size_t localThreads[3] = {32, 8, 1}; + size_t globalThreads[3] = {src.cols, src.rows, 1}; + + int srcElementSize = src.elemSize(); + int src_step = src.step/srcElementSize; + + int dElememntSize = dx.elemSize(); + int dx_step = dx.step/dElememntSize; + + string kernelName = "centeredGradientKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&src.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&src_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&dx.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&dy.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&dx_step)); + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThreads, localThreads, args, -1, -1); + +} + +void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat 
&u2, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut) +{ + Context *clCxt = u1.clCxt; + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + u1.cols, + u1.rows, + 1 + }; + + int u1_element_size = u1.elemSize(); + int u1_step = u1.step/u1_element_size; + + int u2_element_size = u2.elemSize(); + int u2_step = u2.step/u2_element_size; + + int p11_element_size = p11.elemSize(); + int p11_step = p11.step/p11_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + string kernelName = "estimateDualVariablesKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&p11_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); + args.push_back( make_pair( sizeof(cl_float), (void*)&taut)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); +} + +void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, + 
oclMat &rho_c, oclMat &p11, oclMat &p12, + oclMat &p21, oclMat &p22, oclMat &u1, + oclMat &u2, oclMat &error, float l_t, float theta) +{ + Context* clCxt = I1wx.clCxt; + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + I1wx.cols, + I1wx.rows, + 1 + }; + + int I1wx_element_size = I1wx.elemSize(); + int I1wx_step = I1wx.step/I1wx_element_size; + + int u1_element_size = u1.elemSize(); + int u1_step = u1.step/u1_element_size; + + int u2_element_size = u2.elemSize(); + int u2_step = u2.step/u2_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + string kernelName = "estimateUKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx.rows)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1wx_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&rho_c.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p11.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p12.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p21.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&p22.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&error.data)); + args.push_back( make_pair( sizeof(cl_float), (void*)&l_t)); + args.push_back( make_pair( sizeof(cl_float), (void*)&theta)); + args.push_back( make_pair( sizeof(cl_int), 
(void*)&u2_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); +} + +void ocl_tvl1flow::warpBackward(const oclMat &I0, const oclMat &I1, oclMat &I1x, oclMat &I1y, oclMat &u1, oclMat &u2, oclMat &I1w, oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho) +{ + Context* clCxt = I0.clCxt; + const bool isImgSupported = support_image2d(clCxt); + + CV_Assert(isImgSupported); + + int u1ElementSize = u1.elemSize(); + int u1Step = u1.step/u1ElementSize; + + int u2ElementSize = u2.elemSize(); + int u2Step = u2.step/u2ElementSize; + + int I0ElementSize = I0.elemSize(); + int I0Step = I0.step/I0ElementSize; + + int I1w_element_size = I1w.elemSize(); + int I1w_step = I1w.step/I1w_element_size; + + int u1_offset_y = u1.offset/u1.step; + int u1_offset_x = u1.offset%u1.step; + u1_offset_x = u1_offset_x/u1.elemSize(); + + int u2_offset_y = u2.offset/u2.step; + int u2_offset_x = u2.offset%u2.step; + u2_offset_x = u2_offset_x/u2.elemSize(); + + size_t localThread[] = {32, 8, 1}; + size_t globalThread[] = + { + I0.cols, + I0.rows, + 1 + }; + + cl_mem I1_tex; + cl_mem I1x_tex; + cl_mem I1y_tex; + I1_tex = bindTexture(I1); + I1x_tex = bindTexture(I1x); + I1y_tex = bindTexture(I1y); + + string kernelName = "warpBackwardKernel"; + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void*)&I0.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0Step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0.cols)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I0.rows)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1_tex)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1x_tex)); + args.push_back( make_pair( sizeof(cl_mem), 
(void*)&I1y_tex)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u1.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1Step)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&u2.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1w.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wx.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&I1wy.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&grad.data)); + args.push_back( make_pair( sizeof(cl_mem), (void*)&rho.data)); + args.push_back( make_pair( sizeof(cl_int), (void*)&I1w_step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2Step)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); + args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + + openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); + + releaseTexture(I1_tex); + releaseTexture(I1x_tex); + releaseTexture(I1y_tex); +} \ No newline at end of file diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index cac6b66f51..10032e897c 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -45,7 +45,6 @@ #include "precomp.hpp" #ifdef HAVE_OPENCL -#define SHOW_RESULT 0 //////////////////////////////////////////////////////// // Canny @@ -59,13 +58,10 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient) bool useL2gradient; cv::Mat edges_gold; - //std::vector oclinfo; virtual void SetUp() { apperture_size = GET_PARAM(0); useL2gradient = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); } }; @@ -77,32 +73,18 @@ TEST_P(Canny, Accuracy) double low_thresh = 50.0; double high_thresh = 100.0; - cv::resize(img, img, cv::Size(512, 384)); cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img); cv::ocl::oclMat edges; 
cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); - char filename [100]; - sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient); - cv::Mat edges_gold; cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient); -#if SHOW_RESULT - cv::Mat edges_x2, ocl_edges(edges); - edges_x2.create(edges.rows, edges.cols * 2, edges.type()); - edges_x2.setTo(0); - cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows))); - cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows))); - cv::namedWindow("Canny result (left: cpu, right: ocl)"); - cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2); - cv::waitKey(); -#endif //OUTPUT_RESULT EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2); } -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine( testing::Values(AppertureSize(3), AppertureSize(5)), testing::Values(L2gradient(false), L2gradient(true)))); #endif \ No newline at end of file diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/test/test_columnsum.cpp deleted file mode 100644 index 231f0657b0..0000000000 --- a/modules/ocl/test/test_columnsum.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 
-// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Chunpeng Zhang chunpeng@multicorewareinc.com -// -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" -#include - -#ifdef HAVE_OPENCL - -PARAM_TEST_CASE(ColumnSum, cv::Size) -{ - cv::Size size; - cv::Mat src; - - virtual void SetUp() - { - size = GET_PARAM(0); - } -}; - -TEST_P(ColumnSum, Accuracy) -{ - cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - cv::ocl::oclMat d_src(src); - - cv::ocl::columnSum(d_src, d_dst); - - cv::Mat dst(d_dst); - - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(0, j); - float res = dst.at(0, j); - ASSERT_NEAR(res, gold, 1e-5); - } - - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(i, j) += src.at(i - 1, j); - float res = dst.at(i, j); - ASSERT_NEAR(res, gold, 1e-5); - } - } -} - -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); - - -#endif diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp index a5d90ff01c..5548456568 100644 --- a/modules/ocl/test/test_gemm.cpp +++ b/modules/ocl/test/test_gemm.cpp @@ -74,7 +74,7 @@ TEST_P(Gemm, Accuracy) cv::gemm(a, b, 1.0, c, 1.0, dst, flags); cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags); - EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, ""); + EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4); } INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( diff --git a/modules/ocl/test/test_haar.cpp b/modules/ocl/test/test_haar.cpp deleted file mode 100644 index 96f721146b..0000000000 --- a/modules/ocl/test/test_haar.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. 
-// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Jia Haipeng, jiahaipeng95@gmail.com -// Sen Liu, swjutls1987@126.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "opencv2/objdetect/objdetect.hpp" -#include "precomp.hpp" - -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv; -extern string workdir; -struct getRect -{ - Rect operator ()(const CvAvgComp &e) const - { - return e.rect; - } -}; - -PARAM_TEST_CASE(Haar, double, int) -{ - cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::ocl::OclCascadeClassifierBuf cascadebuf; - cv::CascadeClassifier cpucascade, cpunestedCascade; - - double scale; - int flags; - - virtual void SetUp() - { - scale = GET_PARAM(0); - flags = GET_PARAM(1); - string cascadeName = workdir + "../../data/haarcascades/haarcascade_frontalface_alt.xml"; - - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) || (!cascadebuf.load( cascadeName ))) - { - cout << "ERROR: Could not load classifier cascade" << endl; - return; - } - } -}; - -////////////////////////////////faceDetect///////////////////////////////////////////////// -TEST_P(Haar, FaceDetect) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read " << imgName << std::endl; - return ; - } - - vector faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - cv::ocl::oclMat image; - CvSeq *_objects; - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, flags, Size(30, 30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), 
oclfaces.size()); -} - -TEST_P(Haar, FaceDetectUseBuf) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read " << imgName << std::endl; - return ; - } - - vector faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - cv::ocl::oclMat image; - image.upload(smallImg); - - cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - cascadebuf.release(); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), oclfaces.size()); -} - -INSTANTIATE_TEST_CASE_P(FaceDetect, Haar, - Combine(Values(1.0), - Values(CV_HAAR_SCALE_IMAGE, 0))); - -#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 664f8a3919..3228b6c0cf 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -23,6 +23,7 @@ // Rock Li, Rock.Li@amd.com // Wu Zailong, bullet@yeah.net // Xu Pang, pangxu010@163.com +// Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -1393,6 +1394,46 @@ TEST_P(calcHist, Mat) EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0); } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// CLAHE +namespace +{ + IMPLEMENT_PARAM_CLASS(ClipLimit, double) +} + +PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit) +{ + cv::Size size; + double clipLimit; + + cv::Mat src; + cv::Mat dst_gold; + + cv::ocl::oclMat g_src; + cv::ocl::oclMat g_dst; + + virtual void SetUp() + { + size = GET_PARAM(0); + clipLimit = GET_PARAM(1); + + 
cv::RNG &rng = TS::ptr()->get_rng(); + src = randomMat(rng, size, CV_8UC1, 0, 256, false); + g_src.upload(src); + } +}; + +TEST_P(CLAHE, Accuracy) +{ + cv::Ptr clahe = cv::ocl::createCLAHE(clipLimit); + clahe->apply(g_src, g_dst); + cv::Mat dst(g_dst); + + cv::Ptr clahe_gold = cv::createCLAHE(clipLimit); + clahe_gold->apply(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); +} ///////////////////////////Convolve////////////////////////////////// PARAM_TEST_CASE(ConvolveTestBase, MatType, bool) @@ -1532,6 +1573,47 @@ TEST_P(Convolve, Mat) } } +//////////////////////////////// ColumnSum ////////////////////////////////////// +PARAM_TEST_CASE(ColumnSum, cv::Size) +{ + cv::Size size; + cv::Mat src; + + virtual void SetUp() + { + size = GET_PARAM(0); + } +}; + +TEST_P(ColumnSum, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1); + cv::ocl::oclMat d_dst; + cv::ocl::oclMat d_src(src); + + cv::ocl::columnSum(d_src, d_dst); + + cv::Mat dst(d_dst); + + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(0, j); + float res = dst.at(0, j); + ASSERT_NEAR(res, gold, 1e-5); + } + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(i, j) += src.at(i - 1, j); + float res = dst.at(i, j); + ASSERT_NEAR(res, gold, 1e-5); + } + } +} +///////////////////////////////////////////////////////////////////////////////////// + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( ONE_TYPE(CV_8UC1), NULL_TYPE, @@ -1643,7 +1725,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( ONE_TYPE(CV_32SC1) //no use )); -//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine( -// Values(CV_32FC1, CV_32FC1), -// Values(false))); // Values(false) is the reserved parameter +INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine( + Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), + Values(0.0, 40.0))); + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); + #endif // HAVE_OPENCL 
diff --git a/modules/ocl/test/test_moments.cpp b/modules/ocl/test/test_moments.cpp index 98c66def31..86f4779d68 100644 --- a/modules/ocl/test/test_moments.cpp +++ b/modules/ocl/test/test_moments.cpp @@ -45,12 +45,12 @@ TEST_P(MomentsTest, Mat) { if(test_contours) { - Mat src = imread( workdir + "../cpp/pic3.png", 1 ); - Mat src_gray, canny_output; - cvtColor( src, src_gray, CV_BGR2GRAY ); + Mat src = imread( workdir + "../cpp/pic3.png", IMREAD_GRAYSCALE ); + ASSERT_FALSE(src.empty()); + Mat canny_output; vector > contours; vector hierarchy; - Canny( src_gray, canny_output, 100, 200, 3 ); + Canny( src, canny_output, 100, 200, 3 ); findContours( canny_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) ); for( size_t i = 0; i < contours.size(); i++ ) { diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_objdetect.cpp similarity index 51% rename from modules/ocl/test/test_hog.cpp rename to modules/ocl/test/test_objdetect.cpp index cfc4e3963f..86590f7981 100644 --- a/modules/ocl/test/test_hog.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Wenju He, wenju@multicorewareinc.com +// Yao Wang, bitwangyaoyao@gmail.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,51 +45,58 @@ #include "precomp.hpp" #include "opencv2/core/core.hpp" -using namespace std; +#include "opencv2/objdetect/objdetect.hpp" + +using namespace cv; +using namespace testing; #ifdef HAVE_OPENCL extern string workdir; -PARAM_TEST_CASE(HOG, cv::Size, int) + +///////////////////// HOG ///////////////////////////// +PARAM_TEST_CASE(HOG, Size, int) { - cv::Size winSize; + Size winSize; int type; + Mat img_rgb; virtual void SetUp() { winSize = GET_PARAM(0); type = GET_PARAM(1); + img_rgb = readImage(workdir + "../gpu/road.png"); + if(img_rgb.empty()) + { + std::cout << "Couldn't read road.png" << std::endl; + } } }; TEST_P(HOG, GetDescriptors) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - // Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - cv::ocl::HOGDescriptor ocl_hog; + ocl::HOGDescriptor ocl_hog; ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.gammaCorrection = true; // Compute descriptor - cv::ocl::oclMat d_descriptors; + ocl::oclMat d_descriptors; ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); - cv::Mat down_descriptors; + Mat down_descriptors; d_descriptors.download(down_descriptors); down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); @@ -105,45 +112,34 @@ TEST_P(HOG, GetDescriptors) hog.compute(img_rgb, descriptors, ocl_hog.win_size); 
break; } - cv::Mat cpu_descriptors(descriptors); + Mat cpu_descriptors(descriptors); EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); } - -bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) -{ - return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); -} - TEST_P(HOG, Detect) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - // Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) - winSize = cv::Size(64, 128); - cv::ocl::HOGDescriptor ocl_hog(winSize); + if ((winSize != Size(48, 96)) && (winSize != Size(64, 128))) + winSize = Size(64, 128); + ocl::HOGDescriptor ocl_hog(winSize); ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.winSize = winSize; hog.gammaCorrection = true; @@ -165,88 +161,117 @@ TEST_P(HOG, Detect) } // OpenCL detection - std::vector d_found; - ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + std::vector d_found; + ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6); // CPU detection - std::vector found; + std::vector found; switch (type) { case CV_8UC1: - hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; case CV_8UC4: default: - hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; } - // Ground-truth rectangular 
people window - cv::Rect win1_64x128(231, 190, 72, 144); - cv::Rect win2_64x128(621, 156, 97, 194); - cv::Rect win1_48x96(238, 198, 63, 126); - cv::Rect win2_48x96(619, 161, 92, 185); - cv::Rect win3_48x96(488, 136, 56, 112); - - // Compare whether ground-truth windows are detected and compare the number of windows detected. - std::vector d_comp(4); - std::vector comp(4); - for(int i = 0; i < (int)d_comp.size(); i++) - { - d_comp[i] = 0; - comp[i] = 0; - } - - int threshold = 10; - int val = 32; - d_comp[0] = (int)d_found.size(); - comp[0] = (int)found.size(); - if (winSize == cv::Size(48, 96)) - { - for(int i = 0; i < (int)d_found.size(); i++) - { - if (match_rect(d_found[i], win1_48x96, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_48x96, threshold)) - d_comp[2] = val; - if (match_rect(d_found[i], win3_48x96, threshold)) - d_comp[3] = val; - } - for(int i = 0; i < (int)found.size(); i++) - { - if (match_rect(found[i], win1_48x96, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_48x96, threshold)) - comp[2] = val; - if (match_rect(found[i], win3_48x96, threshold)) - comp[3] = val; - } - } - else if (winSize == cv::Size(64, 128)) - { - for(int i = 0; i < (int)d_found.size(); i++) - { - if (match_rect(d_found[i], win1_64x128, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_64x128, threshold)) - d_comp[2] = val; - } - for(int i = 0; i < (int)found.size(); i++) - { - if (match_rect(found[i], win1_64x128, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_64x128, threshold)) - comp[2] = val; - } - } - - EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3); + EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0); } INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( - testing::Values(cv::Size(64, 128), cv::Size(48, 96)), + testing::Values(Size(64, 128), Size(48, 96)), testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); +///////////////////////////// Haar ////////////////////////////// 
+IMPLEMENT_PARAM_CLASS(CascadeName, std::string); +CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml")); +CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml")); +struct getRect +{ + Rect operator ()(const CvAvgComp &e) const + { + return e.rect; + } +}; -#endif //HAVE_OPENCL +PARAM_TEST_CASE(Haar, int, CascadeName) +{ + ocl::OclCascadeClassifier cascade, nestedCascade; + CascadeClassifier cpucascade, cpunestedCascade; + + int flags; + std::string cascadeName; + vector faces, oclfaces; + Mat img; + ocl::oclMat d_img; + + virtual void SetUp() + { + flags = GET_PARAM(0); + cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1)); + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) ) + { + std::cout << "ERROR: Could not load classifier cascade" << std::endl; + return; + } + img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE); + if(img.empty()) + { + std::cout << "Couldn't read lena.jpg" << std::endl; + return ; + } + equalizeHist(img, img); + d_img.upload(img); + } +}; + +TEST_P(Haar, FaceDetect) +{ + MemStorage storage(cvCreateMemStorage(0)); + CvSeq *_objects; + _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3, + flags, Size(30, 30), Size(0, 0)); + vector vecAvgComp; + Seq(_objects).copyTo(vecAvgComp); + oclfaces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); + + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); +} + +TEST_P(Haar, FaceDetectUseBuf) +{ + ocl::OclCascadeClassifierBuf cascadebuf; + if(!cascadebuf.load(cascadeName)) + { + std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" 
<< std::endl; + return; + } + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + + // intentionally run ocl facedetect again and check if it still works after the first run + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30)); + cascadebuf.release(); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); +} + +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar, + Combine(Values(CV_HAAR_SCALE_IMAGE, 0), + Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/))); + +#endif //HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/test_pyrlk.cpp b/modules/ocl/test/test_optflow.cpp similarity index 54% rename from modules/ocl/test/test_pyrlk.cpp rename to modules/ocl/test/test_optflow.cpp index 064cb30bd8..0121be8f9e 100644 --- a/modules/ocl/test/test_pyrlk.cpp +++ b/modules/ocl/test/test_optflow.cpp @@ -1,4 +1,4 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -7,12 +7,16 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. 
// +// @Authors +// +// // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,9 +25,9 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and @@ -52,6 +56,124 @@ using namespace std; extern string workdir; + +////////////////////////////////////////////////////// +// GoodFeaturesToTrack +namespace +{ + IMPLEMENT_PARAM_CLASS(MinDistance, double) +} +PARAM_TEST_CASE(GoodFeaturesToTrack, MinDistance) +{ + double minDistance; + + virtual void SetUp() + { + minDistance = GET_PARAM(0); + } +}; + +TEST_P(GoodFeaturesToTrack, Accuracy) +{ + cv::Mat frame = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame.empty()); + + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat d_pts; + detector(oclMat(frame), d_pts); + + ASSERT_FALSE(d_pts.empty()); + + std::vector pts(d_pts.cols); + + detector.downloadPoints(d_pts, pts); + + std::vector pts_gold; + cv::goodFeaturesToTrack(frame, pts_gold, maxCorners, qualityLevel, minDistance); + + ASSERT_EQ(pts_gold.size(), pts.size()); + + size_t mistmatch = 0; + for (size_t i = 0; i < pts.size(); ++i) + { + cv::Point2i a = pts_gold[i]; + cv::Point2i b = pts[i]; + + bool eq = std::abs(a.x - b.x) < 1 && std::abs(a.y - b.y) < 1; + + if 
(!eq) + ++mistmatch; + } + + double bad_ratio = static_cast(mistmatch) / pts.size(); + + ASSERT_LE(bad_ratio, 0.01); +} + +TEST_P(GoodFeaturesToTrack, EmptyCorners) +{ + int maxCorners = 1000; + double qualityLevel = 0.01; + + cv::ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); + + cv::ocl::oclMat src(100, 100, CV_8UC1, cv::Scalar::all(0)); + cv::ocl::oclMat corners(1, maxCorners, CV_32FC2); + + detector(src, corners); + + ASSERT_TRUE(corners.empty()); +} + +INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, + testing::Values(MinDistance(0.0), MinDistance(3.0))); + +////////////////////////////////////////////////////////////////////////// +PARAM_TEST_CASE(TVL1, bool) +{ + bool useRoi; + + virtual void SetUp() + { + useRoi = GET_PARAM(0); + } + +}; + +TEST_P(TVL1, Accuracy) +{ + cv::Mat frame0 = readImage(workdir + "../gpu/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(workdir + "../gpu/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Mat flowx = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::Mat flowy = randomMat(rng, frame0.size(), CV_32FC1, 0, 0, useRoi); + cv::ocl::oclMat d_flowx(flowx), d_flowy(flowy); + d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); + + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::Mat flow; + alg->calc(frame0, frame1, flow); + cv::Mat gold[2]; + cv::split(flow, gold); + + EXPECT_MAT_SIMILAR(gold[0], d_flowx, 3e-3); + EXPECT_MAT_SIMILAR(gold[1], d_flowy, 3e-3); +} +INSTANTIATE_TEST_CASE_P(OCL_Video, TVL1, Values(true, false)); + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// PyrLKOpticalFlow + PARAM_TEST_CASE(Sparse, bool, bool) { bool useGray; @@ -60,7 +182,7 @@ PARAM_TEST_CASE(Sparse, bool, bool) virtual void SetUp() { UseSmart = GET_PARAM(0); 
- useGray = GET_PARAM(0); + useGray = GET_PARAM(1); } }; @@ -147,9 +269,9 @@ TEST_P(Sparse, Mat) } -INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine( - Values(false, true), - Values(false))); +INSTANTIATE_TEST_CASE_P(OCL_Video, Sparse, Combine( + Values(false, true), + Values(false, true))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyramids.cpp similarity index 75% rename from modules/ocl/test/test_pyrdown.cpp rename to modules/ocl/test/test_pyramids.cpp index 6d00fb5e45..1bd188dea6 100644 --- a/modules/ocl/test/test_pyrdown.cpp +++ b/modules/ocl/test/test_pyramids.cpp @@ -15,7 +15,6 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com // Yao Wang yao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, @@ -56,11 +55,12 @@ using namespace cvtest; using namespace testing; using namespace std; -PARAM_TEST_CASE(PyrDown, MatType, int) +PARAM_TEST_CASE(PyrBase, MatType, int) { int type; int channels; - + Mat dst_cpu; + oclMat gdst; virtual void SetUp() { type = GET_PARAM(0); @@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int) }; +/////////////////////// PyrDown ////////////////////////// +struct PyrDown : PyrBase {}; TEST_P(PyrDown, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); - - cv::ocl::oclMat gsrc(src), gdst; - cv::Mat dst_cpu; - cv::pyrDown(src, dst_cpu); - cv::ocl::pyrDown(gsrc, gdst); + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrDown(src, dst_cpu); + pyrDown(gsrc, gdst); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 
1e-4f : 1.0f); } @@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat) INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( Values(CV_8U, CV_32F), Values(1, 3, 4))); +/////////////////////// PyrUp ////////////////////////// +struct PyrUp : PyrBase {}; + +TEST_P(PyrUp, Accuracy) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrUp(src, dst_cpu); + pyrUp(gsrc, gdst); + + EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 1e-4f : 1.0)); + } + +} + + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( + Values(CV_8U, CV_32F), Values(1, 3, 4))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp deleted file mode 100644 index afd3e8b1b8..0000000000 --- a/modules/ocl/test/test_pyrup.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Zhang Chunpeng chunpeng@multicorewareinc.com -// Yao Wang yao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "opencv2/core/core.hpp" - -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrUp, MatType, int) -{ - int type; - int channels; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - } -}; - -TEST_P(PyrUp, Accuracy) -{ - for(int j = 0; j < LOOP_TIMES; j++) - { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - Mat dst_gold; - pyrUp(src, dst_gold); - ocl::oclMat dst; - ocl::oclMat srcMat(src); - ocl::pyrUp(srcMat, dst); - - EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 
1e-4f : 1.0)); - } - -} - - -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( - Values(CV_8U, CV_32F), Values(1, 3, 4))); - - -#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 4b21081a8b..27f9cec079 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -100,12 +100,6 @@ Mat randomMat(Size size, int type, double minVal, double maxVal) return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); } - - - - - - /* void showDiff(InputArray gold_, InputArray actual_, double eps) { @@ -137,58 +131,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps) } */ -/* -bool supportFeature(const DeviceInfo& info, FeatureSet feature) -{ - return TargetArchs::builtWith(feature) && info.supports(feature); -} -const vector& devices() -{ - static vector devs; - static bool first = true; - - if (first) - { - int deviceCount = getCudaEnabledDeviceCount(); - - devs.reserve(deviceCount); - - for (int i = 0; i < deviceCount; ++i) - { - DeviceInfo info(i); - if (info.isCompatible()) - devs.push_back(info); - } - - first = false; - } - - return devs; -} - -vector devices(FeatureSet feature) -{ - const vector& d = devices(); - - vector devs_filtered; - - if (TargetArchs::builtWith(feature)) - { - devs_filtered.reserve(d.size()); - - for (size_t i = 0, size = d.size(); i < size; ++i) - { - const DeviceInfo& info = d[i]; - - if (info.supports(feature)) - devs_filtered.push_back(info); - } - } - - return devs_filtered; -} -*/ vector types(int depth_start, int depth_end, int cn_start, int cn_end) { @@ -264,3 +207,48 @@ void PrintTo(const Inverse &inverse, std::ostream *os) (*os) << "direct"; } +double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + { + return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + } + else + { + if(sz1==0 && sz2==0) + return 0; + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; + } + return final_test_result; +} + diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 42fa69384d..0b101ec50b 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -55,13 +55,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); -//! return true if device supports specified feature and gpu module was built with support the feature. -//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); +// This function test if gpu_rst matches cpu_rst. +// If the two vectors are not equal, it will return the difference in vector size +// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) +// The smaller, the better matched +double checkRectSimilarity(cv::Size sz, std::vector& ob1, std::vector& ob2); -//! return all devices compatible with current gpu module build. -//const std::vector& devices(); -//! 
return all devices compatible with current gpu module build which support specified feature. -//std::vector devices(cv::gpu::FeatureSet feature); //! read image from testdata folder. cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); diff --git a/modules/photo/src/denoising.cpp b/modules/photo/src/denoising.cpp index 02d7a6f620..191926ccb7 100644 --- a/modules/photo/src/denoising.cpp +++ b/modules/photo/src/denoising.cpp @@ -59,17 +59,17 @@ void cv::fastNlMeansDenoising( InputArray _src, OutputArray _dst, float h, switch (src.type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, src.rows), + parallel_for_(cv::Range(0, src.rows), FastNlMeansDenoisingInvoker( src, dst, templateWindowSize, searchWindowSize, h)); break; @@ -159,19 +159,19 @@ void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _ds switch (srcImgs[0].type()) { case CV_8U: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC2: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, searchWindowSize, h)); break; case CV_8UC3: - parallel_for(cv::BlockedRange(0, srcImgs[0].rows), + parallel_for_(cv::Range(0, srcImgs[0].rows), FastNlMeansMultiDenoisingInvoker( srcImgs, imgToDenoiseIndex, temporalWindowSize, dst, templateWindowSize, 
searchWindowSize, h)); diff --git a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp index c4f13826d2..8824f17c0d 100644 --- a/modules/photo/src/fast_nlmeans_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_denoising_invoker.hpp @@ -55,12 +55,12 @@ using namespace std; using namespace cv; template -struct FastNlMeansDenoisingInvoker { +struct FastNlMeansDenoisingInvoker : ParallelLoopBody { public: FastNlMeansDenoisingInvoker(const Mat& src, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const FastNlMeansDenoisingInvoker&); @@ -156,9 +156,9 @@ FastNlMeansDenoisingInvoker::FastNlMeansDenoisingInvoker( } template -void FastNlMeansDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array2d dist_sums(search_window_size_, search_window_size_); diff --git a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp index 2ae5054e00..8b32eded18 100644 --- a/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp +++ b/modules/photo/src/fast_nlmeans_multi_denoising_invoker.hpp @@ -55,13 +55,13 @@ using namespace std; using namespace cv; template -struct FastNlMeansMultiDenoisingInvoker { +struct FastNlMeansMultiDenoisingInvoker : ParallelLoopBody { public: FastNlMeansMultiDenoisingInvoker( const std::vector& srcImgs, int imgToDenoiseIndex, int temporalWindowSize, Mat& dst, int template_window_size, int search_window_size, const float h); - void operator() (const BlockedRange& range) const; + void operator() (const Range& range) const; private: void operator= (const 
FastNlMeansMultiDenoisingInvoker&); @@ -175,9 +175,9 @@ FastNlMeansMultiDenoisingInvoker::FastNlMeansMultiDenoisingInvoker( } template -void FastNlMeansMultiDenoisingInvoker::operator() (const BlockedRange& range) const { - int row_from = range.begin(); - int row_to = range.end() - 1; +void FastNlMeansMultiDenoisingInvoker::operator() (const Range& range) const { + int row_from = range.start; + int row_to = range.end - 1; Array3d dist_sums(temporal_window_size_, search_window_size_, search_window_size_); diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 9bab58c52f..d918cfff29 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -66,21 +66,17 @@ struct DistIdxPair }; -struct MatchPairsBody +struct MatchPairsBody : ParallelLoopBody { - MatchPairsBody(const MatchPairsBody& other) - : matcher(other.matcher), features(other.features), - pairwise_matches(other.pairwise_matches), near_pairs(other.near_pairs) {} - MatchPairsBody(FeaturesMatcher &_matcher, const vector &_features, vector &_pairwise_matches, vector > &_near_pairs) : matcher(_matcher), features(_features), pairwise_matches(_pairwise_matches), near_pairs(_near_pairs) {} - void operator ()(const BlockedRange &r) const + void operator ()(const Range &r) const { const int num_images = static_cast(features.size()); - for (int i = r.begin(); i < r.end(); ++i) + for (int i = r.start; i < r.end; ++i) { int from = near_pairs[i].first; int to = near_pairs[i].second; @@ -526,9 +522,9 @@ void FeaturesMatcher::operator ()(const vector &features, vector< MatchPairsBody body(*this, features, pairwise_matches, near_pairs); if (is_thread_safe_) - parallel_for(BlockedRange(0, static_cast(near_pairs.size())), body); + parallel_for_(Range(0, static_cast(near_pairs.size())), body); else - body(BlockedRange(0, static_cast(near_pairs.size()))); + body(Range(0, static_cast(near_pairs.size()))); LOGLN_CHAT(""); } diff --git 
a/modules/stitching/src/motion_estimators.cpp b/modules/stitching/src/motion_estimators.cpp index ab27a46a2a..c873bc721a 100644 --- a/modules/stitching/src/motion_estimators.cpp +++ b/modules/stitching/src/motion_estimators.cpp @@ -69,13 +69,13 @@ struct CalcRotation K_from(0,0) = cameras[edge.from].focal; K_from(1,1) = cameras[edge.from].focal * cameras[edge.from].aspect; K_from(0,2) = cameras[edge.from].ppx; - K_from(0,2) = cameras[edge.from].ppy; + K_from(1,2) = cameras[edge.from].ppy; Mat_ K_to = Mat::eye(3, 3, CV_64F); K_to(0,0) = cameras[edge.to].focal; K_to(1,1) = cameras[edge.to].focal * cameras[edge.to].aspect; K_to(0,2) = cameras[edge.to].ppx; - K_to(0,2) = cameras[edge.to].ppy; + K_to(1,2) = cameras[edge.to].ppy; Mat R = K_from.inv() * pairwise_matches[pair_idx].H.inv() * K_to; cameras[edge.to].R = cameras[edge.from].R * R; diff --git a/modules/superres/perf/perf_main.cpp b/modules/superres/perf/perf_main.cpp index adc69e6e8b..0a8ab5deaa 100644 --- a/modules/superres/perf/perf_main.cpp +++ b/modules/superres/perf/perf_main.cpp @@ -44,4 +44,11 @@ using namespace perf; -CV_PERF_TEST_MAIN(superres, printCudaInfo()) +static const char * impls[] = { +#ifdef HAVE_CUDA + "cuda", +#endif + "plain" +}; + +CV_PERF_TEST_MAIN_WITH_IMPLS(superres, impls, printCudaInfo()) diff --git a/modules/ts/include/opencv2/ts/ts_gtest.h b/modules/ts/include/opencv2/ts/ts_gtest.h index 2d1227ecdc..80b410bb3c 100644 --- a/modules/ts/include/opencv2/ts/ts_gtest.h +++ b/modules/ts/include/opencv2/ts/ts_gtest.h @@ -17566,6 +17566,9 @@ GTEST_DECLARE_string_(color); // the tests to run. If the filter is not given all tests are executed. GTEST_DECLARE_string_(filter); +// OpenCV extension: same as filter, but for the parameters string. +GTEST_DECLARE_string_(param_filter); + // This flag causes the Google Test to list tests. None of the tests listed // are actually run if the flag is provided. 
GTEST_DECLARE_bool_(list_tests); diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp index fe57655157..1e68cd49b0 100644 --- a/modules/ts/include/opencv2/ts/ts_perf.hpp +++ b/modules/ts/include/opencv2/ts/ts_perf.hpp @@ -210,18 +210,13 @@ private: #define SANITY_CHECK_KEYPOINTS(array, ...) ::perf::Regression::addKeypoints(this, #array, array , ## __VA_ARGS__) #define SANITY_CHECK_MATCHES(array, ...) ::perf::Regression::addMatches(this, #array, array , ## __VA_ARGS__) -#ifdef HAVE_CUDA class CV_EXPORTS GpuPerf { public: static bool targetDevice(); }; -# define PERF_RUN_GPU() ::perf::GpuPerf::targetDevice() -#else -# define PERF_RUN_GPU() false -#endif - +#define PERF_RUN_GPU() ::perf::GpuPerf::targetDevice() /*****************************************************************************************\ * Container for performance metrics * @@ -263,7 +258,11 @@ public: TestBase(); static void Init(int argc, const char* const argv[]); + static void Init(const std::vector & availableImpls, + int argc, const char* const argv[]); + static void RecordRunParameters(); static std::string getDataPath(const std::string& relativePath); + static std::string getSelectedImpl(); protected: virtual void PerfTestBody() = 0; @@ -477,15 +476,29 @@ CV_EXPORTS void PrintTo(const Size& sz, ::std::ostream* os); void fixture##_##name::PerfTestBody() -#define CV_PERF_TEST_MAIN(testsuitname, ...) \ -int main(int argc, char **argv)\ -{\ +#define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) 
\ while (++argc >= (--argc,-1)) {__VA_ARGS__; break;} /*this ugly construction is needed for VS 2005*/\ - ::perf::Regression::Init(#testsuitname);\ - ::perf::TestBase::Init(argc, argv);\ + ::perf::Regression::Init(#modulename);\ + ::perf::TestBase::Init(std::vector(impls, impls + sizeof impls / sizeof *impls),\ + argc, argv);\ ::testing::InitGoogleTest(&argc, argv);\ cvtest::printVersionInfo();\ - return RUN_ALL_TESTS();\ + ::testing::Test::RecordProperty("cv_module_name", #modulename);\ + ::perf::TestBase::RecordRunParameters();\ + return RUN_ALL_TESTS(); + +// impls must be an array, not a pointer; "plain" should always be one of the implementations +#define CV_PERF_TEST_MAIN_WITH_IMPLS(modulename, impls, ...) \ +int main(int argc, char **argv)\ +{\ + CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, __VA_ARGS__)\ +} + +#define CV_PERF_TEST_MAIN(modulename, ...) \ +int main(int argc, char **argv)\ +{\ + const char * plain_only[] = { "plain" };\ + CV_PERF_TEST_MAIN_INTERNALS(modulename, plain_only, __VA_ARGS__)\ } #define TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); stopTimer()) diff --git a/modules/ts/misc/run.py b/modules/ts/misc/run.py index 4351713715..a64127f0d4 100755 --- a/modules/ts/misc/run.py +++ b/modules/ts/misc/run.py @@ -288,6 +288,16 @@ class TestSuite(object): if self.adb: # construct name for aapt tool self.aapt = [os.path.join(os.path.dirname(self.adb[0]), ("aapt","aapt.exe")[hostos == 'nt'])] + if not os.path.isfile(self.aapt[0]): + # it's moved in SDK r22 + sdk_dir = os.path.dirname( os.path.dirname(self.adb[0]) ) + aapt_fn = ("aapt", "aapt.exe")[hostos == 'nt'] + for r, ds, fs in os.walk( os.path.join(sdk_dir, 'build-tools') ): + if aapt_fn in fs: + self.aapt = [ os.path.join(r, aapt_fn) ] + break + else: + self.error = "Can't find '%s' tool!" 
% aapt_fn # fix has_perf_tests param self.has_perf_tests = self.has_perf_tests == "ON" diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 7ae6aa5980..5d478645b2 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -1,6 +1,9 @@ #!/usr/bin/env python -import sys, re, os.path +import collections +import re +import os.path +import sys from xml.dom.minidom import parse class TestInfo(object): @@ -100,34 +103,39 @@ class TestInfo(object): def dump(self, units="ms"): print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units) - def shortName(self): + + def getName(self): pos = self.name.find("/") if pos > 0: - name = self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] + return self.name[:pos] + return self.name + + + def getFixture(self): + if self.fixture.endswith(self.getName()): + fixture = self.fixture[:-len(self.getName())] else: fixture = self.fixture if fixture.endswith("_"): fixture = fixture[:-1] + return fixture + + + def param(self): + return '::'.join(filter(None, [self.type_param, self.value_param])) + + def shortName(self): + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture])) + def __str__(self): - pos = self.name.find("/") - if pos > 0: - name = self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] - else: - fixture = self.fixture - if fixture.endswith("_"): - fixture = fixture[:-1] + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param])) + def __cmp__(self, other): r = cmp(self.fixture, other.fixture); if r != 0: @@ -154,12 +162,31 @@ class TestInfo(object): return 1 return 0 +# This is a Sequence for compatibility with old scripts, +# which treat parseLogFile's return value as a list. 
+class TestRunInfo(collections.Sequence): + def __init__(self, properties, tests): + self.properties = properties + self.tests = tests + + def __len__(self): + return len(self.tests) + + def __getitem__(self, key): + return self.tests[key] + def parseLogFile(filename): - tests = [] log = parse(filename) - for case in log.getElementsByTagName("testcase"): - tests.append(TestInfo(case)) - return tests + + properties = { + attr_name[3:]: attr_value + for (attr_name, attr_value) in log.documentElement.attributes.items() + if attr_name.startswith('cv_') + } + + tests = map(TestInfo, log.getElementsByTagName("testcase")) + + return TestRunInfo(properties, tests) if __name__ == "__main__": @@ -168,8 +195,18 @@ if __name__ == "__main__": exit(0) for arg in sys.argv[1:]: - print "Tests found in", arg - tests = parseLogFile(arg) - for t in sorted(tests): + print "Processing {}...".format(arg) + + run = parseLogFile(arg) + + print "Properties:" + + for (prop_name, prop_value) in run.properties.items(): + print "\t{} = {}".format(prop_name, prop_value) + + print "Tests:" + + for t in sorted(run.tests): t.dump() + print diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py new file mode 100755 index 0000000000..e911314e92 --- /dev/null +++ b/modules/ts/misc/xls-report.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python + +""" + This script can generate XLS reports from OpenCV tests' XML output files. + + To use it, first, create a directory for each machine you ran tests on. + Each such directory will become a sheet in the report. Put each XML file + into the corresponding directory. + + Then, create your configuration file(s). You can have a global configuration + file (specified with the -c option), and per-sheet configuration files, which + must be called sheet.conf and placed in the directory corresponding to the sheet. + The settings in the per-sheet configuration file will override those in the + global configuration file, if both are present. 
+ + A configuration file must consist of a Python dictionary. The following keys + will be recognized: + + * 'comparisons': [{'from': string, 'to': string}] + List of configurations to compare performance between. For each item, + the sheet will have a column showing speedup from configuration named + 'from' to configuration named "to". + + * 'configuration_matchers': [{'properties': {string: object}, 'name': string}] + Instructions for matching test run property sets to configuration names. + + For each found XML file: + + 1) All attributes of the root element starting with the prefix 'cv_' are + placed in a dictionary, with the cv_ prefix stripped and the cv_module_name + element deleted. + + 2) The first matcher for which the XML's file property set contains the same + keys with equal values as its 'properties' dictionary is searched for. + A missing property can be matched by using None as the value. + + Corollary 1: you should place more specific matchers before less specific + ones. + + Corollary 2: an empty 'properties' dictionary matches every property set. + + 3) If a matching matcher is found, its 'name' string is presumed to be the name + of the configuration the XML file corresponds to. Otherwise, a warning is + printed. A warning is also printed if two different property sets match to the + same configuration name. + + * 'configurations': [string] + List of names for compile-time and runtime configurations of OpenCV. + Each item will correspond to a column of the sheet. + + * 'module_colors': {string: string} + Mapping from module name to color name. In the sheet, cells containing module + names from this mapping will be colored with the corresponding color. You can + find the list of available colors here: + . + + * 'sheet_name': string + Name for the sheet. If this parameter is missing, the name of sheet's directory + will be used. 
+ + Note that all keys are optional, although to get useful results, you'll want to + specify at least 'configurations' and 'configuration_matchers'. + + Finally, run the script. Use the --help option for usage information. +""" + +from __future__ import division + +import ast +import fnmatch +import logging +import numbers +import os, os.path +import re + +from argparse import ArgumentParser +from collections import OrderedDict +from glob import glob +from itertools import ifilter + +import xlwt + +from testlog_parser import parseLogFile + +re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE) +re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE) + +time_style = xlwt.easyxf(num_format_str='#0.00') +no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25') + +speedup_style = time_style +good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00') +bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') +no_speedup_style = no_time_style +error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') +header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True') + +class Collector(object): + def __init__(self, config_match_func): + self.__config_cache = {} + self.config_match_func = config_match_func + self.tests = {} + + # Format a sorted sequence of pairs as if it was a dictionary. + # We can't just use a dictionary instead, since we want to preserve the sorted order of the keys. 
+ @staticmethod + def __format_config_cache_key(pairs): + return '{' + ', '.join(repr(k) + ': ' + repr(v) for (k, v) in pairs) + '}' + + def collect_from(self, xml_path): + run = parseLogFile(xml_path) + + module = run.properties['module_name'] + + properties = run.properties.copy() + del properties['module_name'] + + props_key = tuple(sorted(properties.iteritems())) # dicts can't be keys + + if props_key in self.__config_cache: + configuration = self.__config_cache[props_key] + else: + configuration = self.config_match_func(properties) + + if configuration is None: + logging.warning('failed to match properties to a configuration: %s', + Collector.__format_config_cache_key(props_key)) + else: + same_config_props = [it[0] for it in self.__config_cache.iteritems() if it[1] == configuration] + if len(same_config_props) > 0: + logging.warning('property set %s matches the same configuration %r as property set %s', + Collector.__format_config_cache_key(props_key), + configuration, + Collector.__format_config_cache_key(same_config_props[0])) + + self.__config_cache[props_key] = configuration + + if configuration is None: return + + module_tests = self.tests.setdefault(module, OrderedDict()) + + for test in run.tests: + test_results = module_tests.setdefault((test.shortName(), test.param()), {}) + test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status + +def make_match_func(matchers): + def match_func(properties): + for matcher in matchers: + if all(properties.get(name) == value + for (name, value) in matcher['properties'].iteritems()): + return matcher['name'] + + return None + + return match_func + +def main(): + arg_parser = ArgumentParser(description='Build an XLS performance report.') + arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs') + arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file') + arg_parser.add_argument('-c', 
'--config', metavar='CONF', help='global configuration file') + + args = arg_parser.parse_args() + + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) + + if args.config is not None: + with open(args.config) as global_conf_file: + global_conf = ast.literal_eval(global_conf_file.read()) + else: + global_conf = {} + + wb = xlwt.Workbook() + + for sheet_path in args.sheet_dirs: + try: + with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file: + sheet_conf = ast.literal_eval(sheet_conf_file.read()) + except Exception: + sheet_conf = {} + logging.debug('no sheet.conf for %s', sheet_path) + + sheet_conf = dict(global_conf.items() + sheet_conf.items()) + + config_names = sheet_conf.get('configurations', []) + config_matchers = sheet_conf.get('configuration_matchers', []) + + collector = Collector(make_match_func(config_matchers)) + + for root, _, filenames in os.walk(sheet_path): + logging.info('looking in %s', root) + for filename in fnmatch.filter(filenames, '*.xml'): + collector.collect_from(os.path.join(root, filename)) + + sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))) + + sheet.row(0).height = 800 + sheet.panes_frozen = True + sheet.remove_splits = True + sheet.horz_split_pos = 1 + sheet.horz_split_first_visible = 1 + + sheet_comparisons = sheet_conf.get('comparisons', []) + + for i, w in enumerate([2000, 15000, 2500, 2000, 15000] + + (len(config_names) + 1 + len(sheet_comparisons)) * [3000]): + sheet.col(i).width = w + + for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + + config_names + [None] + + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): + sheet.row(0).write(i, caption, header_style) + + row = 1 + + module_colors = sheet_conf.get('module_colors', {}) + module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) + for module, color in module_colors.iteritems()} + + for 
module, tests in sorted(collector.tests.iteritems()): + for ((test, param), configs) in tests.iteritems(): + sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) + sheet.write(row, 1, test) + + param_list = param[1:-1].split(", ") + sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) + sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + + sheet.row(row).write(4, param) + for i, c in enumerate(config_names): + if c in configs: + sheet.write(row, 5 + i, configs[c], time_style) + else: + sheet.write(row, 5 + i, None, no_time_style) + + for i, comp in enumerate(sheet_comparisons): + cmp_from = configs.get(comp["from"]) + cmp_to = configs.get(comp["to"]) + col = 5 + len(config_names) + 1 + i + + if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number): + try: + speedup = cmp_from / cmp_to + sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else + bad_speedup_style if speedup < 0.9 else + speedup_style) + except ArithmeticError as e: + sheet.write(row, col, None, error_speedup_style) + else: + sheet.write(row, col, None, no_speedup_style) + + row += 1 + if row % 1000 == 0: sheet.flush_row_data() + + wb.save(args.output) + +if __name__ == '__main__': + main() diff --git a/modules/ts/src/precomp.hpp b/modules/ts/src/precomp.hpp index 10acd7ad8f..a74417da47 100644 --- a/modules/ts/src/precomp.hpp +++ b/modules/ts/src/precomp.hpp @@ -1,4 +1,5 @@ #include "opencv2/core/core_c.h" +#include "opencv2/core/internal.hpp" #include "opencv2/ts/ts.hpp" #ifdef GTEST_LINKED_AS_SHARED_LIBRARY diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 1d636e6746..38a23706dd 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2,6 +2,10 @@ #include #include +#ifdef HAVE_TEGRA_OPTIMIZATION +#include "tegra.hpp" +#endif + using namespace cv; namespace cvtest @@ -2936,28 +2940,75 @@ MatComparator::operator()(const char* expr1, const char* 
expr2, void printVersionInfo(bool useStdOut) { - ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION); + ::testing::Test::RecordProperty("cv_version", CV_VERSION); if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl; std::string buildInfo( cv::getBuildInformation() ); size_t pos1 = buildInfo.find("Version control"); - size_t pos2 = buildInfo.find("\n", pos1);\ + size_t pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("Version_control", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_vcs_version", ver); + if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl; } pos1 = buildInfo.find("inner version"); - pos2 = buildInfo.find("\n", pos1);\ + pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("inner_version", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_inner_vcs_version", ver); + if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl; } + + const char* parallel_framework = currentParallelFramework(); + + if (parallel_framework) { + ::testing::Test::RecordProperty("cv_parallel_framework", parallel_framework); + if (useStdOut) std::cout << "Parallel framework: " << parallel_framework << std::endl; + } + + std::string cpu_features; + +#if CV_SSE + if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse"; +#endif +#if CV_SSE2 + if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2"; +#endif +#if 
CV_SSE3 + if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3"; +#endif +#if CV_SSSE3 + if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3"; +#endif +#if CV_SSE4_1 + if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1"; +#endif +#if CV_SSE4_2 + if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2"; +#endif +#if CV_AVX + if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx"; +#endif +#if CV_NEON + cpu_features += " neon"; // NEON is currently not checked at runtime +#endif + + cpu_features.erase(0, 1); // erase initial space + + ::testing::Test::RecordProperty("cv_cpu_features", cpu_features); + if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl; + +#ifdef HAVE_TEGRA_OPTIMIZATION + const char * tegra_optimization = tegra::isDeviceSupported() ? "enabled" : "disabled"; + ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); + if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; +#endif } } //namespace cvtest diff --git a/modules/ts/src/ts_gtest.cpp b/modules/ts/src/ts_gtest.cpp index 7c388cbd4a..48870913c3 100644 --- a/modules/ts/src/ts_gtest.cpp +++ b/modules/ts/src/ts_gtest.cpp @@ -497,6 +497,7 @@ const char kBreakOnFailureFlag[] = "break_on_failure"; const char kCatchExceptionsFlag[] = "catch_exceptions"; const char kColorFlag[] = "color"; const char kFilterFlag[] = "filter"; +const char kParamFilterFlag[] = "param_filter"; const char kListTestsFlag[] = "list_tests"; const char kOutputFlag[] = "output"; const char kPrintTimeFlag[] = "print_time"; @@ -575,6 +576,7 @@ class GTestFlagSaver { death_test_style_ = GTEST_FLAG(death_test_style); death_test_use_fork_ = GTEST_FLAG(death_test_use_fork); filter_ = GTEST_FLAG(filter); + param_filter_ = GTEST_FLAG(param_filter); internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); list_tests_ = GTEST_FLAG(list_tests); output_ = GTEST_FLAG(output); @@ -596,6 +598,7 @@ class 
GTestFlagSaver { GTEST_FLAG(death_test_style) = death_test_style_; GTEST_FLAG(death_test_use_fork) = death_test_use_fork_; GTEST_FLAG(filter) = filter_; + GTEST_FLAG(param_filter) = param_filter_; GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; GTEST_FLAG(list_tests) = list_tests_; GTEST_FLAG(output) = output_; @@ -617,6 +620,7 @@ class GTestFlagSaver { std::string death_test_style_; bool death_test_use_fork_; std::string filter_; + std::string param_filter_; std::string internal_run_death_test_; bool list_tests_; std::string output_; @@ -1699,6 +1703,12 @@ GTEST_DEFINE_string_( "exclude). A test is run if it matches one of the positive " "patterns and does not match any of the negative patterns."); +GTEST_DEFINE_string_( + param_filter, + internal::StringFromGTestEnv("param_filter", kUniversalFilter), + "Same syntax and semantics as for param, but these patterns " + "have to match the test's parameters."); + GTEST_DEFINE_bool_(list_tests, false, "List all tests without running them."); @@ -4188,6 +4198,14 @@ void PrettyUnitTestResultPrinter::OnTestIterationStart( "Note: %s filter = %s\n", GTEST_NAME_, filter); } + const char* const param_filter = GTEST_FLAG(param_filter).c_str(); + + // Ditto. + if (!String::CStringEquals(param_filter, kUniversalFilter)) { + ColoredPrintf(COLOR_YELLOW, + "Note: %s parameter filter = %s\n", GTEST_NAME_, param_filter); + } + if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) { const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1); ColoredPrintf(COLOR_YELLOW, @@ -5873,9 +5891,15 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) { kDisableTestFilter); test_info->is_disabled_ = is_disabled; + const std::string value_param(test_info->value_param() == NULL ? 
+ "" : test_info->value_param()); + const bool matches_filter = internal::UnitTestOptions::FilterMatchesTest(test_case_name, - test_name); + test_name) && + internal::UnitTestOptions::MatchesFilter(value_param, + GTEST_FLAG(param_filter).c_str()); + test_info->matches_filter_ = matches_filter; const bool is_runnable = @@ -6223,6 +6247,12 @@ static const char kColorEncodedHelpMessage[] = " Run only the tests whose name matches one of the positive patterns but\n" " none of the negative patterns. '?' matches any single character; '*'\n" " matches any substring; ':' separates two patterns.\n" +" @G--" GTEST_FLAG_PREFIX_ "param_filter=@YPOSITIVE_PATTERNS" + "[@G-@YNEGATIVE_PATTERNS]@D\n" +" Like @G--" GTEST_FLAG_PREFIX_ + "filter@D, but applies to the test's parameter. If a\n" +" test is not parameterized, its parameter is considered to be the\n" +" empty string.\n" " @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" " Run all disabled tests too.\n" "\n" @@ -6300,6 +6330,7 @@ void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) { ParseBoolFlag(arg, kDeathTestUseFork, >EST_FLAG(death_test_use_fork)) || ParseStringFlag(arg, kFilterFlag, >EST_FLAG(filter)) || + ParseStringFlag(arg, kParamFilterFlag, >EST_FLAG(param_filter)) || ParseStringFlag(arg, kInternalRunDeathTestFlag, >EST_FLAG(internal_run_death_test)) || ParseBoolFlag(arg, kListTestsFlag, >EST_FLAG(list_tests)) || diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index c375e7c388..c2c1ee6bd2 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -14,30 +14,10 @@ int64 TestBase::timeLimitDefault = 0; unsigned int TestBase::iterationsLimitDefault = (unsigned int)(-1); int64 TestBase::_timeadjustment = 0; -const std::string command_line_keys = - "{ |perf_max_outliers |8 |percent of allowed outliers}" - "{ |perf_min_samples |10 |minimal required numer of samples}" - "{ |perf_force_samples |100 |force set maximum number of samples for all tests}" - "{ |perf_seed 
|809564 |seed for random numbers generator}" - "{ |perf_threads |-1 |the number of worker threads, if parallel execution is enabled}" - "{ |perf_write_sanity |false |create new records for sanity checks}" - "{ |perf_verify_sanity |false |fail tests having no regression data for sanity checks}" -#ifdef ANDROID - "{ |perf_time_limit |6.0 |default time limit for a single test (in seconds)}" - "{ |perf_affinity_mask |0 |set affinity mask for the main thread}" - "{ |perf_log_power_checkpoints | |additional xml logging for power measurement}" -#else - "{ |perf_time_limit |3.0 |default time limit for a single test (in seconds)}" -#endif - "{ |perf_max_deviation |1.0 |}" - "{h |help |false |print help info}" -#ifdef HAVE_CUDA - "{ |perf_run_cpu |false |run GPU performance tests for analogical CPU functions}" - "{ |perf_cuda_device |0 |run GPU test suite onto specific CUDA capable device}" - "{ |perf_cuda_info_only |false |print an information about system and an available CUDA devices and then exit.}" -#endif -; +// Item [0] will be considered the default implementation. +static std::vector available_impls; +static std::string param_impl; static double param_max_outliers; static double param_max_deviation; static unsigned int param_min_samples; @@ -48,7 +28,6 @@ static int param_threads; static bool param_write_sanity; static bool param_verify_sanity; #ifdef HAVE_CUDA -static bool param_run_cpu; static int param_cuda_device; #endif @@ -577,11 +556,12 @@ Regression& Regression::operator() (const std::string& name, cv::InputArray arra std::string nodename = getCurrentTestNodeName(); -#ifdef HAVE_CUDA - static const std::string prefix = (param_run_cpu)? "CPU_" : "GPU_"; + // This is a hack for compatibility and it should eventually get removed. + // gpu's tests don't even have CPU sanity data anymore. if(suiteName == "gpu") - nodename = prefix + nodename; -#endif + { + nodename = (PERF_RUN_GPU() ? 
"GPU_" : "CPU_") + nodename; + } cv::FileNode n = rootIn[nodename]; if(n.isNone()) @@ -646,6 +626,43 @@ performance_metrics::performance_metrics() void TestBase::Init(int argc, const char* const argv[]) { + std::vector plain_only; + plain_only.push_back("plain"); + TestBase::Init(plain_only, argc, argv); +} + +void TestBase::Init(const std::vector & availableImpls, + int argc, const char* const argv[]) +{ + available_impls = availableImpls; + + const std::string command_line_keys = + "{ |perf_max_outliers |8 |percent of allowed outliers}" + "{ |perf_min_samples |10 |minimal required numer of samples}" + "{ |perf_force_samples |100 |force set maximum number of samples for all tests}" + "{ |perf_seed |809564 |seed for random numbers generator}" + "{ |perf_threads |-1 |the number of worker threads, if parallel execution is enabled}" + "{ |perf_write_sanity |false |create new records for sanity checks}" + "{ |perf_verify_sanity |false |fail tests having no regression data for sanity checks}" + "{ |perf_impl |" + available_impls[0] + + "|the implementation variant of functions under test}" + "{ |perf_list_impls |false |list available implementation variants and exit}" + "{ |perf_run_cpu |false |deprecated, equivalent to --perf_impl=plain}" +#ifdef ANDROID + "{ |perf_time_limit |6.0 |default time limit for a single test (in seconds)}" + "{ |perf_affinity_mask |0 |set affinity mask for the main thread}" + "{ |perf_log_power_checkpoints | |additional xml logging for power measurement}" +#else + "{ |perf_time_limit |3.0 |default time limit for a single test (in seconds)}" +#endif + "{ |perf_max_deviation |1.0 |}" + "{h |help |false |print help info}" +#ifdef HAVE_CUDA + "{ |perf_cuda_device |0 |run GPU test suite onto specific CUDA capable device}" + "{ |perf_cuda_info_only |false |print an information about system and an available CUDA devices and then exit.}" +#endif + ; + cv::CommandLineParser args(argc, argv, command_line_keys.c_str()); if (args.get("help")) { @@ -656,6 
+673,7 @@ void TestBase::Init(int argc, const char* const argv[]) ::testing::AddGlobalTestEnvironment(new PerfEnvironment); + param_impl = args.get("perf_run_cpu") ? "plain" : args.get("perf_impl"); param_max_outliers = std::min(100., std::max(0., args.get("perf_max_outliers"))); param_min_samples = std::max(1u, args.get("perf_min_samples")); param_max_deviation = std::max(0., args.get("perf_max_deviation")); @@ -670,19 +688,41 @@ void TestBase::Init(int argc, const char* const argv[]) log_power_checkpoints = args.get("perf_log_power_checkpoints"); #endif + bool param_list_impls = args.get("perf_list_impls"); + + if (param_list_impls) + { + fputs("Available implementation variants:", stdout); + for (size_t i = 0; i < available_impls.size(); ++i) { + putchar(' '); + fputs(available_impls[i].c_str(), stdout); + } + putchar('\n'); + exit(0); + } + + if (std::find(available_impls.begin(), available_impls.end(), param_impl) == available_impls.end()) + { + printf("No such implementation: %s\n", param_impl.c_str()); + exit(1); + } + #ifdef HAVE_CUDA bool printOnly = args.get("perf_cuda_info_only"); if (printOnly) exit(0); +#endif + + if (available_impls.size() > 1) + printf("[----------]\n[ INFO ] \tImplementation variant: %s.\n[----------]\n", param_impl.c_str()), fflush(stdout); + +#ifdef HAVE_CUDA - param_run_cpu = args.get("perf_run_cpu"); param_cuda_device = std::max(0, std::min(cv::gpu::getCudaEnabledDeviceCount(), args.get("perf_cuda_device"))); - if (param_run_cpu) - printf("[----------]\n[ GPU INFO ] \tRun test suite on CPU.\n[----------]\n"), fflush(stdout); - else + if (param_impl == "cuda") { cv::gpu::DeviceInfo info(param_cuda_device); if (!info.isCompatible()) @@ -708,6 +748,18 @@ void TestBase::Init(int argc, const char* const argv[]) _timeadjustment = _calibrate(); } +void TestBase::RecordRunParameters() +{ + ::testing::Test::RecordProperty("cv_implementation", param_impl); + ::testing::Test::RecordProperty("cv_num_threads", param_threads); +} + 
+std::string TestBase::getSelectedImpl() +{ + return param_impl; +} + + int64 TestBase::_calibrate() { class _helper : public ::perf::TestBase @@ -1325,12 +1377,10 @@ void perf::sort(std::vector& pts, cv::InputOutputArray descriptors /*****************************************************************************************\ * ::perf::GpuPerf \*****************************************************************************************/ -#ifdef HAVE_CUDA bool perf::GpuPerf::targetDevice() { - return !param_run_cpu; + return param_impl == "cuda"; } -#endif /*****************************************************************************************\ * ::perf::PrintTo diff --git a/modules/video/perf/perf_optflowpyrlk.cpp b/modules/video/perf/perf_optflowpyrlk.cpp index 12005f8ffa..8c53db03ae 100644 --- a/modules/video/perf/perf_optflowpyrlk.cpp +++ b/modules/video/perf/perf_optflowpyrlk.cpp @@ -165,7 +165,8 @@ PERF_TEST_P(Path_Idx_Cn_NPoints_WSize_Deriv, OpticalFlowPyrLK_self, testing::Com declare.in(pyramid1, pyramid2, inPoints).out(outPoints); declare.time(400); - TEST_CYCLE() + int runs = 3; + TEST_CYCLE_MULTIRUN(runs) { calcOpticalFlowPyrLK(pyramid1, pyramid2, inPoints, outPoints, status, err, Size(winSize, winSize), maxLevel, criteria, @@ -217,4 +218,4 @@ PERF_TEST_P(Path_Win_Deriv_Border_Reuse, OpticalFlowPyrLK_pyr, testing::Combine( } SANITY_CHECK(pyramid); -} \ No newline at end of file +} diff --git a/modules/video/src/bgfg_gaussmix2.cpp b/modules/video/src/bgfg_gaussmix2.cpp index e532af2ae6..6bbb960482 100644 --- a/modules/video/src/bgfg_gaussmix2.cpp +++ b/modules/video/src/bgfg_gaussmix2.cpp @@ -248,7 +248,7 @@ detectShadowGMM(const float* data, int nchannels, int nmodes, //IEEE Trans. 
on Pattern Analysis and Machine Intelligence, vol.26, no.5, pages 651-656, 2004 //http://www.zoranz.net/Publications/zivkovic2004PAMI.pdf -struct MOG2Invoker +struct MOG2Invoker : ParallelLoopBody { MOG2Invoker(const Mat& _src, Mat& _dst, GMM* _gmm, float* _mean, @@ -280,9 +280,9 @@ struct MOG2Invoker cvtfunc = src->depth() != CV_32F ? getConvertFunc(src->depth(), CV_32F) : 0; } - void operator()(const BlockedRange& range) const + void operator()(const Range& range) const { - int y0 = range.begin(), y1 = range.end(); + int y0 = range.start, y1 = range.end; int ncols = src->cols, nchannels = src->channels(); AutoBuffer buf(src->cols*nchannels); float alpha1 = 1.f - alphaT; @@ -562,15 +562,15 @@ void BackgroundSubtractorMOG2::operator()(InputArray _image, OutputArray _fgmask learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( 2*nframes, history ); CV_Assert(learningRate >= 0); - parallel_for(BlockedRange(0, image.rows), - MOG2Invoker(image, fgmask, - (GMM*)bgmodel.data, - (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), - bgmodelUsedModes.data, nmixtures, (float)learningRate, - (float)varThreshold, - backgroundRatio, varThresholdGen, - fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, - bShadowDetection, nShadowDetection)); + parallel_for_(Range(0, image.rows), + MOG2Invoker(image, fgmask, + (GMM*)bgmodel.data, + (float*)(bgmodel.data + sizeof(GMM)*nmixtures*image.rows*image.cols), + bgmodelUsedModes.data, nmixtures, (float)learningRate, + (float)varThreshold, + backgroundRatio, varThresholdGen, + fVarInit, fVarMin, fVarMax, float(-learningRate*fCT), fTau, + bShadowDetection, nShadowDetection)); } void BackgroundSubtractorMOG2::getBackgroundImage(OutputArray backgroundImage) const diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 9e47eb8029..291cb86a26 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -156,7 +156,7 @@ 
cv::detail::LKTrackerInvoker::LKTrackerInvoker( minEigThreshold = _minEigThreshold; } -void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const +void cv::detail::LKTrackerInvoker::operator()(const Range& range) const { Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f); const Mat& I = *prevImg; @@ -170,7 +170,7 @@ void cv::detail::LKTrackerInvoker::operator()(const BlockedRange& range) const Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), (deriv_type*)_buf); Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), (deriv_type*)_buf + winSize.area()*cn); - for( int ptidx = range.begin(); ptidx < range.end(); ptidx++ ) + for( int ptidx = range.start; ptidx < range.end; ptidx++ ) { Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level)); Point2f nextPt; @@ -733,11 +733,11 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg, typedef cv::detail::LKTrackerInvoker LKTrackerInvoker; #endif - parallel_for(BlockedRange(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, - nextPyr[level * lvlStep2], prevPts, nextPts, - status, err, - winSize, criteria, level, maxLevel, - flags, (float)minEigThreshold)); + parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI, + nextPyr[level * lvlStep2], prevPts, nextPts, + status, err, + winSize, criteria, level, maxLevel, + flags, (float)minEigThreshold)); } } diff --git a/modules/video/src/lkpyramid.hpp b/modules/video/src/lkpyramid.hpp index 390e46bf99..4aff37ef84 100644 --- a/modules/video/src/lkpyramid.hpp +++ b/modules/video/src/lkpyramid.hpp @@ -7,7 +7,7 @@ namespace detail typedef short deriv_type; - struct LKTrackerInvoker + struct LKTrackerInvoker : ParallelLoopBody { LKTrackerInvoker( const Mat& _prevImg, const Mat& _prevDeriv, const Mat& _nextImg, const Point2f* _prevPts, Point2f* _nextPts, @@ -15,7 +15,7 @@ namespace detail Size _winSize, TermCriteria _criteria, int _level, int _maxLevel, int _flags, float _minEigThreshold ); 
- void operator()(const BlockedRange& range) const; + void operator()(const Range& range) const; const Mat* prevImg; const Mat* nextImg; diff --git a/modules/video/src/video_init.cpp b/modules/video/src/video_init.cpp index 0f3cec144c..7ec860fbd3 100644 --- a/modules/video/src/video_init.cpp +++ b/modules/video/src/video_init.cpp @@ -60,7 +60,15 @@ CV_INIT_ALGORITHM(BackgroundSubtractorMOG2, "BackgroundSubtractor.MOG2", obj.info()->addParam(obj, "history", obj.history); obj.info()->addParam(obj, "nmixtures", obj.nmixtures); obj.info()->addParam(obj, "varThreshold", obj.varThreshold); - obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection)); + obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection); + obj.info()->addParam(obj, "backgroundRatio", obj.backgroundRatio); + obj.info()->addParam(obj, "varThresholdGen", obj.varThresholdGen); + obj.info()->addParam(obj, "fVarInit", obj.fVarInit); + obj.info()->addParam(obj, "fVarMin", obj.fVarMin); + obj.info()->addParam(obj, "fVarMax", obj.fVarMax); + obj.info()->addParam(obj, "fCT", obj.fCT); + obj.info()->addParam(obj, "nShadowDetection", obj.nShadowDetection); + obj.info()->addParam(obj, "fTau", obj.fTau)); /////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/modules/videostab/src/global_motion.cpp b/modules/videostab/src/global_motion.cpp index 484b598dc1..de93d5c5ac 100644 --- a/modules/videostab/src/global_motion.cpp +++ b/modules/videostab/src/global_motion.cpp @@ -205,6 +205,9 @@ Mat estimateGlobalMotionRobust( estimateGlobMotionLeastSquaresAffine }; const int npoints = static_cast(points0.size()); + if (npoints < params.size) + return Mat::eye(3, 3, CV_32F); + const int niters = static_cast(ceil(log(1 - params.prob) / log(1 - pow(1 - params.eps, params.size)))); @@ -300,6 +303,8 @@ PyrLkRobustMotionEstimator::PyrLkRobustMotionEstimator() Mat PyrLkRobustMotionEstimator::estimate(const Mat &frame0, const Mat &frame1) { 
detector_->detect(frame0, keypointsPrev_); + if (keypointsPrev_.empty()) + return Mat::eye(3, 3, CV_32F); pointsPrev_.resize(keypointsPrev_.size()); for (size_t i = 0; i < keypointsPrev_.size(); ++i) diff --git a/android/README.android b/platforms/android/README.android similarity index 100% rename from android/README.android rename to platforms/android/README.android diff --git a/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake similarity index 88% rename from android/android.toolchain.cmake rename to platforms/android/android.toolchain.cmake index 0f7e340678..d7f09c7888 100644 --- a/android/android.toolchain.cmake +++ b/platforms/android/android.toolchain.cmake @@ -289,6 +289,9 @@ # - March 2013 # [+] updated for NDK r8e (x86 version) # [+] support x86_64 version of NDK +# - April 2013 +# [+] support non-release NDK layouts (from Linaro git and Android git) +# [~] automatically detect if explicit link to crtbegin_*.o is needed # ------------------------------------------------------------------------------ cmake_minimum_required( VERSION 2.6.3 ) @@ -516,24 +519,19 @@ if( NOT ANDROID_NDK ) endif( ANDROID_NDK ) endif( NOT ANDROID_STANDALONE_TOOLCHAIN ) endif( NOT ANDROID_NDK ) + # remember found paths if( ANDROID_NDK ) get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE ) - # try to detect change - if( CMAKE_AR ) - string( LENGTH "${ANDROID_NDK}" __length ) - string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) - if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK ) - message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. 
- " ) - endif() - unset( __androidNdkPreviousPath ) - unset( __length ) - endif() set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE ) set( BUILD_WITH_ANDROID_NDK True ) - file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) - string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" ) + file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + else() + set( ANDROID_NDK_RELEASE "r1x" ) + set( ANDROID_NDK_RELEASE_FULL "unreleased" ) + endif() elseif( ANDROID_STANDALONE_TOOLCHAIN ) get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE ) # try to detect change @@ -560,6 +558,51 @@ else() sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" ) endif() +# android NDK layout +if( BUILD_WITH_ANDROID_NDK ) + if( NOT DEFINED ANDROID_NDK_LAYOUT ) + # try to automatically detect the layout + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT") + set( ANDROID_NDK_LAYOUT "RELEASE" ) + elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" ) + set( ANDROID_NDK_LAYOUT "LINARO" ) + elseif( EXISTS "${ANDROID_NDK}/../../gcc/" ) + set( ANDROID_NDK_LAYOUT "ANDROID" ) + endif() + endif() + set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" ) + mark_as_advanced( ANDROID_NDK_LAYOUT ) + if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME 
${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE" + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" ) + endif() + get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE ) + + # try to detect change of NDK + if( CMAKE_AR ) + string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length ) + string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) + if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH ) + message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. 
+ " ) + endif() + unset( __androidNdkPreviousPath ) + unset( __length ) + endif() +endif() + + # get all the details about standalone toolchain if( BUILD_WITH_STANDALONE_TOOLCHAIN ) __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" ) @@ -587,17 +630,23 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN ) endif() endif() -macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name ) +macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath ) foreach( __toolchain ${${__availableToolchainsLst}} ) - if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" ) + if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" ) string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" ) else() set( __gcc_toolchain "${__toolchain}" ) endif() - __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" ) + __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" ) if( __machine ) - string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" ) - string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" ) + string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version "${__gcc_toolchain}" ) + if( __machine MATCHES i686 ) + set( __arch "x86" ) + elseif( __machine MATCHES arm ) + set( __arch "arm" ) + elseif( __machine MATCHES mipsel ) + set( __arch "mipsel" ) + endif() list( APPEND __availableToolchainMachines "${__machine}" ) list( APPEND __availableToolchainArchs "${__arch}" ) list( APPEND __availableToolchainCompilerVersions "${__version}" ) @@ -615,29 +664,29 @@ if( BUILD_WITH_ANDROID_NDK ) set( __availableToolchainMachines "" ) set( 
__availableToolchainArchs "" ) set( __availableToolchainCompilerVersions "" ) - if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" ) + if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" ) # do not go through all toolchains if we know the name set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() if( NOT __availableToolchains ) - file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) + file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" ) if( __availableToolchains ) list(SORT __availableToolchainsLst) # we need clang to go after gcc endif() __LIST_FILTER( __availableToolchainsLst "^[.]" ) __LIST_FILTER( __availableToolchainsLst "llvm" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst 
${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() @@ -768,6 +817,7 @@ else() list( GET __availableToolchainArchs ${__idx} __toolchainArch ) if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME ) list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion ) + string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}") if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion ) set( __toolchainMaxVersion "${__toolchainVersion}" ) set( __toolchainIdx ${__idx} ) @@ -971,11 +1021,11 @@ if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" ) elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" ) string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}") string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) - if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" ) + if( NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" ) message( FATAL_ERROR "Could not find the Clang compiler driver" ) endif() set( ANDROID_COMPILER_IS_CLANG 1 ) - set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_CLANG_TOOLCHAIN_ROOT 
"${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) else() set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) unset( ANDROID_COMPILER_IS_CLANG CACHE ) @@ -989,7 +1039,7 @@ endif() # setup paths and STL for NDK if( BUILD_WITH_ANDROID_NDK ) - set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" ) if( ANDROID_STL STREQUAL "none" ) @@ -1048,11 +1098,11 @@ if( BUILD_WITH_ANDROID_NDK ) endif() # find libsupc++.a - rtti & exceptions if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" ) - if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b") - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - else( ANDROID_NDK_RELEASE STRLESS "r7" ) + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer + if( NOT EXISTS "${__libsupcxx}" ) + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8 + endif() + if( NOT EXISTS "${__libsupcxx}" ) # before r7 if( ARMEABI_V7A ) if( ANDROID_FORCE_ARM_BUILD ) set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" ) @@ -1102,7 +1152,7 @@ unset( _ndk_ccache ) # setup the cross-compiler if( NOT CMAKE_C_COMPILER ) - if( NDK_CCACHE ) + if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ 
;\"]" ) set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" ) set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" ) if( ANDROID_COMPILER_IS_CLANG ) @@ -1174,11 +1224,25 @@ set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm ) remove_definitions( -DANDROID ) add_definitions( -DANDROID ) -if(ANDROID_SYSROOT MATCHES "[ ;\"]") - set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) +if( ANDROID_SYSROOT MATCHES "[ ;\"]" ) + if( CMAKE_HOST_WIN32 ) + # try to convert path to 8.3 form + file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" ) + execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}" + OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE __result ERROR_QUIET ) + if( __result EQUAL 0 ) + file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT ) + set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) + else() + set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) + endif() + else() + set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" ) + endif() if( NOT _CMAKE_IN_TRY_COMPILE ) - # quotes will break try_compile and compiler identification - message(WARNING "Your Android system root has non-alphanumeric symbols. 
It can break compiler features detection and the whole build.") + # quotes can break try_compile and compiler identification + message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n") endif() else() set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) @@ -1249,22 +1313,18 @@ elseif( ARMEABI ) set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" ) endif() +if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +else() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +endif() + # STL if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) - if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - else() - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - endif() - if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" ) - # workaround "undefined reference to `__dso_handle'" problem - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - endif() if( EXISTS "${__libstl}" ) set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" ) set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" ) @@ -1283,9 +1343,12 @@ if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" ) 
endif() if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" ) - set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" ) + if( NOT EXISTS "${ANDROID_LIBM_PATH}" ) + set( ANDROID_LIBM_PATH -lm ) + endif() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" ) endif() endif() @@ -1321,7 +1384,14 @@ if( ARMEABI_V7A ) endif() if( ANDROID_NO_UNDEFINED ) - set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + if( MIPS ) + # there is some sysroot-related problem in mips linker... + if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" ) + endif() + else() + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + endif() endif() if( ANDROID_SO_UNDEFINED ) @@ -1401,9 +1471,9 @@ set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FL set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" ) if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" ) - set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" ) - set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) + set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc 
${CMAKE_SHARED_LINKER_FLAGS}" ) + set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) endif() # configure rtti @@ -1430,6 +1500,43 @@ endif() include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} ) link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) +# detect if need link crtbegin_so.o explicitly +if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK ) + set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" ) + string( REPLACE "" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "-shared" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" ) + string( REPLACE "" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + separate_arguments( __cmd ) + foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN ) + if( ${__var} ) + set( __tmp "${${__var}}" ) + separate_arguments( __tmp ) + string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}") + endif() + endforeach() + string( REPLACE "'" "" __cmd "${__cmd}" ) + string( REPLACE "\"" "" __cmd "${__cmd}" ) + execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET ) + if( __cmd_result EQUAL 0 ) + set( ANDROID_EXPLICIT_CRT_LINK ON ) + else() + set( ANDROID_EXPLICIT_CRT_LINK OFF ) + endif() 
+endif() + +if( ANDROID_EXPLICIT_CRT_LINK ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) +endif() + # setup output directories set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" ) set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" ) @@ -1521,6 +1628,7 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES ANDROID_NDK_HOST_X64 ANDROID_NDK + ANDROID_NDK_LAYOUT ANDROID_STANDALONE_TOOLCHAIN ANDROID_TOOLCHAIN_NAME ANDROID_ABI @@ -1534,6 +1642,8 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO + ANDROID_LIBM_PATH + ANDROID_EXPLICIT_CRT_LINK ) if( DEFINED ${__var} ) if( "${__var}" MATCHES " ") @@ -1577,6 +1687,7 @@ endif() # ANDROID_STANDALONE_TOOLCHAIN # ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain # ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems) +# ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID) # LIBRARY_OUTPUT_PATH_ROOT : # NDK_CCACHE : # Obsolete: @@ -1622,6 +1733,7 @@ endif() # ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime # ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used # ANDROID_CLANG_VERSION : version of clang compiler if clang is used +# ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product//obj/lib/libm.so) to workaround unresolved `sincos` # # Defaults: # ANDROID_DEFAULT_NDK_API_LEVEL diff --git a/android/java.rst b/platforms/android/java.rst similarity index 100% rename from android/java.rst rename to 
platforms/android/java.rst diff --git a/android/libinfo/CMakeLists.txt b/platforms/android/libinfo/CMakeLists.txt similarity index 100% rename from android/libinfo/CMakeLists.txt rename to platforms/android/libinfo/CMakeLists.txt diff --git a/android/libinfo/info.c b/platforms/android/libinfo/info.c similarity index 100% rename from android/libinfo/info.c rename to platforms/android/libinfo/info.c diff --git a/android/package/AndroidManifest.xml b/platforms/android/package/AndroidManifest.xml similarity index 100% rename from android/package/AndroidManifest.xml rename to platforms/android/package/AndroidManifest.xml diff --git a/android/package/CMakeLists.txt b/platforms/android/package/CMakeLists.txt similarity index 100% rename from android/package/CMakeLists.txt rename to platforms/android/package/CMakeLists.txt diff --git a/android/package/res/drawable/icon.png b/platforms/android/package/res/drawable/icon.png similarity index 100% rename from android/package/res/drawable/icon.png rename to platforms/android/package/res/drawable/icon.png diff --git a/android/package/res/values/strings.xml b/platforms/android/package/res/values/strings.xml similarity index 100% rename from android/package/res/values/strings.xml rename to platforms/android/package/res/values/strings.xml diff --git a/android/refman.rst b/platforms/android/refman.rst similarity index 100% rename from android/refman.rst rename to platforms/android/refman.rst diff --git a/android/service/CMakeLists.txt b/platforms/android/service/CMakeLists.txt similarity index 100% rename from android/service/CMakeLists.txt rename to platforms/android/service/CMakeLists.txt diff --git a/android/service/all.py b/platforms/android/service/all.py similarity index 100% rename from android/service/all.py rename to platforms/android/service/all.py diff --git a/android/service/device.conf b/platforms/android/service/device.conf similarity index 100% rename from android/service/device.conf rename to 
platforms/android/service/device.conf diff --git a/android/service/doc/AndroidAppUsageModel.dia b/platforms/android/service/doc/AndroidAppUsageModel.dia similarity index 100% rename from android/service/doc/AndroidAppUsageModel.dia rename to platforms/android/service/doc/AndroidAppUsageModel.dia diff --git a/android/service/doc/BaseLoaderCallback.rst b/platforms/android/service/doc/BaseLoaderCallback.rst similarity index 100% rename from android/service/doc/BaseLoaderCallback.rst rename to platforms/android/service/doc/BaseLoaderCallback.rst diff --git a/android/service/doc/InstallCallbackInterface.rst b/platforms/android/service/doc/InstallCallbackInterface.rst similarity index 100% rename from android/service/doc/InstallCallbackInterface.rst rename to platforms/android/service/doc/InstallCallbackInterface.rst diff --git a/android/service/doc/Intro.rst b/platforms/android/service/doc/Intro.rst similarity index 100% rename from android/service/doc/Intro.rst rename to platforms/android/service/doc/Intro.rst diff --git a/android/service/doc/JavaHelper.rst b/platforms/android/service/doc/JavaHelper.rst similarity index 100% rename from android/service/doc/JavaHelper.rst rename to platforms/android/service/doc/JavaHelper.rst diff --git a/android/service/doc/LibInstallAproved.dia b/platforms/android/service/doc/LibInstallAproved.dia similarity index 100% rename from android/service/doc/LibInstallAproved.dia rename to platforms/android/service/doc/LibInstallAproved.dia diff --git a/android/service/doc/LibInstallCanceled.dia b/platforms/android/service/doc/LibInstallCanceled.dia similarity index 100% rename from android/service/doc/LibInstallCanceled.dia rename to platforms/android/service/doc/LibInstallCanceled.dia diff --git a/android/service/doc/LibInstalled.dia b/platforms/android/service/doc/LibInstalled.dia similarity index 100% rename from android/service/doc/LibInstalled.dia rename to platforms/android/service/doc/LibInstalled.dia diff --git 
a/android/service/doc/LoaderCallbackInterface.rst b/platforms/android/service/doc/LoaderCallbackInterface.rst similarity index 100% rename from android/service/doc/LoaderCallbackInterface.rst rename to platforms/android/service/doc/LoaderCallbackInterface.rst diff --git a/android/service/doc/NoService.dia b/platforms/android/service/doc/NoService.dia similarity index 100% rename from android/service/doc/NoService.dia rename to platforms/android/service/doc/NoService.dia diff --git a/android/service/doc/Structure.dia b/platforms/android/service/doc/Structure.dia similarity index 100% rename from android/service/doc/Structure.dia rename to platforms/android/service/doc/Structure.dia diff --git a/android/service/doc/UseCases.rst b/platforms/android/service/doc/UseCases.rst similarity index 100% rename from android/service/doc/UseCases.rst rename to platforms/android/service/doc/UseCases.rst diff --git a/android/service/doc/build_uml.py b/platforms/android/service/doc/build_uml.py similarity index 100% rename from android/service/doc/build_uml.py rename to platforms/android/service/doc/build_uml.py diff --git a/android/service/doc/img/AndroidAppUsageModel.png b/platforms/android/service/doc/img/AndroidAppUsageModel.png similarity index 100% rename from android/service/doc/img/AndroidAppUsageModel.png rename to platforms/android/service/doc/img/AndroidAppUsageModel.png diff --git a/android/service/doc/img/LibInstallAproved.png b/platforms/android/service/doc/img/LibInstallAproved.png similarity index 100% rename from android/service/doc/img/LibInstallAproved.png rename to platforms/android/service/doc/img/LibInstallAproved.png diff --git a/android/service/doc/img/LibInstallCanceled.png b/platforms/android/service/doc/img/LibInstallCanceled.png similarity index 100% rename from android/service/doc/img/LibInstallCanceled.png rename to platforms/android/service/doc/img/LibInstallCanceled.png diff --git a/android/service/doc/img/LibInstalled.png 
b/platforms/android/service/doc/img/LibInstalled.png similarity index 100% rename from android/service/doc/img/LibInstalled.png rename to platforms/android/service/doc/img/LibInstalled.png diff --git a/android/service/doc/img/NoService.png b/platforms/android/service/doc/img/NoService.png similarity index 100% rename from android/service/doc/img/NoService.png rename to platforms/android/service/doc/img/NoService.png diff --git a/android/service/doc/img/Structure.png b/platforms/android/service/doc/img/Structure.png similarity index 100% rename from android/service/doc/img/Structure.png rename to platforms/android/service/doc/img/Structure.png diff --git a/android/service/doc/index.rst b/platforms/android/service/doc/index.rst similarity index 100% rename from android/service/doc/index.rst rename to platforms/android/service/doc/index.rst diff --git a/android/service/engine/.classpath b/platforms/android/service/engine/.classpath similarity index 100% rename from android/service/engine/.classpath rename to platforms/android/service/engine/.classpath diff --git a/android/service/engine/.project b/platforms/android/service/engine/.project similarity index 100% rename from android/service/engine/.project rename to platforms/android/service/engine/.project diff --git a/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml similarity index 100% rename from android/service/engine/AndroidManifest.xml rename to platforms/android/service/engine/AndroidManifest.xml diff --git a/android/service/engine/CMakeLists.txt b/platforms/android/service/engine/CMakeLists.txt similarity index 97% rename from android/service/engine/CMakeLists.txt rename to platforms/android/service/engine/CMakeLists.txt index 8b88393942..852a028cab 100644 --- a/android/service/engine/CMakeLists.txt +++ b/platforms/android/service/engine/CMakeLists.txt @@ -24,7 +24,7 @@ else() message(WARNING "Can not automatically determine the value for 
ANDROID_PLATFORM_VERSION_CODE") endif() -configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY) +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY) link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}") @@ -72,4 +72,3 @@ file(GLOB engine_test_files "jni/Tests/*.cpp") add_executable(opencv_test_engine ${engine_test_files} jni/Tests/gtest/gtest-all.cpp) target_link_libraries(opencv_test_engine z binder log utils android_runtime ${engine} ${engine}_jni) - diff --git a/android/service/engine/build.xml b/platforms/android/service/engine/build.xml similarity index 100% rename from android/service/engine/build.xml rename to platforms/android/service/engine/build.xml diff --git a/android/service/engine/jni/Android.mk b/platforms/android/service/engine/jni/Android.mk similarity index 100% rename from android/service/engine/jni/Android.mk rename to platforms/android/service/engine/jni/Android.mk diff --git a/android/service/engine/jni/Application.mk b/platforms/android/service/engine/jni/Application.mk similarity index 100% rename from android/service/engine/jni/Application.mk rename to platforms/android/service/engine/jni/Application.mk diff --git a/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h similarity index 
100% rename from android/service/engine/jni/BinderComponent/BnOpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/BnOpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h similarity index 100% rename from android/service/engine/jni/BinderComponent/BpOpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/BpOpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/HardwareDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/HardwareDetector.cpp rename to platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp diff --git a/android/service/engine/jni/BinderComponent/HardwareDetector.h b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h similarity index 100% rename from android/service/engine/jni/BinderComponent/HardwareDetector.h rename to platforms/android/service/engine/jni/BinderComponent/HardwareDetector.h diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/OpenCVEngine.cpp rename to platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.h b/platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.h similarity index 100% rename from 
android/service/engine/jni/BinderComponent/OpenCVEngine.h rename to platforms/android/service/engine/jni/BinderComponent/OpenCVEngine.h diff --git a/android/service/engine/jni/BinderComponent/ProcReader.cpp b/platforms/android/service/engine/jni/BinderComponent/ProcReader.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/ProcReader.cpp rename to platforms/android/service/engine/jni/BinderComponent/ProcReader.cpp diff --git a/android/service/engine/jni/BinderComponent/ProcReader.h b/platforms/android/service/engine/jni/BinderComponent/ProcReader.h similarity index 100% rename from android/service/engine/jni/BinderComponent/ProcReader.h rename to platforms/android/service/engine/jni/BinderComponent/ProcReader.h diff --git a/android/service/engine/jni/BinderComponent/StringUtils.cpp b/platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/StringUtils.cpp rename to platforms/android/service/engine/jni/BinderComponent/StringUtils.cpp diff --git a/android/service/engine/jni/BinderComponent/StringUtils.h b/platforms/android/service/engine/jni/BinderComponent/StringUtils.h similarity index 100% rename from android/service/engine/jni/BinderComponent/StringUtils.h rename to platforms/android/service/engine/jni/BinderComponent/StringUtils.h diff --git a/android/service/engine/jni/BinderComponent/TegraDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp similarity index 100% rename from android/service/engine/jni/BinderComponent/TegraDetector.cpp rename to platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp diff --git a/android/service/engine/jni/BinderComponent/TegraDetector.h b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h similarity index 100% rename from android/service/engine/jni/BinderComponent/TegraDetector.h rename to 
platforms/android/service/engine/jni/BinderComponent/TegraDetector.h diff --git a/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp rename to platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.cpp diff --git a/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h b/platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h rename to platforms/android/service/engine/jni/JNIWrapper/HardwareDetector_jni.h diff --git a/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp b/platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp rename to platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.cpp diff --git a/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h b/platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h rename to platforms/android/service/engine/jni/JNIWrapper/JavaBasedPackageManager.h diff --git a/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp b/platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.cpp diff --git a/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h b/platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h rename to 
platforms/android/service/engine/jni/JNIWrapper/OpenCVEngine_jni.h diff --git a/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp b/platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.cpp diff --git a/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h b/platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h similarity index 100% rename from android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h rename to platforms/android/service/engine/jni/JNIWrapper/OpenCVLibraryInfo.h diff --git a/android/service/engine/jni/NativeClient/ClientMain.cpp b/platforms/android/service/engine/jni/NativeClient/ClientMain.cpp similarity index 100% rename from android/service/engine/jni/NativeClient/ClientMain.cpp rename to platforms/android/service/engine/jni/NativeClient/ClientMain.cpp diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.cpp b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp similarity index 100% rename from android/service/engine/jni/NativeService/CommonPackageManager.cpp rename to platforms/android/service/engine/jni/NativeService/CommonPackageManager.cpp diff --git a/android/service/engine/jni/NativeService/CommonPackageManager.h b/platforms/android/service/engine/jni/NativeService/CommonPackageManager.h similarity index 100% rename from android/service/engine/jni/NativeService/CommonPackageManager.h rename to platforms/android/service/engine/jni/NativeService/CommonPackageManager.h diff --git a/android/service/engine/jni/NativeService/NativePackageManager.cpp b/platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp similarity index 100% rename from android/service/engine/jni/NativeService/NativePackageManager.cpp rename to 
platforms/android/service/engine/jni/NativeService/NativePackageManager.cpp diff --git a/android/service/engine/jni/NativeService/NativePackageManager.h b/platforms/android/service/engine/jni/NativeService/NativePackageManager.h similarity index 100% rename from android/service/engine/jni/NativeService/NativePackageManager.h rename to platforms/android/service/engine/jni/NativeService/NativePackageManager.h diff --git a/android/service/engine/jni/NativeService/PackageInfo.cpp b/platforms/android/service/engine/jni/NativeService/PackageInfo.cpp similarity index 100% rename from android/service/engine/jni/NativeService/PackageInfo.cpp rename to platforms/android/service/engine/jni/NativeService/PackageInfo.cpp diff --git a/android/service/engine/jni/NativeService/PackageInfo.h b/platforms/android/service/engine/jni/NativeService/PackageInfo.h similarity index 100% rename from android/service/engine/jni/NativeService/PackageInfo.h rename to platforms/android/service/engine/jni/NativeService/PackageInfo.h diff --git a/android/service/engine/jni/NativeService/ServiceMain.cpp b/platforms/android/service/engine/jni/NativeService/ServiceMain.cpp similarity index 100% rename from android/service/engine/jni/NativeService/ServiceMain.cpp rename to platforms/android/service/engine/jni/NativeService/ServiceMain.cpp diff --git a/android/service/engine/jni/Tests/HardwareDetectionTest.cpp b/platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp similarity index 100% rename from android/service/engine/jni/Tests/HardwareDetectionTest.cpp rename to platforms/android/service/engine/jni/Tests/HardwareDetectionTest.cpp diff --git a/android/service/engine/jni/Tests/OpenCVEngineTest.cpp b/platforms/android/service/engine/jni/Tests/OpenCVEngineTest.cpp similarity index 100% rename from android/service/engine/jni/Tests/OpenCVEngineTest.cpp rename to platforms/android/service/engine/jni/Tests/OpenCVEngineTest.cpp diff --git a/android/service/engine/jni/Tests/PackageInfoTest.cpp 
b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp similarity index 99% rename from android/service/engine/jni/Tests/PackageInfoTest.cpp rename to platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp index 6cbb069431..36fdae764f 100644 --- a/android/service/engine/jni/Tests/PackageInfoTest.cpp +++ b/platforms/android/service/engine/jni/Tests/PackageInfoTest.cpp @@ -222,4 +222,3 @@ TEST(PackageInfo, Comparator3) EXPECT_EQ(info1, info2); } #endif - diff --git a/android/service/engine/jni/Tests/PackageManagerStub.cpp b/platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp similarity index 100% rename from android/service/engine/jni/Tests/PackageManagerStub.cpp rename to platforms/android/service/engine/jni/Tests/PackageManagerStub.cpp diff --git a/android/service/engine/jni/Tests/PackageManagerStub.h b/platforms/android/service/engine/jni/Tests/PackageManagerStub.h similarity index 100% rename from android/service/engine/jni/Tests/PackageManagerStub.h rename to platforms/android/service/engine/jni/Tests/PackageManagerStub.h diff --git a/android/service/engine/jni/Tests/PackageManagmentTest.cpp b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp similarity index 99% rename from android/service/engine/jni/Tests/PackageManagmentTest.cpp rename to platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp index e21dcf7604..61d6e01c24 100644 --- a/android/service/engine/jni/Tests/PackageManagmentTest.cpp +++ b/platforms/android/service/engine/jni/Tests/PackageManagmentTest.cpp @@ -137,5 +137,3 @@ TEST(PackageManager, GetPackagePathForMips) // string path = pm.GetPackagePathByVersion("240", PLATFORM_TEGRA2, 0); // EXPECT_STREQ("/data/data/org.opencv.lib_v24_tegra2/lib", path.c_str()); // } - - diff --git a/android/service/engine/jni/Tests/TestMain.cpp b/platforms/android/service/engine/jni/Tests/TestMain.cpp similarity index 100% rename from android/service/engine/jni/Tests/TestMain.cpp rename to 
platforms/android/service/engine/jni/Tests/TestMain.cpp diff --git a/android/service/engine/jni/Tests/Tests.mk b/platforms/android/service/engine/jni/Tests/Tests.mk similarity index 100% rename from android/service/engine/jni/Tests/Tests.mk rename to platforms/android/service/engine/jni/Tests/Tests.mk diff --git a/android/service/engine/jni/Tests/gtest/gtest-all.cpp b/platforms/android/service/engine/jni/Tests/gtest/gtest-all.cpp similarity index 100% rename from android/service/engine/jni/Tests/gtest/gtest-all.cpp rename to platforms/android/service/engine/jni/Tests/gtest/gtest-all.cpp diff --git a/android/service/engine/jni/Tests/gtest/gtest.h b/platforms/android/service/engine/jni/Tests/gtest/gtest.h similarity index 100% rename from android/service/engine/jni/Tests/gtest/gtest.h rename to platforms/android/service/engine/jni/Tests/gtest/gtest.h diff --git a/android/service/engine/jni/include/EngineCommon.h b/platforms/android/service/engine/jni/include/EngineCommon.h similarity index 100% rename from android/service/engine/jni/include/EngineCommon.h rename to platforms/android/service/engine/jni/include/EngineCommon.h diff --git a/android/service/engine/jni/include/IOpenCVEngine.h b/platforms/android/service/engine/jni/include/IOpenCVEngine.h similarity index 100% rename from android/service/engine/jni/include/IOpenCVEngine.h rename to platforms/android/service/engine/jni/include/IOpenCVEngine.h diff --git a/android/service/engine/jni/include/IPackageManager.h b/platforms/android/service/engine/jni/include/IPackageManager.h similarity index 100% rename from android/service/engine/jni/include/IPackageManager.h rename to platforms/android/service/engine/jni/include/IPackageManager.h diff --git a/android/service/engine/jni/include/OpenCVEngineHelper.h b/platforms/android/service/engine/jni/include/OpenCVEngineHelper.h similarity index 100% rename from android/service/engine/jni/include/OpenCVEngineHelper.h rename to 
platforms/android/service/engine/jni/include/OpenCVEngineHelper.h diff --git a/android/service/engine/project.properties b/platforms/android/service/engine/project.properties similarity index 100% rename from android/service/engine/project.properties rename to platforms/android/service/engine/project.properties diff --git a/android/service/engine/res/drawable/icon.png b/platforms/android/service/engine/res/drawable/icon.png similarity index 100% rename from android/service/engine/res/drawable/icon.png rename to platforms/android/service/engine/res/drawable/icon.png diff --git a/android/service/engine/res/layout-small/info.xml b/platforms/android/service/engine/res/layout-small/info.xml similarity index 100% rename from android/service/engine/res/layout-small/info.xml rename to platforms/android/service/engine/res/layout-small/info.xml diff --git a/android/service/engine/res/layout-small/main.xml b/platforms/android/service/engine/res/layout-small/main.xml similarity index 100% rename from android/service/engine/res/layout-small/main.xml rename to platforms/android/service/engine/res/layout-small/main.xml diff --git a/android/service/engine/res/layout/info.xml b/platforms/android/service/engine/res/layout/info.xml similarity index 100% rename from android/service/engine/res/layout/info.xml rename to platforms/android/service/engine/res/layout/info.xml diff --git a/android/service/engine/res/layout/main.xml b/platforms/android/service/engine/res/layout/main.xml similarity index 100% rename from android/service/engine/res/layout/main.xml rename to platforms/android/service/engine/res/layout/main.xml diff --git a/android/service/engine/res/values/strings.xml b/platforms/android/service/engine/res/values/strings.xml similarity index 100% rename from android/service/engine/res/values/strings.xml rename to platforms/android/service/engine/res/values/strings.xml diff --git a/android/service/engine/src/org/opencv/engine/BinderConnector.java 
b/platforms/android/service/engine/src/org/opencv/engine/BinderConnector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/BinderConnector.java rename to platforms/android/service/engine/src/org/opencv/engine/BinderConnector.java diff --git a/android/service/engine/src/org/opencv/engine/HardwareDetector.java b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/HardwareDetector.java rename to platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java diff --git a/android/service/engine/src/org/opencv/engine/MarketConnector.java b/platforms/android/service/engine/src/org/opencv/engine/MarketConnector.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/MarketConnector.java rename to platforms/android/service/engine/src/org/opencv/engine/MarketConnector.java diff --git a/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl rename to platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineInterface.aidl diff --git a/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java b/platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVEngineService.java rename to platforms/android/service/engine/src/org/opencv/engine/OpenCVEngineService.java diff --git a/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java b/platforms/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java rename to 
platforms/android/service/engine/src/org/opencv/engine/OpenCVLibraryInfo.java diff --git a/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java rename to platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java diff --git a/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java b/platforms/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java similarity index 100% rename from android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java rename to platforms/android/service/engine/src/org/opencv/engine/manager/PackageListAdapter.java diff --git a/android/service/engine_test/.classpath b/platforms/android/service/engine_test/.classpath similarity index 100% rename from android/service/engine_test/.classpath rename to platforms/android/service/engine_test/.classpath diff --git a/android/service/engine_test/.project b/platforms/android/service/engine_test/.project similarity index 100% rename from android/service/engine_test/.project rename to platforms/android/service/engine_test/.project diff --git a/android/service/engine_test/AndroidManifest.xml b/platforms/android/service/engine_test/AndroidManifest.xml similarity index 100% rename from android/service/engine_test/AndroidManifest.xml rename to platforms/android/service/engine_test/AndroidManifest.xml diff --git a/android/service/engine_test/build.xml b/platforms/android/service/engine_test/build.xml similarity index 100% rename from android/service/engine_test/build.xml rename to platforms/android/service/engine_test/build.xml diff --git a/android/service/engine_test/project.properties b/platforms/android/service/engine_test/project.properties similarity index 100% rename from 
android/service/engine_test/project.properties rename to platforms/android/service/engine_test/project.properties diff --git a/android/service/engine_test/res/drawable-hdpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-hdpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-hdpi/ic_launcher.png rename to platforms/android/service/engine_test/res/drawable-hdpi/ic_launcher.png diff --git a/android/service/engine_test/res/drawable-ldpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-ldpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-ldpi/ic_launcher.png rename to platforms/android/service/engine_test/res/drawable-ldpi/ic_launcher.png diff --git a/android/service/engine_test/res/drawable-mdpi/ic_launcher.png b/platforms/android/service/engine_test/res/drawable-mdpi/ic_launcher.png similarity index 100% rename from android/service/engine_test/res/drawable-mdpi/ic_launcher.png rename to platforms/android/service/engine_test/res/drawable-mdpi/ic_launcher.png diff --git a/android/service/engine_test/res/layout/main.xml b/platforms/android/service/engine_test/res/layout/main.xml similarity index 100% rename from android/service/engine_test/res/layout/main.xml rename to platforms/android/service/engine_test/res/layout/main.xml diff --git a/android/service/engine_test/res/values/strings.xml b/platforms/android/service/engine_test/res/values/strings.xml similarity index 100% rename from android/service/engine_test/res/values/strings.xml rename to platforms/android/service/engine_test/res/values/strings.xml diff --git a/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java b/platforms/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java similarity index 100% rename from android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java rename to 
platforms/android/service/engine_test/src/org/opencv/engine/test/EngineInterfaceTest.java diff --git a/android/service/push_native.py b/platforms/android/service/push_native.py similarity index 100% rename from android/service/push_native.py rename to platforms/android/service/push_native.py diff --git a/android/service/readme.txt b/platforms/android/service/readme.txt similarity index 100% rename from android/service/readme.txt rename to platforms/android/service/readme.txt diff --git a/android/service/test_native.py b/platforms/android/service/test_native.py similarity index 99% rename from android/service/test_native.py rename to platforms/android/service/test_native.py index 9a39032b18..328b9a8a51 100755 --- a/android/service/test_native.py +++ b/platforms/android/service/test_native.py @@ -34,4 +34,3 @@ if (__name__ == "__main__"): os.system("adb %s shell mkdir -p \"%s\"" % (DEVICE_STR, DEVICE_LOG_PATH)) RunTestApp("OpenCVEngineTestApp") - diff --git a/ios/Info.plist.in b/platforms/ios/Info.plist.in similarity index 93% rename from ios/Info.plist.in rename to platforms/ios/Info.plist.in index 89ef38625d..6bcfe862d0 100644 --- a/ios/Info.plist.in +++ b/platforms/ios/Info.plist.in @@ -5,7 +5,7 @@ CFBundleName OpenCV CFBundleIdentifier - com.itseez.opencv + org.opencv CFBundleVersion ${VERSION} CFBundleShortVersionString diff --git a/ios/build_framework.py b/platforms/ios/build_framework.py similarity index 95% rename from ios/build_framework.py rename to platforms/ios/build_framework.py index ceef4b71d7..bc385bb1bb 100755 --- a/ios/build_framework.py +++ b/platforms/ios/build_framework.py @@ -38,7 +38,7 @@ def build_opencv(srcroot, buildroot, target, arch): # for some reason, if you do not specify CMAKE_BUILD_TYPE, it puts libs to "RELEASE" rather than "Release" cmakeargs = ("-GXcode " + "-DCMAKE_BUILD_TYPE=Release " + - "-DCMAKE_TOOLCHAIN_FILE=%s/ios/cmake/Toolchains/Toolchain-%s_Xcode.cmake " + + 
"-DCMAKE_TOOLCHAIN_FILE=%s/platforms/ios/cmake/Toolchains/Toolchain-%s_Xcode.cmake " + "-DBUILD_opencv_world=ON " + "-DCMAKE_INSTALL_PREFIX=install") % (srcroot, target) # if cmake cache exists, just rerun cmake to update OpenCV.xproj if necessary @@ -92,16 +92,13 @@ def put_framework_together(srcroot, dstroot): os.system("lipo -create " + wlist + " -o " + dstdir + "/opencv2") # form Info.plist - srcfile = open(srcroot + "/ios/Info.plist.in", "rt") + srcfile = open(srcroot + "/platforms/ios/Info.plist.in", "rt") dstfile = open(dstdir + "/Resources/Info.plist", "wt") for l in srcfile.readlines(): dstfile.write(l.replace("${VERSION}", opencv_version)) srcfile.close() dstfile.close() - # copy cascades - # TODO ... - # make symbolic links os.symlink("A", "Versions/Current") os.symlink("Versions/Current/Headers", "Headers") @@ -125,4 +122,4 @@ if __name__ == "__main__": print "Usage:\n\t./build_framework.py \n\n" sys.exit(0) - build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "..")), os.path.abspath(sys.argv[1])) \ No newline at end of file + build_framework(os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), "../..")), os.path.abspath(sys.argv[1])) \ No newline at end of file diff --git a/ios/cmake/Modules/Platform/iOS.cmake b/platforms/ios/cmake/Modules/Platform/iOS.cmake similarity index 100% rename from ios/cmake/Modules/Platform/iOS.cmake rename to platforms/ios/cmake/Modules/Platform/iOS.cmake diff --git a/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake similarity index 84% rename from ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake rename to platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake index 67343253bd..6493deb459 100644 --- a/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake +++ b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneOS_Xcode.cmake @@ -4,12 +4,12 @@ set (IPHONEOS TRUE) # Standard settings set (CMAKE_SYSTEM_NAME iOS) # Include extra 
modules for the iOS platform files -set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/ios/cmake/Modules") +set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/platforms/ios/cmake/Modules") -# Force the compilers to gcc for iOS +# Force the compilers to clang for iOS include (CMakeForceCompiler) -#CMAKE_FORCE_C_COMPILER (gcc gcc) -#CMAKE_FORCE_CXX_COMPILER (g++ g++) +#CMAKE_FORCE_C_COMPILER (clang GNU) +#CMAKE_FORCE_CXX_COMPILER (clang++ GNU) set (CMAKE_C_SIZEOF_DATA_PTR 4) set (CMAKE_C_HAS_ISYSROOT 1) diff --git a/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake similarity index 85% rename from ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake rename to platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake index 7ef8113edb..0056c8dbd4 100644 --- a/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake +++ b/platforms/ios/cmake/Toolchains/Toolchain-iPhoneSimulator_Xcode.cmake @@ -4,12 +4,12 @@ set (IPHONESIMULATOR TRUE) # Standard settings set (CMAKE_SYSTEM_NAME iOS) # Include extra modules for the iOS platform files -set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/ios/cmake/Modules") +set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/platforms/ios/cmake/Modules") -# Force the compilers to gcc for iOS +# Force the compilers to clang for iOS include (CMakeForceCompiler) -#CMAKE_FORCE_C_COMPILER (gcc gcc) -#CMAKE_FORCE_CXX_COMPILER (g++ g++) +#CMAKE_FORCE_C_COMPILER (clang GNU) +#CMAKE_FORCE_CXX_COMPILER (clang++ GNU) set (CMAKE_C_SIZEOF_DATA_PTR 4) set (CMAKE_C_HAS_ISYSROOT 1) diff --git a/platforms/ios/readme.txt b/platforms/ios/readme.txt new file mode 100644 index 0000000000..8f1f206b03 --- /dev/null +++ b/platforms/ios/readme.txt @@ -0,0 +1,7 @@ +Building OpenCV from Source, using CMake and Command Line +========================================================= + 
+cd ~/ +python opencv/platforms/ios/build_framework.py ios + +If everything's fine, a few minutes later you will get ~//ios/opencv2.framework. You can add this framework to your Xcode projects. \ No newline at end of file diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh deleted file mode 100755 index f8df7859c3..0000000000 --- a/platforms/linux/scripts/cmake_arm_gnueabi_hardfp.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_hardfp -cd build_hardfp - -cmake -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../.. - diff --git a/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh deleted file mode 100755 index f4210fa829..0000000000 --- a/platforms/linux/scripts/cmake_arm_gnueabi_softfp.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -cd `dirname $0`/.. - -mkdir -p build_softfp -cd build_softfp - -cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../arm-gnueabi.toolchain.cmake $@ ../../.. - diff --git a/platforms/readme.txt b/platforms/readme.txt index 7e1c4555c5..dfe0461422 100644 --- a/platforms/readme.txt +++ b/platforms/readme.txt @@ -1 +1,3 @@ -This folder contains toolchains and additional files that are needed for cross compitation. \ No newline at end of file +This folder contains toolchains and additional files that are needed for cross compilation. 
+For more information see introduction tutorials for target platform in documentation: +http://docs.opencv.org/doc/tutorials/introduction/table_of_content_introduction/table_of_content_introduction.html#table-of-content-introduction \ No newline at end of file diff --git a/android/scripts/ABI_compat_generator.py b/platforms/scripts/ABI_compat_generator.py similarity index 98% rename from android/scripts/ABI_compat_generator.py rename to platforms/scripts/ABI_compat_generator.py index b492a70fe4..fdabf00611 100755 --- a/android/scripts/ABI_compat_generator.py +++ b/platforms/scripts/ABI_compat_generator.py @@ -6,9 +6,7 @@ import os architecture = 'armeabi' -excludedHeaders = set(['hdf5.h', 'cap_ios.h', - 'eigen.hpp', 'cxeigen.hpp' #TOREMOVE - ]) +excludedHeaders = set(['hdf5.h', 'cap_ios.h', 'eigen.hpp', 'cxeigen.hpp']) #TOREMOVE systemIncludes = ['sources/cxx-stl/gnu-libstdc++/4.6/include', \ '/opt/android-ndk-r8c/platforms/android-8/arch-arm', # TODO: check if this one could be passed as command line arg 'sources/cxx-stl/gnu-libstdc++/4.6/libs/armeabi-v7a/include'] @@ -113,7 +111,7 @@ def FindHeaders(): if f == m: moduleHeaders += GetHeaderFiles(os.path.join(cppHeadersFolder, f)) if m == 'flann': - flann = os.path.join(cppHeadersFolder, f, 'flann.hpp') + flann = os.path.join(cppHeadersFolder, f, 'flann.hpp') moduleHeaders.remove(flann) moduleHeaders.insert(0, flann) cppHeaders += moduleHeaders diff --git a/android/scripts/camera_build.conf b/platforms/scripts/camera_build.conf similarity index 100% rename from android/scripts/camera_build.conf rename to platforms/scripts/camera_build.conf diff --git a/android/scripts/cmake_android_all_cameras.py b/platforms/scripts/cmake_android_all_cameras.py similarity index 90% rename from android/scripts/cmake_android_all_cameras.py rename to platforms/scripts/cmake_android_all_cameras.py index afcab63a75..c160df0fa0 100755 --- a/android/scripts/cmake_android_all_cameras.py +++ b/platforms/scripts/cmake_android_all_cameras.py 
@@ -49,7 +49,7 @@ for s in ConfFile.readlines(): os.chdir(BuildDir) BuildLog = os.path.join(BuildDir, "build.log") - CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../../ > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog) + CmakeCmdLine = "cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_SOURCE_TREE=\"%s\" -DANDROID_NATIVE_API_LEVEL=\"%s\" -DANDROID_ABI=\"%s\" -DANDROID_STL=stlport_static ../.. > \"%s\" 2>&1" % (AndroidTreeRoot, NativeApiLevel, Arch, BuildLog) MakeCmdLine = "make %s >> \"%s\" 2>&1" % (MakeTarget, BuildLog); #print(CmakeCmdLine) os.system(CmakeCmdLine) diff --git a/android/scripts/cmake_android.sh b/platforms/scripts/cmake_android_arm.sh similarity index 50% rename from android/scripts/cmake_android.sh rename to platforms/scripts/cmake_android_arm.sh index 101ba3cee8..84c88a8159 100755 --- a/android/scripts/cmake_android.sh +++ b/platforms/scripts/cmake_android_arm.sh @@ -1,8 +1,7 @@ #!/bin/sh cd `dirname $0`/.. -mkdir -p build -cd build - -cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android.toolchain.cmake $@ ../.. +mkdir -p build_android_arm +cd build_android_arm +cmake -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_android_mips.sh b/platforms/scripts/cmake_android_mips.sh new file mode 100755 index 0000000000..6bc7944b6d --- /dev/null +++ b/platforms/scripts/cmake_android_mips.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_android_mips +cd build_android_mips + +cmake -DANDROID_ABI=mips -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. 
diff --git a/platforms/scripts/cmake_android_service.sh b/platforms/scripts/cmake_android_service.sh new file mode 100755 index 0000000000..7ba8865b2a --- /dev/null +++ b/platforms/scripts/cmake_android_service.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_android_service +cd build_android_service + +cmake -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake -DANDROID_TOOLCHAIN_NAME="arm-linux-androideabi-4.4.3" -DANDROID_STL=stlport_static -DANDROID_STL_FORCE_FEATURES=OFF -DBUILD_ANDROID_SERVICE=ON -DANDROID_SOURCE_TREE=~/Projects/AndroidSource/ServiceStub/ $@ ../.. diff --git a/platforms/scripts/cmake_android_x86.sh b/platforms/scripts/cmake_android_x86.sh new file mode 100755 index 0000000000..8fb8abda7e --- /dev/null +++ b/platforms/scripts/cmake_android_x86.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +cd `dirname $0`/.. + +mkdir -p build_android_x86 +cd build_android_x86 + +cmake -DANDROID_ABI=x86 -DCMAKE_TOOLCHAIN_FILE=../android/android.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_arm_gnueabi_hardfp.sh b/platforms/scripts/cmake_arm_gnueabi_hardfp.sh new file mode 100755 index 0000000000..1fce4f9dc1 --- /dev/null +++ b/platforms/scripts/cmake_arm_gnueabi_hardfp.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_linux_arm_hardfp +cd build_linux_arm_hardfp + +cmake -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../.. diff --git a/platforms/scripts/cmake_arm_gnueabi_softfp.sh b/platforms/scripts/cmake_arm_gnueabi_softfp.sh new file mode 100755 index 0000000000..734348907c --- /dev/null +++ b/platforms/scripts/cmake_arm_gnueabi_softfp.sh @@ -0,0 +1,7 @@ +#!/bin/sh +cd `dirname $0`/.. + +mkdir -p build_linux_arm_softfp +cd build_linux_arm_softfp + +cmake -DSOFTFP=ON -DCMAKE_TOOLCHAIN_FILE=../linux/arm-gnueabi.toolchain.cmake $@ ../.. 
diff --git a/platforms/linux/scripts/cmake_carma.sh b/platforms/scripts/cmake_carma.sh similarity index 100% rename from platforms/linux/scripts/cmake_carma.sh rename to platforms/scripts/cmake_carma.sh diff --git a/platforms/scripts/cmake_winrt.cmd b/platforms/scripts/cmake_winrt.cmd new file mode 100644 index 0000000000..df70e856c5 --- /dev/null +++ b/platforms/scripts/cmake_winrt.cmd @@ -0,0 +1,6 @@ +mkdir build_winrt_arm +cd build_winrt_arm + +rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" + +cmake.exe -GNinja -DWITH_TBB=ON -DBUILD_TBB=ON -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\winrt\arm.winrt.toolchain.cmake ..\.. diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd deleted file mode 100644 index aafed7d09d..0000000000 --- a/platforms/winrt/scripts/cmake_winrt.cmd +++ /dev/null @@ -1,6 +0,0 @@ -mkdir build -cd build - -rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" - -cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\.. 
diff --git a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java index 23727739e4..b06b2cc1c5 100644 --- a/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java +++ b/samples/android/face-detection/src/org/opencv/samples/facedetect/FdActivity.java @@ -215,9 +215,9 @@ public class FdActivity extends Activity implements CvCameraViewListener2 { else if (item == mItemFace20) setMinFaceSize(0.2f); else if (item == mItemType) { - mDetectorType = (mDetectorType + 1) % mDetectorName.length; - item.setTitle(mDetectorName[mDetectorType]); - setDetectorType(mDetectorType); + int tmpDetectorType = (mDetectorType + 1) % mDetectorName.length; + item.setTitle(mDetectorName[tmpDetectorType]); + setDetectorType(tmpDetectorType); } return true; } diff --git a/samples/android/native-activity/.cproject b/samples/android/native-activity/.cproject index 09687f3ac0..44aadfe9af 100644 --- a/samples/android/native-activity/.cproject +++ b/samples/android/native-activity/.cproject @@ -1,75 +1,61 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/android/native-activity/.project b/samples/android/native-activity/.project index cf0823c0b3..c20be83f60 100644 --- a/samples/android/native-activity/.project +++ b/samples/android/native-activity/.project @@ -5,6 +5,64 @@ + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + auto,full,incremental, + + + ?name? 
+ + + + org.eclipse.cdt.make.core.append_environment + true + + + org.eclipse.cdt.make.core.autoBuildTarget + + + + org.eclipse.cdt.make.core.buildArguments + + + + org.eclipse.cdt.make.core.buildCommand + "${NDKROOT}/ndk-build.cmd" + + + org.eclipse.cdt.make.core.cleanBuildTarget + clean + + + org.eclipse.cdt.make.core.contents + org.eclipse.cdt.make.core.activeConfigSettings + + + org.eclipse.cdt.make.core.enableAutoBuild + true + + + org.eclipse.cdt.make.core.enableCleanBuild + false + + + org.eclipse.cdt.make.core.enableFullBuild + true + + + org.eclipse.cdt.make.core.fullBuildTarget + + + + org.eclipse.cdt.make.core.stopOnError + true + + + org.eclipse.cdt.make.core.useDefaultBuildCmd + false + + + com.android.ide.eclipse.adt.ResourceManagerBuilder @@ -25,9 +83,19 @@ + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + com.android.ide.eclipse.adt.AndroidNature org.eclipse.jdt.core.javanature + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature diff --git a/samples/android/native-activity/jni/native.cpp b/samples/android/native-activity/jni/native.cpp index 66bc006db1..5cfb3a9611 100644 --- a/samples/android/native-activity/jni/native.cpp +++ b/samples/android/native-activity/jni/native.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -60,7 +59,7 @@ static cv::Size calc_optimal_camera_resolution(const char* supported, int width, } } - idx++; // to skip coma symbol + idx++; // to skip comma symbol } while(supported[idx-1] != '\0'); @@ -86,9 +85,9 @@ static void engine_draw_frame(Engine* engine, const cv::Mat& frame) for (int yy = top_indent; yy < std::min(frame.rows+top_indent, buffer.height); yy++) { - unsigned char* line = (unsigned char*)pixels; - memcpy(line+left_indent*4*sizeof(unsigned char), frame.ptr(yy), - std::min(frame.cols, buffer.width)*4*sizeof(unsigned char)); + 
unsigned char* line = (unsigned char*)pixels + left_indent*4*sizeof(unsigned char); + size_t line_size = std::min(frame.cols, buffer.width)*4*sizeof(unsigned char); + memcpy(line, frame.ptr(yy), line_size); // go to next line pixels = (int32_t*)pixels + buffer.stride; } @@ -139,7 +138,7 @@ static void engine_handle_cmd(android_app* app, int32_t cmd) return; } - LOGI("Camera initialized at resoution %dx%d", camera_resolution.width, camera_resolution.height); + LOGI("Camera initialized at resolution %dx%d", camera_resolution.width, camera_resolution.height); } break; case APP_CMD_TERM_WINDOW: @@ -157,7 +156,8 @@ void android_main(android_app* app) // Make sure glue isn't stripped. app_dummy(); - memset(&engine, 0, sizeof(engine)); + size_t engine_size = sizeof(engine); // for Eclipse CDT parser + memset((void*)&engine, 0, engine_size); app->userData = &engine; app->onAppCmd = engine_handle_cmd; engine.app = app; diff --git a/samples/cpp/latentsvm_multidetect.cpp b/samples/cpp/latentsvm_multidetect.cpp index d2105122ab..619c54b849 100644 --- a/samples/cpp/latentsvm_multidetect.cpp +++ b/samples/cpp/latentsvm_multidetect.cpp @@ -3,7 +3,7 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/contrib/contrib.hpp" -#ifdef WIN32 +#if defined(WIN32) || defined(_WIN32) #include #else #include @@ -67,7 +67,7 @@ static void readDirectory( const string& directoryName, vector& filename { filenames.clear(); -#ifdef WIN32 +#if defined(WIN32) | defined(_WIN32) struct _finddata_t s_file; string str = directoryName + "\\*.*"; diff --git a/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp b/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp index f4cde9b2ee..ead7fd7182 100644 --- a/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp +++ b/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp @@ -70,7 +70,7 @@ int main( int argc, char** argv ) std::vector< DMatch > good_matches; for( int i = 0; i < descriptors_1.rows; i++ ) - { if( matches[i].distance 
< 2*min_dist ) + { if( matches[i].distance <= 2*min_dist ) { good_matches.push_back( matches[i]); } } diff --git a/samples/gpu/bgfg_segm.cpp b/samples/gpu/bgfg_segm.cpp index a77d336a9e..6963e75ff8 100644 --- a/samples/gpu/bgfg_segm.cpp +++ b/samples/gpu/bgfg_segm.cpp @@ -1,15 +1,10 @@ #include #include -#include "opencv2/opencv_modules.hpp" #include "opencv2/core/core.hpp" #include "opencv2/gpu/gpu.hpp" #include "opencv2/highgui/highgui.hpp" -#ifdef HAVE_OPENCV_NONFREE -#include "opencv2/nonfree/gpu.hpp" -#endif - using namespace std; using namespace cv; using namespace cv::gpu; @@ -19,9 +14,6 @@ enum Method FGD_STAT, MOG, MOG2, -#ifdef HAVE_OPENCV_NONFREE - VIBE, -#endif GMG }; @@ -30,7 +22,7 @@ int main(int argc, const char** argv) cv::CommandLineParser cmd(argc, argv, "{ c | camera | false | use camera }" "{ f | file | 768x576.avi | input video file }" - "{ m | method | mog | method (fgd, mog, mog2, vibe, gmg) }" + "{ m | method | mog | method (fgd, mog, mog2, gmg) }" "{ h | help | false | print help message }"); if (cmd.get("help")) @@ -48,9 +40,6 @@ int main(int argc, const char** argv) if (method != "fgd" && method != "mog" && method != "mog2" - #ifdef HAVE_OPENCV_NONFREE - && method != "vibe" - #endif && method != "gmg") { cerr << "Incorrect method" << endl; @@ -60,9 +49,6 @@ int main(int argc, const char** argv) Method m = method == "fgd" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : - #ifdef HAVE_OPENCV_NONFREE - method == "vibe" ? 
VIBE : - #endif GMG; VideoCapture cap; @@ -86,9 +72,6 @@ int main(int argc, const char** argv) FGDStatModel fgd_stat; MOG_GPU mog; MOG2_GPU mog2; -#ifdef HAVE_OPENCV_NONFREE - VIBE_GPU vibe; -#endif GMG_GPU gmg; gmg.numInitializationFrames = 40; @@ -114,12 +97,6 @@ int main(int argc, const char** argv) mog2(d_frame, d_fgmask); break; -#ifdef HAVE_OPENCV_NONFREE - case VIBE: - vibe.initialize(d_frame); - break; -#endif - case GMG: gmg.initialize(d_frame.size()); break; @@ -128,11 +105,7 @@ int main(int argc, const char** argv) namedWindow("image", WINDOW_NORMAL); namedWindow("foreground mask", WINDOW_NORMAL); namedWindow("foreground image", WINDOW_NORMAL); - if (m != GMG - #ifdef HAVE_OPENCV_NONFREE - && m != VIBE - #endif - ) + if (m != GMG) { namedWindow("mean background image", WINDOW_NORMAL); } @@ -165,12 +138,6 @@ int main(int argc, const char** argv) mog2.getBackgroundImage(d_bgimg); break; -#ifdef HAVE_OPENCV_NONFREE - case VIBE: - vibe(d_frame, d_fgmask); - break; -#endif - case GMG: gmg(d_frame, d_fgmask); break; diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp index 99c95ab977..98195b35c2 100644 --- a/samples/gpu/cascadeclassifier_nvidia_api.cpp +++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp @@ -17,12 +17,21 @@ using namespace std; using namespace cv; -#if !defined(HAVE_CUDA) +#if !defined(HAVE_CUDA) || defined(__arm__) + int main( int, const char** ) { - cout << "Please compile the library with CUDA support" << endl; - return -1; +#if !defined(HAVE_CUDA) + std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true)." << std::endl; +#endif + +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." 
<< std::endl; +#endif + + return 0; } + #else diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 2d743f0e9c..c829830e72 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -11,7 +11,7 @@ #include "opencv2/core/core.hpp" #include "opencv2/gpu/gpu.hpp" -#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) +#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) int main() { @@ -23,6 +23,10 @@ int main() std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; #endif +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." << std::endl; +#endif + return 0; } diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index 10c3974771..d4d0af451c 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -13,7 +13,7 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/gpu/gpu.hpp" -#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) +#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__) int main() { @@ -25,6 +25,10 @@ int main() std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; #endif +#if defined(__arm__) + std::cout << "Unsupported for ARM CUDA library." 
<< std::endl; +#endif + return 0; } diff --git a/samples/ocl/aloe-L.png b/samples/ocl/aloe-L.png deleted file mode 100644 index 47587668e2..0000000000 Binary files a/samples/ocl/aloe-L.png and /dev/null differ diff --git a/samples/ocl/aloe-R.png b/samples/ocl/aloe-R.png deleted file mode 100644 index 5d11c57a9e..0000000000 Binary files a/samples/ocl/aloe-R.png and /dev/null differ diff --git a/samples/ocl/aloe-disp.png b/samples/ocl/aloe-disp.png deleted file mode 100644 index dd4a499bed..0000000000 Binary files a/samples/ocl/aloe-disp.png and /dev/null differ diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index ec79339518..a49610aeb7 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -1,5 +1,3 @@ -//This sample is inherited from facedetect.cpp in smaple/c - #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" @@ -9,78 +7,97 @@ using namespace std; using namespace cv; +#define LOOP_NUM 10 -static void help() +const static Scalar colors[] = { CV_RGB(0,0,255), + CV_RGB(0,128,255), + CV_RGB(0,255,255), + CV_RGB(0,255,0), + CV_RGB(255,128,0), + CV_RGB(255,255,0), + CV_RGB(255,0,0), + CV_RGB(255,0,255) + } ; + + +int64 work_begin = 0; +int64 work_end = 0; +string outputName; + +static void workBegin() { - cout << "\nThis program demonstrates the cascade recognizer.\n" - "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n" - "Usage:\n" - "./facedetect [--cascade= this is the primary trained classifier such as frontal face]\n" - " [--scale=\n" - " [filename|camera_index]\n\n" - "see facedetect.cmd for one call:\n" - "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --scale=1.3 \n" - "Hit any key to quit.\n" - "Using OpenCV version " << CV_VERSION << "\n" << endl; + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double 
getTime() +{ + return work_end /((double)cvGetTickFrequency() * 1000.); } -struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; -void detectAndDraw( Mat& img, - cv::ocl::OclCascadeClassifier& cascade, CascadeClassifier& nestedCascade, - double scale); -String cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml"; + +void detect( Mat& img, vector& faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime); + + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime); + + +void Draw(Mat& img, vector& faces, double scale); + + +// This function test if gpu_rst matches cpu_rst. +// If the two vectors are not equal, it will return the difference in vector size +// Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) +double checkRectSimilarity(Size sz, vector& cpu_rst, vector& gpu_rst); + int main( int argc, const char** argv ) { + const char* keys = + "{ h | help | false | print help message }" + "{ i | input | | specify input image }" + "{ t | template | haarcascade_frontalface_alt.xml |" + " specify template file path }" + "{ c | scale | 1.0 | scale image }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ o | output | facedetect_output.jpg |" + " specify output image save path(only works when input is images) }"; + + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } CvCapture* capture = 0; Mat frame, frameCopy, image; - const String scaleOpt = "--scale="; - size_t scaleOptLen = scaleOpt.length(); - const String cascadeOpt = "--cascade="; - size_t cascadeOptLen = cascadeOpt.length(); - String inputName; - help(); - cv::ocl::OclCascadeClassifier cascade; - CascadeClassifier nestedCascade; - double scale = 1; + bool useCPU = cmd.get("s"); + string inputName = cmd.get("i"); + outputName = 
cmd.get("o"); + string cascadeName = cmd.get("t"); + double scale = cmd.get("c"); + ocl::OclCascadeClassifierBuf cascade; + CascadeClassifier cpu_cascade; - for( int i = 1; i < argc; i++ ) - { - cout << "Processing " << i << " " << argv[i] << endl; - if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 ) - { - cascadeName.assign( argv[i] + cascadeOptLen ); - cout << " from which we have cascadeName= " << cascadeName << endl; - } - else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 ) - { - if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 ) - scale = 1; - cout << " from which we read scale = " << scale << endl; - } - else if( argv[i][0] == '-' ) - { - cerr << "WARNING: Unknown option %s" << argv[i] << endl; - } - else - inputName.assign( argv[i] ); - } - - if( !cascade.load( cascadeName ) ) + if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) { cerr << "ERROR: Could not load classifier cascade" << endl; - cerr << "Usage: facedetect [--cascade=]\n" - " [--scale[=\n" - " [filename|camera_index]\n" << endl ; return -1; } - if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) + if( inputName.empty() ) { - capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' ); - int c = inputName.empty() ? 
0 : inputName.c_str()[0] - '0' ; - if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl; + capture = cvCaptureFromCAM(0); + if(!capture) + cout << "Capture from CAM 0 didn't work" << endl; } else if( inputName.size() ) { @@ -88,26 +105,31 @@ int main( int argc, const char** argv ) if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); - if(!capture) cout << "Capture from AVI didn't work" << endl; + if(!capture) + cout << "Capture from AVI didn't work" << endl; + return -1; } } else { image = imread( "lena.jpg", 1 ); - if(image.empty()) cout << "Couldn't read lena.jpg" << endl; + if(image.empty()) + cout << "Couldn't read lena.jpg" << endl; + return -1; } + cvNamedWindow( "result", 1 ); - std::vector oclinfo; - int devnums = cv::ocl::getDevice(oclinfo); - if(devnums<1) + vector oclinfo; + int devnums = ocl::getDevice(oclinfo); + if( devnums < 1 ) { std::cout << "no device found\n"; return -1; } //if you want to use undefault device, set it here //setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); + ocl::setBinpath("./"); if( capture ) { cout << "In capture ..." 
<< endl; @@ -115,108 +137,113 @@ int main( int argc, const char** argv ) { IplImage* iplImg = cvQueryFrame( capture ); frame = iplImg; + vector faces; if( frame.empty() ) break; if( iplImg->origin == IPL_ORIGIN_TL ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); - - detectAndDraw( frameCopy, cascade, nestedCascade, scale ); - + if(useCPU) + { + detectCPU(frameCopy, faces, cpu_cascade, scale, false); + } + else + { + detect(frameCopy, faces, cascade, scale, false); + } + Draw(frameCopy, faces, scale); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } + waitKey(0); + _cleanup_: cvReleaseCapture( &capture ); } else { cout << "In image read" << endl; - if( !image.empty() ) + vector faces; + vector ref_rst; + double accuracy = 0.; + for(int i = 0; i <= LOOP_NUM; i ++) { - detectAndDraw( image, cascade, nestedCascade, scale ); - waitKey(0); - } - else if( !inputName.empty() ) - { - /* assume it is a text file containing the - list of the image filenames to be processed - one per line */ - FILE* f = fopen( inputName.c_str(), "rt" ); - if( f ) + cout << "loop" << i << endl; + if(useCPU) { - char buf[1000+1]; - while( fgets( buf, 1000, f ) ) + detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); + } + else + { + detect(image, faces, cascade, scale, i==0?false:true); + if(i == 0) { - int len = (int)strlen(buf), c; - while( len > 0 && isspace(buf[len-1]) ) - len--; - buf[len] = '\0'; - cout << "file " << buf << endl; - image = imread( buf, 1 ); - if( !image.empty() ) - { - detectAndDraw( image, cascade, nestedCascade, scale ); - c = waitKey(0); - if( c == 27 || c == 'q' || c == 'Q' ) - break; - } - else - { - cerr << "Aw snap, couldn't read image " << buf << endl; - } + detectCPU(image, ref_rst, cpu_cascade, scale, false); + accuracy = checkRectSimilarity(image.size(), ref_rst, faces); } - fclose(f); + } + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + cout << getTime() / 
LOOP_NUM << " ms" << endl; + cout << "accuracy value: " << accuracy <& faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime) +{ + ocl::oclMat image(img); + ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); + if(calTime) workBegin(); + ocl::cvtColor( image, gray, CV_BGR2GRAY ); + ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); + ocl::equalizeHist( smallImg, smallImg ); + + cascade.detectMultiScale( smallImg, faces, 1.1, + 3, 0 + |CV_HAAR_SCALE_IMAGE + , Size(30,30), Size(0, 0) ); + if(calTime) workEnd(); +} + + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime) +{ + if(calTime) workBegin(); + Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); + cvtColor(img, cpu_gray, CV_BGR2GRAY); + resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR); + equalizeHist(cpu_smallImg, cpu_smallImg); + cascade.detectMultiScale(cpu_smallImg, faces, 1.1, + 3, 0 | CV_HAAR_SCALE_IMAGE, + Size(30, 30), Size(0, 0)); + if(calTime) workEnd(); +} + + +void Draw(Mat& img, vector& faces, double scale) { int i = 0; - double t = 0; - vector faces; - const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; - cv::ocl::oclMat image(img); - cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); - - cv::ocl::cvtColor( image, gray, CV_BGR2GRAY ); - cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - cv::ocl::equalizeHist( smallImg, smallImg ); - - CvSeq* _objects; - MemStorage storage(cvCreateMemStorage(0)); - t = (double)cvGetTickCount(); - _objects = cascade.oclHaarDetectObjects( smallImg, storage, 1.1, - 3, 0 - |CV_HAAR_SCALE_IMAGE - , Size(30,30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); 
- faces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); - t = (double)cvGetTickCount() - t; - printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) { - Mat smallImgROI; Point center; Scalar color = colors[i%8]; int radius; @@ -225,5 +252,53 @@ void detectAndDraw( Mat& img, radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } - cv::imshow( "result", img ); + imshow( "result", img ); + imwrite( outputName, img ); +} + + +double checkRectSimilarity(Size sz, vector& ob1, vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + { + return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); + } + else + { + if(sz1==0 && sz2==0) + return 0; + Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = countNonZero(cpu_result > 0); + + + Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; + } + return final_test_result; } diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index 76b6d2830e..ff53e010cf 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -10,69 +10,39 @@ using namespace std; using namespace cv; -bool help_showed = false; - -class Args -{ -public: - Args(); - 
static Args read(int argc, char** argv); - - string src; - bool src_is_video; - bool src_is_camera; - int camera_id; - - bool write_video; - string dst_video; - double dst_video_fps; - - bool make_gray; - - bool resize_src; - int width, height; - - double scale; - int nlevels; - int gr_threshold; - - double hit_threshold; - bool hit_threshold_auto; - - int win_width; - int win_stride_width, win_stride_height; - - bool gamma_corr; -}; - - class App { public: - App(const Args& s); + App(CommandLineParser& cmd); void run(); - void handleKey(char key); - void hogWorkBegin(); void hogWorkEnd(); string hogWorkFps() const; - void workBegin(); void workEnd(); string workFps() const; - string message() const; + +// This function test if gpu_rst matches cpu_rst. +// If the two vectors are not equal, it will return the difference in vector size +// Else if will return +// (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) + double checkRectSimilarity(Size sz, + std::vector& cpu_rst, + std::vector& gpu_rst); private: App operator=(App&); - Args args; + //Args args; bool running; - bool use_gpu; bool make_gray; double scale; + double resize_scale; + int win_width; + int win_stride_width, win_stride_height; int gr_threshold; int nlevels; double hit_threshold; @@ -80,119 +50,49 @@ private: int64 hog_work_begin; double hog_work_fps; - int64 work_begin; double work_fps; -}; -static void printHelp() -{ - cout << "Histogram of Oriented Gradients descriptor and detector sample.\n" - << "\nUsage: hog_gpu\n" - << " (|--video |--camera ) # frames source\n" - << " [--make_gray ] # convert image to gray one or not\n" - << " [--resize_src ] # do resize of the source image or not\n" - << " [--width ] # resized image width\n" - << " [--height ] # resized image height\n" - << " [--hit_threshold ] # classifying plane distance threshold (0.0 usually)\n" - << " [--scale ] # HOG window scale factor\n" - << " [--nlevels ] # max number of HOG window scales\n" - << 
" [--win_width ] # width of the window (48 or 64)\n" - << " [--win_stride_width ] # distance by OX axis between neighbour wins\n" - << " [--win_stride_height ] # distance by OY axis between neighbour wins\n" - << " [--gr_threshold ] # merging similar rects constant\n" - << " [--gamma_correct ] # do gamma correction or not\n" - << " [--write_video ] # write video or not\n" - << " [--dst_video ] # output video path\n" - << " [--dst_video_fps ] # output video fps\n"; - help_showed = true; -} + string img_source; + string vdo_source; + string output; + int camera_id; +}; int main(int argc, char** argv) { + const char* keys = + "{ h | help | false | print help message }" + "{ i | input | | specify input image}" + "{ c | camera | -1 | enable camera capturing }" + "{ v | video | | use video as input }" + "{ g | gray | false | convert image to gray one or not}" + "{ s | scale | 1.0 | resize the image before detect}" + "{ l |larger_win| false | use 64x128 window}" + "{ o | output | | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + App app(cmd); try { - if (argc < 2) - printHelp(); - Args args = Args::read(argc, argv); - if (help_showed) - return -1; - App app(args); app.run(); } - catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch(...) { return cout << "unknown exception" << endl, 1; } + catch (const Exception& e) + { + return cout << "error: " << e.what() << endl, 1; + } + catch (const exception& e) + { + return cout << "error: " << e.what() << endl, 1; + } + catch(...) 
+ { + return cout << "unknown exception" << endl, 1; + } return 0; } - -Args::Args() +App::App(CommandLineParser& cmd) { - src_is_video = false; - src_is_camera = false; - camera_id = 0; - - write_video = false; - dst_video_fps = 24.; - - make_gray = false; - - resize_src = false; - width = 640; - height = 480; - - scale = 1.05; - nlevels = 13; - gr_threshold = 8; - hit_threshold = 1.4; - hit_threshold_auto = true; - - win_width = 48; - win_stride_width = 8; - win_stride_height = 8; - - gamma_corr = true; -} - - -Args Args::read(int argc, char** argv) -{ - Args args; - for (int i = 1; i < argc; i++) - { - if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]); - else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]); - else if (string(argv[i]) == "--hit_threshold") - { - args.hit_threshold = atof(argv[++i]); - args.hit_threshold_auto = false; - } - else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]); - else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]); - else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]); - else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]); - else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i]; - else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]); - else if (string(argv[i]) == "--help") printHelp(); - else if 
(string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; } - else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; } - else if (args.src.empty()) args.src = argv[i]; - else throw runtime_error((string("unknown key: ") + argv[i])); - } - return args; -} - - -App::App(const Args& s) -{ - args = s; cout << "\nControls:\n" << "\tESC - exit\n" << "\tm - change mode GPU <-> CPU\n" @@ -203,56 +103,56 @@ App::App(const Args& s) << "\t4/r - increase/decrease hit threshold\n" << endl; + use_gpu = true; - make_gray = args.make_gray; - scale = args.scale; - gr_threshold = args.gr_threshold; - nlevels = args.nlevels; + make_gray = cmd.get("g"); + resize_scale = cmd.get("s"); + win_width = cmd.get("l") == true ? 64 : 48; + vdo_source = cmd.get("v"); + img_source = cmd.get("i"); + output = cmd.get("o"); + camera_id = cmd.get("c"); - if (args.hit_threshold_auto) - args.hit_threshold = args.win_width == 48 ? 1.4 : 0.; - hit_threshold = args.hit_threshold; + win_stride_width = 8; + win_stride_height = 8; + gr_threshold = 8; + nlevels = 13; + hit_threshold = win_width == 48 ? 
1.4 : 0.; + scale = 1.05; + gamma_corr = true; - gamma_corr = args.gamma_corr; - - if (args.win_width != 64 && args.win_width != 48) - args.win_width = 64; - - cout << "Scale: " << scale << endl; - if (args.resize_src) - cout << "Resized source: (" << args.width << ", " << args.height << ")\n"; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; - cout << "Win width: " << args.win_width << endl; - cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; + cout << "Win width: " << win_width << endl; + cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; cout << "Gamma correction: " << gamma_corr << endl; cout << endl; } - void App::run() { - std::vector oclinfo; + vector oclinfo; ocl::getDevice(oclinfo); running = true; - cv::VideoWriter video_writer; + VideoWriter video_writer; - Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) - Size win_stride(args.win_stride_width, args.win_stride_height); + Size win_size(win_width, win_width * 2); + Size win_stride(win_stride_width, win_stride_height); // Create HOG descriptors and detectors here vector detector; if (win_size == Size(64, 128)) - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); + detector = ocl::HOGDescriptor::getPeopleDetector64x128(); else - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); + detector = ocl::HOGDescriptor::getPeopleDetector48x96(); - cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); - cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, - HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); + + ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, + 
ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + ocl::HOGDescriptor::DEFAULT_NLEVELS); + HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, + HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); gpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector); @@ -261,35 +161,36 @@ void App::run() VideoCapture vc; Mat frame; - if (args.src_is_video) + if (vdo_source!="") { - vc.open(args.src.c_str()); + vc.open(vdo_source.c_str()); if (!vc.isOpened()) - throw runtime_error(string("can't open video file: " + args.src)); + throw runtime_error(string("can't open video file: " + vdo_source)); vc >> frame; } - else if (args.src_is_camera) + else if (camera_id != -1) { - vc.open(args.camera_id); + vc.open(camera_id); if (!vc.isOpened()) { stringstream msg; - msg << "can't open camera: " << args.camera_id; + msg << "can't open camera: " << camera_id; throw runtime_error(msg.str()); } vc >> frame; } else { - frame = imread(args.src); + frame = imread(img_source); if (frame.empty()) - throw runtime_error(string("can't open image file: " + args.src)); + throw runtime_error(string("can't open image file: " + img_source)); } Mat img_aux, img, img_to_show; ocl::oclMat gpu_img; // Iterate over all frames + bool verify = false; while (running && !frame.empty()) { workBegin(); @@ -300,13 +201,15 @@ void App::run() else frame.copyTo(img_aux); // Resize image - if (args.resize_src) resize(img_aux, img, Size(args.width, args.height)); + if (abs(scale-1.0)>0.001) + { + Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale)); + resize(img_aux, img, sz); + } else img = img_aux; img_to_show = img; - gpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels; - vector found; // Perform HOG classification @@ -316,11 +219,23 @@ void App::run() gpu_img.upload(img); gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride, Size(0, 0), scale, gr_threshold); + if (!verify) + { + // verify if 
GPU output same objects with CPU at 1st run + verify = true; + vector ref_rst; + cvtColor(img, img, CV_BGRA2BGR); + cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride, + Size(0, 0), scale, gr_threshold-2); + double accuracy = checkRectSimilarity(img.size(), ref_rst, found); + cout << "\naccuracy value: " << accuracy << endl; + } } else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); + Size(0, 0), scale, gr_threshold); hogWorkEnd(); + // Draw positive classified windows for (size_t i = 0; i < found.size(); i++) { @@ -335,25 +250,31 @@ void App::run() putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); imshow("opencv_gpu_hog", img_to_show); - - if (args.src_is_video || args.src_is_camera) vc >> frame; + if (vdo_source!="" || camera_id!=-1) vc >> frame; workEnd(); - if (args.write_video) + if (output!="") { - if (!video_writer.isOpened()) + if (img_source!="") // wirte image { - video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps, - img_to_show.size(), true); - if (!video_writer.isOpened()) - throw std::runtime_error("can't create video writer"); + imwrite(output, img_to_show); } + else //write video + { + if (!video_writer.isOpened()) + { + video_writer.open(output, CV_FOURCC('x','v','i','d'), 24, + img_to_show.size(), true); + if (!video_writer.isOpened()) + throw std::runtime_error("can't create video writer"); + } - if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); - else cvtColor(img_to_show, img, CV_BGRA2BGR); + if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); + else cvtColor(img_to_show, img, CV_BGRA2BGR); - video_writer << img; + video_writer << img; + } } handleKey((char)waitKey(3)); @@ -361,7 +282,6 @@ void App::run() } } - void App::handleKey(char key) { switch (key) @@ 
-424,7 +344,10 @@ void App::handleKey(char key) } -inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } +inline void App::hogWorkBegin() +{ + hog_work_begin = getTickCount(); +} inline void App::hogWorkEnd() { @@ -440,8 +363,10 @@ inline string App::hogWorkFps() const return ss.str(); } - -inline void App::workBegin() { work_begin = getTickCount(); } +inline void App::workBegin() +{ + work_begin = getTickCount(); +} inline void App::workEnd() { @@ -457,3 +382,53 @@ inline string App::workFps() const return ss.str(); } + +double App::checkRectSimilarity(Size sz, + std::vector& ob1, + std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + { + return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); + } + else + { + if(sz1==0 && sz2==0) + return 0; + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; + } + return final_test_result; +} + diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp new file mode 100644 index 0000000000..cefa928670 --- /dev/null +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -0,0 +1,275 @@ +#include +#include +#include + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" 
+#include "opencv2/video/video.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +typedef unsigned char uchar; +#define LOOP_NUM 10 +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double getTime() +{ + return work_end * 1000. / getTickFrequency(); +} + +static void download(const oclMat& d_mat, vector& vec) +{ + vec.clear(); + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_32FC2, (void*)&vec[0]); + d_mat.download(mat); +} + +static void download(const oclMat& d_mat, vector& vec) +{ + vec.clear(); + vec.resize(d_mat.cols); + Mat mat(1, d_mat.cols, CV_8UC1, (void*)&vec[0]); + d_mat.download(mat); +} + +static void drawArrows(Mat& frame, const vector& prevPts, const vector& nextPts, const vector& status, Scalar line_color = Scalar(0, 0, 255)) +{ + for (size_t i = 0; i < prevPts.size(); ++i) + { + if (status[i]) + { + int line_thickness = 1; + + Point p = prevPts[i]; + Point q = nextPts[i]; + + double angle = atan2((double) p.y - q.y, (double) p.x - q.x); + + double hypotenuse = sqrt( (double)(p.y - q.y)*(p.y - q.y) + (double)(p.x - q.x)*(p.x - q.x) ); + + if (hypotenuse < 1.0) + continue; + + // Here we lengthen the arrow by a factor of three. + q.x = (int) (p.x - 3 * hypotenuse * cos(angle)); + q.y = (int) (p.y - 3 * hypotenuse * sin(angle)); + + // Now we draw the main line of the arrow. + line(frame, p, q, line_color, line_thickness); + + // Now draw the tips of the arrow. I do some scaling so that the + // tips look proportional to the main line of the arrow. 
+ + p.x = (int) (q.x + 9 * cos(angle + CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle + CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + + p.x = (int) (q.x + 9 * cos(angle - CV_PI / 4)); + p.y = (int) (q.y + 9 * sin(angle - CV_PI / 4)); + line(frame, p, q, line_color, line_thickness); + } + } +} + + +int main(int argc, const char* argv[]) +{ + static std::vector ocl_info; + ocl::getDevice(ocl_info); + //if you want to use undefault device, set it here + setDevice(ocl_info[0]); + + //set this to save kernel compile time from second time you run + ocl::setBinpath("./"); + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ c | camera | 0 | specify camera id }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }" + "{ o | output | pyrlk_output.jpg| specify output save path when input is images }" + "{ p | points | 1000 | specify points count [GoodFeatureToTrack] }" + "{ m | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.get("help")) + { + cout << "Usage: pyrlk_optical_flow [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + bool defaultPicturesFail = false; + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string outfile = cmd.get("o"); + int points = cmd.get("p"); + double minDist = cmd.get("m"); + bool useCPU = cmd.get("s"); + int inputName = cmd.get("c"); + + oclMat d_nextPts, d_status; + GoodFeaturesToTrackDetector_OCL d_features(points); + Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); + PyrLKOpticalFlow d_pyrLK; + vector pts(points); + vector nextPts(points); + vector status(points); + vector err; + + cout << "Points count : " << points << endl << endl; + + 
if (frame0.empty() || frame1.empty()) + { + CvCapture* capture = 0; + Mat frame, frameCopy; + Mat frame0Gray, frame1Gray; + Mat ptr0, ptr1; + + if(vdofile == "") + capture = cvCaptureFromCAM( inputName ); + else + capture = cvCreateFileCapture(vdofile.c_str()); + + int c = inputName ; + if(!capture) + { + if(vdofile == "") + cout << "Capture from CAM " << c << " didn't work" << endl; + else + cout << "Capture from file " << vdofile << " failed" <= 0 ) + goto _cleanup_; + } + + waitKey(0); + +_cleanup_: + cvReleaseCapture( &capture ); + } + else + { +nocamera: + for(int i = 0; i <= LOOP_NUM; i ++) + { + cout << "loop" << i << endl; + if (i > 0) workBegin(); + + if (useCPU) + { + goodFeaturesToTrack(frame0, pts, points, 0.01, minDist); + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + } + else + { + oclMat d_img(frame0), d_prevPts; + d_features(d_img, d_prevPts); + d_pyrLK.sparse(d_img, oclMat(frame1), d_prevPts, d_nextPts, d_status); + d_features.downloadPoints(d_prevPts, pts); + download(d_nextPts, nextPts); + download(d_status, status); + } + + if (i > 0 && i <= LOOP_NUM) + workEnd(); + + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + + cout << getTime() / LOOP_NUM << " ms" << endl; + + drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0)); + imshow("PyrLK [Sparse]", frame0); + imwrite(outfile, frame0); + } + } + } + + waitKey(); + + return 0; +} diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp index 6b184161f7..48964ffb2e 100644 --- a/samples/ocl/squares.cpp +++ b/samples/ocl/squares.cpp @@ -6,7 +6,6 @@ #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/ocl/ocl.hpp" - #include #include #include @@ -14,23 +13,50 @@ using namespace cv; using namespace std; -static void help() -{ - cout << - "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, 
contour simpification and\n" - "memory storage (it's got it all folks) to find\n" - "squares in a list of images pic1-6.png\n" - "Returns sequence of squares detected on the image.\n" - "the sequence is stored in the specified memory storage\n" - "Call:\n" - "./squares\n" - "Using OpenCV version %s\n" << CV_VERSION << "\n" << endl; -} +#define ACCURACY_CHECK 1 +#if ACCURACY_CHECK +// check if two vectors of vector of points are near or not +// prior assumption is that they are in correct order +static bool checkPoints( + vector< vector > set1, + vector< vector > set2, + int maxDiff = 5) +{ + if(set1.size() != set2.size()) + { + return false; + } + + for(vector< vector >::iterator it1 = set1.begin(), it2 = set2.begin(); + it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++) + { + vector pts1 = *it1; + vector pts2 = *it2; + + + if(pts1.size() != pts2.size()) + { + return false; + } + for(size_t i = 0; i < pts1.size(); i ++) + { + Point pt1 = pts1[i], pt2 = pts2[i]; + if(std::abs(pt1.x - pt2.x) > maxDiff || + std::abs(pt1.y - pt2.y) > maxDiff) + { + return false; + } + } + } + return true; +} +#endif int thresh = 50, N = 11; const char* wndname = "OpenCL Square Detection Demo"; + // helper function: // finds a cosine of angle between vectors // from pt0->pt1 and from pt0->pt2 @@ -43,9 +69,92 @@ static double angle( Point pt1, Point pt2, Point pt0 ) return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10); } + // returns sequence of squares detected on the image. 
// the sequence is stored in the specified memory storage static void findSquares( const Mat& image, vector >& squares ) +{ + squares.clear(); + Mat pyr, timg, gray0(image.size(), CV_8U), gray; + + // down-scale and upscale the image to filter out the noise + pyrDown(image, pyr, Size(image.cols/2, image.rows/2)); + pyrUp(pyr, timg, image.size()); + vector > contours; + + // find squares in every color plane of the image + for( int c = 0; c < 3; c++ ) + { + int ch[] = {c, 0}; + mixChannels(&timg, 1, &gray0, 1, ch, 1); + + // try several threshold levels + for( int l = 0; l < N; l++ ) + { + // hack: use Canny instead of zero threshold level. + // Canny helps to catch squares with gradient shading + if( l == 0 ) + { + // apply Canny. Take the upper threshold from slider + // and set the lower to 0 (which forces edges merging) + Canny(gray0, gray, 0, thresh, 5); + // dilate canny output to remove potential + // holes between edge segments + dilate(gray, gray, Mat(), Point(-1,-1)); + } + else + { + // apply threshold if l!=0: + // tgray(x,y) = gray(x,y) < (l+1)*255/N ? 255 : 0 + cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY); + } + + // find contours and store them all as a list + findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); + + vector approx; + + // test each contour + for( size_t i = 0; i < contours.size(); i++ ) + { + // approximate contour with accuracy proportional + // to the contour perimeter + approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true); + + // square contours should have 4 vertices after approximation + // relatively large area (to filter out noisy contours) + // and be convex. 
+ // Note: absolute value of an area is used because + // area may be positive or negative - in accordance with the + // contour orientation + if( approx.size() == 4 && + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) + { + double maxCosine = 0; + + for( int j = 2; j < 5; j++ ) + { + // find the maximum cosine of the angle between joint edges + double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1])); + maxCosine = MAX(maxCosine, cosine); + } + + // if cosines of all angles are small + // (all angles are ~90 degree) then write quandrange + // vertices to resultant sequence + if( maxCosine < 0.3 ) + squares.push_back(approx); + } + } + } + } +} + + +// returns sequence of squares detected on the image. +// the sequence is stored in the specified memory storage +static void findSquares_ocl( const Mat& image, vector >& squares ) { squares.clear(); @@ -91,7 +200,6 @@ static void findSquares( const Mat& image, vector >& squares ) findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); vector approx; - // test each contour for( size_t i = 0; i < contours.size(); i++ ) { @@ -106,11 +214,10 @@ static void findSquares( const Mat& image, vector >& squares ) // area may be positive or negative - in accordance with the // contour orientation if( approx.size() == 4 && - fabs(contourArea(Mat(approx))) > 1000 && - isContourConvex(Mat(approx)) ) + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) { double maxCosine = 0; - for( int j = 2; j < 5; j++ ) { // find the maximum cosine of the angle between joint edges @@ -139,40 +246,93 @@ static void drawSquares( Mat& image, const vector >& squares ) int n = (int)squares[i].size(); polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA); } - - imshow(wndname, image); } -int main(int /*argc*/, char** /*argv*/) +// draw both pure-C++ and ocl square results onto a single image +static Mat drawSquaresBoth( const Mat& image, + const vector >& sqsCPP, + const vector 
>& sqsOCL +) { + Mat imgToShow(Size(image.cols * 2, image.rows), image.type()); + Mat lImg = imgToShow(Rect(Point(0, 0), image.size())); + Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size())); + image.copyTo(lImg); + image.copyTo(rImg); + drawSquares(lImg, sqsCPP); + drawSquares(rImg, sqsOCL); + float fontScale = 0.8f; + Scalar white = Scalar::all(255), black = Scalar::all(0); + + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + + return imgToShow; +} + + +int main(int argc, char** argv) +{ + const char* keys = + "{ i | input | | specify input image }" + "{ o | output | squares_output.jpg | specify output save path}"; + CommandLineParser cmd(argc, argv, keys); + string inputName = cmd.get("i"); + string outfile = cmd.get("o"); + if(inputName.empty()) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } - //ocl::setBinpath("F:/kernel_bin"); vector info; CV_Assert(ocl::getDevice(info)); - - static const char* names[] = { "pic1.png", "pic2.png", "pic3.png", - "pic4.png", "pic5.png", "pic6.png", 0 }; - help(); + int iterations = 10; namedWindow( wndname, 1 ); - vector > squares; + vector > squares_cpu, squares_ocl; - for( int i = 0; names[i] != 0; i++ ) + Mat image = imread(inputName, 1); + if( image.empty() ) { - Mat image = imread(names[i], 1); - if( image.empty() ) - { - cout << "Couldn't load " << names[i] << endl; - continue; - } - - findSquares(image, squares); - drawSquares(image, squares); - - int c = waitKey(); - if( (char)c == 27 ) - break; + cout << "Couldn't load " << inputName << endl; + return -1; } + int j = iterations; + int64 t_ocl = 0, t_cpp = 0; + //warm-ups + cout << "warming up ..." 
<< endl; + findSquares(image, squares_cpu); + findSquares_ocl(image, squares_ocl); + + +#if ACCURACY_CHECK + cout << "Checking ocl accuracy ... " << endl; + cout << (checkPoints(squares_cpu, squares_ocl) ? "Pass" : "Failed") << endl; +#endif + do + { + int64 t_start = cv::getTickCount(); + findSquares(image, squares_cpu); + t_cpp += cv::getTickCount() - t_start; + + + t_start = cv::getTickCount(); + findSquares_ocl(image, squares_ocl); + t_ocl += cv::getTickCount() - t_start; + cout << "run loop: " << j << endl; + } + while(--j); + cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl; + cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl; + + Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl); + imshow(wndname, result); + imwrite(outfile, result); + cvWaitKey(0); return 0; } diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp new file mode 100644 index 0000000000..abe75c70e1 --- /dev/null +++ b/samples/ocl/stereo_match.cpp @@ -0,0 +1,384 @@ +#include +#include +#include +#include +#include +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/highgui/highgui.hpp" + +using namespace cv; +using namespace std; +using namespace ocl; + + +struct App +{ + App(CommandLineParser& cmd); + void run(); + void handleKey(char key); + void printParams() const; + + void workBegin() + { + work_begin = getTickCount(); + } + void workEnd() + { + int64 d = getTickCount() - work_begin; + double f = getTickFrequency(); + work_fps = f / d; + } + string method_str() const + { + switch (method) + { + case BM: + return "BM"; + case BP: + return "BP"; + case CSBP: + return "CSBP"; + } + return ""; + } + string text() const + { + stringstream ss; + ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left) + << setprecision(4) << work_fps; + return ss.str(); + } +private: + bool running; + + Mat left_src, right_src; + Mat left, right; + oclMat d_left, 
d_right; + + StereoBM_OCL bm; + StereoBeliefPropagation bp; + StereoConstantSpaceBP csbp; + + int64 work_begin; + double work_fps; + + string l_img, r_img; + string out_img; + enum {BM, BP, CSBP} method; + int ndisp; // Max disparity + 1 + enum {GPU, CPU} type; +}; + +int main(int argc, char** argv) +{ + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ m | method | BM | specify match method(BM/BP/CSBP) }" + "{ n | ndisp | 64 | specify number of disparity levels }" + "{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }" + "{ o | output | stereo_match_output.jpg | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + try + { + App app(cmd); + int flag = CVCL_DEVICE_TYPE_GPU; + if(cmd.get("s") == true) + flag = CVCL_DEVICE_TYPE_CPU; + + vector info; + if(getDevice(info, flag) == 0) + { + throw runtime_error("Error: Did not find a valid OpenCL device!"); + } + cout << "Device name:" << info[0].DeviceName[0] << endl; + + app.run(); + } + catch (const exception& e) + { + cout << "error: " << e.what() << endl; + } + return 0; +} + +App::App(CommandLineParser& cmd) + : running(false),method(BM) +{ + cout << "stereo_match_ocl sample\n"; + cout << "\nControls:\n" + << "\tesc - exit\n" + << "\tp - print current parameters\n" + << "\tg - convert source images into gray\n" + << "\tm - change stereo match method\n" + << "\ts - change Sobel prefiltering flag (for BM only)\n" + << "\t1/q - increase/decrease maximum disparity\n" + << "\t2/w - increase/decrease window size (for BM only)\n" + << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" + << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; + l_img = cmd.get("l"); + r_img = cmd.get("r"); + string mstr = cmd.get("m"); + if(mstr == 
"BM") method = BM; + else if(mstr == "BP") method = BP; + else if(mstr == "CSBP") method = CSBP; + else cout << "unknown method!\n"; + ndisp = cmd.get("n"); + out_img = cmd.get("o"); +} + + +void App::run() +{ + // Load images + left_src = imread(l_img); + right_src = imread(r_img); + if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\""); + if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\""); + + cvtColor(left_src, left, CV_BGR2GRAY); + cvtColor(right_src, right, CV_BGR2GRAY); + + d_left.upload(left); + d_right.upload(right); + + imshow("left", left); + imshow("right", right); + + // Set common parameters + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; + + cout << endl; + printParams(); + + running = true; + bool written = false; + while (running) + { + + // Prepare disparity map of specified type + Mat disp; + oclMat d_disp; + workBegin(); + switch (method) + { + case BM: + if (d_left.channels() > 1 || d_right.channels() > 1) + { + cout << "BM doesn't support color images\n"; + cvtColor(left_src, left, CV_BGR2GRAY); + cvtColor(right_src, right, CV_BGR2GRAY); + cout << "image_channels: " << left.channels() << endl; + d_left.upload(left); + d_right.upload(right); + imshow("left", left); + imshow("right", right); + } + bm(d_left, d_right, d_disp); + break; + case BP: + bp(d_left, d_right, d_disp); + break; + case CSBP: + csbp(d_left, d_right, d_disp); + break; + } + // Show results + d_disp.download(disp); + workEnd(); + if (method != BM) + { + disp.convertTo(disp, 0); + } + putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); + imshow("disparity", disp); + if(!written) + { + imwrite(out_img, disp); + written = true; + } + handleKey((char)waitKey(3)); + } +} + + +void App::printParams() const +{ + cout << "--- Parameters ---\n"; + cout << "image_size: (" << left.cols << ", " << left.rows << ")\n"; + cout << "image_channels: " << left.channels() << endl; + cout << "method: 
" << method_str() << endl + << "ndisp: " << ndisp << endl; + switch (method) + { + case BM: + cout << "win_size: " << bm.winSize << endl; + cout << "prefilter_sobel: " << bm.preset << endl; + break; + case BP: + cout << "iter_count: " << bp.iters << endl; + cout << "level_count: " << bp.levels << endl; + break; + case CSBP: + cout << "iter_count: " << csbp.iters << endl; + cout << "level_count: " << csbp.levels << endl; + break; + } + cout << endl; +} + + +void App::handleKey(char key) +{ + switch (key) + { + case 27: + running = false; + break; + case 'p': + case 'P': + printParams(); + break; + case 'g': + case 'G': + if (left.channels() == 1 && method != BM) + { + left = left_src; + right = right_src; + } + else + { + cvtColor(left_src, left, CV_BGR2GRAY); + cvtColor(right_src, right, CV_BGR2GRAY); + } + d_left.upload(left); + d_right.upload(right); + cout << "image_channels: " << left.channels() << endl; + imshow("left", left); + imshow("right", right); + break; + case 'm': + case 'M': + switch (method) + { + case BM: + method = BP; + break; + case BP: + method = CSBP; + break; + case CSBP: + method = BM; + break; + } + cout << "method: " << method_str() << endl; + break; + case 's': + case 'S': + if (method == BM) + { + switch (bm.preset) + { + case StereoBM_OCL::BASIC_PRESET: + bm.preset = StereoBM_OCL::PREFILTER_XSOBEL; + break; + case StereoBM_OCL::PREFILTER_XSOBEL: + bm.preset = StereoBM_OCL::BASIC_PRESET; + break; + } + cout << "prefilter_sobel: " << bm.preset << endl; + } + break; + case '1': + ndisp == 1 ? 
ndisp = 8 : ndisp += 8; + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; + break; + case 'q': + case 'Q': + ndisp = max(ndisp - 8, 1); + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; + break; + case '2': + if (method == BM) + { + bm.winSize = min(bm.winSize + 1, 51); + cout << "win_size: " << bm.winSize << endl; + } + break; + case 'w': + case 'W': + if (method == BM) + { + bm.winSize = max(bm.winSize - 1, 2); + cout << "win_size: " << bm.winSize << endl; + } + break; + case '3': + if (method == BP) + { + bp.iters += 1; + cout << "iter_count: " << bp.iters << endl; + } + else if (method == CSBP) + { + csbp.iters += 1; + cout << "iter_count: " << csbp.iters << endl; + } + break; + case 'e': + case 'E': + if (method == BP) + { + bp.iters = max(bp.iters - 1, 1); + cout << "iter_count: " << bp.iters << endl; + } + else if (method == CSBP) + { + csbp.iters = max(csbp.iters - 1, 1); + cout << "iter_count: " << csbp.iters << endl; + } + break; + case '4': + if (method == BP) + { + bp.levels += 1; + cout << "level_count: " << bp.levels << endl; + } + else if (method == CSBP) + { + csbp.levels += 1; + cout << "level_count: " << csbp.levels << endl; + } + break; + case 'r': + case 'R': + if (method == BP) + { + bp.levels = max(bp.levels - 1, 1); + cout << "level_count: " << bp.levels << endl; + } + else if (method == CSBP) + { + csbp.levels = max(csbp.levels - 1, 1); + cout << "level_count: " << csbp.levels << endl; + } + break; + } +} + + diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index ea6ee97cb2..bee517fbca 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -1,201 +1,94 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Peng Xiao, pengxiao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - #include #include #include "opencv2/core/core.hpp" -#include "opencv2/features2d/features2d.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/ocl/ocl.hpp" -#include "opencv2/nonfree/nonfree.hpp" #include "opencv2/nonfree/ocl.hpp" #include "opencv2/calib3d/calib3d.hpp" +#include "opencv2/nonfree/nonfree.hpp" -using namespace std; using namespace cv; using namespace cv::ocl; -//#define USE_CPU_DESCRIPTOR // use cpu descriptor extractor until ocl descriptor extractor is fixed -//#define USE_CPU_BFMATCHER -void help(); +const int LOOP_NUM = 10; +const int GOOD_PTS_MAX = 50; +const float GOOD_PORTION = 0.15f; -void help() +namespace { - cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << endl; - cout << "\nUsage:\n\tsurf_matcher --left --right " << endl; + +int64 work_begin = 0; +int64 work_end = 0; + +void workBegin() +{ + work_begin = getTickCount(); +} +void workEnd() +{ + work_end = getTickCount() - work_begin; +} +double getTime() +{ + return work_end /((double)cvGetTickFrequency() * 1000.); } - -//////////////////////////////////////////////////// -// This program demonstrates the usage of SURF_OCL. 
-// use cpu findHomography interface to calculate the transformation matrix -int main(int argc, char* argv[]) +template +struct SURFDetector { - if (argc != 5 && argc != 1) + KPDetector surf; + SURFDetector(double hessian = 800.0) + :surf(hessian) { - help(); - return -1; } - vector info; - if(!cv::ocl::getDevice(info)) + template + void operator()(const T& in, const T& mask, vector& pts, T& descriptors, bool useProvided = false) { - cout << "Error: Did not find a valid OpenCL device!" << endl; - return -1; + surf(in, mask, pts, descriptors, useProvided); } - Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; - oclMat img1, img2; - if(argc != 5) +}; + +template +struct SURFMatcher +{ + KPMatcher matcher; + template + void match(const T& in1, const T& in2, vector& matches) { - cpu_img1 = imread("o.png"); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = cpu_img1_grey; - CV_Assert(!img1.empty()); - - cpu_img2 = imread("r2.png"); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else - { - for (int i = 1; i < argc; ++i) - { - if (string(argv[i]) == "--left") - { - cpu_img1 = imread(argv[++i]); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = cpu_img1_grey; - CV_Assert(!img1.empty()); - } - else if (string(argv[i]) == "--right") - { - cpu_img2 = imread(argv[++i]); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else if (string(argv[i]) == "--help") - { - help(); - return -1; - } - } + matcher.match(in1, in2, matches); } +}; - SURF_OCL surf; - //surf.hessianThreshold = 400.f; - //surf.extended = false; - - // detecting keypoints & computing descriptors - oclMat keypoints1GPU, keypoints2GPU; - oclMat descriptors1GPU, descriptors2GPU; - - // downloading results - vector keypoints1, keypoints2; - vector matches; - - -#ifndef USE_CPU_DESCRIPTOR - surf(img1, oclMat(), keypoints1GPU, descriptors1GPU); - surf(img2, oclMat(), keypoints2GPU, descriptors2GPU); - - 
surf.downloadKeypoints(keypoints1GPU, keypoints1); - surf.downloadKeypoints(keypoints2GPU, keypoints2); - - -#ifdef USE_CPU_BFMATCHER - //BFMatcher - BFMatcher matcher(cv::NORM_L2); - matcher.match(Mat(descriptors1GPU), Mat(descriptors2GPU), matches); -#else - BruteForceMatcher_OCL_base matcher(BruteForceMatcher_OCL_base::L2Dist); - matcher.match(descriptors1GPU, descriptors2GPU, matches); -#endif - -#else - surf(img1, oclMat(), keypoints1GPU); - surf(img2, oclMat(), keypoints2GPU); - surf.downloadKeypoints(keypoints1GPU, keypoints1); - surf.downloadKeypoints(keypoints2GPU, keypoints2); - - // use SURF_OCL to detect keypoints and use SURF to extract descriptors - SURF surf_cpu; - Mat descriptors1, descriptors2; - surf_cpu(cpu_img1, Mat(), keypoints1, descriptors1, true); - surf_cpu(cpu_img2, Mat(), keypoints2, descriptors2, true); - matcher.match(descriptors1, descriptors2, matches); -#endif - cout << "OCL: FOUND " << keypoints1GPU.cols << " keypoints on first image" << endl; - cout << "OCL: FOUND " << keypoints2GPU.cols << " keypoints on second image" << endl; - - double max_dist = 0; double min_dist = 100; - //-- Quick calculation of max and min distances between keypoints - for( size_t i = 0; i < keypoints1.size(); i++ ) - { - double dist = matches[i].distance; - if( dist < min_dist ) min_dist = dist; - if( dist > max_dist ) max_dist = dist; - } - - printf("-- Max dist : %f \n", max_dist ); - printf("-- Min dist : %f \n", min_dist ); - - //-- Draw only "good" matches (i.e. 
whose distance is less than 2.5*min_dist ) +Mat drawGoodMatches( + const Mat& cpu_img1, + const Mat& cpu_img2, + const vector& keypoints1, + const vector& keypoints2, + vector& matches, + vector& scene_corners_ +) +{ + //-- Sort matches and preserve top 10% matches + std::sort(matches.begin(), matches.end()); std::vector< DMatch > good_matches; + double minDist = matches.front().distance, + maxDist = matches.back().distance; - for( size_t i = 0; i < keypoints1.size(); i++ ) + const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION)); + for( int i = 0; i < ptsPairs; i++ ) { - if( matches[i].distance < 3*min_dist ) - { - good_matches.push_back( matches[i]); - } + good_matches.push_back( matches[i] ); } + std::cout << "\nMax distance: " << maxDist << std::endl; + std::cout << "Min distance: " << minDist << std::endl; + + std::cout << "Calculating homography using " << ptsPairs << " point pairs." << std::endl; // drawing the results Mat img_matches; drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2, - good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); //-- Localize the object std::vector obj; @@ -207,26 +100,243 @@ int main(int argc, char* argv[]) obj.push_back( keypoints1[ good_matches[i].queryIdx ].pt ); scene.push_back( keypoints2[ good_matches[i].trainIdx ].pt ); } - Mat H = findHomography( obj, scene, CV_RANSAC ); - //-- Get the corners from the image_1 ( the object to be "detected" ) std::vector obj_corners(4); - obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); - obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows ); + obj_corners[0] = cvPoint(0,0); + obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); + obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); + obj_corners[3] = 
cvPoint( 0, cpu_img1.rows ); std::vector scene_corners(4); + Mat H = findHomography( obj, scene, CV_RANSAC ); perspectiveTransform( obj_corners, scene_corners, H); + scene_corners_ = scene_corners; + //-- Draw lines between the corners (the mapped object in the scene - image_2 ) - line( img_matches, scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); - line( img_matches, scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), Scalar( 0, 255, 0), 4 ); + line( img_matches, + scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + return img_matches; +} + +} +//////////////////////////////////////////////////// +// This program demonstrates the usage of SURF_OCL. 
+// use cpu findHomography interface to calculate the transformation matrix +int main(int argc, char* argv[]) +{ + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }" + "{ c | use_cpu | false | use CPU algorithms }" + "{ a | use_all | false | use both CPU and GPU algorithms}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + std::cout << "Avaible options:" << std::endl; + cmd.printParams(); + return 0; + } + + vector info; + if(cv::ocl::getDevice(info) == 0) + { + std::cout << "Error: Did not find a valid OpenCL device!" << std::endl; + return -1; + } + ocl::setDevice(info[0]); + + Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; + oclMat img1, img2; + bool useCPU = cmd.get("c"); + bool useGPU = false; + bool useALL = cmd.get("a"); + + string outpath = cmd.get("o"); + + cpu_img1 = imread(cmd.get("l")); + CV_Assert(!cpu_img1.empty()); + cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); + img1 = cpu_img1_grey; + + cpu_img2 = imread(cmd.get("r")); + CV_Assert(!cpu_img2.empty()); + cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); + img2 = cpu_img2_grey; + + if(useALL) + { + useCPU = false; + useGPU = false; + } + else if(useCPU==false && useALL==false) + { + useGPU = true; + } + + if(!useCPU) + { + std::cout + << "Device name:" + << info[0].DeviceName[0] + << std::endl; + } + double surf_time = 0.; + + //declare input/output + vector keypoints1, keypoints2; + vector matches; + + vector gpu_keypoints1; + vector gpu_keypoints2; + vector gpu_matches; + + Mat descriptors1CPU, descriptors2CPU; + + oclMat keypoints1GPU, keypoints2GPU; + oclMat descriptors1GPU, descriptors2GPU; + + //instantiate detectors/matchers + SURFDetector cpp_surf; + SURFDetector ocl_surf; + + SURFMatcher cpp_matcher; + SURFMatcher ocl_matcher; + + //-- start of timing section + if 
(useCPU) + { + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU); + cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU); + cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches); + } + workEnd(); + std::cout << "CPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + } + else if(useGPU) + { + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + ocl_surf(img1, oclMat(), keypoints1, descriptors1GPU); + ocl_surf(img2, oclMat(), keypoints2, descriptors2GPU); + ocl_matcher.match(descriptors1GPU, descriptors2GPU, matches); + } + workEnd(); + std::cout << "OCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + } + else + { + //cpu runs + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + cpp_surf(cpu_img1_grey, Mat(), keypoints1, descriptors1CPU); + cpp_surf(cpu_img2_grey, Mat(), keypoints2, descriptors2CPU); + cpp_matcher.match(descriptors1CPU, descriptors2CPU, matches); + } + workEnd(); + std::cout << "\nCPP: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "CPP: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "(CPP)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl; + + //gpu runs + for (int i = 0; i <= LOOP_NUM; i++) + { + if(i == 1) workBegin(); + ocl_surf(img1, oclMat(), gpu_keypoints1, descriptors1GPU); + ocl_surf(img2, oclMat(), gpu_keypoints2, descriptors2GPU); + 
ocl_matcher.match(descriptors1GPU, descriptors2GPU, gpu_matches); + } + workEnd(); + std::cout << "\nOCL: FOUND " << keypoints1.size() << " keypoints on first image" << std::endl; + std::cout << "OCL: FOUND " << keypoints2.size() << " keypoints on second image" << std::endl; + + surf_time = getTime(); + std::cout << "(OCL)SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; + + } + + //-------------------------------------------------------------------------- + std::vector cpu_corner; + Mat img_matches = drawGoodMatches(cpu_img1, cpu_img2, keypoints1, keypoints2, matches, cpu_corner); + + std::vector gpu_corner; + Mat ocl_img_matches; + if(useALL || (!useCPU&&!useGPU)) + { + ocl_img_matches = drawGoodMatches(cpu_img1, cpu_img2, gpu_keypoints1, gpu_keypoints2, gpu_matches, gpu_corner); + + //check accuracy + std::cout<<"\nCheck accuracy:\n"; + + if(cpu_corner.size()!=gpu_corner.size()) + std::cout<<"Failed\n"; + else + { + bool result = false; + for(size_t i = 0; i < cpu_corner.size(); i++) + { + if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10) + ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) + { + std::cout<<"Failed\n"; + result = false; + break; + } + result = true; + } + if(result) + std::cout<<"Passed\n"; + } + } //-- Show detected matches - namedWindow("ocl surf matches", 0); - imshow("ocl surf matches", img_matches); - waitKey(0); + if (useCPU) + { + namedWindow("cpu surf matches", 0); + imshow("cpu surf matches", img_matches); + imwrite(outpath, img_matches); + } + else if(useGPU) + { + namedWindow("ocl surf matches", 0); + imshow("ocl surf matches", img_matches); + imwrite(outpath, img_matches); + } + else + { + namedWindow("cpu surf matches", 0); + imshow("cpu surf matches", img_matches); + namedWindow("ocl surf matches", 0); + imshow("ocl surf matches", ocl_img_matches); + } + waitKey(0); return 0; } diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp new file mode 100644 index 
0000000000..cff9692ed6 --- /dev/null +++ b/samples/ocl/tvl1_optical_flow.cpp @@ -0,0 +1,265 @@ +#include +#include +#include + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/video/video.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +typedef unsigned char uchar; +#define LOOP_NUM 10 +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double getTime() +{ + return work_end * 1000. / getTickFrequency(); +} + +template inline T clamp (T x, T a, T b) +{ + return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a)); +} + +template inline T mapValue(T x, T a, T b, T c, T d) +{ + x = clamp(x, a, b); + return c + (d - c) * (x - a) / (b - a); +} + +static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) +{ + float maxDisplacement = 1.0f; + + for (int i = 0; i < u.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + for (int j = 0; j < u.cols; ++j) + { + float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); + + if (d > maxDisplacement) + maxDisplacement = d; + } + } + + flowField.create(u.size(), CV_8UC4); + + for (int i = 0; i < flowField.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + + Vec4b* row = flowField.ptr(i); + + for (int j = 0; j < flowField.cols; ++j) + { + row[j][0] = 0; + row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][3] = 255; + } + } +} + + +int main(int argc, const char* argv[]) +{ + static std::vector ocl_info; + ocl::getDevice(ocl_info); + //if you want to use undefault device, set it here + setDevice(ocl_info[0]); + + //set this to save kernel compile time from second time you run + ocl::setBinpath("./"); + const char* keys = 
+ "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | tvl1_output.jpg | specify output save path }" + "{ c | camera | 0 | enable camera capturing }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.get("help")) + { + cout << "Usage: pyrlk_optical_flow [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + bool defaultPicturesFail = false; + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string outpath = cmd.get("o"); + bool useCPU = cmd.get("s"); + bool useCamera = cmd.get("c"); + int inputName = cmd.get("c"); + + Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + + + Mat flow, show_flow; + Mat flow_vec[2]; + if (frame0.empty() || frame1.empty()) + { + useCamera = true; + defaultPicturesFail = true; + CvCapture* capture = 0; + capture = cvCaptureFromCAM( inputName ); + if (!capture) + { + cout << "Can't load input images" << endl; + return -1; + } + } + + + if (useCamera) + { + CvCapture* capture = 0; + Mat frame, frameCopy; + Mat frame0Gray, frame1Gray; + Mat ptr0, ptr1; + + if(vdofile == "") + capture = cvCaptureFromCAM( inputName ); + else + capture = cvCreateFileCapture(vdofile.c_str()); + + int c = inputName ; + if(!capture) + { + if(vdofile == "") + cout << "Capture from CAM " << c << " didn't work" << endl; + else + cout << "Capture from file " << vdofile << " failed" <calc(ptr0, ptr1, flow); + split(flow, flow_vec); + } + else + { + oclMat d_flowx, d_flowy; + d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i%2 == 1) + 
frame1.copyTo(frameCopy); + else + frame0.copyTo(frameCopy); + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + } + + if( waitKey( 10 ) >= 0 ) + goto _cleanup_; + } + + waitKey(0); + +_cleanup_: + cvReleaseCapture( &capture ); + } + else + { +nocamera: + oclMat d_flowx, d_flowy; + for(int i = 0; i <= LOOP_NUM; i ++) + { + cout << "loop" << i << endl; + + if (i > 0) workBegin(); + if (useCPU) + { + alg->calc(frame0, frame1, flow); + split(flow, flow_vec); + } + else + { + d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i > 0 && i <= LOOP_NUM) + workEnd(); + + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + cout << getTime() / LOOP_NUM << " ms" << endl; + + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + imwrite(outpath, show_flow); + } + } + } + + waitKey(); + + return 0; +} \ No newline at end of file diff --git a/samples/python2/grabcut.py b/samples/python2/grabcut.py new file mode 100644 index 0000000000..9fc1280acf --- /dev/null +++ b/samples/python2/grabcut.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +''' +=============================================================================== +Interactive Image Segmentation using GrabCut algorithm. + +This sample shows interactive image segmentation using grabcut algorithm. + +USAGE : + python grabcut.py + +README FIRST: + Two windows will show up, one for input and one for output. + + At first, in input window, draw a rectangle around the object using +mouse right button. Then press 'n' to segment the object (once or a few times) +For any finer touch-ups, you can press any of the keys below and draw lines on +the areas you want. Then again press 'n' for updating the output. 
+ +Key '0' - To select areas of sure background +Key '1' - To select areas of sure foreground +Key '2' - To select areas of probable background +Key '3' - To select areas of probable foreground + +Key 'n' - To update the segmentation +Key 'r' - To reset the setup +Key 's' - To save the results +=============================================================================== +''' + +import numpy as np +import cv2 +import sys + +BLUE = [255,0,0] # rectangle color +RED = [0,0,255] # PR BG +GREEN = [0,255,0] # PR FG +BLACK = [0,0,0] # sure BG +WHITE = [255,255,255] # sure FG + +DRAW_BG = {'color' : BLACK, 'val' : 0} +DRAW_FG = {'color' : WHITE, 'val' : 1} +DRAW_PR_FG = {'color' : GREEN, 'val' : 3} +DRAW_PR_BG = {'color' : RED, 'val' : 2} + +# setting up flags +rect = (0,0,1,1) +drawing = False # flag for drawing curves +rectangle = False # flag for drawing rect +rect_over = False # flag to check if rect drawn +rect_or_mask = 100 # flag for selecting rect or mask mode +value = DRAW_FG # drawing initialized to FG +thickness = 3 # brush thickness + +def onmouse(event,x,y,flags,param): + global img,img2,drawing,value,mask,rectangle,rect,rect_or_mask,ix,iy,rect_over + + # Draw Rectangle + if event == cv2.EVENT_RBUTTONDOWN: + rectangle = True + ix,iy = x,y + + elif event == cv2.EVENT_MOUSEMOVE: + if rectangle == True: + img = img2.copy() + cv2.rectangle(img,(ix,iy),(x,y),BLUE,2) + rect = (ix,iy,abs(ix-x),abs(iy-y)) + rect_or_mask = 0 + + elif event == cv2.EVENT_RBUTTONUP: + rectangle = False + rect_over = True + cv2.rectangle(img,(ix,iy),(x,y),BLUE,2) + rect = (ix,iy,abs(ix-x),abs(iy-y)) + rect_or_mask = 0 + print " Now press the key 'n' a few times until no further change \n" + + # draw touchup curves + + if event == cv2.EVENT_LBUTTONDOWN: + if rect_over == False: + print "first draw rectangle \n" + else: + drawing = True + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + + elif event == cv2.EVENT_MOUSEMOVE: + if drawing 
== True: + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + + elif event == cv2.EVENT_LBUTTONUP: + if drawing == True: + drawing = False + cv2.circle(img,(x,y),thickness,value['color'],-1) + cv2.circle(mask,(x,y),thickness,value['val'],-1) + +# print documentation +print __doc__ + +# Loading images +if len(sys.argv) == 2: + filename = sys.argv[1] # for drawing purposes +else: + print "No input image given, so loading default image, lena.jpg \n" + print "Correct Usage : python grabcut.py \n" + filename = '../cpp/lena.jpg' + +img = cv2.imread(filename) +img2 = img.copy() # a copy of original image +mask = np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG +output = np.zeros(img.shape,np.uint8) # output image to be shown + +# input and output windows +cv2.namedWindow('output') +cv2.namedWindow('input') +cv2.setMouseCallback('input',onmouse) +cv2.moveWindow('input',img.shape[1]+10,90) + +print " Instructions : \n" +print " Draw a rectangle around the object using right mouse button \n" + +while(1): + + cv2.imshow('output',output) + cv2.imshow('input',img) + k = 0xFF & cv2.waitKey(1) + + # key bindings + if k == 27: # esc to exit + break + elif k == ord('0'): # BG drawing + print " mark background regions with left mouse button \n" + value = DRAW_BG + elif k == ord('1'): # FG drawing + print " mark foreground regions with left mouse button \n" + value = DRAW_FG + elif k == ord('2'): # PR_BG drawing + value = DRAW_PR_BG + elif k == ord('3'): # PR_FG drawing + value = DRAW_PR_FG + elif k == ord('s'): # save image + bar = np.zeros((img.shape[0],5,3),np.uint8) + res = np.hstack((img2,bar,img,bar,output)) + cv2.imwrite('grabcut_output.png',res) + print " Result saved as image \n" + elif k == ord('r'): # reset everything + print "resetting \n" + rect = (0,0,1,1) + drawing = False + rectangle = False + rect_or_mask = 100 + rect_over = False + value = DRAW_FG + img = img2.copy() + mask = 
np.zeros(img.shape[:2],dtype = np.uint8) # mask initialized to PR_BG + output = np.zeros(img.shape,np.uint8) # output image to be shown + elif k == ord('n'): # segment the image + print """ For finer touchups, mark foreground and background after pressing keys 0-3 + and again press 'n' \n""" + if (rect_or_mask == 0): # grabcut with rect + bgdmodel = np.zeros((1,65),np.float64) + fgdmodel = np.zeros((1,65),np.float64) + cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_RECT) + rect_or_mask = 1 + elif rect_or_mask == 1: # grabcut with mask + bgdmodel = np.zeros((1,65),np.float64) + fgdmodel = np.zeros((1,65),np.float64) + cv2.grabCut(img2,mask,rect,bgdmodel,fgdmodel,1,cv2.GC_INIT_WITH_MASK) + + mask2 = np.where((mask==1) + (mask==3),255,0).astype('uint8') + output = cv2.bitwise_and(img2,img2,mask=mask2) + +cv2.destroyAllWindows() diff --git a/samples/winrt/ImageManipulations/AdvancedCapture.xaml b/samples/winrt/ImageManipulations/AdvancedCapture.xaml new file mode 100644 index 0000000000..07db96f275 --- /dev/null +++ b/samples/winrt/ImageManipulations/AdvancedCapture.xaml @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + +