mirror of
https://github.com/opencv/opencv.git
synced 2025-06-08 10:03:15 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
82c477c9f7
@ -1402,15 +1402,19 @@ if(WITH_HALIDE OR HAVE_HALIDE)
|
|||||||
status(" Halide:" HAVE_HALIDE THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO)
|
status(" Halide:" HAVE_HALIDE THEN "YES (${HALIDE_LIBRARIES} ${HALIDE_INCLUDE_DIRS})" ELSE NO)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WITH_INF_ENGINE OR HAVE_INF_ENGINE)
|
if(WITH_INF_ENGINE OR INF_ENGINE_TARGET)
|
||||||
if(HAVE_INF_ENGINE)
|
if(INF_ENGINE_TARGET)
|
||||||
set(__msg "YES")
|
set(__msg "YES (${INF_ENGINE_RELEASE} / ${INF_ENGINE_VERSION})")
|
||||||
if(DEFINED INF_ENGINE_VERSION)
|
get_target_property(_lib ${INF_ENGINE_TARGET} IMPORTED_LOCATION)
|
||||||
set(__msg "YES (ver ${INF_ENGINE_VERSION})")
|
if(NOT _lib)
|
||||||
|
get_target_property(_lib_rel ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_RELEASE)
|
||||||
|
get_target_property(_lib_dbg ${INF_ENGINE_TARGET} IMPORTED_IMPLIB_DEBUG)
|
||||||
|
set(_lib "${_lib_rel} / ${_lib_dbg}")
|
||||||
endif()
|
endif()
|
||||||
|
get_target_property(_inc ${INF_ENGINE_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
|
||||||
status(" Inference Engine:" "${__msg}")
|
status(" Inference Engine:" "${__msg}")
|
||||||
status(" libs:" "${INF_ENGINE_LIBRARIES}")
|
status(" libs:" "${_lib}")
|
||||||
status(" includes:" "${INF_ENGINE_INCLUDE_DIRS}")
|
status(" includes:" "${_inc}")
|
||||||
else()
|
else()
|
||||||
status(" Inference Engine:" "NO")
|
status(" Inference Engine:" "NO")
|
||||||
endif()
|
endif()
|
||||||
|
@ -700,12 +700,21 @@ macro(ocv_compiler_optimization_fill_cpu_config)
|
|||||||
list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT})
|
list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT})
|
||||||
endforeach()
|
endforeach()
|
||||||
list(REMOVE_DUPLICATES __dispatch_modes)
|
list(REMOVE_DUPLICATES __dispatch_modes)
|
||||||
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "")
|
|
||||||
foreach(OPT ${__dispatch_modes})
|
foreach(OPT ${__dispatch_modes})
|
||||||
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
|
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
|
||||||
#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
|
#define CV_CPU_DISPATCH_COMPILE_${OPT} 1")
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
|
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
|
||||||
|
\n\n#define CV_CPU_DISPATCH_FEATURES 0 \\")
|
||||||
|
foreach(OPT ${__dispatch_modes})
|
||||||
|
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
|
||||||
|
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}
|
||||||
|
, CV_CPU_${OPT} \\")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE}\n")
|
||||||
|
|
||||||
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
|
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n")
|
||||||
foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
|
foreach(OPT ${CPU_ALL_OPTIMIZATIONS})
|
||||||
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
|
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
|
||||||
|
@ -1,79 +1,87 @@
|
|||||||
# The script detects Intel(R) Inference Engine installation
|
# The script detects Intel(R) Inference Engine installation
|
||||||
#
|
#
|
||||||
# Parameters:
|
# Cache variables:
|
||||||
# INTEL_CVSDK_DIR - Path to Inference Engine root folder
|
# INF_ENGINE_OMP_DIR - directory with OpenMP library to link with (needed by some versions of IE)
|
||||||
# IE_PLUGINS_PATH - Path to folder with Inference Engine plugins
|
# INF_ENGINE_RELEASE - a number reflecting IE source interface (linked with OpenVINO release)
|
||||||
#
|
#
|
||||||
# On return this will define:
|
# Detect parameters:
|
||||||
|
# 1. Native cmake IE package:
|
||||||
|
# - enironment variable InferenceEngine_DIR is set to location of cmake module
|
||||||
|
# 2. Custom location:
|
||||||
|
# - INF_ENGINE_INCLUDE_DIRS - headers search location
|
||||||
|
# - INF_ENGINE_LIB_DIRS - library search location
|
||||||
|
# 3. OpenVINO location:
|
||||||
|
# - environment variable INTEL_CVSDK_DIR is set to location of OpenVINO installation dir
|
||||||
|
# - INF_ENGINE_PLATFORM - part of name of library directory representing its platform (default ubuntu_16.04)
|
||||||
#
|
#
|
||||||
# HAVE_INF_ENGINE - True if Intel Inference Engine was found
|
# Result:
|
||||||
# INF_ENGINE_INCLUDE_DIRS - Inference Engine include folder
|
# INF_ENGINE_TARGET - set to name of imported library target representing InferenceEngine
|
||||||
# INF_ENGINE_LIBRARIES - Inference Engine libraries and it's dependencies
|
|
||||||
#
|
#
|
||||||
macro(ie_fail)
|
|
||||||
set(HAVE_INF_ENGINE FALSE)
|
|
||||||
return()
|
|
||||||
endmacro()
|
|
||||||
|
|
||||||
|
if(NOT HAVE_CXX11)
|
||||||
|
message(WARNING "DL Inference engine requires C++11. You can turn it on via ENABLE_CXX11=ON CMake flag.")
|
||||||
|
return()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# =======================
|
||||||
|
|
||||||
|
function(add_custom_ie_build _inc _lib _lib_rel _lib_dbg _msg)
|
||||||
|
if(NOT _inc OR NOT (_lib OR _lib_rel OR _lib_dbg))
|
||||||
|
return()
|
||||||
|
endif()
|
||||||
|
add_library(inference_engine UNKNOWN IMPORTED)
|
||||||
|
set_target_properties(inference_engine PROPERTIES
|
||||||
|
IMPORTED_LOCATION "${_lib}"
|
||||||
|
IMPORTED_IMPLIB_RELEASE "${_lib_rel}"
|
||||||
|
IMPORTED_IMPLIB_DEBUG "${_lib_dbg}"
|
||||||
|
INTERFACE_INCLUDE_DIRECTORIES "${_inc}"
|
||||||
|
)
|
||||||
|
find_library(omp_lib iomp5 PATHS "${INF_ENGINE_OMP_DIR}" NO_DEFAULT_PATH)
|
||||||
|
if(NOT omp_lib)
|
||||||
|
message(WARNING "OpenMP for IE have not been found. Set INF_ENGINE_OMP_DIR variable if you experience build errors.")
|
||||||
|
else()
|
||||||
|
set_target_properties(inference_engine PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${omp_lib}")
|
||||||
|
endif()
|
||||||
|
set(INF_ENGINE_VERSION "Unknown" CACHE STRING "")
|
||||||
|
set(INF_ENGINE_TARGET inference_engine PARENT_SCOPE)
|
||||||
|
message(STATUS "Detected InferenceEngine: ${_msg}")
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
# ======================
|
||||||
|
|
||||||
find_package(InferenceEngine QUIET)
|
find_package(InferenceEngine QUIET)
|
||||||
if(InferenceEngine_FOUND)
|
if(InferenceEngine_FOUND)
|
||||||
set(INF_ENGINE_LIBRARIES "${InferenceEngine_LIBRARIES}")
|
set(INF_ENGINE_TARGET IE::inference_engine)
|
||||||
set(INF_ENGINE_INCLUDE_DIRS "${InferenceEngine_INCLUDE_DIRS}")
|
set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}" CACHE STRING "")
|
||||||
set(INF_ENGINE_VERSION "${InferenceEngine_VERSION}")
|
message(STATUS "Detected InferenceEngine: cmake package")
|
||||||
set(HAVE_INF_ENGINE TRUE)
|
|
||||||
return()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
ocv_check_environment_variables(INTEL_CVSDK_DIR INF_ENGINE_ROOT_DIR IE_PLUGINS_PATH)
|
if(NOT INF_ENGINE_TARGET AND INF_ENGINE_LIB_DIRS AND INF_ENGINE_INCLUDE_DIRS)
|
||||||
|
find_path(ie_custom_inc "inference_engine.hpp" PATHS "${INF_ENGINE_INCLUDE_DIRS}" NO_DEFAULT_PATH)
|
||||||
if(NOT INF_ENGINE_ROOT_DIR OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp")
|
find_library(ie_custom_lib "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}" NO_DEFAULT_PATH)
|
||||||
set(ie_root_paths "${INF_ENGINE_ROOT_DIR}")
|
find_library(ie_custom_lib_rel "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Release" NO_DEFAULT_PATH)
|
||||||
if(DEFINED INTEL_CVSDK_DIR)
|
find_library(ie_custom_lib_dbg "inference_engine" PATHS "${INF_ENGINE_LIB_DIRS}/Debug" NO_DEFAULT_PATH)
|
||||||
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/")
|
add_custom_ie_build("${ie_custom_inc}" "${ie_custom_lib}" "${ie_custom_lib_rel}" "${ie_custom_lib_dbg}" "INF_ENGINE_{INCLUDE,LIB}_DIRS")
|
||||||
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/deployment_tools/inference_engine")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(NOT ie_root_paths)
|
|
||||||
list(APPEND ie_root_paths "/opt/intel/computer_vision_sdk/deployment_tools/inference_engine/")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
find_path(INF_ENGINE_ROOT_DIR include/inference_engine.hpp PATHS ${ie_root_paths})
|
|
||||||
if(INF_ENGINE_ROOT_DIR MATCHES "-NOTFOUND$")
|
|
||||||
unset(INF_ENGINE_ROOT_DIR CACHE)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(INF_ENGINE_INCLUDE_DIRS "${INF_ENGINE_ROOT_DIR}/include" CACHE PATH "Path to Inference Engine include directory")
|
set(_loc "$ENV{INTEL_CVSDK_DIR}")
|
||||||
|
if(NOT INF_ENGINE_TARGET AND _loc)
|
||||||
if(NOT INF_ENGINE_ROOT_DIR
|
set(INF_ENGINE_PLATFORM "ubuntu_16.04" CACHE STRING "InferenceEngine platform (library dir)")
|
||||||
OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}"
|
find_path(ie_custom_env_inc "inference_engine.hpp" PATHS "${_loc}/deployment_tools/inference_engine/include" NO_DEFAULT_PATH)
|
||||||
OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp"
|
find_library(ie_custom_env_lib "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/${INF_ENGINE_PLATFORM}/intel64" NO_DEFAULT_PATH)
|
||||||
)
|
find_library(ie_custom_env_lib_rel "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Release" NO_DEFAULT_PATH)
|
||||||
message(WARNING "DL IE: Can't detect INF_ENGINE_ROOT_DIR location.")
|
find_library(ie_custom_env_lib_dbg "inference_engine" PATHS "${_loc}/deployment_tools/inference_engine/lib/intel64/Debug" NO_DEFAULT_PATH)
|
||||||
ie_fail()
|
add_custom_ie_build("${ie_custom_env_inc}" "${ie_custom_env_lib}" "${ie_custom_env_lib_rel}" "${ie_custom_env_lib_dbg}" "OpenVINO (${_loc})")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(INF_ENGINE_LIBRARIES "")
|
# Add more features to the target
|
||||||
|
|
||||||
set(ie_lib_list inference_engine)
|
if(INF_ENGINE_TARGET)
|
||||||
|
if(NOT INF_ENGINE_RELEASE)
|
||||||
if(NOT IS_ABSOLUTE "${IE_PLUGINS_PATH}")
|
message(WARNING "InferenceEngine version have not been set, 2018R2 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.")
|
||||||
set(IE_PLUGINS_PATH "${INF_ENGINE_ROOT_DIR}/${IE_PLUGINS_PATH}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
link_directories(
|
|
||||||
${INF_ENGINE_ROOT_DIR}/external/mkltiny_lnx/lib
|
|
||||||
${INF_ENGINE_ROOT_DIR}/external/cldnn/lib
|
|
||||||
)
|
|
||||||
|
|
||||||
foreach(lib ${ie_lib_list})
|
|
||||||
find_library(${lib} NAMES ${lib} HINTS ${IE_PLUGINS_PATH})
|
|
||||||
if(NOT ${lib})
|
|
||||||
message(WARNING "DL IE: Can't find library: '${lib}'")
|
|
||||||
ie_fail()
|
|
||||||
endif()
|
endif()
|
||||||
list(APPEND INF_ENGINE_LIBRARIES ${${lib}})
|
set(INF_ENGINE_RELEASE "2018020000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2018R2.0.2 -> 2018020002)")
|
||||||
endforeach()
|
set_target_properties(${INF_ENGINE_TARGET} PROPERTIES
|
||||||
|
INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}"
|
||||||
set(HAVE_INF_ENGINE TRUE)
|
)
|
||||||
|
endif()
|
||||||
|
@ -1132,7 +1132,7 @@ function(ocv_add_perf_tests)
|
|||||||
source_group("Src" FILES "${${the_target}_pch}")
|
source_group("Src" FILES "${${the_target}_pch}")
|
||||||
ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
|
ocv_add_executable(${the_target} ${OPENCV_PERF_${the_module}_SOURCES} ${${the_target}_pch})
|
||||||
ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}")
|
ocv_target_include_modules(${the_target} ${perf_deps} "${perf_path}")
|
||||||
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
|
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${perf_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_PERF_${the_module}_DEPS})
|
||||||
add_dependencies(opencv_perf_tests ${the_target})
|
add_dependencies(opencv_perf_tests ${the_target})
|
||||||
|
|
||||||
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest")
|
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};PerfTest")
|
||||||
@ -1175,7 +1175,7 @@ function(ocv_add_perf_tests)
|
|||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
# this is a command for adding OpenCV accuracy/regression tests to the module
|
# this is a command for adding OpenCV accuracy/regression tests to the module
|
||||||
# ocv_add_accuracy_tests([FILES <source group name> <list of sources>] [DEPENDS_ON] <list of extra dependencies>)
|
# ocv_add_accuracy_tests(<list of extra dependencies>)
|
||||||
function(ocv_add_accuracy_tests)
|
function(ocv_add_accuracy_tests)
|
||||||
ocv_debug_message("ocv_add_accuracy_tests(" ${ARGN} ")")
|
ocv_debug_message("ocv_add_accuracy_tests(" ${ARGN} ")")
|
||||||
|
|
||||||
@ -1211,7 +1211,7 @@ function(ocv_add_accuracy_tests)
|
|||||||
source_group("Src" FILES "${${the_target}_pch}")
|
source_group("Src" FILES "${${the_target}_pch}")
|
||||||
ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
|
ocv_add_executable(${the_target} ${OPENCV_TEST_${the_module}_SOURCES} ${${the_target}_pch})
|
||||||
ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
|
ocv_target_include_modules(${the_target} ${test_deps} "${test_path}")
|
||||||
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS})
|
ocv_target_link_libraries(${the_target} LINK_PRIVATE ${test_deps} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS} ${OPENCV_TEST_${the_module}_DEPS})
|
||||||
add_dependencies(opencv_tests ${the_target})
|
add_dependencies(opencv_tests ${the_target})
|
||||||
|
|
||||||
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
|
set_target_properties(${the_target} PROPERTIES LABELS "${OPENCV_MODULE_${the_module}_LABEL};AccuracyTest")
|
||||||
|
@ -1016,3 +1016,17 @@
|
|||||||
year = {2017},
|
year = {2017},
|
||||||
organization = {IEEE}
|
organization = {IEEE}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ARTICLE{gonzalez,
|
||||||
|
title={Digital Image Fundamentals, Digital Imaging Processing},
|
||||||
|
author={Gonzalez, Rafael C and others},
|
||||||
|
year={1987},
|
||||||
|
publisher={Addison Wesley Publishing Company}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ARTICLE{gruzman,
|
||||||
|
title={Цифровая обработка изображений в информационных системах},
|
||||||
|
author={Грузман, И.С. and Киричук, В.С. and Косых, В.П. and Перетягин, Г.И. and Спектор, А.А.},
|
||||||
|
year={2000},
|
||||||
|
publisher={Изд-во НГТУ Новосибирск}
|
||||||
|
}
|
||||||
|
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg
Executable file
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/original.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png
Executable file
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/psf.png
Executable file
Binary file not shown.
After Width: | Height: | Size: 630 B |
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg
Executable file
BIN
doc/tutorials/imgproc/out_of_focus_deblur_filter/images/recovered.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
@ -0,0 +1,112 @@
|
|||||||
|
Out-of-focus Deblur Filter {#tutorial_out_of_focus_deblur_filter}
|
||||||
|
==========================
|
||||||
|
|
||||||
|
Goal
|
||||||
|
----
|
||||||
|
|
||||||
|
In this tutorial you will learn:
|
||||||
|
|
||||||
|
- what is a degradation image model
|
||||||
|
- what is PSF of out-of-focus image
|
||||||
|
- how to restore a blurred image
|
||||||
|
- what is Wiener filter
|
||||||
|
|
||||||
|
Theory
|
||||||
|
------
|
||||||
|
|
||||||
|
@note The explanation is based on the books @cite gonzalez and @cite gruzman. Also, you can refer to Matlab's tutorial [Image Deblurring in Matlab] and an article [SmartDeblur].
|
||||||
|
@note An out-of-focus image on this page is a real world image. An out-of-focus was done manually by camera optics.
|
||||||
|
|
||||||
|
### What is a degradation image model?
|
||||||
|
|
||||||
|
A mathematical model of the image degradation in frequency domain representation is:
|
||||||
|
|
||||||
|
\f[S = H\cdot U + N\f]
|
||||||
|
|
||||||
|
where
|
||||||
|
\f$S\f$ is a spectrum of blurred (degraded) image,
|
||||||
|
\f$U\f$ is a spectrum of original true (undegraded) image,
|
||||||
|
\f$H\f$ is frequency response of point spread function (PSF),
|
||||||
|
\f$N\f$ is a spectrum of additive noise.
|
||||||
|
|
||||||
|
Circular PSF is a good approximation of out-of-focus distortion. Such PSF is specified by only one parameter - radius \f$R\f$. Circular PSF is used in this work.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### How to restore an blurred image?
|
||||||
|
|
||||||
|
The objective of restoration (deblurring) is to obtain an estimate of the original image. Restoration formula in frequency domain is:
|
||||||
|
|
||||||
|
\f[U' = H_w\cdot S\f]
|
||||||
|
|
||||||
|
where
|
||||||
|
\f$U'\f$ is spectrum of estimation of original image \f$U\f$,
|
||||||
|
\f$H_w\f$ is restoration filter, for example, Wiener filter.
|
||||||
|
|
||||||
|
### What is Wiener filter?
|
||||||
|
|
||||||
|
Wiener filter is a way to restore a blurred image. Let's suppose that PSF is a real and symmetric signal, a power spectrum of the original true image and noise are not known,
|
||||||
|
then simplified Wiener formula is:
|
||||||
|
|
||||||
|
\f[H_w = \frac{H}{|H|^2+\frac{1}{SNR}} \f]
|
||||||
|
|
||||||
|
where
|
||||||
|
\f$SNR\f$ is signal-to-noise ratio.
|
||||||
|
|
||||||
|
So, in order to recover an out-of-focus image by Wiener filter, it needs to know \f$SNR\f$ and \f$R\f$ of circular PSF.
|
||||||
|
|
||||||
|
|
||||||
|
Source code
|
||||||
|
-----------
|
||||||
|
|
||||||
|
You can find source code in the `samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp` of the OpenCV source code library.
|
||||||
|
|
||||||
|
@include cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp
|
||||||
|
|
||||||
|
Explanation
|
||||||
|
-----------
|
||||||
|
|
||||||
|
An out-of-focus image recovering algorithm consists of PSF generation, Wiener filter generation and filtering an blurred image in frequency domain:
|
||||||
|
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp main
|
||||||
|
|
||||||
|
A function calcPSF() forms an circular PSF according to input parameter radius \f$R\f$:
|
||||||
|
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcPSF
|
||||||
|
|
||||||
|
A function calcWnrFilter() synthesizes simplified Wiener filter \f$H_w\f$ according to formula described above:
|
||||||
|
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp calcWnrFilter
|
||||||
|
|
||||||
|
A function fftshift() rearranges PSF. This code was just copied from tutorial @ref tutorial_discrete_fourier_transform "Discrete Fourier Transform":
|
||||||
|
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp fftshift
|
||||||
|
|
||||||
|
A function filter2DFreq() filters an blurred image in frequency domain:
|
||||||
|
@snippet samples/cpp/tutorial_code/ImgProc/out_of_focus_deblur_filter/out_of_focus_deblur_filter.cpp filter2DFreq
|
||||||
|
|
||||||
|
Result
|
||||||
|
------
|
||||||
|
|
||||||
|
Below you can see real out-of-focus image:
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
Below result was done by \f$R\f$ = 53 and \f$SNR\f$ = 5200 parameters:
|
||||||
|

|
||||||
|
|
||||||
|
The Wiener filter was used, values of \f$R\f$ and \f$SNR\f$ were selected manually to give the best possible visual result.
|
||||||
|
We can see that the result is not perfect, but it gives us a hint to the image content. With some difficulty, the text is readable.
|
||||||
|
|
||||||
|
@note The parameter \f$R\f$ is the most important. So you should adjust \f$R\f$ first, then \f$SNR\f$.
|
||||||
|
@note Sometimes you can observe the ringing effect in an restored image. This effect can be reduced by several methods. For example, you can taper input image edges.
|
||||||
|
|
||||||
|
You can also find a quick video demonstration of this on
|
||||||
|
[YouTube](https://youtu.be/0bEcE4B0XP4).
|
||||||
|
@youtube{0bEcE4B0XP4}
|
||||||
|
|
||||||
|
References
|
||||||
|
------
|
||||||
|
- [Image Deblurring in Matlab] - Image Deblurring in Matlab
|
||||||
|
- [SmartDeblur] - SmartDeblur site
|
||||||
|
|
||||||
|
<!-- invisible references list -->
|
||||||
|
[Digital Image Processing]: http://web.ipac.caltech.edu/staff/fmasci/home/astro_refs/Digital_Image_Processing_2ndEd.pdf
|
||||||
|
[Image Deblurring in Matlab]: https://www.mathworks.com/help/images/image-deblurring.html
|
||||||
|
[SmartDeblur]: http://yuzhikov.com/articles/BlurredImagesRestoration1.htm
|
@ -292,3 +292,13 @@ In this section you will learn about the image processing (manipulation) functio
|
|||||||
*Author:* Theodore Tsesmelis
|
*Author:* Theodore Tsesmelis
|
||||||
|
|
||||||
Where we learn to segment objects using Laplacian filtering, the Distance Transformation and the Watershed algorithm.
|
Where we learn to segment objects using Laplacian filtering, the Distance Transformation and the Watershed algorithm.
|
||||||
|
|
||||||
|
- @subpage tutorial_out_of_focus_deblur_filter
|
||||||
|
|
||||||
|
*Languages:* C++
|
||||||
|
|
||||||
|
*Compatibility:* \> OpenCV 2.0
|
||||||
|
|
||||||
|
*Author:* Karpushin Vladislav
|
||||||
|
|
||||||
|
You will learn how to recover an out-of-focus image by Wiener filter.
|
||||||
|
@ -60,6 +60,17 @@
|
|||||||
// access from within opencv code more accessible
|
// access from within opencv code more accessible
|
||||||
namespace cv {
|
namespace cv {
|
||||||
|
|
||||||
|
namespace hal {
|
||||||
|
|
||||||
|
enum StoreMode
|
||||||
|
{
|
||||||
|
STORE_UNALIGNED = 0,
|
||||||
|
STORE_ALIGNED = 1,
|
||||||
|
STORE_ALIGNED_NOCACHE = 2
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
template<typename _Tp> struct V_TypeTraits
|
template<typename _Tp> struct V_TypeTraits
|
||||||
{
|
{
|
||||||
};
|
};
|
||||||
@ -154,7 +165,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
|||||||
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
|
// but some of AVX2 intrinsics get v256_ prefix instead of v_, e.g. v256_load() vs v_load().
|
||||||
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
|
// Correspondingly, the wide intrinsics (which are mapped to the "widest"
|
||||||
// available instruction set) will get vx_ prefix
|
// available instruction set) will get vx_ prefix
|
||||||
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v245_load())
|
// (and will be mapped to v256_ counterparts) (e.g. vx_load() => v256_load())
|
||||||
#if CV_AVX2
|
#if CV_AVX2
|
||||||
|
|
||||||
#include "opencv2/core/hal/intrin_avx.hpp"
|
#include "opencv2/core/hal/intrin_avx.hpp"
|
||||||
@ -214,14 +225,16 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
|||||||
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
|
inline vtyp vx_setzero_##short_typ() { return prefix##_setzero_##short_typ(); } \
|
||||||
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
|
inline vtyp vx_##loadsfx(const typ* ptr) { return prefix##_##loadsfx(ptr); } \
|
||||||
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
|
inline vtyp vx_##loadsfx##_aligned(const typ* ptr) { return prefix##_##loadsfx##_aligned(ptr); } \
|
||||||
|
inline vtyp vx_##loadsfx##_low(const typ* ptr) { return prefix##_##loadsfx##_low(ptr); } \
|
||||||
|
inline vtyp vx_##loadsfx##_halves(const typ* ptr0, const typ* ptr1) { return prefix##_##loadsfx##_halves(ptr0, ptr1); } \
|
||||||
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
|
inline void vx_store(typ* ptr, const vtyp& v) { return v_store(ptr, v); } \
|
||||||
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
|
inline void vx_store_aligned(typ* ptr, const vtyp& v) { return v_store_aligned(ptr, v); }
|
||||||
|
|
||||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
|
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND(typ, wtyp, prefix) \
|
||||||
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
|
inline wtyp vx_load_expand(const typ* ptr) { return prefix##_load_expand(ptr); }
|
||||||
|
|
||||||
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
|
#define CV_INTRIN_DEFINE_WIDE_LOAD_EXPAND_Q(typ, qtyp, prefix) \
|
||||||
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
|
inline qtyp vx_load_expand_q(const typ* ptr) { return prefix##_load_expand_q(ptr); }
|
||||||
|
|
||||||
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
|
#define CV_INTRIN_DEFINE_WIDE_INTRIN_WITH_EXPAND(typ, vtyp, short_typ, wtyp, qtyp, prefix, loadsfx) \
|
||||||
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
CV_INTRIN_DEFINE_WIDE_INTRIN(typ, vtyp, short_typ, prefix, loadsfx) \
|
||||||
@ -316,7 +329,7 @@ template<typename _Tp> struct V_RegTraits
|
|||||||
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
|
CV_INTRIN_DEFINE_WIDE_INTRIN_ALL_TYPES(v256)
|
||||||
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
|
CV_INTRIN_DEFINE_WIDE_INTRIN(double, v_float64, f64, v256, load)
|
||||||
inline void vx_cleanup() { v256_cleanup(); }
|
inline void vx_cleanup() { v256_cleanup(); }
|
||||||
#elif CV_SIMD128
|
#elif CV_SIMD128 || CV_SIMD128_CPP
|
||||||
typedef v_uint8x16 v_uint8;
|
typedef v_uint8x16 v_uint8;
|
||||||
typedef v_int8x16 v_int8;
|
typedef v_int8x16 v_int8;
|
||||||
typedef v_uint16x8 v_uint16;
|
typedef v_uint16x8 v_uint16;
|
||||||
|
@ -304,6 +304,17 @@ inline v_float16x16 v256_setall_f16(short val) { return v_float16x16(_mm256_set1
|
|||||||
{ _mm256_storeu_si256((__m256i*)ptr, a.val); } \
|
{ _mm256_storeu_si256((__m256i*)ptr, a.val); } \
|
||||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm256_store_si256((__m256i*)ptr, a.val); } \
|
{ _mm256_store_si256((__m256i*)ptr, a.val); } \
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||||
|
{ _mm256_stream_si256((__m256i*)ptr, a.val); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||||
|
{ \
|
||||||
|
if( mode == hal::STORE_UNALIGNED ) \
|
||||||
|
_mm256_storeu_si256((__m256i*)ptr, a.val); \
|
||||||
|
else if( mode == hal::STORE_ALIGNED_NOCACHE ) \
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, a.val); \
|
||||||
|
else \
|
||||||
|
_mm256_store_si256((__m256i*)ptr, a.val); \
|
||||||
|
} \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \
|
{ _mm_storeu_si128((__m128i*)ptr, _v256_extract_low(a.val)); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -338,6 +349,17 @@ OPENCV_HAL_IMPL_AVX_LOADSTORE(v_int64x4, int64)
|
|||||||
{ _mm256_storeu_##suffix(ptr, a.val); } \
|
{ _mm256_storeu_##suffix(ptr, a.val); } \
|
||||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm256_store_##suffix(ptr, a.val); } \
|
{ _mm256_store_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||||
|
{ _mm256_stream_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||||
|
{ \
|
||||||
|
if( mode == hal::STORE_UNALIGNED ) \
|
||||||
|
_mm256_storeu_##suffix(ptr, a.val); \
|
||||||
|
else if( mode == hal::STORE_ALIGNED_NOCACHE ) \
|
||||||
|
_mm256_stream_##suffix(ptr, a.val); \
|
||||||
|
else \
|
||||||
|
_mm256_store_##suffix(ptr, a.val); \
|
||||||
|
} \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \
|
{ _mm_storeu_##suffix(ptr, _v256_extract_low(a.val)); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -407,6 +429,11 @@ inline v_float16x16 v256_load_f16(const short* ptr)
|
|||||||
inline v_float16x16 v256_load_f16_aligned(const short* ptr)
|
inline v_float16x16 v256_load_f16_aligned(const short* ptr)
|
||||||
{ return v_float16x16(_mm256_load_si256((const __m256i*)ptr)); }
|
{ return v_float16x16(_mm256_load_si256((const __m256i*)ptr)); }
|
||||||
|
|
||||||
|
inline v_float16x16 v256_load_f16_low(const short* ptr)
|
||||||
|
{ return v_float16x16(v256_load_low(ptr).val); }
|
||||||
|
inline v_float16x16 v256_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||||
|
{ return v_float16x16(v256_load_halves(ptr0, ptr1).val); }
|
||||||
|
|
||||||
inline void v_store(short* ptr, const v_float16x16& a)
|
inline void v_store(short* ptr, const v_float16x16& a)
|
||||||
{ _mm256_storeu_si256((__m256i*)ptr, a.val); }
|
{ _mm256_storeu_si256((__m256i*)ptr, a.val); }
|
||||||
inline void v_store_aligned(short* ptr, const v_float16x16& a)
|
inline void v_store_aligned(short* ptr, const v_float16x16& a)
|
||||||
@ -819,93 +846,79 @@ OPENCV_HAL_IMPL_AVX_BIN_FUNC(v_max, v_float64x4, _mm256_max_pd)
|
|||||||
template<int imm>
|
template<int imm>
|
||||||
inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b)
|
inline v_uint8x32 v_rotate_left(const v_uint8x32& a, const v_uint8x32& b)
|
||||||
{
|
{
|
||||||
|
enum {IMM_R = (16 - imm) & 0xFF};
|
||||||
|
enum {IMM_R2 = (32 - imm) & 0xFF};
|
||||||
|
|
||||||
|
if (imm == 0) return a;
|
||||||
|
if (imm == 32) return b;
|
||||||
|
if (imm > 32) return v_uint8x32();
|
||||||
|
|
||||||
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03);
|
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x03);
|
||||||
|
if (imm == 16) return v_uint8x32(swap);
|
||||||
switch(imm)
|
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, IMM_R));
|
||||||
{
|
return v_uint8x32(_mm256_alignr_epi8(swap, b.val, IMM_R2)); // imm < 32
|
||||||
case 0: return a;
|
|
||||||
case 32: return b;
|
|
||||||
case 16: return v_uint8x32(swap);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swap, 16 - imm));
|
|
||||||
if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(swap, b.val, 32 - imm));
|
|
||||||
|
|
||||||
return v_uint8x32();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int imm>
|
template<int imm>
|
||||||
inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
|
inline v_uint8x32 v_rotate_right(const v_uint8x32& a, const v_uint8x32& b)
|
||||||
{
|
{
|
||||||
|
enum {IMM_L = (imm - 16) & 0xFF};
|
||||||
|
|
||||||
|
if (imm == 0) return a;
|
||||||
|
if (imm == 32) return b;
|
||||||
|
if (imm > 32) return v_uint8x32();
|
||||||
|
|
||||||
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21);
|
__m256i swap = _mm256_permute2x128_si256(a.val, b.val, 0x21);
|
||||||
|
if (imm == 16) return v_uint8x32(swap);
|
||||||
switch(imm)
|
|
||||||
{
|
|
||||||
case 0: return a;
|
|
||||||
case 32: return b;
|
|
||||||
case 16: return v_uint8x32(swap);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm));
|
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swap, a.val, imm));
|
||||||
if (imm < 32) return v_uint8x32(_mm256_alignr_epi8(b.val, swap, imm - 16));
|
return v_uint8x32(_mm256_alignr_epi8(b.val, swap, IMM_L));
|
||||||
|
|
||||||
return v_uint8x32();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int imm>
|
template<int imm>
|
||||||
inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
|
inline v_uint8x32 v_rotate_left(const v_uint8x32& a)
|
||||||
{
|
{
|
||||||
v_uint8x32 res;
|
enum {IMM_L = (imm - 16) & 0xFF};
|
||||||
|
enum {IMM_R = (16 - imm) & 0xFF};
|
||||||
|
|
||||||
|
if (imm == 0) return a;
|
||||||
|
if (imm > 32) return v_uint8x32();
|
||||||
|
|
||||||
// ESAC control[3] ? [127:0] = 0
|
// ESAC control[3] ? [127:0] = 0
|
||||||
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0));
|
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(0, 0, 2, 0));
|
||||||
|
if (imm == 16) return v_uint8x32(swapz);
|
||||||
if (imm == 0)
|
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(a.val, swapz, IMM_R));
|
||||||
return a;
|
return v_uint8x32(_mm256_slli_si256(swapz, IMM_L));
|
||||||
if (imm == 16)
|
|
||||||
res.val = swapz;
|
|
||||||
else if (imm < 16)
|
|
||||||
res.val = _mm256_alignr_epi8(a.val, swapz, 16 - imm);
|
|
||||||
else if (imm < 32)
|
|
||||||
res.val = _mm256_slli_si256(swapz, imm - 16);
|
|
||||||
else
|
|
||||||
return v_uint8x32();
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int imm>
|
template<int imm>
|
||||||
inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
|
inline v_uint8x32 v_rotate_right(const v_uint8x32& a)
|
||||||
{
|
{
|
||||||
v_uint8x32 res;
|
enum {IMM_L = (imm - 16) & 0xFF};
|
||||||
|
|
||||||
|
if (imm == 0) return a;
|
||||||
|
if (imm > 32) return v_uint8x32();
|
||||||
|
|
||||||
// ESAC control[3] ? [127:0] = 0
|
// ESAC control[3] ? [127:0] = 0
|
||||||
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1));
|
__m256i swapz = _mm256_permute2x128_si256(a.val, a.val, _MM_SHUFFLE(2, 0, 0, 1));
|
||||||
|
if (imm == 16) return v_uint8x32(swapz);
|
||||||
if (imm == 0)
|
if (imm < 16) return v_uint8x32(_mm256_alignr_epi8(swapz, a.val, imm));
|
||||||
return a;
|
return v_uint8x32(_mm256_srli_si256(swapz, IMM_L));
|
||||||
if (imm == 16)
|
|
||||||
res.val = swapz;
|
|
||||||
else if (imm < 16)
|
|
||||||
res.val = _mm256_alignr_epi8(swapz, a.val, imm);
|
|
||||||
else if (imm < 32)
|
|
||||||
res.val = _mm256_srli_si256(swapz, imm - 16);
|
|
||||||
else
|
|
||||||
return v_uint8x32();
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
|
#define OPENCV_HAL_IMPL_AVX_ROTATE_CAST(intrin, _Tpvec, cast) \
|
||||||
template<int imm> \
|
template<int imm> \
|
||||||
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
|
inline _Tpvec intrin(const _Tpvec& a, const _Tpvec& b) \
|
||||||
{ \
|
{ \
|
||||||
const int w = sizeof(typename _Tpvec::lane_type); \
|
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
|
||||||
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a), \
|
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a), \
|
||||||
v_reinterpret_as_u8(b)); \
|
v_reinterpret_as_u8(b)); \
|
||||||
return _Tpvec(cast(ret.val)); \
|
return _Tpvec(cast(ret.val)); \
|
||||||
} \
|
} \
|
||||||
template<int imm> \
|
template<int imm> \
|
||||||
inline _Tpvec intrin(const _Tpvec& a) \
|
inline _Tpvec intrin(const _Tpvec& a) \
|
||||||
{ \
|
{ \
|
||||||
const int w = sizeof(typename _Tpvec::lane_type); \
|
enum {IMMxW = imm * sizeof(typename _Tpvec::lane_type)}; \
|
||||||
v_uint8x32 ret = intrin<imm*w>(v_reinterpret_as_u8(a)); \
|
v_uint8x32 ret = intrin<IMMxW>(v_reinterpret_as_u8(a)); \
|
||||||
return _Tpvec(cast(ret.val)); \
|
return _Tpvec(cast(ret.val)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1616,7 +1629,7 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& a, v_uint8x32& b
|
|||||||
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
|
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
|
||||||
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
||||||
|
|
||||||
static const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
|
const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15,
|
||||||
0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
|
||||||
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
|
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
|
||||||
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
|
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
|
||||||
@ -1633,7 +1646,7 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& a, v_uint16x16&
|
|||||||
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
|
__m256i ab0 = _mm256_loadu_si256((const __m256i*)ptr);
|
||||||
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
|
__m256i ab1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
|
||||||
|
|
||||||
static const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
|
const __m256i sh = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
|
||||||
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||||
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
|
__m256i p0 = _mm256_shuffle_epi8(ab0, sh);
|
||||||
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
|
__m256i p1 = _mm256_shuffle_epi8(ab1, sh);
|
||||||
@ -1683,16 +1696,16 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g,
|
|||||||
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
|
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
|
||||||
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
|
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
|
||||||
|
|
||||||
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
|
const __m256i m0 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
|
||||||
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
||||||
static const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
|
const __m256i m1 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
|
||||||
-1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
|
-1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1);
|
||||||
|
|
||||||
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
|
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
|
||||||
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
|
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
|
||||||
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
|
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
|
||||||
|
|
||||||
static const __m256i
|
const __m256i
|
||||||
sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
|
sh_b = _mm256_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13,
|
||||||
0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13),
|
0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13),
|
||||||
sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
|
sh_g = _mm256_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14,
|
||||||
@ -1717,18 +1730,18 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16&
|
|||||||
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
|
__m256i s02_low = _mm256_permute2x128_si256(bgr0, bgr2, 0 + 2*16);
|
||||||
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
|
__m256i s02_high = _mm256_permute2x128_si256(bgr0, bgr2, 1 + 3*16);
|
||||||
|
|
||||||
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
|
const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
|
||||||
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
|
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
|
||||||
static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
|
const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
|
||||||
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
|
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
|
||||||
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
|
__m256i b0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_low, s02_high, m0), bgr1, m1);
|
||||||
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
|
__m256i g0 = _mm256_blendv_epi8(_mm256_blendv_epi8(bgr1, s02_low, m0), s02_high, m1);
|
||||||
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
|
__m256i r0 = _mm256_blendv_epi8(_mm256_blendv_epi8(s02_high, s02_low, m1), bgr1, m0);
|
||||||
static const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
|
const __m256i sh_b = _mm256_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
|
||||||
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
||||||
static const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13,
|
const __m256i sh_g = _mm256_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13,
|
||||||
2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
|
2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
|
||||||
static const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
|
const __m256i sh_r = _mm256_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
|
||||||
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
||||||
b0 = _mm256_shuffle_epi8(b0, sh_b);
|
b0 = _mm256_shuffle_epi8(b0, sh_b);
|
||||||
g0 = _mm256_shuffle_epi8(g0, sh_g);
|
g0 = _mm256_shuffle_epi8(g0, sh_g);
|
||||||
@ -1785,7 +1798,7 @@ inline void v_load_deinterleave( const uchar* ptr, v_uint8x32& b, v_uint8x32& g,
|
|||||||
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
||||||
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
|
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 64));
|
||||||
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96));
|
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 96));
|
||||||
static const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
|
const __m256i sh = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
|
||||||
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
|
0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
|
||||||
|
|
||||||
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
|
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
|
||||||
@ -1820,7 +1833,7 @@ inline void v_load_deinterleave( const ushort* ptr, v_uint16x16& b, v_uint16x16&
|
|||||||
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
|
__m256i bgr1 = _mm256_loadu_si256((const __m256i*)(ptr + 16));
|
||||||
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
__m256i bgr2 = _mm256_loadu_si256((const __m256i*)(ptr + 32));
|
||||||
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48));
|
__m256i bgr3 = _mm256_loadu_si256((const __m256i*)(ptr + 48));
|
||||||
static const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
const __m256i sh = _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
|
||||||
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
|
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||||
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
|
__m256i p0 = _mm256_shuffle_epi8(bgr0, sh);
|
||||||
__m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
|
__m256i p1 = _mm256_shuffle_epi8(bgr1, sh);
|
||||||
@ -1901,7 +1914,8 @@ inline void v_load_deinterleave( const uint64* ptr, v_uint64x4& b, v_uint64x4& g
|
|||||||
|
|
||||||
///////////////////////////// store interleave /////////////////////////////////////
|
///////////////////////////// store interleave /////////////////////////////////////
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y )
|
inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x32& y,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val);
|
__m256i xy_l = _mm256_unpacklo_epi8(x.val, y.val);
|
||||||
__m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val);
|
__m256i xy_h = _mm256_unpackhi_epi8(x.val, y.val);
|
||||||
@ -1909,11 +1923,25 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& x, const v_uint8x3
|
|||||||
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
||||||
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 32), xy1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 32), xy1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 32), xy1);
|
_mm256_storeu_si256((__m256i*)(ptr + 32), xy1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y )
|
inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint16x16& y,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val);
|
__m256i xy_l = _mm256_unpacklo_epi16(x.val, y.val);
|
||||||
__m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val);
|
__m256i xy_h = _mm256_unpackhi_epi16(x.val, y.val);
|
||||||
@ -1921,11 +1949,25 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& x, const v_uint1
|
|||||||
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
||||||
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 16), xy1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 16), xy1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 16), xy1);
|
_mm256_storeu_si256((__m256i*)(ptr + 16), xy1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y )
|
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint32x8& y,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val);
|
__m256i xy_l = _mm256_unpacklo_epi32(x.val, y.val);
|
||||||
__m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val);
|
__m256i xy_h = _mm256_unpackhi_epi32(x.val, y.val);
|
||||||
@ -1933,11 +1975,25 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& x, const v_uint
|
|||||||
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
||||||
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 8), xy1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 8), xy1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 8), xy1);
|
_mm256_storeu_si256((__m256i*)(ptr + 8), xy1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y )
|
inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64x4& y,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val);
|
__m256i xy_l = _mm256_unpacklo_epi64(x.val, y.val);
|
||||||
__m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val);
|
__m256i xy_h = _mm256_unpackhi_epi64(x.val, y.val);
|
||||||
@ -1945,19 +2001,33 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& x, const v_uint64
|
|||||||
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
__m256i xy0 = _mm256_permute2x128_si256(xy_l, xy_h, 0 + 2*16);
|
||||||
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
__m256i xy1 = _mm256_permute2x128_si256(xy_l, xy_h, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 4), xy1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, xy0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 4), xy1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
_mm256_storeu_si256((__m256i*)ptr, xy0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 4), xy1);
|
_mm256_storeu_si256((__m256i*)(ptr + 4), xy1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r )
|
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
static const __m256i sh_b = _mm256_setr_epi8(
|
const __m256i sh_b = _mm256_setr_epi8(
|
||||||
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
|
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5,
|
||||||
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
|
0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
|
||||||
static const __m256i sh_g = _mm256_setr_epi8(
|
const __m256i sh_g = _mm256_setr_epi8(
|
||||||
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
|
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10,
|
||||||
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
|
5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
|
||||||
static const __m256i sh_r = _mm256_setr_epi8(
|
const __m256i sh_r = _mm256_setr_epi8(
|
||||||
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
|
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15,
|
||||||
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
|
10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
|
||||||
|
|
||||||
@ -1965,9 +2035,9 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3
|
|||||||
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
|
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
|
||||||
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
|
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
|
||||||
|
|
||||||
static const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
|
const __m256i m0 = _mm256_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0,
|
||||||
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
||||||
static const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
|
const __m256i m1 = _mm256_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
|
||||||
0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
||||||
|
|
||||||
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
|
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
|
||||||
@ -1978,20 +2048,36 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3
|
|||||||
__m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16);
|
__m256i bgr1 = _mm256_permute2x128_si256(p2, p0, 0 + 3*16);
|
||||||
__m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16);
|
__m256i bgr2 = _mm256_permute2x128_si256(p1, p2, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 32), bgr1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 64), bgr2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 32), bgr1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 64), bgr2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr1);
|
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 64), bgr2);
|
_mm256_storeu_si256((__m256i*)(ptr + 64), bgr2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r )
|
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g, const v_uint16x16& r,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
static const __m256i sh_b = _mm256_setr_epi8(
|
const __m256i sh_b = _mm256_setr_epi8(
|
||||||
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
|
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11,
|
||||||
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
||||||
static const __m256i sh_g = _mm256_setr_epi8(
|
const __m256i sh_g = _mm256_setr_epi8(
|
||||||
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5,
|
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5,
|
||||||
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
|
10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
|
||||||
static const __m256i sh_r = _mm256_setr_epi8(
|
const __m256i sh_r = _mm256_setr_epi8(
|
||||||
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
|
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15,
|
||||||
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
||||||
|
|
||||||
@ -1999,9 +2085,9 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1
|
|||||||
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
|
__m256i g0 = _mm256_shuffle_epi8(g.val, sh_g);
|
||||||
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
|
__m256i r0 = _mm256_shuffle_epi8(r.val, sh_r);
|
||||||
|
|
||||||
static const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
|
const __m256i m0 = _mm256_setr_epi8(0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1,
|
||||||
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
|
0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0);
|
||||||
static const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
|
const __m256i m1 = _mm256_setr_epi8(0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0,
|
||||||
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
|
-1, -1, 0, 0, 0, 0, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0);
|
||||||
|
|
||||||
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
|
__m256i p0 = _mm256_blendv_epi8(_mm256_blendv_epi8(b0, g0, m0), r0, m1);
|
||||||
@ -2012,12 +2098,28 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1
|
|||||||
//__m256i bgr1 = p1;
|
//__m256i bgr1 = p1;
|
||||||
__m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16);
|
__m256i bgr2 = _mm256_permute2x128_si256(p0, p2, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 16), p1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 32), bgr2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 16), p1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 32), bgr2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 16), p1);
|
_mm256_storeu_si256((__m256i*)(ptr + 16), p1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr2);
|
_mm256_storeu_si256((__m256i*)(ptr + 32), bgr2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r )
|
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g, const v_uint32x8& r,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i b0 = _mm256_shuffle_epi32(b.val, 0x6c);
|
__m256i b0 = _mm256_shuffle_epi32(b.val, 0x6c);
|
||||||
__m256i g0 = _mm256_shuffle_epi32(g.val, 0xb1);
|
__m256i g0 = _mm256_shuffle_epi32(g.val, 0xb1);
|
||||||
@ -2031,12 +2133,28 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint
|
|||||||
//__m256i bgr1 = p2;
|
//__m256i bgr1 = p2;
|
||||||
__m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
|
__m256i bgr2 = _mm256_permute2x128_si256(p0, p1, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 8), p2);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 16), bgr2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 8), p2);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 16), bgr2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 8), p2);
|
_mm256_storeu_si256((__m256i*)(ptr + 8), p2);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 16), bgr2);
|
_mm256_storeu_si256((__m256i*)(ptr + 16), bgr2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r )
|
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g, const v_uint64x4& r,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i s01 = _mm256_unpacklo_epi64(b.val, g.val);
|
__m256i s01 = _mm256_unpacklo_epi64(b.val, g.val);
|
||||||
__m256i s12 = _mm256_unpackhi_epi64(g.val, r.val);
|
__m256i s12 = _mm256_unpackhi_epi64(g.val, r.val);
|
||||||
@ -2046,12 +2164,29 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64
|
|||||||
__m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f);
|
__m256i bgr1 = _mm256_blend_epi32(s01, s12, 0x0f);
|
||||||
__m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16);
|
__m256i bgr2 = _mm256_permute2x128_si256(s20, s12, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 4), bgr1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 8), bgr2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgr0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 4), bgr1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 8), bgr2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
_mm256_storeu_si256((__m256i*)ptr, bgr0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 4), bgr1);
|
_mm256_storeu_si256((__m256i*)(ptr + 4), bgr1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 8), bgr2);
|
_mm256_storeu_si256((__m256i*)(ptr + 8), bgr2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g, const v_uint8x32& r, const v_uint8x32& a )
|
inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x32& g,
|
||||||
|
const v_uint8x32& r, const v_uint8x32& a,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i bg0 = _mm256_unpacklo_epi8(b.val, g.val);
|
__m256i bg0 = _mm256_unpacklo_epi8(b.val, g.val);
|
||||||
__m256i bg1 = _mm256_unpackhi_epi8(b.val, g.val);
|
__m256i bg1 = _mm256_unpackhi_epi8(b.val, g.val);
|
||||||
@ -2068,14 +2203,32 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x32& b, const v_uint8x3
|
|||||||
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
||||||
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 32), bgra1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 64), bgra2);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 96), bgra3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 32), bgra1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 64), bgra2);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 96), bgra3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra1);
|
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 64), bgra2);
|
_mm256_storeu_si256((__m256i*)(ptr + 64), bgra2);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 96), bgra3);
|
_mm256_storeu_si256((__m256i*)(ptr + 96), bgra3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g,
|
inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint16x16& g,
|
||||||
const v_uint16x16& r, const v_uint16x16& a )
|
const v_uint16x16& r, const v_uint16x16& a,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i bg0 = _mm256_unpacklo_epi16(b.val, g.val);
|
__m256i bg0 = _mm256_unpacklo_epi16(b.val, g.val);
|
||||||
__m256i bg1 = _mm256_unpackhi_epi16(b.val, g.val);
|
__m256i bg1 = _mm256_unpackhi_epi16(b.val, g.val);
|
||||||
@ -2092,14 +2245,32 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x16& b, const v_uint1
|
|||||||
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
||||||
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 16), bgra1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 32), bgra2);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 48), bgra3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 16), bgra1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 32), bgra2);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 48), bgra3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra1);
|
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra2);
|
_mm256_storeu_si256((__m256i*)(ptr + 32), bgra2);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 48), bgra3);
|
_mm256_storeu_si256((__m256i*)(ptr + 48), bgra3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g,
|
inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint32x8& g,
|
||||||
const v_uint32x8& r, const v_uint32x8& a )
|
const v_uint32x8& r, const v_uint32x8& a,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i bg0 = _mm256_unpacklo_epi32(b.val, g.val);
|
__m256i bg0 = _mm256_unpacklo_epi32(b.val, g.val);
|
||||||
__m256i bg1 = _mm256_unpackhi_epi32(b.val, g.val);
|
__m256i bg1 = _mm256_unpackhi_epi32(b.val, g.val);
|
||||||
@ -2116,14 +2287,32 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x8& b, const v_uint
|
|||||||
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
__m256i bgra1 = _mm256_permute2x128_si256(bgra2_, bgra3_, 0 + 2*16);
|
||||||
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
__m256i bgra3 = _mm256_permute2x128_si256(bgra2_, bgra3_, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 8), bgra1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 16), bgra2);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 24), bgra3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 8), bgra1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 16), bgra2);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 24), bgra3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra1);
|
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra2);
|
_mm256_storeu_si256((__m256i*)(ptr + 16), bgra2);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 24), bgra3);
|
_mm256_storeu_si256((__m256i*)(ptr + 24), bgra3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g,
|
inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64x4& g,
|
||||||
const v_uint64x4& r, const v_uint64x4& a )
|
const v_uint64x4& r, const v_uint64x4& a,
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED )
|
||||||
{
|
{
|
||||||
__m256i bg0 = _mm256_unpacklo_epi64(b.val, g.val);
|
__m256i bg0 = _mm256_unpacklo_epi64(b.val, g.val);
|
||||||
__m256i bg1 = _mm256_unpackhi_epi64(b.val, g.val);
|
__m256i bg1 = _mm256_unpackhi_epi64(b.val, g.val);
|
||||||
@ -2135,10 +2324,27 @@ inline void v_store_interleave( uint64* ptr, const v_uint64x4& b, const v_uint64
|
|||||||
__m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16);
|
__m256i bgra2 = _mm256_permute2x128_si256(bg0, ra0, 1 + 3*16);
|
||||||
__m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16);
|
__m256i bgra3 = _mm256_permute2x128_si256(bg1, ra1, 1 + 3*16);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm256_stream_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 4), bgra1);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 8), bgra2);
|
||||||
|
_mm256_stream_si256((__m256i*)(ptr + 12), bgra3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm256_store_si256((__m256i*)ptr, bgra0);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 4), bgra1);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 8), bgra2);
|
||||||
|
_mm256_store_si256((__m256i*)(ptr + 12), bgra3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
_mm256_storeu_si256((__m256i*)ptr, bgra0);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 4), bgra1);
|
_mm256_storeu_si256((__m256i*)(ptr + 4), bgra1);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra2);
|
_mm256_storeu_si256((__m256i*)(ptr + 8), bgra2);
|
||||||
_mm256_storeu_si256((__m256i*)(ptr + 12), bgra3);
|
_mm256_storeu_si256((__m256i*)(ptr + 12), bgra3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
|
#define OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
|
||||||
@ -2166,27 +2372,30 @@ inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpv
|
|||||||
c0 = v_reinterpret_as_##suffix0(c1); \
|
c0 = v_reinterpret_as_##suffix0(c1); \
|
||||||
d0 = v_reinterpret_as_##suffix0(d1); \
|
d0 = v_reinterpret_as_##suffix0(d1); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, mode); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0, \
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
||||||
const _Tpvec0& c0, const _Tpvec0& d0 ) \
|
const _Tpvec0& c0, const _Tpvec0& d0, \
|
||||||
|
hal::StoreMode mode=hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
||||||
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
|
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
|
||||||
}
|
}
|
||||||
|
|
||||||
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8)
|
OPENCV_HAL_IMPL_AVX_LOADSTORE_INTERLEAVE(v_int8x32, schar, s8, v_uint8x32, uchar, u8)
|
||||||
|
@ -1319,7 +1319,8 @@ Scheme:
|
|||||||
For all types except 64-bit. */
|
For all types except 64-bit. */
|
||||||
template<typename _Tp, int n>
|
template<typename _Tp, int n>
|
||||||
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||||
const v_reg<_Tp, n>& b)
|
const v_reg<_Tp, n>& b,
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
int i, i2;
|
int i, i2;
|
||||||
for( i = i2 = 0; i < n; i++, i2 += 2 )
|
for( i = i2 = 0; i < n; i++, i2 += 2 )
|
||||||
@ -1339,7 +1340,8 @@ Scheme:
|
|||||||
For all types except 64-bit. */
|
For all types except 64-bit. */
|
||||||
template<typename _Tp, int n>
|
template<typename _Tp, int n>
|
||||||
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||||
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
|
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
int i, i3;
|
int i, i3;
|
||||||
for( i = i3 = 0; i < n; i++, i3 += 3 )
|
for( i = i3 = 0; i < n; i++, i3 += 3 )
|
||||||
@ -1360,7 +1362,8 @@ Scheme:
|
|||||||
For all types except 64-bit. */
|
For all types except 64-bit. */
|
||||||
template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
|
||||||
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
|
const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
|
||||||
const v_reg<_Tp, n>& d)
|
const v_reg<_Tp, n>& d,
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
int i, i4;
|
int i, i4;
|
||||||
for( i = i4 = 0; i < n; i++, i4 += 4 )
|
for( i = i4 = 0; i < n; i++, i4 += 4 )
|
||||||
@ -1430,6 +1433,20 @@ inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
|
|||||||
ptr[i] = a.s[i];
|
ptr[i] = a.s[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, int n>
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const v_reg<_Tp, n>& a)
|
||||||
|
{
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
ptr[i] = a.s[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, int n>
|
||||||
|
inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a, hal::StoreMode /*mode*/)
|
||||||
|
{
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
ptr[i] = a.s[i];
|
||||||
|
}
|
||||||
|
|
||||||
/** @brief Combine vector from first elements of two vectors
|
/** @brief Combine vector from first elements of two vectors
|
||||||
|
|
||||||
Scheme:
|
Scheme:
|
||||||
|
@ -319,6 +319,9 @@ static inline void cv_vst1_f16(void* ptr, float16x4_t a)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef vdup_n_f16
|
||||||
|
#define vdup_n_f16(v) (float16x4_t){v, v, v, v}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct v_float16x8
|
struct v_float16x8
|
||||||
{
|
{
|
||||||
@ -864,6 +867,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
|||||||
{ vst1q_##suffix(ptr, a.val); } \
|
{ vst1q_##suffix(ptr, a.val); } \
|
||||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ vst1q_##suffix(ptr, a.val); } \
|
{ vst1q_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||||
|
{ vst1q_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \
|
||||||
|
{ vst1q_##suffix(ptr, a.val); } \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
|
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -889,6 +896,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
|
|||||||
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
||||||
{ return v_float16x8(cv_vld1q_f16(ptr)); }
|
{ return v_float16x8(cv_vld1q_f16(ptr)); }
|
||||||
|
|
||||||
|
inline v_float16x8 v_load_f16_low(const short* ptr)
|
||||||
|
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr), vdup_n_f16((float16_t)0))); }
|
||||||
|
inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||||
|
{ return v_float16x8(vcombine_f16(cv_vld1_f16(ptr0), cv_vld1_f16(ptr1))); }
|
||||||
|
|
||||||
inline void v_store(short* ptr, const v_float16x8& a)
|
inline void v_store(short* ptr, const v_float16x8& a)
|
||||||
{ cv_vst1q_f16(ptr, a.val); }
|
{ cv_vst1q_f16(ptr, a.val); }
|
||||||
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
||||||
@ -1292,14 +1304,16 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
|
|||||||
c.val = v.val[2]; \
|
c.val = v.val[2]; \
|
||||||
d.val = v.val[3]; \
|
d.val = v.val[3]; \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
|
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec##x2_t v; \
|
_Tpvec##x2_t v; \
|
||||||
v.val[0] = a.val; \
|
v.val[0] = a.val; \
|
||||||
v.val[1] = b.val; \
|
v.val[1] = b.val; \
|
||||||
vst2q_##suffix(ptr, v); \
|
vst2q_##suffix(ptr, v); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
|
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||||
|
const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec##x3_t v; \
|
_Tpvec##x3_t v; \
|
||||||
v.val[0] = a.val; \
|
v.val[0] = a.val; \
|
||||||
@ -1308,7 +1322,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec&
|
|||||||
vst3q_##suffix(ptr, v); \
|
vst3q_##suffix(ptr, v); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
|
||||||
const v_##_Tpvec& c, const v_##_Tpvec& d) \
|
const v_##_Tpvec& c, const v_##_Tpvec& d, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec##x4_t v; \
|
_Tpvec##x4_t v; \
|
||||||
v.val[0] = a.val; \
|
v.val[0] = a.val; \
|
||||||
@ -1360,7 +1375,8 @@ inline void v_load_deinterleave( const tp* ptr, v_##tp##x2& a, v_##tp##x2& b, \
|
|||||||
d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
|
d = v_##tp##x2(vcombine_##suffix(d0, d1)); \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b ) \
|
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ \
|
{ \
|
||||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||||
@ -1369,7 +1385,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2&
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
|
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
|
||||||
const v_##tp##x2& b, const v_##tp##x2& c ) \
|
const v_##tp##x2& b, const v_##tp##x2& c, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ \
|
{ \
|
||||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||||
@ -1380,7 +1397,8 @@ inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, \
|
|||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
|
inline void v_store_interleave( tp* ptr, const v_##tp##x2& a, const v_##tp##x2& b, \
|
||||||
const v_##tp##x2& c, const v_##tp##x2& d ) \
|
const v_##tp##x2& c, const v_##tp##x2& d, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ \
|
{ \
|
||||||
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
vst1_##suffix(ptr, vget_low_##suffix(a.val)); \
|
||||||
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
vst1_##suffix(ptr + 1, vget_low_##suffix(b.val)); \
|
||||||
|
@ -788,7 +788,7 @@ inline v_float32x4 v_sqrt(const v_float32x4& x)
|
|||||||
|
|
||||||
inline v_float32x4 v_invsqrt(const v_float32x4& x)
|
inline v_float32x4 v_invsqrt(const v_float32x4& x)
|
||||||
{
|
{
|
||||||
static const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
|
const __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
|
||||||
__m128 t = x.val;
|
__m128 t = x.val;
|
||||||
__m128 h = _mm_mul_ps(t, _0_5);
|
__m128 h = _mm_mul_ps(t, _0_5);
|
||||||
t = _mm_rsqrt_ps(t);
|
t = _mm_rsqrt_ps(t);
|
||||||
@ -801,7 +801,7 @@ inline v_float64x2 v_sqrt(const v_float64x2& x)
|
|||||||
|
|
||||||
inline v_float64x2 v_invsqrt(const v_float64x2& x)
|
inline v_float64x2 v_invsqrt(const v_float64x2& x)
|
||||||
{
|
{
|
||||||
static const __m128d v_1 = _mm_set1_pd(1.);
|
const __m128d v_1 = _mm_set1_pd(1.);
|
||||||
return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
|
return v_float64x2(_mm_div_pd(v_1, _mm_sqrt_pd(x.val)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1261,6 +1261,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
|||||||
{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
|
{ _mm_storeu_si128((__m128i*)ptr, a.val); } \
|
||||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_store_si128((__m128i*)ptr, a.val); } \
|
{ _mm_store_si128((__m128i*)ptr, a.val); } \
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||||
|
{ _mm_stream_si128((__m128i*)ptr, a.val); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||||
|
{ \
|
||||||
|
if( mode == hal::STORE_UNALIGNED ) \
|
||||||
|
_mm_storeu_si128((__m128i*)ptr, a.val); \
|
||||||
|
else if( mode == hal::STORE_ALIGNED_NOCACHE ) \
|
||||||
|
_mm_stream_si128((__m128i*)ptr, a.val); \
|
||||||
|
else \
|
||||||
|
_mm_store_si128((__m128i*)ptr, a.val); \
|
||||||
|
} \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_storel_epi64((__m128i*)ptr, a.val); } \
|
{ _mm_storel_epi64((__m128i*)ptr, a.val); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -1292,6 +1303,17 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
|||||||
{ _mm_storeu_##suffix(ptr, a.val); } \
|
{ _mm_storeu_##suffix(ptr, a.val); } \
|
||||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_store_##suffix(ptr, a.val); } \
|
{ _mm_store_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \
|
||||||
|
{ _mm_stream_##suffix(ptr, a.val); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||||
|
{ \
|
||||||
|
if( mode == hal::STORE_UNALIGNED ) \
|
||||||
|
_mm_storeu_##suffix(ptr, a.val); \
|
||||||
|
else if( mode == hal::STORE_ALIGNED_NOCACHE ) \
|
||||||
|
_mm_stream_##suffix(ptr, a.val); \
|
||||||
|
else \
|
||||||
|
_mm_store_##suffix(ptr, a.val); \
|
||||||
|
} \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \
|
{ _mm_storel_epi64((__m128i*)ptr, _mm_cast##suffix##_si128(a.val)); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -1308,6 +1330,11 @@ inline v_float16x8 v_load_f16(const short* ptr)
|
|||||||
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
inline v_float16x8 v_load_f16_aligned(const short* ptr)
|
||||||
{ return v_float16x8(_mm_load_si128((const __m128i*)ptr)); }
|
{ return v_float16x8(_mm_load_si128((const __m128i*)ptr)); }
|
||||||
|
|
||||||
|
inline v_float16x8 v_load_f16_low(const short* ptr)
|
||||||
|
{ return v_float16x8(v_load_low(ptr).val); }
|
||||||
|
inline v_float16x8 v_load_f16_halves(const short* ptr0, const short* ptr1)
|
||||||
|
{ return v_float16x8(v_load_halves(ptr0, ptr1).val); }
|
||||||
|
|
||||||
inline void v_store(short* ptr, const v_float16x8& a)
|
inline void v_store(short* ptr, const v_float16x8& a)
|
||||||
{ _mm_storeu_si128((__m128i*)ptr, a.val); }
|
{ _mm_storeu_si128((__m128i*)ptr, a.val); }
|
||||||
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
inline void v_store_aligned(short* ptr, const v_float16x8& a)
|
||||||
@ -1671,17 +1698,17 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b)
|
|||||||
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
|
inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b, v_uint8x16& c)
|
||||||
{
|
{
|
||||||
#if CV_SSE4_1
|
#if CV_SSE4_1
|
||||||
static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
||||||
static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
||||||
__m128i s0 = _mm_loadu_si128((const __m128i*)ptr);
|
__m128i s0 = _mm_loadu_si128((const __m128i*)ptr);
|
||||||
__m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
|
__m128i s1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
|
||||||
__m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
|
__m128i s2 = _mm_loadu_si128((const __m128i*)(ptr + 32));
|
||||||
__m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1);
|
__m128i a0 = _mm_blendv_epi8(_mm_blendv_epi8(s0, s1, m0), s2, m1);
|
||||||
__m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1);
|
__m128i b0 = _mm_blendv_epi8(_mm_blendv_epi8(s1, s2, m0), s0, m1);
|
||||||
__m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1);
|
__m128i c0 = _mm_blendv_epi8(_mm_blendv_epi8(s2, s0, m0), s1, m1);
|
||||||
static const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
|
const __m128i sh_b = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
|
||||||
static const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
|
const __m128i sh_g = _mm_setr_epi8(1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14);
|
||||||
static const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
|
const __m128i sh_r = _mm_setr_epi8(2, 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15);
|
||||||
a0 = _mm_shuffle_epi8(a0, sh_b);
|
a0 = _mm_shuffle_epi8(a0, sh_b);
|
||||||
b0 = _mm_shuffle_epi8(b0, sh_g);
|
b0 = _mm_shuffle_epi8(b0, sh_g);
|
||||||
c0 = _mm_shuffle_epi8(c0, sh_r);
|
c0 = _mm_shuffle_epi8(c0, sh_r);
|
||||||
@ -1689,9 +1716,9 @@ inline void v_load_deinterleave(const uchar* ptr, v_uint8x16& a, v_uint8x16& b,
|
|||||||
b.val = b0;
|
b.val = b0;
|
||||||
c.val = c0;
|
c.val = c0;
|
||||||
#elif CV_SSSE3
|
#elif CV_SSSE3
|
||||||
static const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
|
const __m128i m0 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14);
|
||||||
static const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
|
const __m128i m1 = _mm_alignr_epi8(m0, m0, 11);
|
||||||
static const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
|
const __m128i m2 = _mm_alignr_epi8(m0, m0, 6);
|
||||||
|
|
||||||
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
|
__m128i t0 = _mm_loadu_si128((const __m128i*)ptr);
|
||||||
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
|
__m128i t1 = _mm_loadu_si128((const __m128i*)(ptr + 16));
|
||||||
@ -1784,9 +1811,9 @@ inline void v_load_deinterleave(const ushort* ptr, v_uint16x8& a, v_uint16x8& b,
|
|||||||
__m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
|
__m128i b0 = _mm_blend_epi16(_mm_blend_epi16(v2, v0, 0x92), v1, 0x24);
|
||||||
__m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
|
__m128i c0 = _mm_blend_epi16(_mm_blend_epi16(v1, v2, 0x92), v0, 0x24);
|
||||||
|
|
||||||
static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
||||||
static const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
|
const __m128i sh_b = _mm_setr_epi8(2, 3, 8, 9, 14, 15, 4, 5, 10, 11, 0, 1, 6, 7, 12, 13);
|
||||||
static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
||||||
a0 = _mm_shuffle_epi8(a0, sh_a);
|
a0 = _mm_shuffle_epi8(a0, sh_a);
|
||||||
b0 = _mm_shuffle_epi8(b0, sh_b);
|
b0 = _mm_shuffle_epi8(b0, sh_b);
|
||||||
c0 = _mm_shuffle_epi8(c0, sh_c);
|
c0 = _mm_shuffle_epi8(c0, sh_c);
|
||||||
@ -1955,55 +1982,61 @@ inline void v_load_deinterleave(const uint64 *ptr, v_uint64x2& a,
|
|||||||
|
|
||||||
// store interleave
|
// store interleave
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b)
|
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128i v0 = _mm_unpacklo_epi8(a.val, b.val);
|
__m128i v0 = _mm_unpacklo_epi8(a.val, b.val);
|
||||||
__m128i v1 = _mm_unpackhi_epi8(a.val, b.val);
|
__m128i v1 = _mm_unpackhi_epi8(a.val, b.val);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr), v0);
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
||||||
const v_uint8x16& c )
|
const v_uint8x16& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
#if CV_SSE4_1
|
#if CV_SSE4_1
|
||||||
static const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
|
const __m128i sh_a = _mm_setr_epi8(0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10, 5);
|
||||||
static const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
|
const __m128i sh_b = _mm_setr_epi8(5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15, 10);
|
||||||
static const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
|
const __m128i sh_c = _mm_setr_epi8(10, 5, 0, 11, 6, 1, 12, 7, 2, 13, 8, 3, 14, 9, 4, 15);
|
||||||
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
|
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
|
||||||
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
|
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
|
||||||
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
|
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
|
||||||
|
|
||||||
static const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
const __m128i m0 = _mm_setr_epi8(0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0);
|
||||||
static const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
const __m128i m1 = _mm_setr_epi8(0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0);
|
||||||
__m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0);
|
__m128i v0 = _mm_blendv_epi8(_mm_blendv_epi8(a0, b0, m1), c0, m0);
|
||||||
__m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0);
|
__m128i v1 = _mm_blendv_epi8(_mm_blendv_epi8(b0, c0, m1), a0, m0);
|
||||||
__m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0);
|
__m128i v2 = _mm_blendv_epi8(_mm_blendv_epi8(c0, a0, m1), b0, m0);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr), v0);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
|
|
||||||
#elif CV_SSSE3
|
#elif CV_SSSE3
|
||||||
static const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
|
const __m128i m0 = _mm_setr_epi8(0, 6, 11, 1, 7, 12, 2, 8, 13, 3, 9, 14, 4, 10, 15, 5);
|
||||||
static const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
|
const __m128i m1 = _mm_setr_epi8(5, 11, 0, 6, 12, 1, 7, 13, 2, 8, 14, 3, 9, 15, 4, 10);
|
||||||
static const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
|
const __m128i m2 = _mm_setr_epi8(10, 0, 5, 11, 1, 6, 12, 2, 7, 13, 3, 8, 14, 4, 9, 15);
|
||||||
|
|
||||||
__m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5);
|
__m128i t0 = _mm_alignr_epi8(b.val, _mm_slli_si128(a.val, 10), 5);
|
||||||
t0 = _mm_alignr_epi8(c.val, t0, 5);
|
t0 = _mm_alignr_epi8(c.val, t0, 5);
|
||||||
__m128i s0 = _mm_shuffle_epi8(t0, m0);
|
__m128i v0 = _mm_shuffle_epi8(t0, m0);
|
||||||
|
|
||||||
__m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6);
|
__m128i t1 = _mm_alignr_epi8(_mm_srli_si128(b.val, 5), _mm_slli_si128(a.val, 5), 6);
|
||||||
t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5);
|
t1 = _mm_alignr_epi8(_mm_srli_si128(c.val, 5), t1, 5);
|
||||||
__m128i s1 = _mm_shuffle_epi8(t1, m1);
|
__m128i v1 = _mm_shuffle_epi8(t1, m1);
|
||||||
|
|
||||||
__m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11);
|
__m128i t2 = _mm_alignr_epi8(_mm_srli_si128(c.val, 10), b.val, 11);
|
||||||
t2 = _mm_alignr_epi8(t2, a.val, 11);
|
t2 = _mm_alignr_epi8(t2, a.val, 11);
|
||||||
__m128i s2 = _mm_shuffle_epi8(t2, m2);
|
__m128i v2 = _mm_shuffle_epi8(t2, m2);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, s0);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), s1);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 32), s2);
|
|
||||||
#else
|
#else
|
||||||
__m128i z = _mm_setzero_si128();
|
__m128i z = _mm_setzero_si128();
|
||||||
__m128i ab0 = _mm_unpacklo_epi8(a.val, b.val);
|
__m128i ab0 = _mm_unpacklo_epi8(a.val, b.val);
|
||||||
@ -2042,15 +2075,31 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
|
|||||||
__m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10));
|
__m128i v0 = _mm_or_si128(_mm_srli_si128(p40, 2), _mm_slli_si128(p41, 10));
|
||||||
__m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6));
|
__m128i v1 = _mm_or_si128(_mm_srli_si128(p41, 6), _mm_slli_si128(p42, 6));
|
||||||
__m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2));
|
__m128i v2 = _mm_or_si128(_mm_srli_si128(p42, 10), _mm_slli_si128(p43, 2));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 32), v2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 32), v2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr), v0);
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
|
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
|
||||||
#endif
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x16& b,
|
||||||
const v_uint8x16& c, const v_uint8x16& d)
|
const v_uint8x16& c, const v_uint8x16& d,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
// a0 a1 a2 a3 ....
|
// a0 a1 a2 a3 ....
|
||||||
// b0 b1 b2 b3 ....
|
// b0 b1 b2 b3 ....
|
||||||
@ -2062,33 +2111,64 @@ inline void v_store_interleave( uchar* ptr, const v_uint8x16& a, const v_uint8x1
|
|||||||
__m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ...
|
__m128i u3 = _mm_unpackhi_epi8(b.val, d.val); // b8 d8 b9 d9 ...
|
||||||
|
|
||||||
__m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ...
|
__m128i v0 = _mm_unpacklo_epi8(u0, u2); // a0 b0 c0 d0 ...
|
||||||
__m128i v1 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
|
__m128i v1 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
|
||||||
__m128i v2 = _mm_unpackhi_epi8(u0, u2); // a4 b4 c4 d4 ...
|
__m128i v2 = _mm_unpacklo_epi8(u1, u3); // a8 b8 c8 d8 ...
|
||||||
__m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ...
|
__m128i v3 = _mm_unpackhi_epi8(u1, u3); // a12 b12 c12 d12 ...
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, v0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 32), v1);
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 32), v2);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 48), v3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 32), v2);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 48), v3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 32), v2);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 48), v3);
|
_mm_storeu_si128((__m128i*)(ptr + 48), v3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b )
|
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128i t0, t1;
|
__m128i v0 = _mm_unpacklo_epi16(a.val, b.val);
|
||||||
t0 = _mm_unpacklo_epi16(a.val, b.val);
|
__m128i v1 = _mm_unpackhi_epi16(a.val, b.val);
|
||||||
t1 = _mm_unpackhi_epi16(a.val, b.val);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr), t0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 8), t1);
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
|
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
|
||||||
const v_uint16x8& b,
|
const v_uint16x8& b, const v_uint16x8& c,
|
||||||
const v_uint16x8& c )
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
#if CV_SSE4_1
|
#if CV_SSE4_1
|
||||||
static const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
const __m128i sh_a = _mm_setr_epi8(0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5, 10, 11);
|
||||||
static const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
|
const __m128i sh_b = _mm_setr_epi8(10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15, 4, 5);
|
||||||
static const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
const __m128i sh_c = _mm_setr_epi8(4, 5, 10, 11, 0, 1, 6, 7, 12, 13, 2, 3, 8, 9, 14, 15);
|
||||||
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
|
__m128i a0 = _mm_shuffle_epi8(a.val, sh_a);
|
||||||
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
|
__m128i b0 = _mm_shuffle_epi8(b.val, sh_b);
|
||||||
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
|
__m128i c0 = _mm_shuffle_epi8(c.val, sh_c);
|
||||||
@ -2096,10 +2176,6 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
|
|||||||
__m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24);
|
__m128i v0 = _mm_blend_epi16(_mm_blend_epi16(a0, b0, 0x92), c0, 0x24);
|
||||||
__m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24);
|
__m128i v1 = _mm_blend_epi16(_mm_blend_epi16(c0, a0, 0x92), b0, 0x24);
|
||||||
__m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24);
|
__m128i v2 = _mm_blend_epi16(_mm_blend_epi16(b0, c0, 0x92), a0, 0x24);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, v0);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
|
|
||||||
#else
|
#else
|
||||||
__m128i z = _mm_setzero_si128();
|
__m128i z = _mm_setzero_si128();
|
||||||
__m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
|
__m128i ab0 = _mm_unpacklo_epi16(a.val, b.val);
|
||||||
@ -2128,15 +2204,30 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a,
|
|||||||
__m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10));
|
__m128i v0 = _mm_or_si128(_mm_srli_si128(p30, 2), _mm_slli_si128(p31, 10));
|
||||||
__m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6));
|
__m128i v1 = _mm_or_si128(_mm_srli_si128(p31, 6), _mm_slli_si128(p32, 6));
|
||||||
__m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2));
|
__m128i v2 = _mm_or_si128(_mm_srli_si128(p32, 10), _mm_slli_si128(p33, 2));
|
||||||
|
#endif
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 16), v2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 16), v2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr), v0);
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
|
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
|
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
|
||||||
#endif
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
|
inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16x8& b,
|
||||||
const v_uint16x8& c, const v_uint16x8& d)
|
const v_uint16x8& c, const v_uint16x8& d,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
// a0 a1 a2 a3 ....
|
// a0 a1 a2 a3 ....
|
||||||
// b0 b1 b2 b3 ....
|
// b0 b1 b2 b3 ....
|
||||||
@ -2148,27 +2239,58 @@ inline void v_store_interleave( ushort* ptr, const v_uint16x8& a, const v_uint16
|
|||||||
__m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ...
|
__m128i u3 = _mm_unpackhi_epi16(b.val, d.val); // b4 d4 b5 d5 ...
|
||||||
|
|
||||||
__m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ...
|
__m128i v0 = _mm_unpacklo_epi16(u0, u2); // a0 b0 c0 d0 ...
|
||||||
__m128i v1 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
|
__m128i v1 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
|
||||||
__m128i v2 = _mm_unpackhi_epi16(u0, u2); // a2 b2 c2 d2 ...
|
__m128i v2 = _mm_unpacklo_epi16(u1, u3); // a4 b4 c4 d4 ...
|
||||||
__m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ...
|
__m128i v3 = _mm_unpackhi_epi16(u1, u3); // a6 b6 c6 d6 ...
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, v0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 8), v2);
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 16), v1);
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 16), v2);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 24), v3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 16), v2);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 24), v3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 8), v1);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 16), v2);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 24), v3);
|
_mm_storeu_si128((__m128i*)(ptr + 24), v3);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b )
|
|
||||||
{
|
|
||||||
__m128i t0 = _mm_unpacklo_epi32(a.val, b.val);
|
|
||||||
__m128i t1 = _mm_unpackhi_epi32(a.val, b.val);
|
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, t0);
|
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 4), t1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
||||||
const v_uint32x4& c )
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
|
{
|
||||||
|
__m128i v0 = _mm_unpacklo_epi32(a.val, b.val);
|
||||||
|
__m128i v1 = _mm_unpackhi_epi32(a.val, b.val);
|
||||||
|
|
||||||
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 4), v1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 4), v1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 4), v1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
||||||
|
const v_uint32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3;
|
v_uint32x4 z = v_setzero_u32(), u0, u1, u2, u3;
|
||||||
v_transpose4x4(a, b, c, z, u0, u1, u2, u3);
|
v_transpose4x4(a, b, c, z, u0, u1, u2, u3);
|
||||||
@ -2177,35 +2299,82 @@ inline void v_store_interleave( unsigned* ptr, const v_uint32x4& a, const v_uint
|
|||||||
__m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8));
|
__m128i v1 = _mm_or_si128(_mm_srli_si128(u1.val, 4), _mm_slli_si128(u2.val, 8));
|
||||||
__m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4));
|
__m128i v2 = _mm_or_si128(_mm_srli_si128(u2.val, 8), _mm_slli_si128(u3.val, 4));
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, v0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 4), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 8), v2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 4), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 8), v2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 4), v1);
|
_mm_storeu_si128((__m128i*)(ptr + 4), v1);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 8), v2);
|
_mm_storeu_si128((__m128i*)(ptr + 8), v2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
inline void v_store_interleave(unsigned* ptr, const v_uint32x4& a, const v_uint32x4& b,
|
||||||
const v_uint32x4& c, const v_uint32x4& d)
|
const v_uint32x4& c, const v_uint32x4& d,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
v_uint32x4 t0, t1, t2, t3;
|
v_uint32x4 v0, v1, v2, v3;
|
||||||
v_transpose4x4(a, b, c, d, t0, t1, t2, t3);
|
v_transpose4x4(a, b, c, d, v0, v1, v2, v3);
|
||||||
v_store(ptr, t0);
|
|
||||||
v_store(ptr + 4, t1);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
v_store(ptr + 8, t2);
|
{
|
||||||
v_store(ptr + 12, t3);
|
_mm_stream_si128((__m128i*)(ptr), v0.val);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 4), v1.val);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 8), v2.val);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 12), v3.val);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0.val);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 4), v1.val);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 8), v2.val);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 12), v3.val);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0.val);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 4), v1.val);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 8), v2.val);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 12), v3.val);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2-channel, float only
|
// 2-channel, float only
|
||||||
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b)
|
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
// a0 a1 a2 a3 ...
|
__m128 v0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1
|
||||||
// b0 b1 b2 b3 ...
|
__m128 v1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3
|
||||||
__m128 u0 = _mm_unpacklo_ps(a.val, b.val); // a0 b0 a1 b1
|
|
||||||
__m128 u1 = _mm_unpackhi_ps(a.val, b.val); // a2 b2 a3 b3
|
|
||||||
|
|
||||||
_mm_storeu_ps(ptr, u0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_ps((ptr + 4), u1);
|
{
|
||||||
|
_mm_stream_ps(ptr, v0);
|
||||||
|
_mm_stream_ps(ptr + 4, v1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_ps(ptr, v0);
|
||||||
|
_mm_store_ps(ptr + 4, v1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_ps(ptr, v0);
|
||||||
|
_mm_storeu_ps(ptr + 4, v1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
|
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
|
||||||
|
const v_float32x4& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0));
|
__m128 u0 = _mm_shuffle_ps(a.val, b.val, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
__m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0));
|
__m128 u1 = _mm_shuffle_ps(c.val, a.val, _MM_SHUFFLE(1, 1, 0, 0));
|
||||||
@ -2217,13 +2386,29 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
|
|||||||
__m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3));
|
__m128 u5 = _mm_shuffle_ps(b.val, c.val, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
__m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0));
|
__m128 v2 = _mm_shuffle_ps(u4, u5, _MM_SHUFFLE(2, 0, 2, 0));
|
||||||
|
|
||||||
_mm_storeu_ps(ptr + 0, v0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_ps(ptr, v0);
|
||||||
|
_mm_stream_ps(ptr + 4, v1);
|
||||||
|
_mm_stream_ps(ptr + 8, v2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_ps(ptr, v0);
|
||||||
|
_mm_store_ps(ptr + 4, v1);
|
||||||
|
_mm_store_ps(ptr + 8, v2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_ps(ptr, v0);
|
||||||
_mm_storeu_ps(ptr + 4, v1);
|
_mm_storeu_ps(ptr + 4, v1);
|
||||||
_mm_storeu_ps(ptr + 8, v2);
|
_mm_storeu_ps(ptr + 8, v2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
|
inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32x4& b,
|
||||||
const v_float32x4& c, const v_float32x4& d)
|
const v_float32x4& c, const v_float32x4& d,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128 u0 = _mm_unpacklo_ps(a.val, c.val);
|
__m128 u0 = _mm_unpacklo_ps(a.val, c.val);
|
||||||
__m128 u1 = _mm_unpacklo_ps(b.val, d.val);
|
__m128 u1 = _mm_unpacklo_ps(b.val, d.val);
|
||||||
@ -2234,43 +2419,109 @@ inline void v_store_interleave(float* ptr, const v_float32x4& a, const v_float32
|
|||||||
__m128 v1 = _mm_unpackhi_ps(u0, u1);
|
__m128 v1 = _mm_unpackhi_ps(u0, u1);
|
||||||
__m128 v3 = _mm_unpackhi_ps(u2, u3);
|
__m128 v3 = _mm_unpackhi_ps(u2, u3);
|
||||||
|
|
||||||
_mm_storeu_ps(ptr + 0, v0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
|
{
|
||||||
|
_mm_stream_ps(ptr, v0);
|
||||||
|
_mm_stream_ps(ptr + 4, v1);
|
||||||
|
_mm_stream_ps(ptr + 8, v2);
|
||||||
|
_mm_stream_ps(ptr + 12, v3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_ps(ptr, v0);
|
||||||
|
_mm_store_ps(ptr + 4, v1);
|
||||||
|
_mm_store_ps(ptr + 8, v2);
|
||||||
|
_mm_store_ps(ptr + 12, v3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_ps(ptr, v0);
|
||||||
_mm_storeu_ps(ptr + 4, v1);
|
_mm_storeu_ps(ptr + 4, v1);
|
||||||
_mm_storeu_ps(ptr + 8, v2);
|
_mm_storeu_ps(ptr + 8, v2);
|
||||||
_mm_storeu_ps(ptr + 12, v3);
|
_mm_storeu_ps(ptr + 12, v3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b)
|
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
|
__m128i v0 = _mm_unpacklo_epi64(a.val, b.val);
|
||||||
__m128i t1 = _mm_unpackhi_epi64(a.val, b.val);
|
__m128i v1 = _mm_unpackhi_epi64(a.val, b.val);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, t0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
|
{
|
||||||
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c)
|
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
|
||||||
|
const v_uint64x2& c, hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
|
__m128i v0 = _mm_unpacklo_epi64(a.val, b.val);
|
||||||
__m128i t1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val));
|
__m128i v1 = _mm_unpacklo_epi64(c.val, _mm_unpackhi_epi64(a.val, a.val));
|
||||||
__m128i t2 = _mm_unpackhi_epi64(b.val, c.val);
|
__m128i v2 = _mm_unpackhi_epi64(b.val, c.val);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, t0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, const v_uint64x2& d)
|
inline void v_store_interleave(uint64 *ptr, const v_uint64x2& a, const v_uint64x2& b,
|
||||||
|
const v_uint64x2& c, const v_uint64x2& d,
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED)
|
||||||
{
|
{
|
||||||
__m128i t0 = _mm_unpacklo_epi64(a.val, b.val);
|
__m128i v0 = _mm_unpacklo_epi64(a.val, b.val);
|
||||||
__m128i t1 = _mm_unpacklo_epi64(c.val, d.val);
|
__m128i v1 = _mm_unpacklo_epi64(c.val, d.val);
|
||||||
__m128i t2 = _mm_unpackhi_epi64(a.val, b.val);
|
__m128i v2 = _mm_unpackhi_epi64(a.val, b.val);
|
||||||
__m128i t3 = _mm_unpackhi_epi64(c.val, d.val);
|
__m128i v3 = _mm_unpackhi_epi64(c.val, d.val);
|
||||||
|
|
||||||
_mm_storeu_si128((__m128i*)ptr, t0);
|
if( mode == hal::STORE_ALIGNED_NOCACHE )
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 2), t1);
|
{
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 4), t2);
|
_mm_stream_si128((__m128i*)(ptr), v0);
|
||||||
_mm_storeu_si128((__m128i*)(ptr + 6), t3);
|
_mm_stream_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
_mm_stream_si128((__m128i*)(ptr + 6), v3);
|
||||||
|
}
|
||||||
|
else if( mode == hal::STORE_ALIGNED )
|
||||||
|
{
|
||||||
|
_mm_store_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
_mm_store_si128((__m128i*)(ptr + 6), v3);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr), v0);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 2), v1);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 4), v2);
|
||||||
|
_mm_storeu_si128((__m128i*)(ptr + 6), v3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
|
#define OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(_Tpvec0, _Tp0, suffix0, _Tpvec1, _Tp1, suffix1) \
|
||||||
@ -2298,27 +2549,30 @@ inline void v_load_deinterleave( const _Tp0* ptr, _Tpvec0& a0, _Tpvec0& b0, _Tpv
|
|||||||
c0 = v_reinterpret_as_##suffix0(c1); \
|
c0 = v_reinterpret_as_##suffix0(c1); \
|
||||||
d0 = v_reinterpret_as_##suffix0(d1); \
|
d0 = v_reinterpret_as_##suffix0(d1); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0 ) \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, mode); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, const _Tpvec0& c0 ) \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
||||||
|
const _Tpvec0& c0, hal::StoreMode mode = hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1, c1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, c1, mode); \
|
||||||
} \
|
} \
|
||||||
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
inline void v_store_interleave( _Tp0* ptr, const _Tpvec0& a0, const _Tpvec0& b0, \
|
||||||
const _Tpvec0& c0, const _Tpvec0& d0 ) \
|
const _Tpvec0& c0, const _Tpvec0& d0, \
|
||||||
|
hal::StoreMode mode = hal::STORE_UNALIGNED ) \
|
||||||
{ \
|
{ \
|
||||||
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
_Tpvec1 a1 = v_reinterpret_as_##suffix1(a0); \
|
||||||
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
_Tpvec1 b1 = v_reinterpret_as_##suffix1(b0); \
|
||||||
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
_Tpvec1 c1 = v_reinterpret_as_##suffix1(c0); \
|
||||||
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
|
_Tpvec1 d1 = v_reinterpret_as_##suffix1(d0); \
|
||||||
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1); \
|
v_store_interleave((_Tp1*)ptr, a1, b1, c1, d1, mode); \
|
||||||
}
|
}
|
||||||
|
|
||||||
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
|
OPENCV_HAL_IMPL_SSE_LOADSTORE_INTERLEAVE(v_int8x16, schar, s8, v_uint8x16, uchar, u8)
|
||||||
|
@ -249,6 +249,10 @@ inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
|||||||
{ st(a.val, 0, ptr); } \
|
{ st(a.val, 0, ptr); } \
|
||||||
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
inline void v_store_aligned(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||||
{ st_a(a.val, 0, ptr); } \
|
{ st_a(a.val, 0, ptr); } \
|
||||||
|
inline void v_store_aligned_nocache(VSX_UNUSED(_Tp* ptr), const _Tpvec& a) \
|
||||||
|
{ st_a(a.val, 0, ptr); } \
|
||||||
|
inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode mode) \
|
||||||
|
{ if(mode == hal::STORE_UNALIGNED) st(a.val, 0, ptr); else st_a(a.val, 0, ptr); } \
|
||||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||||
{ vec_st_l8(a.val, ptr); } \
|
{ vec_st_l8(a.val, ptr); } \
|
||||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||||
@ -281,13 +285,16 @@ inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
|
|||||||
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
|
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
|
||||||
_Tpvec& c, _Tpvec& d) \
|
_Tpvec& c, _Tpvec& d) \
|
||||||
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
|
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
|
||||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
|
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ vec_st_interleave(a.val, b.val, ptr); } \
|
{ vec_st_interleave(a.val, b.val, ptr); } \
|
||||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
|
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
|
||||||
const _Tpvec& b, const _Tpvec& c) \
|
const _Tpvec& b, const _Tpvec& c, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
|
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
|
||||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
||||||
const _Tpvec& c, const _Tpvec& d) \
|
const _Tpvec& c, const _Tpvec& d, \
|
||||||
|
hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \
|
||||||
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
|
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
|
||||||
|
|
||||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
|
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
|
||||||
|
@ -457,6 +457,18 @@ Returns empty string if feature is not defined
|
|||||||
*/
|
*/
|
||||||
CV_EXPORTS_W String getHardwareFeatureName(int feature);
|
CV_EXPORTS_W String getHardwareFeatureName(int feature);
|
||||||
|
|
||||||
|
/** @brief Returns list of CPU features enabled during compilation.
|
||||||
|
|
||||||
|
Returned value is a string containing space separated list of CPU features with following markers:
|
||||||
|
|
||||||
|
- no markers - baseline features
|
||||||
|
- prefix `*` - features enabled in dispatcher
|
||||||
|
- suffix `?` - features enabled but not available in HW
|
||||||
|
|
||||||
|
Example: `SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX?`
|
||||||
|
*/
|
||||||
|
CV_EXPORTS std::string getCPUFeaturesLine();
|
||||||
|
|
||||||
/** @brief Returns the number of logical CPUs available for the process.
|
/** @brief Returns the number of logical CPUs available for the process.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W int getNumberOfCPUs();
|
CV_EXPORTS_W int getNumberOfCPUs();
|
||||||
|
@ -1180,7 +1180,8 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
|
|||||||
CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
|
CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
|
||||||
op == CMP_NE || op == CMP_GE || op == CMP_GT );
|
op == CMP_NE || op == CMP_GE || op == CMP_GT );
|
||||||
|
|
||||||
if(_src1.empty() || _src2.empty())
|
CV_Assert(_src1.empty() == _src2.empty());
|
||||||
|
if (_src1.empty() && _src2.empty())
|
||||||
{
|
{
|
||||||
_dst.release();
|
_dst.release();
|
||||||
return;
|
return;
|
||||||
|
@ -411,7 +411,8 @@ Mat& Mat::operator = (const Scalar& s)
|
|||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION()
|
CV_INSTRUMENT_REGION()
|
||||||
|
|
||||||
if (empty()) return *this;
|
if (this->empty())
|
||||||
|
return *this;
|
||||||
|
|
||||||
const Mat* arrays[] = { this };
|
const Mat* arrays[] = { this };
|
||||||
uchar* dptr;
|
uchar* dptr;
|
||||||
|
@ -515,17 +515,17 @@ void exp32f( const float *_x, float *y, int n )
|
|||||||
|
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
const int VECSZ = v_float32::nlanes;
|
const int VECSZ = v_float32::nlanes;
|
||||||
static const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
|
const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
|
||||||
static const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
|
const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
|
||||||
static const v_float32 vminval = vx_setall_f32(minval);
|
const v_float32 vminval = vx_setall_f32(minval);
|
||||||
static const v_float32 vmaxval = vx_setall_f32(maxval);
|
const v_float32 vmaxval = vx_setall_f32(maxval);
|
||||||
|
|
||||||
static const v_float32 vA1 = vx_setall_f32((float)A1);
|
const v_float32 vA1 = vx_setall_f32((float)A1);
|
||||||
static const v_float32 vA2 = vx_setall_f32((float)A2);
|
const v_float32 vA2 = vx_setall_f32((float)A2);
|
||||||
static const v_float32 vA3 = vx_setall_f32((float)A3);
|
const v_float32 vA3 = vx_setall_f32((float)A3);
|
||||||
static const v_float32 vA4 = vx_setall_f32((float)A4);
|
const v_float32 vA4 = vx_setall_f32((float)A4);
|
||||||
|
|
||||||
static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
|
const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
|
||||||
bool y_aligned = (size_t)(void*)y % 32 == 0;
|
bool y_aligned = (size_t)(void*)y % 32 == 0;
|
||||||
|
|
||||||
for( ; i < n; i += VECSZ*2 )
|
for( ; i < n; i += VECSZ*2 )
|
||||||
@ -627,18 +627,18 @@ void exp64f( const double *_x, double *y, int n )
|
|||||||
|
|
||||||
#if CV_SIMD_64F
|
#if CV_SIMD_64F
|
||||||
const int VECSZ = v_float64::nlanes;
|
const int VECSZ = v_float64::nlanes;
|
||||||
static const v_float64 vprescale = vx_setall_f64(exp_prescale);
|
const v_float64 vprescale = vx_setall_f64(exp_prescale);
|
||||||
static const v_float64 vpostscale = vx_setall_f64(exp_postscale);
|
const v_float64 vpostscale = vx_setall_f64(exp_postscale);
|
||||||
static const v_float64 vminval = vx_setall_f64(minval);
|
const v_float64 vminval = vx_setall_f64(minval);
|
||||||
static const v_float64 vmaxval = vx_setall_f64(maxval);
|
const v_float64 vmaxval = vx_setall_f64(maxval);
|
||||||
|
|
||||||
static const v_float64 vA1 = vx_setall_f64(A1);
|
const v_float64 vA1 = vx_setall_f64(A1);
|
||||||
static const v_float64 vA2 = vx_setall_f64(A2);
|
const v_float64 vA2 = vx_setall_f64(A2);
|
||||||
static const v_float64 vA3 = vx_setall_f64(A3);
|
const v_float64 vA3 = vx_setall_f64(A3);
|
||||||
static const v_float64 vA4 = vx_setall_f64(A4);
|
const v_float64 vA4 = vx_setall_f64(A4);
|
||||||
static const v_float64 vA5 = vx_setall_f64(A5);
|
const v_float64 vA5 = vx_setall_f64(A5);
|
||||||
|
|
||||||
static const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
|
const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
|
||||||
bool y_aligned = (size_t)(void*)y % 32 == 0;
|
bool y_aligned = (size_t)(void*)y % 32 == 0;
|
||||||
|
|
||||||
for( ; i < n; i += VECSZ*2 )
|
for( ; i < n; i += VECSZ*2 )
|
||||||
@ -1024,13 +1024,13 @@ void log32f( const float *_x, float *y, int n )
|
|||||||
|
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
const int VECSZ = v_float32::nlanes;
|
const int VECSZ = v_float32::nlanes;
|
||||||
static const v_float32 vln2 = vx_setall_f32((float)ln_2);
|
const v_float32 vln2 = vx_setall_f32((float)ln_2);
|
||||||
static const v_float32 v1 = vx_setall_f32(1.f);
|
const v_float32 v1 = vx_setall_f32(1.f);
|
||||||
static const v_float32 vshift = vx_setall_f32(-1.f/512);
|
const v_float32 vshift = vx_setall_f32(-1.f/512);
|
||||||
|
|
||||||
static const v_float32 vA0 = vx_setall_f32(A0);
|
const v_float32 vA0 = vx_setall_f32(A0);
|
||||||
static const v_float32 vA1 = vx_setall_f32(A1);
|
const v_float32 vA1 = vx_setall_f32(A1);
|
||||||
static const v_float32 vA2 = vx_setall_f32(A2);
|
const v_float32 vA2 = vx_setall_f32(A2);
|
||||||
|
|
||||||
for( ; i < n; i += VECSZ )
|
for( ; i < n; i += VECSZ )
|
||||||
{
|
{
|
||||||
@ -1097,9 +1097,9 @@ void log64f( const double *x, double *y, int n )
|
|||||||
|
|
||||||
#if CV_SIMD_64F
|
#if CV_SIMD_64F
|
||||||
const int VECSZ = v_float64::nlanes;
|
const int VECSZ = v_float64::nlanes;
|
||||||
static const v_float64 vln2 = vx_setall_f64(ln_2);
|
const v_float64 vln2 = vx_setall_f64(ln_2);
|
||||||
|
|
||||||
static const v_float64
|
const v_float64
|
||||||
vA0 = vx_setall_f64(A0), vA1 = vx_setall_f64(A1),
|
vA0 = vx_setall_f64(A0), vA1 = vx_setall_f64(A1),
|
||||||
vA2 = vx_setall_f64(A2), vA3 = vx_setall_f64(A3),
|
vA2 = vx_setall_f64(A2), vA3 = vx_setall_f64(A3),
|
||||||
vA4 = vx_setall_f64(A4), vA5 = vx_setall_f64(A5),
|
vA4 = vx_setall_f64(A4), vA5 = vx_setall_f64(A5),
|
||||||
|
@ -602,13 +602,13 @@ void Mat::pop_back(size_t nelems)
|
|||||||
|
|
||||||
void Mat::push_back_(const void* elem)
|
void Mat::push_back_(const void* elem)
|
||||||
{
|
{
|
||||||
int r = size.p[0];
|
size_t r = size.p[0];
|
||||||
if( isSubmatrix() || dataend + step.p[0] > datalimit )
|
if( isSubmatrix() || dataend + step.p[0] > datalimit )
|
||||||
reserve( std::max(r + 1, (r*3+1)/2) );
|
reserve( std::max(r + 1, (r*3+1)/2) );
|
||||||
|
|
||||||
size_t esz = elemSize();
|
size_t esz = elemSize();
|
||||||
memcpy(data + r*step.p[0], elem, esz);
|
memcpy(data + r*step.p[0], elem, esz);
|
||||||
size.p[0] = r + 1;
|
size.p[0] = int(r + 1);
|
||||||
dataend += step.p[0];
|
dataend += step.p[0];
|
||||||
uint64 tsz = size.p[0];
|
uint64 tsz = size.p[0];
|
||||||
for( int i = 1; i < dims; i++ )
|
for( int i = 1; i < dims; i++ )
|
||||||
@ -709,7 +709,8 @@ void Mat::resize(size_t nelems, const Scalar& s)
|
|||||||
|
|
||||||
void Mat::push_back(const Mat& elems)
|
void Mat::push_back(const Mat& elems)
|
||||||
{
|
{
|
||||||
int r = size.p[0], delta = elems.size.p[0];
|
size_t r = size.p[0];
|
||||||
|
size_t delta = elems.size.p[0];
|
||||||
if( delta == 0 )
|
if( delta == 0 )
|
||||||
return;
|
return;
|
||||||
if( this == &elems )
|
if( this == &elems )
|
||||||
@ -726,7 +727,7 @@ void Mat::push_back(const Mat& elems)
|
|||||||
|
|
||||||
size.p[0] = elems.size.p[0];
|
size.p[0] = elems.size.p[0];
|
||||||
bool eq = size == elems.size;
|
bool eq = size == elems.size;
|
||||||
size.p[0] = r;
|
size.p[0] = int(r);
|
||||||
if( !eq )
|
if( !eq )
|
||||||
CV_Error(CV_StsUnmatchedSizes, "Pushed vector length is not equal to matrix row length");
|
CV_Error(CV_StsUnmatchedSizes, "Pushed vector length is not equal to matrix row length");
|
||||||
if( type() != elems.type() )
|
if( type() != elems.type() )
|
||||||
@ -735,7 +736,7 @@ void Mat::push_back(const Mat& elems)
|
|||||||
if( isSubmatrix() || dataend + step.p[0]*delta > datalimit )
|
if( isSubmatrix() || dataend + step.p[0]*delta > datalimit )
|
||||||
reserve( std::max(r + delta, (r*3+1)/2) );
|
reserve( std::max(r + delta, (r*3+1)/2) );
|
||||||
|
|
||||||
size.p[0] += delta;
|
size.p[0] += int(delta);
|
||||||
dataend += step.p[0]*delta;
|
dataend += step.p[0]*delta;
|
||||||
|
|
||||||
//updateContinuityFlag(*this);
|
//updateContinuityFlag(*this);
|
||||||
@ -744,7 +745,7 @@ void Mat::push_back(const Mat& elems)
|
|||||||
memcpy(data + r*step.p[0], elems.data, elems.total()*elems.elemSize());
|
memcpy(data + r*step.p[0], elems.data, elems.total()*elems.elemSize());
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Mat part = rowRange(r, r + delta);
|
Mat part = rowRange(int(r), int(r + delta));
|
||||||
elems.copyTo(part);
|
elems.copyTo(part);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -766,11 +766,13 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
|
|||||||
{
|
{
|
||||||
CV_INSTRUMENT_REGION()
|
CV_INSTRUMENT_REGION()
|
||||||
|
|
||||||
|
CV_Assert(!_src.empty());
|
||||||
|
CV_Assert( _mask.empty() || _mask.type() == CV_8UC1 );
|
||||||
|
|
||||||
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||||
ocl_meanStdDev(_src, _mean, _sdv, _mask))
|
ocl_meanStdDev(_src, _mean, _sdv, _mask))
|
||||||
|
|
||||||
Mat src = _src.getMat(), mask = _mask.getMat();
|
Mat src = _src.getMat(), mask = _mask.getMat();
|
||||||
CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
|
|
||||||
|
|
||||||
CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows),
|
CV_OVX_RUN(!ovx::skipSmallImages<VX_KERNEL_MEAN_STDDEV>(src.cols, src.rows),
|
||||||
openvx_meanStdDev(src, _mean, _sdv, mask))
|
openvx_meanStdDev(src, _mean, _sdv, mask))
|
||||||
|
@ -9,21 +9,58 @@
|
|||||||
namespace cv { namespace hal {
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
|
/*
|
||||||
|
The trick with STORE_UNALIGNED/STORE_ALIGNED_NOCACHE is the following:
|
||||||
|
on IA there are instructions movntps and such to which
|
||||||
|
v_store_interleave(...., STORE_ALIGNED_NOCACHE) is mapped.
|
||||||
|
Those instructions write directly into memory w/o touching cache
|
||||||
|
that results in dramatic speed improvements, especially on
|
||||||
|
large arrays (FullHD, 4K etc.).
|
||||||
|
|
||||||
|
Those intrinsics require the destination address to be aligned
|
||||||
|
by 16/32 bits (with SSE2 and AVX2, respectively).
|
||||||
|
So we potentially split the processing into 3 stages:
|
||||||
|
1) the optional prefix part [0:i0), where we use simple unaligned stores.
|
||||||
|
2) the optional main part [i0:len - VECSZ], where we use "nocache" mode.
|
||||||
|
But in some cases we have to use unaligned stores in this part.
|
||||||
|
3) the optional suffix part (the tail) (len - VECSZ:len) where we switch back to "unaligned" mode
|
||||||
|
to process the remaining len - VECSZ elements.
|
||||||
|
In principle there can be very poorly aligned data where there is no main part.
|
||||||
|
For that we set i0=0 and use unaligned stores for the whole array.
|
||||||
|
*/
|
||||||
template<typename T, typename VecT> static void
|
template<typename T, typename VecT> static void
|
||||||
vecmerge_( const T** src, T* dst, int len, int cn )
|
vecmerge_( const T** src, T* dst, int len, int cn )
|
||||||
{
|
{
|
||||||
int i;
|
const int VECSZ = VecT::nlanes;
|
||||||
|
int i, i0 = 0;
|
||||||
const T* src0 = src[0];
|
const T* src0 = src[0];
|
||||||
const T* src1 = src[1];
|
const T* src1 = src[1];
|
||||||
|
|
||||||
const int VECSZ = VecT::nlanes;
|
int r = (int)((size_t)(void*)dst % (VECSZ*sizeof(T)));
|
||||||
|
hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
if( r != 0 )
|
||||||
|
{
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
if( r % cn == 0 && len > VECSZ )
|
||||||
|
i0 = VECSZ - (r / cn);
|
||||||
|
}
|
||||||
|
|
||||||
if( cn == 2 )
|
if( cn == 2 )
|
||||||
{
|
{
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
|
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
|
||||||
v_store_interleave(dst + i*cn, a, b);
|
v_store_interleave(dst + i*cn, a, b, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( cn == 3 )
|
else if( cn == 3 )
|
||||||
@ -31,9 +68,18 @@ vecmerge_( const T** src, T* dst, int len, int cn )
|
|||||||
const T* src2 = src[2];
|
const T* src2 = src[2];
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
|
VecT a = vx_load(src0 + i), b = vx_load(src1 + i), c = vx_load(src2 + i);
|
||||||
v_store_interleave(dst + i*cn, a, b, c);
|
v_store_interleave(dst + i*cn, a, b, c, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -43,10 +89,19 @@ vecmerge_( const T** src, T* dst, int len, int cn )
|
|||||||
const T* src3 = src[3];
|
const T* src3 = src[3];
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
|
VecT a = vx_load(src0 + i), b = vx_load(src1 + i);
|
||||||
VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
|
VecT c = vx_load(src2 + i), d = vx_load(src3 + i);
|
||||||
v_store_interleave(dst + i*cn, a, b, c, d);
|
v_store_interleave(dst + i*cn, a, b, c, d, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vx_cleanup();
|
vx_cleanup();
|
||||||
|
@ -2834,7 +2834,22 @@ extern "C" {
|
|||||||
|
|
||||||
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
|
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
|
||||||
{
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
((cv::ocl::Kernel::Impl*)p)->finit(e);
|
((cv::ocl::Kernel::Impl*)p)->finit(e);
|
||||||
|
}
|
||||||
|
catch (const cv::Exception& exc)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "OCL: Unexpected OpenCV exception in OpenCL callback: " << exc.what());
|
||||||
|
}
|
||||||
|
catch (const std::exception& exc)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "OCL: Unexpected C++ exception in OpenCL callback: " << exc.what());
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
CV_LOG_ERROR(NULL, "OCL: Unexpected unknown C++ exception in OpenCL callback");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -511,8 +511,8 @@ static RandnScaleFunc randnScaleTab[] =
|
|||||||
void RNG::fill( InputOutputArray _mat, int disttype,
|
void RNG::fill( InputOutputArray _mat, int disttype,
|
||||||
InputArray _param1arg, InputArray _param2arg, bool saturateRange )
|
InputArray _param1arg, InputArray _param2arg, bool saturateRange )
|
||||||
{
|
{
|
||||||
if (_mat.empty())
|
CV_Assert(!_mat.empty());
|
||||||
return;
|
|
||||||
Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
|
Mat mat = _mat.getMat(), _param1 = _param1arg.getMat(), _param2 = _param2arg.getMat();
|
||||||
int depth = mat.depth(), cn = mat.channels();
|
int depth = mat.depth(), cn = mat.channels();
|
||||||
AutoBuffer<double> _parambuf;
|
AutoBuffer<double> _parambuf;
|
||||||
|
@ -9,23 +9,46 @@
|
|||||||
namespace cv { namespace hal {
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
#if CV_SIMD
|
#if CV_SIMD
|
||||||
|
// see the comments for vecmerge_ in merge.cpp
|
||||||
template<typename T, typename VecT> static void
|
template<typename T, typename VecT> static void
|
||||||
vecsplit_( const T* src, T** dst, int len, int cn )
|
vecsplit_( const T* src, T** dst, int len, int cn )
|
||||||
{
|
{
|
||||||
int i;
|
const int VECSZ = VecT::nlanes;
|
||||||
|
int i, i0 = 0;
|
||||||
T* dst0 = dst[0];
|
T* dst0 = dst[0];
|
||||||
T* dst1 = dst[1];
|
T* dst1 = dst[1];
|
||||||
|
|
||||||
const int VECSZ = VecT::nlanes;
|
int r0 = (int)((size_t)(void*)dst0 % (VECSZ*sizeof(T)));
|
||||||
|
int r1 = (int)((size_t)(void*)dst1 % (VECSZ*sizeof(T)));
|
||||||
|
int r2 = cn > 2 ? (int)((size_t)(void*)dst[2] % (VECSZ*sizeof(T))) : r0;
|
||||||
|
int r3 = cn > 3 ? (int)((size_t)(void*)dst[3] % (VECSZ*sizeof(T))) : r0;
|
||||||
|
|
||||||
|
hal::StoreMode mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
if( (r0|r1|r2|r3) != 0 )
|
||||||
|
{
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
if( r0 == r1 && r0 == r2 && r0 == r3 && r0 % cn == 0 && len > VECSZ )
|
||||||
|
i0 = VECSZ - (r0 / cn);
|
||||||
|
}
|
||||||
|
|
||||||
if( cn == 2 )
|
if( cn == 2 )
|
||||||
{
|
{
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a, b;
|
VecT a, b;
|
||||||
v_load_deinterleave(src + i*cn, a, b);
|
v_load_deinterleave(src + i*cn, a, b);
|
||||||
v_store(dst0 + i, a);
|
v_store(dst0 + i, a, mode);
|
||||||
v_store(dst1 + i, b);
|
v_store(dst1 + i, b, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( cn == 3 )
|
else if( cn == 3 )
|
||||||
@ -33,12 +56,21 @@ vecsplit_( const T* src, T** dst, int len, int cn )
|
|||||||
T* dst2 = dst[2];
|
T* dst2 = dst[2];
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a, b, c;
|
VecT a, b, c;
|
||||||
v_load_deinterleave(src + i*cn, a, b, c);
|
v_load_deinterleave(src + i*cn, a, b, c);
|
||||||
v_store(dst0 + i, a);
|
v_store(dst0 + i, a, mode);
|
||||||
v_store(dst1 + i, b);
|
v_store(dst1 + i, b, mode);
|
||||||
v_store(dst2 + i, c);
|
v_store(dst2 + i, c, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -48,13 +80,22 @@ vecsplit_( const T* src, T** dst, int len, int cn )
|
|||||||
T* dst3 = dst[3];
|
T* dst3 = dst[3];
|
||||||
for( i = 0; i < len; i += VECSZ )
|
for( i = 0; i < len; i += VECSZ )
|
||||||
{
|
{
|
||||||
i = std::min( len - VECSZ, i );
|
if( i > len - VECSZ )
|
||||||
|
{
|
||||||
|
i = len - VECSZ;
|
||||||
|
mode = hal::STORE_UNALIGNED;
|
||||||
|
}
|
||||||
VecT a, b, c, d;
|
VecT a, b, c, d;
|
||||||
v_load_deinterleave(src + i*cn, a, b, c, d);
|
v_load_deinterleave(src + i*cn, a, b, c, d);
|
||||||
v_store(dst0 + i, a);
|
v_store(dst0 + i, a, mode);
|
||||||
v_store(dst1 + i, b);
|
v_store(dst1 + i, b, mode);
|
||||||
v_store(dst2 + i, c);
|
v_store(dst2 + i, c, mode);
|
||||||
v_store(dst3 + i, d);
|
v_store(dst3 + i, d, mode);
|
||||||
|
if( i < i0 )
|
||||||
|
{
|
||||||
|
i = i0 - VECSZ;
|
||||||
|
mode = hal::STORE_ALIGNED_NOCACHE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vx_cleanup();
|
vx_cleanup();
|
||||||
|
@ -654,6 +654,27 @@ String getHardwareFeatureName(int feature)
|
|||||||
return name ? String(name) : String();
|
return name ? String(name) : String();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string getCPUFeaturesLine()
|
||||||
|
{
|
||||||
|
const int features[] = { CV_CPU_BASELINE_FEATURES, CV_CPU_DISPATCH_FEATURES };
|
||||||
|
const int sz = sizeof(features) / sizeof(features[0]);
|
||||||
|
std::string result;
|
||||||
|
std::string prefix;
|
||||||
|
for (int i = 1; i < sz; ++i)
|
||||||
|
{
|
||||||
|
if (features[i] == 0)
|
||||||
|
{
|
||||||
|
prefix = "*";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (i != 1) result.append(" ");
|
||||||
|
result.append(prefix);
|
||||||
|
result.append(getHWFeatureNameSafe(features[i]));
|
||||||
|
if (!checkHardwareSupport(features[i])) result.append("?");
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
volatile bool useOptimizedFlag = true;
|
volatile bool useOptimizedFlag = true;
|
||||||
|
|
||||||
void setUseOptimized( bool flag )
|
void setUseOptimized( bool flag )
|
||||||
|
@ -84,14 +84,11 @@ UMatData::~UMatData()
|
|||||||
allocatorFlags_ = 0;
|
allocatorFlags_ = 0;
|
||||||
if (originalUMatData)
|
if (originalUMatData)
|
||||||
{
|
{
|
||||||
UMatData* u = originalUMatData;
|
|
||||||
CV_XADD(&(u->urefcount), -1);
|
|
||||||
CV_XADD(&(u->refcount), -1);
|
|
||||||
bool showWarn = false;
|
bool showWarn = false;
|
||||||
if (u->refcount == 0)
|
UMatData* u = originalUMatData;
|
||||||
|
bool zero_Ref = CV_XADD(&(u->refcount), -1) == 1;
|
||||||
|
if (zero_Ref)
|
||||||
{
|
{
|
||||||
if (u->urefcount > 0)
|
|
||||||
showWarn = true;
|
|
||||||
// simulate Mat::deallocate
|
// simulate Mat::deallocate
|
||||||
if (u->mapcount != 0)
|
if (u->mapcount != 0)
|
||||||
{
|
{
|
||||||
@ -102,7 +99,10 @@ UMatData::~UMatData()
|
|||||||
// we don't do "map", so we can't do "unmap"
|
// we don't do "map", so we can't do "unmap"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources
|
bool zero_URef = CV_XADD(&(u->urefcount), -1) == 1;
|
||||||
|
if (zero_Ref && !zero_URef)
|
||||||
|
showWarn = true;
|
||||||
|
if (zero_Ref && zero_URef) // oops, we need to free resources
|
||||||
{
|
{
|
||||||
showWarn = true;
|
showWarn = true;
|
||||||
// simulate UMat::deallocate
|
// simulate UMat::deallocate
|
||||||
|
@ -2008,11 +2008,9 @@ TEST(Subtract, scalarc4_matc4)
|
|||||||
TEST(Compare, empty)
|
TEST(Compare, empty)
|
||||||
{
|
{
|
||||||
cv::Mat temp, dst1, dst2;
|
cv::Mat temp, dst1, dst2;
|
||||||
cv::compare(temp, temp, dst1, cv::CMP_EQ);
|
EXPECT_NO_THROW(cv::compare(temp, temp, dst1, cv::CMP_EQ));
|
||||||
dst2 = temp > 5;
|
|
||||||
|
|
||||||
EXPECT_TRUE(dst1.empty());
|
EXPECT_TRUE(dst1.empty());
|
||||||
EXPECT_TRUE(dst2.empty());
|
EXPECT_THROW(dst2 = temp > 5, cv::Exception);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Compare, regression_8999)
|
TEST(Compare, regression_8999)
|
||||||
@ -2020,9 +2018,7 @@ TEST(Compare, regression_8999)
|
|||||||
Mat_<double> A(4,1); A << 1, 3, 2, 4;
|
Mat_<double> A(4,1); A << 1, 3, 2, 4;
|
||||||
Mat_<double> B(1,1); B << 2;
|
Mat_<double> B(1,1); B << 2;
|
||||||
Mat C;
|
Mat C;
|
||||||
ASSERT_ANY_THROW({
|
EXPECT_THROW(cv::compare(A, B, C, CMP_LT), cv::Exception);
|
||||||
cv::compare(A, B, C, CMP_LT);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,106 +43,35 @@
|
|||||||
|
|
||||||
namespace opencv_test { namespace {
|
namespace opencv_test { namespace {
|
||||||
|
|
||||||
class Core_ConcatenationTest : public cvtest::BaseTest
|
TEST(Core_Concatenation, empty)
|
||||||
{
|
{
|
||||||
public:
|
const Mat mat0x5(0,5, CV_8U, Scalar::all(1));
|
||||||
Core_ConcatenationTest(bool horizontal, bool firstEmpty, bool secondEmpty);
|
const Mat mat10x5(10,5, CV_8U, Scalar::all(1));
|
||||||
protected:
|
const Mat mat20x5(20,5, CV_8U, Scalar::all(1));
|
||||||
int prepare_test_case( int );
|
|
||||||
void run_func();
|
|
||||||
int validate_test_results( int );
|
|
||||||
|
|
||||||
Mat mat0x5;
|
const Mat mat5x0(5,0, CV_8U, Scalar::all(1));
|
||||||
Mat mat10x5;
|
const Mat mat5x10(5,10, CV_8U, Scalar::all(1));
|
||||||
Mat mat20x5;
|
const Mat mat5x20(5,20, CV_8U, Scalar::all(1));
|
||||||
|
|
||||||
Mat mat5x0;
|
|
||||||
Mat mat5x10;
|
|
||||||
Mat mat5x20;
|
|
||||||
|
|
||||||
Mat result;
|
Mat result;
|
||||||
|
|
||||||
bool horizontal;
|
cv::hconcat(mat5x0, mat5x0, result);
|
||||||
bool firstEmpty;
|
EXPECT_MAT_N_DIFF(result, mat5x0, 0);
|
||||||
bool secondEmpty;
|
cv::hconcat(mat5x0, mat5x10, result);
|
||||||
|
EXPECT_MAT_N_DIFF(result, mat5x10, 0);
|
||||||
|
cv::hconcat(mat5x10, mat5x0, result);
|
||||||
|
EXPECT_MAT_N_DIFF(result, mat5x10, 0);
|
||||||
|
cv::hconcat(mat5x10, mat5x10, result);
|
||||||
|
EXPECT_MAT_N_DIFF(result, mat5x20, 0);
|
||||||
|
|
||||||
private:
|
cv::vconcat(mat0x5, mat0x5, result);
|
||||||
static bool areEqual(const Mat& m1, const Mat& m2);
|
EXPECT_MAT_N_DIFF(result, mat0x5, 0);
|
||||||
|
cv::vconcat(mat0x5, mat10x5, result);
|
||||||
};
|
EXPECT_MAT_N_DIFF(result, mat10x5, 0);
|
||||||
|
cv::vconcat(mat10x5, mat0x5, result);
|
||||||
Core_ConcatenationTest::Core_ConcatenationTest(bool horizontal_, bool firstEmpty_, bool secondEmpty_)
|
EXPECT_MAT_N_DIFF(result, mat10x5, 0);
|
||||||
: horizontal(horizontal_)
|
cv::vconcat(mat10x5, mat10x5, result);
|
||||||
, firstEmpty(firstEmpty_)
|
EXPECT_MAT_N_DIFF(result, mat20x5, 0);
|
||||||
, secondEmpty(secondEmpty_)
|
|
||||||
{
|
|
||||||
test_case_count = 1;
|
|
||||||
|
|
||||||
mat0x5 = Mat::ones(0,5, CV_8U);
|
|
||||||
mat10x5 = Mat::ones(10,5, CV_8U);
|
|
||||||
mat20x5 = Mat::ones(20,5, CV_8U);
|
|
||||||
|
|
||||||
mat5x0 = Mat::ones(5,0, CV_8U);
|
|
||||||
mat5x10 = Mat::ones(5,10, CV_8U);
|
|
||||||
mat5x20 = Mat::ones(5,20, CV_8U);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int Core_ConcatenationTest::prepare_test_case( int test_case_idx )
|
|
||||||
{
|
|
||||||
cvtest::BaseTest::prepare_test_case( test_case_idx );
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Core_ConcatenationTest::run_func()
|
|
||||||
{
|
|
||||||
if (horizontal)
|
|
||||||
{
|
|
||||||
cv::hconcat((firstEmpty ? mat5x0 : mat5x10),
|
|
||||||
(secondEmpty ? mat5x0 : mat5x10),
|
|
||||||
result);
|
|
||||||
} else {
|
|
||||||
cv::vconcat((firstEmpty ? mat0x5 : mat10x5),
|
|
||||||
(secondEmpty ? mat0x5 : mat10x5),
|
|
||||||
result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int Core_ConcatenationTest::validate_test_results( int )
|
|
||||||
{
|
|
||||||
Mat expected;
|
|
||||||
|
|
||||||
if (firstEmpty && secondEmpty)
|
|
||||||
expected = (horizontal ? mat5x0 : mat0x5);
|
|
||||||
else if ((firstEmpty && !secondEmpty) || (!firstEmpty && secondEmpty))
|
|
||||||
expected = (horizontal ? mat5x10 : mat10x5);
|
|
||||||
else
|
|
||||||
expected = (horizontal ? mat5x20 : mat20x5);
|
|
||||||
|
|
||||||
if (areEqual(expected, result))
|
|
||||||
{
|
|
||||||
return cvtest::TS::OK;
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
ts->printf( cvtest::TS::LOG, "Concatenation failed");
|
|
||||||
ts->set_failed_test_info( cvtest::TS::FAIL_MISMATCH );
|
|
||||||
}
|
|
||||||
|
|
||||||
return cvtest::TS::OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Core_ConcatenationTest::areEqual(const Mat &m1, const Mat &m2)
|
|
||||||
{
|
|
||||||
return m1.size() == m2.size()
|
|
||||||
&& m1.type() == m2.type()
|
|
||||||
&& countNonZero(m1 != m2) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(Core_Concatenation, hconcat_empty_nonempty) { Core_ConcatenationTest test(true, true, false); test.safe_run(); }
|
|
||||||
TEST(Core_Concatenation, hconcat_nonempty_empty) { Core_ConcatenationTest test(true, false, true); test.safe_run(); }
|
|
||||||
TEST(Core_Concatenation, hconcat_empty_empty) { Core_ConcatenationTest test(true, true, true); test.safe_run(); }
|
|
||||||
|
|
||||||
TEST(Core_Concatenation, vconcat_empty_nonempty) { Core_ConcatenationTest test(false, true, false); test.safe_run(); }
|
|
||||||
TEST(Core_Concatenation, vconcat_nonempty_empty) { Core_ConcatenationTest test(false, false, true); test.safe_run(); }
|
|
||||||
TEST(Core_Concatenation, vconcat_empty_empty) { Core_ConcatenationTest test(false, true, true); test.safe_run(); }
|
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
5
modules/core/test/test_intrin.avx2.cpp
Normal file
5
modules/core/test/test_intrin.avx2.cpp
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
#include "test_precomp.hpp"
|
||||||
|
#include "test_intrin.simd.hpp"
|
@ -2,249 +2,101 @@
|
|||||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
// of this distribution and at http://opencv.org/license.html.
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
#include "test_precomp.hpp"
|
#include "test_precomp.hpp"
|
||||||
|
#include "test_intrin.simd.hpp"
|
||||||
|
|
||||||
#include "test_intrin_utils.hpp"
|
#define CV_CPU_SIMD_FILENAME "test_intrin.simd.hpp"
|
||||||
|
|
||||||
#define CV_CPU_SIMD_FILENAME "test_intrin_utils.hpp"
|
|
||||||
#define CV_CPU_DISPATCH_MODE FP16
|
#define CV_CPU_DISPATCH_MODE FP16
|
||||||
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||||
|
|
||||||
|
#define CV_CPU_DISPATCH_MODE AVX2
|
||||||
using namespace cv;
|
#include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||||
|
|
||||||
namespace opencv_test { namespace hal {
|
namespace opencv_test { namespace hal {
|
||||||
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;
|
using namespace CV_CPU_OPTIMIZATION_NAMESPACE;
|
||||||
|
|
||||||
//============= 8-bit integer =====================================================================
|
TEST(hal_intrin, uint8x16)
|
||||||
|
{ test_hal_intrin_uint8(); }
|
||||||
|
|
||||||
TEST(hal_intrin, uint8x16) {
|
TEST(hal_intrin, int8x16)
|
||||||
TheTest<v_uint8x16>()
|
{ test_hal_intrin_int8(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_expand_q()
|
|
||||||
.test_addsub()
|
|
||||||
.test_addsub_wrap()
|
|
||||||
.test_cmp()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_mask()
|
|
||||||
.test_popcount()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
|
||||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(hal_intrin, int8x16) {
|
TEST(hal_intrin, uint16x8)
|
||||||
TheTest<v_int8x16>()
|
{ test_hal_intrin_uint16(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_expand_q()
|
|
||||||
.test_addsub()
|
|
||||||
.test_addsub_wrap()
|
|
||||||
.test_cmp()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_abs()
|
|
||||||
.test_mask()
|
|
||||||
.test_popcount()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
//============= 16-bit integer =====================================================================
|
TEST(hal_intrin, int16x8)
|
||||||
|
{ test_hal_intrin_int16(); }
|
||||||
|
|
||||||
TEST(hal_intrin, uint16x8) {
|
TEST(hal_intrin, int32x4)
|
||||||
TheTest<v_uint16x8>()
|
{ test_hal_intrin_int32(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_addsub()
|
|
||||||
.test_addsub_wrap()
|
|
||||||
.test_mul()
|
|
||||||
.test_mul_expand()
|
|
||||||
.test_cmp()
|
|
||||||
.test_shift<1>()
|
|
||||||
.test_shift<8>()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_reduce()
|
|
||||||
.test_mask()
|
|
||||||
.test_popcount()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
|
||||||
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(hal_intrin, int16x8) {
|
TEST(hal_intrin, uint32x4)
|
||||||
TheTest<v_int16x8>()
|
{ test_hal_intrin_uint32(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_addsub()
|
|
||||||
.test_addsub_wrap()
|
|
||||||
.test_mul()
|
|
||||||
.test_mul_expand()
|
|
||||||
.test_cmp()
|
|
||||||
.test_shift<1>()
|
|
||||||
.test_shift<8>()
|
|
||||||
.test_dot_prod()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_abs()
|
|
||||||
.test_reduce()
|
|
||||||
.test_mask()
|
|
||||||
.test_popcount()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
//============= 32-bit integer =====================================================================
|
TEST(hal_intrin, uint64x2)
|
||||||
|
{ test_hal_intrin_uint64(); }
|
||||||
|
|
||||||
TEST(hal_intrin, uint32x4) {
|
TEST(hal_intrin, int64x2)
|
||||||
TheTest<v_uint32x4>()
|
{ test_hal_intrin_int64(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_addsub()
|
|
||||||
.test_mul()
|
|
||||||
.test_mul_expand()
|
|
||||||
.test_cmp()
|
|
||||||
.test_shift<1>()
|
|
||||||
.test_shift<8>()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_reduce()
|
|
||||||
.test_mask()
|
|
||||||
.test_popcount()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
|
||||||
.test_transpose()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(hal_intrin, int32x4) {
|
TEST(hal_intrin, float32x4)
|
||||||
TheTest<v_int32x4>()
|
{ test_hal_intrin_float32(); }
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_expand()
|
|
||||||
.test_addsub()
|
|
||||||
.test_mul()
|
|
||||||
.test_abs()
|
|
||||||
.test_cmp()
|
|
||||||
.test_popcount()
|
|
||||||
.test_shift<1>().test_shift<8>()
|
|
||||||
.test_logic()
|
|
||||||
.test_min_max()
|
|
||||||
.test_absdiff()
|
|
||||||
.test_reduce()
|
|
||||||
.test_mask()
|
|
||||||
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
|
||||||
.test_unpack()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
|
||||||
.test_float_cvt32()
|
|
||||||
.test_float_cvt64()
|
|
||||||
.test_transpose()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
//============= 64-bit integer =====================================================================
|
TEST(hal_intrin, float64x2)
|
||||||
|
{ test_hal_intrin_float64(); }
|
||||||
|
|
||||||
TEST(hal_intrin, uint64x2) {
|
TEST(hal_intrin, float16x8)
|
||||||
TheTest<v_uint64x2>()
|
|
||||||
.test_loadstore()
|
|
||||||
.test_addsub()
|
|
||||||
.test_shift<1>().test_shift<8>()
|
|
||||||
.test_logic()
|
|
||||||
.test_extract<0>().test_extract<1>()
|
|
||||||
.test_rotate<0>().test_rotate<1>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(hal_intrin, int64x2) {
|
|
||||||
TheTest<v_int64x2>()
|
|
||||||
.test_loadstore()
|
|
||||||
.test_addsub()
|
|
||||||
.test_shift<1>().test_shift<8>()
|
|
||||||
.test_logic()
|
|
||||||
.test_extract<0>().test_extract<1>()
|
|
||||||
.test_rotate<0>().test_rotate<1>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
//============= Floating point =====================================================================
|
|
||||||
|
|
||||||
TEST(hal_intrin, float32x4) {
|
|
||||||
TheTest<v_float32x4>()
|
|
||||||
.test_loadstore()
|
|
||||||
.test_interleave()
|
|
||||||
.test_interleave_2channel()
|
|
||||||
.test_addsub()
|
|
||||||
.test_mul()
|
|
||||||
.test_div()
|
|
||||||
.test_cmp()
|
|
||||||
.test_sqrt_abs()
|
|
||||||
.test_min_max()
|
|
||||||
.test_float_absdiff()
|
|
||||||
.test_reduce()
|
|
||||||
.test_mask()
|
|
||||||
.test_unpack()
|
|
||||||
.test_float_math()
|
|
||||||
.test_float_cvt64()
|
|
||||||
.test_matmul()
|
|
||||||
.test_transpose()
|
|
||||||
.test_reduce_sum4()
|
|
||||||
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
|
||||||
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if CV_SIMD128_64F
|
|
||||||
TEST(hal_intrin, float64x2) {
|
|
||||||
TheTest<v_float64x2>()
|
|
||||||
.test_loadstore()
|
|
||||||
.test_addsub()
|
|
||||||
.test_mul()
|
|
||||||
.test_div()
|
|
||||||
.test_cmp()
|
|
||||||
.test_sqrt_abs()
|
|
||||||
.test_min_max()
|
|
||||||
.test_float_absdiff()
|
|
||||||
.test_mask()
|
|
||||||
.test_unpack()
|
|
||||||
.test_float_math()
|
|
||||||
.test_float_cvt32()
|
|
||||||
.test_extract<0>().test_extract<1>()
|
|
||||||
.test_rotate<0>().test_rotate<1>()
|
|
||||||
;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
TEST(hal_intrin,float16)
|
|
||||||
{
|
{
|
||||||
CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
|
CV_CPU_CALL_FP16_(test_hal_intrin_float16, ());
|
||||||
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
#define DISPATCH_SIMD_MODES AVX2
|
||||||
|
#define DISPATCH_SIMD_NAME "SIMD256"
|
||||||
|
#define DISPATCH_SIMD(fun) \
|
||||||
|
do { \
|
||||||
|
CV_CPU_DISPATCH(fun, (), DISPATCH_SIMD_MODES); \
|
||||||
|
throw SkipTestException( \
|
||||||
|
"Unsupported hardware: " \
|
||||||
|
DISPATCH_SIMD_NAME \
|
||||||
|
" is not available" \
|
||||||
|
); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
TEST(hal_intrin256, uint8x32)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_uint8); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, int8x32)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_int8); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, uint16x16)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_uint16); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, int16x16)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_int16); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, uint32x8)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_uint32); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, int32x8)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_int32); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, uint64x4)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_uint64); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, int64x4)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_int64); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, float32x8)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_float32); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, float64x4)
|
||||||
|
{ DISPATCH_SIMD(test_hal_intrin_float64); }
|
||||||
|
|
||||||
|
TEST(hal_intrin256, float16x16)
|
||||||
|
{
|
||||||
|
if (!CV_CPU_HAS_SUPPORT_FP16)
|
||||||
|
throw SkipTestException("Unsupported hardware: FP16 is not available");
|
||||||
|
DISPATCH_SIMD(test_hal_intrin_float16);
|
||||||
|
}
|
||||||
|
|
||||||
|
}} // namespace
|
@ -9,7 +9,7 @@ CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
void test_hal_intrin_float16()
|
void test_hal_intrin_float16()
|
||||||
{
|
{
|
||||||
TheTest<v_float16x8>()
|
TheTest<v_float16>()
|
||||||
.test_loadstore_fp16()
|
.test_loadstore_fp16()
|
||||||
.test_float_cvt_fp16()
|
.test_float_cvt_fp16()
|
||||||
;
|
;
|
||||||
|
296
modules/core/test/test_intrin.simd.hpp
Normal file
296
modules/core/test/test_intrin.simd.hpp
Normal file
@ -0,0 +1,296 @@
|
|||||||
|
// This file is part of OpenCV project.
|
||||||
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||||
|
// of this distribution and at http://opencv.org/license.html.
|
||||||
|
#include "test_precomp.hpp"
|
||||||
|
#include "test_intrin_utils.hpp"
|
||||||
|
|
||||||
|
namespace opencv_test { namespace hal {
|
||||||
|
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
void test_hal_intrin_uint8();
|
||||||
|
void test_hal_intrin_int8();
|
||||||
|
void test_hal_intrin_uint16();
|
||||||
|
void test_hal_intrin_int16();
|
||||||
|
void test_hal_intrin_uint32();
|
||||||
|
void test_hal_intrin_int32();
|
||||||
|
void test_hal_intrin_uint64();
|
||||||
|
void test_hal_intrin_int64();
|
||||||
|
void test_hal_intrin_float32();
|
||||||
|
void test_hal_intrin_float64();
|
||||||
|
|
||||||
|
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
|
//============= 8-bit integer =====================================================================
|
||||||
|
|
||||||
|
void test_hal_intrin_uint8()
|
||||||
|
{
|
||||||
|
TheTest<v_uint8>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_expand_q()
|
||||||
|
.test_addsub()
|
||||||
|
.test_addsub_wrap()
|
||||||
|
.test_cmp()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_mask()
|
||||||
|
.test_popcount()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||||
|
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||||
|
;
|
||||||
|
|
||||||
|
#if CV_SIMD256
|
||||||
|
TheTest<v_uint8>()
|
||||||
|
.test_pack<9>().test_pack<10>().test_pack<13>().test_pack<15>()
|
||||||
|
.test_pack_u<9>().test_pack_u<10>().test_pack_u<13>().test_pack_u<15>()
|
||||||
|
.test_extract<16>().test_extract<17>().test_extract<23>().test_extract<31>()
|
||||||
|
.test_rotate<16>().test_rotate<17>().test_rotate<23>().test_rotate<31>()
|
||||||
|
;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_hal_intrin_int8()
|
||||||
|
{
|
||||||
|
TheTest<v_int8>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_expand_q()
|
||||||
|
.test_addsub()
|
||||||
|
.test_addsub_wrap()
|
||||||
|
.test_cmp()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_abs()
|
||||||
|
.test_mask()
|
||||||
|
.test_popcount()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
//============= 16-bit integer =====================================================================
|
||||||
|
|
||||||
|
void test_hal_intrin_uint16()
|
||||||
|
{
|
||||||
|
TheTest<v_uint16>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_addsub()
|
||||||
|
.test_addsub_wrap()
|
||||||
|
.test_mul()
|
||||||
|
.test_mul_expand()
|
||||||
|
.test_cmp()
|
||||||
|
.test_shift<1>()
|
||||||
|
.test_shift<8>()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_reduce()
|
||||||
|
.test_mask()
|
||||||
|
.test_popcount()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||||
|
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_hal_intrin_int16()
|
||||||
|
{
|
||||||
|
TheTest<v_int16>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_addsub()
|
||||||
|
.test_addsub_wrap()
|
||||||
|
.test_mul()
|
||||||
|
.test_mul_expand()
|
||||||
|
.test_cmp()
|
||||||
|
.test_shift<1>()
|
||||||
|
.test_shift<8>()
|
||||||
|
.test_dot_prod()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_abs()
|
||||||
|
.test_reduce()
|
||||||
|
.test_mask()
|
||||||
|
.test_popcount()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
//============= 32-bit integer =====================================================================
|
||||||
|
|
||||||
|
void test_hal_intrin_uint32()
|
||||||
|
{
|
||||||
|
TheTest<v_uint32>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_addsub()
|
||||||
|
.test_mul()
|
||||||
|
.test_mul_expand()
|
||||||
|
.test_cmp()
|
||||||
|
.test_shift<1>()
|
||||||
|
.test_shift<8>()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_reduce()
|
||||||
|
.test_mask()
|
||||||
|
.test_popcount()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||||
|
.test_transpose()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_hal_intrin_int32()
|
||||||
|
{
|
||||||
|
TheTest<v_int32>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_expand()
|
||||||
|
.test_addsub()
|
||||||
|
.test_mul()
|
||||||
|
.test_abs()
|
||||||
|
.test_cmp()
|
||||||
|
.test_popcount()
|
||||||
|
.test_shift<1>().test_shift<8>()
|
||||||
|
.test_logic()
|
||||||
|
.test_min_max()
|
||||||
|
.test_absdiff()
|
||||||
|
.test_reduce()
|
||||||
|
.test_mask()
|
||||||
|
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
|
||||||
|
.test_unpack()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||||
|
.test_float_cvt32()
|
||||||
|
.test_float_cvt64()
|
||||||
|
.test_transpose()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
//============= 64-bit integer =====================================================================
|
||||||
|
|
||||||
|
void test_hal_intrin_uint64()
|
||||||
|
{
|
||||||
|
TheTest<v_uint64>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_addsub()
|
||||||
|
.test_shift<1>().test_shift<8>()
|
||||||
|
.test_logic()
|
||||||
|
.test_extract<0>().test_extract<1>()
|
||||||
|
.test_rotate<0>().test_rotate<1>()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_hal_intrin_int64()
|
||||||
|
{
|
||||||
|
TheTest<v_int64>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_addsub()
|
||||||
|
.test_shift<1>().test_shift<8>()
|
||||||
|
.test_logic()
|
||||||
|
.test_extract<0>().test_extract<1>()
|
||||||
|
.test_rotate<0>().test_rotate<1>()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
//============= Floating point =====================================================================
|
||||||
|
void test_hal_intrin_float32()
|
||||||
|
{
|
||||||
|
TheTest<v_float32>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_interleave()
|
||||||
|
.test_interleave_2channel()
|
||||||
|
.test_addsub()
|
||||||
|
.test_mul()
|
||||||
|
.test_div()
|
||||||
|
.test_cmp()
|
||||||
|
.test_sqrt_abs()
|
||||||
|
.test_min_max()
|
||||||
|
.test_float_absdiff()
|
||||||
|
.test_reduce()
|
||||||
|
.test_mask()
|
||||||
|
.test_unpack()
|
||||||
|
.test_float_math()
|
||||||
|
.test_float_cvt64()
|
||||||
|
.test_matmul()
|
||||||
|
.test_transpose()
|
||||||
|
.test_reduce_sum4()
|
||||||
|
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
|
||||||
|
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
|
||||||
|
;
|
||||||
|
|
||||||
|
#if CV_SIMD256
|
||||||
|
TheTest<v_float32>()
|
||||||
|
.test_extract<4>().test_extract<5>().test_extract<6>().test_extract<7>()
|
||||||
|
.test_rotate<4>().test_rotate<5>().test_rotate<6>().test_rotate<7>()
|
||||||
|
;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_hal_intrin_float64()
|
||||||
|
{
|
||||||
|
#if CV_SIMD_64F
|
||||||
|
TheTest<v_float64>()
|
||||||
|
.test_loadstore()
|
||||||
|
.test_addsub()
|
||||||
|
.test_mul()
|
||||||
|
.test_div()
|
||||||
|
.test_cmp()
|
||||||
|
.test_sqrt_abs()
|
||||||
|
.test_min_max()
|
||||||
|
.test_float_absdiff()
|
||||||
|
.test_mask()
|
||||||
|
.test_unpack()
|
||||||
|
.test_float_math()
|
||||||
|
.test_float_cvt32()
|
||||||
|
.test_extract<0>().test_extract<1>()
|
||||||
|
.test_rotate<0>().test_rotate<1>()
|
||||||
|
;
|
||||||
|
|
||||||
|
#if CV_SIMD256
|
||||||
|
TheTest<v_float64>()
|
||||||
|
.test_extract<2>().test_extract<3>()
|
||||||
|
.test_rotate<2>().test_rotate<3>()
|
||||||
|
;
|
||||||
|
#endif //CV_SIMD256
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CV_FP16 && CV_SIMD_WIDTH > 16
|
||||||
|
void test_hal_intrin_float16()
|
||||||
|
{
|
||||||
|
TheTest<v_float16>()
|
||||||
|
.test_loadstore_fp16()
|
||||||
|
.test_float_cvt_fp16()
|
||||||
|
;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||||
|
|
||||||
|
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||||
|
|
||||||
|
}} //namespace
|
@ -13,6 +13,27 @@ void test_hal_intrin_float16();
|
|||||||
template <typename R> struct Data;
|
template <typename R> struct Data;
|
||||||
template <int N> struct initializer;
|
template <int N> struct initializer;
|
||||||
|
|
||||||
|
template <> struct initializer<64>
|
||||||
|
{
|
||||||
|
template <typename R> static R init(const Data<R> & d)
|
||||||
|
{
|
||||||
|
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
|
||||||
|
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31],
|
||||||
|
d[32], d[33], d[34], d[35], d[36], d[37], d[38], d[39], d[40], d[41], d[42], d[43], d[44], d[45], d[46], d[47],
|
||||||
|
d[48], d[49], d[50], d[51], d[52], d[53], d[54], d[55], d[56], d[57], d[58], d[59], d[50], d[51], d[52], d[53],
|
||||||
|
d[54], d[55], d[56], d[57], d[58], d[59], d[60], d[61], d[62], d[63]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct initializer<32>
|
||||||
|
{
|
||||||
|
template <typename R> static R init(const Data<R> & d)
|
||||||
|
{
|
||||||
|
return R(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15],
|
||||||
|
d[16], d[17], d[18], d[19], d[20], d[21], d[22], d[23], d[24], d[25], d[26], d[27], d[28], d[29], d[30], d[31]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <> struct initializer<16>
|
template <> struct initializer<16>
|
||||||
{
|
{
|
||||||
template <typename R> static R init(const Data<R> & d)
|
template <typename R> static R init(const Data<R> & d)
|
||||||
@ -125,6 +146,17 @@ template <typename R> struct Data
|
|||||||
{
|
{
|
||||||
return d + R::nlanes / 2;
|
return d + R::nlanes / 2;
|
||||||
}
|
}
|
||||||
|
LaneType sum(int s, int c)
|
||||||
|
{
|
||||||
|
LaneType res = 0;
|
||||||
|
for (int i = s; i < s + c; ++i)
|
||||||
|
res += d[i];
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
LaneType sum()
|
||||||
|
{
|
||||||
|
return sum(0, R::nlanes);
|
||||||
|
}
|
||||||
bool operator==(const Data<R> & other) const
|
bool operator==(const Data<R> & other) const
|
||||||
{
|
{
|
||||||
for (int i = 0; i < R::nlanes; ++i)
|
for (int i = 0; i < R::nlanes; ++i)
|
||||||
@ -147,13 +179,12 @@ template <typename R> struct Data
|
|||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
LaneType d[R::nlanes];
|
LaneType d[R::nlanes];
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename R> struct AlignedData
|
template<typename R> struct AlignedData
|
||||||
{
|
{
|
||||||
Data<R> CV_DECL_ALIGNED(16) a; // aligned
|
Data<R> CV_DECL_ALIGNED(CV_SIMD_WIDTH) a; // aligned
|
||||||
char dummy;
|
char dummy;
|
||||||
Data<R> u; // unaligned
|
Data<R> u; // unaligned
|
||||||
};
|
};
|
||||||
@ -207,22 +238,22 @@ template<typename R> struct TheTest
|
|||||||
AlignedData<R> out;
|
AlignedData<R> out;
|
||||||
|
|
||||||
// check if addresses are aligned and unaligned respectively
|
// check if addresses are aligned and unaligned respectively
|
||||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
|
||||||
|
|
||||||
// check some initialization methods
|
// check some initialization methods
|
||||||
R r1 = data.a;
|
R r1 = data.a;
|
||||||
R r2 = v_load(data.u.d);
|
R r2 = vx_load(data.u.d);
|
||||||
R r3 = v_load_aligned(data.a.d);
|
R r3 = vx_load_aligned(data.a.d);
|
||||||
R r4(r2);
|
R r4(r2);
|
||||||
EXPECT_EQ(data.a[0], r1.get0());
|
EXPECT_EQ(data.a[0], r1.get0());
|
||||||
EXPECT_EQ(data.u[0], r2.get0());
|
EXPECT_EQ(data.u[0], r2.get0());
|
||||||
EXPECT_EQ(data.a[0], r3.get0());
|
EXPECT_EQ(data.a[0], r3.get0());
|
||||||
EXPECT_EQ(data.u[0], r4.get0());
|
EXPECT_EQ(data.u[0], r4.get0());
|
||||||
|
|
||||||
R r_low = v_load_low((LaneType*)data.u.d);
|
R r_low = vx_load_low((LaneType*)data.u.d);
|
||||||
EXPECT_EQ(data.u[0], r_low.get0());
|
EXPECT_EQ(data.u[0], r_low.get0());
|
||||||
v_store(out.u.d, r_low);
|
v_store(out.u.d, r_low);
|
||||||
for (int i = 0; i < R::nlanes/2; ++i)
|
for (int i = 0; i < R::nlanes/2; ++i)
|
||||||
@ -230,7 +261,7 @@ template<typename R> struct TheTest
|
|||||||
EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
|
EXPECT_EQ((LaneType)data.u[i], (LaneType)out.u[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
R r_low_align8byte = v_load_low((LaneType*)((char*)data.u.d + 8));
|
R r_low_align8byte = vx_load_low((LaneType*)((char*)data.u.d + (CV_SIMD_WIDTH / 2)));
|
||||||
EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
|
EXPECT_EQ(data.u[R::nlanes/2], r_low_align8byte.get0());
|
||||||
v_store(out.u.d, r_low_align8byte);
|
v_store(out.u.d, r_low_align8byte);
|
||||||
for (int i = 0; i < R::nlanes/2; ++i)
|
for (int i = 0; i < R::nlanes/2; ++i)
|
||||||
@ -255,7 +286,7 @@ template<typename R> struct TheTest
|
|||||||
|
|
||||||
// check halves load correctness
|
// check halves load correctness
|
||||||
res.clear();
|
res.clear();
|
||||||
R r6 = v_load_halves(d.d, d.mid());
|
R r6 = vx_load_halves(d.d, d.mid());
|
||||||
v_store(res.d, r6);
|
v_store(res.d, r6);
|
||||||
EXPECT_EQ(d, res);
|
EXPECT_EQ(d, res);
|
||||||
|
|
||||||
@ -270,17 +301,17 @@ template<typename R> struct TheTest
|
|||||||
}
|
}
|
||||||
|
|
||||||
// reinterpret_as
|
// reinterpret_as
|
||||||
v_uint8x16 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
|
v_uint8 vu8 = v_reinterpret_as_u8(r1); out.a.clear(); v_store((uchar*)out.a.d, vu8); EXPECT_EQ(data.a, out.a);
|
||||||
v_int8x16 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
|
v_int8 vs8 = v_reinterpret_as_s8(r1); out.a.clear(); v_store((schar*)out.a.d, vs8); EXPECT_EQ(data.a, out.a);
|
||||||
v_uint16x8 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
|
v_uint16 vu16 = v_reinterpret_as_u16(r1); out.a.clear(); v_store((ushort*)out.a.d, vu16); EXPECT_EQ(data.a, out.a);
|
||||||
v_int16x8 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
|
v_int16 vs16 = v_reinterpret_as_s16(r1); out.a.clear(); v_store((short*)out.a.d, vs16); EXPECT_EQ(data.a, out.a);
|
||||||
v_uint32x4 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
|
v_uint32 vu32 = v_reinterpret_as_u32(r1); out.a.clear(); v_store((unsigned*)out.a.d, vu32); EXPECT_EQ(data.a, out.a);
|
||||||
v_int32x4 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
|
v_int32 vs32 = v_reinterpret_as_s32(r1); out.a.clear(); v_store((int*)out.a.d, vs32); EXPECT_EQ(data.a, out.a);
|
||||||
v_uint64x2 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
|
v_uint64 vu64 = v_reinterpret_as_u64(r1); out.a.clear(); v_store((uint64*)out.a.d, vu64); EXPECT_EQ(data.a, out.a);
|
||||||
v_int64x2 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
|
v_int64 vs64 = v_reinterpret_as_s64(r1); out.a.clear(); v_store((int64*)out.a.d, vs64); EXPECT_EQ(data.a, out.a);
|
||||||
v_float32x4 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
|
v_float32 vf32 = v_reinterpret_as_f32(r1); out.a.clear(); v_store((float*)out.a.d, vf32); EXPECT_EQ(data.a, out.a);
|
||||||
#if CV_SIMD128_64F
|
#if CV_SIMD_64F
|
||||||
v_float64x2 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
|
v_float64 vf64 = v_reinterpret_as_f64(r1); out.a.clear(); v_store((double*)out.a.d, vf64); EXPECT_EQ(data.a, out.a);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
@ -357,7 +388,7 @@ template<typename R> struct TheTest
|
|||||||
Data<R> dataA;
|
Data<R> dataA;
|
||||||
R a = dataA;
|
R a = dataA;
|
||||||
|
|
||||||
Data<Rx2> resB = v_load_expand(dataA.d);
|
Data<Rx2> resB = vx_load_expand(dataA.d);
|
||||||
|
|
||||||
Rx2 c, d;
|
Rx2 c, d;
|
||||||
v_expand(a, c, d);
|
v_expand(a, c, d);
|
||||||
@ -378,7 +409,7 @@ template<typename R> struct TheTest
|
|||||||
{
|
{
|
||||||
typedef typename V_RegTraits<R>::q_reg Rx4;
|
typedef typename V_RegTraits<R>::q_reg Rx4;
|
||||||
Data<R> data;
|
Data<R> data;
|
||||||
Data<Rx4> out = v_load_expand_q(data.d);
|
Data<Rx4> out = vx_load_expand_q(data.d);
|
||||||
const int n = Rx4::nlanes;
|
const int n = Rx4::nlanes;
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
EXPECT_EQ(data[i], out[i]);
|
EXPECT_EQ(data[i], out[i]);
|
||||||
@ -610,7 +641,13 @@ template<typename R> struct TheTest
|
|||||||
|
|
||||||
TheTest & test_popcount()
|
TheTest & test_popcount()
|
||||||
{
|
{
|
||||||
static unsigned popcountTable[] = {0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33};
|
static unsigned popcountTable[] = {
|
||||||
|
0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33,
|
||||||
|
35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81,
|
||||||
|
83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123,
|
||||||
|
128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172,
|
||||||
|
176, 181, 186, 192, 193
|
||||||
|
};
|
||||||
Data<R> dataA;
|
Data<R> dataA;
|
||||||
R a = dataA;
|
R a = dataA;
|
||||||
|
|
||||||
@ -918,7 +955,7 @@ template<typename R> struct TheTest
|
|||||||
|
|
||||||
TheTest & test_float_cvt32()
|
TheTest & test_float_cvt32()
|
||||||
{
|
{
|
||||||
typedef v_float32x4 Rt;
|
typedef v_float32 Rt;
|
||||||
Data<R> dataA;
|
Data<R> dataA;
|
||||||
dataA *= 1.1;
|
dataA *= 1.1;
|
||||||
R a = dataA;
|
R a = dataA;
|
||||||
@ -934,8 +971,8 @@ template<typename R> struct TheTest
|
|||||||
|
|
||||||
TheTest & test_float_cvt64()
|
TheTest & test_float_cvt64()
|
||||||
{
|
{
|
||||||
#if CV_SIMD128_64F
|
#if CV_SIMD_64F
|
||||||
typedef v_float64x2 Rt;
|
typedef v_float64 Rt;
|
||||||
Data<R> dataA;
|
Data<R> dataA;
|
||||||
dataA *= 1.1;
|
dataA *= 1.1;
|
||||||
R a = dataA;
|
R a = dataA;
|
||||||
@ -965,23 +1002,29 @@ template<typename R> struct TheTest
|
|||||||
R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD;
|
R v = dataV, a = dataA, b = dataB, c = dataC, d = dataD;
|
||||||
|
|
||||||
Data<R> res = v_matmul(v, a, b, c, d);
|
Data<R> res = v_matmul(v, a, b, c, d);
|
||||||
for (int i = 0; i < R::nlanes; ++i)
|
for (int i = 0; i < R::nlanes; i += 4)
|
||||||
{
|
{
|
||||||
LaneType val = dataV[0] * dataA[i]
|
for (int j = i; j < i + 4; ++j)
|
||||||
+ dataV[1] * dataB[i]
|
{
|
||||||
+ dataV[2] * dataC[i]
|
LaneType val = dataV[i] * dataA[j]
|
||||||
+ dataV[3] * dataD[i];
|
+ dataV[i + 1] * dataB[j]
|
||||||
EXPECT_DOUBLE_EQ(val, res[i]);
|
+ dataV[i + 2] * dataC[j]
|
||||||
|
+ dataV[i + 3] * dataD[j];
|
||||||
|
EXPECT_COMPARE_EQ(val, res[j]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Data<R> resAdd = v_matmuladd(v, a, b, c, d);
|
Data<R> resAdd = v_matmuladd(v, a, b, c, d);
|
||||||
for (int i = 0; i < R::nlanes; ++i)
|
for (int i = 0; i < R::nlanes; i += 4)
|
||||||
{
|
{
|
||||||
LaneType val = dataV[0] * dataA[i]
|
for (int j = i; j < i + 4; ++j)
|
||||||
+ dataV[1] * dataB[i]
|
{
|
||||||
+ dataV[2] * dataC[i]
|
LaneType val = dataV[i] * dataA[j]
|
||||||
+ dataD[i];
|
+ dataV[i + 1] * dataB[j]
|
||||||
EXPECT_DOUBLE_EQ(val, resAdd[i]);
|
+ dataV[i + 2] * dataC[j]
|
||||||
|
+ dataD[j];
|
||||||
|
EXPECT_COMPARE_EQ(val, resAdd[j]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -998,30 +1041,36 @@ template<typename R> struct TheTest
|
|||||||
e, f, g, h);
|
e, f, g, h);
|
||||||
|
|
||||||
Data<R> res[4] = {e, f, g, h};
|
Data<R> res[4] = {e, f, g, h};
|
||||||
for (int i = 0; i < R::nlanes; ++i)
|
for (int i = 0; i < R::nlanes; i += 4)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(dataA[i], res[i][0]);
|
for (int j = 0; j < 4; ++j)
|
||||||
EXPECT_EQ(dataB[i], res[i][1]);
|
{
|
||||||
EXPECT_EQ(dataC[i], res[i][2]);
|
EXPECT_EQ(dataA[i + j], res[j][i]);
|
||||||
EXPECT_EQ(dataD[i], res[i][3]);
|
EXPECT_EQ(dataB[i + j], res[j][i + 1]);
|
||||||
|
EXPECT_EQ(dataC[i + j], res[j][i + 2]);
|
||||||
|
EXPECT_EQ(dataD[i + j], res[j][i + 3]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
TheTest & test_reduce_sum4()
|
TheTest & test_reduce_sum4()
|
||||||
{
|
{
|
||||||
R a(0.1f, 0.02f, 0.003f, 0.0004f);
|
Data<R> dataA, dataB, dataC, dataD;
|
||||||
R b(1, 20, 300, 4000);
|
dataB *= 0.01f;
|
||||||
R c(10, 2, 0.3f, 0.04f);
|
dataC *= 0.001f;
|
||||||
R d(1, 2, 3, 4);
|
dataD *= 0.002f;
|
||||||
|
|
||||||
R sum = v_reduce_sum4(a, b, c, d);
|
R a = dataA, b = dataB, c = dataC, d = dataD;
|
||||||
|
Data<R> res = v_reduce_sum4(a, b, c, d);
|
||||||
|
|
||||||
Data<R> res = sum;
|
for (int i = 0; i < R::nlanes; i += 4)
|
||||||
EXPECT_EQ(0.1234f, res[0]);
|
{
|
||||||
EXPECT_EQ(4321.0f, res[1]);
|
EXPECT_COMPARE_EQ(dataA.sum(i, 4), res[i]);
|
||||||
EXPECT_EQ(12.34f, res[2]);
|
EXPECT_COMPARE_EQ(dataB.sum(i, 4), res[i + 1]);
|
||||||
EXPECT_EQ(10.0f, res[3]);
|
EXPECT_COMPARE_EQ(dataC.sum(i, 4), res[i + 2]);
|
||||||
|
EXPECT_COMPARE_EQ(dataD.sum(i, 4), res[i + 3]);
|
||||||
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1032,14 +1081,14 @@ template<typename R> struct TheTest
|
|||||||
AlignedData<R> out;
|
AlignedData<R> out;
|
||||||
|
|
||||||
// check if addresses are aligned and unaligned respectively
|
// check if addresses are aligned and unaligned respectively
|
||||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
EXPECT_EQ((size_t)0, (size_t)&data.a.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
EXPECT_NE((size_t)0, (size_t)&data.u.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16);
|
EXPECT_EQ((size_t)0, (size_t)&out.a.d % CV_SIMD_WIDTH);
|
||||||
EXPECT_NE((size_t)0, (size_t)&out.u.d % 16);
|
EXPECT_NE((size_t)0, (size_t)&out.u.d % CV_SIMD_WIDTH);
|
||||||
|
|
||||||
// check some initialization methods
|
// check some initialization methods
|
||||||
R r1 = data.u;
|
R r1 = data.u;
|
||||||
R r2 = v_load_f16(data.a.d);
|
R r2 = vx_load_f16(data.a.d);
|
||||||
R r3(r2);
|
R r3(r2);
|
||||||
EXPECT_EQ(data.u[0], r1.get0());
|
EXPECT_EQ(data.u[0], r1.get0());
|
||||||
EXPECT_EQ(data.a[0], r2.get0());
|
EXPECT_EQ(data.a[0], r2.get0());
|
||||||
|
@ -173,7 +173,6 @@ void Core_RandTest::run( int )
|
|||||||
dsz = slice+1 < maxSlice ? (int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz;
|
dsz = slice+1 < maxSlice ? (int)(cvtest::randInt(rng) % (SZ - sz) + 1) : SZ - sz;
|
||||||
Mat aslice = arr[k].colRange(sz, sz + dsz);
|
Mat aslice = arr[k].colRange(sz, sz + dsz);
|
||||||
tested_rng.fill(aslice, dist_type, A, B);
|
tested_rng.fill(aslice, dist_type, A, B);
|
||||||
//printf("%d - %d\n", sz, sz + dsz);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,12 +85,6 @@ else()
|
|||||||
set(sources_options EXCLUDE_OPENCL)
|
set(sources_options EXCLUDE_OPENCL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(HAVE_INF_ENGINE)
|
|
||||||
add_definitions(-DHAVE_INF_ENGINE=1)
|
|
||||||
list(APPEND include_dirs ${INF_ENGINE_INCLUDE_DIRS})
|
|
||||||
list(APPEND libs ${INF_ENGINE_LIBRARIES})
|
|
||||||
endif()
|
|
||||||
|
|
||||||
ocv_module_include_directories(${include_dirs})
|
ocv_module_include_directories(${include_dirs})
|
||||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||||
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
|
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-suggest-override") # GCC
|
||||||
@ -98,9 +92,9 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
|||||||
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
|
ocv_append_source_files_cxx_compiler_options(fw_srcs "-Wno-inconsistent-missing-override") # Clang
|
||||||
endif()
|
endif()
|
||||||
ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
|
ocv_glob_module_sources(${sources_options} SOURCES ${fw_srcs})
|
||||||
ocv_create_module(${libs})
|
ocv_create_module(${libs} ${INF_ENGINE_TARGET})
|
||||||
ocv_add_samples()
|
ocv_add_samples()
|
||||||
ocv_add_accuracy_tests()
|
ocv_add_accuracy_tests(${INF_ENGINE_TARGET})
|
||||||
ocv_add_perf_tests()
|
ocv_add_perf_tests()
|
||||||
|
|
||||||
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
|
ocv_option(${the_module}_PERF_CAFFE "Add performance tests of Caffe framework" OFF)
|
||||||
@ -120,9 +114,3 @@ if(BUILD_PERF_TESTS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Test Intel's Inference Engine models
|
|
||||||
if(HAVE_INF_ENGINE AND TARGET opencv_test_dnn)
|
|
||||||
ocv_target_include_directories(opencv_test_dnn PRIVATE ${INF_ENGINE_INCLUDE_DIRS})
|
|
||||||
ocv_target_link_libraries(opencv_test_dnn LINK_PRIVATE ${INF_ENGINE_LIBRARIES})
|
|
||||||
endif()
|
|
||||||
|
@ -201,7 +201,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
|
|||||||
* @param[out] outputs allocated output blobs, which will store results of the computation.
|
* @param[out] outputs allocated output blobs, which will store results of the computation.
|
||||||
* @param[out] internals allocated internal blobs
|
* @param[out] internals allocated internal blobs
|
||||||
*/
|
*/
|
||||||
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) = 0;
|
virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals);
|
||||||
|
|
||||||
/** @brief Given the @p input blobs, computes the output @p blobs.
|
/** @brief Given the @p input blobs, computes the output @p blobs.
|
||||||
* @param[in] inputs the input blobs.
|
* @param[in] inputs the input blobs.
|
||||||
|
@ -44,7 +44,9 @@
|
|||||||
|
|
||||||
#include <opencv2/core.hpp>
|
#include <opencv2/core.hpp>
|
||||||
#include <opencv2/core/types_c.h>
|
#include <opencv2/core/types_c.h>
|
||||||
|
#include <iostream>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
namespace cv {
|
namespace cv {
|
||||||
namespace dnn {
|
namespace dnn {
|
||||||
@ -178,13 +180,25 @@ static inline MatShape concat(const MatShape& a, const MatShape& b)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void print(const MatShape& shape, const String& name = "")
|
static inline std::string toString(const MatShape& shape, const String& name = "")
|
||||||
{
|
{
|
||||||
printf("%s: [", name.c_str());
|
std::ostringstream ss;
|
||||||
size_t i, n = shape.size();
|
if (!name.empty())
|
||||||
for( i = 0; i < n; i++ )
|
ss << name << ' ';
|
||||||
printf(" %d", shape[i]);
|
ss << '[';
|
||||||
printf(" ]\n");
|
for(size_t i = 0, n = shape.size(); i < n; ++i)
|
||||||
|
ss << ' ' << shape[i];
|
||||||
|
ss << " ]";
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
static inline void print(const MatShape& shape, const String& name = "")
|
||||||
|
{
|
||||||
|
std::cout << toString(shape, name) << std::endl;
|
||||||
|
}
|
||||||
|
static inline std::ostream& operator<<(std::ostream &out, const MatShape& shape)
|
||||||
|
{
|
||||||
|
out << toString(shape);
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int clamp(int ax, int dims)
|
inline int clamp(int ax, int dims)
|
||||||
|
@ -74,6 +74,10 @@ static int PARAM_DNN_BACKEND_DEFAULT = (int)utils::getConfigurationParameterSize
|
|||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Additional checks (slowdowns execution!)
|
||||||
|
static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF", false);
|
||||||
|
static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false);
|
||||||
|
static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false);
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::map;
|
using std::map;
|
||||||
@ -2053,10 +2057,75 @@ struct Net::Impl
|
|||||||
{
|
{
|
||||||
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
|
if (preferableBackend == DNN_BACKEND_OPENCV && IS_DNN_OPENCL_TARGET(preferableTarget))
|
||||||
{
|
{
|
||||||
|
std::vector<UMat> umat_inputBlobs = OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers);
|
||||||
std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
|
std::vector<UMat> umat_outputBlobs = OpenCLBackendWrapper::getUMatVector(ld.outputBlobsWrappers);
|
||||||
layer->forward(OpenCLBackendWrapper::getUMatVector(ld.inputBlobsWrappers),
|
std::vector<UMat> umat_internalBlobs = OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers);
|
||||||
|
layer->forward(umat_inputBlobs,
|
||||||
umat_outputBlobs,
|
umat_outputBlobs,
|
||||||
OpenCLBackendWrapper::getUMatVector(ld.internalBlobsWrappers));
|
umat_internalBlobs);
|
||||||
|
if (DNN_CHECK_NAN_INF)
|
||||||
|
{
|
||||||
|
bool fail = false;
|
||||||
|
for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
UMat& u = umat_outputBlobs[i];
|
||||||
|
Mat m;
|
||||||
|
if (u.depth() == CV_16S) // FP16
|
||||||
|
convertFp16(u, m);
|
||||||
|
else
|
||||||
|
m = u.getMat(ACCESS_READ);
|
||||||
|
if (!checkRange(m))
|
||||||
|
{
|
||||||
|
std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
|
||||||
|
std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
else if (!checkRange(m, true, NULL, -1e6, 1e6))
|
||||||
|
{
|
||||||
|
std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
|
||||||
|
std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < umat_inputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
UMat& u = umat_inputBlobs[i];
|
||||||
|
Mat m;
|
||||||
|
if (u.depth() == CV_16S) // FP16
|
||||||
|
convertFp16(u, m);
|
||||||
|
else
|
||||||
|
m = u.getMat(ACCESS_READ);
|
||||||
|
std::cout << "INPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < umat_outputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
UMat& u = umat_outputBlobs[i];
|
||||||
|
Mat m;
|
||||||
|
if (u.depth() == CV_16S) // FP16
|
||||||
|
convertFp16(u, m);
|
||||||
|
else
|
||||||
|
m = u.getMat(ACCESS_READ);
|
||||||
|
std::cout << "OUTPUT " << i << " " << cv::typeToString(u.type()) << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < umat_internalBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
UMat& u = umat_internalBlobs[i];
|
||||||
|
Mat m;
|
||||||
|
if (u.depth() == CV_16S) // FP16
|
||||||
|
convertFp16(u, m);
|
||||||
|
else
|
||||||
|
m = u.getMat(ACCESS_READ);
|
||||||
|
std::cout << "INTERNAL " << i << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << cv::typeToString(u.type()) << " " << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
if (DNN_CHECK_NAN_INF_RAISE_ERROR)
|
||||||
|
CV_Assert(!fail);
|
||||||
|
}
|
||||||
|
}
|
||||||
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
|
OpenCLBackendWrapper::update(ld.outputBlobsWrappers, umat_outputBlobs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -2069,6 +2138,56 @@ struct Net::Impl
|
|||||||
|
|
||||||
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
|
layer->forward(ld.inputBlobs, ld.outputBlobs, ld.internals);
|
||||||
|
|
||||||
|
if (DNN_CHECK_NAN_INF)
|
||||||
|
{
|
||||||
|
bool fail = false;
|
||||||
|
for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
const Mat& m = ld.outputBlobs[i];
|
||||||
|
if (!checkRange(m))
|
||||||
|
{
|
||||||
|
std::cerr << "WARNING: NaN detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
|
||||||
|
std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
else if (!checkRange(m, true, NULL, -1e6, 1e6))
|
||||||
|
{
|
||||||
|
std::cerr << "WARNING: Inf detected in layer output: id=" << ld.id << " name=" << layer->name << std::endl;
|
||||||
|
std::cerr << "output id=" << i << " output shape=" << shape(m) << std::endl;
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fail)
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < ld.inputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
const Mat* pM = ld.inputBlobs[i];
|
||||||
|
if (!pM)
|
||||||
|
{
|
||||||
|
std::cout << "INPUT " << i << " is NULL" << std::endl;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const Mat& m = *pM;
|
||||||
|
std::cout << "INPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < ld.outputBlobs.size(); ++i)
|
||||||
|
{
|
||||||
|
const Mat& m = ld.outputBlobs[i];
|
||||||
|
std::cout << "OUTPUT " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < ld.internals.size(); ++i)
|
||||||
|
{
|
||||||
|
const Mat& m = ld.internals[i];
|
||||||
|
std::cout << "INTERNAL " << i << " " << cv::typeToString(m.type()) << " " << shape(m) << std::endl;
|
||||||
|
if (DNN_CHECK_NAN_INF_DUMP) std::cout << m.reshape(1, 1) << std::endl;
|
||||||
|
}
|
||||||
|
if (DNN_CHECK_NAN_INF_RAISE_ERROR)
|
||||||
|
CV_Assert(!fail);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
|
for (int i = 0, n = ld.outputBlobsWrappers.size(); i < n; ++i)
|
||||||
{
|
{
|
||||||
if (!ld.outputBlobsWrappers[i].empty())
|
if (!ld.outputBlobsWrappers[i].empty())
|
||||||
@ -3071,6 +3190,14 @@ std::vector<Mat> Layer::finalize(const std::vector<Mat> &inputs)
|
|||||||
return outputs;
|
return outputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Layer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals)
|
||||||
|
{
|
||||||
|
CV_TRACE_FUNCTION();
|
||||||
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
Layer::forward_fallback(inputs, outputs, internals);
|
||||||
|
}
|
||||||
|
|
||||||
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
void Layer::forward_fallback(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
|
@ -196,7 +196,7 @@ public:
|
|||||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||||
{
|
{
|
||||||
return backendId == DNN_BACKEND_OPENCV ||
|
return backendId == DNN_BACKEND_OPENCV ||
|
||||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed;
|
backendId == DNN_BACKEND_INFERENCE_ENGINE && !_locPredTransposed && _bboxesNormalized;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||||
@ -411,9 +411,12 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
if (_bboxesNormalized)
|
||||||
|
{
|
||||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) &&
|
||||||
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
|
||||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
}
|
||||||
|
|
||||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
}
|
}
|
||||||
@ -916,6 +919,7 @@ public:
|
|||||||
ieLayer->params["nms_threshold"] = format("%f", _nmsThreshold);
|
ieLayer->params["nms_threshold"] = format("%f", _nmsThreshold);
|
||||||
ieLayer->params["top_k"] = format("%d", _topK);
|
ieLayer->params["top_k"] = format("%d", _topK);
|
||||||
ieLayer->params["keep_top_k"] = format("%d", _keepTopK);
|
ieLayer->params["keep_top_k"] = format("%d", _keepTopK);
|
||||||
|
ieLayer->params["eta"] = "1.0";
|
||||||
ieLayer->params["confidence_threshold"] = format("%f", _confidenceThreshold);
|
ieLayer->params["confidence_threshold"] = format("%f", _confidenceThreshold);
|
||||||
ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0";
|
ieLayer->params["variance_encoded_in_target"] = _varianceEncodedInTarget ? "1" : "0";
|
||||||
ieLayer->params["code_type"] = "caffe.PriorBoxParameter." + _codeType;
|
ieLayer->params["code_type"] = "caffe.PriorBoxParameter." + _codeType;
|
||||||
|
@ -135,10 +135,17 @@ public:
|
|||||||
|
|
||||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||||
{
|
{
|
||||||
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
|
{
|
||||||
|
if (preferableTarget == DNN_TARGET_MYRIAD)
|
||||||
|
return type == MAX || type == AVE;
|
||||||
|
else
|
||||||
|
return type != STOCHASTIC;
|
||||||
|
}
|
||||||
|
else
|
||||||
return backendId == DNN_BACKEND_OPENCV ||
|
return backendId == DNN_BACKEND_OPENCV ||
|
||||||
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
|
backendId == DNN_BACKEND_HALIDE && haveHalide() &&
|
||||||
(type == MAX || type == AVE && !pad.width && !pad.height) ||
|
(type == MAX || type == AVE && !pad.width && !pad.height);
|
||||||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && (type == MAX || type == AVE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
@ -192,8 +199,11 @@ public:
|
|||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
||||||
|
|
||||||
|
if (type == MAX || type == AVE || type == STOCHASTIC)
|
||||||
|
{
|
||||||
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
|
||||||
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
forward_ocl(inputs_arr, outputs_arr, internals_arr))
|
||||||
|
}
|
||||||
|
|
||||||
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
|
||||||
}
|
}
|
||||||
@ -238,22 +248,41 @@ public:
|
|||||||
#ifdef HAVE_INF_ENGINE
|
#ifdef HAVE_INF_ENGINE
|
||||||
InferenceEngine::LayerParams lp;
|
InferenceEngine::LayerParams lp;
|
||||||
lp.name = name;
|
lp.name = name;
|
||||||
lp.type = "Pooling";
|
|
||||||
lp.precision = InferenceEngine::Precision::FP32;
|
lp.precision = InferenceEngine::Precision::FP32;
|
||||||
std::shared_ptr<InferenceEngine::PoolingLayer> ieLayer(new InferenceEngine::PoolingLayer(lp));
|
|
||||||
|
|
||||||
ieLayer->_kernel_x = kernel.width;
|
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer;
|
||||||
ieLayer->_kernel_y = kernel.height;
|
if (type == MAX || type == AVE)
|
||||||
ieLayer->_stride_x = stride.width;
|
{
|
||||||
ieLayer->_stride_y = stride.height;
|
lp.type = "Pooling";
|
||||||
ieLayer->_padding_x = pad.width;
|
InferenceEngine::PoolingLayer* poolLayer = new InferenceEngine::PoolingLayer(lp);
|
||||||
ieLayer->_padding_y = pad.height;
|
poolLayer->_kernel_x = kernel.width;
|
||||||
ieLayer->_exclude_pad = type == AVE && padMode == "SAME";
|
poolLayer->_kernel_y = kernel.height;
|
||||||
ieLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
|
poolLayer->_stride_x = stride.width;
|
||||||
if (type == MAX)
|
poolLayer->_stride_y = stride.height;
|
||||||
ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::MAX;
|
poolLayer->_padding_x = pad.width;
|
||||||
else if (type == AVE)
|
poolLayer->_padding_y = pad.height;
|
||||||
ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::AVG;
|
poolLayer->_exclude_pad = type == AVE && padMode == "SAME";
|
||||||
|
poolLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
|
||||||
|
poolLayer->_type = type == MAX ? InferenceEngine::PoolingLayer::PoolType::MAX :
|
||||||
|
InferenceEngine::PoolingLayer::PoolType::AVG;
|
||||||
|
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(poolLayer);
|
||||||
|
}
|
||||||
|
else if (type == ROI)
|
||||||
|
{
|
||||||
|
lp.type = "ROIPooling";
|
||||||
|
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
|
||||||
|
ieLayer->params["pooled_w"] = format("%d", pooledSize.width);
|
||||||
|
ieLayer->params["pooled_h"] = format("%d", pooledSize.height);
|
||||||
|
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
|
||||||
|
}
|
||||||
|
else if (type == PSROI)
|
||||||
|
{
|
||||||
|
lp.type = "PSROIPooling";
|
||||||
|
ieLayer = std::shared_ptr<InferenceEngine::CNNLayer>(new InferenceEngine::CNNLayer(lp));
|
||||||
|
ieLayer->params["output_dim"] = format("%d", psRoiOutChannels);
|
||||||
|
ieLayer->params["group_size"] = format("%d", pooledSize.width);
|
||||||
|
ieLayer->params["spatial_scale"] = format("%f", spatialScale);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
|
CV_Error(Error::StsNotImplemented, "Unsupported pooling type");
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
#include "../precomp.hpp"
|
#include "../precomp.hpp"
|
||||||
#include "layers_common.hpp"
|
#include "layers_common.hpp"
|
||||||
|
#include "../op_inf_engine.hpp"
|
||||||
|
|
||||||
namespace cv { namespace dnn {
|
namespace cv { namespace dnn {
|
||||||
|
|
||||||
@ -16,14 +17,14 @@ public:
|
|||||||
{
|
{
|
||||||
setParamsFrom(params);
|
setParamsFrom(params);
|
||||||
|
|
||||||
uint32_t featStride = params.get<uint32_t>("feat_stride", 16);
|
featStride = params.get<uint32_t>("feat_stride", 16);
|
||||||
uint32_t baseSize = params.get<uint32_t>("base_size", 16);
|
baseSize = params.get<uint32_t>("base_size", 16);
|
||||||
// uint32_t minSize = params.get<uint32_t>("min_size", 16);
|
// uint32_t minSize = params.get<uint32_t>("min_size", 16);
|
||||||
uint32_t keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
|
keepTopBeforeNMS = params.get<uint32_t>("pre_nms_topn", 6000);
|
||||||
keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
|
keepTopAfterNMS = params.get<uint32_t>("post_nms_topn", 300);
|
||||||
float nmsThreshold = params.get<float>("nms_thresh", 0.7);
|
nmsThreshold = params.get<float>("nms_thresh", 0.7);
|
||||||
DictValue ratios = params.get("ratio");
|
ratios = params.get("ratio");
|
||||||
DictValue scales = params.get("scale");
|
scales = params.get("scale");
|
||||||
|
|
||||||
{
|
{
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
@ -83,6 +84,12 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||||
|
{
|
||||||
|
return backendId == DNN_BACKEND_OPENCV ||
|
||||||
|
backendId == DNN_BACKEND_INFERENCE_ENGINE && preferableTarget != DNN_TARGET_MYRIAD;
|
||||||
|
}
|
||||||
|
|
||||||
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
bool getMemoryShapes(const std::vector<MatShape> &inputs,
|
||||||
const int requiredOutputs,
|
const int requiredOutputs,
|
||||||
std::vector<MatShape> &outputs,
|
std::vector<MatShape> &outputs,
|
||||||
@ -312,6 +319,38 @@ public:
|
|||||||
outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);
|
outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&) CV_OVERRIDE
|
||||||
|
{
|
||||||
|
#ifdef HAVE_INF_ENGINE
|
||||||
|
InferenceEngine::LayerParams lp;
|
||||||
|
lp.name = name;
|
||||||
|
lp.type = "Proposal";
|
||||||
|
lp.precision = InferenceEngine::Precision::FP32;
|
||||||
|
std::shared_ptr<InferenceEngine::CNNLayer> ieLayer(new InferenceEngine::CNNLayer(lp));
|
||||||
|
|
||||||
|
ieLayer->params["base_size"] = format("%d", baseSize);
|
||||||
|
ieLayer->params["feat_stride"] = format("%d", featStride);
|
||||||
|
ieLayer->params["min_size"] = "16";
|
||||||
|
ieLayer->params["nms_thresh"] = format("%f", nmsThreshold);
|
||||||
|
ieLayer->params["post_nms_topn"] = format("%d", keepTopAfterNMS);
|
||||||
|
ieLayer->params["pre_nms_topn"] = format("%d", keepTopBeforeNMS);
|
||||||
|
if (ratios.size())
|
||||||
|
{
|
||||||
|
ieLayer->params["ratio"] = format("%f", ratios.get<float>(0));
|
||||||
|
for (int i = 1; i < ratios.size(); ++i)
|
||||||
|
ieLayer->params["ratio"] += format(",%f", ratios.get<float>(i));
|
||||||
|
}
|
||||||
|
if (scales.size())
|
||||||
|
{
|
||||||
|
ieLayer->params["scale"] = format("%f", scales.get<float>(0));
|
||||||
|
for (int i = 1; i < scales.size(); ++i)
|
||||||
|
ieLayer->params["scale"] += format(",%f", scales.get<float>(i));
|
||||||
|
}
|
||||||
|
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
|
||||||
|
#endif // HAVE_INF_ENGINE
|
||||||
|
return Ptr<BackendNode>();
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// A first half of channels are background scores. We need only a second one.
|
// A first half of channels are background scores. We need only a second one.
|
||||||
static Mat getObjectScores(const Mat& m)
|
static Mat getObjectScores(const Mat& m)
|
||||||
@ -342,8 +381,10 @@ private:
|
|||||||
|
|
||||||
Ptr<PermuteLayer> deltasPermute;
|
Ptr<PermuteLayer> deltasPermute;
|
||||||
Ptr<PermuteLayer> scoresPermute;
|
Ptr<PermuteLayer> scoresPermute;
|
||||||
uint32_t keepTopAfterNMS;
|
uint32_t keepTopBeforeNMS, keepTopAfterNMS, featStride, baseSize;
|
||||||
Mat fakeImageBlob;
|
Mat fakeImageBlob;
|
||||||
|
float nmsThreshold;
|
||||||
|
DictValue ratios, scales;
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
UMat umat_fakeImageBlob;
|
UMat umat_fakeImageBlob;
|
||||||
#endif
|
#endif
|
||||||
|
@ -183,8 +183,9 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
|
|||||||
ocl::Kernel oclk_sto_pool_forward(
|
ocl::Kernel oclk_sto_pool_forward(
|
||||||
kname.c_str(),
|
kname.c_str(),
|
||||||
ocl::dnn::ocl4dnn_pooling_oclsrc,
|
ocl::dnn::ocl4dnn_pooling_oclsrc,
|
||||||
format("-D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
|
format(" -D Dtype=%s -D KERNEL_STO_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
|
||||||
" -D STRIDE_W=%d -D STRIDE_H=%d",
|
" -D STRIDE_W=%d -D STRIDE_H=%d",
|
||||||
|
(use_half) ? "half" : "float",
|
||||||
kernel_w_, kernel_h_,
|
kernel_w_, kernel_h_,
|
||||||
stride_w_, stride_h_
|
stride_w_, stride_h_
|
||||||
));
|
));
|
||||||
|
@ -322,12 +322,32 @@ InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(const size_t) noex
|
|||||||
return InferenceEngine::StatusCode::OK;
|
return InferenceEngine::StatusCode::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InferenceEngine::StatusCode InfEngineBackendNet::setBatchSize(size_t size, InferenceEngine::ResponseDesc *responseDesc) noexcept
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
|
return InferenceEngine::StatusCode::OK;
|
||||||
|
}
|
||||||
|
|
||||||
size_t InfEngineBackendNet::getBatchSize() const noexcept
|
size_t InfEngineBackendNet::getBatchSize() const noexcept
|
||||||
{
|
{
|
||||||
CV_Error(Error::StsNotImplemented, "");
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
|
||||||
|
InferenceEngine::StatusCode InfEngineBackendNet::AddExtension(const InferenceEngine::IShapeInferExtensionPtr &extension, InferenceEngine::ResponseDesc *resp) noexcept
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
|
return InferenceEngine::StatusCode::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
InferenceEngine::StatusCode InfEngineBackendNet::reshape(const InferenceEngine::ICNNNetwork::InputShapes &inputShapes, InferenceEngine::ResponseDesc *resp) noexcept
|
||||||
|
{
|
||||||
|
CV_Error(Error::StsNotImplemented, "");
|
||||||
|
return InferenceEngine::StatusCode::OK;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void InfEngineBackendNet::init(int targetId)
|
void InfEngineBackendNet::init(int targetId)
|
||||||
{
|
{
|
||||||
if (inputs.empty())
|
if (inputs.empty())
|
||||||
|
@ -9,6 +9,8 @@
|
|||||||
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__
|
#define __OPENCV_DNN_OP_INF_ENGINE_HPP__
|
||||||
|
|
||||||
#include "opencv2/core/cvdef.h"
|
#include "opencv2/core/cvdef.h"
|
||||||
|
#include "opencv2/core/cvstd.hpp"
|
||||||
|
#include "opencv2/dnn.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_INF_ENGINE
|
#ifdef HAVE_INF_ENGINE
|
||||||
#if defined(__GNUC__) && __GNUC__ >= 5
|
#if defined(__GNUC__) && __GNUC__ >= 5
|
||||||
@ -19,6 +21,17 @@
|
|||||||
#if defined(__GNUC__) && __GNUC__ >= 5
|
#if defined(__GNUC__) && __GNUC__ >= 5
|
||||||
//#pragma GCC diagnostic pop
|
//#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define INF_ENGINE_RELEASE_2018R1 2018010000
|
||||||
|
#define INF_ENGINE_RELEASE_2018R2 2018020000
|
||||||
|
|
||||||
|
#ifndef INF_ENGINE_RELEASE
|
||||||
|
#warning("IE version have not been provided via command-line. Using 2018R2 by default")
|
||||||
|
#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2018R2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000))
|
||||||
|
|
||||||
#endif // HAVE_INF_ENGINE
|
#endif // HAVE_INF_ENGINE
|
||||||
|
|
||||||
namespace cv { namespace dnn {
|
namespace cv { namespace dnn {
|
||||||
@ -86,8 +99,15 @@ public:
|
|||||||
|
|
||||||
virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE;
|
virtual InferenceEngine::StatusCode setBatchSize(const size_t size) noexcept CV_OVERRIDE;
|
||||||
|
|
||||||
|
virtual InferenceEngine::StatusCode setBatchSize(size_t size, InferenceEngine::ResponseDesc* responseDesc) noexcept;
|
||||||
|
|
||||||
virtual size_t getBatchSize() const noexcept CV_OVERRIDE;
|
virtual size_t getBatchSize() const noexcept CV_OVERRIDE;
|
||||||
|
|
||||||
|
#if INF_ENGINE_VER_MAJOR_GT(INF_ENGINE_RELEASE_2018R2)
|
||||||
|
virtual InferenceEngine::StatusCode AddExtension(const InferenceEngine::IShapeInferExtensionPtr& extension, InferenceEngine::ResponseDesc* resp) noexcept;
|
||||||
|
virtual InferenceEngine::StatusCode reshape(const InputShapes& inputShapes, InferenceEngine::ResponseDesc* resp) noexcept;
|
||||||
|
#endif
|
||||||
|
|
||||||
void init(int targetId);
|
void init(int targetId);
|
||||||
|
|
||||||
void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers);
|
void addBlobs(const std::vector<Ptr<BackendWrapper> >& wrappers);
|
||||||
|
@ -104,7 +104,7 @@ __kernel void
|
|||||||
#elif defined KERNEL_AVE_POOL
|
#elif defined KERNEL_AVE_POOL
|
||||||
|
|
||||||
__kernel void TEMPLATE(ave_pool_forward, Dtype)(
|
__kernel void TEMPLATE(ave_pool_forward, Dtype)(
|
||||||
const int nthreads, __global const Dtype* const bottom_data,
|
const int nthreads, __global const Dtype* bottom_data,
|
||||||
const int channels, const int height, const int width,
|
const int channels, const int height, const int width,
|
||||||
const int pooled_height, const int pooled_width,
|
const int pooled_height, const int pooled_width,
|
||||||
__global Dtype* top_data)
|
__global Dtype* top_data)
|
||||||
@ -150,7 +150,7 @@ __kernel void TEMPLATE(ave_pool_forward, Dtype)(
|
|||||||
#elif defined KERNEL_STO_POOL
|
#elif defined KERNEL_STO_POOL
|
||||||
|
|
||||||
__kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
|
__kernel void TEMPLATE(sto_pool_forward_test,Dtype)(
|
||||||
const int nthreads, __global const Dtype* const bottom_data,
|
const int nthreads, __global const Dtype* bottom_data,
|
||||||
const int channels, const int height, const int width,
|
const int channels, const int height, const int width,
|
||||||
const int pooled_height, const int pooled_width,
|
const int pooled_height, const int pooled_width,
|
||||||
__global Dtype* top_data)
|
__global Dtype* top_data)
|
||||||
|
@ -1293,7 +1293,13 @@ void TFImporter::populateNet(Net dstNet)
|
|||||||
if (!next_layers.empty())
|
if (!next_layers.empty())
|
||||||
{
|
{
|
||||||
int maximumLayerIdx = next_layers[0].second;
|
int maximumLayerIdx = next_layers[0].second;
|
||||||
ExcludeLayer(net, maximumLayerIdx, 0, false);
|
|
||||||
|
CV_Assert(net.node(maximumLayerIdx).input_size() == 2);
|
||||||
|
|
||||||
|
// The input from the Mul layer can also be at index 1.
|
||||||
|
int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1;
|
||||||
|
|
||||||
|
ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false);
|
||||||
layers_to_ignore.insert(next_layers[0].first);
|
layers_to_ignore.insert(next_layers[0].first);
|
||||||
|
|
||||||
layerParams.set("negative_slope", scaleMat.at<float>(0));
|
layerParams.set("negative_slope", scaleMat.at<float>(0));
|
||||||
|
@ -938,6 +938,16 @@ struct TorchImporter
|
|||||||
layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
|
layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
|
||||||
curModule->modules.push_back(newModule);
|
curModule->modules.push_back(newModule);
|
||||||
}
|
}
|
||||||
|
else if (nnName == "SpatialUpSamplingNearest")
|
||||||
|
{
|
||||||
|
readTorchTable(scalarParams, tensorParams);
|
||||||
|
CV_Assert(scalarParams.has("scale_factor"));
|
||||||
|
int scale_factor = scalarParams.get<int>("scale_factor");
|
||||||
|
newModule->apiType = "Resize";
|
||||||
|
layerParams.set("interpolation", "nearest");
|
||||||
|
layerParams.set("zoom_factor", scale_factor);
|
||||||
|
curModule->modules.push_back(newModule);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Importer does not know how to map Torch's layer type to an OpenCV's one.
|
// Importer does not know how to map Torch's layer type to an OpenCV's one.
|
||||||
|
@ -175,7 +175,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_v2_TensorFlow)
|
|||||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
|
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.011 : 0.0;
|
||||||
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.06 : 0.0;
|
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.062 : 0.0;
|
||||||
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
|
processNet("dnn/ssd_mobilenet_v2_coco_2018_03_29.pb", "dnn/ssd_mobilenet_v2_coco_2018_03_29.pbtxt",
|
||||||
inp, "detection_out", "", l1, lInf, 0.25);
|
inp, "detection_out", "", l1, lInf, 0.25);
|
||||||
}
|
}
|
||||||
@ -233,11 +233,8 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
|
|||||||
{
|
{
|
||||||
if (backend == DNN_BACKEND_HALIDE)
|
if (backend == DNN_BACKEND_HALIDE)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
Size inpSize;
|
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
|
||||||
inpSize = Size(300, 300);
|
|
||||||
Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
|
Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
|
||||||
Mat inp = blobFromImage(img, 1.0, inpSize, Scalar(104.0, 177.0, 123.0), false, false);
|
Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
|
||||||
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
|
processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
|
||||||
inp, "detection_out");
|
inp, "detection_out");
|
||||||
}
|
}
|
||||||
@ -249,7 +246,7 @@ TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
|
|||||||
Mat sample = imread(findDataFile("dnn/street.png", false));
|
Mat sample = imread(findDataFile("dnn/street.png", false));
|
||||||
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
|
||||||
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0;
|
float l1 = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.008 : 0.0;
|
||||||
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.07 : 0.0;
|
float lInf = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 0.0731 : 0.0;
|
||||||
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
|
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
|
||||||
inp, "detection_out", "", l1, lInf);
|
inp, "detection_out", "", l1, lInf);
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,33 @@ static std::string _tf(TString filename)
|
|||||||
return (getOpenCVExtraDir() + "/dnn/") + filename;
|
return (getOpenCVExtraDir() + "/dnn/") + filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class Test_Caffe_nets : public DNNTestLayer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void testFaster(const std::string& proto, const std::string& model, const Mat& ref,
|
||||||
|
double scoreDiff = 0.0, double iouDiff = 0.0)
|
||||||
|
{
|
||||||
|
checkBackend();
|
||||||
|
Net net = readNetFromCaffe(findDataFile("dnn/" + proto, false),
|
||||||
|
findDataFile("dnn/" + model, false));
|
||||||
|
net.setPreferableBackend(backend);
|
||||||
|
net.setPreferableTarget(target);
|
||||||
|
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
||||||
|
resize(img, img, Size(800, 600));
|
||||||
|
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
|
||||||
|
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f);
|
||||||
|
|
||||||
|
net.setInput(blob, "data");
|
||||||
|
net.setInput(imInfo, "im_info");
|
||||||
|
// Output has shape 1x1xNx7 where N - number of detections.
|
||||||
|
// An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
|
||||||
|
Mat out = net.forward();
|
||||||
|
scoreDiff = scoreDiff ? scoreDiff : default_l1;
|
||||||
|
iouDiff = iouDiff ? iouDiff : default_lInf;
|
||||||
|
normAssertDetections(ref, out, ("model name: " + model).c_str(), 0.8, scoreDiff, iouDiff);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
TEST(Test_Caffe, memory_read)
|
TEST(Test_Caffe, memory_read)
|
||||||
{
|
{
|
||||||
const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false);
|
const string proto = findDataFile("dnn/bvlc_googlenet.prototxt", false);
|
||||||
@ -344,9 +371,15 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// https://github.com/richzhang/colorization
|
// https://github.com/richzhang/colorization
|
||||||
TEST(Reproducibility_Colorization, Accuracy)
|
TEST_P(Test_Caffe_nets, Colorization)
|
||||||
{
|
{
|
||||||
const float l1 = 3e-5;
|
checkBackend();
|
||||||
|
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
|
||||||
|
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||||
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
|
throw SkipTestException("");
|
||||||
|
|
||||||
|
const float l1 = 4e-4;
|
||||||
const float lInf = 3e-3;
|
const float lInf = 3e-3;
|
||||||
|
|
||||||
Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
|
Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
|
||||||
@ -356,7 +389,8 @@ TEST(Reproducibility_Colorization, Accuracy)
|
|||||||
const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
|
const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
|
||||||
const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
|
const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
|
||||||
Net net = readNetFromCaffe(proto, model);
|
Net net = readNetFromCaffe(proto, model);
|
||||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
net.setPreferableBackend(backend);
|
||||||
|
net.setPreferableTarget(target);
|
||||||
|
|
||||||
net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
|
net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
|
||||||
net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
|
net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
|
||||||
@ -447,39 +481,40 @@ INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
TEST(Test_Caffe, FasterRCNN_and_RFCN)
|
TEST_P(Test_Caffe_nets, FasterRCNN_vgg16)
|
||||||
{
|
{
|
||||||
std::string models[] = {"VGG16_faster_rcnn_final.caffemodel", "ZF_faster_rcnn_final.caffemodel",
|
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||||
"resnet50_rfcn_final.caffemodel"};
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
std::string protos[] = {"faster_rcnn_vgg16.prototxt", "faster_rcnn_zf.prototxt",
|
throw SkipTestException("");
|
||||||
"rfcn_pascal_voc_resnet50.prototxt"};
|
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
|
||||||
Mat refs[] = {(Mat_<float>(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849,
|
|
||||||
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
|
0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953,
|
||||||
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166),
|
0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166);
|
||||||
(Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
|
testFaster("faster_rcnn_vgg16.prototxt", "VGG16_faster_rcnn_final.caffemodel", ref);
|
||||||
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
|
|
||||||
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176),
|
|
||||||
(Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
|
|
||||||
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16)};
|
|
||||||
for (int i = 0; i < 3; ++i)
|
|
||||||
{
|
|
||||||
std::string proto = findDataFile("dnn/" + protos[i], false);
|
|
||||||
std::string model = findDataFile("dnn/" + models[i], false);
|
|
||||||
|
|
||||||
Net net = readNetFromCaffe(proto, model);
|
|
||||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
||||||
Mat img = imread(findDataFile("dnn/dog416.png", false));
|
|
||||||
resize(img, img, Size(800, 600));
|
|
||||||
Mat blob = blobFromImage(img, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, false);
|
|
||||||
Mat imInfo = (Mat_<float>(1, 3) << img.rows, img.cols, 1.6f);
|
|
||||||
|
|
||||||
net.setInput(blob, "data");
|
|
||||||
net.setInput(imInfo, "im_info");
|
|
||||||
// Output has shape 1x1xNx7 where N - number of detections.
|
|
||||||
// An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
|
|
||||||
Mat out = net.forward();
|
|
||||||
normAssertDetections(refs[i], out, ("model name: " + models[i]).c_str(), 0.8);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_Caffe_nets, FasterRCNN_zf)
|
||||||
|
{
|
||||||
|
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
|
||||||
|
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||||
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
|
throw SkipTestException("");
|
||||||
|
static Mat ref = (Mat_<float>(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395,
|
||||||
|
0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762,
|
||||||
|
0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176);
|
||||||
|
testFaster("faster_rcnn_zf.prototxt", "ZF_faster_rcnn_final.caffemodel", ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_Caffe_nets, RFCN)
|
||||||
|
{
|
||||||
|
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16) ||
|
||||||
|
(backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD) ||
|
||||||
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
|
throw SkipTestException("");
|
||||||
|
static Mat ref = (Mat_<float>(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234,
|
||||||
|
0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16);
|
||||||
|
testFaster("rfcn_pascal_voc_resnet50.prototxt", "resnet50_rfcn_final.caffemodel", ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Test_Caffe_nets, dnnBackendsAndTargets());
|
||||||
|
|
||||||
}} // namespace
|
}} // namespace
|
||||||
|
@ -16,7 +16,7 @@ using namespace cv;
|
|||||||
using namespace cv::dnn;
|
using namespace cv::dnn;
|
||||||
using namespace testing;
|
using namespace testing;
|
||||||
|
|
||||||
static void test(Mat& input, Net& net, int backendId, int targetId)
|
static void test(Mat& input, Net& net, Backend backendId, Target targetId, bool skipCheck = false)
|
||||||
{
|
{
|
||||||
DNNTestLayer::checkBackend(backendId, targetId);
|
DNNTestLayer::checkBackend(backendId, targetId);
|
||||||
randu(input, -1.0f, 1.0f);
|
randu(input, -1.0f, 1.0f);
|
||||||
@ -29,16 +29,19 @@ static void test(Mat& input, Net& net, int backendId, int targetId)
|
|||||||
net.setPreferableTarget(targetId);
|
net.setPreferableTarget(targetId);
|
||||||
Mat outputHalide = net.forward().clone();
|
Mat outputHalide = net.forward().clone();
|
||||||
|
|
||||||
|
if (skipCheck)
|
||||||
|
return;
|
||||||
|
|
||||||
double l1, lInf;
|
double l1, lInf;
|
||||||
DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf);
|
DNNTestLayer::getDefaultThresholds(backendId, targetId, &l1, &lInf);
|
||||||
normAssert(outputDefault, outputHalide, "", l1, lInf);
|
normAssert(outputDefault, outputHalide, "", l1, lInf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test(LayerParams& params, Mat& input, int backendId, int targetId)
|
static void test(LayerParams& params, Mat& input, Backend backendId, Target targetId, bool skipCheck = false)
|
||||||
{
|
{
|
||||||
Net net;
|
Net net;
|
||||||
net.addLayerToPrev(params.name, params.type, params);
|
net.addLayerToPrev(params.name, params.type, params);
|
||||||
test(input, net, backendId, targetId);
|
test(input, net, backendId, targetId, skipCheck);
|
||||||
}
|
}
|
||||||
|
|
||||||
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
|
static testing::internal::ParamGenerator<tuple<Backend, Target> > dnnBackendsAndTargetsWithHalide()
|
||||||
@ -101,16 +104,17 @@ TEST_P(Convolution, Accuracy)
|
|||||||
Size pad = get<4>(GetParam());
|
Size pad = get<4>(GetParam());
|
||||||
Size dilation = get<5>(GetParam());
|
Size dilation = get<5>(GetParam());
|
||||||
bool hasBias = get<6>(GetParam());
|
bool hasBias = get<6>(GetParam());
|
||||||
int backendId = get<0>(get<7>(GetParam()));
|
Backend backendId = get<0>(get<7>(GetParam()));
|
||||||
int targetId = get<1>(get<7>(GetParam()));
|
Target targetId = get<1>(get<7>(GetParam()));
|
||||||
|
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
|
|
||||||
|
bool skipCheck = false;
|
||||||
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
|
if (cvtest::skipUnstableTests && backendId == DNN_BACKEND_OPENCV &&
|
||||||
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
|
(targetId == DNN_TARGET_OPENCL || targetId == DNN_TARGET_OPENCL_FP16) &&
|
||||||
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1))
|
kernel == Size(3, 1) && stride == Size(1, 1) && pad == Size(0, 1))
|
||||||
throw SkipTestException("Skip unstable test");
|
skipCheck = true;
|
||||||
|
|
||||||
int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
|
int sz[] = {outChannels, inChannels / group, kernel.height, kernel.width};
|
||||||
Mat weights(4, &sz[0], CV_32F);
|
Mat weights(4, &sz[0], CV_32F);
|
||||||
@ -139,7 +143,9 @@ TEST_P(Convolution, Accuracy)
|
|||||||
}
|
}
|
||||||
int inpSz[] = {1, inChannels, inSize.height, inSize.width};
|
int inpSz[] = {1, inChannels, inSize.height, inSize.width};
|
||||||
Mat input(4, &inpSz[0], CV_32F);
|
Mat input(4, &inpSz[0], CV_32F);
|
||||||
test(lp, input, backendId, targetId);
|
test(lp, input, backendId, targetId, skipCheck);
|
||||||
|
if (skipCheck)
|
||||||
|
throw SkipTestException("Skip checks in unstable test");
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
|
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Convolution, Combine(
|
||||||
@ -171,8 +177,8 @@ TEST_P(Deconvolution, Accuracy)
|
|||||||
Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]);
|
Size stride = Size(get<5>(GetParam())[0], get<5>(GetParam())[1]);
|
||||||
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
|
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
|
||||||
bool hasBias = get<6>(GetParam());
|
bool hasBias = get<6>(GetParam());
|
||||||
int backendId = get<0>(get<7>(GetParam()));
|
Backend backendId = get<0>(get<7>(GetParam()));
|
||||||
int targetId = get<1>(get<7>(GetParam()));
|
Target targetId = get<1>(get<7>(GetParam()));
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_CPU &&
|
||||||
dilation.width == 2 && dilation.height == 2)
|
dilation.width == 2 && dilation.height == 2)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
@ -235,8 +241,8 @@ TEST_P(LRN, Accuracy)
|
|||||||
float bias = get<2>(GetParam())[2];
|
float bias = get<2>(GetParam())[2];
|
||||||
bool normBySize = get<3>(GetParam());
|
bool normBySize = get<3>(GetParam());
|
||||||
std::string nrmType = get<4>(GetParam());
|
std::string nrmType = get<4>(GetParam());
|
||||||
int backendId = get<0>(get<5>(GetParam()));
|
Backend backendId = get<0>(get<5>(GetParam()));
|
||||||
int targetId = get<1>(get<5>(GetParam()));
|
Target targetId = get<1>(get<5>(GetParam()));
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
|
|
||||||
@ -276,8 +282,8 @@ TEST_P(AvePooling, Accuracy)
|
|||||||
Size outSize = get<1>(GetParam());; // Input size will be computed from parameters.
|
Size outSize = get<1>(GetParam());; // Input size will be computed from parameters.
|
||||||
Size kernel = get<2>(GetParam());
|
Size kernel = get<2>(GetParam());
|
||||||
Size stride = get<3>(GetParam());
|
Size stride = get<3>(GetParam());
|
||||||
int backendId = get<0>(get<4>(GetParam()));
|
Backend backendId = get<0>(get<4>(GetParam()));
|
||||||
int targetId = get<1>(get<4>(GetParam()));
|
Target targetId = get<1>(get<4>(GetParam()));
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE && targetId == DNN_TARGET_MYRIAD)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
|
|
||||||
@ -317,8 +323,8 @@ TEST_P(MaxPooling, Accuracy)
|
|||||||
Size kernel = get<2>(GetParam());
|
Size kernel = get<2>(GetParam());
|
||||||
Size stride = get<3>(GetParam());
|
Size stride = get<3>(GetParam());
|
||||||
Size pad = get<4>(GetParam());
|
Size pad = get<4>(GetParam());
|
||||||
int backendId = get<0>(get<5>(GetParam()));
|
Backend backendId = get<0>(get<5>(GetParam()));
|
||||||
int targetId = get<1>(get<5>(GetParam()));
|
Target targetId = get<1>(get<5>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.set("pool", "max");
|
lp.set("pool", "max");
|
||||||
@ -355,8 +361,8 @@ TEST_P(FullyConnected, Accuracy)
|
|||||||
Size inSize = get<1>(GetParam());
|
Size inSize = get<1>(GetParam());
|
||||||
int outChannels = get<2>(GetParam());
|
int outChannels = get<2>(GetParam());
|
||||||
bool hasBias = get<3>(GetParam());
|
bool hasBias = get<3>(GetParam());
|
||||||
int backendId = get<0>(get<4>(GetParam()));
|
Backend backendId = get<0>(get<4>(GetParam()));
|
||||||
int targetId = get<1>(get<4>(GetParam()));
|
Target targetId = get<1>(get<4>(GetParam()));
|
||||||
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
if (backendId == DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
|
|
||||||
@ -394,8 +400,8 @@ typedef TestWithParam<tuple<int, tuple<Backend, Target> > > SoftMax;
|
|||||||
TEST_P(SoftMax, Accuracy)
|
TEST_P(SoftMax, Accuracy)
|
||||||
{
|
{
|
||||||
int inChannels = get<0>(GetParam());
|
int inChannels = get<0>(GetParam());
|
||||||
int backendId = get<0>(get<1>(GetParam()));
|
Backend backendId = get<0>(get<1>(GetParam()));
|
||||||
int targetId = get<1>(get<1>(GetParam()));
|
Target targetId = get<1>(get<1>(GetParam()));
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.type = "SoftMax";
|
lp.type = "SoftMax";
|
||||||
lp.name = "testLayer";
|
lp.name = "testLayer";
|
||||||
@ -457,7 +463,7 @@ TEST_P(Test_Halide_layers, MaxPoolUnpool)
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
static const int kNumChannels = 3;
|
static const int kNumChannels = 3;
|
||||||
|
|
||||||
void testInPlaceActivation(LayerParams& lp, int backendId, int targetId)
|
void testInPlaceActivation(LayerParams& lp, Backend backendId, Target targetId)
|
||||||
{
|
{
|
||||||
EXPECT_FALSE(lp.name.empty());
|
EXPECT_FALSE(lp.name.empty());
|
||||||
|
|
||||||
@ -485,8 +491,8 @@ TEST_P(BatchNorm, Accuracy)
|
|||||||
bool hasWeights = get<0>(GetParam());
|
bool hasWeights = get<0>(GetParam());
|
||||||
bool hasBias = get<1>(GetParam());
|
bool hasBias = get<1>(GetParam());
|
||||||
float epsilon = get<2>(GetParam());
|
float epsilon = get<2>(GetParam());
|
||||||
int backendId = get<0>(get<3>(GetParam()));
|
Backend backendId = get<0>(get<3>(GetParam()));
|
||||||
int targetId = get<1>(get<3>(GetParam()));
|
Target targetId = get<1>(get<3>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.set("has_weight", hasWeights);
|
lp.set("has_weight", hasWeights);
|
||||||
@ -518,8 +524,8 @@ typedef TestWithParam<tuple<float, tuple<Backend, Target> > > ReLU;
|
|||||||
TEST_P(ReLU, Accuracy)
|
TEST_P(ReLU, Accuracy)
|
||||||
{
|
{
|
||||||
float negativeSlope = get<0>(GetParam());
|
float negativeSlope = get<0>(GetParam());
|
||||||
int backendId = get<0>(get<1>(GetParam()));
|
Backend backendId = get<0>(get<1>(GetParam()));
|
||||||
int targetId = get<1>(get<1>(GetParam()));
|
Target targetId = get<1>(get<1>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.set("negative_slope", negativeSlope);
|
lp.set("negative_slope", negativeSlope);
|
||||||
@ -536,8 +542,8 @@ INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, ReLU, Combine(
|
|||||||
typedef TestWithParam<tuple<std::string, tuple<Backend, Target> > > NoParamActivation;
|
typedef TestWithParam<tuple<std::string, tuple<Backend, Target> > > NoParamActivation;
|
||||||
TEST_P(NoParamActivation, Accuracy)
|
TEST_P(NoParamActivation, Accuracy)
|
||||||
{
|
{
|
||||||
int backendId = get<0>(get<1>(GetParam()));
|
Backend backendId = get<0>(get<1>(GetParam()));
|
||||||
int targetId = get<1>(get<1>(GetParam()));
|
Target targetId = get<1>(get<1>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.type = get<0>(GetParam());
|
lp.type = get<0>(GetParam());
|
||||||
@ -555,8 +561,8 @@ TEST_P(Power, Accuracy)
|
|||||||
float power = get<0>(GetParam())[0];
|
float power = get<0>(GetParam())[0];
|
||||||
float scale = get<0>(GetParam())[1];
|
float scale = get<0>(GetParam())[1];
|
||||||
float shift = get<0>(GetParam())[2];
|
float shift = get<0>(GetParam())[2];
|
||||||
int backendId = get<0>(get<1>(GetParam()));
|
Backend backendId = get<0>(get<1>(GetParam()));
|
||||||
int targetId = get<1>(get<1>(GetParam()));
|
Target targetId = get<1>(get<1>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.set("power", power);
|
lp.set("power", power);
|
||||||
@ -589,8 +595,8 @@ typedef TestWithParam<tuple<bool, tuple<Backend, Target> > > Scale;
|
|||||||
TEST_P(Scale, Accuracy)
|
TEST_P(Scale, Accuracy)
|
||||||
{
|
{
|
||||||
bool hasBias = get<0>(GetParam());
|
bool hasBias = get<0>(GetParam());
|
||||||
int backendId = get<0>(get<1>(GetParam()));
|
Backend backendId = get<0>(get<1>(GetParam()));
|
||||||
int targetId = get<1>(get<1>(GetParam()));
|
Target targetId = get<1>(get<1>(GetParam()));
|
||||||
|
|
||||||
LayerParams lp;
|
LayerParams lp;
|
||||||
lp.set("bias_term", hasBias);
|
lp.set("bias_term", hasBias);
|
||||||
@ -624,8 +630,8 @@ TEST_P(Concat, Accuracy)
|
|||||||
{
|
{
|
||||||
Vec3i inSize = get<0>(GetParam());
|
Vec3i inSize = get<0>(GetParam());
|
||||||
Vec3i numChannels = get<1>(GetParam());
|
Vec3i numChannels = get<1>(GetParam());
|
||||||
int backendId = get<0>(get<2>(GetParam()));
|
Backend backendId = get<0>(get<2>(GetParam()));
|
||||||
int targetId = get<1>(get<2>(GetParam()));
|
Target targetId = get<1>(get<2>(GetParam()));
|
||||||
|
|
||||||
Net net;
|
Net net;
|
||||||
|
|
||||||
@ -692,8 +698,8 @@ TEST_P(Eltwise, Accuracy)
|
|||||||
std::string op = get<1>(GetParam());
|
std::string op = get<1>(GetParam());
|
||||||
int numConv = get<2>(GetParam());
|
int numConv = get<2>(GetParam());
|
||||||
bool weighted = get<3>(GetParam());
|
bool weighted = get<3>(GetParam());
|
||||||
int backendId = get<0>(get<4>(GetParam()));
|
Backend backendId = get<0>(get<4>(GetParam()));
|
||||||
int targetId = get<1>(get<4>(GetParam()));
|
Target targetId = get<1>(get<4>(GetParam()));
|
||||||
|
|
||||||
Net net;
|
Net net;
|
||||||
|
|
||||||
|
@ -1205,14 +1205,6 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) CV_OVERRIDE
|
|
||||||
{
|
|
||||||
CV_TRACE_FUNCTION();
|
|
||||||
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
|
|
||||||
|
|
||||||
Layer::forward_fallback(inputs, outputs, internals);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int outWidth, outHeight, zoomFactor;
|
int outWidth, outHeight, zoomFactor;
|
||||||
};
|
};
|
||||||
@ -1225,7 +1217,7 @@ TEST_P(Test_Caffe_layers, DISABLED_Interp) // requires patched protobuf (availa
|
|||||||
{
|
{
|
||||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||||
throw SkipTestException("");
|
throw SkipTestException("");
|
||||||
// Test a cusom layer.
|
// Test a custom layer.
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(Interp, CustomInterpLayer);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -230,6 +230,13 @@ TEST_P(Test_TensorFlow_layers, flatten)
|
|||||||
runTensorFlowNet("unfused_flatten_unknown_batch");
|
runTensorFlowNet("unfused_flatten_unknown_batch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_TensorFlow_layers, leaky_relu)
|
||||||
|
{
|
||||||
|
runTensorFlowNet("leaky_relu_order1");
|
||||||
|
runTensorFlowNet("leaky_relu_order2");
|
||||||
|
runTensorFlowNet("leaky_relu_order3");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(Test_TensorFlow_layers, l2_normalize)
|
TEST_P(Test_TensorFlow_layers, l2_normalize)
|
||||||
{
|
{
|
||||||
runTensorFlowNet("l2_normalize");
|
runTensorFlowNet("l2_normalize");
|
||||||
|
@ -69,100 +69,119 @@ TEST(Torch_Importer, simple_read)
|
|||||||
ASSERT_FALSE(net.empty());
|
ASSERT_FALSE(net.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void runTorchNet(String prefix, int targetId = DNN_TARGET_CPU, String outLayerName = "",
|
class Test_Torch_layers : public DNNTestLayer
|
||||||
bool check2ndBlob = false, bool isBinary = false)
|
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
void runTorchNet(const String& prefix, String outLayerName = "",
|
||||||
|
bool check2ndBlob = false, bool isBinary = false,
|
||||||
|
double l1 = 0.0, double lInf = 0.0)
|
||||||
|
{
|
||||||
String suffix = (isBinary) ? ".dat" : ".txt";
|
String suffix = (isBinary) ? ".dat" : ".txt";
|
||||||
|
|
||||||
Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
|
|
||||||
ASSERT_FALSE(net.empty());
|
|
||||||
|
|
||||||
net.setPreferableBackend(DNN_BACKEND_OPENCV);
|
|
||||||
net.setPreferableTarget(targetId);
|
|
||||||
|
|
||||||
Mat inp, outRef;
|
Mat inp, outRef;
|
||||||
ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
|
ASSERT_NO_THROW( inp = readTorchBlob(_tf(prefix + "_input" + suffix), isBinary) );
|
||||||
ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
|
ASSERT_NO_THROW( outRef = readTorchBlob(_tf(prefix + "_output" + suffix), isBinary) );
|
||||||
|
|
||||||
|
checkBackend(backend, target, &inp, &outRef);
|
||||||
|
|
||||||
|
Net net = readNetFromTorch(_tf(prefix + "_net" + suffix), isBinary);
|
||||||
|
ASSERT_FALSE(net.empty());
|
||||||
|
|
||||||
|
net.setPreferableBackend(backend);
|
||||||
|
net.setPreferableTarget(target);
|
||||||
|
|
||||||
if (outLayerName.empty())
|
if (outLayerName.empty())
|
||||||
outLayerName = net.getLayerNames().back();
|
outLayerName = net.getLayerNames().back();
|
||||||
|
|
||||||
net.setInput(inp);
|
net.setInput(inp);
|
||||||
std::vector<Mat> outBlobs;
|
std::vector<Mat> outBlobs;
|
||||||
net.forward(outBlobs, outLayerName);
|
net.forward(outBlobs, outLayerName);
|
||||||
normAssert(outRef, outBlobs[0]);
|
l1 = l1 ? l1 : default_l1;
|
||||||
|
lInf = lInf ? lInf : default_lInf;
|
||||||
|
normAssert(outRef, outBlobs[0], "", l1, lInf);
|
||||||
|
|
||||||
if (check2ndBlob)
|
if (check2ndBlob && backend != DNN_BACKEND_INFERENCE_ENGINE)
|
||||||
{
|
{
|
||||||
Mat out2 = outBlobs[1];
|
Mat out2 = outBlobs[1];
|
||||||
Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
|
Mat ref2 = readTorchBlob(_tf(prefix + "_output_2" + suffix), isBinary);
|
||||||
normAssert(out2, ref2);
|
normAssert(out2, ref2, "", l1, lInf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
};
|
||||||
typedef testing::TestWithParam<Target> Test_Torch_layers;
|
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_convolution)
|
TEST_P(Test_Torch_layers, run_convolution)
|
||||||
{
|
{
|
||||||
runTorchNet("net_conv", GetParam(), "", false, true);
|
if ((backend == DNN_BACKEND_INFERENCE_ENGINE && target != DNN_TARGET_CPU) ||
|
||||||
|
(backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16))
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_conv", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_pool_max)
|
TEST_P(Test_Torch_layers, run_pool_max)
|
||||||
{
|
{
|
||||||
runTorchNet("net_pool_max", GetParam(), "", true);
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_pool_max", "", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_pool_ave)
|
TEST_P(Test_Torch_layers, run_pool_ave)
|
||||||
{
|
{
|
||||||
runTorchNet("net_pool_ave", GetParam());
|
runTorchNet("net_pool_ave");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_reshape)
|
TEST_P(Test_Torch_layers, run_reshape)
|
||||||
{
|
{
|
||||||
int targetId = GetParam();
|
runTorchNet("net_reshape");
|
||||||
runTorchNet("net_reshape", targetId);
|
runTorchNet("net_reshape_batch");
|
||||||
runTorchNet("net_reshape_batch", targetId);
|
runTorchNet("net_reshape_channels", "", false, true);
|
||||||
runTorchNet("net_reshape_single_sample", targetId);
|
}
|
||||||
runTorchNet("net_reshape_channels", targetId, "", false, true);
|
|
||||||
|
TEST_P(Test_Torch_layers, run_reshape_single_sample)
|
||||||
|
{
|
||||||
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL_FP16)
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_reshape_single_sample", "", false, false,
|
||||||
|
(target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.0052 : 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_linear)
|
TEST_P(Test_Torch_layers, run_linear)
|
||||||
{
|
{
|
||||||
runTorchNet("net_linear_2d", GetParam());
|
if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_linear_2d");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_concat)
|
TEST_P(Test_Torch_layers, run_concat)
|
||||||
{
|
{
|
||||||
int targetId = GetParam();
|
runTorchNet("net_concat", "l5_torchMerge");
|
||||||
runTorchNet("net_concat", targetId, "l5_torchMerge");
|
runTorchNet("net_depth_concat", "", false, true, 0.0,
|
||||||
runTorchNet("net_depth_concat", targetId, "", false, true);
|
target == DNN_TARGET_OPENCL_FP16 ? 0.021 : 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_deconv)
|
TEST_P(Test_Torch_layers, run_deconv)
|
||||||
{
|
{
|
||||||
runTorchNet("net_deconv", GetParam());
|
runTorchNet("net_deconv");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, run_batch_norm)
|
TEST_P(Test_Torch_layers, run_batch_norm)
|
||||||
{
|
{
|
||||||
runTorchNet("net_batch_norm", GetParam(), "", false, true);
|
runTorchNet("net_batch_norm", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_prelu)
|
TEST_P(Test_Torch_layers, net_prelu)
|
||||||
{
|
{
|
||||||
runTorchNet("net_prelu", GetParam());
|
runTorchNet("net_prelu");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_cadd_table)
|
TEST_P(Test_Torch_layers, net_cadd_table)
|
||||||
{
|
{
|
||||||
runTorchNet("net_cadd_table", GetParam());
|
runTorchNet("net_cadd_table");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_softmax)
|
TEST_P(Test_Torch_layers, net_softmax)
|
||||||
{
|
{
|
||||||
int targetId = GetParam();
|
runTorchNet("net_softmax");
|
||||||
runTorchNet("net_softmax", targetId);
|
runTorchNet("net_softmax_spatial");
|
||||||
runTorchNet("net_softmax_spatial", targetId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_logsoftmax)
|
TEST_P(Test_Torch_layers, net_logsoftmax)
|
||||||
@ -173,40 +192,55 @@ TEST_P(Test_Torch_layers, net_logsoftmax)
|
|||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_lp_pooling)
|
TEST_P(Test_Torch_layers, net_lp_pooling)
|
||||||
{
|
{
|
||||||
int targetId = GetParam();
|
runTorchNet("net_lp_pooling_square", "", false, true);
|
||||||
runTorchNet("net_lp_pooling_square", targetId, "", false, true);
|
runTorchNet("net_lp_pooling_power", "", false, true);
|
||||||
runTorchNet("net_lp_pooling_power", targetId, "", false, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
|
TEST_P(Test_Torch_layers, net_conv_gemm_lrn)
|
||||||
{
|
{
|
||||||
runTorchNet("net_conv_gemm_lrn", GetParam(), "", false, true);
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD)
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_conv_gemm_lrn", "", false, true,
|
||||||
|
target == DNN_TARGET_OPENCL_FP16 ? 0.046 : 0.0,
|
||||||
|
target == DNN_TARGET_OPENCL_FP16 ? 0.023 : 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_inception_block)
|
TEST_P(Test_Torch_layers, net_inception_block)
|
||||||
{
|
{
|
||||||
runTorchNet("net_inception_block", GetParam(), "", false, true);
|
runTorchNet("net_inception_block", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_normalize)
|
TEST_P(Test_Torch_layers, net_normalize)
|
||||||
{
|
{
|
||||||
runTorchNet("net_normalize", GetParam(), "", false, true);
|
runTorchNet("net_normalize", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_padding)
|
TEST_P(Test_Torch_layers, net_padding)
|
||||||
{
|
{
|
||||||
int targetId = GetParam();
|
runTorchNet("net_padding", "", false, true);
|
||||||
runTorchNet("net_padding", targetId, "", false, true);
|
runTorchNet("net_spatial_zero_padding", "", false, true);
|
||||||
runTorchNet("net_spatial_zero_padding", targetId, "", false, true);
|
runTorchNet("net_spatial_reflection_padding", "", false, true);
|
||||||
runTorchNet("net_spatial_reflection_padding", targetId, "", false, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Test_Torch_layers, net_non_spatial)
|
TEST_P(Test_Torch_layers, net_non_spatial)
|
||||||
{
|
{
|
||||||
runTorchNet("net_non_spatial", GetParam(), "", false, true);
|
if (backend == DNN_BACKEND_INFERENCE_ENGINE &&
|
||||||
|
(target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16))
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_non_spatial", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, availableDnnTargets());
|
TEST_P(Test_Torch_layers, run_paralel)
|
||||||
|
{
|
||||||
|
if (backend != DNN_BACKEND_OPENCV || target != DNN_TARGET_CPU)
|
||||||
|
throw SkipTestException("");
|
||||||
|
runTorchNet("net_parallel", "l5_torchMerge");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(Test_Torch_layers, net_residual)
|
||||||
|
{
|
||||||
|
runTorchNet("net_residual", "", false, true);
|
||||||
|
}
|
||||||
|
|
||||||
typedef testing::TestWithParam<Target> Test_Torch_nets;
|
typedef testing::TestWithParam<Target> Test_Torch_nets;
|
||||||
|
|
||||||
@ -313,21 +347,6 @@ TEST_P(Test_Torch_nets, FastNeuralStyle_accuracy)
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets());
|
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_nets, availableDnnTargets());
|
||||||
|
|
||||||
// TODO: fix OpenCL and add to the rest of tests
|
|
||||||
TEST(Torch_Importer, run_paralel)
|
|
||||||
{
|
|
||||||
runTorchNet("net_parallel", DNN_TARGET_CPU, "l5_torchMerge");
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(Torch_Importer, DISABLED_run_paralel)
|
|
||||||
{
|
|
||||||
runTorchNet("net_parallel", DNN_TARGET_OPENCL, "l5_torchMerge");
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(Torch_Importer, net_residual)
|
|
||||||
{
|
|
||||||
runTorchNet("net_residual", DNN_TARGET_CPU, "", false, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test a custom layer
|
// Test a custom layer
|
||||||
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
|
// https://github.com/torch/nn/blob/master/doc/convolution.md#nn.SpatialUpSamplingNearest
|
||||||
@ -374,17 +393,29 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void forward(InputArrayOfArrays, OutputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE {}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int scale;
|
int scale;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST(Torch_Importer, upsampling_nearest)
|
TEST_P(Test_Torch_layers, upsampling_nearest)
|
||||||
{
|
{
|
||||||
|
// Test a custom layer.
|
||||||
CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
|
CV_DNN_REGISTER_LAYER_CLASS(SpatialUpSamplingNearest, SpatialUpSamplingNearestLayer);
|
||||||
runTorchNet("net_spatial_upsampling_nearest", DNN_TARGET_CPU, "", false, true);
|
try
|
||||||
|
{
|
||||||
|
runTorchNet("net_spatial_upsampling_nearest", "", false, true);
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
|
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
LayerFactory::unregisterLayer("SpatialUpSamplingNearest");
|
||||||
|
|
||||||
|
// Test an implemented layer.
|
||||||
|
runTorchNet("net_spatial_upsampling_nearest", "", false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(/**/, Test_Torch_layers, dnnBackendsAndTargets());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -307,8 +307,8 @@ icvLoadWindowPos( const char* name, CvRect& rect )
|
|||||||
{
|
{
|
||||||
HKEY hkey;
|
HKEY hkey;
|
||||||
char szKey[1024];
|
char szKey[1024];
|
||||||
strcpy( szKey, icvWindowPosRootKey );
|
strcpy_s( szKey, 1024, icvWindowPosRootKey );
|
||||||
strcat( szKey, name );
|
strcat_s( szKey, 1024, name );
|
||||||
|
|
||||||
rect.x = rect.y = CW_USEDEFAULT;
|
rect.x = rect.y = CW_USEDEFAULT;
|
||||||
rect.width = rect.height = 320;
|
rect.width = rect.height = 320;
|
||||||
@ -368,8 +368,8 @@ icvSaveWindowPos( const char* name, CvRect rect )
|
|||||||
HKEY hkey;
|
HKEY hkey;
|
||||||
char szKey[1024];
|
char szKey[1024];
|
||||||
char rootKey[1024];
|
char rootKey[1024];
|
||||||
strcpy( szKey, icvWindowPosRootKey );
|
strcpy_s( szKey, 1024, icvWindowPosRootKey );
|
||||||
strcat( szKey, name );
|
strcat_s( szKey, 1024, name );
|
||||||
|
|
||||||
if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS )
|
if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS )
|
||||||
{
|
{
|
||||||
@ -379,7 +379,7 @@ icvSaveWindowPos( const char* name, CvRect rect )
|
|||||||
char oldestKey[1024];
|
char oldestKey[1024];
|
||||||
char currentKey[1024];
|
char currentKey[1024];
|
||||||
|
|
||||||
strcpy( rootKey, icvWindowPosRootKey );
|
strcpy_s( rootKey, 1024, icvWindowPosRootKey );
|
||||||
rootKey[strlen(rootKey)-1] = '\0';
|
rootKey[strlen(rootKey)-1] = '\0';
|
||||||
if( RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS )
|
if( RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS )
|
||||||
//RegOpenKeyEx( HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS )
|
//RegOpenKeyEx( HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS )
|
||||||
@ -398,7 +398,7 @@ icvSaveWindowPos( const char* name, CvRect rect )
|
|||||||
oldestTime.dwLowDateTime > accesstime.dwLowDateTime) )
|
oldestTime.dwLowDateTime > accesstime.dwLowDateTime) )
|
||||||
{
|
{
|
||||||
oldestTime = accesstime;
|
oldestTime = accesstime;
|
||||||
strcpy( oldestKey, currentKey );
|
strcpy_s( oldestKey, 1024, currentKey );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1500,6 +1500,8 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam )
|
|||||||
rgn = CreateRectRgn(0, 0, wrc.right, wrc.bottom);
|
rgn = CreateRectRgn(0, 0, wrc.right, wrc.bottom);
|
||||||
rgn1 = CreateRectRgn(cr.left, cr.top, cr.right, cr.bottom);
|
rgn1 = CreateRectRgn(cr.left, cr.top, cr.right, cr.bottom);
|
||||||
rgn2 = CreateRectRgn(tr.left, tr.top, tr.right, tr.bottom);
|
rgn2 = CreateRectRgn(tr.left, tr.top, tr.right, tr.bottom);
|
||||||
|
CV_Assert(rgn != 0, rgn1 != 0, rgn2 != 0);
|
||||||
|
|
||||||
ret = CombineRgn(rgn, rgn, rgn1, RGN_DIFF);
|
ret = CombineRgn(rgn, rgn, rgn1, RGN_DIFF);
|
||||||
ret = CombineRgn(rgn, rgn, rgn2, RGN_DIFF);
|
ret = CombineRgn(rgn, rgn, rgn2, RGN_DIFF);
|
||||||
|
|
||||||
|
@ -1771,7 +1771,7 @@ Corners in the image can be found as the local maxima of this response map.
|
|||||||
size as src .
|
size as src .
|
||||||
@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
|
@param blockSize Neighborhood size (see the details on #cornerEigenValsAndVecs ).
|
||||||
@param ksize Aperture parameter for the Sobel operator.
|
@param ksize Aperture parameter for the Sobel operator.
|
||||||
@param k Harris detector free parameter. See the formula below.
|
@param k Harris detector free parameter. See the formula above.
|
||||||
@param borderType Pixel extrapolation method. See #BorderTypes.
|
@param borderType Pixel extrapolation method. See #BorderTypes.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize,
|
CV_EXPORTS_W void cornerHarris( InputArray src, OutputArray dst, int blockSize,
|
||||||
|
@ -20,8 +20,12 @@ endforeach()
|
|||||||
set(opencv_hdrs "")
|
set(opencv_hdrs "")
|
||||||
set(opencv_userdef_hdrs "")
|
set(opencv_userdef_hdrs "")
|
||||||
foreach(m ${OPENCV_PYTHON_MODULES})
|
foreach(m ${OPENCV_PYTHON_MODULES})
|
||||||
ocv_list_filter(OPENCV_MODULE_${m}_HEADERS "${OPENCV_MODULE_${m}_LOCATION}/include" __hdrs)
|
foreach (hdr ${OPENCV_MODULE_${m}_HEADERS})
|
||||||
list(APPEND opencv_hdrs ${__hdrs})
|
ocv_is_subdir(is_sub "${OPENCV_MODULE_${m}_LOCATION}/include" "${hdr}")
|
||||||
|
if(is_sub)
|
||||||
|
list(APPEND opencv_hdrs "${hdr}")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
file(GLOB userdef_hdrs ${OPENCV_MODULE_${m}_LOCATION}/misc/python/pyopencv*.hpp)
|
file(GLOB userdef_hdrs ${OPENCV_MODULE_${m}_LOCATION}/misc/python/pyopencv*.hpp)
|
||||||
list(APPEND opencv_userdef_hdrs ${userdef_hdrs})
|
list(APPEND opencv_userdef_hdrs ${userdef_hdrs})
|
||||||
endforeach(m)
|
endforeach(m)
|
||||||
|
@ -379,10 +379,9 @@ struct TSParams
|
|||||||
|
|
||||||
class TS
|
class TS
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
// constructor(s) and destructor
|
|
||||||
TS();
|
TS();
|
||||||
virtual ~TS();
|
virtual ~TS();
|
||||||
|
public:
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
@ -484,9 +483,6 @@ public:
|
|||||||
SKIPPED=1
|
SKIPPED=1
|
||||||
};
|
};
|
||||||
|
|
||||||
// get file storage
|
|
||||||
CvFileStorage* get_file_storage();
|
|
||||||
|
|
||||||
// get RNG to generate random input data for a test
|
// get RNG to generate random input data for a test
|
||||||
RNG& get_rng() { return rng; }
|
RNG& get_rng() { return rng; }
|
||||||
|
|
||||||
@ -629,9 +625,6 @@ struct DefaultRngAuto
|
|||||||
void fillGradient(Mat& img, int delta = 5);
|
void fillGradient(Mat& img, int delta = 5);
|
||||||
void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
|
void smoothBorder(Mat& img, const Scalar& color, int delta = 3);
|
||||||
|
|
||||||
void printVersionInfo(bool useStdOut = true);
|
|
||||||
|
|
||||||
|
|
||||||
// Utility functions
|
// Utility functions
|
||||||
|
|
||||||
void addDataSearchPath(const std::string& path);
|
void addDataSearchPath(const std::string& path);
|
||||||
@ -660,6 +653,13 @@ std::string findDataFile(const std::string& relative_path, bool required = true)
|
|||||||
*/
|
*/
|
||||||
std::string findDataDirectory(const std::string& relative_path, bool required = true);
|
std::string findDataDirectory(const std::string& relative_path, bool required = true);
|
||||||
|
|
||||||
|
// Test definitions
|
||||||
|
|
||||||
|
class SystemInfoCollector : public testing::EmptyTestEventListener
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
virtual void OnTestProgramStart(const testing::UnitTest&);
|
||||||
|
};
|
||||||
|
|
||||||
#ifndef __CV_TEST_EXEC_ARGS
|
#ifndef __CV_TEST_EXEC_ARGS
|
||||||
#if defined(_MSC_VER) && (_MSC_VER <= 1400)
|
#if defined(_MSC_VER) && (_MSC_VER <= 1400)
|
||||||
@ -671,15 +671,6 @@ std::string findDataDirectory(const std::string& relative_path, bool required =
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
namespace ocl {
|
|
||||||
void dumpOpenCLDevice();
|
|
||||||
}
|
|
||||||
#define TEST_DUMP_OCL_INFO cvtest::ocl::dumpOpenCLDevice();
|
|
||||||
#else
|
|
||||||
#define TEST_DUMP_OCL_INFO
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void parseCustomOptions(int argc, char **argv);
|
void parseCustomOptions(int argc, char **argv);
|
||||||
|
|
||||||
#define CV_TEST_INIT0_NOOP (void)0
|
#define CV_TEST_INIT0_NOOP (void)0
|
||||||
@ -696,8 +687,7 @@ int main(int argc, char **argv) \
|
|||||||
ts->init(resourcesubdir); \
|
ts->init(resourcesubdir); \
|
||||||
__CV_TEST_EXEC_ARGS(CV_TEST_INIT0_ ## INIT0) \
|
__CV_TEST_EXEC_ARGS(CV_TEST_INIT0_ ## INIT0) \
|
||||||
::testing::InitGoogleTest(&argc, argv); \
|
::testing::InitGoogleTest(&argc, argv); \
|
||||||
cvtest::printVersionInfo(); \
|
::testing::UnitTest::GetInstance()->listeners().Append(new SystemInfoCollector); \
|
||||||
TEST_DUMP_OCL_INFO \
|
|
||||||
__CV_TEST_EXEC_ARGS(__VA_ARGS__) \
|
__CV_TEST_EXEC_ARGS(__VA_ARGS__) \
|
||||||
parseCustomOptions(argc, argv); \
|
parseCustomOptions(argc, argv); \
|
||||||
} \
|
} \
|
||||||
|
@ -637,15 +637,6 @@ void PrintTo(const Size& sz, ::std::ostream* os);
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
namespace cvtest { namespace ocl {
|
|
||||||
void dumpOpenCLDevice();
|
|
||||||
}}
|
|
||||||
#define TEST_DUMP_OCL_INFO cvtest::ocl::dumpOpenCLDevice();
|
|
||||||
#else
|
|
||||||
#define TEST_DUMP_OCL_INFO
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) \
|
#define CV_PERF_TEST_MAIN_INTERNALS(modulename, impls, ...) \
|
||||||
CV_TRACE_FUNCTION(); \
|
CV_TRACE_FUNCTION(); \
|
||||||
@ -654,11 +645,10 @@ void dumpOpenCLDevice();
|
|||||||
::perf::TestBase::Init(std::vector<std::string>(impls, impls + sizeof impls / sizeof *impls), \
|
::perf::TestBase::Init(std::vector<std::string>(impls, impls + sizeof impls / sizeof *impls), \
|
||||||
argc, argv); \
|
argc, argv); \
|
||||||
::testing::InitGoogleTest(&argc, argv); \
|
::testing::InitGoogleTest(&argc, argv); \
|
||||||
cvtest::printVersionInfo(); \
|
::testing::UnitTest::GetInstance()->listeners().Append(new cvtest::SystemInfoCollector); \
|
||||||
::testing::Test::RecordProperty("cv_module_name", #modulename); \
|
::testing::Test::RecordProperty("cv_module_name", #modulename); \
|
||||||
::perf::TestBase::RecordRunParameters(); \
|
::perf::TestBase::RecordRunParameters(); \
|
||||||
__CV_TEST_EXEC_ARGS(__VA_ARGS__) \
|
__CV_TEST_EXEC_ARGS(__VA_ARGS__) \
|
||||||
TEST_DUMP_OCL_INFO \
|
|
||||||
} \
|
} \
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
|
|
||||||
|
@ -43,25 +43,6 @@
|
|||||||
|
|
||||||
#include "opencv2/ts/ocl_test.hpp"
|
#include "opencv2/ts/ocl_test.hpp"
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
|
|
||||||
#define DUMP_CONFIG_PROPERTY(propertyName, propertyValue) \
|
|
||||||
do { \
|
|
||||||
std::stringstream ssName, ssValue;\
|
|
||||||
ssName << propertyName;\
|
|
||||||
ssValue << (propertyValue); \
|
|
||||||
::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
|
|
||||||
} while (false)
|
|
||||||
|
|
||||||
#define DUMP_MESSAGE_STDOUT(msg) \
|
|
||||||
do { \
|
|
||||||
std::cout << msg << std::endl; \
|
|
||||||
} while (false)
|
|
||||||
|
|
||||||
#include <opencv2/core/opencl/opencl_info.hpp>
|
|
||||||
|
|
||||||
#endif // HAVE_OPENCL
|
|
||||||
|
|
||||||
namespace cvtest {
|
namespace cvtest {
|
||||||
namespace ocl {
|
namespace ocl {
|
||||||
|
|
||||||
@ -69,13 +50,6 @@ using namespace cv;
|
|||||||
|
|
||||||
int test_loop_times = 1; // TODO Read from command line / environment
|
int test_loop_times = 1; // TODO Read from command line / environment
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
void dumpOpenCLDevice()
|
|
||||||
{
|
|
||||||
cv::dumpOpenCLInformation();
|
|
||||||
}
|
|
||||||
#endif // HAVE_OPENCL
|
|
||||||
|
|
||||||
Mat TestUtils::readImage(const String &fileName, int flags)
|
Mat TestUtils::readImage(const String &fileName, int flags)
|
||||||
{
|
{
|
||||||
return cv::imread(cvtest::TS::ptr()->get_data_path() + fileName, flags);
|
return cv::imread(cvtest::TS::ptr()->get_data_path() + fileName, flags);
|
||||||
|
@ -74,7 +74,26 @@
|
|||||||
# include <sys/stat.h>
|
# include <sys/stat.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
#define DUMP_CONFIG_PROPERTY(propertyName, propertyValue) \
|
||||||
|
do { \
|
||||||
|
std::stringstream ssName, ssValue;\
|
||||||
|
ssName << propertyName;\
|
||||||
|
ssValue << (propertyValue); \
|
||||||
|
::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
|
#define DUMP_MESSAGE_STDOUT(msg) \
|
||||||
|
do { \
|
||||||
|
std::cout << msg << std::endl; \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
|
#include "opencv2/core/opencl/opencl_info.hpp"
|
||||||
|
|
||||||
|
#endif // HAVE_OPENCL
|
||||||
|
|
||||||
|
#include "opencv2/core/utility.hpp"
|
||||||
#include "opencv_tests_config.hpp"
|
#include "opencv_tests_config.hpp"
|
||||||
|
|
||||||
namespace opencv_test {
|
namespace opencv_test {
|
||||||
@ -230,7 +249,6 @@ bool BaseTest::can_do_fast_forward()
|
|||||||
void BaseTest::safe_run( int start_from )
|
void BaseTest::safe_run( int start_from )
|
||||||
{
|
{
|
||||||
CV_TRACE_FUNCTION();
|
CV_TRACE_FUNCTION();
|
||||||
read_params( ts->get_file_storage() );
|
|
||||||
ts->update_context( 0, -1, true );
|
ts->update_context( 0, -1, true );
|
||||||
ts->update_context( this, -1, true );
|
ts->update_context( this, -1, true );
|
||||||
|
|
||||||
@ -552,8 +570,6 @@ void TS::set_gtest_status()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
CvFileStorage* TS::get_file_storage() { return 0; }
|
|
||||||
|
|
||||||
void TS::update_context( BaseTest* test, int test_case_idx, bool update_ts_context )
|
void TS::update_context( BaseTest* test, int test_case_idx, bool update_ts_context )
|
||||||
{
|
{
|
||||||
if( current_test_info.test != test )
|
if( current_test_info.test != test )
|
||||||
@ -614,8 +630,11 @@ void TS::printf( int streams, const char* fmt, ... )
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static TS ts;
|
TS* TS::ptr()
|
||||||
TS* TS::ptr() { return &ts; }
|
{
|
||||||
|
static TS ts;
|
||||||
|
return &ts;
|
||||||
|
}
|
||||||
|
|
||||||
void fillGradient(Mat& img, int delta)
|
void fillGradient(Mat& img, int delta)
|
||||||
{
|
{
|
||||||
@ -866,6 +885,65 @@ std::string findDataDirectory(const std::string& relative_path, bool required)
|
|||||||
return findData(relative_path, required, true);
|
return findData(relative_path, required, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline static std::string getSnippetFromConfig(const std::string & start, const std::string & end)
|
||||||
|
{
|
||||||
|
const std::string buildInfo = cv::getBuildInformation();
|
||||||
|
size_t pos1 = buildInfo.find(start);
|
||||||
|
if (pos1 != std::string::npos)
|
||||||
|
{
|
||||||
|
pos1 += start.length();
|
||||||
|
pos1 = buildInfo.find_first_not_of(" \t\n\r", pos1);
|
||||||
|
}
|
||||||
|
size_t pos2 = buildInfo.find(end, pos1);
|
||||||
|
if (pos2 != std::string::npos)
|
||||||
|
{
|
||||||
|
pos2 = buildInfo.find_last_not_of(" \t\n\r", pos2);
|
||||||
|
}
|
||||||
|
if (pos1 != std::string::npos && pos2 != std::string::npos && pos1 < pos2)
|
||||||
|
{
|
||||||
|
return buildInfo.substr(pos1, pos2 - pos1 + 1);
|
||||||
|
}
|
||||||
|
return std::string();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static void recordPropertyVerbose(const std::string & property,
|
||||||
|
const std::string & msg,
|
||||||
|
const std::string & value,
|
||||||
|
const std::string & build_value = std::string())
|
||||||
|
{
|
||||||
|
::testing::Test::RecordProperty(property, value);
|
||||||
|
std::cout << msg << ": " << (value.empty() ? std::string("N/A") : value) << std::endl;
|
||||||
|
if (!build_value.empty())
|
||||||
|
{
|
||||||
|
::testing::Test::RecordProperty(property + "_build", build_value);
|
||||||
|
if (build_value != value)
|
||||||
|
std::cout << "WARNING: build value differs from runtime: " << build_value << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _DEBUG
|
||||||
|
#define CV_TEST_BUILD_CONFIG "Debug"
|
||||||
|
#else
|
||||||
|
#define CV_TEST_BUILD_CONFIG "Release"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void SystemInfoCollector::OnTestProgramStart(const testing::UnitTest&)
|
||||||
|
{
|
||||||
|
std::cout << "CTEST_FULL_OUTPUT" << std::endl; // Tell CTest not to discard any output
|
||||||
|
recordPropertyVerbose("cv_version", "OpenCV version", cv::getVersionString(), CV_VERSION);
|
||||||
|
recordPropertyVerbose("cv_vcs_version", "OpenCV VCS version", getSnippetFromConfig("Version control:", "\n"));
|
||||||
|
recordPropertyVerbose("cv_build_type", "Build type", getSnippetFromConfig("Configuration:", "\n"), CV_TEST_BUILD_CONFIG);
|
||||||
|
recordPropertyVerbose("cv_compiler", "Compiler", getSnippetFromConfig("C++ Compiler:", "\n"));
|
||||||
|
recordPropertyVerbose("cv_parallel_framework", "Parallel framework", cv::currentParallelFramework());
|
||||||
|
recordPropertyVerbose("cv_cpu_features", "CPU features", cv::getCPUFeaturesLine());
|
||||||
|
#ifdef HAVE_IPP
|
||||||
|
recordPropertyVerbose("cv_ipp_version", "Intel(R) IPP version", cv::ipp::useIPP() ? cv::ipp::getIppVersion() : "disabled");
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
cv::dumpOpenCLInformation();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
} //namespace cvtest
|
} //namespace cvtest
|
||||||
|
|
||||||
/* End of file. */
|
/* End of file. */
|
||||||
|
@ -2973,143 +2973,6 @@ MatComparator::operator()(const char* expr1, const char* expr2,
|
|||||||
<< "- " << expr2 << ":\n" << MatPart(m2part, border > 0 ? &loc : 0) << ".\n";
|
<< "- " << expr2 << ":\n" << MatPart(m2part, border > 0 ? &loc : 0) << ".\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
void printVersionInfo(bool useStdOut)
|
|
||||||
{
|
|
||||||
// Tell CTest not to discard any output
|
|
||||||
if(useStdOut) std::cout << "CTEST_FULL_OUTPUT" << std::endl;
|
|
||||||
|
|
||||||
::testing::Test::RecordProperty("cv_version", CV_VERSION);
|
|
||||||
if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl;
|
|
||||||
|
|
||||||
std::string buildInfo( cv::getBuildInformation() );
|
|
||||||
|
|
||||||
size_t pos1 = buildInfo.find("Version control");
|
|
||||||
size_t pos2 = buildInfo.find('\n', pos1);
|
|
||||||
if(pos1 != std::string::npos && pos2 != std::string::npos)
|
|
||||||
{
|
|
||||||
size_t value_start = buildInfo.rfind(' ', pos2) + 1;
|
|
||||||
std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
|
|
||||||
::testing::Test::RecordProperty("cv_vcs_version", ver);
|
|
||||||
if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
pos1 = buildInfo.find("inner version");
|
|
||||||
pos2 = buildInfo.find('\n', pos1);
|
|
||||||
if(pos1 != std::string::npos && pos2 != std::string::npos)
|
|
||||||
{
|
|
||||||
size_t value_start = buildInfo.rfind(' ', pos2) + 1;
|
|
||||||
std::string ver( buildInfo.substr(value_start, pos2 - value_start) );
|
|
||||||
::testing::Test::RecordProperty("cv_inner_vcs_version", ver);
|
|
||||||
if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * build_type =
|
|
||||||
#ifdef _DEBUG
|
|
||||||
"debug";
|
|
||||||
#else
|
|
||||||
"release";
|
|
||||||
#endif
|
|
||||||
|
|
||||||
::testing::Test::RecordProperty("cv_build_type", build_type);
|
|
||||||
if (useStdOut) std::cout << "Build type: " << build_type << std::endl;
|
|
||||||
|
|
||||||
const char* parallel_framework = currentParallelFramework();
|
|
||||||
|
|
||||||
if (parallel_framework) {
|
|
||||||
::testing::Test::RecordProperty("cv_parallel_framework", parallel_framework);
|
|
||||||
if (useStdOut) std::cout << "Parallel framework: " << parallel_framework << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string cpu_features;
|
|
||||||
|
|
||||||
#if CV_POPCNT
|
|
||||||
if (checkHardwareSupport(CV_CPU_POPCNT)) cpu_features += " popcnt";
|
|
||||||
#endif
|
|
||||||
#if CV_MMX
|
|
||||||
if (checkHardwareSupport(CV_CPU_MMX)) cpu_features += " mmx";
|
|
||||||
#endif
|
|
||||||
#if CV_SSE
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse";
|
|
||||||
#endif
|
|
||||||
#if CV_SSE2
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2";
|
|
||||||
#endif
|
|
||||||
#if CV_SSE3
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3";
|
|
||||||
#endif
|
|
||||||
#if CV_SSSE3
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3";
|
|
||||||
#endif
|
|
||||||
#if CV_SSE4_1
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1";
|
|
||||||
#endif
|
|
||||||
#if CV_SSE4_2
|
|
||||||
if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX2
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX2)) cpu_features += " avx2";
|
|
||||||
#endif
|
|
||||||
#if CV_FMA3
|
|
||||||
if (checkHardwareSupport(CV_CPU_FMA3)) cpu_features += " fma3";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512F
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512F)) cpu_features += " avx-512f";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512BW
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512BW)) cpu_features += " avx-512bw";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512CD
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512CD)) cpu_features += " avx-512cd";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512DQ
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512DQ)) cpu_features += " avx-512dq";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512ER
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512ER)) cpu_features += " avx-512er";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512IFMA512
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512IFMA512)) cpu_features += " avx-512ifma512";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512PF
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512PF)) cpu_features += " avx-512pf";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512VBMI
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512VBMI)) cpu_features += " avx-512vbmi";
|
|
||||||
#endif
|
|
||||||
#if CV_AVX_512VL
|
|
||||||
if (checkHardwareSupport(CV_CPU_AVX_512VL)) cpu_features += " avx-512vl";
|
|
||||||
#endif
|
|
||||||
#if CV_NEON
|
|
||||||
if (checkHardwareSupport(CV_CPU_NEON)) cpu_features += " neon";
|
|
||||||
#endif
|
|
||||||
#if CV_FP16
|
|
||||||
if (checkHardwareSupport(CV_CPU_FP16)) cpu_features += " fp16";
|
|
||||||
#endif
|
|
||||||
#if CV_VSX
|
|
||||||
if (checkHardwareSupport(CV_CPU_VSX)) cpu_features += " VSX";
|
|
||||||
#endif
|
|
||||||
|
|
||||||
cpu_features.erase(0, 1); // erase initial space
|
|
||||||
|
|
||||||
::testing::Test::RecordProperty("cv_cpu_features", cpu_features);
|
|
||||||
if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl;
|
|
||||||
|
|
||||||
#ifdef HAVE_IPP
|
|
||||||
const char * ipp_optimization = cv::ipp::useIPP()? "enabled" : "disabled";
|
|
||||||
::testing::Test::RecordProperty("cv_ipp_optimization", ipp_optimization);
|
|
||||||
if (useStdOut) std::cout << "Intel(R) IPP optimization: " << ipp_optimization << std::endl;
|
|
||||||
|
|
||||||
cv::String ippVer = cv::ipp::getIppVersion();
|
|
||||||
::testing::Test::RecordProperty("cv_ipp_version", ippVer);
|
|
||||||
if(useStdOut) std::cout << "Intel(R) IPP version: " << ippVer.c_str() << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void threshold( const Mat& _src, Mat& _dst,
|
void threshold( const Mat& _src, Mat& _dst,
|
||||||
double thresh, double maxval, int thresh_type )
|
double thresh, double maxval, int thresh_type )
|
||||||
{
|
{
|
||||||
|
@ -905,7 +905,7 @@ public:
|
|||||||
|
|
||||||
/** @brief Writes the next video frame
|
/** @brief Writes the next video frame
|
||||||
|
|
||||||
@param image The written frame
|
@param image The written frame. In general, color images are expected in BGR format.
|
||||||
|
|
||||||
The function/method writes the specified image to video file. It must have the same size as has
|
The function/method writes the specified image to video file. It must have the same size as has
|
||||||
been specified when opening the video writer.
|
been specified when opening the video writer.
|
||||||
|
@ -811,6 +811,8 @@ void videoDevice::NukeDownstream(IBaseFilter *pBF){
|
|||||||
IEnumPins *pins = NULL;
|
IEnumPins *pins = NULL;
|
||||||
PIN_INFO pininfo;
|
PIN_INFO pininfo;
|
||||||
HRESULT hr = pBF->EnumPins(&pins);
|
HRESULT hr = pBF->EnumPins(&pins);
|
||||||
|
if (hr != S_OK || !pins)
|
||||||
|
return;
|
||||||
pins->Reset();
|
pins->Reset();
|
||||||
while (hr == NOERROR)
|
while (hr == NOERROR)
|
||||||
{
|
{
|
||||||
@ -838,7 +840,7 @@ void videoDevice::NukeDownstream(IBaseFilter *pBF){
|
|||||||
pP->Release();
|
pP->Release();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pins) pins->Release();
|
pins->Release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -999,17 +1001,6 @@ videoDevice::~videoDevice(){
|
|||||||
(pGraph) = 0;
|
(pGraph) = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
//delete our pointers
|
|
||||||
delete pDestFilter;
|
|
||||||
delete pVideoInputFilter;
|
|
||||||
delete pGrabberF;
|
|
||||||
delete pGrabber;
|
|
||||||
delete pControl;
|
|
||||||
delete streamConf;
|
|
||||||
delete pMediaEvent;
|
|
||||||
delete pCaptureGraph;
|
|
||||||
delete pGraph;
|
|
||||||
|
|
||||||
DebugPrintOut("SETUP: Device %i disconnected and freed\n\n",myID);
|
DebugPrintOut("SETUP: Device %i disconnected and freed\n\n",myID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1654,7 +1645,7 @@ bool videoInput::getVideoSettingFilter(int deviceID, long Property, long &min, l
|
|||||||
IAMVideoProcAmp *pAMVideoProcAmp = NULL;
|
IAMVideoProcAmp *pAMVideoProcAmp = NULL;
|
||||||
|
|
||||||
hr = VD->pVideoInputFilter->QueryInterface(IID_IAMVideoProcAmp, (void**)&pAMVideoProcAmp);
|
hr = VD->pVideoInputFilter->QueryInterface(IID_IAMVideoProcAmp, (void**)&pAMVideoProcAmp);
|
||||||
if(FAILED(hr)){
|
if(FAILED(hr) || !pAMVideoProcAmp){
|
||||||
DebugPrintOut("setVideoSetting - QueryInterface Error\n");
|
DebugPrintOut("setVideoSetting - QueryInterface Error\n");
|
||||||
#if 0
|
#if 0
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
||||||
@ -1676,7 +1667,7 @@ bool videoInput::getVideoSettingFilter(int deviceID, long Property, long &min, l
|
|||||||
hr = pAMVideoProcAmp->Get(Property, ¤tValue, &flags);
|
hr = pAMVideoProcAmp->Get(Property, ¤tValue, &flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(pAMVideoProcAmp)pAMVideoProcAmp->Release();
|
pAMVideoProcAmp->Release();
|
||||||
#if 0
|
#if 0
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL;
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL;
|
||||||
@ -1881,7 +1872,7 @@ bool videoInput::getVideoSettingCamera(int deviceID, long Property, long &min, l
|
|||||||
IAMCameraControl *pIAMCameraControl = NULL;
|
IAMCameraControl *pIAMCameraControl = NULL;
|
||||||
|
|
||||||
hr = VD->pVideoInputFilter->QueryInterface(IID_IAMCameraControl, (void**)&pIAMCameraControl);
|
hr = VD->pVideoInputFilter->QueryInterface(IID_IAMCameraControl, (void**)&pIAMCameraControl);
|
||||||
if(FAILED(hr)){
|
if(FAILED(hr) || !pIAMCameraControl){
|
||||||
DebugPrintOut("setVideoSetting - QueryInterface Error\n");
|
DebugPrintOut("setVideoSetting - QueryInterface Error\n");
|
||||||
#if 0
|
#if 0
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
||||||
@ -1902,7 +1893,7 @@ bool videoInput::getVideoSettingCamera(int deviceID, long Property, long &min, l
|
|||||||
hr = pIAMCameraControl->Get(Property, ¤tValue, &flags);
|
hr = pIAMCameraControl->Get(Property, ¤tValue, &flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(pIAMCameraControl)pIAMCameraControl->Release();
|
pIAMCameraControl->Release();
|
||||||
#if 0
|
#if 0
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter->Release();
|
||||||
if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL;
|
if(VD->pVideoInputFilter)VD->pVideoInputFilter = NULL;
|
||||||
@ -2595,7 +2586,7 @@ int videoInput::start(int deviceID, videoDevice *VD){
|
|||||||
|
|
||||||
//we do this because webcams don't have a preview mode
|
//we do this because webcams don't have a preview mode
|
||||||
hr = VD->pCaptureGraph->FindInterface(&CAPTURE_MODE, &MEDIATYPE_Video, VD->pVideoInputFilter, IID_IAMStreamConfig, (void **)&VD->streamConf);
|
hr = VD->pCaptureGraph->FindInterface(&CAPTURE_MODE, &MEDIATYPE_Video, VD->pVideoInputFilter, IID_IAMStreamConfig, (void **)&VD->streamConf);
|
||||||
if(FAILED(hr)){
|
if(FAILED(hr) || !VD->streamConf){
|
||||||
DebugPrintOut("ERROR: Couldn't config the stream!\n");
|
DebugPrintOut("ERROR: Couldn't config the stream!\n");
|
||||||
stopDevice(deviceID);
|
stopDevice(deviceID);
|
||||||
return hr;
|
return hr;
|
||||||
@ -2737,14 +2728,8 @@ int videoInput::start(int deviceID, videoDevice *VD){
|
|||||||
|
|
||||||
//lets try freeing our stream conf here too
|
//lets try freeing our stream conf here too
|
||||||
//this will fail if the device is already running
|
//this will fail if the device is already running
|
||||||
if(VD->streamConf){
|
|
||||||
VD->streamConf->Release();
|
VD->streamConf->Release();
|
||||||
VD->streamConf = NULL;
|
VD->streamConf = NULL;
|
||||||
}else{
|
|
||||||
DebugPrintOut("ERROR: connecting device - prehaps it is already being used?\n");
|
|
||||||
stopDevice(deviceID);
|
|
||||||
return S_FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//NULL RENDERER//
|
//NULL RENDERER//
|
||||||
@ -3093,7 +3078,7 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter *
|
|||||||
IAMCrossbar *pXBar1 = NULL;
|
IAMCrossbar *pXBar1 = NULL;
|
||||||
HRESULT hr = pBuild->FindInterface(&LOOK_UPSTREAM_ONLY, NULL, pVidFilter,
|
HRESULT hr = pBuild->FindInterface(&LOOK_UPSTREAM_ONLY, NULL, pVidFilter,
|
||||||
IID_IAMCrossbar, (void**)&pXBar1);
|
IID_IAMCrossbar, (void**)&pXBar1);
|
||||||
if (SUCCEEDED(hr))
|
if (SUCCEEDED(hr) && pXBar1)
|
||||||
{
|
{
|
||||||
|
|
||||||
bool foundDevice = false;
|
bool foundDevice = false;
|
||||||
@ -3163,10 +3148,6 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter *
|
|||||||
//we were getting a crash otherwise
|
//we were getting a crash otherwise
|
||||||
//if(Crossbar)Crossbar->Release();
|
//if(Crossbar)Crossbar->Release();
|
||||||
//if(Crossbar)Crossbar = NULL;
|
//if(Crossbar)Crossbar = NULL;
|
||||||
|
|
||||||
if(pXBar1)pXBar1->Release();
|
|
||||||
if(pXBar1)pXBar1 = NULL;
|
|
||||||
|
|
||||||
}else{
|
}else{
|
||||||
DebugPrintOut("SETUP: You are a webcam or snazzy firewire cam! No Crossbar needed\n");
|
DebugPrintOut("SETUP: You are a webcam or snazzy firewire cam! No Crossbar needed\n");
|
||||||
return hr;
|
return hr;
|
||||||
|
@ -1224,7 +1224,11 @@ Ptr<IVideoCapture> cv::createGStreamerCapture(int index)
|
|||||||
class CvVideoWriter_GStreamer : public CvVideoWriter
|
class CvVideoWriter_GStreamer : public CvVideoWriter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
CvVideoWriter_GStreamer() { init(); }
|
CvVideoWriter_GStreamer()
|
||||||
|
: pipeline(0), source(0), encodebin(0), file(0), buffer(0), input_pix_fmt(0),
|
||||||
|
num_frames(0), framerate(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
virtual ~CvVideoWriter_GStreamer() CV_OVERRIDE { close(); }
|
virtual ~CvVideoWriter_GStreamer() CV_OVERRIDE { close(); }
|
||||||
|
|
||||||
virtual bool open( const char* filename, int fourcc,
|
virtual bool open( const char* filename, int fourcc,
|
||||||
@ -1232,7 +1236,6 @@ public:
|
|||||||
virtual void close();
|
virtual void close();
|
||||||
virtual bool writeFrame( const IplImage* image ) CV_OVERRIDE;
|
virtual bool writeFrame( const IplImage* image ) CV_OVERRIDE;
|
||||||
protected:
|
protected:
|
||||||
void init();
|
|
||||||
const char* filenameToMimetype(const char* filename);
|
const char* filenameToMimetype(const char* filename);
|
||||||
GstElement* pipeline;
|
GstElement* pipeline;
|
||||||
GstElement* source;
|
GstElement* source;
|
||||||
@ -1245,22 +1248,6 @@ protected:
|
|||||||
double framerate;
|
double framerate;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*!
|
|
||||||
* \brief CvVideoWriter_GStreamer::init
|
|
||||||
* initialise all variables
|
|
||||||
*/
|
|
||||||
void CvVideoWriter_GStreamer::init()
|
|
||||||
{
|
|
||||||
pipeline = NULL;
|
|
||||||
source = NULL;
|
|
||||||
encodebin = NULL;
|
|
||||||
file = NULL;
|
|
||||||
buffer = NULL;
|
|
||||||
|
|
||||||
num_frames = 0;
|
|
||||||
framerate = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief CvVideoWriter_GStreamer::close
|
* \brief CvVideoWriter_GStreamer::close
|
||||||
* ends the pipeline by sending EOS and destroys the pipeline and all
|
* ends the pipeline by sending EOS and destroys the pipeline and all
|
||||||
@ -1282,17 +1269,19 @@ void CvVideoWriter_GStreamer::close()
|
|||||||
//wait for EOS to trickle down the pipeline. This will let all elements finish properly
|
//wait for EOS to trickle down the pipeline. This will let all elements finish properly
|
||||||
GstBus* bus = gst_element_get_bus(pipeline);
|
GstBus* bus = gst_element_get_bus(pipeline);
|
||||||
GstMessage *msg = gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE, (GstMessageType)(GST_MESSAGE_ERROR | GST_MESSAGE_EOS));
|
GstMessage *msg = gst_bus_timed_pop_filtered(bus, GST_CLOCK_TIME_NONE, (GstMessageType)(GST_MESSAGE_ERROR | GST_MESSAGE_EOS));
|
||||||
if (GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR)
|
if (!msg || GST_MESSAGE_TYPE(msg) == GST_MESSAGE_ERROR)
|
||||||
{
|
{
|
||||||
CV_WARN("Error during VideoWriter finalization\n");
|
CV_WARN("Error during VideoWriter finalization\n");
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(msg != NULL)
|
if(msg != NULL)
|
||||||
{
|
{
|
||||||
gst_message_unref(msg);
|
gst_message_unref(msg);
|
||||||
g_object_unref(G_OBJECT(bus));
|
g_object_unref(G_OBJECT(bus));
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
gst_message_unref(msg);
|
||||||
|
g_object_unref(G_OBJECT(bus));
|
||||||
|
|
||||||
status = gst_element_set_state (pipeline, GST_STATE_NULL);
|
status = gst_element_set_state (pipeline, GST_STATE_NULL);
|
||||||
if (status == GST_STATE_CHANGE_ASYNC)
|
if (status == GST_STATE_CHANGE_ASYNC)
|
||||||
|
@ -91,7 +91,7 @@ static bool pMFCreateDXGIDeviceManager_initialized = false;
|
|||||||
static FN_MFCreateDXGIDeviceManager pMFCreateDXGIDeviceManager = NULL;
|
static FN_MFCreateDXGIDeviceManager pMFCreateDXGIDeviceManager = NULL;
|
||||||
static void init_MFCreateDXGIDeviceManager()
|
static void init_MFCreateDXGIDeviceManager()
|
||||||
{
|
{
|
||||||
HMODULE h = LoadLibraryA("mfplat.dll");
|
HMODULE h = LoadLibraryExA("mfplat.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
|
||||||
if (h)
|
if (h)
|
||||||
{
|
{
|
||||||
pMFCreateDXGIDeviceManager = (FN_MFCreateDXGIDeviceManager)GetProcAddress(h, "MFCreateDXGIDeviceManager");
|
pMFCreateDXGIDeviceManager = (FN_MFCreateDXGIDeviceManager)GetProcAddress(h, "MFCreateDXGIDeviceManager");
|
||||||
@ -1720,7 +1720,7 @@ bool CvCapture_MSMF::setProperty( int property_id, double value )
|
|||||||
return setTime(duration * value, true);
|
return setTime(duration * value, true);
|
||||||
break;
|
break;
|
||||||
case CV_CAP_PROP_POS_FRAMES:
|
case CV_CAP_PROP_POS_FRAMES:
|
||||||
if (getFramerate(nativeFormat) != 0)
|
if (std::fabs(getFramerate(nativeFormat)) > 0)
|
||||||
return setTime(value * 1e7 / getFramerate(nativeFormat), false);
|
return setTime(value * 1e7 / getFramerate(nativeFormat), false);
|
||||||
break;
|
break;
|
||||||
case CV_CAP_PROP_POS_MSEC:
|
case CV_CAP_PROP_POS_MSEC:
|
||||||
@ -1978,7 +1978,17 @@ private:
|
|||||||
|
|
||||||
CvVideoWriter_MSMF::CvVideoWriter_MSMF():
|
CvVideoWriter_MSMF::CvVideoWriter_MSMF():
|
||||||
MF(Media_Foundation::getInstance()),
|
MF(Media_Foundation::getInstance()),
|
||||||
initiated(false)
|
videoWidth(0),
|
||||||
|
videoHeight(0),
|
||||||
|
fps(0),
|
||||||
|
bitRate(0),
|
||||||
|
frameSize(0),
|
||||||
|
encodingFormat(),
|
||||||
|
inputFormat(),
|
||||||
|
streamIndex(0),
|
||||||
|
initiated(false),
|
||||||
|
rtStart(0),
|
||||||
|
rtDuration(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,8 +377,8 @@ LRESULT PASCAL CvCaptureCAM_VFW::frameCallback( HWND hWnd, VIDEOHDR* hdr )
|
|||||||
if (!hWnd) return FALSE;
|
if (!hWnd) return FALSE;
|
||||||
|
|
||||||
capture = (CvCaptureCAM_VFW*)capGetUserData(hWnd);
|
capture = (CvCaptureCAM_VFW*)capGetUserData(hWnd);
|
||||||
|
if (!capture) return (LRESULT)FALSE;
|
||||||
capture->hdr = hdr;
|
capture->hdr = hdr;
|
||||||
|
|
||||||
return (LRESULT)TRUE;
|
return (LRESULT)TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,26 +12,18 @@
|
|||||||
#include "opencv2/imgproc.hpp"
|
#include "opencv2/imgproc.hpp"
|
||||||
#include "opencv2/imgcodecs.hpp"
|
#include "opencv2/imgcodecs.hpp"
|
||||||
#include "opencv2/highgui.hpp"
|
#include "opencv2/highgui.hpp"
|
||||||
#include "opencv2/core.hpp"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
Mat img0, img1, res1, final;
|
Mat src, img1, mask, final;
|
||||||
|
|
||||||
Point point;
|
Point point;
|
||||||
|
vector<Point> pts;
|
||||||
int drag = 0;
|
int drag = 0;
|
||||||
|
|
||||||
int numpts = 100;
|
|
||||||
Point* pts = new Point[100];
|
|
||||||
|
|
||||||
int var = 0;
|
int var = 0;
|
||||||
int flag = 0;
|
int flag = 0;
|
||||||
int flag1 = 0;
|
|
||||||
|
|
||||||
int minx,miny,maxx,maxy,lenx,leny;
|
|
||||||
|
|
||||||
void mouseHandler(int, int, int, int, void*);
|
void mouseHandler(int, int, int, int, void*);
|
||||||
|
|
||||||
@ -40,16 +32,17 @@ void mouseHandler(int event, int x, int y, int, void*)
|
|||||||
|
|
||||||
if (event == EVENT_LBUTTONDOWN && !drag)
|
if (event == EVENT_LBUTTONDOWN && !drag)
|
||||||
{
|
{
|
||||||
if(flag1 == 0)
|
if (flag == 0)
|
||||||
{
|
{
|
||||||
if(var==0)
|
if (var == 0)
|
||||||
img1 = img0.clone();
|
img1 = src.clone();
|
||||||
point = Point(x, y);
|
point = Point(x, y);
|
||||||
circle(img1,point,2,Scalar(0, 0, 255),-1, 8, 0);
|
circle(img1, point, 2, Scalar(0, 0, 255), -1, 8, 0);
|
||||||
pts[var] = point;
|
pts.push_back(point);
|
||||||
var++;
|
var++;
|
||||||
drag = 1;
|
drag = 1;
|
||||||
if(var>1)
|
|
||||||
|
if (var > 1)
|
||||||
line(img1,pts[var-2], point, Scalar(0, 0, 255), 2, 8, 0);
|
line(img1,pts[var-2], point, Scalar(0, 0, 255), 2, 8, 0);
|
||||||
|
|
||||||
imshow("Source", img1);
|
imshow("Source", img1);
|
||||||
@ -59,103 +52,68 @@ void mouseHandler(int event, int x, int y, int, void*)
|
|||||||
if (event == EVENT_LBUTTONUP && drag)
|
if (event == EVENT_LBUTTONUP && drag)
|
||||||
{
|
{
|
||||||
imshow("Source", img1);
|
imshow("Source", img1);
|
||||||
|
|
||||||
drag = 0;
|
drag = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event == EVENT_RBUTTONDOWN)
|
if (event == EVENT_RBUTTONDOWN)
|
||||||
{
|
{
|
||||||
flag1 = 1;
|
flag = 1;
|
||||||
img1 = img0.clone();
|
img1 = src.clone();
|
||||||
for(int i = var; i < numpts ; i++)
|
|
||||||
pts[i] = point;
|
|
||||||
|
|
||||||
if(var!=0)
|
if (var != 0)
|
||||||
{
|
{
|
||||||
const Point* pts3[1] = {&pts[0]};
|
polylines( img1, pts, 1, Scalar(0,0,0), 2, 8, 0);
|
||||||
polylines( img1, pts3, &numpts,1, 1, Scalar(0,0,0), 2, 8, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i=0;i<var;i++)
|
|
||||||
{
|
|
||||||
minx = min(minx,pts[i].x);
|
|
||||||
maxx = max(maxx,pts[i].x);
|
|
||||||
miny = min(miny,pts[i].y);
|
|
||||||
maxy = max(maxy,pts[i].y);
|
|
||||||
}
|
|
||||||
lenx = maxx - minx;
|
|
||||||
leny = maxy - miny;
|
|
||||||
|
|
||||||
imshow("Source", img1);
|
imshow("Source", img1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event == EVENT_RBUTTONUP)
|
if (event == EVENT_RBUTTONUP)
|
||||||
{
|
{
|
||||||
flag = var;
|
flag = var;
|
||||||
|
final = Mat::zeros(src.size(), CV_8UC3);
|
||||||
|
mask = Mat::zeros(src.size(), CV_8UC1);
|
||||||
|
|
||||||
final = Mat::zeros(img0.size(),CV_8UC3);
|
vector<vector<Point> > vpts;
|
||||||
res1 = Mat::zeros(img0.size(),CV_8UC1);
|
vpts.push_back(pts);
|
||||||
const Point* pts4[1] = {&pts[0]};
|
fillPoly(mask, vpts, Scalar(255, 255, 255), 8, 0);
|
||||||
|
bitwise_and(src, src, final, mask);
|
||||||
fillPoly(res1, pts4,&numpts, 1, Scalar(255, 255, 255), 8, 0);
|
imshow("Mask", mask);
|
||||||
bitwise_and(img0, img0, final,res1);
|
imshow("Result", final);
|
||||||
imshow("mask",res1);
|
|
||||||
imwrite("mask.png",res1);
|
|
||||||
|
|
||||||
imshow("Source", img1);
|
imshow("Source", img1);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event == EVENT_MBUTTONDOWN)
|
if (event == EVENT_MBUTTONDOWN)
|
||||||
{
|
{
|
||||||
for(int i = 0; i < numpts ; i++)
|
pts.clear();
|
||||||
{
|
|
||||||
pts[i].x=0;
|
|
||||||
pts[i].y=0;
|
|
||||||
}
|
|
||||||
var = 0;
|
var = 0;
|
||||||
flag1 = 0;
|
|
||||||
minx = INT_MAX; miny = INT_MAX; maxx = INT_MIN; maxy = INT_MIN;
|
|
||||||
imshow("Source", img0);
|
|
||||||
drag = 0;
|
drag = 0;
|
||||||
|
flag = 0;
|
||||||
|
imshow("Source", src);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void help()
|
|
||||||
{
|
|
||||||
cout << "\nThis program demonstrates using mouse events"
|
|
||||||
"\nCall:\n"
|
|
||||||
"./create_mask <image_name>\n"
|
|
||||||
"\n"
|
|
||||||
"\tleft mouse button - set a point to create mask shape"
|
|
||||||
"\n"
|
|
||||||
"\tright mouse button - create mask from points\n"
|
|
||||||
"\tmiddle mouse button - reset\n" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
cv::CommandLineParser parser(argc, argv, "{@input | ../data/lena.jpg | input image}");
|
CommandLineParser parser(argc, argv, "{@input | ../data/lena.jpg | input image}");
|
||||||
help();
|
parser.about("This program demonstrates using mouse events\n");
|
||||||
string input_image = parser.get<string>("@input");
|
|
||||||
if (input_image.empty())
|
|
||||||
{
|
|
||||||
parser.printMessage();
|
parser.printMessage();
|
||||||
parser.printErrors();
|
cout << "\n\tleft mouse button - set a point to create mask shape\n"
|
||||||
|
"\tright mouse button - create mask from points\n"
|
||||||
|
"\tmiddle mouse button - reset\n";
|
||||||
|
String input_image = parser.get<String>("@input");
|
||||||
|
|
||||||
|
src = imread(input_image);
|
||||||
|
|
||||||
|
if (src.empty())
|
||||||
|
{
|
||||||
|
printf("Error opening image: %s\n", input_image.c_str());
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat src = imread(input_image);
|
namedWindow("Source", WINDOW_AUTOSIZE);
|
||||||
|
|
||||||
minx = INT_MAX; miny = INT_MAX; maxx = INT_MIN; maxy = INT_MIN;
|
|
||||||
|
|
||||||
img0 = src;
|
|
||||||
|
|
||||||
res1 = Mat::zeros(img0.size(),CV_8UC1);
|
|
||||||
final = Mat::zeros(img0.size(),CV_8UC3);
|
|
||||||
//////////// source image ///////////////////
|
|
||||||
|
|
||||||
namedWindow("Source", 1);
|
|
||||||
setMouseCallback("Source", mouseHandler, NULL);
|
setMouseCallback("Source", mouseHandler, NULL);
|
||||||
imshow("Source", img0);
|
imshow("Source", src);
|
||||||
waitKey(0);
|
waitKey(0);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -0,0 +1,149 @@
|
|||||||
|
/**
|
||||||
|
* @brief You will learn how to recover an out-of-focus image by Wiener filter
|
||||||
|
* @author Karpushin Vladislav, karpushin@ngs.ru, https://github.com/VladKarpushin
|
||||||
|
*/
|
||||||
|
#include <iostream>
|
||||||
|
#include "opencv2/imgproc.hpp"
|
||||||
|
#include "opencv2/imgcodecs.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void help();
|
||||||
|
void calcPSF(Mat& outputImg, Size filterSize, int R);
|
||||||
|
void fftshift(const Mat& inputImg, Mat& outputImg);
|
||||||
|
void filter2DFreq(const Mat& inputImg, Mat& outputImg, const Mat& H);
|
||||||
|
void calcWnrFilter(const Mat& input_h_PSF, Mat& output_G, double nsr);
|
||||||
|
|
||||||
|
const String keys =
|
||||||
|
"{help h usage ? | | print this message }"
|
||||||
|
"{image |original.JPG | input image name }"
|
||||||
|
"{R |53 | radius }"
|
||||||
|
"{SNR |5200 | signal to noise ratio}"
|
||||||
|
;
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
help();
|
||||||
|
CommandLineParser parser(argc, argv, keys);
|
||||||
|
if (parser.has("help"))
|
||||||
|
{
|
||||||
|
parser.printMessage();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int R = parser.get<int>("R");
|
||||||
|
int snr = parser.get<int>("SNR");
|
||||||
|
string strInFileName = parser.get<String>("image");
|
||||||
|
|
||||||
|
if (!parser.check())
|
||||||
|
{
|
||||||
|
parser.printErrors();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat imgIn;
|
||||||
|
imgIn = imread(strInFileName, IMREAD_GRAYSCALE);
|
||||||
|
if (imgIn.empty()) //check whether the image is loaded or not
|
||||||
|
{
|
||||||
|
cout << "ERROR : Image cannot be loaded..!!" << endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat imgOut;
|
||||||
|
|
||||||
|
//! [main]
|
||||||
|
// it needs to process even image only
|
||||||
|
Rect roi = Rect(0, 0, imgIn.cols & -2, imgIn.rows & -2);
|
||||||
|
|
||||||
|
//Hw calculation (start)
|
||||||
|
Mat Hw, h;
|
||||||
|
calcPSF(h, roi.size(), R);
|
||||||
|
calcWnrFilter(h, Hw, 1.0 / double(snr));
|
||||||
|
//Hw calculation (stop)
|
||||||
|
|
||||||
|
// filtering (start)
|
||||||
|
filter2DFreq(imgIn(roi), imgOut, Hw);
|
||||||
|
// filtering (stop)
|
||||||
|
//! [main]
|
||||||
|
|
||||||
|
imgOut.convertTo(imgOut, CV_8U);
|
||||||
|
normalize(imgOut, imgOut, 0, 255, NORM_MINMAX);
|
||||||
|
imwrite("result.jpg", imgOut);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void help()
|
||||||
|
{
|
||||||
|
cout << "2018-07-12" << endl;
|
||||||
|
cout << "DeBlur_v8" << endl;
|
||||||
|
cout << "You will learn how to recover an out-of-focus image by Wiener filter" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
//! [calcPSF]
|
||||||
|
void calcPSF(Mat& outputImg, Size filterSize, int R)
|
||||||
|
{
|
||||||
|
Mat h(filterSize, CV_32F, Scalar(0));
|
||||||
|
Point point(filterSize.width / 2, filterSize.height / 2);
|
||||||
|
circle(h, point, R, 255, -1, 8);
|
||||||
|
Scalar summa = sum(h);
|
||||||
|
outputImg = h / summa[0];
|
||||||
|
}
|
||||||
|
//! [calcPSF]
|
||||||
|
|
||||||
|
//! [fftshift]
|
||||||
|
void fftshift(const Mat& inputImg, Mat& outputImg)
|
||||||
|
{
|
||||||
|
outputImg = inputImg.clone();
|
||||||
|
int cx = outputImg.cols / 2;
|
||||||
|
int cy = outputImg.rows / 2;
|
||||||
|
Mat q0(outputImg, Rect(0, 0, cx, cy));
|
||||||
|
Mat q1(outputImg, Rect(cx, 0, cx, cy));
|
||||||
|
Mat q2(outputImg, Rect(0, cy, cx, cy));
|
||||||
|
Mat q3(outputImg, Rect(cx, cy, cx, cy));
|
||||||
|
Mat tmp;
|
||||||
|
q0.copyTo(tmp);
|
||||||
|
q3.copyTo(q0);
|
||||||
|
tmp.copyTo(q3);
|
||||||
|
q1.copyTo(tmp);
|
||||||
|
q2.copyTo(q1);
|
||||||
|
tmp.copyTo(q2);
|
||||||
|
}
|
||||||
|
//! [fftshift]
|
||||||
|
|
||||||
|
//! [filter2DFreq]
|
||||||
|
void filter2DFreq(const Mat& inputImg, Mat& outputImg, const Mat& H)
|
||||||
|
{
|
||||||
|
Mat planes[2] = { Mat_<float>(inputImg.clone()), Mat::zeros(inputImg.size(), CV_32F) };
|
||||||
|
Mat complexI;
|
||||||
|
merge(planes, 2, complexI);
|
||||||
|
dft(complexI, complexI, DFT_SCALE);
|
||||||
|
|
||||||
|
Mat planesH[2] = { Mat_<float>(H.clone()), Mat::zeros(H.size(), CV_32F) };
|
||||||
|
Mat complexH;
|
||||||
|
merge(planesH, 2, complexH);
|
||||||
|
Mat complexIH;
|
||||||
|
mulSpectrums(complexI, complexH, complexIH, 0);
|
||||||
|
|
||||||
|
idft(complexIH, complexIH);
|
||||||
|
split(complexIH, planes);
|
||||||
|
outputImg = planes[0];
|
||||||
|
}
|
||||||
|
//! [filter2DFreq]
|
||||||
|
|
||||||
|
//! [calcWnrFilter]
|
||||||
|
void calcWnrFilter(const Mat& input_h_PSF, Mat& output_G, double nsr)
|
||||||
|
{
|
||||||
|
Mat h_PSF_shifted;
|
||||||
|
fftshift(input_h_PSF, h_PSF_shifted);
|
||||||
|
Mat planes[2] = { Mat_<float>(h_PSF_shifted.clone()), Mat::zeros(h_PSF_shifted.size(), CV_32F) };
|
||||||
|
Mat complexI;
|
||||||
|
merge(planes, 2, complexI);
|
||||||
|
dft(complexI, complexI);
|
||||||
|
split(complexI, planes);
|
||||||
|
Mat denom;
|
||||||
|
pow(abs(planes[0]), 2, denom);
|
||||||
|
denom += nsr;
|
||||||
|
divide(planes[0], denom, output_G);
|
||||||
|
}
|
||||||
|
//! [calcWnrFilter]
|
@ -190,7 +190,7 @@ while cv.waitKey(1) < 0:
|
|||||||
net.setInput(blob)
|
net.setInput(blob)
|
||||||
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
|
if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN
|
||||||
frame = cv.resize(frame, (inpWidth, inpHeight))
|
frame = cv.resize(frame, (inpWidth, inpHeight))
|
||||||
net.setInput(np.array([inpHeight, inpWidth, 1.6], dtype=np.float32), 'im_info')
|
net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info')
|
||||||
outs = net.forward(getOutputsNames(net))
|
outs = net.forward(getOutputsNames(net))
|
||||||
|
|
||||||
postprocess(frame, outs)
|
postprocess(frame, outs)
|
||||||
|
Loading…
Reference in New Issue
Block a user