mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
Merge pull request #8441 from alalek:dispatch_mathfuncs_core
This commit is contained in:
commit
297ba85323
@ -275,6 +275,11 @@ set(CPU_BASELINE_FLAGS "")
|
||||
set(CPU_BASELINE_FINAL "")
|
||||
set(CPU_DISPATCH_FINAL "")
|
||||
|
||||
# When CV_DISABLE_OPTIMIZATION is set, empty both dispatch lists
# (CPU_DISPATCH and CPU_DISPATCH_REQUIRE) before any optimization is processed.
# NOTE(review): presumably this keeps later code from building any dispatched
# (per-instruction-set) variants - confirm against the consumers of these lists.
if(CV_DISABLE_OPTIMIZATION)
|
||||
set(CPU_DISPATCH "")
|
||||
set(CPU_DISPATCH_REQUIRE "")
|
||||
endif()
|
||||
|
||||
macro(ocv_check_compiler_optimization OPT)
|
||||
if(NOT DEFINED CPU_${OPT}_SUPPORTED)
|
||||
if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE)
|
||||
@ -319,7 +324,7 @@ macro(ocv_check_compiler_optimization OPT)
|
||||
endmacro()
|
||||
|
||||
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
|
||||
set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE)
|
||||
set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "")
|
||||
if(NOT DEFINED CPU_${OPT}_FORCE)
|
||||
set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
|
||||
endif()
|
||||
@ -515,15 +520,27 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
|
||||
endforeach()
|
||||
foreach(fname ${${SOURCES_VAR_NAME}})
|
||||
string(TOLOWER "${fname}" fname_LOWER)
|
||||
if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$")
|
||||
if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
|
||||
message(STATUS "Excluding from source files list: ${fname}")
|
||||
if(fname_LOWER MATCHES "\\.(.*)\\.cpp$")
|
||||
string(TOUPPER "${CMAKE_MATCH_1}" OPT_)
|
||||
if(OPT_ MATCHES "(CUDA.*|DISPATCH.*|OCL)") # don't touch files like filename.cuda.cpp
|
||||
list(APPEND __result "${fname}")
|
||||
#continue()
|
||||
elseif(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
|
||||
message(STATUS "Excluding from source files list (optimization is disabled): ${fname}")
|
||||
#continue()
|
||||
else()
|
||||
get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
|
||||
if(__definitions)
|
||||
list(APPEND __definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
|
||||
else()
|
||||
set(__definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
|
||||
endif()
|
||||
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
|
||||
|
||||
set(__opt_found 0)
|
||||
foreach(OPT ${CPU_BASELINE_FINAL})
|
||||
string(TOLOWER "${OPT}" OPT_LOWER)
|
||||
if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$")
|
||||
if(fname_LOWER MATCHES "\\.${OPT_LOWER}\\.cpp$")
|
||||
#message("${fname} BASELINE-${OPT}")
|
||||
set(__opt_found 1)
|
||||
list(APPEND __result "${fname}")
|
||||
@ -533,11 +550,11 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
|
||||
foreach(OPT ${CPU_DISPATCH_FINAL})
|
||||
foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
|
||||
string(TOLOWER "${OPT2}" OPT2_LOWER)
|
||||
if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$")
|
||||
if(fname_LOWER MATCHES "\\.${OPT2_LOWER}\\.cpp$")
|
||||
list(APPEND __result_${OPT} "${fname}")
|
||||
math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1")
|
||||
set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE)
|
||||
#message("${fname} ${OPT}")
|
||||
#message("(${CPU_${OPT}_USAGE_COUNT})${fname} ${OPT}")
|
||||
#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
|
||||
#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
|
||||
#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
|
||||
@ -573,7 +590,13 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
|
||||
list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
|
||||
else()
|
||||
foreach(fname ${__result_${OPT}})
|
||||
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
|
||||
get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
|
||||
if(__definitions)
|
||||
list(APPEND __definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
|
||||
else()
|
||||
set(__definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
|
||||
endif()
|
||||
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
|
||||
set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
|
||||
endforeach()
|
||||
list(APPEND __result ${__result_${OPT}})
|
||||
@ -620,18 +643,25 @@ macro(ocv_compiler_optimization_fill_cpu_config)
|
||||
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} 1
|
||||
# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_${OPT}(fn, args) return (opt_${OPT}::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
|
||||
# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_${OPT}(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} 0
|
||||
# define CV_CPU_CALL_${OPT}(...)
|
||||
# define CV_CPU_CALL_${OPT}(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_${OPT}(fn, args, mode, ...) CV_CPU_CALL_${OPT}(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
|
||||
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||
")
|
||||
|
||||
|
||||
set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
|
||||
if(EXISTS "${__file}")
|
||||
file(READ "${__file}" __content)
|
||||
@ -644,6 +674,57 @@ macro(ocv_compiler_optimization_fill_cpu_config)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# ocv_add_dispatched_file(<filename> [<OPT>...])
#
# Generates the glue sources needed to compile "<filename>.simd.hpp" once per
# requested CPU optimization (e.g. SSE2 AVX AVX2):
#
#   * For every <OPT> a tiny translation unit
#       ${CMAKE_CURRENT_BINARY_DIR}/<filename>.<opt>.cpp
#     is written; it includes "precomp.hpp" and "<filename>.simd.hpp".
#     Its path is appended to OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED.
#   * A header
#       ${CMAKE_CURRENT_BINARY_DIR}/<filename>.simd_declarations.hpp
#     is written; it defines CV_CPU_SIMD_FILENAME, includes
#     cv_cpu_include_simd_declarations.hpp once per <OPT>, and defines
#     CV_CPU_DISPATCH_MODES_ALL as "<last OPT>, ..., <first OPT>, BASELINE".
#
# Nothing is done while OPENCV_INITIAL_PASS is true. When
# CV_DISABLE_OPTIMIZATION is set or CV_ENABLE_INTRINSICS is not, the list of
# optimizations is treated as empty, so only the declarations header (with
# BASELINE as the sole mode) is produced.
#
# Files are rewritten only when their content differs, so an unchanged
# configure run does not touch them.
#
# This is a macro: ${the_module} is read from the caller's scope, and the
# helper variables (__codestr, __content, __file, ...) live in the caller's
# scope and survive between invocations.
macro(ocv_add_dispatched_file filename)
  if(NOT OPENCV_INITIAL_PASS)
    # Body of every generated <filename>.<opt>.cpp (identical for all OPTs).
    set(__codestr "
#include \"precomp.hpp\"
#include \"${filename}.simd.hpp\"
")

    set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${filename}.simd.hpp\"")
    set(__dispatch_modes "BASELINE")

    set(__optimizations "${ARGN}")
    if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
      set(__optimizations "")
    endif()

    foreach(OPT ${__optimizations})
      string(TOLOWER "${OPT}" OPT_LOWER)
      set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp")
      # Reset __content when the file is missing. Without this, the value read
      # for a previous file (same __codestr) would still be in scope, compare
      # equal, and the missing file would never be created.
      if(EXISTS "${__file}")
        file(READ "${__file}" __content)
      else()
        set(__content "")
      endif()
      if(NOT __content STREQUAL __codestr)
        file(WRITE "${__file}" "${__codestr}")
      endif()
      list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}")

      set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODE ${OPT}
#include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\"
")
      # Prepend, so later (typically wider) instruction sets are tried first.
      set(__dispatch_modes "${OPT}, ${__dispatch_modes}")
    endforeach()

    set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
")

    set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp")
    # Same stale-value guard as above.
    if(EXISTS "${__file}")
      file(READ "${__file}" __content)
    else()
      set(__content "")
    endif()
    if(NOT __content STREQUAL __declarations_str)
      file(WRITE "${__file}" "${__declarations_str}")
    endif()
  endif()
endmacro()
|
||||
|
||||
if(CV_DISABLE_OPTIMIZATION OR CV_ICC)
|
||||
ocv_update(CV_ENABLE_UNROLLED 0)
|
||||
else()
|
||||
|
@ -314,6 +314,7 @@ macro(ocv_glob_modules)
|
||||
set(OPENCV_INITIAL_PASS OFF)
|
||||
if(${BUILD_opencv_world})
|
||||
foreach(m ${OPENCV_MODULES_BUILD})
|
||||
set(the_module "${m}")
|
||||
if("${m}" STREQUAL opencv_world)
|
||||
add_subdirectory("${OPENCV_MODULE_opencv_world_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/world")
|
||||
elseif(NOT OPENCV_MODULE_${m}_IS_PART_OF_WORLD AND NOT ${m} STREQUAL opencv_world)
|
||||
@ -329,6 +330,7 @@ macro(ocv_glob_modules)
|
||||
endforeach()
|
||||
else()
|
||||
foreach(m ${OPENCV_MODULES_BUILD})
|
||||
set(the_module "${m}")
|
||||
if(m MATCHES "^opencv_")
|
||||
string(REGEX REPLACE "^opencv_" "" __shortname "${m}")
|
||||
add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}")
|
||||
@ -646,11 +648,13 @@ macro(ocv_set_module_sources)
|
||||
ocv_get_module_external_sources()
|
||||
endif()
|
||||
|
||||
if(OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED)
|
||||
list(APPEND OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED})
|
||||
endif()
|
||||
|
||||
# use full paths for module to be independent from the module location
|
||||
ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
|
||||
|
||||
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
|
||||
|
||||
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
|
||||
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
|
||||
endmacro()
|
||||
@ -766,6 +770,11 @@ macro(ocv_create_module)
|
||||
endmacro()
|
||||
|
||||
macro(_ocv_create_module)
|
||||
|
||||
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
|
||||
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
|
||||
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
|
||||
|
||||
# The condition we ought to be testing here is whether ocv_add_precompiled_headers will
|
||||
# be called at some point in the future. We can't look into the future, though,
|
||||
# so this will have to do.
|
||||
|
@ -288,11 +288,12 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
|
||||
foreach(src ${_sources})
|
||||
if(NOT "${src}" MATCHES "\\.mm$")
|
||||
get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
|
||||
if(NOT oldProps)
|
||||
get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS)
|
||||
if(NOT oldProps AND NOT oldProps2)
|
||||
set(newProperties "-include \"${CMAKE_CURRENT_BINARY_DIR}/${_name}\"")
|
||||
set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}")
|
||||
else()
|
||||
ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}")
|
||||
ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
@ -339,11 +340,12 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
|
||||
AND NOT "${src}" MATCHES "^\$" # CMake generator expressions
|
||||
)
|
||||
get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
|
||||
if(NOT oldProps)
|
||||
get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS)
|
||||
if(NOT oldProps AND NOT oldProps2)
|
||||
set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
|
||||
set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}")
|
||||
else()
|
||||
ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}")
|
||||
ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
@ -1,4 +1,7 @@
|
||||
set(the_description "The Core Functionality")
|
||||
|
||||
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
|
||||
|
||||
ocv_add_module(core
|
||||
"${OPENCV_HAL_LINKER_LIBS}"
|
||||
OPTIONAL opencv_cudev
|
||||
|
@ -7,6 +7,23 @@
|
||||
#include "cv_cpu_config.h"
|
||||
#include "cv_cpu_helper.h"
|
||||
|
||||
// Namespace selection for code that is compiled once per CPU optimization.
// When CV_CPU_DISPATCH_MODE is defined (e.g. AVX2) the namespace name is
// formed by pasting "opt_" and the mode (opt_AVX2); otherwise the code lives
// in the fixed namespace cpu_baseline.
#ifdef CV_CPU_DISPATCH_MODE
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||
#else
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||
#endif
|
||||
|
||||
|
||||
// Dispatch chain.
//   CV_CPU_DISPATCH(fn, args, MODE1, MODE2, ...)
// appends END to the mode list and expands, one mode at a time, to
// __CV_CPU_DISPATCH_CHAIN_<MODE>(fn, args, <remaining modes>).
// The per-mode __CV_CPU_DISPATCH_CHAIN_<MODE> macros are not defined here
// (they come from cv_cpu_helper.h); the only chain link defined in this block
// is the END terminator, which expands to nothing.
// `args` is the parenthesized argument list, e.g. (a, b, c).
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
|
||||
// Selects the chain macro for the first mode and forwards the remaining modes.
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
// Extra expansion level so that a macro passed as the mode list
// (such as CV_CPU_DISPATCH_MODES_ALL) is expanded before `mode` is pasted.
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
|
||||
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
|
||||
|
||||
|
||||
#if defined CV_ENABLE_INTRINSICS \
|
||||
&& !defined CV_DISABLE_OPTIMIZATION \
|
||||
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
|
||||
@ -76,6 +93,16 @@
|
||||
|
||||
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||
|
||||
// Only active when AVX code is compiled for this translation unit
// (CV_CPU_COMPILE_AVX) but AVX is not part of the baseline
// (CV_CPU_BASELINE_COMPILE_AVX is undefined).
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
|
||||
// Scope guard: its destructor executes _mm256_zeroupper() when the guard
// object goes out of scope.
// NOTE(review): presumably this avoids AVX<->SSE transition penalties after
// returning from AVX-compiled code - confirm.
struct VZeroUpperGuard {
|
||||
#ifdef __GNUC__
|
||||
// Ask GCC-compatible compilers to always inline the destructor.
__attribute__((always_inline))
|
||||
#endif
|
||||
inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
|
||||
};
|
||||
// Declares a local guard object named __vzeroupper_guard; note that the
// trailing semicolon is part of the macro.
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard;
|
||||
#endif
|
||||
|
||||
#endif // __OPENCV_BUILD
|
||||
|
||||
|
||||
|
@ -2,132 +2,147 @@
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 1
|
||||
# define CV_CPU_CALL_SSE(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE(fn, args) return (opt_SSE::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
|
||||
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
|
||||
# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 0
|
||||
# define CV_CPU_CALL_SSE(...)
|
||||
# define CV_CPU_CALL_SSE(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 1
|
||||
# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE2(fn, args) return (opt_SSE2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
|
||||
# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 0
|
||||
# define CV_CPU_CALL_SSE2(...)
|
||||
# define CV_CPU_CALL_SSE2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 1
|
||||
# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE3(fn, args) return (opt_SSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
|
||||
# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 0
|
||||
# define CV_CPU_CALL_SSE3(...)
|
||||
# define CV_CPU_CALL_SSE3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 1
|
||||
# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) return (opt_SSSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
|
||||
# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 0
|
||||
# define CV_CPU_CALL_SSSE3(...)
|
||||
# define CV_CPU_CALL_SSSE3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
|
||||
# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) return (opt_SSE4_1::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
|
||||
# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
|
||||
# define CV_CPU_CALL_SSE4_1(...)
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
|
||||
# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) return (opt_SSE4_2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
|
||||
# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
|
||||
# define CV_CPU_CALL_SSE4_2(...)
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 1
|
||||
# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) return (opt_POPCNT::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
|
||||
# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 0
|
||||
# define CV_CPU_CALL_POPCNT(...)
|
||||
# define CV_CPU_CALL_POPCNT(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 1
|
||||
# define CV_CPU_CALL_AVX(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_AVX(fn, args) return (opt_AVX::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
|
||||
# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 0
|
||||
# define CV_CPU_CALL_AVX(...)
|
||||
# define CV_CPU_CALL_AVX(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 1
|
||||
# define CV_CPU_CALL_FP16(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_FP16(fn, args) return (opt_FP16::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
|
||||
# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 0
|
||||
# define CV_CPU_CALL_FP16(...)
|
||||
# define CV_CPU_CALL_FP16(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 1
|
||||
# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_AVX2(fn, args) return (opt_AVX2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
|
||||
# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 0
|
||||
# define CV_CPU_CALL_AVX2(...)
|
||||
# define CV_CPU_CALL_AVX2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 1
|
||||
# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_FMA3(fn, args) return (opt_FMA3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
|
||||
# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 0
|
||||
# define CV_CPU_CALL_FMA3(...)
|
||||
# define CV_CPU_CALL_FMA3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||
# define CV_CPU_CALL_NEON(...) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_NEON(fn, args) return (opt_NEON::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
|
||||
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
|
||||
# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
|
||||
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||
#else
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 0
|
||||
# define CV_CPU_CALL_NEON(...)
|
||||
# define CV_CPU_CALL_NEON(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||
|
@ -52,6 +52,17 @@
|
||||
#include "cvconfig.h"
|
||||
#endif
|
||||
|
||||
// Identity macro: expands to its argument unchanged.
// NOTE(review): presumably used to force an additional macro-expansion pass
// (e.g. for __VA_ARGS__ handling on some compilers) - confirm.
#ifndef __CV_EXPAND
|
||||
#define __CV_EXPAND(x) x
|
||||
#endif
|
||||
|
||||
// Token concatenation with indirection: __CV_CAT(x, y) forwards through
// __CV_CAT_ to __CV_CAT__, which performs the actual `x ## y` paste, so that
// macro arguments are expanded before they are pasted together.
// The whole group is skipped if __CV_CAT is already defined.
#ifndef __CV_CAT
|
||||
#define __CV_CAT__(x, y) x ## y
|
||||
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
|
||||
#define __CV_CAT(x, y) __CV_CAT_(x, y)
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
|
||||
# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */
|
||||
#endif
|
||||
|
@ -60,6 +60,25 @@
|
||||
// access from within opencv code more accessible
|
||||
namespace cv {
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
|
||||
#ifdef CV_CPU_DISPATCH_MODE
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#else
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#endif
|
||||
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
|
||||
@ -281,6 +300,9 @@ template <typename T> struct V_SIMD128Traits
|
||||
|
||||
//! @}
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CV_DOXYGEN
|
||||
@ -323,6 +345,10 @@ template <typename T> struct V_SIMD128Traits
|
||||
|
||||
namespace cv {
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
template <typename R> struct V_RegTrait128;
|
||||
|
||||
template <> struct V_RegTrait128<uchar> {
|
||||
@ -407,6 +433,10 @@ template <> struct V_RegTrait128<double> {
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
} // cv::
|
||||
|
||||
//! @endcond
|
||||
|
@ -53,6 +53,10 @@
|
||||
namespace cv
|
||||
{
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
/** @addtogroup core_hal_intrin
|
||||
|
||||
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
|
||||
@ -1827,7 +1831,9 @@ static inline bool hasSIMD128()
|
||||
|
||||
//! @}
|
||||
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -53,6 +53,8 @@ namespace cv
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
#define CV_SIMD128 1
|
||||
#if defined(__aarch64__)
|
||||
#define CV_SIMD128_64F 1
|
||||
@ -1238,11 +1240,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
|
||||
//! @brief Check CPU capability of SIMD operation
|
||||
static inline bool hasSIMD128()
|
||||
{
|
||||
return checkHardwareSupport(CV_CPU_NEON);
|
||||
return (CV_CPU_HAS_SUPPORT_NEON) ? true : false;
|
||||
}
|
||||
|
||||
//! @}
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
}
|
||||
|
@ -56,6 +56,8 @@ namespace cv
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
struct v_uint8x16
|
||||
{
|
||||
typedef uchar lane_type;
|
||||
@ -1791,11 +1793,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
|
||||
//! @brief Check CPU capability of SIMD operation
|
||||
static inline bool hasSIMD128()
|
||||
{
|
||||
return checkHardwareSupport(CV_CPU_SSE2);
|
||||
return (CV_CPU_HAS_SUPPORT_SSE2) ? true : false;
|
||||
}
|
||||
|
||||
//! @}
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
}
|
||||
|
@ -540,7 +540,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
|
||||
///// General instrumentation
|
||||
// General OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
|
||||
#define CV_INSTRUMENT_REGION_() CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
|
||||
// Custom OpenCV region instrumentation macro
|
||||
#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
|
||||
// Instrumentation for parallel_for_ or other regions which forks and gathers threads
|
||||
@ -566,7 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
#else
|
||||
#define CV_INSTRUMENT_REGION_META(...)
|
||||
|
||||
#define CV_INSTRUMENT_REGION()
|
||||
#define CV_INSTRUMENT_REGION_()
|
||||
#define CV_INSTRUMENT_REGION_NAME(...)
|
||||
#define CV_INSTRUMENT_REGION_MT_FORK()
|
||||
|
||||
@ -580,6 +580,12 @@ CV_EXPORTS InstrNode* getCurrentNode();
|
||||
#define CV_INSTRUMENT_MARK_OPENCL(...)
|
||||
#endif
|
||||
|
||||
#ifdef __CV_AVX_GUARD
|
||||
#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_()
|
||||
#else
|
||||
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_()
|
||||
#endif
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif // OPENCV_CORE_PRIVATE_HPP
|
||||
|
@ -0,0 +1,30 @@
|
||||
// Helper file to include the declarations of dispatched functions.
// It has no include guard in this file: as the usage below shows, it is meant
// to be included repeatedly, once per CPU dispatch mode.
|
||||
//
|
||||
// Usage:
|
||||
//     #define CV_CPU_SIMD_FILENAME "<filename>.simd.hpp"
|
||||
//     #define CV_CPU_DISPATCH_MODE AVX2
|
||||
//     #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
//     #define CV_CPU_DISPATCH_MODE SSE2
|
||||
//     #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
|
||||
|
||||
// Unless optimization is disabled, silence MSVC warning C4702.
// The pragma is not wrapped in push/pop here, so this header does not restore it.
#ifndef CV_DISABLE_OPTIMIZATION
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable: 4702) // unreachable code
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Make sure the declarations-only switch is defined before the SIMD file is included.
// NOTE(review): the included file is expected to hide its implementations under this
// macro - confirm against the .simd.hpp being included.
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
#endif
|
||||
|
||||
// Drop any earlier definitions so the namespace macros can be redefined for this mode.
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
|
||||
// Open/close a namespace whose name is built from "opt_" and CV_CPU_DISPATCH_MODE.
// NOTE(review): __CV_CAT is defined elsewhere; presumably it token-pastes after
// expanding its arguments, so CV_CPU_DISPATCH_MODE must be defined by the caller.
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
|
||||
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
|
||||
|
||||
// Include the file named by the caller through CV_CPU_SIMD_FILENAME.
#include CV_CPU_SIMD_FILENAME
|
||||
|
||||
// Clean up. CV_CPU_DISPATCH_MODE is undefined as well, so the caller has to define
// it again before the next include (see the usage example at the top).
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
#undef CV_CPU_DISPATCH_MODE
|
215
modules/core/src/mathfuncs_core.dispatch.cpp
Normal file
215
modules/core/src/mathfuncs_core.dispatch.cpp
Normal file
@ -0,0 +1,215 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
#include "mathfuncs_core.simd.hpp"
|
||||
#include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
|
||||
|
||||
namespace cv { namespace hal {
|
||||
|
||||
///////////////////////////////////// ATAN2 ////////////////////////////////////
|
||||
|
||||
// Array fast-atan entry point for float data (Y, X in; angle out; len elements).
// Runs, in this order: region instrumentation, the HAL hook cv_hal_fastAtan32f
// via CALL_HAL, then the CPU-dispatched fastAtan32f via CV_CPU_DISPATCH over
// CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): CALL_HAL / CV_CPU_DISPATCH are defined elsewhere; presumably each
// returns from this function once a backend has handled the call - confirm.
void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);
|
||||
|
||||
CV_CPU_DISPATCH(fastAtan32f, (Y, X, angle, len, angleInDegrees),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Array fast-atan entry point for double data (Y, X in; angle out; len elements).
// Runs, in this order: region instrumentation, the HAL hook cv_hal_fastAtan64f
// via CALL_HAL, then the CPU-dispatched fastAtan64f via CV_CPU_DISPATCH over
// CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): the macros are defined elsewhere; presumably each returns from
// this function once a backend has handled the call - confirm.
void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);
|
||||
|
||||
CV_CPU_DISPATCH(fastAtan64f, (Y, X, angle, len, angleInDegrees),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// deprecated
// Old name for the float array variant: instruments the region and forwards all
// arguments unchanged to fastAtan32f().
|
||||
void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
fastAtan32f(Y, X, angle, len, angleInDegrees);
|
||||
}
|
||||
|
||||
// Magnitude entry point for float arrays (x, y in; mag out; len elements).
// Tries the backends in this order: HAL hook cv_hal_magnitude32f (CALL_HAL),
// IPP call ippsMagnitude_32f (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched magnitude32f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void magnitude32f(const float* x, const float* y, float* mag, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Magnitude entry point for double arrays (x, y in; mag out; len elements).
// Tries the backends in this order: HAL hook cv_hal_magnitude64f (CALL_HAL),
// IPP call ippsMagnitude_64f (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched magnitude64f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void magnitude64f(const double* x, const double* y, double* mag, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
|
||||
// Inverse-square-root entry point for float arrays (src in; dst out; len elements).
// Tries the backends in this order: HAL hook cv_hal_invSqrt32f (CALL_HAL),
// IPP call ippsInvSqrt_32f_A21 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched invSqrt32f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void invSqrt32f(const float* src, float* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(invSqrt32f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
|
||||
// Inverse-square-root entry point for double arrays (src in; dst out; len elements).
// Tries the backends in this order: HAL hook cv_hal_invSqrt64f (CALL_HAL),
// IPP call ippsInvSqrt_64f_A50 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched invSqrt64f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void invSqrt64f(const double* src, double* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(invSqrt64f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
|
||||
// Square-root entry point for float arrays (src in; dst out; len elements).
// Tries the backends in this order: HAL hook cv_hal_sqrt32f (CALL_HAL),
// IPP call ippsSqrt_32f_A21 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched sqrt32f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void sqrt32f(const float* src, float* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
|
||||
// Square-root entry point for double arrays (src in; dst out; len elements).
// Tries the backends in this order: HAL hook cv_hal_sqrt64f (CALL_HAL),
// IPP call ippsSqrt_64f_A50 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched sqrt64f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void sqrt64f(const double* src, double* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Exponent entry point for float arrays (src in; dst out; n elements).
// Tries the backends in this order: HAL hook cv_hal_exp32f (CALL_HAL),
// IPP call ippsExp_32f_A21 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched exp32f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void exp32f(const float *src, float *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(exp32f, (src, dst, n),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Exponent entry point for double arrays (src in; dst out; n elements).
// Tries the backends in this order: HAL hook cv_hal_exp64f (CALL_HAL),
// IPP call ippsExp_64f_A50 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched exp64f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void exp64f(const double *src, double *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(exp64f, (src, dst, n),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Logarithm entry point for float arrays (src in; dst out; n elements).
// Tries the backends in this order: HAL hook cv_hal_log32f (CALL_HAL),
// IPP call ippsLn_32f_A21 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched log32f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void log32f(const float *src, float *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(log32f, (src, dst, n),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
// Logarithm entry point for double arrays (src in; dst out; n elements).
// Tries the backends in this order: HAL hook cv_hal_log64f (CALL_HAL),
// IPP call ippsLn_64f_A50 (CV_IPP_RUN_FAST, accepted when its status is >= 0),
// then the CPU-dispatched log64f (CV_CPU_DISPATCH).
// NOTE(review): the macros are defined elsewhere; presumably each returns early
// from this function on success - confirm.
void log64f(const double *src, double *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);
|
||||
|
||||
CV_CPU_DISPATCH(log64f, (src, dst, n),
|
||||
CV_CPU_DISPATCH_MODES_ALL);
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// for compatibility with 3.0
|
||||
|
||||
// 3.0-compatibility overload (float): forwards unchanged to exp32f().
void exp(const float* src, float* dst, int n)
|
||||
{
|
||||
exp32f(src, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (double): forwards unchanged to exp64f().
void exp(const double* src, double* dst, int n)
|
||||
{
|
||||
exp64f(src, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (float): forwards unchanged to log32f().
void log(const float* src, float* dst, int n)
|
||||
{
|
||||
log32f(src, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (double): forwards unchanged to log64f().
void log(const double* src, double* dst, int n)
|
||||
{
|
||||
log64f(src, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (float): forwards unchanged to magnitude32f().
void magnitude(const float* x, const float* y, float* dst, int n)
|
||||
{
|
||||
magnitude32f(x, y, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (double): forwards unchanged to magnitude64f().
void magnitude(const double* x, const double* y, double* dst, int n)
|
||||
{
|
||||
magnitude64f(x, y, dst, n);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (float): forwards unchanged to sqrt32f().
void sqrt(const float* src, float* dst, int len)
|
||||
{
|
||||
sqrt32f(src, dst, len);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (double): forwards unchanged to sqrt64f().
void sqrt(const double* src, double* dst, int len)
|
||||
{
|
||||
sqrt64f(src, dst, len);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (float): forwards unchanged to invSqrt32f().
void invSqrt(const float* src, float* dst, int len)
|
||||
{
|
||||
invSqrt32f(src, dst, len);
|
||||
}
|
||||
|
||||
// 3.0-compatibility overload (double): forwards unchanged to invSqrt64f().
void invSqrt(const double* src, double* dst, int len)
|
||||
{
|
||||
invSqrt64f(src, dst, len);
|
||||
}
|
||||
|
||||
}} // namespace cv::hal::
|
||||
|
||||
// Scalar fastAtan2 in namespace cv. Brings cv::hal into scope and hands the call
// to CV_CPU_CALL_BASELINE with the name fastAtan2 and arguments (y, x).
// NOTE(review): the macro is defined elsewhere; it presumably expands to a
// `return` of the baseline implementation, since no other return is written here - confirm.
float cv::fastAtan2( float y, float x )
|
||||
{
|
||||
using namespace cv::hal;
|
||||
CV_CPU_CALL_BASELINE(fastAtan2, (y, x));
|
||||
}
|
@ -1,46 +1,29 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
#include "precomp.hpp"
|
||||
namespace cv { namespace hal {
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
|
||||
|
||||
// forward declarations
|
||||
void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees);
|
||||
void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees);
|
||||
void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees);
|
||||
void magnitude32f(const float* x, const float* y, float* mag, int len);
|
||||
void magnitude64f(const double* x, const double* y, double* mag, int len);
|
||||
void invSqrt32f(const float* src, float* dst, int len);
|
||||
void invSqrt64f(const double* src, double* dst, int len);
|
||||
void sqrt32f(const float* src, float* dst, int len);
|
||||
void sqrt64f(const double* src, double* dst, int len);
|
||||
void exp32f(const float *src, float *dst, int n);
|
||||
void exp64f(const double *src, double *dst, int n);
|
||||
void log32f(const float *src, float *dst, int n);
|
||||
void log64f(const double *src, double *dst, int n);
|
||||
float fastAtan2(float y, float x);
|
||||
|
||||
|
||||
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -197,23 +180,17 @@ static inline void atanImpl(const T *Y, const T *X, T *angle, int len, bool angl
|
||||
|
||||
} // anonymous::
|
||||
|
||||
namespace cv { namespace hal {
|
||||
|
||||
///////////////////////////////////// ATAN2 ////////////////////////////////////
|
||||
|
||||
void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);
|
||||
atanImpl<float>(Y, X, angle, len, angleInDegrees);
|
||||
}
|
||||
|
||||
void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);
|
||||
atanImpl<double>(Y, X, angle, len, angleInDegrees);
|
||||
}
|
||||
|
||||
@ -221,7 +198,6 @@ void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool
|
||||
void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
fastAtan32f(Y, X, angle, len, angleInDegrees);
|
||||
}
|
||||
|
||||
@ -229,9 +205,6 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
@ -257,9 +230,6 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
@ -286,9 +256,6 @@ void invSqrt32f(const float* src, float* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
@ -310,9 +277,6 @@ void invSqrt64f(const double* src, double* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SSE2
|
||||
@ -330,9 +294,6 @@ void sqrt32f(const float* src, float* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128
|
||||
@ -354,9 +315,6 @@ void sqrt64f(const double* src, double* dst, int len)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
|
||||
|
||||
int i = 0;
|
||||
|
||||
#if CV_SIMD128_64F
|
||||
@ -381,9 +339,6 @@ void exp32f(const float *src, float *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
dst[i] = std::exp(src[i]);
|
||||
@ -394,9 +349,6 @@ void exp64f(const double *src, double *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
dst[i] = std::exp(src[i]);
|
||||
@ -407,9 +359,6 @@ void log32f(const float *src, float *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
dst[i] = std::log(src[i]);
|
||||
@ -419,9 +368,6 @@ void log64f(const double *src, double *dst, int n)
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);
|
||||
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
dst[i] = std::log(src[i]);
|
||||
@ -534,9 +480,6 @@ void exp32f( const float *_x, float *y, int n )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp32f, cv_hal_exp32f, _x, y, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, _x, y, n) >= 0);
|
||||
|
||||
static const float
|
||||
A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
|
||||
A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
|
||||
@ -551,7 +494,90 @@ void exp32f( const float *_x, float *y, int n )
|
||||
const Cv32suf* x = (const Cv32suf*)_x;
|
||||
Cv32suf buf[4];
|
||||
|
||||
#if CV_SSE2
|
||||
#if CV_AVX2
|
||||
if( n >= 8 )
|
||||
{
|
||||
static const __m256d prescale4 = _mm256_set1_pd(exp_prescale);
|
||||
static const __m256 postscale8 = _mm256_set1_ps((float)exp_postscale);
|
||||
static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
|
||||
static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
|
||||
|
||||
static const __m256 mA1 = _mm256_set1_ps(A1);
|
||||
static const __m256 mA2 = _mm256_set1_ps(A2);
|
||||
static const __m256 mA3 = _mm256_set1_ps(A3);
|
||||
static const __m256 mA4 = _mm256_set1_ps(A4);
|
||||
bool y_aligned = (size_t)(void*)y % 32 == 0;
|
||||
|
||||
ushort CV_DECL_ALIGNED(32) tab_idx[16];
|
||||
|
||||
for( ; i <= n - 8; i += 8 )
|
||||
{
|
||||
__m128i xi0, xi1;
|
||||
|
||||
__m256d xd0 = _mm256_cvtps_pd(_mm_min_ps(_mm_max_ps(_mm_loadu_ps(&x[i].f), minval4), maxval4));
|
||||
__m256d xd1 = _mm256_cvtps_pd(_mm_min_ps(_mm_max_ps(_mm_loadu_ps(&x[i+4].f), minval4), maxval4));
|
||||
|
||||
xd0 = _mm256_mul_pd(xd0, prescale4);
|
||||
xd1 = _mm256_mul_pd(xd1, prescale4);
|
||||
|
||||
xi0 = _mm256_cvtpd_epi32(xd0);
|
||||
xi1 = _mm256_cvtpd_epi32(xd1);
|
||||
|
||||
xd0 = _mm256_sub_pd(xd0, _mm256_cvtepi32_pd(xi0));
|
||||
xd1 = _mm256_sub_pd(xd1, _mm256_cvtepi32_pd(xi1));
|
||||
|
||||
// gcc does not support _mm256_set_m128
|
||||
//xf = _mm256_set_m128(_mm256_cvtpd_ps(xd1), _mm256_cvtpd_ps(xd0));
|
||||
__m256 xf = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(xd0)), _mm256_cvtpd_ps(xd1), 1);
|
||||
|
||||
xf = _mm256_mul_ps(xf, postscale8);
|
||||
|
||||
xi0 = _mm_packs_epi32(xi0, xi1);
|
||||
|
||||
_mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
|
||||
|
||||
xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
|
||||
xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
|
||||
xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
|
||||
xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
|
||||
xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
|
||||
|
||||
__m256d yd0 = _mm256_set_pd(expTab[tab_idx[3]], expTab[tab_idx[2]], expTab[tab_idx[1]], expTab[tab_idx[0]]);
|
||||
__m256d yd1 = _mm256_set_pd(expTab[tab_idx[7]], expTab[tab_idx[6]], expTab[tab_idx[5]], expTab[tab_idx[4]]);
|
||||
|
||||
// gcc does not support _mm256_set_m128
|
||||
//__m256 yf = _mm256_set_m128(_mm256_cvtpd_ps(yd1), _mm256_cvtpd_ps(yd0));
|
||||
__m256 yf = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(yd0)), _mm256_cvtpd_ps(yd1), 1);
|
||||
|
||||
//_mm256_set_m128i(xi1, xi0)
|
||||
__m256i temp = (__m256i)_mm256_insertf128_ps(_mm256_castps128_ps256((__m128)xi0), (__m128)xi1, 1);
|
||||
|
||||
yf = _mm256_mul_ps(yf, _mm256_castsi256_ps(_mm256_slli_epi32(temp, 23)));
|
||||
|
||||
__m256 zf = _mm256_add_ps(xf, mA1);
|
||||
|
||||
#if CV_FMA3
|
||||
zf = _mm256_fmadd_ps(zf, xf, mA2);
|
||||
zf = _mm256_fmadd_ps(zf, xf, mA3);
|
||||
zf = _mm256_fmadd_ps(zf, xf, mA4);
|
||||
#else
|
||||
zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA2);
|
||||
zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA3);
|
||||
zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA4);
|
||||
#endif
|
||||
zf = _mm256_mul_ps(zf, yf);
|
||||
|
||||
if( y_aligned )
|
||||
{
|
||||
_mm256_store_ps(y + i, zf);
|
||||
}
|
||||
else
|
||||
{
|
||||
_mm256_storeu_ps(y + i, zf);
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif CV_SSE2
|
||||
if( n >= 8 )
|
||||
{
|
||||
static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
|
||||
@ -738,9 +764,6 @@ void exp64f( const double *_x, double *y, int n )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(exp64f, cv_hal_exp64f, _x, y, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, _x, y, n) >= 0);
|
||||
|
||||
static const double
|
||||
A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
|
||||
A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
|
||||
@ -1187,9 +1210,6 @@ void log32f( const float *_x, float *y, int n )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log32f, cv_hal_log32f, _x, y, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, _x, y, n) >= 0);
|
||||
|
||||
static const float shift[] = { 0, -1.f/512 };
|
||||
static const float
|
||||
A0 = 0.3333333333333333333333333f,
|
||||
@ -1336,9 +1356,6 @@ void log64f( const double *x, double *y, int n )
|
||||
{
|
||||
CV_INSTRUMENT_REGION()
|
||||
|
||||
CALL_HAL(log64f, cv_hal_log64f, x, y, n);
|
||||
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, x, y, n) >= 0);
|
||||
|
||||
static const double shift[] = { 0, -1./512 };
|
||||
static const double
|
||||
A7 = 1.0,
|
||||
@ -1524,64 +1541,13 @@ void log64f( const double *x, double *y, int n )
|
||||
|
||||
#endif // issue 7795
|
||||
|
||||
//=============================================================================
|
||||
// for compatibility with 3.0
|
||||
|
||||
void exp(const float* src, float* dst, int n)
|
||||
{
|
||||
exp32f(src, dst, n);
|
||||
}
|
||||
|
||||
void exp(const double* src, double* dst, int n)
|
||||
{
|
||||
exp64f(src, dst, n);
|
||||
}
|
||||
|
||||
void log(const float* src, float* dst, int n)
|
||||
{
|
||||
log32f(src, dst, n);
|
||||
}
|
||||
|
||||
void log(const double* src, double* dst, int n)
|
||||
{
|
||||
log64f(src, dst, n);
|
||||
}
|
||||
|
||||
void magnitude(const float* x, const float* y, float* dst, int n)
|
||||
{
|
||||
magnitude32f(x, y, dst, n);
|
||||
}
|
||||
|
||||
void magnitude(const double* x, const double* y, double* dst, int n)
|
||||
{
|
||||
magnitude64f(x, y, dst, n);
|
||||
}
|
||||
|
||||
void sqrt(const float* src, float* dst, int len)
|
||||
{
|
||||
sqrt32f(src, dst, len);
|
||||
}
|
||||
|
||||
void sqrt(const double* src, double* dst, int len)
|
||||
{
|
||||
sqrt64f(src, dst, len);
|
||||
}
|
||||
|
||||
void invSqrt(const float* src, float* dst, int len)
|
||||
{
|
||||
invSqrt32f(src, dst, len);
|
||||
}
|
||||
|
||||
void invSqrt(const double* src, double* dst, int len)
|
||||
{
|
||||
invSqrt64f(src, dst, len);
|
||||
}
|
||||
|
||||
|
||||
} // cv::hal::
|
||||
} // cv::
|
||||
|
||||
float cv::fastAtan2( float y, float x )
|
||||
float fastAtan2( float y, float x )
|
||||
{
|
||||
return atanImpl<float>(y, x);
|
||||
}
|
||||
|
||||
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
|
||||
|
||||
CV_CPU_OPTIMIZATION_NAMESPACE_END
|
||||
|
||||
}} // namespace cv::hal
|
@ -24,6 +24,7 @@ if(NOT OPENCV_INITIAL_PASS)
|
||||
|
||||
message(STATUS "Processing WORLD modules...")
|
||||
foreach(m ${OPENCV_MODULES_BUILD})
|
||||
set(the_module ${m})
|
||||
if(OPENCV_MODULE_${m}_IS_PART_OF_WORLD)
|
||||
message(STATUS " module ${m}...")
|
||||
set(CMAKE_CURRENT_SOURCE_DIR "${OPENCV_MODULE_${m}_LOCATION}")
|
||||
|
Loading…
Reference in New Issue
Block a user