mirror of
https://github.com/opencv/opencv.git
synced 2025-08-05 22:19:14 +08:00
core: several improves and fixes on ppc64le infrastructure
- add infrastructure support for Power9/VSX3 - fix missing VSX flags on GCC4.9 and CLANG4(#13210, #13222) - fix disable VSX optimzation on GCC by using flag ENABLE_VSX - flag ENABLE_VSX is deprecated now, use CPU_BASELINE, CPU_DISPATCH instead - add VSX3 to arithmetic dispatchable flags
This commit is contained in:
parent
183bc5c281
commit
474a0dac49
@ -327,7 +327,6 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
|
||||
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CV_GCC )
|
||||
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CV_GCC )
|
||||
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CV_GCC AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
|
||||
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF ((CV_GCC OR CV_CLANG) AND PPC64LE) )
|
||||
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CV_GCC AND (X86 OR X86_64)) )
|
||||
if(NOT IOS) # Use CPU_BASELINE instead
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF (CV_GCC OR CV_CLANG) AND (ARM OR AARCH64 OR IOS) )
|
||||
|
@ -5,6 +5,10 @@
|
||||
# AVX / AVX2 / AVX_512F
|
||||
# FMA3
|
||||
|
||||
# ppc64le arch:
|
||||
# VSX (always available on Power8)
|
||||
# VSX3 (always available on Power9)
|
||||
|
||||
# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag)
|
||||
# CPU_{opt}_IMPLIES=<list>
|
||||
# CPU_{opt}_FORCE=<list> - subset of "implies" list
|
||||
@ -29,7 +33,7 @@
|
||||
|
||||
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3;AVX_512F;AVX512_SKX")
|
||||
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
|
||||
list(APPEND CPU_ALL_OPTIMIZATIONS VSX)
|
||||
list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
|
||||
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
|
||||
|
||||
ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
|
||||
@ -81,7 +85,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
|
||||
ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
|
||||
ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
|
||||
|
||||
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF)
|
||||
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX ON)
|
||||
|
||||
macro(ocv_is_optimization_in_list resultvar check_opt)
|
||||
set(__checked "")
|
||||
@ -289,14 +293,24 @@ elseif(ARM OR AARCH64)
|
||||
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
|
||||
endif()
|
||||
elseif(PPC64LE)
|
||||
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX")
|
||||
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3")
|
||||
ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
|
||||
ocv_update(CPU_VSX3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx3.cpp")
|
||||
|
||||
if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE)
|
||||
ocv_update(CPU_VSX3_IMPLIES "VSX")
|
||||
endif()
|
||||
|
||||
if(CV_CLANG AND (NOT ${CMAKE_CXX_COMPILER} MATCHES "xlc"))
|
||||
ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
|
||||
ocv_update(CPU_VSX3_FLAGS_ON "-mpower9-vector")
|
||||
else()
|
||||
ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
|
||||
ocv_update(CPU_VSX3_FLAGS_ON "-mcpu=power9 -mtune=power9")
|
||||
endif()
|
||||
|
||||
set(CPU_DISPATCH "VSX3" CACHE STRING "${HELP_CPU_DISPATCH}")
|
||||
set(CPU_BASELINE "VSX" CACHE STRING "${HELP_CPU_BASELINE}")
|
||||
endif()
|
||||
|
||||
# Helper values for cmake-gui
|
||||
|
@ -1,8 +1,12 @@
|
||||
# if defined(__VSX__)
|
||||
# include <altivec.h>
|
||||
# else
|
||||
# error "VSX is not supported"
|
||||
# endif
|
||||
#if defined(__VSX__)
|
||||
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||
#include <altivec.h>
|
||||
#else
|
||||
#error "OpenCV only supports little-endian mode"
|
||||
#endif
|
||||
#else
|
||||
#error "VSX is not supported"
|
||||
#endif
|
||||
|
||||
int main()
|
||||
{
|
||||
|
17
cmake/checks/cpu_vsx3.cpp
Normal file
17
cmake/checks/cpu_vsx3.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
#if defined(__VSX__)
|
||||
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||
#include <altivec.h>
|
||||
#else
|
||||
#error "OpenCV only supports little-endian mode"
|
||||
#endif
|
||||
#else
|
||||
#error "VSX3 is not supported"
|
||||
#endif
|
||||
|
||||
int main()
|
||||
{
|
||||
__vector unsigned char a = vec_splats((unsigned char)1);
|
||||
__vector unsigned char b = vec_splats((unsigned char)2);
|
||||
__vector unsigned char r = vec_absd(a, b);
|
||||
return 0;
|
||||
}
|
@ -2,7 +2,7 @@ set(the_description "The Core Functionality")
|
||||
|
||||
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
|
||||
ocv_add_dispatched_file(stat SSE4_2 AVX2)
|
||||
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2)
|
||||
ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
|
||||
|
||||
# dispatching for accuracy tests
|
||||
ocv_add_dispatched_file_force_all(test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2)
|
||||
|
@ -107,7 +107,7 @@
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
|
||||
#ifdef CV_CPU_COMPILE_VSX
|
||||
# include <altivec.h>
|
||||
# undef vector
|
||||
# undef pixel
|
||||
@ -115,6 +115,10 @@
|
||||
# define CV_VSX 1
|
||||
#endif
|
||||
|
||||
#ifdef CV_CPU_COMPILE_VSX3
|
||||
# define CV_VSX3 1
|
||||
#endif
|
||||
|
||||
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
|
||||
|
||||
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
|
||||
@ -237,3 +241,7 @@ struct VZeroUpperGuard {
|
||||
#ifndef CV_VSX
|
||||
# define CV_VSX 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_VSX3
|
||||
# define CV_VSX3 0
|
||||
#endif
|
||||
|
@ -315,5 +315,26 @@
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 1
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 1
|
||||
# define CV_CPU_CALL_VSX3(fn, args) return (cpu_baseline::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) return (opt_VSX3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX3
|
||||
# define CV_TRY_VSX3 1
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 (cv::checkHardwareSupport(CV_CPU_VSX3))
|
||||
# define CV_CPU_CALL_VSX3(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX3) return (opt_VSX3::fn args)
|
||||
#else
|
||||
# define CV_TRY_VSX3 0
|
||||
# define CV_CPU_FORCE_VSX3 0
|
||||
# define CV_CPU_HAS_SUPPORT_VSX3 0
|
||||
# define CV_CPU_CALL_VSX3(fn, args)
|
||||
# define CV_CPU_CALL_VSX3_(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_VSX3(fn, args, mode, ...) CV_CPU_CALL_VSX3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
|
||||
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
|
||||
|
@ -240,9 +240,10 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
|
||||
#define CV_CPU_AVX_512VBMI 20
|
||||
#define CV_CPU_AVX_512VL 21
|
||||
|
||||
#define CV_CPU_NEON 100
|
||||
#define CV_CPU_NEON 100
|
||||
|
||||
#define CV_CPU_VSX 200
|
||||
#define CV_CPU_VSX 200
|
||||
#define CV_CPU_VSX3 201
|
||||
|
||||
// CPU features groups
|
||||
#define CV_CPU_AVX512_SKX 256
|
||||
@ -280,6 +281,7 @@ enum CpuFeatures {
|
||||
CPU_NEON = 100,
|
||||
|
||||
CPU_VSX = 200,
|
||||
CPU_VSX3 = 201,
|
||||
|
||||
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
|
||||
|
||||
|
@ -107,15 +107,14 @@ void* allocSingletonBuffer(size_t size) { return fastMalloc(size); }
|
||||
# include <cpu-features.h>
|
||||
#endif
|
||||
|
||||
#ifndef __VSX__
|
||||
# if defined __PPC64__ && defined __linux__
|
||||
# include "sys/auxv.h"
|
||||
# ifndef AT_HWCAP2
|
||||
# define AT_HWCAP2 26
|
||||
# endif
|
||||
# ifndef PPC_FEATURE2_ARCH_2_07
|
||||
# define PPC_FEATURE2_ARCH_2_07 0x80000000
|
||||
# endif
|
||||
|
||||
#if CV_VSX && defined __linux__
|
||||
# include "sys/auxv.h"
|
||||
# ifndef AT_HWCAP2
|
||||
# define AT_HWCAP2 26
|
||||
# endif
|
||||
# ifndef PPC_FEATURE2_ARCH_3_00
|
||||
# define PPC_FEATURE2_ARCH_3_00 0x00800000
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@ -359,6 +358,7 @@ struct HWFeatures
|
||||
g_hwFeatureNames[CPU_NEON] = "NEON";
|
||||
|
||||
g_hwFeatureNames[CPU_VSX] = "VSX";
|
||||
g_hwFeatureNames[CPU_VSX3] = "VSX3";
|
||||
|
||||
g_hwFeatureNames[CPU_AVX512_SKX] = "AVX512-SKX";
|
||||
}
|
||||
@ -513,14 +513,14 @@ struct HWFeatures
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __VSX__
|
||||
have[CV_CPU_VSX] = true;
|
||||
#elif (defined __PPC64__ && defined __linux__)
|
||||
uint64 hwcaps = getauxval(AT_HWCAP);
|
||||
// there's no need to check VSX availability in runtime since it's always available on ppc64le CPUs
|
||||
have[CV_CPU_VSX] = (CV_VSX);
|
||||
// TODO: Check VSX3 availability in runtime for other platforms
|
||||
#if CV_VSX && defined __linux__
|
||||
uint64 hwcap2 = getauxval(AT_HWCAP2);
|
||||
have[CV_CPU_VSX] = (hwcaps & PPC_FEATURE_PPC_LE && hwcaps & PPC_FEATURE_HAS_VSX && hwcap2 & PPC_FEATURE2_ARCH_2_07);
|
||||
have[CV_CPU_VSX3] = (hwcap2 & PPC_FEATURE2_ARCH_3_00);
|
||||
#else
|
||||
have[CV_CPU_VSX] = false;
|
||||
have[CV_CPU_VSX3] = (CV_VSX3);
|
||||
#endif
|
||||
|
||||
int baseline_features[] = { CV_CPU_BASELINE_FEATURES };
|
||||
|
Loading…
Reference in New Issue
Block a user