mirror of
https://github.com/opencv/opencv.git
synced 2025-06-10 19:24:07 +08:00
check FP16 build condition correctly
* use __GNUC_MINOR__ in the correct place to check the version of GCC
* check processor support of FP16 at run time
* check compiler support of FP16 and pass the correct compiler option
* rely on ENABLE_AVX on GCC, since AVX is generated when -mf16c is passed
* guard correctly using #ifdef for the various configurations
* use v_float16x4 correctly by including the right header file
This commit is contained in:
parent
d5c202e46b
commit
c7cb116dc0
@ -151,7 +151,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
|||||||
add_extra_compiler_option("-mfp16-format=ieee")
|
add_extra_compiler_option("-mfp16-format=ieee")
|
||||||
endif(ARM)
|
endif(ARM)
|
||||||
if(ENABLE_NEON)
|
if(ENABLE_NEON)
|
||||||
add_extra_compiler_option("-mfpu=neon-fp16")
|
add_extra_compiler_option("-mfpu=neon")
|
||||||
endif()
|
endif()
|
||||||
if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
|
if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
|
||||||
add_extra_compiler_option("-mfpu=vfpv3")
|
add_extra_compiler_option("-mfpu=vfpv3")
|
||||||
@ -336,6 +336,34 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
|
|||||||
add_extra_compiler_option(-fvisibility-inlines-hidden)
|
add_extra_compiler_option(-fvisibility-inlines-hidden)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(NOT OPENCV_FP16_DISABLE)
|
||||||
|
if(ARM AND ENABLE_NEON)
|
||||||
|
set(FP16_OPTION "-mfpu=neon-fp16")
|
||||||
|
elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
|
||||||
|
set(FP16_OPTION "-mf16c")
|
||||||
|
endif()
|
||||||
|
try_compile(__VALID_FP16
|
||||||
|
"${OpenCV_BINARY_DIR}"
|
||||||
|
"${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
|
||||||
|
COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
|
||||||
|
OUTPUT_VARIABLE TRY_OUT
|
||||||
|
)
|
||||||
|
if(NOT __VALID_FP16)
|
||||||
|
if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
|
||||||
|
# GCC enables AVX when mf16c is passed
|
||||||
|
message(STATUS "FP16: Feature disabled")
|
||||||
|
else()
|
||||||
|
message(STATUS "FP16: Compiler support is not available")
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
message(STATUS "FP16: Compiler support is available")
|
||||||
|
set(HAVE_FP16 1)
|
||||||
|
if(NOT ${FP16_OPTION} STREQUAL "")
|
||||||
|
add_extra_compiler_option(${FP16_OPTION})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
#combine all "extra" options
|
#combine all "extra" options
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
|
||||||
@ -376,21 +404,6 @@ if(MSVC)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT OPENCV_FP16_DISABLE)
|
|
||||||
try_compile(__VALID_FP16
|
|
||||||
"${OpenCV_BINARY_DIR}"
|
|
||||||
"${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
|
|
||||||
COMPILE_DEFINITIONS "-DCHECK_FP16"
|
|
||||||
OUTPUT_VARIABLE TRY_OUT
|
|
||||||
)
|
|
||||||
if(NOT __VALID_FP16)
|
|
||||||
message(STATUS "FP16: Compiler support is not available")
|
|
||||||
else()
|
|
||||||
message(STATUS "FP16: Compiler support is available")
|
|
||||||
set(HAVE_FP16 1)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
|
if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
|
||||||
link_directories("/usr/local/lib")
|
link_directories("/usr/local/lib")
|
||||||
endif()
|
endif()
|
||||||
|
@ -310,7 +310,7 @@ enum CpuFeatures {
|
|||||||
typedef union Cv16suf
|
typedef union Cv16suf
|
||||||
{
|
{
|
||||||
short i;
|
short i;
|
||||||
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||||
__fp16 h;
|
__fp16 h;
|
||||||
#endif
|
#endif
|
||||||
struct _fp16Format
|
struct _fp16Format
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
#include "opencl_kernels_core.hpp"
|
#include "opencl_kernels_core.hpp"
|
||||||
|
#include "opencv2/core/hal/intrin.hpp"
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#undef CV_NEON
|
#undef CV_NEON
|
||||||
@ -4379,7 +4380,7 @@ struct Cvt_SIMD<float, int>
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) )
|
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) )
|
||||||
// const numbers for floating points format
|
// const numbers for floating points format
|
||||||
const unsigned int kShiftSignificand = 13;
|
const unsigned int kShiftSignificand = 13;
|
||||||
const unsigned int kMaskFp16Significand = 0x3ff;
|
const unsigned int kMaskFp16Significand = 0x3ff;
|
||||||
@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15;
|
|||||||
const unsigned int kBiasFp32Exponent = 127;
|
const unsigned int kBiasFp32Exponent = 127;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||||
static float convertFp16SW(short fp16)
|
static float convertFp16SW(short fp16)
|
||||||
{
|
{
|
||||||
// Fp16 -> Fp32
|
// Fp16 -> Fp32
|
||||||
@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) )
|
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
|
||||||
static short convertFp16SW(float fp32)
|
static short convertFp16SW(float fp32)
|
||||||
{
|
{
|
||||||
// Fp32 -> Fp16
|
// Fp32 -> Fp16
|
||||||
@ -4557,7 +4558,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
|
|||||||
if ( ( (intptr_t)dst & 0xf ) == 0 )
|
if ( ( (intptr_t)dst & 0xf ) == 0 )
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if CV_FP16
|
#if CV_FP16 && CV_SIMD128
|
||||||
for ( ; x <= size.width - 4; x += 4)
|
for ( ; x <= size.width - 4; x += 4)
|
||||||
{
|
{
|
||||||
v_float32x4 v_src = v_load(src + x);
|
v_float32x4 v_src = v_load(src + x);
|
||||||
@ -4603,7 +4604,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
|
|||||||
if ( ( (intptr_t)src & 0xf ) == 0 )
|
if ( ( (intptr_t)src & 0xf ) == 0 )
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if CV_FP16
|
#if CV_FP16 && CV_SIMD128
|
||||||
for ( ; x <= size.width - 4; x += 4)
|
for ( ; x <= size.width - 4; x += 4)
|
||||||
{
|
{
|
||||||
v_float16x4 v_src = v_load_f16(src + x);
|
v_float16x4 v_src = v_load_f16(src + x);
|
||||||
|
@ -711,12 +711,14 @@ template<typename R> struct TheTest
|
|||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if CV_FP16
|
|
||||||
TheTest & test_loadstore_fp16()
|
TheTest & test_loadstore_fp16()
|
||||||
{
|
{
|
||||||
|
#if CV_FP16
|
||||||
AlignedData<R> data;
|
AlignedData<R> data;
|
||||||
AlignedData<R> out;
|
AlignedData<R> out;
|
||||||
|
|
||||||
|
if(checkHardwareSupport(CV_CPU_FP16))
|
||||||
|
{
|
||||||
// check if addresses are aligned and unaligned respectively
|
// check if addresses are aligned and unaligned respectively
|
||||||
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
|
||||||
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
|
||||||
@ -735,24 +737,30 @@ template<typename R> struct TheTest
|
|||||||
out.a.clear();
|
out.a.clear();
|
||||||
v_store_f16(out.a.d, r1);
|
v_store_f16(out.a.d, r1);
|
||||||
EXPECT_EQ(data.a, out.a);
|
EXPECT_EQ(data.a, out.a);
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
TheTest & test_float_cvt_fp16()
|
TheTest & test_float_cvt_fp16()
|
||||||
{
|
{
|
||||||
|
#if CV_FP16
|
||||||
AlignedData<v_float32x4> data;
|
AlignedData<v_float32x4> data;
|
||||||
|
|
||||||
|
if(checkHardwareSupport(CV_CPU_FP16))
|
||||||
|
{
|
||||||
// check conversion
|
// check conversion
|
||||||
v_float32x4 r1 = v_load(data.a.d);
|
v_float32x4 r1 = v_load(data.a.d);
|
||||||
v_float16x4 r2 = v_cvt_f16(r1);
|
v_float16x4 r2 = v_cvt_f16(r1);
|
||||||
v_float32x4 r3 = v_cvt_f32(r2);
|
v_float32x4 r3 = v_cvt_f32(r2);
|
||||||
EXPECT_EQ(0x3c00, r2.get0());
|
EXPECT_EQ(0x3c00, r2.get0());
|
||||||
EXPECT_EQ(r3.get0(), r1.get0());
|
EXPECT_EQ(r3.get0(), r1.get0());
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user