diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index d0c28ab0fb..a2c118db43 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -369,3 +369,18 @@ if(MSVC) ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4589) # Constructor of abstract class 'cv::ORB' ignores initializer for virtual base class 'cv::Algorithm' endif() endif() + +if(NOT OPENCV_FP16_DISABLE) + try_compile(__VALID_FP16 + "${OpenCV_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" + COMPILE_DEFINITIONS "-DCHECK_FP16" + OUTPUT_VARIABLE TRY_OUT + ) + if(NOT __VALID_FP16) + message(STATUS "FP16: Compiler support is not available") + else() + message(STATUS "FP16: Compiler support is available") + set(HAVE_FP16 1) + endif() +endif() diff --git a/cmake/checks/fp16.cpp b/cmake/checks/fp16.cpp new file mode 100644 index 0000000000..c77c844834 --- /dev/null +++ b/cmake/checks/fp16.cpp @@ -0,0 +1,33 @@ +#include <stdio.h> + +#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) +#include <immintrin.h> +int test() +{ + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + short dst[8]; + __m128 v_src = _mm_load_ps(src); + __m128i v_dst = _mm_cvtps_ph(v_src, 0); + _mm_storel_epi64((__m128i*)dst, v_dst); + return (int)dst[0]; +} +#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__) +#include "arm_neon.h" +int test() +{ + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + short dst[8]; + float32x4_t v_src = *(float32x4_t*)src; + float16x4_t v_dst = vcvt_f16_f32(v_src); + *(float16x4_t*)dst = v_dst; + return (int)dst[0]; +} +#else +#error "FP16 is not supported" +#endif + +int main() +{ + printf("%d\n", test()); + return 0; +} diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 2312742130..8dac1ed618 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -203,3 +203,6 @@ /* Lapack */ #cmakedefine HAVE_LAPACK + +/* FP16 */ +#cmakedefine HAVE_FP16 diff --git 
a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index 42e93118b7..5ff8ec4213 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -193,7 +193,7 @@ enum CpuFeatures { # endif # define CV_POPCNT 1 # endif -# if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) +# if defined HAVE_FP16 && (defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)) # include <immintrin.h> # define CV_FP16 1 # endif @@ -219,7 +219,7 @@ enum CpuFeatures { #if (defined WIN32 || defined _WIN32) && defined(_M_ARM) # include <Intrin.h> -# include "arm_neon.h" +# include <arm_neon.h> # define CV_NEON 1 # define CPU_HAS_NEON_FEATURE (true) #elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) @@ -227,8 +227,12 @@ enum CpuFeatures { # define CV_NEON 1 #endif -#if defined __GNUC__ && ((defined (__arm__) && (__ARM_FP & 0x2)) || defined(__aarch64__)) -# define CV_FP16 1 +#if defined(__ARM_NEON__) || defined(__aarch64__) +# include <arm_neon.h> +#endif + +#if defined HAVE_FP16 && defined __GNUC__ +# define CV_FP16 1 #endif #if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index e5c5e43690..dff0f9bc64 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -4591,6 +4591,8 @@ cvtScaleHalf_( const float* src, size_t sstep, short* dst, size_t float16x4_t v_dst = vcvt_f16_f32(v_src); *(float16x4_t*)(dst + x) = v_dst; +#else +#error "Configuration error" #endif } #endif @@ -4643,6 +4645,8 @@ cvtScaleHalf_( const short* src, size_t sstep, float* dst, size_t float32x4_t v_dst = vcvt_f32_f16(v_src); *(float32x4_t*)(dst + x) = v_dst; +#else +#error "Configuration error" #endif } #endif diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index a63c59ef34..e133de3f29 100644 --- a/modules/imgproc/src/thresh.cpp +++ 
b/modules/imgproc/src/thresh.cpp @@ -43,6 +43,23 @@ #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" +#if CV_NEON && defined(__aarch64__) +#include <arm_neon.h> +namespace cv { +// Workaround with missing definitions of vreinterpretq_u64_f64/vreinterpretq_f64_u64 +template <typename T> static inline +uint64x2_t vreinterpretq_u64_f64(T a) +{ + return (uint64x2_t) a; +} +template <typename T> static inline +float64x2_t vreinterpretq_f64_u64(T a) +{ + return (float64x2_t) a; +} +} // namespace cv +#endif + namespace cv {