opencv/cmake/checks/cpu_fp16.cpp

#include <stdio.h>

#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700 && defined __AVX__) || (defined __INTEL_COMPILER && defined __AVX__)
#include <immintrin.h>
int test()
{
    const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
    short dst[8];
    __m128 v_src = _mm_load_ps(src);
    __m128i v_dst = _mm_cvtps_ph(v_src, 0);
    _mm_storel_epi64((__m128i*)dst, v_dst);
    return (int)dst[0];
}
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#include "arm_neon.h"
// ARM NEON FP16 probe: loads four floats, narrows them to half precision
// with vcvt_f16_f32, stores the result through vst1_f16 into a short
// buffer, and returns the sum of the four stored 16-bit values.
int test()
{
    const float input[] = { 0.0f, 1.0f, 2.0f, 3.0f };
    short half_bits[4];
    float32x4_t wide = vld1q_f32(input);
    float16x4_t narrow = vcvt_f16_f32(wide);
    vst1_f16((__fp16*)half_bits, narrow);

    // Accumulate the raw 16-bit values as int, element by element.
    int total = 0;
    for (int i = 0; i < 4; ++i)
    {
        total += half_bits[i];
    }
    return total;
}
#else
#error "FP16 is not supported"
#endif

// Entry point: runs the selected test() variant, prints its integer result
// followed by a newline, and exits with status 0.
int main()
{
  const int result = test();
  printf("%d\n", result);
  return 0;
}
fix android pack build 2016-07-18 22:45:16 +08:00			`#include <stdio.h>`

cmake: fix CPU_BASELINE_FINAL filling - remove duplicates - restore "always on" missing entries - fix FP16 detection on MSVC 2018-04-26 21:03:45 +08:00			`#if defined __F16C__ \|\| (defined _MSC_VER && _MSC_VER >= 1700 && defined __AVX__) \|\| (defined __INTEL_COMPILER && defined __AVX__)`
fix android pack build 2016-07-18 22:45:16 +08:00			`#include <immintrin.h>`
			`int test()`
			`{`
			`const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };`
			`short dst[8];`
			`__m128 v_src = _mm_load_ps(src);`
			`__m128i v_dst = _mm_cvtps_ph(v_src, 0);`
			`_mm_storel_epi64((__m128i*)dst, v_dst);`
			`return (int)dst[0];`
			`}`
			`#elif defined __GNUC__ && (defined __arm__ \|\| defined __aarch64__)`
			`#include "arm_neon.h"`
			`int test()`
			`{`
DNN: add the Winograd fp16 support (#23654) * add Winograd FP16 implementation * fixed dispatching of FP16 code paths in dnn; use dynamic dispatcher only when NEON_FP16 is enabled in the build and the feature is present in the host CPU at runtime * fixed some warnings * hopefully fixed winograd on x64 (and maybe other platforms) --------- Co-authored-by: Vadim Pisarevsky <vadim.pisarevsky@gmail.com> 2023-11-20 18:45:37 +08:00			`const float src[] = { 0.0f, 1.0f, 2.0f, 3.0f };`
			`short dst[4];`
			`float32x4_t v_src = vld1q_f32(src);`
fix android pack build 2016-07-18 22:45:16 +08:00			`float16x4_t v_dst = vcvt_f16_f32(v_src);`
DNN: add the Winograd fp16 support (#23654) * add Winograd FP16 implementation * fixed dispatching of FP16 code paths in dnn; use dynamic dispatcher only when NEON_FP16 is enabled in the build and the feature is present in the host CPU at runtime * fixed some warnings * hopefully fixed winograd on x64 (and maybe other platforms) --------- Co-authored-by: Vadim Pisarevsky <vadim.pisarevsky@gmail.com> 2023-11-20 18:45:37 +08:00			`vst1_f16((__fp16*)dst, v_dst);`
			`return dst[0] + dst[1] + dst[2] + dst[3];`
fix android pack build 2016-07-18 22:45:16 +08:00			`}`
			`#else`
			`#error "FP16 is not supported"`
			`#endif`

			`int main()`
			`{`
			`printf("%d\n", test());`
			`return 0;`
			`}`