mirror of
https://github.com/opencv/opencv.git
synced 2025-08-06 06:26:29 +08:00
Merge pull request #21630 from shibayan:arm64-msvc-neon
* Added NEON support in builds for Windows on ARM
* Fixed the `HAVE_CPU_NEON_SUPPORT` display, which was broken during the compiler test
* Fixed a build error with Visual Studio versions prior to Visual Studio 2022
This commit is contained in:
parent
119d8b3aca
commit
d354ad1c34
@ -314,6 +314,10 @@ if(MSVC)
|
||||
set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} /FS")
|
||||
set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} /FS")
|
||||
endif()
|
||||
|
||||
# When targeting AArch64 with MSVC_VERSION 1930 or newer, append the
# _ARM64_DISTINCT_NEON_TYPES preprocessor definition to OPENCV_EXTRA_FLAGS.
# "NOT ... LESS 1930" is the ">= 1930" test.
# NOTE(review): 1930 presumably corresponds to Visual Studio 2022 — confirm
# against the CMake MSVC_VERSION table.
if(AARCH64 AND NOT MSVC_VERSION LESS 1930)
|
||||
set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /D _ARM64_DISTINCT_NEON_TYPES")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(PROJECT_NAME STREQUAL "OpenCV")
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined _WIN32 && (defined(_M_ARM) || defined(_M_ARM64))
|
||||
# define _ARM64_DISTINCT_NEON_TYPES
|
||||
# include <Intrin.h>
|
||||
# include <arm_neon.h>
|
||||
# define CV_NEON 1
|
||||
|
@ -591,28 +591,26 @@ inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
|
||||
|
||||
/**
 * Lane-wise signed 16-bit multiply that keeps only the upper half of each
 * product: every pair of int16 lanes is widened-multiplied to 32 bits, then
 * narrowed back to 16 bits after a right shift by 16.
 *
 * @param a first operand (8 x int16 lanes)
 * @param b second operand (8 x int16 lanes)
 * @return vector whose lanes hold the high 16 bits of a[i] * b[i]
 */
inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b)
{
    // The widened product of the upper four lanes is computed into a local
    // first, so that no preprocessor conditional has to appear inside the
    // argument list of vshrn_n_s32.
    // NOTE(review): presumably required because some toolchains implement the
    // intrinsic as a macro, where an embedded #if is not allowed — confirm.
#if CV_NEON_AARCH64
    int32x4_t c = vmull_high_s16(a.val, b.val);
#else // #if CV_NEON_AARCH64
    int32x4_t c = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
#endif // #if CV_NEON_AARCH64
    return v_int16x8(vcombine_s16(
        // lower four lanes: widen-multiply, then take bits [31:16]
        vshrn_n_s32(vmull_s16( vget_low_s16(a.val), vget_low_s16(b.val)), 16),
        // upper four lanes: same narrowing applied to the hoisted product
        vshrn_n_s32(c, 16)
    ));
}
|
||||
/**
 * Lane-wise unsigned 16-bit multiply that keeps only the upper half of each
 * product: every pair of uint16 lanes is widened-multiplied to 32 bits, then
 * narrowed back to 16 bits after a right shift by 16.
 *
 * @param a first operand (8 x uint16 lanes)
 * @param b second operand (8 x uint16 lanes)
 * @return vector whose lanes hold the high 16 bits of a[i] * b[i]
 */
inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b)
{
    // The widened product of the upper four lanes is computed into a local
    // first, so that no preprocessor conditional has to appear inside the
    // argument list of vshrn_n_u32.
    // NOTE(review): presumably required because some toolchains implement the
    // intrinsic as a macro, where an embedded #if is not allowed — confirm.
#if CV_NEON_AARCH64
    uint32x4_t c = vmull_high_u16(a.val, b.val);
#else // #if CV_NEON_AARCH64
    uint32x4_t c = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
#endif // #if CV_NEON_AARCH64
    return v_uint16x8(vcombine_u16(
        // lower four lanes: widen-multiply, then take bits [31:16]
        vshrn_n_u32(vmull_u16( vget_low_u16(a.val), vget_low_u16(b.val)), 16),
        // upper four lanes: same narrowing applied to the hoisted product
        vshrn_n_u32(c, 16)
    ));
}
|
||||
|
||||
@ -1937,10 +1935,14 @@ inline v_int32x4 v_round(const v_float32x4& a)
|
||||
{
|
||||
float32x4_t a_ = a.val;
|
||||
int32x4_t result;
|
||||
#if defined _MSC_VER
|
||||
result = vcvtnq_s32_f32(a_);
|
||||
#else
|
||||
__asm__ ("fcvtns %0.4s, %1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a_)
|
||||
: /* No clobbers */);
|
||||
#endif
|
||||
return v_int32x4(result);
|
||||
}
|
||||
#else
|
||||
|
@ -615,6 +615,9 @@ struct HWFeatures
|
||||
#if defined _ARM_ && (defined(_WIN32_WCE) && _WIN32_WCE >= 0x800)
|
||||
have[CV_CPU_NEON] = true;
|
||||
#endif
|
||||
#if defined _M_ARM64
|
||||
have[CV_CPU_NEON] = true;
|
||||
#endif
|
||||
#ifdef __riscv_vector
|
||||
have[CV_CPU_RISCVV] = true;
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user