mirror of
https://github.com/opencv/opencv.git
synced 2024-12-27 03:14:05 +08:00
0de26fd78e
Zlib-ng is zlib replacement with optimizations for "next generation" systems. Its optimization may benifits image library decode and encode speed such as libpng. In our tests, if using zlib-ng and libpng combination on a x86_64 machine with AVX2, the time of `imdecode` amd `imencode` will drop 20% approximately. This patch enables zlib-ng's optimization if `CV_DISABLE_OPTIMIZATION` is OFF. Since Zlib-ng can dispatch intrinsics on the fly, port work is much easier. Related discussion: https://github.com/opencv/opencv/issues/22573
101 lines
2.9 KiB
C
101 lines
2.9 KiB
C
#include "../../zbuild.h"
|
|
#include "arm_features.h"
|
|
|
|
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
|
|
# include <sys/auxv.h>
|
|
# ifdef ARM_ASM_HWCAP
|
|
# include <asm/hwcap.h>
|
|
# endif
|
|
#elif defined(__FreeBSD__) && defined(__aarch64__)
|
|
# include <machine/armreg.h>
|
|
# ifndef ID_AA64ISAR0_CRC32_VAL
|
|
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
|
|
# endif
|
|
#elif defined(__APPLE__)
|
|
# if !defined(_DARWIN_C_SOURCE)
|
|
# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */
|
|
# endif
|
|
# include <sys/sysctl.h>
|
|
#elif defined(_WIN32)
|
|
# include <windows.h>
|
|
#endif
|
|
|
|
static int arm_has_crc32() {
|
|
#if defined(__linux__) && defined(ARM_AUXV_HAS_CRC32)
|
|
# ifdef HWCAP_CRC32
|
|
return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 1 : 0;
|
|
# else
|
|
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
|
|
# endif
|
|
#elif defined(__FreeBSD__) && defined(__aarch64__)
|
|
return getenv("QEMU_EMULATING") == NULL
|
|
&& ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
|
|
#elif defined(__APPLE__)
|
|
int hascrc32;
|
|
size_t size = sizeof(hascrc32);
|
|
return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0
|
|
&& hascrc32 == 1;
|
|
#elif defined(_WIN32)
|
|
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
|
|
#elif defined(ARM_NOCHECK_ACLE)
|
|
return 1;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
/* AArch64 has neon. */
|
|
#if !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC)
|
|
static inline int arm_has_neon() {
|
|
#if defined(__linux__) && defined(ARM_AUXV_HAS_NEON)
|
|
# ifdef HWCAP_ARM_NEON
|
|
return (getauxval(AT_HWCAP) & HWCAP_ARM_NEON) != 0 ? 1 : 0;
|
|
# else
|
|
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
|
|
# endif
|
|
#elif defined(__APPLE__)
|
|
int hasneon;
|
|
size_t size = sizeof(hasneon);
|
|
return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0
|
|
&& hasneon == 1;
|
|
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
|
|
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
|
|
return 1; /* Always supported */
|
|
# endif
|
|
#endif
|
|
|
|
#if defined(ARM_NOCHECK_NEON)
|
|
return 1;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
/* AArch64 does not have ARMv6 SIMD. */
|
|
#if !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC)
|
|
static inline int arm_has_simd() {
|
|
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
|
|
const char *platform = (const char *)getauxval(AT_PLATFORM);
|
|
return strncmp(platform, "v6l", 3) == 0
|
|
|| strncmp(platform, "v7l", 3) == 0
|
|
|| strncmp(platform, "v8l", 3) == 0;
|
|
#elif defined(ARM_NOCHECK_SIMD)
|
|
return 1;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
#endif
|
|
|
|
void Z_INTERNAL arm_check_features(struct arm_cpu_features *features) {
|
|
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
features->has_simd = 0; /* never available */
|
|
features->has_neon = 1; /* always available */
|
|
#else
|
|
features->has_simd = arm_has_simd();
|
|
features->has_neon = arm_has_neon();
|
|
#endif
|
|
features->has_crc32 = arm_has_crc32();
|
|
}
|