mirror of
https://github.com/opencv/opencv.git
synced 2024-12-11 14:39:11 +08:00
0de26fd78e
Zlib-ng is a zlib replacement with optimizations for "next generation" systems. Its optimizations may benefit the decode and encode speed of image libraries such as libpng. In our tests, using the zlib-ng and libpng combination on an x86_64 machine with AVX2, the time of `imdecode` and `imencode` drops by approximately 20%. This patch enables zlib-ng's optimizations if `CV_DISABLE_OPTIMIZATION` is OFF. Since zlib-ng can dispatch intrinsics on the fly, the porting work is much easier. Related discussion: https://github.com/opencv/opencv/issues/22573
36 lines
703 B
C
36 lines
703 B
C
#ifndef ARM_ACLE_INTRINS_H
|
|
#define ARM_ACLE_INTRINS_H
|
|
|
|
#include <stdint.h>
|
|
#ifdef _MSC_VER
|
|
# include <intrin.h>
|
|
#elif defined(HAVE_ARM_ACLE_H)
|
|
# include <arm_acle.h>
|
|
#endif
|
|
|
|
/*
 * Z_TARGET_CRC is defined only when ARM_ACLE is set.
 *   - AArch64 builds: expands to Z_TARGET("+crc").
 *   - All other builds: expands to nothing.
 * Z_TARGET itself is not defined in this header.
 */
#if defined(ARM_ACLE)
#  ifndef __aarch64__
#    define Z_TARGET_CRC
#  else
#    define Z_TARGET_CRC Z_TARGET("+crc")
#  endif
#endif
|
|
|
|
/*
 * __uqsub16 support, compiled only when ARM_SIMD is set.
 *
 *   - MSVC:                      __uqsub16 is an alias for _arm_uqsub16.
 *   - No ARM_SIMD_INTRIN:        __uqsub16 is an inline-asm wrapper around
 *                                the UQSUB16 instruction.
 *   - ARM_SIMD_INTRIN, non-MSVC: nothing is defined here; presumably the
 *                                toolchain's <arm_acle.h> supplies both the
 *                                type and the intrinsic (confirm against the
 *                                build configuration).
 */
#if defined(ARM_SIMD)

#  if defined(_MSC_VER) || !defined(ARM_SIMD_INTRIN)
/* Two 16-bit lanes packed into one 32-bit word. */
typedef uint32_t uint16x2_t;
#  endif

#  if defined(_MSC_VER)
#    define __uqsub16 _arm_uqsub16
#  elif !defined(ARM_SIMD_INTRIN)
/*
 * Emit a single UQSUB16 instruction on __a and __b and return its result.
 * Per the ARM architecture documentation, UQSUB16 subtracts each 16-bit
 * halfword of __b from the matching halfword of __a with unsigned saturation
 * (a lane that would go below zero becomes 0).
 */
static inline uint16x2_t __uqsub16(uint16x2_t __a, uint16x2_t __b) {
    uint16x2_t diff;

    __asm__ __volatile__("uqsub16 %0, %1, %2"
                         : "=r" (diff)
                         : "r" (__a), "r" (__b));
    return diff;
}
#  endif

#endif
|
|
|
|
#endif // include guard ARM_ACLE_INTRINS_H
|