mirror of
https://github.com/opencv/opencv.git
synced 2024-12-17 10:58:00 +08:00
0de26fd78e
Zlib-ng is a zlib replacement with optimizations for "next generation" systems. Its optimizations may benefit the decode and encode speed of image libraries such as libpng. In our tests, when using the zlib-ng and libpng combination on an x86_64 machine with AVX2, the time of `imdecode` and `imencode` drops by approximately 20%. This patch enables zlib-ng's optimizations if `CV_DISABLE_OPTIMIZATION` is OFF. Since zlib-ng can dispatch intrinsics on the fly, the porting work is much easier. Related discussion: https://github.com/opencv/opencv/issues/22573
51 lines
1.8 KiB
C
51 lines
1.8 KiB
C
#ifndef FALLBACK_BUILTINS_H
|
|
#define FALLBACK_BUILTINS_H
|
|
|
|
#if defined(_MSC_VER) && !defined(__clang__)
|
|
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
|
|
#include <intrin.h>
|
|
#ifdef X86_FEATURES
|
|
# include "arch/x86/x86_features.h"
|
|
#endif
|
|
|
|
/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0.
|
|
* Because of that assumption trailing_zero is not initialized and the return value is not checked.
|
|
* Tzcnt and bsf give identical results except when input value is 0, therefore this can not be allowed.
|
|
* If tzcnt instruction is not supported, the cpu will itself execute bsf instead.
|
|
* Performance tzcnt/bsf is identical on Intel cpu, tzcnt is faster than bsf on AMD cpu.
|
|
*/
|
|
static __forceinline int __builtin_ctz(unsigned int value) {
|
|
Assert(value != 0, "Invalid input value: 0");
|
|
# if defined(X86_FEATURES) && !(_MSC_VER < 1700)
|
|
return (int)_tzcnt_u32(value);
|
|
# else
|
|
unsigned long trailing_zero;
|
|
_BitScanForward(&trailing_zero, value);
|
|
return (int)trailing_zero;
|
|
# endif
|
|
}
|
|
#define HAVE_BUILTIN_CTZ
|
|
|
|
#ifdef _M_AMD64
/* MSVC stand-in for __builtin_ctzll: counts the trailing zero bits of a 64-bit value.
 *
 * NOT a general purpose replacement: the caller must guarantee value != 0.
 * Relying on that, the bit-scan path leaves its result variable uninitialized
 * and ignores the intrinsic's return value.
 */
static __forceinline int __builtin_ctzll(unsigned long long value) {
    Assert(value != 0, "Invalid input value: 0");
# if !defined(X86_FEATURES) || (_MSC_VER < 1700)
    /* No x86 feature support compiled in, or a compiler older than
     * _MSC_VER 1700: use the 64-bit bit-scan intrinsic. */
    unsigned long bit_index;
    _BitScanForward64(&bit_index, value);
    return (int)bit_index;
# else
    return (int)_tzcnt_u64(value);
# endif
}
/* Advertise that a __builtin_ctzll implementation is available. */
#define HAVE_BUILTIN_CTZLL
#endif // Microsoft AMD64
|
|
|
|
#endif // Microsoft AMD64/IA64/x86/ARM/ARM64 test
|
|
#endif // _MSC_VER & !clang
|
|
|
|
#endif // include guard FALLBACK_BUILTINS_H
|