Mirror of https://github.com/opencv/opencv.git (synced 2024-11-23 18:50:21 +08:00)
Merge pull request #26113 from FantasqueX:zlib-ng-2-2-1
Update zlib-ng to 2.2.1 #26113

Release: https://github.com/zlib-ng/zlib-ng/releases/tag/2.2.1
ARM diagnostics patch: https://github.com/zlib-ng/zlib-ng/pull/1774

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is an accuracy test, performance test and test data in the opencv_extra repository, if applicable (the patch to opencv_extra has the same branch name)
- [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent 7de3a8e960
commit 85923c8f30
1714 3rdparty/zlib-ng/CMakeLists.txt (vendored)

(File diff suppressed because it is too large.)
2 3rdparty/zlib-ng/LICENSE.md (vendored)

@@ -1,4 +1,4 @@
(C) 1995-2013 Jean-loup Gailly and Mark Adler
(C) 1995-2024 Jean-loup Gailly and Mark Adler

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
30 3rdparty/zlib-ng/README.md (vendored)

@@ -21,7 +21,6 @@ Features
* Support for CPU intrinsics when available
* Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX
* CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
* Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX

@@ -95,20 +94,21 @@ make test
Build Options
-------------

| CMake                     | configure                 | Description                                                                          | Default |
|:--------------------------|:--------------------------|:-------------------------------------------------------------------------------------|---------|
| ZLIB_COMPAT               | --zlib-compat             | Compile with zlib compatible API                                                     | OFF     |
| ZLIB_ENABLE_TESTS         |                           | Build test binaries                                                                  | ON      |
| WITH_GZFILEOP             | --without-gzfileops       | Compile with support for gzFile related functions                                    | ON      |
| WITH_OPTIM                | --without-optimizations   | Build with optimisations                                                             | ON      |
| WITH_NEW_STRATEGIES       | --without-new-strategies  | Use new strategies                                                                   | ON      |
| WITH_NATIVE_INSTRUCTIONS  |                           | Compiles with full instruction set supported on this host (gcc/clang -march=native)  | OFF     |
| WITH_SANITIZER            |                           | Build with sanitizer (memory, address, undefined)                                    | OFF     |
| WITH_GTEST                |                           | Build gtest_zlib                                                                     | ON      |
| WITH_FUZZERS              |                           | Build test/fuzz                                                                      | OFF     |
| WITH_BENCHMARKS           |                           | Build test/benchmarks                                                                | OFF     |
| WITH_MAINTAINER_WARNINGS  |                           | Build with project maintainer warnings                                               | OFF     |
| WITH_CODE_COVERAGE        |                           | Enable code coverage reporting                                                       | OFF     |

| CMake                       | configure                 | Description                                                                          | Default |
|:----------------------------|:--------------------------|:-------------------------------------------------------------------------------------|---------|
| ZLIB_COMPAT                 | --zlib-compat             | Compile with zlib compatible API                                                     | OFF     |
| ZLIB_ENABLE_TESTS           |                           | Build test binaries                                                                  | ON      |
| WITH_GZFILEOP               | --without-gzfileops       | Compile with support for gzFile related functions                                    | ON      |
| WITH_OPTIM                  | --without-optimizations   | Build with optimisations                                                             | ON      |
| WITH_NEW_STRATEGIES         | --without-new-strategies  | Use new strategies                                                                   | ON      |
| WITH_NATIVE_INSTRUCTIONS    |                           | Compiles with full instruction set supported on this host (gcc/clang -march=native)  | OFF     |
| WITH_RUNTIME_CPU_DETECTION  |                           | Compiles with runtime CPU detection                                                  | ON      |
| WITH_SANITIZER              |                           | Build with sanitizer (memory, address, undefined)                                    | OFF     |
| WITH_GTEST                  |                           | Build gtest_zlib                                                                     | ON      |
| WITH_FUZZERS                |                           | Build test/fuzz                                                                      | OFF     |
| WITH_BENCHMARKS             |                           | Build test/benchmarks                                                                | OFF     |
| WITH_MAINTAINER_WARNINGS    |                           | Build with project maintainer warnings                                               | OFF     |
| WITH_CODE_COVERAGE          |                           | Enable code coverage reporting                                                       | OFF     |

Install
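The `ZLIB_COMPAT` option in the Build Options table above decides which API the library exposes: with it ON, the classic `zlib.h` names (`adler32`, `crc32`, `unsigned long` lengths) are kept; with it OFF, the `zng_`-prefixed native API is used, matching the `PREFIX()` signatures visible in the diffs below. A minimal editorial sketch (not part of the PR) of calling the native API; the `<zlib-ng.h>` header name is assumed for a non-compat install:

```c
/* Sketch: assumes zlib-ng built with ZLIB_COMPAT=OFF, so the native
 * zng_-prefixed API is available. With ZLIB_COMPAT=ON the equivalent
 * calls are the classic adler32()/crc32() from <zlib.h>. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib-ng.h>   /* assumed header name of the native API */

int main(void) {
    const unsigned char data[] = "hello, zlib-ng";
    uint32_t len = (uint32_t)strlen((const char *)data);

    uint32_t adler = zng_adler32(1, data, len);  /* Adler-32 starts at 1 */
    uint32_t crc   = zng_crc32(0, data, len);    /* CRC-32 starts at 0 */

    printf("adler32=%08x crc32=%08x\n", adler, crc);
    return 0;
}
```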
54 3rdparty/zlib-ng/adler32.c (vendored)
@ -7,70 +7,24 @@
|
||||
#include "functable.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
|
||||
uint32_t sum2;
|
||||
unsigned n;
|
||||
|
||||
/* split Adler-32 into component sums */
|
||||
sum2 = (adler >> 16) & 0xffff;
|
||||
adler &= 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if (UNLIKELY(len == 1))
|
||||
return adler32_len_1(adler, buf, sum2);
|
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if (UNLIKELY(buf == NULL))
|
||||
return 1L;
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if (UNLIKELY(len < 16))
|
||||
return adler32_len_16(adler, buf, len, sum2);
|
||||
|
||||
/* do length NMAX blocks -- requires just one modulo operation */
|
||||
while (len >= NMAX) {
|
||||
len -= NMAX;
|
||||
#ifdef UNROLL_MORE
|
||||
n = NMAX / 16; /* NMAX is divisible by 16 */
|
||||
#else
|
||||
n = NMAX / 8; /* NMAX is divisible by 8 */
|
||||
#endif
|
||||
do {
|
||||
#ifdef UNROLL_MORE
|
||||
DO16(adler, sum2, buf); /* 16 sums unrolled */
|
||||
buf += 16;
|
||||
#else
|
||||
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
|
||||
buf += 8;
|
||||
#endif
|
||||
} while (--n);
|
||||
adler %= BASE;
|
||||
sum2 %= BASE;
|
||||
}
|
||||
|
||||
/* do remaining bytes (less than NMAX, still just one modulo) */
|
||||
return adler32_len_64(adler, buf, len, sum2);
|
||||
}
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) {
|
||||
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
|
||||
return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
return functable.adler32(adler, buf, len);
|
||||
return FUNCTABLE_CALL(adler32)(adler, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) {
|
||||
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
|
||||
return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
|
||||
return functable.adler32(adler, buf, len);
|
||||
return FUNCTABLE_CALL(adler32)(adler, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
11 3rdparty/zlib-ng/adler32_fold.h (vendored)

@@ -1,11 +0,0 @@
/* adler32_fold.h -- adler32 folding interface
 * Copyright (C) 2022 Adam Stylinski
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#ifndef ADLER32_FOLD_H_
#define ADLER32_FOLD_H_

Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);

#endif
2 3rdparty/zlib-ng/arch/.gitignore (vendored)

@@ -1,2 +0,0 @@
# ignore Makefiles; they're all automatically generated
Makefile
7 3rdparty/zlib-ng/arch/arm/Makefile.in (vendored)
@ -25,7 +25,6 @@ all: \
|
||||
crc32_acle.o crc32_acle.lo \
|
||||
slide_hash_neon.o slide_hash_neon.lo \
|
||||
slide_hash_armv6.o slide_hash_armv6.lo \
|
||||
insert_string_acle.o insert_string_acle.lo
|
||||
|
||||
adler32_neon.o:
|
||||
$(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
@ -69,12 +68,6 @@ slide_hash_armv6.o:
|
||||
slide_hash_armv6.lo:
|
||||
$(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c
|
||||
|
||||
insert_string_acle.o:
|
||||
$(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
insert_string_acle.lo:
|
||||
$(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
rm -f *.o *.lo *~
|
||||
|
4 3rdparty/zlib-ng/arch/arm/adler32_neon.c (vendored)
@ -7,8 +7,8 @@
|
||||
*/
|
||||
#ifdef ARM_NEON
|
||||
#include "neon_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
|
||||
static const uint16_t ALIGNED_(16) taps[64] = {
|
||||
|
17 3rdparty/zlib-ng/arch/arm/arm_features.c (vendored)
@ -1,4 +1,4 @@
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
#include "arm_features.h"
|
||||
|
||||
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
|
||||
@ -11,6 +11,11 @@
|
||||
# ifndef ID_AA64ISAR0_CRC32_VAL
|
||||
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
|
||||
# endif
|
||||
#elif defined(__OpenBSD__) && defined(__aarch64__)
|
||||
# include <machine/armreg.h>
|
||||
# include <machine/cpu.h>
|
||||
# include <sys/sysctl.h>
|
||||
# include <sys/types.h>
|
||||
#elif defined(__APPLE__)
|
||||
# if !defined(_DARWIN_C_SOURCE)
|
||||
# define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */
|
||||
@ -30,6 +35,16 @@ static int arm_has_crc32() {
|
||||
#elif defined(__FreeBSD__) && defined(__aarch64__)
|
||||
return getenv("QEMU_EMULATING") == NULL
|
||||
&& ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
|
||||
#elif defined(__OpenBSD__) && defined(__aarch64__)
|
||||
int hascrc32 = 0;
|
||||
int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
|
||||
uint64_t isar0 = 0;
|
||||
size_t len = sizeof(isar0);
|
||||
if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) {
|
||||
if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE)
|
||||
hascrc32 = 1;
|
||||
}
|
||||
return hascrc32;
|
||||
#elif defined(__APPLE__)
|
||||
int hascrc32;
|
||||
size_t size = sizeof(hascrc32);
|
||||
|
6 3rdparty/zlib-ng/arch/arm/arm_features.h (vendored)
@ -2,8 +2,8 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef ARM_H_
|
||||
#define ARM_H_
|
||||
#ifndef ARM_FEATURES_H_
|
||||
#define ARM_FEATURES_H_
|
||||
|
||||
struct arm_cpu_features {
|
||||
int has_simd;
|
||||
@ -13,4 +13,4 @@ struct arm_cpu_features {
|
||||
|
||||
void Z_INTERNAL arm_check_features(struct arm_cpu_features *features);
|
||||
|
||||
#endif /* ARM_H_ */
|
||||
#endif /* ARM_FEATURES_H_ */
|
||||
|
65 3rdparty/zlib-ng/arch/arm/arm_functions.h (vendored, new file)
@ -0,0 +1,65 @@
|
||||
/* arm_functions.h -- ARM implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef ARM_FUNCTIONS_H_
|
||||
#define ARM_FUNCTIONS_H_
|
||||
|
||||
#ifdef ARM_NEON
|
||||
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t chunksize_neon(void);
|
||||
uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZLL
|
||||
uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
|
||||
uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
void slide_hash_neon(deflate_state *s);
|
||||
void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef ARM_SIMD
|
||||
void slide_hash_armv6(deflate_state *s);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// ARM - SIMD
|
||||
# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_armv6
|
||||
# endif
|
||||
// ARM - NEON
|
||||
# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_neon
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_neon
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_neon
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_neon
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_neon
|
||||
# ifdef HAVE_BUILTIN_CTZLL
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_neon
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_neon
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_neon
|
||||
# endif
|
||||
# endif
|
||||
// ARM - ACLE
|
||||
# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_acle
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* ARM_FUNCTIONS_H_ */
|
4 3rdparty/zlib-ng/arch/arm/chunkset_neon.c (vendored)
@ -4,8 +4,8 @@
|
||||
|
||||
#ifdef ARM_NEON
|
||||
#include "neon_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../generic/chunk_permute_table.h"
|
||||
#include "zbuild.h"
|
||||
#include "arch/generic/chunk_permute_table.h"
|
||||
|
||||
typedef uint8x16_t chunk_t;
|
||||
|
||||
|
5 3rdparty/zlib-ng/arch/arm/compare256_neon.c (vendored)
@ -3,8 +3,9 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
|
2 3rdparty/zlib-ng/arch/arm/crc32_acle.c (vendored)
@ -7,7 +7,7 @@
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
#include "acle_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
|
||||
Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
|
24 3rdparty/zlib-ng/arch/arm/insert_string_acle.c (vendored)
@ -1,24 +0,0 @@
|
||||
/* insert_string_acle.c -- insert_string integer hash variant using ACLE's CRC instructions
|
||||
*
|
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef ARM_ACLE
|
||||
#include "acle_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
#define HASH_CALC(s, h, val) \
|
||||
h = __crc32w(0, val)
|
||||
|
||||
#define HASH_CALC_VAR h
|
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
||||
|
||||
#define UPDATE_HASH Z_TARGET_CRC update_hash_acle
|
||||
#define INSERT_STRING Z_TARGET_CRC insert_string_acle
|
||||
#define QUICK_INSERT_STRING Z_TARGET_CRC quick_insert_string_acle
|
||||
|
||||
#include "../../insert_string_tpl.h"
|
||||
#endif
|
7 3rdparty/zlib-ng/arch/arm/neon_intrins.h (vendored)
@ -25,6 +25,13 @@
|
||||
out.val[3] = vqsubq_u16(a.val[3], b); \
|
||||
} while (0)
|
||||
|
||||
# if defined(__clang__) && defined(__arm__) && defined(__ANDROID__)
|
||||
/* Clang for 32-bit Android has too strict alignment requirement (:256) for x4 NEON intrinsics */
|
||||
# undef ARM_NEON_HASLD4
|
||||
# undef vld1q_u16_x4
|
||||
# undef vld1q_u8_x4
|
||||
# undef vst1q_u16_x4
|
||||
# endif
|
||||
|
||||
# ifndef ARM_NEON_HASLD4
|
||||
|
||||
|
4 3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c (vendored)
@ -5,8 +5,8 @@
|
||||
|
||||
#if defined(ARM_SIMD)
|
||||
#include "acle_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
/* SIMD version of hash_chain rebase */
|
||||
static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
|
||||
|
4 3rdparty/zlib-ng/arch/arm/slide_hash_neon.c (vendored)
@ -10,8 +10,8 @@
|
||||
|
||||
#ifdef ARM_NEON
|
||||
#include "neon_intrins.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
/* SIMD version of hash_chain rebase */
|
||||
static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
|
||||
|
57 3rdparty/zlib-ng/arch/generic/Makefile.in (vendored)
@ -1,5 +1,6 @@
|
||||
# Makefile for zlib
|
||||
# Makefile for zlib-ng
|
||||
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
|
||||
# Copyright (C) 2024 Hans Kristian Rosbach
|
||||
# For conditions of distribution and use, see copyright notice in zlib.h
|
||||
|
||||
CC=
|
||||
@ -11,12 +12,62 @@ SRCDIR=.
|
||||
SRCTOP=../..
|
||||
TOPDIR=$(SRCTOP)
|
||||
|
||||
all:
|
||||
all: \
|
||||
adler32_c.o adler32_c.lo \
|
||||
adler32_fold_c.o adler32_fold_c.lo \
|
||||
chunkset_c.o chunkset_c.lo \
|
||||
compare256_c.o compare256_c.lo \
|
||||
crc32_braid_c.o crc32_braid_c.lo \
|
||||
crc32_fold_c.o crc32_fold_c.lo \
|
||||
slide_hash_c.o slide_hash_c.lo
|
||||
|
||||
|
||||
adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c
|
||||
|
||||
adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c
|
||||
|
||||
adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
|
||||
|
||||
adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c
|
||||
|
||||
chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
|
||||
|
||||
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c
|
||||
|
||||
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
||||
|
||||
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c
|
||||
|
||||
crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c
|
||||
|
||||
crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c
|
||||
|
||||
crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
|
||||
|
||||
crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c
|
||||
|
||||
slide_hash_c.o: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c
|
||||
|
||||
slide_hash_c.lo: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c
|
||||
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
rm -f *.o *.lo *~ \
|
||||
rm -f *.o *.lo *~
|
||||
rm -rf objs
|
||||
rm -f *.gcda *.gcno *.gcov
|
||||
|
||||
|
54 3rdparty/zlib-ng/arch/generic/adler32_c.c (vendored, new file)
@ -0,0 +1,54 @@
|
||||
/* adler32.c -- compute the Adler-32 checksum of a data stream
|
||||
* Copyright (C) 1995-2011, 2016 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "functable.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
|
||||
uint32_t sum2;
|
||||
unsigned n;
|
||||
|
||||
/* split Adler-32 into component sums */
|
||||
sum2 = (adler >> 16) & 0xffff;
|
||||
adler &= 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if (UNLIKELY(len == 1))
|
||||
return adler32_len_1(adler, buf, sum2);
|
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if (UNLIKELY(buf == NULL))
|
||||
return 1L;
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if (UNLIKELY(len < 16))
|
||||
return adler32_len_16(adler, buf, len, sum2);
|
||||
|
||||
/* do length NMAX blocks -- requires just one modulo operation */
|
||||
while (len >= NMAX) {
|
||||
len -= NMAX;
|
||||
#ifdef UNROLL_MORE
|
||||
n = NMAX / 16; /* NMAX is divisible by 16 */
|
||||
#else
|
||||
n = NMAX / 8; /* NMAX is divisible by 8 */
|
||||
#endif
|
||||
do {
|
||||
#ifdef UNROLL_MORE
|
||||
DO16(adler, sum2, buf); /* 16 sums unrolled */
|
||||
buf += 16;
|
||||
#else
|
||||
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
|
||||
buf += 8;
|
||||
#endif
|
||||
} while (--n);
|
||||
adler %= BASE;
|
||||
sum2 %= BASE;
|
||||
}
|
||||
|
||||
/* do remaining bytes (less than NMAX, still just one modulo) */
|
||||
return adler32_len_64(adler, buf, len, sum2);
|
||||
}
|
@ -5,12 +5,11 @@
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "functable.h"
|
||||
#include "adler32_fold.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
|
||||
adler = functable.adler32(adler, src, len);
|
||||
adler = FUNCTABLE_CALL(adler32)(adler, src, len);
|
||||
memcpy(dst, src, len);
|
||||
return adler;
|
||||
}
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
/* ALIGNED, byte comparison */
|
@ -8,43 +8,9 @@
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "functable.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "crc32_braid_tbl.h"
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
|
||||
return (const uint32_t *)crc_table;
|
||||
}
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
|
||||
if (buf == NULL) return 0;
|
||||
|
||||
return (unsigned long)functable.crc32((uint32_t)crc, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
|
||||
if (buf == NULL) return 0;
|
||||
|
||||
return functable.crc32(crc, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
|
||||
return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
|
||||
return PREFIX(crc32_z)(crc, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
/*
|
||||
A CRC of a message is computed on N braids of words in the message, where
|
||||
each word consists of W bytes (4 or 8). If N is 3, for example, then three
|
||||
@ -66,24 +32,6 @@ uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t
|
||||
level. Your mileage may vary.
|
||||
*/
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
# define ZSWAPWORD(word) (word)
|
||||
# define BRAID_TABLE crc_braid_table
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
# if W == 8
|
||||
# define ZSWAPWORD(word) ZSWAP64(word)
|
||||
# elif W == 4
|
||||
# define ZSWAPWORD(word) ZSWAP32(word)
|
||||
# endif
|
||||
# define BRAID_TABLE crc_braid_big_table
|
||||
#else
|
||||
# error "No endian defined"
|
||||
#endif
|
||||
#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
|
||||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef W
|
||||
/*
|
||||
@ -112,7 +60,7 @@ static z_word_t crc_word(z_word_t data) {
|
||||
|
||||
/* ========================================================================= */
|
||||
Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
uint32_t c;
|
||||
|
||||
/* Pre-condition the CRC */
|
||||
c = (~crc) & 0xffffffff;
|
@ -3,11 +3,9 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "functable.h"
|
||||
|
||||
#include "crc32_fold.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include "crc32.h"
|
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
|
||||
crc->value = CRC32_INITIAL_VALUE;
|
||||
@ -15,7 +13,7 @@ Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
|
||||
}
|
||||
|
||||
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) {
|
||||
crc->value = functable.crc32(crc->value, src, len);
|
||||
crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len);
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
|
||||
@ -25,7 +23,7 @@ Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, ui
|
||||
* same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The
|
||||
* init_crc is an unused argument in this context */
|
||||
Z_UNUSED(init_crc);
|
||||
crc->value = functable.crc32(crc->value, src, len);
|
||||
crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc) {
|
106 3rdparty/zlib-ng/arch/generic/generic_functions.h (vendored, new file)
@ -0,0 +1,106 @@
|
||||
/* generic_functions.h -- generic C implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef GENERIC_FUNCTIONS_H_
|
||||
#define GENERIC_FUNCTIONS_H_
|
||||
|
||||
#include "zendian.h"
|
||||
|
||||
Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
|
||||
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);
|
||||
|
||||
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
|
||||
|
||||
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
|
||||
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
|
||||
uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
|
||||
uint32_t chunksize_c(void);
|
||||
uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
|
||||
|
||||
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
|
||||
uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
|
||||
# endif
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef void (*slide_hash_func)(deflate_state *s);
|
||||
|
||||
void slide_hash_c(deflate_state *s);
|
||||
|
||||
uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
|
||||
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
# ifdef UNALIGNED64_OK
|
||||
uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
|
||||
// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
# define longest_match_generic longest_match_unaligned_64
|
||||
# define longest_match_slow_generic longest_match_slow_unaligned_64
|
||||
# define compare256_generic compare256_unaligned_64
|
||||
# elif defined(HAVE_BUILTIN_CTZ)
|
||||
# define longest_match_generic longest_match_unaligned_32
|
||||
# define longest_match_slow_generic longest_match_slow_unaligned_32
|
||||
# define compare256_generic compare256_unaligned_32
|
||||
# else
|
||||
# define longest_match_generic longest_match_unaligned_16
|
||||
# define longest_match_slow_generic longest_match_slow_unaligned_16
|
||||
# define compare256_generic compare256_unaligned_16
|
||||
# endif
|
||||
#else
|
||||
# define longest_match_generic longest_match_c
|
||||
# define longest_match_slow_generic longest_match_slow_c
|
||||
# define compare256_generic compare256_c
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// Generic code
|
||||
# define native_adler32 adler32_c
|
||||
# define native_adler32_fold_copy adler32_fold_copy_c
|
||||
# define native_chunkmemset_safe chunkmemset_safe_c
|
||||
# define native_chunksize chunksize_c
|
||||
# define native_crc32 PREFIX(crc32_braid)
|
||||
# define native_crc32_fold crc32_fold_c
|
||||
# define native_crc32_fold_copy crc32_fold_copy_c
|
||||
# define native_crc32_fold_final crc32_fold_final_c
|
||||
# define native_crc32_fold_reset crc32_fold_reset_c
|
||||
# define native_inflate_fast inflate_fast_c
|
||||
# define native_slide_hash slide_hash_c
|
||||
# define native_longest_match longest_match_generic
|
||||
# define native_longest_match_slow longest_match_slow_generic
|
||||
# define native_compare256 compare256_generic
|
||||
#endif
|
||||
|
||||
#endif
|
@ -1,6 +1,6 @@
|
||||
/* slide_hash.c -- slide hash table C implementation
|
||||
*
|
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
||||
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#ifdef POWER8_VSX
|
||||
#include <altivec.h>
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
|
||||
typedef vector unsigned char chunk_t;
|
||||
|
||||
|
@ -5,8 +5,10 @@
|
||||
|
||||
#ifdef POWER9
|
||||
#include <altivec.h>
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zendian.h"
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "zendian.h"
|
||||
|
||||
/* Older versions of GCC misimplemented semantics for these bit counting builtins.
|
||||
* https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac */
|
||||
|
7 3rdparty/zlib-ng/arch/power/power_features.c (vendored)
@ -1,16 +1,19 @@
|
||||
/* power_features.c - POWER feature check
|
||||
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||
* Copyright (C) 2021-2022 Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* Copyright (C) 2021-2024 Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifdef HAVE_SYS_AUXV_H
|
||||
# include <sys/auxv.h>
|
||||
#endif
|
||||
#ifdef POWER_NEED_AUXVEC_H
|
||||
# include <linux/auxvec.h>
|
||||
#endif
|
||||
#ifdef __FreeBSD__
|
||||
# include <machine/cpu.h>
|
||||
#endif
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
#include "power_features.h"
|
||||
|
||||
void Z_INTERNAL power_check_features(struct power_cpu_features *features) {
|
||||
|
6 3rdparty/zlib-ng/arch/power/power_features.h (vendored)
@ -4,8 +4,8 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef POWER_H_
|
||||
#define POWER_H_
|
||||
#ifndef POWER_FEATURES_H_
|
||||
#define POWER_FEATURES_H_
|
||||
|
||||
struct power_cpu_features {
|
||||
int has_altivec;
|
||||
@ -15,4 +15,4 @@ struct power_cpu_features {
|
||||
|
||||
void Z_INTERNAL power_check_features(struct power_cpu_features *features);
|
||||
|
||||
#endif /* POWER_H_ */
|
||||
#endif /* POWER_FEATURES_H_ */
|
||||
|
67 3rdparty/zlib-ng/arch/power/power_functions.h (vendored, new file)
@ -0,0 +1,67 @@
|
||||
/* power_functions.h -- POWER implementations for arch-specific functions.
|
||||
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||
* Copyright (C) 2021 Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef POWER_FUNCTIONS_H_
|
||||
#define POWER_FUNCTIONS_H_
|
||||
|
||||
#ifdef PPC_VMX
|
||||
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
void slide_hash_vmx(deflate_state *s);
|
||||
#endif
|
||||
|
||||
#ifdef POWER8_VSX
|
||||
uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t chunksize_power8(void);
|
||||
uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
void slide_hash_power8(deflate_state *s);
|
||||
void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef POWER9
|
||||
uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
|
||||
uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// Power - VMX
|
||||
# if defined(PPC_VMX) && defined(__ALTIVEC__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_vmx
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_vmx
|
||||
# endif
|
||||
// Power8 - VSX
|
||||
# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_power8
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_power8
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_power8
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_power8
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_power8
|
||||
# endif
|
||||
# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_power8
|
||||
# endif
|
||||
// Power9
|
||||
# if defined(POWER9) && defined(_ARCH_PWR9)
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_power9
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_power9
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_power9
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* POWER_FUNCTIONS_H_ */
|
4 3rdparty/zlib-ng/arch/riscv/adler32_rvv.c (vendored)
@ -9,8 +9,8 @@
|
||||
#include <riscv_vector.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) {
|
||||
/* split Adler-32 into component sums */
|
||||
|
4 3rdparty/zlib-ng/arch/riscv/compare256_rvv.c (vendored)
@ -6,7 +6,9 @@
|
||||
|
||||
#ifdef RISCV_RVV
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
11 3rdparty/zlib-ng/arch/riscv/riscv_features.c (vendored)
@ -1,10 +1,13 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/utsname.h>
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
|
||||
# include <sys/auxv.h>
|
||||
#endif
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "riscv_features.h"
|
||||
|
||||
#define ISA_V_HWCAP (1 << ('v' - 'a'))
|
||||
@ -33,7 +36,11 @@ void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *fea
|
||||
}
|
||||
|
||||
void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) {
|
||||
#if defined(__linux__) && defined(HAVE_SYS_AUXV_H)
|
||||
unsigned long hw_cap = getauxval(AT_HWCAP);
|
||||
#else
|
||||
unsigned long hw_cap = 0;
|
||||
#endif
|
||||
features->has_rvv = hw_cap & ISA_V_HWCAP;
|
||||
}
|
||||
|
||||
|
6 3rdparty/zlib-ng/arch/riscv/riscv_features.h (vendored)
@ -6,8 +6,8 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef RISCV_H_
|
||||
#define RISCV_H_
|
||||
#ifndef RISCV_FEATURES_H_
|
||||
#define RISCV_FEATURES_H_
|
||||
|
||||
struct riscv_cpu_features {
|
||||
int has_rvv;
|
||||
@ -15,4 +15,4 @@ struct riscv_cpu_features {
|
||||
|
||||
void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features);
|
||||
|
||||
#endif /* RISCV_H_ */
|
||||
#endif /* RISCV_FEATURES_H_ */
|
||||
|
49 3rdparty/zlib-ng/arch/riscv/riscv_functions.h (vendored, new file)
@ -0,0 +1,49 @@
|
||||
/* riscv_functions.h -- RISCV implementations for arch-specific functions.
|
||||
*
|
||||
* Copyright (C) 2023 SiFive, Inc. All rights reserved.
|
||||
* Contributed by Alex Chiang <alex.chiang@sifive.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef RISCV_FUNCTIONS_H_
|
||||
#define RISCV_FUNCTIONS_H_
|
||||
|
||||
#ifdef RISCV_RVV
|
||||
uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
uint32_t chunksize_rvv(void);
|
||||
uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
|
||||
|
||||
uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
|
||||
void slide_hash_rvv(deflate_state *s);
|
||||
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// RISCV - RVV
|
||||
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_rvv
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_rvv
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_rvv
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_rvv
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_rvv
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_rvv
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_rvv
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_rvv
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_rvv
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* RISCV_FUNCTIONS_H_ */
|
10 3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c (vendored)
@ -8,18 +8,16 @@
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) {
|
||||
size_t vl;
|
||||
while (entries > 0) {
|
||||
vl = __riscv_vsetvl_e16m4(entries);
|
||||
vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl);
|
||||
vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl);
|
||||
vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl);
|
||||
v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl);
|
||||
__riscv_vse16_v_u16m4(table, v_tab, vl);
|
||||
vuint16m4_t v_diff = __riscv_vssubu_vx_u16m4(v_tab, wsize, vl);
|
||||
__riscv_vse16_v_u16m4(table, v_diff, vl);
|
||||
table += vl, entries -= vl;
|
||||
}
|
||||
}
|
||||
|
48 3rdparty/zlib-ng/arch/s390/Makefile.in (vendored, new file)
@ -0,0 +1,48 @@
|
||||
# Makefile for zlib-ng
|
||||
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
|
||||
# For conditions of distribution and use, see copyright notice in zlib.h
|
||||
|
||||
CC=
|
||||
CFLAGS=
|
||||
SFLAGS=
|
||||
INCLUDES=
|
||||
SUFFIX=
|
||||
VGFMAFLAG=
|
||||
NOLTOFLAG=
|
||||
|
||||
SRCDIR=.
|
||||
SRCTOP=../..
|
||||
TOPDIR=$(SRCTOP)
|
||||
|
||||
s390_features.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c
|
||||
|
||||
s390_features.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c
|
||||
|
||||
dfltcc_deflate.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c
|
||||
|
||||
dfltcc_deflate.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c
|
||||
|
||||
dfltcc_inflate.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c
|
||||
|
||||
dfltcc_inflate.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c
|
||||
|
||||
crc32-vx.o:
|
||||
$(CC) $(CFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c
|
||||
|
||||
crc32-vx.lo:
|
||||
$(CC) $(SFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
rm -f *.o *.lo *~
|
||||
rm -rf objs
|
||||
rm -f *.gcda *.gcno *.gcov
|
||||
|
||||
distclean: clean
|
||||
rm -f Makefile
|
277 3rdparty/zlib-ng/arch/s390/README.md (vendored, new file)
@@ -0,0 +1,277 @@
# Introduction

This directory contains SystemZ deflate hardware acceleration support.
It can be enabled using the following build commands:

    $ ./configure --with-dfltcc-deflate --with-dfltcc-inflate
    $ make

or

    $ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 .
    $ make

When built like this, zlib-ng compresses using hardware on level 1 and
using software on all other levels. Decompression always happens in
hardware. To enable hardware compression for levels 1-6 (i.e. to make it
the default), add `-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng.
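The mask appears to be a per-level bitmap, with bit *n* enabling hardware compression for level *n*, which is consistent with `0x7e` covering levels 1-6. A small editorial illustration (not part of the PR) of that arithmetic:

```c
/* Editorial illustration: DFLTCC_LEVEL_MASK read as a bitmap of
 * compression levels that take the hardware path. Setting bits 1..6
 * gives 0b01111110 == 0x7e, i.e. levels 1-6 inclusive. */
#include <stdio.h>

int main(void) {
    unsigned mask = 0;
    for (int level = 1; level <= 6; level++)
        mask |= 1u << level;
    printf("DFLTCC_LEVEL_MASK=0x%02x\n", mask);  /* prints 0x7e */
    return 0;
}
```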
SystemZ deflate hardware acceleration is available on [IBM z15](https://www.ibm.com/products/z15)
and newer machines under the name ["Integrated Accelerator for zEnterprise Data Compression"](https://www.ibm.com/support/z-content-solutions/compression/).
The programming interface to it is a machine instruction called DEFLATE
CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of
[Principles of Operation](https://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf).
Both the code and the rest of this document refer to this feature simply as "DFLTCC".

# Performance

Performance figures are published [here](https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine).
The compression speed-up can be as high as 110x and the decompression
speed-up can be as high as 15x.

# Limitations

Two DFLTCC compression calls with identical inputs are not guaranteed to
produce identical outputs. Therefore care should be taken when using
hardware compression where reproducible results are desired. In particular,
the zlib-ng-specific `zng_deflateSetParams` call allows setting the
`Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC support for a
particular stream.
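A hedged sketch of opting a stream out of DFLTCC as described above. `zng_deflateSetParams` and `Z_DEFLATE_REPRODUCIBLE` are named in the text; the parameter-struct name and fields (`zng_deflate_param_value` with `param`, `buf`, `size`) are assumptions about the zlib-ng native API and should be verified against `zlib-ng.h`:

```c
/* Sketch only, not from this PR: request reproducible (software-only)
 * compression for one already-initialized deflate stream. Struct and
 * field names are assumed; verify against the installed zlib-ng.h. */
#include <zlib-ng.h>

static int32_t make_stream_reproducible(zng_stream *strm) {
    int reproducible = 1;              /* non-zero: force reproducible output */
    zng_deflate_param_value value;
    value.param = Z_DEFLATE_REPRODUCIBLE;
    value.buf   = &reproducible;
    value.size  = sizeof(reproducible);
    return zng_deflateSetParams(strm, &value, 1);
}
```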
DFLTCC does not support every single zlib-ng feature, in particular:

* `inflate(Z_BLOCK)` and `inflate(Z_TREES)`
* `inflateMark()`
* `inflatePrime()`
* `inflateSyncPoint()`

When used, these functions will either switch to software, or, in case
this is not possible, gracefully fail.
# Code structure
|
||||
|
||||
All SystemZ-specific code lives in `arch/s390` directory and is
|
||||
integrated with the rest of zlib-ng using hook macros.
|
||||
|
||||
## Hook macros
|
||||
|
||||
DFLTCC takes as arguments a parameter block, an input buffer, an output
|
||||
buffer, and a window. Parameter blocks are stored alongside zlib states;
|
||||
buffers are forwarded from the caller; and window - which must be
|
||||
4k-aligned and is always 64k large, is managed using the `PAD_WINDOW()`,
|
||||
`WINDOW_PAD_SIZE`, `HINT_ALIGNED_WINDOW` and `DEFLATE_ADJUST_WINDOW_SIZE()`
|
||||
and `INFLATE_ADJUST_WINDOW_SIZE()` hooks.
|
||||
|
||||
Software and hardware window formats do not match, therefore,
|
||||
`deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()`
|
||||
and `inflateGetDictionary()` need special handling, which is triggered using
|
||||
`DEFLATE_SET_DICTIONARY_HOOK()`, `DEFLATE_GET_DICTIONARY_HOOK()`,
|
||||
`INFLATE_SET_DICTIONARY_HOOK()` and `INFLATE_GET_DICTIONARY_HOOK()` macros.
|
||||
|
||||
`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC
|
||||
parameter block using `DEFLATE_RESET_KEEP_HOOK()` and
|
||||
`INFLATE_RESET_KEEP_HOOK()` macros.
|
||||
|
||||
`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and
|
||||
`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported
|
||||
calls gracefully fail.
|
||||
|
||||
`DEFLATE_PARAMS_HOOK()` implements switching between hardware and
|
||||
software compression mid-stream using `deflateParams()`. Switching
|
||||
normally entails flushing the current block, which might not be possible
|
||||
in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook
|
||||
in order to detect and gracefully handle such situations.
|
||||
|
||||
The algorithm implemented in hardware has different compression ratio
|
||||
than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()`
|
||||
and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()`
|
||||
return the correct results for the hardware implementation.
|
||||
|
||||
Actual compression and decompression are handled by `DEFLATE_HOOK()` and
|
||||
`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the
|
||||
window on its own, calling `updatewindow()` is suppressed using
|
||||
`INFLATE_NEED_UPDATEWINDOW()` macro.
|
||||
|
||||
In addition to compression, DFLTCC computes CRC-32 and Adler-32
|
||||
checksums, therefore, whenever it's used, software checksumming is
|
||||
suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()`
|
||||
macros.
|
||||
|
||||
While software always produces reproducible compression results, this
|
||||
is not the case for DFLTCC. Therefore, zlib-ng users are given the
|
||||
ability to specify whether or not reproducible compression results
|
||||
are required. While it is always possible to specify this setting
|
||||
before the compression begins, it is not always possible to do so in
|
||||
the middle of a deflate stream - the exact conditions for that are
|
||||
determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro.
|
||||
|
||||
## SystemZ-specific code
|
||||
|
||||
When zlib-ng is built with DFLTCC, the hooks described above are
|
||||
converted to calls to functions, which are implemented in
|
||||
`arch/s390/dfltcc_*` files. The functions can be grouped in three broad
|
||||
categories:
|
||||
|
||||
* Base DFLTCC support, e.g. wrapping the machine instruction - `dfltcc()`.
|
||||
* Translating between software and hardware data formats, e.g.
|
||||
`dfltcc_deflate_set_dictionary()`.
|
||||
* Translating between software and hardware state machines, e.g.
|
||||
`dfltcc_deflate()` and `dfltcc_inflate()`.
|
||||
|
||||
The functions from the first two categories are fairly simple, however,
|
||||
various quirks in both software and hardware state machines make the
|
||||
functions from the third category quite complicated.
|
||||
|
||||
### `dfltcc_deflate()` function
|
||||
|
||||
This function is called by `deflate()` and has the following
|
||||
responsibilities:
|
||||
|
||||
* Checking whether DFLTCC can be used with the current stream. If this
|
||||
is not the case, then it returns `0`, making `deflate()` use some
|
||||
other function in order to compress in software. Otherwise it returns
|
||||
`1`.
|
||||
* Block management and Huffman table generation. DFLTCC ends blocks only
|
||||
when explicitly instructed to do so by the software. Furthermore,
|
||||
whether to use fixed or dynamic Huffman tables must also be determined
|
||||
by the software. Since looking at data in order to gather statistics
|
||||
would negate performance benefits, the following approach is used: the
|
||||
first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed
|
||||
block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into
|
||||
dynamic blocks.
|
||||
* Writing EOBS. Block Closing Control bit in the parameter block
|
||||
instructs DFLTCC to write EOBS, however, certain conditions need to be
|
||||
met: input data length must be non-zero or Continuation Flag must be
|
||||
set. To put this in simpler terms, DFLTCC will silently refuse to
|
||||
write EOBS if this is the only thing that it is asked to do. Since the
|
||||
code has to be able to emit EOBS in software anyway, in order to avoid
|
||||
tricky corner cases Block Closing Control is never used. Whether to
|
||||
write EOBS is instead controlled by `soft_bcc` variable.
|
||||
* Triggering block post-processing. Depending on flush mode, `deflate()`
|
||||
must perform various additional actions when a block or a stream ends.
|
||||
`dfltcc_deflate()` informs `deflate()` about this using
|
||||
`block_state *result` parameter.
|
||||
* Converting software state fields into hardware parameter block fields,
|
||||
and vice versa. For example, `wrap` and Check Value Type or `bi_valid`
|
||||
and Sub-Byte Boundary. Certain fields cannot be translated and must
|
||||
persist untouched in the parameter block between calls, for example,
|
||||
Continuation Flag or Continuation State Buffer.
|
||||
* Handling flush modes and low-memory situations. These aspects are
|
||||
quite intertwined and pervasive. The general idea here is that the
|
||||
code must not do anything in software - whether explicitly by e.g.
|
||||
calling `send_eobs()`, or implicitly - by returning to `deflate()`
|
||||
with certain return and `*result` values, when Continuation Flag is
|
||||
set.
|
||||
* Ending streams. When a new block is started and flush mode is
|
||||
`Z_FINISH`, Block Header Final parameter block bit is used to mark
|
||||
this block as final. However, sometimes an empty final block is
|
||||
needed, and, unfortunately, just like with EOBS, DFLTCC will silently
|
||||
refuse to do this. The general idea of DFLTCC implementation is to
|
||||
rely as much as possible on the existing code. Here in order to do
|
||||
this, the code pretends that it does not support DFLTCC, which makes
|
||||
`deflate()` call a software compression function, which writes an
|
||||
empty final block. Whether this is required is controlled by
|
||||
`need_empty_block` variable.
|
||||
* Error handling. This is simply converting
|
||||
Operation-Ending-Supplemental Code to string. Errors can only happen
|
||||
due to things like memory corruption, and therefore they don't affect
|
||||
the `deflate()` return code.
|
||||
|
||||
### `dfltcc_inflate()` function

This function is called by `inflate()` from the `TYPEDO` state (that is,
when all the metadata is parsed and the stream is positioned at the type
bits of a deflate block header) and is responsible for the following:

* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`.
Unfortunately, there is no way to ask DFLTCC to stop decompressing on a
block or tree boundary.
* `inflate()` decompression loop management. This is controlled using
the return value, which can be either `DFLTCC_INFLATE_BREAK` or
`DFLTCC_INFLATE_CONTINUE`.
* Converting software state fields into hardware parameter block fields,
and vice versa. For example, `whave` and History Length or `wnext` and
History Offset (a copy-based sketch of this mapping follows this list).
* Ending streams. This instructs `inflate()` to return `Z_STREAM_END`
and is controlled by the `last` state field.
* Error handling. As with deflate, error handling consists of converting
the Operation-Ending-Supplemental Code to a string. Unlike deflate,
errors may happen due to bad inputs; therefore they are propagated to
`inflate()` by setting the `mode` field to `MEM` or `BAD`.

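As a hedged illustration of the state translation mentioned above, the
copy-based sketch below shows how the hardware's circular history (History
Offset `ho`, History Length `hl`) maps onto a linear software window and the
`whave`/`wnext` fields. It assumes the software window is at least `HB_SIZE`
bytes; the actual code rotates the window in place instead (see
`dfltcc_inflate_disable()` and `get_history()` further down in this patch).

```
#include <stdint.h>
#include <string.h>

#define HB_SIZE (1u << 15) /* circular history buffer size, as in dfltcc_common.h */

/* Copy the circular history (offset ho, length hl) into a linear window
   and derive whave/wnext.  Illustrative only; assumes wsize >= HB_SIZE. */
static void history_to_window(const unsigned char *hist, uint32_t ho, uint32_t hl,
                              unsigned char *window, uint32_t *whave, uint32_t *wnext) {
    uint32_t high = hl < HB_SIZE - ho ? hl : HB_SIZE - ho; /* bytes before the wrap-around */

    memcpy(window, hist + ho, high);        /* fragment starting at the history offset */
    memcpy(window + high, hist, hl - high); /* wrapped-around fragment, if any */
    *whave = *wnext = hl;                   /* the software window is now fully linear */
}
```

In the other direction, `dfltcc_inflate()` simply hands `state->window` to the
instruction as the circular buffer and lets the parameter block's `ho` and
`hl` fields track its layout.
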
# Testing

Given the complexity of the DFLTCC machine instruction, it is not clear
whether QEMU TCG will ever support it. At the time of writing, one has to
have access to an IBM z15+ VM or LPAR in order to test DFLTCC support.
Since DFLTCC is a non-privileged instruction, neither special VM/LPAR
configuration nor root access is required.

zlib-ng CI uses an IBM-provided z15 self-hosted builder for DFLTCC
testing. There is no official IBM Z GitHub Actions runner, so we build
one inspired by `anup-kodlekere/gaplib`.
Future updates to actions-runner might need an updated patch. The .NET
version number patch has been split out into a separate file so that the
main patch does not have to change constantly.

## Configuring the builder.

### Install prerequisites.

```
sudo dnf install podman
```

### Add actions-runner service.

```
sudo cp self-hosted-builder/actions-runner.service /etc/systemd/system/
sudo systemctl daemon-reload
```

### Create a config file (needs a GitHub personal access token).

```
# Create file /etc/actions-runner
repo=<owner>/<name>
access_token=<ghp_***>
```

The access token should have the `repo` scope; consult
https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository
for details.

### Autostart actions-runner.

```
sudo systemctl enable --now actions-runner
```

## Rebuilding the container

In order to update the `gaplib-actions-runner` podman container, e.g. to get the
latest OS security fixes, follow these steps:
```
# Stop actions-runner service
sudo systemctl stop actions-runner

# Delete old container
sudo podman container rm gaplib-actions-runner

# Delete old image
sudo podman image rm localhost/zlib-ng/actions-runner

# Build image
sudo podman build --squash -f Dockerfile.zlib-ng --tag zlib-ng/actions-runner .

# Build container
sudo podman create --name=gaplib-actions-runner --env-file=/etc/actions-runner --init --interactive --volume=actions-runner-temp:/home/actions-runner zlib-ng/actions-runner

# Start actions-runner service
sudo systemctl start actions-runner
```
222
3rdparty/zlib-ng/arch/s390/crc32-vx.c
vendored
Normal file
222
3rdparty/zlib-ng/arch/s390/crc32-vx.c
vendored
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Hardware-accelerated CRC-32 variants for Linux on z Systems
|
||||
*
|
||||
* Use the z/Architecture Vector Extension Facility to accelerate the
|
||||
* computing of bitreflected CRC-32 checksums.
|
||||
*
|
||||
* This CRC-32 implementation algorithm is bitreflected and processes
|
||||
* the least-significant bit first (Little-Endian).
|
||||
*
|
||||
* This code was originally written by Hendrik Brueckner
|
||||
* <brueckner@linux.vnet.ibm.com> for use in the Linux kernel and has been
|
||||
* relicensed under the zlib license.
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "arch_functions.h"
|
||||
|
||||
#include <vecintrin.h>
|
||||
|
||||
typedef unsigned char uv16qi __attribute__((vector_size(16)));
|
||||
typedef unsigned int uv4si __attribute__((vector_size(16)));
|
||||
typedef unsigned long long uv2di __attribute__((vector_size(16)));
|
||||
|
||||
static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
/*
|
||||
* The CRC-32 constant block contains reduction constants to fold and
|
||||
* process particular chunks of the input data stream in parallel.
|
||||
*
|
||||
* For the CRC-32 variants, the constants are precomputed according to
|
||||
* these definitions:
|
||||
*
|
||||
* R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
|
||||
* R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
|
||||
* R3 = [(x128+32 mod P'(x) << 32)]' << 1
|
||||
* R4 = [(x128-32 mod P'(x) << 32)]' << 1
|
||||
* R5 = [(x64 mod P'(x) << 32)]' << 1
|
||||
* R6 = [(x32 mod P'(x) << 32)]' << 1
|
||||
*
|
||||
* The bitreflected Barret reduction constant, u', is defined as
|
||||
* the bit reversal of floor(x**64 / P(x)).
|
||||
*
|
||||
* where P(x) is the polynomial in the normal domain and the P'(x) is the
|
||||
* polynomial in the reversed (bitreflected) domain.
|
||||
*
|
||||
* CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
|
||||
*
|
||||
* P(x) = 0x04C11DB7
|
||||
* P'(x) = 0xEDB88320
|
||||
*/
|
||||
const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */
|
||||
const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */
|
||||
const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */
|
||||
const uv2di r5 = {0, 0x163CD6124}; /* R5 */
|
||||
const uv2di ru_poly = {0, 0x1F7011641}; /* u' */
|
||||
const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */
|
||||
|
||||
/*
|
||||
* Load the initial CRC value.
|
||||
*
|
||||
* The CRC value is loaded into the rightmost word of the
|
||||
* vector register and is later XORed with the LSB portion
|
||||
* of the loaded input data.
|
||||
*/
|
||||
uv2di v0 = {0, 0};
|
||||
v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3);
|
||||
|
||||
/* Load a 64-byte data chunk and XOR with CRC */
|
||||
uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be);
|
||||
uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be);
|
||||
uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be);
|
||||
uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be);
|
||||
|
||||
v1 ^= v0;
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
|
||||
while (len >= 64) {
|
||||
/* Load the next 64-byte data chunk */
|
||||
uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be);
|
||||
uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be);
|
||||
uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be);
|
||||
uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be);
|
||||
|
||||
/*
|
||||
* Perform a GF(2) multiplication of the doublewords in V1 with
|
||||
* the R1 and R2 reduction constants in V0. The intermediate result
|
||||
* is then folded (accumulated) with the next data chunk in PART1 and
|
||||
* stored in V1. Repeat this step for the register contents
|
||||
* in V2, V3, and V4 respectively.
|
||||
*/
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1);
|
||||
v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2);
|
||||
v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3);
|
||||
v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4);
|
||||
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
|
||||
* and R4 and accumulating the next 128-bit chunk until a single 128-bit
|
||||
* value remains.
|
||||
*/
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3);
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4);
|
||||
|
||||
while (len >= 16) {
|
||||
/* Load next data chunk */
|
||||
v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be);
|
||||
|
||||
/* Fold next data chunk */
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
|
||||
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up a vector register for byte shifts. The shift value must
|
||||
* be loaded in bits 1-4 in byte element 7 of a vector register.
|
||||
* Shift by 8 bytes: 0x40
|
||||
* Shift by 4 bytes: 0x20
|
||||
*/
|
||||
uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
v9 = vec_insert((unsigned char)0x40, v9, 7);
|
||||
|
||||
/*
|
||||
* Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
|
||||
* to move R4 into the rightmost doubleword and set the leftmost
|
||||
* doubleword to 0x1.
|
||||
*/
|
||||
v0 = vec_srb(r4r3, (uv2di)v9);
|
||||
v0[0] = 1;
|
||||
|
||||
/*
|
||||
* Compute GF(2) product of V1 and V0. The rightmost doubleword
|
||||
* of V1 is multiplied with R4. The leftmost doubleword of V1 is
|
||||
* multiplied by 0x1 and is then XORed with rightmost product.
|
||||
* Implicitly, the intermediate leftmost product becomes padded
|
||||
*/
|
||||
v1 = (uv2di)vec_gfmsum_128(v0, v1);
|
||||
|
||||
/*
|
||||
* Now do the final 32-bit fold by multiplying the rightmost word
|
||||
* in V1 with R5 and XOR the result with the remaining bits in V1.
|
||||
*
|
||||
* To achieve this by a single VGFMAG, right shift V1 by a word
|
||||
* and store the result in V2 which is then accumulated. Use the
|
||||
* vector unpack instruction to load the rightmost half of the
|
||||
* doubleword into the rightmost doubleword element of V1; the other
|
||||
* half is loaded in the leftmost doubleword.
|
||||
* The vector register with CONST_R5 contains the R5 constant in the
|
||||
* rightmost doubleword and the leftmost doubleword is zero to ignore
|
||||
* the leftmost product of V1.
|
||||
*/
|
||||
v9 = vec_insert((unsigned char)0x20, v9, 7);
|
||||
v2 = vec_srb(v1, (uv2di)v9);
|
||||
v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */
|
||||
v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2);
|
||||
|
||||
/*
|
||||
* Apply a Barret reduction to compute the final 32-bit CRC value.
|
||||
*
|
||||
* The input values to the Barret reduction are the degree-63 polynomial
|
||||
* in V1 (R(x)), degree-32 generator polynomial, and the reduction
|
||||
* constant u. The Barret reduction result is the CRC value of R(x) mod
|
||||
* P(x).
|
||||
*
|
||||
* The Barret reduction algorithm is defined as:
|
||||
*
|
||||
* 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
|
||||
* 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
|
||||
* 3. C(x) = R(x) XOR T2(x) mod x^32
|
||||
*
|
||||
* Note: The leftmost doubleword of vector register containing
|
||||
* CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
|
||||
* is zero and does not contribute to the final result.
|
||||
*/
|
||||
|
||||
/* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
|
||||
v2 = vec_unpackl((uv4si)v1);
|
||||
v2 = (uv2di)vec_gfmsum_128(ru_poly, v2);
|
||||
|
||||
/*
|
||||
* Compute the GF(2) product of the CRC polynomial with T1(x) in
|
||||
* V2 and XOR the intermediate result, T2(x), with the value in V1.
|
||||
* The final result is stored in word element 2 of V2.
|
||||
*/
|
||||
v2 = vec_unpackl((uv4si)v2);
|
||||
v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1);
|
||||
|
||||
return ((uv4si)v2)[2];
|
||||
}
|
||||
|
||||
#define VX_MIN_LEN 64
|
||||
#define VX_ALIGNMENT 16L
|
||||
#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
|
||||
|
||||
uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) {
|
||||
size_t prealign, aligned, remaining;
|
||||
|
||||
if (len < VX_MIN_LEN + VX_ALIGN_MASK)
|
||||
return PREFIX(crc32_braid)(crc, buf, len);
|
||||
|
||||
if ((uintptr_t)buf & VX_ALIGN_MASK) {
|
||||
prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
|
||||
len -= prealign;
|
||||
crc = PREFIX(crc32_braid)(crc, buf, prealign);
|
||||
buf += prealign;
|
||||
}
|
||||
aligned = len & ~VX_ALIGN_MASK;
|
||||
remaining = len & VX_ALIGN_MASK;
|
||||
|
||||
crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff;
|
||||
|
||||
if (remaining)
|
||||
crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining);
|
||||
|
||||
return crc;
|
||||
}
|
119
3rdparty/zlib-ng/arch/s390/dfltcc_common.h
vendored
Normal file
119
3rdparty/zlib-ng/arch/s390/dfltcc_common.h
vendored
Normal file
@ -0,0 +1,119 @@
|
||||
#ifndef DFLTCC_COMMON_H
|
||||
#define DFLTCC_COMMON_H
|
||||
|
||||
#include "zutil.h"
|
||||
|
||||
/*
|
||||
Parameter Block for Query Available Functions.
|
||||
*/
|
||||
struct dfltcc_qaf_param {
|
||||
char fns[16];
|
||||
char reserved1[8];
|
||||
char fmts[2];
|
||||
char reserved2[6];
|
||||
} ALIGNED_(8);
|
||||
|
||||
/*
|
||||
Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand.
|
||||
*/
|
||||
struct dfltcc_param_v0 {
|
||||
uint16_t pbvn; /* Parameter-Block-Version Number */
|
||||
uint8_t mvn; /* Model-Version Number */
|
||||
uint8_t ribm; /* Reserved for IBM use */
|
||||
uint32_t reserved32 : 31;
|
||||
uint32_t cf : 1; /* Continuation Flag */
|
||||
uint8_t reserved64[8];
|
||||
uint32_t nt : 1; /* New Task */
|
||||
uint32_t reserved129 : 1;
|
||||
uint32_t cvt : 1; /* Check Value Type */
|
||||
uint32_t reserved131 : 1;
|
||||
uint32_t htt : 1; /* Huffman-Table Type */
|
||||
uint32_t bcf : 1; /* Block-Continuation Flag */
|
||||
uint32_t bcc : 1; /* Block Closing Control */
|
||||
uint32_t bhf : 1; /* Block Header Final */
|
||||
uint32_t reserved136 : 1;
|
||||
uint32_t reserved137 : 1;
|
||||
uint32_t dhtgc : 1; /* DHT Generation Control */
|
||||
uint32_t reserved139 : 5;
|
||||
uint32_t reserved144 : 5;
|
||||
uint32_t sbb : 3; /* Sub-Byte Boundary */
|
||||
uint8_t oesc; /* Operation-Ending-Supplemental Code */
|
||||
uint32_t reserved160 : 12;
|
||||
uint32_t ifs : 4; /* Incomplete-Function Status */
|
||||
uint16_t ifl; /* Incomplete-Function Length */
|
||||
uint8_t reserved192[8];
|
||||
uint8_t reserved256[8];
|
||||
uint8_t reserved320[4];
|
||||
uint16_t hl; /* History Length */
|
||||
uint32_t reserved368 : 1;
|
||||
uint16_t ho : 15; /* History Offset */
|
||||
uint32_t cv; /* Check Value */
|
||||
uint32_t eobs : 15; /* End-of-block Symbol */
|
||||
uint32_t reserved431: 1;
|
||||
uint8_t eobl : 4; /* End-of-block Length */
|
||||
uint32_t reserved436 : 12;
|
||||
uint32_t reserved448 : 4;
|
||||
uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table
|
||||
Length */
|
||||
uint8_t reserved464[6];
|
||||
uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */
|
||||
uint8_t reserved[24];
|
||||
uint8_t ribm2[8]; /* Reserved for IBM use */
|
||||
uint8_t csb[1152]; /* Continuation-State Buffer */
|
||||
} ALIGNED_(8);
|
||||
|
||||
/*
|
||||
Extension of inflate_state and deflate_state.
|
||||
*/
|
||||
struct dfltcc_state {
|
||||
struct dfltcc_param_v0 param; /* Parameter block. */
|
||||
struct dfltcc_qaf_param af; /* Available functions. */
|
||||
char msg[64]; /* Buffer for strm->msg */
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
struct dfltcc_state common;
|
||||
uint16_t level_mask; /* Levels on which to use DFLTCC */
|
||||
uint32_t block_size; /* New block each X bytes */
|
||||
size_t block_threshold; /* New block after total_in > X */
|
||||
uint32_t dht_threshold; /* New block only if avail_in >= X */
|
||||
} arch_deflate_state;
|
||||
|
||||
typedef struct {
|
||||
struct dfltcc_state common;
|
||||
} arch_inflate_state;
|
||||
|
||||
/*
|
||||
History buffer size.
|
||||
*/
|
||||
#define HB_BITS 15
|
||||
#define HB_SIZE (1 << HB_BITS)
|
||||
|
||||
/*
|
||||
Sizes of deflate block parts.
|
||||
*/
|
||||
#define DFLTCC_BLOCK_HEADER_BITS 3
|
||||
#define DFLTCC_HLITS_COUNT_BITS 5
|
||||
#define DFLTCC_HDISTS_COUNT_BITS 5
|
||||
#define DFLTCC_HCLENS_COUNT_BITS 4
|
||||
#define DFLTCC_MAX_HCLENS 19
|
||||
#define DFLTCC_HCLEN_BITS 3
|
||||
#define DFLTCC_MAX_HLITS 286
|
||||
#define DFLTCC_MAX_HDISTS 30
|
||||
#define DFLTCC_MAX_HLIT_HDIST_BITS 7
|
||||
#define DFLTCC_MAX_SYMBOL_BITS 16
|
||||
#define DFLTCC_MAX_EOBS_BITS 15
|
||||
#define DFLTCC_MAX_PADDING_BITS 7
|
||||
|
||||
#define DEFLATE_BOUND_COMPLEN(source_len) \
|
||||
((DFLTCC_BLOCK_HEADER_BITS + \
|
||||
DFLTCC_HLITS_COUNT_BITS + \
|
||||
DFLTCC_HDISTS_COUNT_BITS + \
|
||||
DFLTCC_HCLENS_COUNT_BITS + \
|
||||
DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \
|
||||
(DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \
|
||||
(source_len) * DFLTCC_MAX_SYMBOL_BITS + \
|
||||
DFLTCC_MAX_EOBS_BITS + \
|
||||
DFLTCC_MAX_PADDING_BITS) >> 3)
|
||||
|
||||
#endif
|
383
3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c
vendored
Normal file
383
3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c
vendored
Normal file
@ -0,0 +1,383 @@
|
||||
/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */
|
||||
|
||||
/*
|
||||
Use the following commands to build zlib-ng with DFLTCC compression support:
|
||||
|
||||
$ ./configure --with-dfltcc-deflate
|
||||
or
|
||||
|
||||
$ cmake -DWITH_DFLTCC_DEFLATE=1 .
|
||||
|
||||
and then
|
||||
|
||||
$ make
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "trees_emit.h"
|
||||
#include "dfltcc_deflate.h"
|
||||
#include "dfltcc_detail.h"
|
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
arch_deflate_state *dfltcc_state = &state->arch;
|
||||
|
||||
dfltcc_reset_state(&dfltcc_state->common);
|
||||
|
||||
/* Initialize tuning parameters */
|
||||
dfltcc_state->level_mask = DFLTCC_LEVEL_MASK;
|
||||
dfltcc_state->block_size = DFLTCC_BLOCK_SIZE;
|
||||
dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE;
|
||||
dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE;
|
||||
}
|
||||
|
||||
static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
|
||||
int reproducible) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
arch_deflate_state *dfltcc_state = &state->arch;
|
||||
|
||||
/* Unsupported compression settings */
|
||||
if ((dfltcc_state->level_mask & (1 << level)) == 0)
|
||||
return 0;
|
||||
if (window_bits != HB_BITS)
|
||||
return 0;
|
||||
if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
|
||||
return 0;
|
||||
if (reproducible)
|
||||
return 0;
|
||||
|
||||
/* Unsupported hardware */
|
||||
if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) ||
|
||||
!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) ||
|
||||
!is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible);
|
||||
}
|
||||
|
||||
static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
|
||||
dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
|
||||
}
|
||||
|
||||
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
size_t avail_out = strm->avail_out;
|
||||
dfltcc_cc cc;
|
||||
|
||||
cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR,
|
||||
param, &strm->next_out, &avail_out,
|
||||
&strm->next_in, &avail_in, state->window);
|
||||
strm->total_in += (strm->avail_in - avail_in);
|
||||
strm->total_out += (strm->avail_out - avail_out);
|
||||
strm->avail_in = avail_in;
|
||||
strm->avail_out = avail_out;
|
||||
return cc;
|
||||
}
|
||||
|
||||
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
send_bits(state, PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid);
|
||||
PREFIX(flush_pending)(strm);
|
||||
if (state->pending != 0) {
|
||||
/* The remaining data is located in pending_out[0:pending]. If someone
|
||||
* calls put_byte() - this might happen in deflate() - the byte will be
|
||||
* placed into pending_buf[pending], which is incorrect. Move the
|
||||
* remaining data to the beginning of pending_buf so that put_byte() is
|
||||
* usable again.
|
||||
*/
|
||||
memmove(state->pending_buf, state->pending_out, state->pending);
|
||||
state->pending_out = state->pending_buf;
|
||||
}
|
||||
#ifdef ZLIB_DEBUG
|
||||
state->compressed_len += param->eobl;
|
||||
#endif
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
arch_deflate_state *dfltcc_state = &state->arch;
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->common.param;
|
||||
uInt masked_avail_in;
|
||||
dfltcc_cc cc;
|
||||
int need_empty_block;
|
||||
int soft_bcc;
|
||||
int no_flush;
|
||||
|
||||
if (!PREFIX(dfltcc_can_deflate)(strm)) {
|
||||
/* Clear history. */
|
||||
if (flush == Z_FULL_FLUSH)
|
||||
param->hl = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
again:
|
||||
masked_avail_in = 0;
|
||||
soft_bcc = 0;
|
||||
no_flush = flush == Z_NO_FLUSH;
|
||||
|
||||
/* No input data. Return, except when Continuation Flag is set, which means
|
||||
* that DFLTCC has buffered some output in the parameter block and needs to
|
||||
* be called again in order to flush it.
|
||||
*/
|
||||
if (strm->avail_in == 0 && !param->cf) {
|
||||
/* A block is still open, and the hardware does not support closing
|
||||
* blocks without adding data. Thus, close it manually.
|
||||
*/
|
||||
if (!no_flush && param->bcf) {
|
||||
send_eobs(strm, param);
|
||||
param->bcf = 0;
|
||||
}
|
||||
/* Let one of deflate_* functions write a trailing empty block. */
|
||||
if (flush == Z_FINISH)
|
||||
return 0;
|
||||
/* Clear history. */
|
||||
if (flush == Z_FULL_FLUSH)
|
||||
param->hl = 0;
|
||||
/* Trigger block post-processing if necessary. */
|
||||
*result = no_flush ? need_more : block_done;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There is an open non-BFINAL block, we are not going to close it just
|
||||
* yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see
|
||||
* more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new
|
||||
* DHT in order to adapt to a possibly changed input data distribution.
|
||||
*/
|
||||
if (param->bcf && no_flush &&
|
||||
strm->total_in > dfltcc_state->block_threshold &&
|
||||
strm->avail_in >= dfltcc_state->dht_threshold) {
|
||||
if (param->cf) {
|
||||
/* We need to flush the DFLTCC buffer before writing the
|
||||
* End-of-block Symbol. Mask the input data and proceed as usual.
|
||||
*/
|
||||
masked_avail_in += strm->avail_in;
|
||||
strm->avail_in = 0;
|
||||
no_flush = 0;
|
||||
} else {
|
||||
/* DFLTCC buffer is empty, so we can manually write the
|
||||
* End-of-block Symbol right away.
|
||||
*/
|
||||
send_eobs(strm, param);
|
||||
param->bcf = 0;
|
||||
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* No space for compressed data. If we proceed, dfltcc_cmpr() will return
|
||||
* DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
|
||||
* set BCF=1, which is wrong. Avoid complications and return early.
|
||||
*/
|
||||
if (strm->avail_out == 0) {
|
||||
*result = need_more;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* The caller gave us too much data. Pass only one block worth of
|
||||
* uncompressed data to DFLTCC and mask the rest, so that on the next
|
||||
* iteration we start a new block.
|
||||
*/
|
||||
if (no_flush && strm->avail_in > dfltcc_state->block_size) {
|
||||
masked_avail_in += (strm->avail_in - dfltcc_state->block_size);
|
||||
strm->avail_in = dfltcc_state->block_size;
|
||||
}
|
||||
|
||||
/* When we have an open non-BFINAL deflate block and caller indicates that
|
||||
* the stream is ending, we need to close an open deflate block and open a
|
||||
* BFINAL one.
|
||||
*/
|
||||
need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf;
|
||||
|
||||
/* Translate stream to parameter block */
|
||||
param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
|
||||
if (!no_flush)
|
||||
/* We need to close a block. Always do this in software - when there is
|
||||
* no input data, the hardware will not honor BCC. */
|
||||
soft_bcc = 1;
|
||||
if (flush == Z_FINISH && !param->bcf)
|
||||
/* We are about to open a BFINAL block, set Block Header Final bit
|
||||
* until the stream ends.
|
||||
*/
|
||||
param->bhf = 1;
|
||||
/* DFLTCC-CMPR will write to next_out, so make sure that buffers with
|
||||
* higher precedence are empty.
|
||||
*/
|
||||
Assert(state->pending == 0, "There must be no pending bytes");
|
||||
Assert(state->bi_valid < 8, "There must be less than 8 pending bits");
|
||||
param->sbb = (unsigned int)state->bi_valid;
|
||||
if (param->sbb > 0)
|
||||
*strm->next_out = (unsigned char)state->bi_buf;
|
||||
/* Honor history and check value */
|
||||
param->nt = 0;
|
||||
if (state->wrap == 1)
|
||||
param->cv = strm->adler;
|
||||
else if (state->wrap == 2)
|
||||
param->cv = ZSWAP32(state->crc_fold.value);
|
||||
|
||||
/* When opening a block, choose a Huffman-Table Type */
|
||||
if (!param->bcf) {
|
||||
if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0))
|
||||
param->htt = HTT_FIXED;
|
||||
else {
|
||||
param->htt = HTT_DYNAMIC;
|
||||
dfltcc_gdht(strm);
|
||||
}
|
||||
}
|
||||
|
||||
/* Deflate */
|
||||
do {
|
||||
cc = dfltcc_cmpr(strm);
|
||||
if (strm->avail_in < 4096 && masked_avail_in > 0)
|
||||
/* We are about to call DFLTCC with a small input buffer, which is
|
||||
* inefficient. Since there is masked data, there will be at least
|
||||
* one more DFLTCC call, so skip the current one and make the next
|
||||
* one handle more data.
|
||||
*/
|
||||
break;
|
||||
} while (cc == DFLTCC_CC_AGAIN);
|
||||
|
||||
/* Translate parameter block to stream */
|
||||
strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc);
|
||||
state->bi_valid = param->sbb;
|
||||
if (state->bi_valid == 0)
|
||||
state->bi_buf = 0; /* Avoid accessing next_out */
|
||||
else
|
||||
state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
|
||||
if (state->wrap == 1)
|
||||
strm->adler = param->cv;
|
||||
else if (state->wrap == 2)
|
||||
state->crc_fold.value = ZSWAP32(param->cv);
|
||||
|
||||
/* Unmask the input data */
|
||||
strm->avail_in += masked_avail_in;
|
||||
masked_avail_in = 0;
|
||||
|
||||
/* If we encounter an error, it means there is a bug in DFLTCC call */
|
||||
Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG");
|
||||
|
||||
/* Update Block-Continuation Flag. It will be used to check whether to call
|
||||
* GDHT the next time.
|
||||
*/
|
||||
if (cc == DFLTCC_CC_OK) {
|
||||
if (soft_bcc) {
|
||||
send_eobs(strm, param);
|
||||
param->bcf = 0;
|
||||
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
|
||||
} else
|
||||
param->bcf = 1;
|
||||
if (flush == Z_FINISH) {
|
||||
if (need_empty_block)
|
||||
/* Make the current deflate() call also close the stream */
|
||||
return 0;
|
||||
else {
|
||||
bi_windup(state);
|
||||
*result = finish_done;
|
||||
}
|
||||
} else {
|
||||
if (flush == Z_FULL_FLUSH)
|
||||
param->hl = 0; /* Clear history */
|
||||
*result = flush == Z_NO_FLUSH ? need_more : block_done;
|
||||
}
|
||||
} else {
|
||||
param->bcf = 1;
|
||||
*result = need_more;
|
||||
}
|
||||
if (strm->avail_in != 0 && strm->avail_out != 0)
|
||||
goto again; /* deflate() must use all input or all output */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
Switching between hardware and software compression.
|
||||
|
||||
DFLTCC does not support all zlib settings, e.g. generation of non-compressed
|
||||
blocks or alternative window sizes. When such settings are applied on the
|
||||
fly with deflateParams, we need to convert between hardware and software
|
||||
window formats.
|
||||
*/
|
||||
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
return strm->total_in > 0 || param->nt == 0 || param->hl > 0;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
int could_deflate = PREFIX(dfltcc_can_deflate)(strm);
|
||||
int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);
|
||||
|
||||
if (can_deflate == could_deflate)
|
||||
/* We continue to work in the same mode - no changes needed */
|
||||
return Z_OK;
|
||||
|
||||
if (!dfltcc_was_deflate_used(strm))
|
||||
/* DFLTCC was not used yet - no changes needed */
|
||||
return Z_OK;
|
||||
|
||||
/* For now, do not convert between window formats - simply get rid of the old data instead */
|
||||
*flush = Z_FULL_FLUSH;
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
/* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
|
||||
* close the block without resetting the compression state. Detect this
|
||||
* situation and return that deflation is not done.
|
||||
*/
|
||||
if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
|
||||
return 0;
|
||||
|
||||
/* Return that deflation is not done if DFLTCC is used and either it
|
||||
* buffered some data (Continuation Flag is set), or has not written EOBS
|
||||
* yet (Block-Continuation Flag is set).
|
||||
*/
|
||||
return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf);
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm);
|
||||
}
|
||||
|
||||
/*
|
||||
Preloading history.
|
||||
*/
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
append_history(param, state->window, dictionary, dict_length);
|
||||
state->strstart = 1; /* Add FDICT to zlib header */
|
||||
state->block_start = state->strstart; /* Make deflate_stored happy */
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
if (dictionary)
|
||||
get_history(param, state->window, dictionary);
|
||||
if (dict_length)
|
||||
*dict_length = param->hl;
|
||||
return Z_OK;
|
||||
}
|
58
3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h
vendored
Normal file
58
3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h
vendored
Normal file
@ -0,0 +1,58 @@
|
||||
#ifndef DFLTCC_DEFLATE_H
|
||||
#define DFLTCC_DEFLATE_H
|
||||
|
||||
#include "deflate.h"
|
||||
#include "dfltcc_common.h"
|
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp));
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result);
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush);
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush);
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible);
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length);
|
||||
int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
|
||||
|
||||
#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
return PREFIX(dfltcc_deflate_set_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0)
|
||||
|
||||
#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
return PREFIX(dfltcc_deflate_get_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0)
|
||||
|
||||
#define DEFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_deflate_state)
|
||||
|
||||
#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \
|
||||
do { \
|
||||
int err; \
|
||||
\
|
||||
err = PREFIX(dfltcc_deflate_params)((strm), (level), (strategy), (hook_flush)); \
|
||||
if (err == Z_STREAM_ERROR) \
|
||||
return err; \
|
||||
} while (0)
|
||||
|
||||
#define DEFLATE_DONE PREFIX(dfltcc_deflate_done)
|
||||
|
||||
#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
|
||||
do { \
|
||||
if (deflateStateCheck((strm)) || PREFIX(dfltcc_can_deflate)((strm))) \
|
||||
(complen) = DEFLATE_BOUND_COMPLEN(source_len); \
|
||||
} while (0)
|
||||
|
||||
#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (PREFIX(dfltcc_can_deflate)((strm)))
|
||||
|
||||
#define DEFLATE_HOOK PREFIX(dfltcc_deflate)
|
||||
|
||||
#define DEFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_deflate)((strm)))
|
||||
|
||||
#define DEFLATE_CAN_SET_REPRODUCIBLE PREFIX(dfltcc_can_set_reproducible)
|
||||
|
||||
#define DEFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE)
|
||||
|
||||
#endif
|
275
3rdparty/zlib-ng/arch/s390/dfltcc_detail.h
vendored
Normal file
275
3rdparty/zlib-ng/arch/s390/dfltcc_detail.h
vendored
Normal file
@ -0,0 +1,275 @@
|
||||
#include "zbuild.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef HAVE_SYS_SDT_H
|
||||
#include <sys/sdt.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
Tuning parameters.
|
||||
*/
|
||||
#ifndef DFLTCC_LEVEL_MASK
|
||||
#define DFLTCC_LEVEL_MASK 0x2
|
||||
#endif
|
||||
#ifndef DFLTCC_BLOCK_SIZE
|
||||
#define DFLTCC_BLOCK_SIZE 1048576
|
||||
#endif
|
||||
#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE
|
||||
#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096
|
||||
#endif
|
||||
#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE
|
||||
#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096
|
||||
#endif
|
||||
#ifndef DFLTCC_RIBM
|
||||
#define DFLTCC_RIBM 0
|
||||
#endif
|
||||
|
||||
#define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1]
|
||||
|
||||
#define DFLTCC_SIZEOF_QAF 32
|
||||
static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf);
|
||||
|
||||
static inline int is_bit_set(const char *bits, int n) {
|
||||
return bits[n / 8] & (1 << (7 - (n % 8)));
|
||||
}
|
||||
|
||||
static inline void clear_bit(char *bits, int n) {
|
||||
bits[n / 8] &= ~(1 << (7 - (n % 8)));
|
||||
}
|
||||
|
||||
#define DFLTCC_FACILITY 151
|
||||
|
||||
static inline int is_dfltcc_enabled(void) {
|
||||
uint64_t facilities[(DFLTCC_FACILITY / 64) + 1];
|
||||
Z_REGISTER uint8_t r0 __asm__("r0");
|
||||
|
||||
memset(facilities, 0, sizeof(facilities));
|
||||
r0 = sizeof(facilities) / sizeof(facilities[0]) - 1;
|
||||
/* STFLE is supported since z9-109 and only in z/Architecture mode. When
|
||||
* compiling with -m31, gcc defaults to ESA mode, however, since the kernel
|
||||
* is 64-bit, it's always z/Architecture mode at runtime.
|
||||
*/
|
||||
__asm__ volatile(
|
||||
#ifndef __clang__
|
||||
".machinemode push\n"
|
||||
".machinemode zarch\n"
|
||||
#endif
|
||||
"stfle %[facilities]\n"
|
||||
#ifndef __clang__
|
||||
".machinemode pop\n"
|
||||
#endif
|
||||
: [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
|
||||
return is_bit_set((const char *)facilities, DFLTCC_FACILITY);
|
||||
}
|
||||
|
||||
#define DFLTCC_FMT0 0
|
||||
|
||||
#define CVT_CRC32 0
|
||||
#define CVT_ADLER32 1
|
||||
#define HTT_FIXED 0
|
||||
#define HTT_DYNAMIC 1
|
||||
|
||||
#define DFLTCC_SIZEOF_GDHT_V0 384
|
||||
#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536
|
||||
static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0);
|
||||
static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0);
|
||||
|
||||
static inline z_const char *oesc_msg(char *buf, int oesc) {
|
||||
if (oesc == 0x00)
|
||||
return NULL; /* Successful completion */
|
||||
else {
|
||||
sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc);
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
C wrapper for the DEFLATE CONVERSION CALL instruction.
|
||||
*/
|
||||
typedef enum {
|
||||
DFLTCC_CC_OK = 0,
|
||||
DFLTCC_CC_OP1_TOO_SHORT = 1,
|
||||
DFLTCC_CC_OP2_TOO_SHORT = 2,
|
||||
DFLTCC_CC_OP2_CORRUPT = 2,
|
||||
DFLTCC_CC_AGAIN = 3,
|
||||
} dfltcc_cc;
|
||||
|
||||
#define DFLTCC_QAF 0
|
||||
#define DFLTCC_GDHT 1
|
||||
#define DFLTCC_CMPR 2
|
||||
#define DFLTCC_XPND 4
|
||||
#define HBT_CIRCULAR (1 << 7)
|
||||
#define DFLTCC_FN_MASK ((1 << 7) - 1)
|
||||
|
||||
/* Return lengths of high (starting at param->ho) and low (starting at 0) fragments of the circular history buffer. */
|
||||
static inline void get_history_lengths(struct dfltcc_param_v0 *param, size_t *hl_high, size_t *hl_low) {
|
||||
*hl_high = MIN(param->hl, HB_SIZE - param->ho);
|
||||
*hl_low = param->hl - *hl_high;
|
||||
}
|
||||
|
||||
/* Notify instrumentation about an upcoming read/write access to the circular history buffer. */
|
||||
static inline void instrument_read_write_hist(struct dfltcc_param_v0 *param, void *hist) {
|
||||
size_t hl_high, hl_low;
|
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low);
|
||||
instrument_read_write(hist + param->ho, hl_high);
|
||||
instrument_read_write(hist, hl_low);
|
||||
}
|
||||
|
||||
/* Notify MSan about a completed write to the circular history buffer. */
|
||||
static inline void msan_unpoison_hist(struct dfltcc_param_v0 *param, void *hist) {
|
||||
size_t hl_high, hl_low;
|
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low);
|
||||
__msan_unpoison(hist + param->ho, hl_high);
|
||||
__msan_unpoison(hist, hl_low);
|
||||
}
|
||||
|
||||
static inline dfltcc_cc dfltcc(int fn, void *param,
|
||||
unsigned char **op1, size_t *len1,
|
||||
z_const unsigned char **op2, size_t *len2, void *hist) {
|
||||
unsigned char *t2 = op1 ? *op1 : NULL;
|
||||
unsigned char *orig_t2 = t2;
|
||||
size_t t3 = len1 ? *len1 : 0;
|
||||
z_const unsigned char *t4 = op2 ? *op2 : NULL;
|
||||
size_t t5 = len2 ? *len2 : 0;
|
||||
Z_REGISTER int r0 __asm__("r0");
|
||||
Z_REGISTER void *r1 __asm__("r1");
|
||||
Z_REGISTER unsigned char *r2 __asm__("r2");
|
||||
Z_REGISTER size_t r3 __asm__("r3");
|
||||
Z_REGISTER z_const unsigned char *r4 __asm__("r4");
|
||||
Z_REGISTER size_t r5 __asm__("r5");
|
||||
int cc;
|
||||
|
||||
/* Insert pre-instrumentation for DFLTCC. */
|
||||
switch (fn & DFLTCC_FN_MASK) {
|
||||
case DFLTCC_QAF:
|
||||
instrument_write(param, DFLTCC_SIZEOF_QAF);
|
||||
break;
|
||||
case DFLTCC_GDHT:
|
||||
instrument_read_write(param, DFLTCC_SIZEOF_GDHT_V0);
|
||||
instrument_read(t4, t5);
|
||||
break;
|
||||
case DFLTCC_CMPR:
|
||||
case DFLTCC_XPND:
|
||||
instrument_read_write(param, DFLTCC_SIZEOF_CMPR_XPND_V0);
|
||||
instrument_read(t4, t5);
|
||||
instrument_write(t2, t3);
|
||||
instrument_read_write_hist(param, hist);
|
||||
break;
|
||||
}
|
||||
|
||||
r0 = fn; r1 = param; r2 = t2; r3 = t3; r4 = t4; r5 = t5;
|
||||
__asm__ volatile(
|
||||
#ifdef HAVE_SYS_SDT_H
|
||||
STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5))
|
||||
#endif
|
||||
".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n"
|
||||
#ifdef HAVE_SYS_SDT_H
|
||||
STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5))
|
||||
#endif
|
||||
"ipm %[cc]\n"
|
||||
: [r2] "+r" (r2)
|
||||
, [r3] "+r" (r3)
|
||||
, [r4] "+r" (r4)
|
||||
, [r5] "+r" (r5)
|
||||
, [cc] "=r" (cc)
|
||||
: [r0] "r" (r0)
|
||||
, [r1] "r" (r1)
|
||||
, [hist] "r" (hist)
|
||||
#ifdef HAVE_SYS_SDT_H
|
||||
, STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist)
|
||||
#endif
|
||||
: "cc", "memory");
|
||||
t2 = r2; t3 = r3; t4 = r4; t5 = r5;
|
||||
|
||||
/* Insert post-instrumentation for DFLTCC. */
|
||||
switch (fn & DFLTCC_FN_MASK) {
|
||||
case DFLTCC_QAF:
|
||||
__msan_unpoison(param, DFLTCC_SIZEOF_QAF);
|
||||
break;
|
||||
case DFLTCC_GDHT:
|
||||
__msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0);
|
||||
break;
|
||||
case DFLTCC_CMPR:
|
||||
__msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0);
|
||||
__msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1));
|
||||
msan_unpoison_hist(param, hist);
|
||||
break;
|
||||
case DFLTCC_XPND:
|
||||
__msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0);
|
||||
__msan_unpoison(orig_t2, t2 - orig_t2);
|
||||
msan_unpoison_hist(param, hist);
|
||||
break;
|
||||
}
|
||||
|
||||
if (op1)
|
||||
*op1 = t2;
|
||||
if (len1)
|
||||
*len1 = t3;
|
||||
if (op2)
|
||||
*op2 = t4;
|
||||
if (len2)
|
||||
*len2 = t5;
|
||||
return (cc >> 28) & 3;
|
||||
}
|
||||
|
||||
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
|
||||
|
||||
static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) {
|
||||
/* Initialize available functions */
|
||||
if (is_dfltcc_enabled()) {
|
||||
dfltcc(DFLTCC_QAF, &dfltcc_state->param, NULL, NULL, NULL, NULL, NULL);
|
||||
memmove(&dfltcc_state->af, &dfltcc_state->param, sizeof(dfltcc_state->af));
|
||||
} else
|
||||
memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af));
|
||||
|
||||
/* Initialize parameter block */
|
||||
memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param));
|
||||
dfltcc_state->param.nt = 1;
|
||||
dfltcc_state->param.ribm = DFLTCC_RIBM;
|
||||
}
|
||||
|
||||
static inline void dfltcc_copy_state(void *dst, const void *src, uInt size, uInt extension_size) {
|
||||
memcpy(dst, src, ALIGN_UP(size, 8) + extension_size);
|
||||
}
|
||||
|
||||
static inline void append_history(struct dfltcc_param_v0 *param, unsigned char *history,
|
||||
const unsigned char *buf, uInt count) {
|
||||
size_t offset;
|
||||
size_t n;
|
||||
|
||||
/* Do not use more than 32K */
|
||||
if (count > HB_SIZE) {
|
||||
buf += count - HB_SIZE;
|
||||
count = HB_SIZE;
|
||||
}
|
||||
offset = (param->ho + param->hl) % HB_SIZE;
|
||||
if (offset + count <= HB_SIZE)
|
||||
/* Circular history buffer does not wrap - copy one chunk */
|
||||
memcpy(history + offset, buf, count);
|
||||
else {
|
||||
/* Circular history buffer wraps - copy two chunks */
|
||||
n = HB_SIZE - offset;
|
||||
memcpy(history + offset, buf, n);
|
||||
memcpy(history, buf + n, count - n);
|
||||
}
|
||||
n = param->hl + count;
|
||||
if (n <= HB_SIZE)
|
||||
/* All history fits into buffer - no need to discard anything */
|
||||
param->hl = n;
|
||||
else {
|
||||
/* History does not fit into buffer - discard extra bytes */
|
||||
param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE;
|
||||
param->hl = HB_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history,
|
||||
unsigned char *buf) {
|
||||
size_t hl_high, hl_low;
|
||||
|
||||
get_history_lengths(param, &hl_high, &hl_low);
|
||||
memcpy(buf, history + param->ho, hl_high);
|
||||
memcpy(buf + hl_high, history, hl_low);
|
||||
}
|
191
3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c
vendored
Normal file
191
3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c
vendored
Normal file
@ -0,0 +1,191 @@
|
||||
/* dfltcc_inflate.c - IBM Z DEFLATE CONVERSION CALL decompression support. */
|
||||
|
||||
/*
|
||||
Use the following commands to build zlib-ng with DFLTCC decompression support:
|
||||
|
||||
$ ./configure --with-dfltcc-inflate
|
||||
or
|
||||
|
||||
$ cmake -DWITH_DFLTCC_INFLATE=1 .
|
||||
|
||||
and then
|
||||
|
||||
$ make
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "inftrees.h"
|
||||
#include "inflate.h"
|
||||
#include "dfltcc_inflate.h"
|
||||
#include "dfltcc_detail.h"
|
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
|
||||
dfltcc_reset_state(&state->arch.common);
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = &state->arch.common;
|
||||
|
||||
/* Unsupported hardware */
|
||||
return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
|
||||
}
|
||||
|
||||
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
size_t avail_out = strm->avail_out;
|
||||
dfltcc_cc cc;
|
||||
|
||||
cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR,
|
||||
param, &strm->next_out, &avail_out,
|
||||
&strm->next_in, &avail_in, state->window);
|
||||
strm->avail_in = avail_in;
|
||||
strm->avail_out = avail_out;
|
||||
return cc;
|
||||
}
|
||||
|
||||
dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = &state->arch.common;
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
dfltcc_cc cc;
|
||||
|
||||
if (flush == Z_BLOCK || flush == Z_TREES) {
|
||||
/* DFLTCC does not support stopping on block boundaries */
|
||||
if (PREFIX(dfltcc_inflate_disable)(strm)) {
|
||||
*ret = Z_STREAM_ERROR;
|
||||
return DFLTCC_INFLATE_BREAK;
|
||||
} else
|
||||
return DFLTCC_INFLATE_SOFTWARE;
|
||||
}
|
||||
|
||||
if (state->last) {
|
||||
if (state->bits != 0) {
|
||||
strm->next_in++;
|
||||
strm->avail_in--;
|
||||
state->bits = 0;
|
||||
}
|
||||
state->mode = CHECK;
|
||||
return DFLTCC_INFLATE_CONTINUE;
|
||||
}
|
||||
|
||||
if (strm->avail_in == 0 && !param->cf)
|
||||
return DFLTCC_INFLATE_BREAK;
|
||||
|
||||
/* if window not in use yet, initialize */
|
||||
if (state->wsize == 0)
|
||||
state->wsize = 1U << state->wbits;
|
||||
|
||||
/* Translate stream to parameter block */
|
||||
param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32;
|
||||
param->sbb = state->bits;
|
||||
if (param->hl)
|
||||
param->nt = 0; /* Honor history for the first block */
|
||||
if (state->wrap & 4)
|
||||
param->cv = state->flags ? ZSWAP32(state->check) : state->check;
|
||||
|
||||
/* Inflate */
|
||||
do {
|
||||
cc = dfltcc_xpnd(strm);
|
||||
} while (cc == DFLTCC_CC_AGAIN);
|
||||
|
||||
/* Translate parameter block to stream */
|
||||
strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
|
||||
state->last = cc == DFLTCC_CC_OK;
|
||||
state->bits = param->sbb;
|
||||
if (state->wrap & 4)
|
||||
strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
|
||||
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
|
||||
/* Report an error if stream is corrupted */
|
||||
state->mode = BAD;
|
||||
return DFLTCC_INFLATE_CONTINUE;
|
||||
}
|
||||
state->mode = TYPEDO;
|
||||
/* Break if operands are exhausted, otherwise continue looping */
|
||||
return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ?
|
||||
DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
|
||||
return !state->arch.common.param.nt;
|
||||
}
|
||||
|
||||
/*
|
||||
Rotates a circular buffer.
|
||||
The implementation is based on https://cplusplus.com/reference/algorithm/rotate/
|
||||
*/
|
||||
static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *end) {
|
||||
unsigned char *p = pivot;
|
||||
unsigned char tmp;
|
||||
|
||||
while (p != start) {
|
||||
tmp = *start;
|
||||
*start = *p;
|
||||
*p = tmp;
|
||||
|
||||
start++;
|
||||
p++;
|
||||
|
||||
if (p == end)
|
||||
p = pivot;
|
||||
else if (start == pivot)
|
||||
pivot = p;
|
||||
}
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = &state->arch.common;
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
|
||||
if (!PREFIX(dfltcc_can_inflate)(strm))
|
||||
return 0;
|
||||
if (PREFIX(dfltcc_was_inflate_used)(strm))
|
||||
/* DFLTCC has already decompressed some data. Since there is not
|
||||
* enough information to resume decompression in software, the call
|
||||
* must fail.
|
||||
*/
|
||||
return 1;
|
||||
/* DFLTCC was not used yet - decompress in software */
|
||||
memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af));
|
||||
/* Convert the window from the hardware to the software format */
|
||||
rotate(state->window, state->window + param->ho, state->window + HB_SIZE);
|
||||
state->whave = state->wnext = MIN(param->hl, state->wsize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Preloading history.
|
||||
*/
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
/* if window not in use yet, initialize */
|
||||
if (state->wsize == 0)
|
||||
state->wsize = 1U << state->wbits;
|
||||
|
||||
append_history(param, state->window, dictionary, dict_length);
|
||||
state->havedict = 1;
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm,
|
||||
unsigned char *dictionary, uInt *dict_length) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &state->arch.common.param;
|
||||
|
||||
if (dictionary && state->window)
|
||||
get_history(param, state->window, dictionary);
|
||||
if (dict_length)
|
||||
*dict_length = param->hl;
|
||||
return Z_OK;
|
||||
}
|
67
3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h
vendored
Normal file
67
3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
#ifndef DFLTCC_INFLATE_H
|
||||
#define DFLTCC_INFLATE_H
|
||||
|
||||
#include "dfltcc_common.h"
|
||||
|
||||
void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm);
|
||||
typedef enum {
|
||||
DFLTCC_INFLATE_CONTINUE,
|
||||
DFLTCC_INFLATE_BREAK,
|
||||
DFLTCC_INFLATE_SOFTWARE,
|
||||
} dfltcc_inflate_action;
|
||||
dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret);
|
||||
int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length);
|
||||
int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm,
|
||||
unsigned char *dictionary, uInt* dict_length);
|
||||
|
||||
#define INFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_inflate_state)
|
||||
|
||||
#define INFLATE_PRIME_HOOK(strm, bits, value) \
|
||||
do { if (PREFIX(dfltcc_inflate_disable)((strm))) return Z_STREAM_ERROR; } while (0)
|
||||
|
||||
#define INFLATE_TYPEDO_HOOK(strm, flush) \
|
||||
if (PREFIX(dfltcc_can_inflate)((strm))) { \
|
||||
dfltcc_inflate_action action; \
|
||||
\
|
||||
RESTORE(); \
|
||||
action = PREFIX(dfltcc_inflate)((strm), (flush), &ret); \
|
||||
LOAD(); \
|
||||
if (action == DFLTCC_INFLATE_CONTINUE) \
|
||||
break; \
|
||||
else if (action == DFLTCC_INFLATE_BREAK) \
|
||||
goto inf_leave; \
|
||||
}
|
||||
|
||||
#define INFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_inflate)((strm)))
|
||||
|
||||
#define INFLATE_NEED_UPDATEWINDOW(strm) (!PREFIX(dfltcc_can_inflate)((strm)))
|
||||
|
||||
#define INFLATE_MARK_HOOK(strm) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_was_inflate_used)((strm))) return -(1L << 16); \
|
||||
} while (0)
|
||||
|
||||
#define INFLATE_SYNC_POINT_HOOK(strm) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_was_inflate_used)((strm))) return Z_STREAM_ERROR; \
|
||||
} while (0)
|
||||
|
||||
#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_can_inflate)((strm))) \
|
||||
return PREFIX(dfltcc_inflate_set_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0)
|
||||
|
||||
#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||
do { \
|
||||
if (PREFIX(dfltcc_can_inflate)((strm))) \
|
||||
return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \
|
||||
} while (0)
|
||||
|
||||
#define INFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE)
|
||||
|
||||
#endif
|
14
3rdparty/zlib-ng/arch/s390/s390_features.c
vendored
Normal file
14
3rdparty/zlib-ng/arch/s390/s390_features.c
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
#include "zbuild.h"
|
||||
#include "s390_features.h"
|
||||
|
||||
#ifdef HAVE_SYS_AUXV_H
|
||||
# include <sys/auxv.h>
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP_S390_VXRS
|
||||
#define HWCAP_S390_VXRS HWCAP_S390_VX
|
||||
#endif
|
||||
|
||||
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) {
|
||||
features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VXRS;
|
||||
}
|
14
3rdparty/zlib-ng/arch/s390/s390_features.h
vendored
Normal file
14
3rdparty/zlib-ng/arch/s390/s390_features.h
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
/* s390_features.h -- check for s390 features.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef S390_FEATURES_H_
|
||||
#define S390_FEATURES_H_
|
||||
|
||||
struct s390_cpu_features {
|
||||
int has_vx;
|
||||
};
|
||||
|
||||
void Z_INTERNAL s390_check_features(struct s390_cpu_features *features);
|
||||
|
||||
#endif
|
20
3rdparty/zlib-ng/arch/s390/s390_functions.h
vendored
Normal file
20
3rdparty/zlib-ng/arch/s390/s390_functions.h
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
/* s390_functions.h -- s390 implementations for arch-specific functions.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef S390_FUNCTIONS_H_
|
||||
#define S390_FUNCTIONS_H_
|
||||
|
||||
#ifdef S390_CRC32_VX
|
||||
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_s390_vx
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
47  3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.Dockerfile (vendored, new file)
@@ -0,0 +1,47 @@
# Self-Hosted IBM Z Github Actions Runner.

FROM almalinux:9

RUN dnf update -y -q && \
    dnf install -y -q --enablerepo=crb wget git which sudo jq \
    cmake make automake autoconf m4 libtool ninja-build python3-pip \
    gcc gcc-c++ clang llvm-toolset glibc-all-langpacks langpacks-en \
    glibc-static libstdc++-static libstdc++-devel libxslt-devel libxml2-devel

RUN dnf install -y -q dotnet-sdk-6.0 && \
    echo "Using SDK - `dotnet --version`"

COPY runner-s390x.patch /tmp/runner.patch
COPY runner-global.json /tmp/global.json

RUN cd /tmp && \
    git clone -q https://github.com/actions/runner && \
    cd runner && \
    git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) -b build && \
    git apply /tmp/runner.patch && \
    cp -f /tmp/global.json src/global.json


RUN cd /tmp/runner/src && \
    ./dev.sh layout && \
    ./dev.sh package && \
    rm -rf /root/.dotnet /root/.nuget

RUN useradd -c "Action Runner" -m actions-runner && \
    usermod -L actions-runner

RUN tar -xf /tmp/runner/_package/*.tar.gz -C /home/actions-runner && \
    chown -R actions-runner:actions-runner /home/actions-runner

#VOLUME /home/actions-runner

RUN rm -rf /tmp/runner /var/cache/dnf/* /tmp/runner.patch /tmp/global.json && \
    dnf clean all

USER actions-runner

# Scripts.
COPY fs/ /
WORKDIR /home/actions-runner
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]
18  3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.service (vendored, new file)
@@ -0,0 +1,18 @@
[Unit]
Description=Podman container: Gaplib Github Actions Runner
Wants=network-online.target
After=network-online.target
StartLimitIntervalSec=1
RequiresMountsFor=/run/user/1001/containers

[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=always
TimeoutStopSec=61
ExecStart=/usr/bin/podman start gaplib-actions-runner
ExecStop=/usr/bin/podman stop -t 1 gaplib-actions-runner
ExecStopPost=/usr/bin/podman stop -t 1 gaplib-actions-runner
Type=forking

[Install]
WantedBy=default.target
5  3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-global.json (vendored, new file)
@@ -0,0 +1,5 @@
{
  "sdk": {
    "version": "6.0.421"
  }
}
243  3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-s390x.patch (vendored, new file)
@@ -0,0 +1,243 @@
|
||||
diff --git a/src/Directory.Build.props b/src/Directory.Build.props
|
||||
index 9db5fac..f02e235 100644
|
||||
--- a/src/Directory.Build.props
|
||||
+++ b/src/Directory.Build.props
|
||||
@@ -44,6 +44,9 @@
|
||||
<PropertyGroup Condition="'$(BUILD_OS)' == 'Linux' AND '$(PackageRuntime)' == 'linux-arm64'">
|
||||
<DefineConstants>$(DefineConstants);ARM64</DefineConstants>
|
||||
</PropertyGroup>
|
||||
+ <PropertyGroup Condition="'$(BUILD_OS)' == 'Linux' AND '$(PackageRuntime)' == 'linux-s390x'">
|
||||
+ <DefineConstants>$(DefineConstants);S390X</DefineConstants>
|
||||
+ </PropertyGroup>
|
||||
|
||||
<!-- Set TRACE/DEBUG vars -->
|
||||
<PropertyGroup>
|
||||
diff --git a/src/Misc/externals.sh b/src/Misc/externals.sh
|
||||
index 383221e..1555f67 100755
|
||||
--- a/src/Misc/externals.sh
|
||||
+++ b/src/Misc/externals.sh
|
||||
@@ -189,3 +189,8 @@ if [[ "$PACKAGERUNTIME" == "linux-arm" ]]; then
|
||||
acquireExternalTool "$NODE_URL/v${NODE16_VERSION}/node-v${NODE16_VERSION}-linux-armv7l.tar.gz" node16 fix_nested_dir
|
||||
acquireExternalTool "$NODE_URL/v${NODE20_VERSION}/node-v${NODE20_VERSION}-linux-armv7l.tar.gz" node20 fix_nested_dir
|
||||
fi
|
||||
+
|
||||
+if [[ "$PACKAGERUNTIME" == "linux-s390x" ]]; then
|
||||
+ acquireExternalTool "$NODE_URL/v${NODE16_VERSION}/node-v${NODE16_VERSION}-linux-s390x.tar.gz" node16 fix_nested_dir
|
||||
+ acquireExternalTool "$NODE_URL/v${NODE20_VERSION}/node-v${NODE20_VERSION}-linux-s390x.tar.gz" node20 fix_nested_dir
|
||||
+fi
|
||||
diff --git a/src/Misc/layoutroot/config.sh b/src/Misc/layoutroot/config.sh
|
||||
index 14cc6ba..9b5b8e6 100755
|
||||
--- a/src/Misc/layoutroot/config.sh
|
||||
+++ b/src/Misc/layoutroot/config.sh
|
||||
@@ -20,25 +20,29 @@ then
|
||||
|
||||
message="Execute sudo ./bin/installdependencies.sh to install any missing Dotnet Core 6.0 dependencies."
|
||||
|
||||
- ldd ./bin/libcoreclr.so | grep 'not found'
|
||||
- if [ $? -eq 0 ]; then
|
||||
- echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
- echo $message
|
||||
- exit 1
|
||||
- fi
|
||||
+ ARCH=`uname -m`
|
||||
+ if [ "${ARCH}" != "s390x" -a "${ARCH}" != "ppc64le" ]
|
||||
+ then
|
||||
+ ldd ./bin/libcoreclr.so | grep 'not found'
|
||||
+ if [ $? -eq 0 ]; then
|
||||
+ echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
+ echo $message
|
||||
+ exit 1
|
||||
+ fi
|
||||
|
||||
- ldd ./bin/libSystem.Security.Cryptography.Native.OpenSsl.so | grep 'not found'
|
||||
- if [ $? -eq 0 ]; then
|
||||
- echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
- echo $message
|
||||
- exit 1
|
||||
- fi
|
||||
+ ldd ./bin/libSystem.Security.Cryptography.Native.OpenSsl.so | grep 'not found'
|
||||
+ if [ $? -eq 0 ]; then
|
||||
+ echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
+ echo $message
|
||||
+ exit 1
|
||||
+ fi
|
||||
|
||||
- ldd ./bin/libSystem.IO.Compression.Native.so | grep 'not found'
|
||||
- if [ $? -eq 0 ]; then
|
||||
- echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
- echo $message
|
||||
- exit 1
|
||||
+ ldd ./bin/libSystem.IO.Compression.Native.so | grep 'not found'
|
||||
+ if [ $? -eq 0 ]; then
|
||||
+ echo "Dependencies is missing for Dotnet Core 6.0"
|
||||
+ echo $message
|
||||
+ exit 1
|
||||
+ fi
|
||||
fi
|
||||
|
||||
if ! [ -x "$(command -v ldconfig)" ]; then
|
||||
diff --git a/src/Runner.Common/Constants.cs b/src/Runner.Common/Constants.cs
|
||||
index 177e3c9..9545981 100644
|
||||
--- a/src/Runner.Common/Constants.cs
|
||||
+++ b/src/Runner.Common/Constants.cs
|
||||
@@ -58,7 +58,8 @@ namespace GitHub.Runner.Common
|
||||
X86,
|
||||
X64,
|
||||
Arm,
|
||||
- Arm64
|
||||
+ Arm64,
|
||||
+ S390x
|
||||
}
|
||||
|
||||
public static class Runner
|
||||
@@ -81,6 +82,8 @@ namespace GitHub.Runner.Common
|
||||
public static readonly Architecture PlatformArchitecture = Architecture.Arm;
|
||||
#elif ARM64
|
||||
public static readonly Architecture PlatformArchitecture = Architecture.Arm64;
|
||||
+#elif S390X
|
||||
+ public static readonly Architecture PlatformArchitecture = Architecture.S390x;
|
||||
#else
|
||||
public static readonly Architecture PlatformArchitecture = Architecture.X64;
|
||||
#endif
|
||||
diff --git a/src/Runner.Common/Util/VarUtil.cs b/src/Runner.Common/Util/VarUtil.cs
|
||||
index 97273a1..2a34430 100644
|
||||
--- a/src/Runner.Common/Util/VarUtil.cs
|
||||
+++ b/src/Runner.Common/Util/VarUtil.cs
|
||||
@@ -53,6 +53,8 @@ namespace GitHub.Runner.Common.Util
|
||||
return "ARM";
|
||||
case Constants.Architecture.Arm64:
|
||||
return "ARM64";
|
||||
+ case Constants.Architecture.S390x:
|
||||
+ return "S390X";
|
||||
default:
|
||||
throw new NotSupportedException(); // Should never reach here.
|
||||
}
|
||||
diff --git a/src/Test/L0/ConstantGenerationL0.cs b/src/Test/L0/ConstantGenerationL0.cs
|
||||
index 2042485..a9d8b46 100644
|
||||
--- a/src/Test/L0/ConstantGenerationL0.cs
|
||||
+++ b/src/Test/L0/ConstantGenerationL0.cs
|
||||
@@ -20,6 +20,7 @@ namespace GitHub.Runner.Common.Tests
|
||||
"linux-x64",
|
||||
"linux-arm",
|
||||
"linux-arm64",
|
||||
+ "linux-s390x",
|
||||
"osx-x64",
|
||||
"osx-arm64"
|
||||
};
|
||||
diff --git a/src/Test/L0/Listener/SelfUpdaterL0.cs b/src/Test/L0/Listener/SelfUpdaterL0.cs
|
||||
index 26ba65e..6791df3 100644
|
||||
--- a/src/Test/L0/Listener/SelfUpdaterL0.cs
|
||||
+++ b/src/Test/L0/Listener/SelfUpdaterL0.cs
|
||||
@@ -1,4 +1,4 @@
|
||||
-#if !(OS_WINDOWS && ARM64)
|
||||
+#if !(OS_WINDOWS && ARM64) && !S390X
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
@@ -16,6 +16,7 @@ using Xunit;
|
||||
|
||||
namespace GitHub.Runner.Common.Tests.Listener
|
||||
{
|
||||
+#if !S390X // Self-update is not currently supported on S390X
|
||||
public sealed class SelfUpdaterL0
|
||||
{
|
||||
private Mock<IRunnerServer> _runnerServer;
|
||||
@@ -291,5 +292,6 @@ namespace GitHub.Runner.Common.Tests.Listener
|
||||
}
|
||||
}
|
||||
}
|
||||
+#endif
|
||||
}
|
||||
#endif
|
||||
diff --git a/src/Test/L0/Listener/SelfUpdaterV2L0.cs b/src/Test/L0/Listener/SelfUpdaterV2L0.cs
|
||||
index 5115a6b..dd8d198 100644
|
||||
--- a/src/Test/L0/Listener/SelfUpdaterV2L0.cs
|
||||
+++ b/src/Test/L0/Listener/SelfUpdaterV2L0.cs
|
||||
@@ -1,4 +1,4 @@
|
||||
-#if !(OS_WINDOWS && ARM64)
|
||||
+#if !(OS_WINDOWS && ARM64) && !S390X
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
diff --git a/src/Test/L0/Worker/StepHostL0.cs b/src/Test/L0/Worker/StepHostL0.cs
|
||||
index f6b5889..26f8e21 100644
|
||||
--- a/src/Test/L0/Worker/StepHostL0.cs
|
||||
+++ b/src/Test/L0/Worker/StepHostL0.cs
|
||||
@@ -31,7 +31,7 @@ namespace GitHub.Runner.Common.Tests.Worker
|
||||
return hc;
|
||||
}
|
||||
|
||||
-#if OS_LINUX
|
||||
+#if OS_LINUX && !S390X
|
||||
[Fact]
|
||||
[Trait("Level", "L0")]
|
||||
[Trait("Category", "Worker")]
|
||||
diff --git a/src/dev.sh b/src/dev.sh
|
||||
index fa637d1..8c66f37 100755
|
||||
--- a/src/dev.sh
|
||||
+++ b/src/dev.sh
|
||||
@@ -54,6 +54,7 @@ elif [[ "$CURRENT_PLATFORM" == 'linux' ]]; then
|
||||
case $CPU_NAME in
|
||||
armv7l) RUNTIME_ID="linux-arm";;
|
||||
aarch64) RUNTIME_ID="linux-arm64";;
|
||||
+ s390x) RUNTIME_ID="linux-s390x";;
|
||||
esac
|
||||
fi
|
||||
elif [[ "$CURRENT_PLATFORM" == 'darwin' ]]; then
|
||||
@@ -80,7 +81,7 @@ if [[ "$CURRENT_PLATFORM" == 'windows' ]]; then
|
||||
exit 1
|
||||
fi
|
||||
elif [[ "$CURRENT_PLATFORM" == 'linux' ]]; then
|
||||
- if [[ ("$RUNTIME_ID" != 'linux-x64') && ("$RUNTIME_ID" != 'linux-x86') && ("$RUNTIME_ID" != 'linux-arm64') && ("$RUNTIME_ID" != 'linux-arm') ]]; then
|
||||
+ if [[ ("$RUNTIME_ID" != 'linux-x64') && ("$RUNTIME_ID" != 'linux-x86') && ("$RUNTIME_ID" != 'linux-arm64') && ("$RUNTIME_ID" != 'linux-arm') && ("$RUNTIME_ID" != 'linux-s390x') ]]; then
|
||||
echo "Failed: Can't build $RUNTIME_ID package $CURRENT_PLATFORM" >&2
|
||||
exit 1
|
||||
fi
|
||||
@@ -199,7 +200,8 @@ function package ()
|
||||
popd > /dev/null
|
||||
}
|
||||
|
||||
-if [[ (! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}") || (! -e "${DOTNETSDK_INSTALLDIR}/dotnet") ]]; then
|
||||
+if [[ "${RUNTIME_ID}" != "linux-s390x" && ((! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}") || (! -e "${DOTNETSDK_INSTALLDIR}/dotnet")) ]]; then
|
||||
+
|
||||
|
||||
# Download dotnet SDK to ../_dotnetsdk directory
|
||||
heading "Ensure Dotnet SDK"
|
||||
@@ -224,8 +226,10 @@ if [[ (! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTN
|
||||
echo "${DOTNETSDK_VERSION}" > "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}"
|
||||
fi
|
||||
|
||||
-echo "Prepend ${DOTNETSDK_INSTALLDIR} to %PATH%"
|
||||
-export PATH=${DOTNETSDK_INSTALLDIR}:$PATH
|
||||
+if [[ -d "${DOTNETSDK_INSTALLDIR}" ]]; then
|
||||
+ echo "Prepend ${DOTNETSDK_INSTALLDIR} to %PATH%"
|
||||
+ export PATH=${DOTNETSDK_INSTALLDIR}:$PATH
|
||||
+fi
|
||||
|
||||
heading "Dotnet SDK Version"
|
||||
dotnet --version
|
||||
diff --git a/src/dir.proj b/src/dir.proj
|
||||
index 056a312..8370922 100644
|
||||
--- a/src/dir.proj
|
||||
+++ b/src/dir.proj
|
||||
@@ -41,8 +41,18 @@
|
||||
</ItemGroup>
|
||||
|
||||
<Target Name="Build" DependsOnTargets="GenerateConstant">
|
||||
- <MSBuild Targets="Restore" Projects="@(ProjectFiles)" StopOnFirstFailure="true" />
|
||||
- <MSBuild Targets="Publish" Projects="@(ProjectFiles)" BuildInParallel="false" StopOnFirstFailure="true" Properties="Configuration=$(BUILDCONFIG);PackageRuntime=$(PackageRuntime);Version=$(RunnerVersion);RuntimeIdentifier=$(PackageRuntime);PublishDir=$(MSBuildProjectDirectory)/../_layout/bin" />
|
||||
+ <PropertyGroup>
|
||||
+ <!-- Normally we want to publish a self-contained app for $(PackageRuntime) -->
|
||||
+ <PublishRuntimeIdentifier>RuntimeIdentifier=$(PackageRuntime)</PublishRuntimeIdentifier>
|
||||
+ <!-- However, on s390x there are no apphost or runtime packages on nuget.org, so self-contained publishing is not supported.
|
||||
+ Perform a non-self-contained publish using the current runtime identifier (normally something like rhel.8-s390x) instead.
|
||||
+ In addition, when not using an explicit runtime identifier, the SDK will copy runtime assets from dependent packages;
|
||||
+ as this would confuse the expected layout, disable that behavior as well. -->
|
||||
+ <PublishRuntimeIdentifier Condition="'$(PackageRuntime)' == 'linux-s390x'">SelfContained=false;CopyLocalRuntimeTargetAssets=false</PublishRuntimeIdentifier>
|
||||
+ </PropertyGroup>
|
||||
+
|
||||
+ <MSBuild Targets="Restore" Projects="@(ProjectFiles)" StopOnFirstFailure="true" Properties="$(PublishRuntimeIdentifier)" />
|
||||
+ <MSBuild Targets="Publish" Projects="@(ProjectFiles)" BuildInParallel="false" StopOnFirstFailure="true" Properties="Configuration=$(BUILDCONFIG);PackageRuntime=$(PackageRuntime);Version=$(RunnerVersion);$(PublishRuntimeIdentifier);PublishDir=$(MSBuildProjectDirectory)/../_layout/bin" />
|
||||
<Exec Command="%22$(DesktopMSBuild)%22 Runner.Service/Windows/RunnerService.csproj /p:Configuration=$(BUILDCONFIG) /p:PackageRuntime=$(PackageRuntime) /p:OutputPath=%22$(MSBuildProjectDirectory)/../_layout/bin%22" ConsoleToMSBuild="true" Condition="'$(PackageRuntime)' == 'win-x64' Or '$(PackageRuntime)' == 'win-x86' Or '$(PackageRuntime)' == 'win-arm64'" />
|
||||
</Target>
|
||||
|
11  3rdparty/zlib-ng/arch/x86/Makefile.in (vendored)
@@ -35,7 +35,6 @@ all: \
|
||||
chunkset_ssse3.o chunkset_ssse3.lo \
|
||||
compare256_avx2.o compare256_avx2.lo \
|
||||
compare256_sse2.o compare256_sse2.lo \
|
||||
insert_string_sse42.o insert_string_sse42.lo \
|
||||
crc32_pclmulqdq.o crc32_pclmulqdq.lo \
|
||||
crc32_vpclmulqdq.o crc32_vpclmulqdq.lo \
|
||||
slide_hash_avx2.o slide_hash_avx2.lo \
|
||||
@ -77,12 +76,6 @@ compare256_sse2.o:
|
||||
compare256_sse2.lo:
|
||||
$(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c
|
||||
|
||||
insert_string_sse42.o:
|
||||
$(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c
|
||||
|
||||
insert_string_sse42.lo:
|
||||
$(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c
|
||||
|
||||
crc32_pclmulqdq.o:
|
||||
$(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c
|
||||
|
||||
@ -90,10 +83,10 @@ crc32_pclmulqdq.lo:
|
||||
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c
|
||||
|
||||
crc32_vpclmulqdq.o:
|
||||
$(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
|
||||
$(CC) $(CFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
|
||||
|
||||
crc32_vpclmulqdq.lo:
|
||||
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
|
||||
$(CC) $(SFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c
|
||||
|
||||
slide_hash_avx2.o:
|
||||
$(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c
|
||||
|
17  3rdparty/zlib-ng/arch/x86/adler32_avx2.c (vendored)
@@ -9,24 +9,15 @@
|
||||
|
||||
#ifdef X86_AVX2
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
#include <immintrin.h>
|
||||
#include "../../adler32_fold.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "adler32_p.h"
|
||||
#include "adler32_avx2_p.h"
|
||||
#include "x86_intrins.h"
|
||||
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
|
||||
|
||||
#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d)
|
||||
#define sub32(a, b, c) adler32_ssse3(a, b, c)
|
||||
#else
|
||||
#define copy_sub32(a, b, c, d) adler32_copy_len_16(adler0, c, b, d, adler1)
|
||||
#define sub32(a, b, c) adler32_len_16(adler0, b, c, adler1)
|
||||
#endif
|
||||
|
||||
static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) {
|
||||
if (src == NULL) return 1L;
|
||||
if (len == 0) return adler;
|
||||
@ -44,9 +35,9 @@ rem_peel:
|
||||
}
|
||||
} else if (len < 32) {
|
||||
if (COPY) {
|
||||
return copy_sub32(adler, dst, src, len);
|
||||
return adler32_fold_copy_sse42(adler, dst, src, len);
|
||||
} else {
|
||||
return sub32(adler, src, len);
|
||||
return adler32_ssse3(adler, src, len);
|
||||
}
|
||||
}
|
||||
|
||||
|
13  3rdparty/zlib-ng/arch/x86/adler32_avx512.c (vendored)
@@ -8,10 +8,9 @@
|
||||
|
||||
#ifdef X86_AVX512
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "../../adler32_fold.h"
|
||||
#include "../../cpu_features.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
#include "arch_functions.h"
|
||||
#include <immintrin.h>
|
||||
#include "x86_intrins.h"
|
||||
#include "adler32_avx512_p.h"
|
||||
@ -33,13 +32,7 @@ rem_peel:
|
||||
_mm512_mask_storeu_epi8(dst, storemask, copy_vec);
|
||||
}
|
||||
|
||||
#ifdef X86_AVX2
|
||||
return adler32_avx2(adler, src, len);
|
||||
#elif defined(X86_SSSE3)
|
||||
return adler32_ssse3(adler, src, len);
|
||||
#else
|
||||
return adler32_len_16(adler0, src, len, adler1);
|
||||
#endif
|
||||
}
|
||||
|
||||
__m512i vbuf, vs1_0, vs3;
|
||||
|
21  3rdparty/zlib-ng/arch/x86/adler32_avx512_vnni.c (vendored)
@@ -9,11 +9,10 @@
|
||||
|
||||
#ifdef X86_AVX512VNNI
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "../../cpu_features.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
#include "arch_functions.h"
|
||||
#include <immintrin.h>
|
||||
#include "../../adler32_fold.h"
|
||||
#include "x86_intrins.h"
|
||||
#include "adler32_avx512_p.h"
|
||||
#include "adler32_avx2_p.h"
|
||||
@ -28,20 +27,10 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size
|
||||
|
||||
rem_peel:
|
||||
if (len < 32)
|
||||
#if defined(X86_SSSE3)
|
||||
return adler32_ssse3(adler, src, len);
|
||||
#else
|
||||
return adler32_len_16(adler0, src, len, adler1);
|
||||
#endif
|
||||
|
||||
if (len < 64)
|
||||
#ifdef X86_AVX2
|
||||
return adler32_avx2(adler, src, len);
|
||||
#elif defined(X86_SSE3)
|
||||
return adler32_ssse3(adler, src, len);
|
||||
#else
|
||||
return adler32_len_16(adler0, src, len, adler1);
|
||||
#endif
|
||||
|
||||
const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
|
||||
@ -135,11 +124,7 @@ rem_peel_copy:
|
||||
__m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src);
|
||||
_mm256_mask_storeu_epi8(dst, storemask, copy_vec);
|
||||
|
||||
#if defined(X86_SSSE3)
|
||||
return adler32_ssse3(adler, src, len);
|
||||
#else
|
||||
return adler32_len_16(adler0, src, len, adler1);
|
||||
#endif
|
||||
}
|
||||
|
||||
const __m256i dot2v = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
|
5  3rdparty/zlib-ng/arch/x86/adler32_sse42.c (vendored)
@@ -6,9 +6,8 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "../../adler32_fold.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
#include "adler32_ssse3_p.h"
|
||||
#include <immintrin.h>
|
||||
|
||||
|
4  3rdparty/zlib-ng/arch/x86/adler32_ssse3.c (vendored)
@@ -6,8 +6,8 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../adler32_p.h"
|
||||
#include "zbuild.h"
|
||||
#include "adler32_p.h"
|
||||
#include "adler32_ssse3_p.h"
|
||||
|
||||
#ifdef X86_SSSE3
|
||||
|
14  3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c (vendored)
@@ -4,10 +4,7 @@
|
||||
|
||||
#include "zbuild.h"
|
||||
|
||||
/* This requires SSE2 support. While it's implicit with SSSE3, we can minimize
|
||||
* code size by sharing the chunkcopy functions, which will certainly compile
|
||||
* to identical machine code */
|
||||
#if defined(X86_SSSE3) && defined(X86_SSE2)
|
||||
#if defined(X86_SSSE3)
|
||||
#include <immintrin.h>
|
||||
#include "../generic/chunk_permute_table.h"
|
||||
|
||||
@ -19,8 +16,6 @@ typedef __m128i chunk_t;
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
#define HAVE_CHUNK_MAG
|
||||
#define HAVE_CHUNKCOPY
|
||||
#define HAVE_CHUNKUNROLL
|
||||
|
||||
static const lut_rem_pair perm_idx_lut[13] = {
|
||||
{0, 1}, /* 3 */
|
||||
@ -83,14 +78,11 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
|
||||
return ret_vec;
|
||||
}
|
||||
|
||||
extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
|
||||
#define CHUNKSIZE chunksize_ssse3
|
||||
#define CHUNKMEMSET chunkmemset_ssse3
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3
|
||||
#define CHUNKCOPY chunkcopy_sse2
|
||||
#define CHUNKUNROLL chunkunroll_sse2
|
||||
#define CHUNKCOPY chunkcopy_ssse3
|
||||
#define CHUNKUNROLL chunkunroll_ssse3
|
||||
|
||||
#include "chunkset_tpl.h"
|
||||
|
||||
|
5  3rdparty/zlib-ng/arch/x86/compare256_avx2.c (vendored)
@@ -3,8 +3,9 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
|
5  3rdparty/zlib-ng/arch/x86/compare256_sse2.c (vendored)
@@ -3,8 +3,9 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil_p.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
|
@ -26,27 +26,26 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint
|
||||
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
|
||||
__m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3;
|
||||
__m128i xmm_crc_part = _mm_setzero_si128();
|
||||
#ifdef COPY
|
||||
char ALIGNED_(16) partial_buf[16] = { 0 };
|
||||
#else
|
||||
#ifndef COPY
|
||||
__m128i xmm_initial = _mm_cvtsi32_si128(init_crc);
|
||||
int32_t first = init_crc != 0;
|
||||
|
||||
/* Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31
|
||||
* bytes of input is needed for the aligning load that occurs. If there's an initial CRC, to
|
||||
* carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available, which
|
||||
* by definition can be up to 15 bytes + one full vector load. */
|
||||
assert(len >= 31 || first == 0);
|
||||
/* The CRC functions don't call this for input < 16, as a minimum of 16 bytes of input is needed
|
||||
* for the aligning load that occurs. If there's an initial CRC, to carry it forward through
|
||||
* the folded CRC there must be 16 - src % 16 + 16 bytes available, which by definition can be
|
||||
* up to 15 bytes + one full vector load. */
|
||||
assert(len >= 16 || first == 0);
|
||||
#endif
|
||||
crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
|
||||
|
||||
if (len < 16) {
|
||||
#ifdef COPY
|
||||
if (len == 0)
|
||||
return;
|
||||
|
||||
memcpy(partial_buf, src, len);
|
||||
xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf);
|
||||
#ifdef COPY
|
||||
memcpy(dst, partial_buf, len);
|
||||
#endif
|
||||
goto partial;
|
||||
@ -63,9 +62,23 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint
|
||||
|
||||
if (algn_diff < 4 && init_crc != 0) {
|
||||
xmm_t0 = xmm_crc_part;
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1);
|
||||
fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
|
||||
if (len >= 32) {
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1);
|
||||
fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
|
||||
} else {
|
||||
memcpy(partial_buf, src + 16, len - 16);
|
||||
xmm_crc_part = _mm_load_si128((__m128i*)partial_buf);
|
||||
fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
|
||||
src += 16;
|
||||
len -= 16;
|
||||
#ifdef COPY
|
||||
dst -= algn_diff;
|
||||
#endif
|
||||
goto partial;
|
||||
}
|
||||
|
||||
src += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
26  3rdparty/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h (vendored)
@@ -17,7 +17,7 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
@ -26,8 +26,9 @@
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "../../crc32_fold.h"
|
||||
#include "../../crc32_braid_p.h"
|
||||
#include "crc32.h"
|
||||
#include "crc32_braid_p.h"
|
||||
#include "crc32_braid_tbl.h"
|
||||
#include "x86_intrins.h"
|
||||
#include <assert.h>
|
||||
|
||||
@ -350,11 +351,22 @@ Z_INTERNAL uint32_t CRC32_FOLD_FINAL(crc32_fold *crc) {
|
||||
return crc->value;
|
||||
}
|
||||
|
||||
static inline uint32_t crc32_small(uint32_t crc, const uint8_t *buf, size_t len) {
|
||||
uint32_t c = (~crc) & 0xffffffff;
|
||||
|
||||
while (len) {
|
||||
len--;
|
||||
DO1;
|
||||
}
|
||||
|
||||
return c ^ 0xffffffff;
|
||||
}
|
||||
|
||||
Z_INTERNAL uint32_t CRC32(uint32_t crc32, const uint8_t *buf, size_t len) {
|
||||
/* For lens < 64, crc32_braid method is faster. The CRC32 instruction for
|
||||
* these short lengths might also prove to be effective */
|
||||
if (len < 64)
|
||||
return PREFIX(crc32_braid)(crc32, buf, len);
|
||||
/* For lens smaller than ~12, crc32_small method is faster.
|
||||
* But there are also minimum requirements for the pclmul functions due to alignment */
|
||||
if (len < 16)
|
||||
return crc32_small(crc32, buf, len);
|
||||
|
||||
crc32_fold ALIGNED_(16) crc_state;
|
||||
CRC32_FOLD_RESET(&crc_state);
|
||||
|
2  3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c (vendored)
@@ -3,7 +3,7 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
|
||||
#ifdef X86_VPCLMULQDQ_CRC
|
||||
|
||||
#define X86_VPCLMULQDQ
|
||||
#define CRC32_FOLD_COPY crc32_fold_vpclmulqdq_copy
|
||||
|
24  3rdparty/zlib-ng/arch/x86/insert_string_sse42.c (vendored)
@@ -1,24 +0,0 @@
|
||||
/* insert_string_sse42.c -- insert_string integer hash variant using SSE4.2's CRC instructions
|
||||
*
|
||||
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef X86_SSE42
|
||||
#include "../../zbuild.h"
|
||||
#include <nmmintrin.h>
|
||||
#include "../../deflate.h"
|
||||
|
||||
#define HASH_CALC(s, h, val)\
|
||||
h = _mm_crc32_u32(h, val)
|
||||
|
||||
#define HASH_CALC_VAR h
|
||||
#define HASH_CALC_VAR_INIT uint32_t h = 0
|
||||
|
||||
#define UPDATE_HASH update_hash_sse42
|
||||
#define INSERT_STRING insert_string_sse42
|
||||
#define QUICK_INSERT_STRING quick_insert_string_sse42
|
||||
|
||||
#include "../../insert_string_tpl.h"
|
||||
#endif
|
4  3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c (vendored)
@@ -9,8 +9,8 @@
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
|
4  3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c (vendored)
@@ -8,8 +8,8 @@
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <assert.h>
|
||||
|
24  3rdparty/zlib-ng/arch/x86/x86_features.c (vendored)
@@ -7,7 +7,7 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "zbuild.h"
|
||||
#include "x86_features.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
@ -15,6 +15,13 @@
|
||||
#else
|
||||
// Newer versions of GCC and clang come with cpuid.h
|
||||
# include <cpuid.h>
|
||||
# ifdef X86_HAVE_XSAVE_INTRIN
|
||||
# if __GNUC__ == 8
|
||||
# include <xsaveintrin.h>
|
||||
# else
|
||||
# include <immintrin.h>
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
@ -29,6 +36,7 @@ static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx,
|
||||
*ecx = registers[2];
|
||||
*edx = registers[3];
|
||||
#else
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
__cpuid(info, *eax, *ebx, *ecx, *edx);
|
||||
#endif
|
||||
}
|
||||
@ -43,12 +51,13 @@ static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx,
|
||||
*ecx = registers[2];
|
||||
*edx = registers[3];
|
||||
#else
|
||||
*eax = *ebx = *ecx = *edx = 0;
|
||||
__cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint64_t xgetbv(unsigned int xcr) {
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN)
|
||||
return _xgetbv(xcr);
|
||||
#else
|
||||
uint32_t eax, edx;
|
||||
@ -90,7 +99,16 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) {
|
||||
|
||||
// check AVX512 bits if the OS supports saving ZMM registers
|
||||
if (features->has_os_save_zmm) {
|
||||
features->has_avx512 = ebx & 0x00010000;
|
||||
features->has_avx512f = ebx & 0x00010000;
|
||||
if (features->has_avx512f) {
|
||||
// According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable
|
||||
// AVX512(DQ,BW,VL).
|
||||
features->has_avx512dq = ebx & 0x00020000;
|
||||
features->has_avx512bw = ebx & 0x40000000;
|
||||
features->has_avx512vl = ebx & 0x80000000;
|
||||
}
|
||||
features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \
|
||||
&& features->has_avx512vl;
|
||||
features->has_avx512vnni = ecx & 0x800;
|
||||
}
|
||||
}
|
||||
|
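The x86_features.c hunk above only trusts the CPUID AVX-512 bits when has_os_save_zmm is set, that is, when XCR0 shows the operating system saves the opmask and ZMM state components. A hedged sketch of that XCR0 check follows; the helper name os_saves_zmm is made up for illustration, while zlib-ng's actual logic lives inside x86_check_features.

/* Illustrative only. Requires OSXSAVE (CPUID.1:ECX bit 27) and, with
 * GCC/Clang, compiling with -mxsave so that _xgetbv() is available. */
#include <stdint.h>
#include <immintrin.h>

static int os_saves_zmm(void) {
    uint64_t xcr0 = _xgetbv(0);
    /* bits 1,2 = SSE/AVX state; bits 5,6,7 = opmask, ZMM_Hi256, Hi16_ZMM */
    return (xcr0 & 0xE6) == 0xE6;
}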
14  3rdparty/zlib-ng/arch/x86/x86_features.h (vendored)
@@ -1,14 +1,18 @@
|
||||
/* x86_features.h -- check for CPU features
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef X86_FEATURES_H_
|
||||
#define X86_FEATURES_H_
|
||||
|
||||
struct x86_cpu_features {
|
||||
int has_avx2;
|
||||
int has_avx512;
|
||||
int has_avx512f;
|
||||
int has_avx512dq;
|
||||
int has_avx512bw;
|
||||
int has_avx512vl;
|
||||
int has_avx512_common; // Enabled when AVX512(F,DQ,BW,VL) are all enabled.
|
||||
int has_avx512vnni;
|
||||
int has_sse2;
|
||||
int has_ssse3;
|
||||
@ -21,4 +25,4 @@ struct x86_cpu_features {
|
||||
|
||||
void Z_INTERNAL x86_check_features(struct x86_cpu_features *features);
|
||||
|
||||
#endif /* CPU_H_ */
|
||||
#endif /* X86_FEATURES_H_ */
|
||||
|
172  3rdparty/zlib-ng/arch/x86/x86_functions.h (vendored, new file)
@@ -0,0 +1,172 @@
|
||||
/* x86_functions.h -- x86 implementations for arch-specific functions.
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef X86_FUNCTIONS_H_
|
||||
#define X86_FUNCTIONS_H_
|
||||
|
||||
#ifdef X86_SSE2
|
||||
uint32_t chunksize_sse2(void);
|
||||
uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
|
||||
uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
|
||||
void slide_hash_sse2(deflate_state *s);
|
||||
# endif
|
||||
void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSSE3
|
||||
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef X86_SSE42
|
||||
uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef X86_AVX2
|
||||
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
uint32_t chunksize_avx2(void);
|
||||
uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
|
||||
uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
|
||||
void slide_hash_avx2(deflate_state *s);
|
||||
# endif
|
||||
void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
|
||||
void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
|
||||
uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_VPCLMULQDQ_CRC
|
||||
uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
|
||||
void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
|
||||
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// X86 - SSE2
|
||||
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_sse2
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_sse2
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_sse2
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_sse2
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_sse2
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_sse2
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_sse2
|
||||
# endif
|
||||
#endif
|
||||
// X86 - SSSE3
|
||||
# if defined(X86_SSSE3) && defined(__SSSE3__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_ssse3
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_ssse3
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_ssse3
|
||||
# endif
|
||||
// X86 - SSE4.2
|
||||
# if defined(X86_SSE42) && defined(__SSE4_2__)
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_sse42
|
||||
# endif
|
||||
|
||||
// X86 - PCLMUL
|
||||
#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_pclmulqdq
|
||||
# undef native_crc32_fold
|
||||
# define native_crc32_fold crc32_fold_pclmulqdq
|
||||
# undef native_crc32_fold_copy
|
||||
# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
|
||||
# undef native_crc32_fold_final
|
||||
# define native_crc32_fold_final crc32_fold_pclmulqdq_final
|
||||
# undef native_crc32_fold_reset
|
||||
# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
|
||||
#endif
|
||||
// X86 - AVX
|
||||
# if defined(X86_AVX2) && defined(__AVX2__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx2
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx2
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_avx2
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_avx2
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_avx2
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_avx2
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_avx2
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_avx2
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_avx2
|
||||
# endif
|
||||
# endif
|
||||
|
||||
// X86 - AVX512 (F,DQ,BW,Vl)
|
||||
# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx512
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512
|
||||
// X86 - AVX512 (VNNI)
|
||||
# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx512_vnni
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
|
||||
# endif
|
||||
// X86 - VPCLMULQDQ
|
||||
# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_vpclmulqdq
|
||||
# undef native_crc32_fold
|
||||
# define native_crc32_fold crc32_fold_vpclmulqdq
|
||||
# undef native_crc32_fold_copy
|
||||
# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
|
||||
# undef native_crc32_fold_final
|
||||
# define native_crc32_fold_final crc32_fold_vpclmulqdq_final
|
||||
# undef native_crc32_fold_reset
|
||||
# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* X86_FUNCTIONS_H_ */
|
4  3rdparty/zlib-ng/arch/x86/x86_intrins.h (vendored)
@@ -7,7 +7,7 @@
|
||||
#ifdef __AVX2__
|
||||
#include <immintrin.h>
|
||||
|
||||
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \
|
||||
#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 10) \
|
||||
|| (defined(__apple_build_version__) && __apple_build_version__ < 9020039)
|
||||
static inline __m256i _mm256_zextsi128_si256(__m128i a) {
|
||||
__m128i r;
|
||||
@ -29,7 +29,7 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) {
|
||||
/* GCC <9 is missing some AVX512 intrinsics.
|
||||
*/
|
||||
#ifdef __AVX512F__
|
||||
#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9)
|
||||
#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 9)
|
||||
#include <immintrin.h>
|
||||
|
||||
#define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \
|
||||
|
29  3rdparty/zlib-ng/arch_functions.h (vendored, new file)
@@ -0,0 +1,29 @@
|
||||
/* arch_functions.h -- Arch-specific function prototypes.
|
||||
* Copyright (C) 2017 Hans Kristian Rosbach
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef CPU_FUNCTIONS_H_
|
||||
#define CPU_FUNCTIONS_H_
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "crc32.h"
|
||||
#include "deflate.h"
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#include "arch/generic/generic_functions.h"
|
||||
|
||||
#if defined(X86_FEATURES)
|
||||
# include "arch/x86/x86_functions.h"
|
||||
#elif defined(ARM_FEATURES)
|
||||
# include "arch/arm/arm_functions.h"
|
||||
#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
|
||||
# include "arch/power/power_functions.h"
|
||||
#elif defined(S390_FEATURES)
|
||||
# include "arch/s390/s390_functions.h"
|
||||
#elif defined(RISCV_FEATURES)
|
||||
# include "arch/riscv/riscv_functions.h"
|
||||
#endif
|
||||
|
||||
#endif
|
8  3rdparty/zlib-ng/chunkset_tpl.h (vendored)
@@ -5,7 +5,7 @@
|
||||
#include "zbuild.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2)
|
||||
#if CHUNK_SIZE == 32 && defined(X86_SSSE3)
|
||||
extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len);
|
||||
#endif
|
||||
|
||||
@ -25,7 +25,7 @@ Z_INTERNAL uint32_t CHUNKSIZE(void) {
|
||||
without iteration, which will hopefully make the branch prediction more
|
||||
reliable. */
|
||||
#ifndef HAVE_CHUNKCOPY
|
||||
Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
|
||||
static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
|
||||
Assert(len > 0, "chunkcopy should never have a length 0");
|
||||
chunk_t chunk;
|
||||
int32_t align = ((len - 1) % sizeof(chunk_t)) + 1;
|
||||
@ -54,7 +54,7 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
|
||||
least 258 bytes of output space available (258 being the maximum length
|
||||
output from a single token; see inflate_fast()'s assumptions below). */
|
||||
#ifndef HAVE_CHUNKUNROLL
|
||||
Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
|
||||
static inline uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
|
||||
unsigned char const *from = out - *dist;
|
||||
chunk_t chunk;
|
||||
while (*dist < *len && *dist < sizeof(chunk_t)) {
|
||||
@ -98,7 +98,7 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
||||
Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
|
||||
Assert(dist > 0, "chunkmemset cannot have a distance 0");
|
||||
/* Only AVX2 */
|
||||
#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2)
|
||||
#if CHUNK_SIZE == 32 && defined(X86_SSSE3)
|
||||
if (len <= 16) {
|
||||
return chunkmemset_ssse3(out, dist, len);
|
||||
}
|
||||
|
115  3rdparty/zlib-ng/cmake/detect-arch.c (vendored, new file)
@@ -0,0 +1,115 @@
|
||||
// archdetect.c -- Detect compiler architecture and raise preprocessor error
|
||||
// containing a simple arch identifier.
|
||||
// Copyright (C) 2019 Hans Kristian Rosbach
|
||||
// Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
// x86_64
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#error archfound x86_64
|
||||
|
||||
// x86
|
||||
#elif defined(__i386) || defined(_M_IX86)
|
||||
#error archfound i686
|
||||
|
||||
// ARM
|
||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#error archfound aarch64
|
||||
#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM)
|
||||
#if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__)
|
||||
#error archfound armv8
|
||||
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
|
||||
#error archfound armv7
|
||||
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__)
|
||||
#error archfound armv6
|
||||
#elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
|
||||
#error archfound armv5
|
||||
#elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__)
|
||||
#error archfound armv4
|
||||
#elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__)
|
||||
#error archfound armv3
|
||||
#elif defined(__ARM_ARCH_2__)
|
||||
#error archfound armv2
|
||||
#endif
|
||||
|
||||
// PowerPC
|
||||
#elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__)
|
||||
#if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#error archfound powerpc64le
|
||||
#else
|
||||
#error archfound powerpc64
|
||||
#endif
|
||||
#else
|
||||
#error archfound powerpc
|
||||
#endif
|
||||
|
||||
// --------------- Less common architectures alphabetically below ---------------
|
||||
|
||||
// ALPHA
|
||||
#elif defined(__alpha__) || defined(__alpha)
|
||||
#error archfound alpha
|
||||
|
||||
// Blackfin
|
||||
#elif defined(__BFIN__)
|
||||
#error archfound blackfin
|
||||
|
||||
// Itanium
|
||||
#elif defined(__ia64) || defined(_M_IA64)
|
||||
#error archfound ia64
|
||||
|
||||
// MIPS
|
||||
#elif defined(__mips__) || defined(__mips)
|
||||
#error archfound mips
|
||||
|
||||
// Motorola 68000-series
|
||||
#elif defined(__m68k__)
|
||||
#error archfound m68k
|
||||
|
||||
// SuperH
|
||||
#elif defined(__sh__)
|
||||
#error archfound sh
|
||||
|
||||
// SPARC
|
||||
#elif defined(__sparc__) || defined(__sparc)
|
||||
#if defined(__sparcv9) || defined(__sparc_v9__)
|
||||
#error archfound sparc9
|
||||
#elif defined(__sparcv8) || defined(__sparc_v8__)
|
||||
#error archfound sparc8
|
||||
#endif
|
||||
|
||||
// SystemZ
|
||||
#elif defined(__370__)
|
||||
#error archfound s370
|
||||
#elif defined(__s390__)
|
||||
#error archfound s390
|
||||
#elif defined(__s390x) || defined(__zarch__)
|
||||
#error archfound s390x
|
||||
|
||||
// PARISC
|
||||
#elif defined(__hppa__)
|
||||
#error archfound parisc
|
||||
|
||||
// RS-6000
|
||||
#elif defined(__THW_RS6000)
|
||||
#error archfound rs6000
|
||||
|
||||
// RISC-V
|
||||
#elif defined(__riscv)
|
||||
#if __riscv_xlen == 64
|
||||
#error archfound riscv64
|
||||
#elif __riscv_xlen == 32
|
||||
#error archfound riscv32
|
||||
#endif
|
||||
|
||||
// LOONGARCH
|
||||
#elif defined(__loongarch_lp64)
|
||||
#error archfound loongarch64
|
||||
|
||||
// Emscripten (WebAssembly)
|
||||
#elif defined(__EMSCRIPTEN__)
|
||||
#error archfound wasm32
|
||||
|
||||
// return 'unrecognized' if we do not know what architecture this is
|
||||
#else
|
||||
#error archfound unrecognized
|
||||
#endif
|
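detect-arch.c is never meant to compile: every branch ends in an "#error archfound <tag>" directive, and the companion CMake script in the next hunk runs try_run, captures the compile output, and extracts the tag from the diagnostics. A minimal, hypothetical version of the same trick looks like this:

/* probe.c -- compilation is expected to fail; the #error text carries the answer. */
#if defined(__x86_64__) || defined(_M_X64)
#  error archfound x86_64
#elif defined(__aarch64__) || defined(_M_ARM64)
#  error archfound aarch64
#else
#  error archfound unrecognized
#endif

On the CMake side, string(REGEX REPLACE ".*archfound ([a-zA-Z0-9_]+).*" "\\1" ARCH "${RAWOUTPUT}") pulls the identifier out of the captured compiler output, exactly as detect-arch.cmake does below.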
104  3rdparty/zlib-ng/cmake/detect-arch.cmake (vendored, new file)
@@ -0,0 +1,104 @@
|
||||
# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH
|
||||
# Copyright (C) 2019 Hans Kristian Rosbach
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
set(ARCHDETECT_FOUND TRUE)
|
||||
|
||||
if(CMAKE_OSX_ARCHITECTURES)
|
||||
# If multiple architectures are requested (universal build), pick only the first
|
||||
list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH)
|
||||
elseif(MSVC)
|
||||
if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86")
|
||||
set(ARCH "i686")
|
||||
elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64")
|
||||
set(ARCH "x86_64")
|
||||
elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7")
|
||||
set(ARCH "arm")
|
||||
elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64EC")
|
||||
set(ARCH "aarch64")
|
||||
endif()
|
||||
elseif(EMSCRIPTEN)
|
||||
set(ARCH "wasm32")
|
||||
elseif(CMAKE_CROSSCOMPILING)
|
||||
set(ARCH ${CMAKE_C_COMPILER_TARGET})
|
||||
else()
|
||||
# Let preprocessor parse archdetect.c and raise an error containing the arch identifier
|
||||
enable_language(C)
|
||||
try_run(
|
||||
run_result_unused
|
||||
compile_result_unused
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_LIST_DIR}/detect-arch.c
|
||||
COMPILE_OUTPUT_VARIABLE RAWOUTPUT
|
||||
CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
|
||||
)
|
||||
|
||||
# Find basearch tag, and extract the arch word into BASEARCH variable
|
||||
string(REGEX REPLACE ".*archfound ([a-zA-Z0-9_]+).*" "\\1" ARCH "${RAWOUTPUT}")
|
||||
if(NOT ARCH)
|
||||
set(ARCH unknown)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Make sure we have ARCH set
|
||||
if(NOT ARCH OR ARCH STREQUAL "unknown")
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'")
|
||||
else()
|
||||
message(STATUS "Arch detected: '${ARCH}'")
|
||||
endif()
|
||||
|
||||
# Base arch detection
|
||||
if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
|
||||
set(BASEARCH "x86")
|
||||
set(BASEARCH_X86_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64|cortex)")
|
||||
set(BASEARCH "arm")
|
||||
set(BASEARCH_ARM_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
|
||||
set(BASEARCH "ppc")
|
||||
set(BASEARCH_PPC_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "alpha")
|
||||
set(BASEARCH "alpha")
|
||||
set(BASEARCH_ALPHA_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "blackfin")
|
||||
set(BASEARCH "blackfin")
|
||||
set(BASEARCH_BLACKFIN_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "ia64")
|
||||
set(BASEARCH "ia64")
|
||||
set(BASEARCH_IA64_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "mips")
|
||||
set(BASEARCH "mips")
|
||||
set(BASEARCH_MIPS_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "m68k")
|
||||
set(BASEARCH "m68k")
|
||||
set(BASEARCH_M68K_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "sh")
|
||||
set(BASEARCH "sh")
|
||||
set(BASEARCH_SH_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "sparc[89]?")
|
||||
set(BASEARCH "sparc")
|
||||
set(BASEARCH_SPARC_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "s3[679]0x?")
|
||||
set(BASEARCH "s360")
|
||||
set(BASEARCH_S360_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "parisc")
|
||||
set(BASEARCH "parisc")
|
||||
set(BASEARCH_PARISC_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "rs6000")
|
||||
set(BASEARCH "rs6000")
|
||||
set(BASEARCH_RS6000_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "riscv(32|64)")
|
||||
set(BASEARCH "riscv")
|
||||
set(BASEARCH_RISCV_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "loongarch64")
|
||||
set(BASEARCH "loongarch")
|
||||
set(BASEARCH_LOONGARCH_FOUND TRUE)
|
||||
elseif("${ARCH}" MATCHES "wasm32")
|
||||
set(BASEARCH "wasm32")
|
||||
set(BASEARCH_WASM32_FOUND TRUE)
|
||||
else()
|
||||
set(BASEARCH "x86")
|
||||
set(BASEARCH_X86_FOUND TRUE)
|
||||
message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.")
|
||||
endif()
|
||||
message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'")
|
46  3rdparty/zlib-ng/cmake/detect-coverage.cmake (vendored, new file)
@@ -0,0 +1,46 @@
|
||||
# detect-coverage.cmake -- Detect supported compiler coverage flags
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
macro(add_code_coverage)
|
||||
# Check for -coverage flag support for Clang/GCC
|
||||
if(CMAKE_VERSION VERSION_LESS 3.14)
|
||||
set(CMAKE_REQUIRED_LIBRARIES -lgcov)
|
||||
else()
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS -coverage)
|
||||
endif()
|
||||
check_c_compiler_flag(-coverage HAVE_COVERAGE)
|
||||
set(CMAKE_REQUIRED_LIBRARIES)
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS)
|
||||
|
||||
if(HAVE_COVERAGE)
|
||||
add_compile_options(-coverage)
|
||||
add_link_options(-coverage)
|
||||
message(STATUS "Code coverage enabled using: -coverage")
|
||||
else()
|
||||
# Some versions of GCC don't support -coverage shorthand
|
||||
if(CMAKE_VERSION VERSION_LESS 3.14)
|
||||
set(CMAKE_REQUIRED_LIBRARIES -lgcov)
|
||||
else()
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS -lgcov -fprofile-arcs)
|
||||
endif()
|
||||
check_c_compiler_flag("-ftest-coverage -fprofile-arcs -fprofile-values" HAVE_TEST_COVERAGE)
|
||||
set(CMAKE_REQUIRED_LIBRARIES)
|
||||
set(CMAKE_REQUIRED_LINK_OPTIONS)
|
||||
|
||||
if(HAVE_TEST_COVERAGE)
|
||||
add_compile_options(-ftest-coverage -fprofile-arcs -fprofile-values)
|
||||
add_link_options(-lgcov -fprofile-arcs)
|
||||
message(STATUS "Code coverage enabled using: -ftest-coverage")
|
||||
else()
|
||||
message(WARNING "Compiler does not support code coverage")
|
||||
set(WITH_CODE_COVERAGE OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set optimization level to zero for code coverage builds
|
||||
if (WITH_CODE_COVERAGE)
|
||||
# Use CMake compiler flag variables due to add_compile_options failure on Windows GCC
|
||||
set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "-O0 ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
endmacro()
|
43  3rdparty/zlib-ng/cmake/detect-install-dirs.cmake (vendored, new file)
@@ -0,0 +1,43 @@
|
||||
# detect-install-dirs.cmake -- Detect install directory parameters
|
||||
# Copyright (C) 2021 Hans Kristian Rosbach
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
# Determine installation directory for executables
|
||||
if (DEFINED BIN_INSTALL_DIR)
|
||||
set(BIN_INSTALL_DIR "${BIN_INSTALL_DIR}" CACHE PATH "Installation directory for executables (Deprecated)" FORCE)
|
||||
set(CMAKE_INSTALL_BINDIR "${BIN_INSTALL_DIR}")
|
||||
elseif (DEFINED INSTALL_BIN_DIR)
|
||||
set(CMAKE_INSTALL_BINDIR "${INSTALL_BIN_DIR}")
|
||||
endif()
|
||||
|
||||
# Determine installation directory for libraries
|
||||
if (DEFINED LIB_INSTALL_DIR)
|
||||
set(LIB_INSTALL_DIR "${LIB_INSTALL_DIR}" CACHE PATH "Installation directory for libraries (Deprecated)" FORCE)
|
||||
set(CMAKE_INSTALL_LIBDIR "${LIB_INSTALL_DIR}")
|
||||
elseif (DEFINED INSTALL_LIB_DIR)
|
||||
set(CMAKE_INSTALL_LIBDIR "${INSTALL_LIB_DIR}")
|
||||
endif()
|
||||
|
||||
# Determine installation directory for include files
|
||||
if (DEFINED INC_INSTALL_DIR)
|
||||
set(INC_INSTALL_DIR "${INC_INSTALL_DIR}" CACHE PATH "Installation directory for headers (Deprecated)" FORCE)
|
||||
set(CMAKE_INSTALL_INCLUDEDIR "${INC_INSTALL_DIR}")
|
||||
elseif (DEFINED INSTALL_INC_DIR)
|
||||
set(CMAKE_INSTALL_INCLUDEDIR "${INSTALL_INC_DIR}")
|
||||
endif()
|
||||
|
||||
# Define GNU standard installation directories
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# Determine installation directory for pkgconfig files
|
||||
if (DEFINED PKGCONFIG_INSTALL_DIR)
|
||||
set(PKGCONFIG_INSTALL_DIR "${PKGCONFIG_INSTALL_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE)
|
||||
elseif (DEFINED INSTALL_PKGCONFIG_DIR)
|
||||
set(PKGCONFIG_INSTALL_DIR "${INSTALL_PKGCONFIG_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE)
|
||||
elseif (DEFINED CMAKE_INSTALL_PKGCONFIGDIR)
|
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE)
|
||||
elseif (DEFINED CMAKE_INSTALL_FULL_PKGCONFIGDIR)
|
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_FULL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE)
|
||||
else()
|
||||
set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files")
|
||||
endif()
|
281
3rdparty/zlib-ng/cmake/detect-intrinsics.cmake
vendored
@ -2,40 +2,39 @@
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
macro(check_acle_compiler_flag)
|
||||
if(MSVC)
|
||||
# Both ARM and ARM64-targeting msvc support intrinsics, but
|
||||
# ARM msvc is missing some intrinsics introduced with ARMv8, e.g. crc32
|
||||
if(MSVC_C_ARCHITECTURE_ID STREQUAL "ARM64")
|
||||
set(HAVE_ACLE_FLAG TRUE)
|
||||
endif()
|
||||
else()
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC)
|
||||
if(HAVE_MARCH_ARMV8_CRC)
|
||||
set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support")
|
||||
else()
|
||||
check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD)
|
||||
if(HAVE_MARCH_ARMV8_CRC_SIMD)
|
||||
set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
# Check whether compiler supports ACLE flag
|
||||
set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
check_c_source_compiles(
|
||||
"int main() { return 0; }"
|
||||
HAVE_ACLE_FLAG FAIL_REGEX "not supported")
|
||||
if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG)
|
||||
set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE)
|
||||
# Check whether compiler supports ACLE flag
|
||||
set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}")
|
||||
check_c_source_compiles(
|
||||
"int main() { return 0; }"
|
||||
HAVE_ACLE_FLAG2 FAIL_REGEX "not supported")
|
||||
set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE)
|
||||
unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endif()
|
||||
# Check whether compiler supports ARMv8 CRC intrinsics
|
||||
set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
check_c_source_compiles(
|
||||
"#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <arm_acle.h>
|
||||
#endif
|
||||
unsigned int f(unsigned int a, unsigned int b) {
|
||||
return __crc32w(a, b);
|
||||
}
|
||||
int main(void) { return 0; }"
|
||||
HAVE_ACLE_FLAG
|
||||
)
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_armv6_compiler_flag)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6)
|
||||
if(HAVE_MARCH_ARMV6)
|
||||
set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support")
|
||||
@ -67,21 +66,21 @@ macro(check_armv6_compiler_flag)
|
||||
return __uqsub16(a, b);
|
||||
#endif
|
||||
}
|
||||
int main(void) { return 0; }"
|
||||
int main(void) { return f(1,2); }"
|
||||
HAVE_ARMV6_INTRIN
|
||||
)
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_avx512_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
|
||||
else()
|
||||
set(AVX512FLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
|
||||
else()
|
||||
set(AVX512FLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
# For CPUs that can benefit from AVX512, it seems GCC generates suboptimal
|
||||
# instruction scheduling unless you specify a reasonable -mtune= target
|
||||
set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl")
|
||||
@ -94,9 +93,9 @@ macro(check_avx512_intrinsics)
|
||||
endif()
|
||||
unset(HAVE_CASCADE_LAKE)
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
set(AVX512FLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
set(AVX512FLAG "/arch:AVX512")
|
||||
endif()
|
||||
# Check whether compiler supports AVX512 intrinsics
|
||||
set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
@ -109,26 +108,17 @@ macro(check_avx512_intrinsics)
|
||||
int main(void) { return 0; }"
|
||||
HAVE_AVX512_INTRIN
|
||||
)
|
||||
|
||||
# Evidently both GCC and clang were late to implementing these
|
||||
check_c_source_compiles(
|
||||
"#include <immintrin.h>
|
||||
__mmask16 f(__mmask16 x) { return _knot_mask16(x); }
|
||||
int main(void) { return 0; }"
|
||||
HAVE_MASK_INTRIN
|
||||
)
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_avx512vnni_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
|
||||
else()
|
||||
set(AVX512VNNIFLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
|
||||
set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
|
||||
else()
|
||||
set(AVX512VNNIFLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni")
|
||||
if(NOT MSVC)
|
||||
check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE)
|
||||
@ -139,11 +129,10 @@ macro(check_avx512vnni_intrinsics)
|
||||
endif()
|
||||
unset(HAVE_CASCADE_LAKE)
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
set(AVX512VNNIFLAG "/arch:AVX512")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
set(AVX512VNNIFLAG "/arch:AVX512")
|
||||
endif()
|
||||
|
||||
# Check whether compiler supports AVX512vnni intrinsics
|
||||
set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
check_c_source_compiles(
|
||||
@ -159,18 +148,18 @@ macro(check_avx512vnni_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_avx2_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(AVX2FLAG "-mavx2")
|
||||
else()
|
||||
set(AVX2FLAG "/arch:AVX2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(AVX2FLAG "-mavx2")
|
||||
else()
|
||||
elseif(MSVC)
|
||||
set(AVX2FLAG "/arch:AVX2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
set(AVX2FLAG "-mavx2")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
set(AVX2FLAG "/arch:AVX2")
|
||||
endif()
|
||||
# Check whether compiler supports AVX2 intrinsics
|
||||
set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
@ -187,8 +176,8 @@ macro(check_avx2_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_neon_compiler_flag)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if("${ARCH}" MATCHES "aarch64")
|
||||
set(NEONFLAG "-march=armv8-a+simd")
|
||||
else()
|
||||
@ -206,12 +195,52 @@ macro(check_neon_compiler_flag)
|
||||
#endif
|
||||
int main() { return 0; }"
|
||||
NEON_AVAILABLE FAIL_REGEX "not supported")
|
||||
# Check whether compiler native flag is enough for NEON support
|
||||
# Some GCC versions don't enable FPU (vector unit) when using -march=native
|
||||
if(NEON_AVAILABLE AND NATIVEFLAG AND (NOT "${ARCH}" MATCHES "aarch64"))
|
||||
check_c_source_compiles(
|
||||
"#include <arm_neon.h>
|
||||
uint8x16_t f(uint8x16_t x, uint8x16_t y) {
|
||||
return vaddq_u8(x, y);
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
uint8x16_t a = vdupq_n_u8(argc);
|
||||
uint8x16_t b = vdupq_n_u8(argc);
|
||||
uint8x16_t result = f(a, b);
|
||||
return result[0];
|
||||
}"
|
||||
ARM_NEON_SUPPORT_NATIVE
|
||||
)
|
||||
if(NOT ARM_NEON_SUPPORT_NATIVE)
|
||||
set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}")
|
||||
check_c_source_compiles(
|
||||
"#include <arm_neon.h>
|
||||
uint8x16_t f(uint8x16_t x, uint8x16_t y) {
|
||||
return vaddq_u8(x, y);
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
uint8x16_t a = vdupq_n_u8(argc);
|
||||
uint8x16_t b = vdupq_n_u8(argc);
|
||||
uint8x16_t result = f(a, b);
|
||||
return result[0];
|
||||
}"
|
||||
ARM_NEON_SUPPORT_NATIVE_MFPU
|
||||
)
|
||||
if(ARM_NEON_SUPPORT_NATIVE_MFPU)
|
||||
set(NEONFLAG "-mfpu=neon")
|
||||
else()
|
||||
# Remove local NEON_AVAILABLE variable and overwrite the cache
|
||||
unset(NEON_AVAILABLE)
|
||||
set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_neon_ld4_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if("${ARCH}" MATCHES "aarch64")
|
||||
set(NEONFLAG "-march=armv8-a+simd")
|
||||
else()
|
||||
@ -234,8 +263,8 @@ macro(check_neon_ld4_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_pclmulqdq_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
|
||||
set(PCLMULFLAG "-mpclmul")
|
||||
endif()
|
||||
endif()
|
||||
@ -257,8 +286,8 @@ macro(check_pclmulqdq_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_vpclmulqdq_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM")
|
||||
set(VPCLMULFLAG "-mvpclmulqdq -mavx512f")
|
||||
endif()
|
||||
endif()
|
||||
@ -341,8 +370,8 @@ macro(check_ppc_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_power8_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(POWER8FLAG "-mcpu=power8")
|
||||
endif()
|
||||
endif()
|
||||
@ -364,12 +393,27 @@ macro(check_power8_intrinsics)
|
||||
}"
|
||||
HAVE_POWER8_INTRIN
|
||||
)
|
||||
if(NOT HAVE_POWER8_INTRIN AND HAVE_LINUX_AUXVEC_H)
|
||||
check_c_source_compiles(
|
||||
"#include <sys/auxv.h>
|
||||
#include <linux/auxvec.h>
|
||||
int main() {
|
||||
return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
|
||||
}"
|
||||
HAVE_POWER8_INTRIN2
|
||||
)
|
||||
if(HAVE_POWER8_INTRIN2)
|
||||
set(POWER8_NEED_AUXVEC_H 1)
|
||||
set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE)
|
||||
unset(HAVE_POWER8_INTRIN2 CACHE)
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_rvv_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(RISCVFLAG "-march=rv64gcv")
|
||||
endif()
|
||||
endif()
|
||||
@ -399,8 +443,8 @@ macro(check_s390_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_power9_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(POWER9FLAG "-mcpu=power9")
|
||||
endif()
|
||||
endif()
|
||||
@ -422,22 +466,37 @@ macro(check_power9_intrinsics)
|
||||
}"
|
||||
HAVE_POWER9_INTRIN
|
||||
)
|
||||
if(NOT HAVE_POWER9_INTRIN AND HAVE_LINUX_AUXVEC_H)
|
||||
check_c_source_compiles(
|
||||
"#include <sys/auxv.h>
|
||||
#include <linux/auxvec.h>
|
||||
int main() {
|
||||
return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00);
|
||||
}"
|
||||
HAVE_POWER9_INTRIN2
|
||||
)
|
||||
if(HAVE_POWER9_INTRIN2)
|
||||
set(POWER9_NEED_AUXVEC_H 1)
|
||||
set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE)
|
||||
unset(HAVE_POWER9_INTRIN2 CACHE)
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
endmacro()
|
||||
|
||||
macro(check_sse2_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSE2FLAG "-msse2")
|
||||
else()
|
||||
set(SSE2FLAG "/arch:SSE2")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
if(NOT "${ARCH}" MATCHES "x86_64")
|
||||
set(SSE2FLAG "/arch:SSE2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSE2FLAG "-msse2")
|
||||
else()
|
||||
set(SSE2FLAG "/arch:SSE2")
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
if(NOT "${ARCH}" MATCHES "x86_64")
|
||||
set(SSE2FLAG "/arch:SSE2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(SSE2FLAG "-msse2")
|
||||
endif()
|
||||
endif()
|
||||
@ -453,14 +512,14 @@ macro(check_sse2_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_ssse3_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSSE3FLAG "-mssse3")
|
||||
else()
|
||||
set(SSSE3FLAG "/arch:SSSE3")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSSE3FLAG "-mssse3")
|
||||
else()
|
||||
set(SSSE3FLAG "/arch:SSSE3")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(SSSE3FLAG "-mssse3")
|
||||
endif()
|
||||
endif()
|
||||
@ -478,14 +537,14 @@ macro(check_ssse3_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_sse42_intrinsics)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSE42FLAG "-msse4.2")
|
||||
else()
|
||||
set(SSE42FLAG "/arch:SSE4.2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
if(NOT NATIVEFLAG)
|
||||
if(NOT NATIVEFLAG)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
if(CMAKE_HOST_UNIX OR APPLE)
|
||||
set(SSE42FLAG "-msse4.2")
|
||||
else()
|
||||
set(SSE42FLAG "/arch:SSE4.2")
|
||||
endif()
|
||||
elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(SSE42FLAG "-msse4.2")
|
||||
endif()
|
||||
endif()
|
||||
@ -526,15 +585,17 @@ macro(check_vgfma_intrinsics)
|
||||
endmacro()
|
||||
|
||||
macro(check_xsave_intrinsics)
|
||||
if(NOT NATIVEFLAG AND NOT MSVC)
|
||||
if(NOT NATIVEFLAG AND NOT MSVC AND NOT CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||
set(XSAVEFLAG "-mxsave")
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}")
|
||||
check_c_source_compiles(
|
||||
"#ifdef _MSC_VER
|
||||
# include <intrin.h>
|
||||
#elif __GNUC__ == 8 && __GNUC_MINOR__ > 1
|
||||
# include <xsaveintrin.h>
|
||||
#else
|
||||
# include <x86gprintrin.h>
|
||||
# include <immintrin.h>
|
||||
#endif
|
||||
unsigned int f(unsigned int a) { return (int) _xgetbv(a); }
|
||||
int main(void) { return 0; }"
|
||||
|
166
3rdparty/zlib-ng/cmake/detect-sanitizer.cmake
vendored
Normal file
@ -0,0 +1,166 @@
|
||||
# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
macro(add_common_sanitizer_flags)
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
add_compile_options(-g3)
|
||||
endif()
|
||||
check_c_compiler_flag(-fno-omit-frame-pointer HAVE_NO_OMIT_FRAME_POINTER)
|
||||
if(HAVE_NO_OMIT_FRAME_POINTER)
|
||||
add_compile_options(-fno-omit-frame-pointer)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
endif()
|
||||
check_c_compiler_flag(-fno-optimize-sibling-calls HAVE_NO_OPTIMIZE_SIBLING_CALLS)
|
||||
if(HAVE_NO_OPTIMIZE_SIBLING_CALLS)
|
||||
add_compile_options(-fno-optimize-sibling-calls)
|
||||
add_link_options(-fno-optimize-sibling-calls)
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(check_sanitizer_support known_checks supported_checks)
|
||||
set(available_checks "")
|
||||
|
||||
# Build list of supported sanitizer flags by incrementally trying compilation with
|
||||
# known sanitizer checks
|
||||
|
||||
foreach(check ${known_checks})
|
||||
if(available_checks STREQUAL "")
|
||||
set(compile_checks "${check}")
|
||||
else()
|
||||
set(compile_checks "${available_checks},${check}")
|
||||
endif()
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS -fsanitize=${compile_checks})
|
||||
|
||||
check_c_source_compiles("int main() { return 0; }" HAVE_SANITIZER_${check}
|
||||
FAIL_REGEX "not supported|unrecognized command|unknown option")
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS)
|
||||
|
||||
if(HAVE_SANITIZER_${check})
|
||||
set(available_checks ${compile_checks})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(${supported_checks} ${available_checks})
|
||||
endmacro()
|
||||
|
||||
macro(add_address_sanitizer)
|
||||
set(known_checks
|
||||
address
|
||||
pointer-compare
|
||||
pointer-subtract
|
||||
)
|
||||
|
||||
check_sanitizer_support("${known_checks}" supported_checks)
|
||||
if(NOT ${supported_checks} STREQUAL "")
|
||||
message(STATUS "Address sanitizer is enabled: ${supported_checks}")
|
||||
add_compile_options(-fsanitize=${supported_checks})
|
||||
add_link_options(-fsanitize=${supported_checks})
|
||||
add_common_sanitizer_flags()
|
||||
else()
|
||||
message(STATUS "Address sanitizer is not supported")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CROSSCOMPILING_EMULATOR)
|
||||
# Only check for leak sanitizer if not cross-compiling due to qemu crash
|
||||
message(WARNING "Leak sanitizer is not supported when cross compiling")
|
||||
else()
|
||||
# Leak sanitizer requires address sanitizer
|
||||
check_sanitizer_support("leak" supported_checks)
|
||||
if(NOT ${supported_checks} STREQUAL "")
|
||||
message(STATUS "Leak sanitizer is enabled: ${supported_checks}")
|
||||
add_compile_options(-fsanitize=${supported_checks})
|
||||
add_link_options(-fsanitize=${supported_checks})
|
||||
add_common_sanitizer_flags()
|
||||
else()
|
||||
message(STATUS "Leak sanitizer is not supported")
|
||||
endif()
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(add_memory_sanitizer)
|
||||
check_sanitizer_support("memory" supported_checks)
|
||||
if(NOT ${supported_checks} STREQUAL "")
|
||||
message(STATUS "Memory sanitizer is enabled: ${supported_checks}")
|
||||
add_compile_options(-fsanitize=${supported_checks})
|
||||
add_link_options(-fsanitize=${supported_checks})
|
||||
add_common_sanitizer_flags()
|
||||
|
||||
check_c_compiler_flag(-fsanitize-memory-track-origins HAVE_MEMORY_TRACK_ORIGINS)
|
||||
if(HAVE_MEMORY_TRACK_ORIGINS)
|
||||
add_compile_options(-fsanitize-memory-track-origins)
|
||||
add_link_options(-fsanitize-memory-track-origins)
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Memory sanitizer is not supported")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(add_thread_sanitizer)
|
||||
check_sanitizer_support("thread" supported_checks)
|
||||
if(NOT ${supported_checks} STREQUAL "")
|
||||
message(STATUS "Thread sanitizer is enabled: ${supported_checks}")
|
||||
add_compile_options(-fsanitize=${supported_checks})
|
||||
add_link_options(-fsanitize=${supported_checks})
|
||||
add_common_sanitizer_flags()
|
||||
else()
|
||||
message(STATUS "Thread sanitizer is not supported")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
macro(add_undefined_sanitizer)
|
||||
set(known_checks
|
||||
array-bounds
|
||||
bool
|
||||
bounds
|
||||
builtin
|
||||
enum
|
||||
float-cast-overflow
|
||||
float-divide-by-zero
|
||||
function
|
||||
integer-divide-by-zero
|
||||
local-bounds
|
||||
null
|
||||
nonnull-attribute
|
||||
pointer-overflow
|
||||
return
|
||||
returns-nonnull-attribute
|
||||
shift
|
||||
shift-base
|
||||
shift-exponent
|
||||
signed-integer-overflow
|
||||
undefined
|
||||
unsigned-integer-overflow
|
||||
unsigned-shift-base
|
||||
vla-bound
|
||||
vptr
|
||||
)
|
||||
|
||||
# Only check for alignment sanitizer flag if unaligned access is not supported
|
||||
if(NOT WITH_UNALIGNED)
|
||||
list(APPEND known_checks alignment)
|
||||
endif()
|
||||
# Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
|
||||
if(NOT CMAKE_C_FLAGS MATCHES "-O0")
|
||||
list(APPEND known_checks object-size)
|
||||
endif()
|
||||
|
||||
check_sanitizer_support("${known_checks}" supported_checks)
|
||||
|
||||
if(NOT ${supported_checks} STREQUAL "")
|
||||
message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}")
|
||||
add_compile_options(-fsanitize=${supported_checks})
|
||||
add_link_options(-fsanitize=${supported_checks})
|
||||
|
||||
# Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
|
||||
# it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
|
||||
if(WITH_UNALIGNED)
|
||||
add_compile_options(-fno-sanitize=alignment)
|
||||
endif()
|
||||
|
||||
add_common_sanitizer_flags()
|
||||
else()
|
||||
message(STATUS "Undefined behavior sanitizer is not supported")
|
||||
endif()
|
||||
endmacro()
|
264
3rdparty/zlib-ng/cpu_features.h
vendored
@ -6,12 +6,10 @@
|
||||
#ifndef CPU_FEATURES_H_
|
||||
#define CPU_FEATURES_H_
|
||||
|
||||
#include "adler32_fold.h"
|
||||
#include "crc32_fold.h"
|
||||
#ifndef DISABLE_RUNTIME_CPU_DETECTION
|
||||
|
||||
#if defined(X86_FEATURES)
|
||||
# include "arch/x86/x86_features.h"
|
||||
# include "fallback_builtins.h"
|
||||
#elif defined(ARM_FEATURES)
|
||||
# include "arch/arm/arm_features.h"
|
||||
#elif defined(PPC_FEATURES) || defined(POWER_FEATURES)
|
||||
@ -38,266 +36,8 @@ struct cpu_features {
|
||||
#endif
|
||||
};
|
||||
|
||||
extern void cpu_check_features(struct cpu_features *features);
|
||||
void cpu_check_features(struct cpu_features *features);
|
||||
|
||||
/* adler32 */
|
||||
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#ifdef ARM_NEON
|
||||
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef PPC_VMX
|
||||
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* adler32 folding */
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512
|
||||
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX512VNNI
|
||||
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
#endif
|
||||
|
||||
/* CRC32 folding */
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
|
||||
extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
|
||||
extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
|
||||
extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
|
||||
extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
|
||||
extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* memory chunking */
|
||||
extern uint32_t chunksize_c(void);
|
||||
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#ifdef X86_SSE2
|
||||
extern uint32_t chunksize_sse2(void);
|
||||
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern uint32_t chunksize_avx2(void);
|
||||
extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef ARM_NEON
|
||||
extern uint32_t chunksize_neon(void);
|
||||
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern uint32_t chunksize_power8(void);
|
||||
extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t chunksize_rvv(void);
|
||||
extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
typedef struct z_stream_s z_stream;
|
||||
#else
|
||||
typedef struct zng_stream_s zng_stream;
|
||||
#endif
|
||||
|
||||
/* inflate fast loop */
|
||||
extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
|
||||
#ifdef X86_SSE2
|
||||
extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef X86_SSSE3
|
||||
extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef ARM_NEON
|
||||
extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef POWER8_VSX
|
||||
extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
/* CRC32 */
|
||||
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
|
||||
extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#ifdef ARM_ACLE
|
||||
extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#elif defined(POWER8_VSX)
|
||||
extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#elif defined(S390_CRC32_VX)
|
||||
extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
/* compare256 */
|
||||
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
|
||||
|
||||
extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
|
||||
#ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1);
|
||||
#endif
|
||||
|
||||
#ifdef DEFLATE_H_
|
||||
/* insert_string */
|
||||
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#ifdef X86_SSE42
|
||||
extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#endif
|
||||
|
||||
/* longest_match */
|
||||
extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
#ifdef HAVE_BUILTIN_CTZ
|
||||
extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
/* longest_match_slow */
|
||||
extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
|
||||
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
|
||||
extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
#ifdef UNALIGNED64_OK
|
||||
extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#endif
|
||||
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL)
|
||||
extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef POWER9
|
||||
extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef RISCV_RVV
|
||||
extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
/* quick_insert_string */
|
||||
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
|
||||
#ifdef X86_SSE42
|
||||
extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
|
||||
#endif
|
||||
|
||||
/* slide_hash */
|
||||
typedef void (*slide_hash_func)(deflate_state *s);
|
||||
|
||||
#ifdef X86_SSE2
|
||||
extern void slide_hash_sse2(deflate_state *s);
|
||||
#endif
|
||||
#if defined(ARM_SIMD)
|
||||
extern void slide_hash_armv6(deflate_state *s);
|
||||
#endif
|
||||
#if defined(ARM_NEON)
|
||||
extern void slide_hash_neon(deflate_state *s);
|
||||
#endif
|
||||
#if defined(PPC_VMX)
|
||||
extern void slide_hash_vmx(deflate_state *s);
|
||||
#endif
|
||||
#if defined(POWER8_VSX)
|
||||
extern void slide_hash_power8(deflate_state *s);
|
||||
#endif
|
||||
#if defined(RISCV_RVV)
|
||||
extern void slide_hash_rvv(deflate_state *s);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
extern void slide_hash_avx2(deflate_state *s);
|
||||
#endif
|
||||
|
||||
/* update_hash */
|
||||
extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#ifdef X86_SSE42
|
||||
extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#elif defined(ARM_ACLE)
|
||||
extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
42
3rdparty/zlib-ng/crc32.c
vendored
Normal file
@@ -0,0 +1,42 @@
/* crc32.c -- compute the CRC-32 of a data stream
 * Copyright (C) 1995-2022 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 *
 * This interleaved implementation of a CRC makes use of pipelined multiple
 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
 */

#include "zbuild.h"
#include "functable.h"
#include "crc32_braid_tbl.h"

/* ========================================================================= */

const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
    return (const uint32_t *)crc_table;
}

#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
    if (buf == NULL) return 0;

    return (unsigned long)FUNCTABLE_CALL(crc32)((uint32_t)crc, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
    if (buf == NULL) return 0;

    return FUNCTABLE_CALL(crc32)(crc, buf, len);
}
#endif

#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
    return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
    return PREFIX(crc32_z)(crc, buf, len);
}
#endif
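The two wrappers above just forward to the functable-selected CRC kernel. For orientation, the sketch below shows how a caller typically drives this API; it assumes a ZLIB_COMPAT build (so the classic `crc32`/`crc32_z` names are exported) and is an illustration only, not part of the vendored sources.

```c
#include <stdio.h>
#include <string.h>
#include <zlib.h>   /* zlib-compat header; native zlib-ng builds use zlib-ng.h and the zng_ prefix */

int main(void) {
    const unsigned char part1[] = "hello ";
    const unsigned char part2[] = "world";

    unsigned long crc = crc32(0L, Z_NULL, 0);                  /* initial CRC value */
    crc = crc32(crc, part1, (unsigned int)strlen((const char *)part1));
    crc = crc32_z(crc, part2, strlen((const char *)part2));    /* size_t-length variant */

    /* Same result as a single crc32() call over "hello world" */
    printf("crc32 = 0x%08lx\n", crc);
    return 0;
}
```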
16
3rdparty/zlib-ng/crc32.h
vendored
Normal file
@@ -0,0 +1,16 @@
/* crc32.h -- crc32 folding interface
 * Copyright (C) 2021 Nathan Moinvaziri
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#ifndef CRC32_H_
#define CRC32_H_

#define CRC32_FOLD_BUFFER_SIZE (16 * 4)
/* sizeof(__m128i) * (4 folds) */

typedef struct crc32_fold_s {
    uint8_t fold[CRC32_FOLD_BUFFER_SIZE];
    uint32_t value;
} crc32_fold;

#endif
1
3rdparty/zlib-ng/crc32_braid_comb.c
vendored
@@ -7,7 +7,6 @@
 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
 */

#include "zbuild.h"
#include "zutil.h"
#include "crc32_braid_p.h"
#include "crc32_braid_tbl.h"
22
3rdparty/zlib-ng/crc32_braid_p.h
vendored
@@ -1,7 +1,6 @@
#ifndef CRC32_BRAID_P_H_
#define CRC32_BRAID_P_H_

#include "zbuild.h"
#include "zendian.h"

/* Define N */
@@ -25,7 +24,7 @@
# endif
#else
# ifndef W
#  if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__)
#  if defined(__x86_64__) || defined(_M_AMD64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
#   define W 8
#  else
#   define W 4
@@ -42,9 +41,24 @@
# endif
#endif

#if BYTE_ORDER == LITTLE_ENDIAN
# define ZSWAPWORD(word) (word)
# define BRAID_TABLE crc_braid_table
#elif BYTE_ORDER == BIG_ENDIAN
# if W == 8
#  define ZSWAPWORD(word) ZSWAP64(word)
# elif W == 4
#  define ZSWAPWORD(word) ZSWAP32(word)
# endif
# define BRAID_TABLE crc_braid_big_table
#else
# error "No endian defined"
#endif

#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1

/* CRC polynomial. */
#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */

extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);

#endif /* CRC32_BRAID_P_H_ */
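The DO1/DO8 macros above are the classic table-driven, byte-at-a-time CRC update. As a point of reference, a standalone version of that update loop looks roughly like the sketch below; the table generator and the ~0 pre/post-conditioning are standard CRC-32 bookkeeping and are not copied from this header.

```c
#include <stddef.h>
#include <stdint.h>

static uint32_t crc_table[256];

/* Build the 256-entry lookup table for the reflected polynomial 0xedb88320. */
static void make_crc_table(void) {
    for (uint32_t n = 0; n < 256; n++) {
        uint32_t c = n;
        for (int k = 0; k < 8; k++)
            c = (c & 1) ? 0xedb88320u ^ (c >> 1) : c >> 1;
        crc_table[n] = c;
    }
}

/* Byte-at-a-time update: the loop body is exactly what DO1 expands to. */
static uint32_t crc32_bytewise(uint32_t crc, const uint8_t *buf, size_t len) {
    uint32_t c = crc ^ 0xffffffffu;                        /* pre-condition */
    while (len--)
        c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8);     /* DO1 */
    return c ^ 0xffffffffu;                                /* post-condition */
}
```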
21
3rdparty/zlib-ng/crc32_fold.h
vendored
@@ -1,21 +0,0 @@
/* crc32_fold.h -- crc32 folding interface
 * Copyright (C) 2021 Nathan Moinvaziri
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#ifndef CRC32_FOLD_H_
#define CRC32_FOLD_H_

#define CRC32_FOLD_BUFFER_SIZE (16 * 4)
/* sizeof(__m128i) * (4 folds) */

typedef struct crc32_fold_s {
    uint8_t fold[CRC32_FOLD_BUFFER_SIZE];
    uint32_t value;
} crc32_fold;

Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);

#endif
243
3rdparty/zlib-ng/deflate.c
vendored
@ -1,5 +1,5 @@
|
||||
/* deflate.c -- compress data using the deflation algorithm
|
||||
* Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler
|
||||
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
@ -58,7 +58,7 @@
|
||||
# undef deflateInit2
|
||||
#endif
|
||||
|
||||
const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jean-loup Gailly and Mark Adler ";
|
||||
const char PREFIX(deflate_copyright)[] = " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler ";
|
||||
/*
|
||||
If you use the zlib library in a product, an acknowledgment is welcome
|
||||
in the documentation of your product. If for some reason you cannot
|
||||
@ -71,14 +71,16 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jea
|
||||
*/
|
||||
#ifdef S390_DFLTCC_DEFLATE
|
||||
# include "arch/s390/dfltcc_deflate.h"
|
||||
/* DFLTCC instructions require window to be page-aligned */
|
||||
# define PAD_WINDOW PAD_4096
|
||||
# define WINDOW_PAD_SIZE 4096
|
||||
# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096
|
||||
#else
|
||||
/* Memory management for the deflate state. Useful for allocating arch-specific extension blocks. */
|
||||
# define ZALLOC_DEFLATE_STATE(strm) ((deflate_state *)ZALLOC(strm, 1, sizeof(deflate_state)))
|
||||
# define ZFREE_STATE(strm, addr) ZFREE(strm, addr)
|
||||
# define ZCOPY_DEFLATE_STATE(dst, src) memcpy(dst, src, sizeof(deflate_state))
|
||||
/* Memory management for the window. Useful for allocation the aligned window. */
|
||||
# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size)
|
||||
# define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr)
|
||||
# define PAD_WINDOW PAD_64
|
||||
# define WINDOW_PAD_SIZE 64
|
||||
# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64
|
||||
/* Adjust the window size for the arch-specific deflate code. */
|
||||
# define DEFLATE_ADJUST_WINDOW_SIZE(n) (n)
|
||||
/* Invoked at the beginning of deflateSetDictionary(). Useful for checking arch-specific window data. */
|
||||
# define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
||||
/* Invoked at the beginning of deflateGetDictionary(). Useful for adjusting arch-specific window data. */
|
||||
@ -120,10 +122,6 @@ static void lm_set_level (deflate_state *s, int level);
|
||||
static void lm_init (deflate_state *s);
|
||||
Z_INTERNAL unsigned read_buf (PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
|
||||
|
||||
extern uint32_t update_hash_roll (deflate_state *const s, uint32_t h, uint32_t val);
|
||||
extern void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
|
||||
extern Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
|
||||
|
||||
/* ===========================================================================
|
||||
* Local data
|
||||
*/
|
||||
@ -185,17 +183,111 @@ static const config configuration_table[10] = {
|
||||
memset((unsigned char *)s->head, 0, HASH_SIZE * sizeof(*s->head)); \
|
||||
} while (0)
|
||||
|
||||
/* ========================================================================= */
|
||||
/* This function is hidden in ZLIB_COMPAT builds. */
|
||||
|
||||
#ifdef DEF_ALLOC_DEBUG
|
||||
# include <stdio.h>
|
||||
# define LOGSZ(name,size) fprintf(stderr, "%s is %d bytes\n", name, size)
|
||||
# define LOGSZP(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %d, padded %d\n", name, size, loc, pad)
|
||||
# define LOGSZPL(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %ld, padded %d\n", name, size, loc, pad)
|
||||
#else
|
||||
# define LOGSZ(name,size)
|
||||
# define LOGSZP(name,size,loc,pad)
|
||||
# define LOGSZPL(name,size,loc,pad)
|
||||
#endif
|
||||
|
||||
/* ===========================================================================
|
||||
* Allocate a big buffer and divide it up into the various buffers deflate needs.
|
||||
* Handles alignment of allocated buffer and alignment of individual buffers.
|
||||
*/
|
||||
Z_INTERNAL deflate_allocs* alloc_deflate(PREFIX3(stream) *strm, int windowBits, int lit_bufsize) {
|
||||
int curr_size = 0;
|
||||
|
||||
/* Define sizes */
|
||||
int window_size = DEFLATE_ADJUST_WINDOW_SIZE((1 << windowBits) * 2);
|
||||
int prev_size = (1 << windowBits) * sizeof(Pos);
|
||||
int head_size = HASH_SIZE * sizeof(Pos);
|
||||
int pending_size = lit_bufsize * LIT_BUFS;
|
||||
int state_size = sizeof(deflate_state);
|
||||
int alloc_size = sizeof(deflate_allocs);
|
||||
|
||||
/* Calculate relative buffer positions and paddings */
|
||||
LOGSZP("window", window_size, PAD_WINDOW(curr_size), PADSZ(curr_size,WINDOW_PAD_SIZE));
|
||||
int window_pos = PAD_WINDOW(curr_size);
|
||||
curr_size = window_pos + window_size;
|
||||
|
||||
LOGSZP("prev", prev_size, PAD_64(curr_size), PADSZ(curr_size,64));
|
||||
int prev_pos = PAD_64(curr_size);
|
||||
curr_size = prev_pos + prev_size;
|
||||
|
||||
LOGSZP("head", head_size, PAD_64(curr_size), PADSZ(curr_size,64));
|
||||
int head_pos = PAD_64(curr_size);
|
||||
curr_size = head_pos + head_size;
|
||||
|
||||
LOGSZP("pending", pending_size, PAD_64(curr_size), PADSZ(curr_size,64));
|
||||
int pending_pos = PAD_64(curr_size);
|
||||
curr_size = pending_pos + pending_size;
|
||||
|
||||
LOGSZP("state", state_size, PAD_64(curr_size), PADSZ(curr_size,64));
|
||||
int state_pos = PAD_64(curr_size);
|
||||
curr_size = state_pos + state_size;
|
||||
|
||||
LOGSZP("alloc", alloc_size, PAD_16(curr_size), PADSZ(curr_size,16));
|
||||
int alloc_pos = PAD_16(curr_size);
|
||||
curr_size = alloc_pos + alloc_size;
|
||||
|
||||
/* Add 64-1 or 4096-1 to allow window alignment, and round size of buffer up to multiple of 64 */
|
||||
int total_size = PAD_64(curr_size + (WINDOW_PAD_SIZE - 1));
|
||||
|
||||
/* Allocate buffer, align to 64-byte cacheline, and zerofill the resulting buffer */
|
||||
char *original_buf = strm->zalloc(strm->opaque, 1, total_size);
|
||||
if (original_buf == NULL)
|
||||
return NULL;
|
||||
|
||||
char *buff = (char *)HINT_ALIGNED_WINDOW((char *)PAD_WINDOW(original_buf));
|
||||
LOGSZPL("Buffer alloc", total_size, PADSZ((uintptr_t)original_buf,WINDOW_PAD_SIZE), PADSZ(curr_size,WINDOW_PAD_SIZE));
|
||||
|
||||
/* Initialize alloc_bufs */
|
||||
deflate_allocs *alloc_bufs = (struct deflate_allocs_s *)(buff + alloc_pos);
|
||||
alloc_bufs->buf_start = (char *)original_buf;
|
||||
alloc_bufs->zfree = strm->zfree;
|
||||
|
||||
/* Assign buffers */
|
||||
alloc_bufs->window = (unsigned char *)HINT_ALIGNED_WINDOW(buff + window_pos);
|
||||
alloc_bufs->prev = (Pos *)HINT_ALIGNED_64(buff + prev_pos);
|
||||
alloc_bufs->head = (Pos *)HINT_ALIGNED_64(buff + head_pos);
|
||||
alloc_bufs->pending_buf = (unsigned char *)HINT_ALIGNED_64(buff + pending_pos);
|
||||
alloc_bufs->state = (deflate_state *)HINT_ALIGNED_16(buff + state_pos);
|
||||
|
||||
memset((char *)alloc_bufs->prev, 0, prev_size);
|
||||
|
||||
return alloc_bufs;
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Free all allocated deflate buffers
|
||||
*/
|
||||
static inline void free_deflate(PREFIX3(stream) *strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
if (state->alloc_bufs != NULL) {
|
||||
deflate_allocs *alloc_bufs = state->alloc_bufs;
|
||||
alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start);
|
||||
strm->state = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Initialize deflate state and buffers.
|
||||
* This function is hidden in ZLIB_COMPAT builds.
|
||||
*/
|
||||
int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits,
|
||||
int32_t memLevel, int32_t strategy) {
|
||||
/* Todo: ignore strm->next_in if we use it as window */
|
||||
uint32_t window_padding = 0;
|
||||
deflate_state *s;
|
||||
int wrap = 1;
|
||||
|
||||
/* Force initialization functable, because deflate captures function pointers from functable. */
|
||||
functable.force_init();
|
||||
/* Initialize functable */
|
||||
FUNCTABLE_INIT;
|
||||
|
||||
if (strm == NULL)
|
||||
return Z_STREAM_ERROR;
|
||||
@ -230,9 +322,19 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
|
||||
if (windowBits == 8)
|
||||
windowBits = 9; /* until 256-byte window bug fixed */
|
||||
|
||||
s = ZALLOC_DEFLATE_STATE(strm);
|
||||
if (s == NULL)
|
||||
/* Allocate buffers */
|
||||
int lit_bufsize = 1 << (memLevel + 6);
|
||||
deflate_allocs *alloc_bufs = alloc_deflate(strm, windowBits, lit_bufsize);
|
||||
if (alloc_bufs == NULL)
|
||||
return Z_MEM_ERROR;
|
||||
|
||||
s = alloc_bufs->state;
|
||||
s->alloc_bufs = alloc_bufs;
|
||||
s->window = alloc_bufs->window;
|
||||
s->prev = alloc_bufs->prev;
|
||||
s->head = alloc_bufs->head;
|
||||
s->pending_buf = alloc_bufs->pending_buf;
|
||||
|
||||
strm->state = (struct internal_state *)s;
|
||||
s->strm = strm;
|
||||
s->status = INIT_STATE; /* to pass state test in deflateReset() */
|
||||
@ -243,18 +345,9 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
|
||||
s->w_size = 1 << s->w_bits;
|
||||
s->w_mask = s->w_size - 1;
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
window_padding = 8;
|
||||
#endif
|
||||
|
||||
s->window = (unsigned char *) ZALLOC_WINDOW(strm, s->w_size + window_padding, 2*sizeof(unsigned char));
|
||||
s->prev = (Pos *) ZALLOC(strm, s->w_size, sizeof(Pos));
|
||||
memset(s->prev, 0, s->w_size * sizeof(Pos));
|
||||
s->head = (Pos *) ZALLOC(strm, HASH_SIZE, sizeof(Pos));
|
||||
|
||||
s->high_water = 0; /* nothing written to s->window yet */
|
||||
|
||||
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
|
||||
s->lit_bufsize = lit_bufsize; /* 16K elements by default */
|
||||
|
||||
/* We overlay pending_buf and sym_buf. This works since the average size
|
||||
* for length/distance pairs over any compressed block is assured to be 31
|
||||
@ -295,7 +388,6 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
|
||||
* symbols from which it is being constructed.
|
||||
*/
|
||||
|
||||
s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 4);
|
||||
s->pending_buf_size = s->lit_bufsize * 4;
|
||||
|
||||
if (s->window == NULL || s->prev == NULL || s->head == NULL || s->pending_buf == NULL) {
|
||||
@ -304,8 +396,15 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
|
||||
PREFIX(deflateEnd)(strm);
|
||||
return Z_MEM_ERROR;
|
||||
}
|
||||
|
||||
#ifdef LIT_MEM
|
||||
s->d_buf = (uint16_t *)(s->pending_buf + (s->lit_bufsize << 1));
|
||||
s->l_buf = s->pending_buf + (s->lit_bufsize << 2);
|
||||
s->sym_end = s->lit_bufsize - 1;
|
||||
#else
|
||||
s->sym_buf = s->pending_buf + s->lit_bufsize;
|
||||
s->sym_end = (s->lit_bufsize - 1) * 3;
|
||||
#endif
|
||||
/* We avoid equality with lit_bufsize*3 because of wraparound at 64K
|
||||
* on 16 bit machines and because stored blocks are restricted to
|
||||
* 64K-1 bytes.
|
||||
@ -348,7 +447,7 @@ static int deflateStateCheck(PREFIX3(stream) *strm) {
|
||||
if (strm == NULL || strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
|
||||
return 1;
|
||||
s = strm->state;
|
||||
if (s == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE))
|
||||
if (s == NULL || s->alloc_bufs == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@ -370,7 +469,7 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8
|
||||
|
||||
/* when using zlib wrappers, compute Adler-32 for provided dictionary */
|
||||
if (wrap == 1)
|
||||
strm->adler = functable.adler32(strm->adler, dictionary, dictLength);
|
||||
strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength);
|
||||
DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */
|
||||
s->wrap = 0; /* avoid computing Adler-32 in read_buf */
|
||||
|
||||
@ -457,7 +556,7 @@ int32_t Z_EXPORT PREFIX(deflateResetKeep)(PREFIX3(stream) *strm) {
|
||||
|
||||
#ifdef GZIP
|
||||
if (s->wrap == 2) {
|
||||
strm->adler = functable.crc32_fold_reset(&s->crc_fold);
|
||||
strm->adler = FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold);
} else
#endif
strm->adler = ADLER32_INITIAL_VALUE;
@@ -506,9 +605,17 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32
if (deflateStateCheck(strm))
return Z_STREAM_ERROR;
s = strm->state;

#ifdef LIT_MEM
if (bits < 0 || bits > BIT_BUF_SIZE ||
(unsigned char *)s->d_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3))
return Z_BUF_ERROR;
#else
if (bits < 0 || bits > BIT_BUF_SIZE || bits > (int32_t)(sizeof(value) << 3) ||
s->sym_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3))
return Z_BUF_ERROR;
#endif

do {
put = BIT_BUF_SIZE - s->bi_valid;
put = MIN(put, bits);
@@ -555,7 +662,7 @@ int32_t Z_EXPORT PREFIX(deflateParams)(PREFIX3(stream) *strm, int32_t level, int
if (s->level != level) {
if (s->level == 0 && s->matches != 0) {
if (s->matches == 1) {
functable.slide_hash(s);
FUNCTABLE_CALL(slide_hash)(s);
} else {
CLEAR_HASH(s);
}
@@ -794,7 +901,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) {
#ifdef GZIP
if (s->status == GZIP_STATE) {
/* gzip header */
functable.crc32_fold_reset(&s->crc_fold);
FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold);
put_byte(s, 31);
put_byte(s, 139);
put_byte(s, 8);
@@ -911,7 +1018,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) {
}
}
put_short(s, (uint16_t)strm->adler);
functable.crc32_fold_reset(&s->crc_fold);
FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold);
}
s->status = BUSY_STATE;

@@ -982,7 +1089,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) {
/* Write the trailer */
#ifdef GZIP
if (s->wrap == 2) {
strm->adler = functable.crc32_fold_final(&s->crc_fold);
strm->adler = FUNCTABLE_CALL(crc32_fold_final)(&s->crc_fold);

put_uint32(s, strm->adler);
put_uint32(s, (uint32_t)strm->total_in);
@@ -1007,21 +1114,13 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) {

/* ========================================================================= */
int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) {
int32_t status;

if (deflateStateCheck(strm))
return Z_STREAM_ERROR;

status = strm->state->status;
int32_t status = strm->state->status;

/* Deallocate in reverse order of allocations: */
TRY_FREE(strm, strm->state->pending_buf);
TRY_FREE(strm, strm->state->head);
TRY_FREE(strm, strm->state->prev);
TRY_FREE_WINDOW(strm, strm->state->window);

ZFREE_STATE(strm, strm->state);
strm->state = NULL;
/* Free allocated buffers */
free_deflate(strm);

return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
}
@@ -1032,7 +1131,6 @@ int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) {
int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source) {
deflate_state *ds;
deflate_state *ss;
uint32_t window_padding = 0;

if (deflateStateCheck(source) || dest == NULL)
return Z_STREAM_ERROR;
@@ -1041,34 +1139,39 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou

memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream)));

ds = ZALLOC_DEFLATE_STATE(dest);
if (ds == NULL)
deflate_allocs *alloc_bufs = alloc_deflate(dest, ss->w_bits, ss->lit_bufsize);
if (alloc_bufs == NULL)
return Z_MEM_ERROR;

ds = alloc_bufs->state;

dest->state = (struct internal_state *) ds;
ZCOPY_DEFLATE_STATE(ds, ss);
memcpy(ds, ss, sizeof(deflate_state));
ds->strm = dest;

#ifdef X86_PCLMULQDQ_CRC
window_padding = 8;
#endif

ds->window = (unsigned char *) ZALLOC_WINDOW(dest, ds->w_size + window_padding, 2*sizeof(unsigned char));
ds->prev = (Pos *) ZALLOC(dest, ds->w_size, sizeof(Pos));
ds->head = (Pos *) ZALLOC(dest, HASH_SIZE, sizeof(Pos));
ds->pending_buf = (unsigned char *) ZALLOC(dest, ds->lit_bufsize, 4);
ds->alloc_bufs = alloc_bufs;
ds->window = alloc_bufs->window;
ds->prev = alloc_bufs->prev;
ds->head = alloc_bufs->head;
ds->pending_buf = alloc_bufs->pending_buf;

if (ds->window == NULL || ds->prev == NULL || ds->head == NULL || ds->pending_buf == NULL) {
PREFIX(deflateEnd)(dest);
return Z_MEM_ERROR;
}

memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(unsigned char));
memcpy(ds->window, ss->window, DEFLATE_ADJUST_WINDOW_SIZE(ds->w_size * 2 * sizeof(unsigned char)));
memcpy((void *)ds->prev, (void *)ss->prev, ds->w_size * sizeof(Pos));
memcpy((void *)ds->head, (void *)ss->head, HASH_SIZE * sizeof(Pos));
memcpy(ds->pending_buf, ss->pending_buf, ds->pending_buf_size);
memcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS);

ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
#ifdef LIT_MEM
ds->d_buf = (uint16_t *)(ds->pending_buf + (ds->lit_bufsize << 1));
ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2);
#else
ds->sym_buf = ds->pending_buf + ds->lit_bufsize;
#endif

ds->l_desc.dyn_tree = ds->dyn_ltree;
ds->d_desc.dyn_tree = ds->dyn_dtree;
@@ -1095,10 +1198,10 @@ Z_INTERNAL unsigned PREFIX(read_buf)(PREFIX3(stream) *strm, unsigned char *buf,
memcpy(buf, strm->next_in, len);
#ifdef GZIP
} else if (strm->state->wrap == 2) {
functable.crc32_fold_copy(&strm->state->crc_fold, buf, strm->next_in, len);
FUNCTABLE_CALL(crc32_fold_copy)(&strm->state->crc_fold, buf, strm->next_in, len);
#endif
} else if (strm->state->wrap == 1) {
strm->adler = functable.adler32_fold_copy(strm->adler, buf, strm->next_in, len);
strm->adler = FUNCTABLE_CALL(adler32_fold_copy)(strm->adler, buf, strm->next_in, len);
} else {
memcpy(buf, strm->next_in, len);
}
@@ -1125,9 +1228,9 @@ static void lm_set_level(deflate_state *s, int level) {
s->insert_string = &insert_string_roll;
s->quick_insert_string = &quick_insert_string_roll;
} else {
s->update_hash = functable.update_hash;
s->insert_string = functable.insert_string;
s->quick_insert_string = functable.quick_insert_string;
s->update_hash = update_hash;
s->insert_string = insert_string;
s->quick_insert_string = quick_insert_string;
}

s->level = level;
@@ -1191,7 +1294,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) {
s->block_start -= (int)wsize;
if (s->insert > s->strstart)
s->insert = s->strstart;
functable.slide_hash(s);
FUNCTABLE_CALL(slide_hash)(s);
more += wsize;
}
if (s->strm->avail_in == 0)
@@ -1217,7 +1320,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) {
if (s->lookahead + s->insert >= STD_MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
if (UNLIKELY(s->max_chain_length > 1024)) {
s->ins_h = s->update_hash(s, s->window[str], s->window[str+1]);
s->ins_h = s->update_hash(s->window[str], s->window[str+1]);
} else if (str >= 1) {
s->quick_insert_string(s, str + 2 - STD_MIN_MATCH);
}
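One arithmetic detail worth spelling out from the deflatePrime() hunk above is the `(BIT_BUF_SIZE + 7) >> 3` room check. The constant itself is not shown in this hunk; the sketch below assumes BIT_BUF_SIZE is 64, based on the `uint64_t bi_buf` field that appears later in this diff, and only illustrates the rounding-up-to-bytes arithmetic.

```c
/* Worked example of the "(BIT_BUF_SIZE + 7) >> 3" expression used in the
 * deflatePrime() bounds check above.  BIT_BUF_SIZE == 64 is an assumption
 * here, taken from the uint64_t bi_buf field shown later in this diff. */
#include <stdio.h>

int main(void) {
    const int bit_buf_size = 64;                      /* assumed bit-buffer width        */
    const int flush_bytes  = (bit_buf_size + 7) >> 3; /* bytes a full buffer may append  */
    printf("flushing a full %d-bit buffer appends up to %d bytes\n",
           bit_buf_size, flush_bytes);                /* prints 8 */
    return 0;
}
```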
56
3rdparty/zlib-ng/deflate.h
vendored
@@ -12,8 +12,12 @@

#include "zutil.h"
#include "zendian.h"
#include "adler32_fold.h"
#include "crc32_fold.h"
#include "crc32.h"

#ifdef S390_DFLTCC_DEFLATE
# include "arch/s390/dfltcc_common.h"
# define HAVE_ARCH_DEFLATE_STATE
#endif

/* define NO_GZIP when compiling if you want to disable gzip header and
trailer creation by deflate(). NO_GZIP would be used to avoid linking in
@@ -23,6 +27,12 @@
# define GZIP
#endif

/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at
the cost of a larger memory footprint */
#ifndef NO_LIT_MEM
# define LIT_MEM
#endif

/* ===========================================================================
* Internal compression state.
*/
@@ -108,11 +118,30 @@ typedef uint16_t Pos;
/* Type definitions for hash callbacks */
typedef struct internal_state deflate_state;

typedef uint32_t (* update_hash_cb) (deflate_state *const s, uint32_t h, uint32_t val);
typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val);
typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count);
typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);

struct internal_state {
uint32_t update_hash (uint32_t h, uint32_t val);
void insert_string (deflate_state *const s, uint32_t str, uint32_t count);
Pos quick_insert_string (deflate_state *const s, uint32_t str);

uint32_t update_hash_roll (uint32_t h, uint32_t val);
void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);

/* Struct for memory allocation handling */
typedef struct deflate_allocs_s {
char *buf_start;
free_func zfree;
deflate_state *state;
unsigned char *window;
unsigned char *pending_buf;
Pos *prev;
Pos *head;
} deflate_allocs;

struct ALIGNED_(64) internal_state {
PREFIX3(stream) *strm; /* pointer back to this zlib stream */
unsigned char *pending_buf; /* output still pending */
unsigned char *pending_out; /* next pending byte to output to the stream */
@@ -260,8 +289,16 @@ struct internal_state {
* - I can't count above 4
*/

#ifdef LIT_MEM
# define LIT_BUFS 5
uint16_t *d_buf; /* buffer for distances */
unsigned char *l_buf; /* buffer for literals/lengths */
#else
# define LIT_BUFS 4
unsigned char *sym_buf; /* buffer for distances and literals/lengths */
unsigned int sym_next; /* running index in sym_buf */
#endif

unsigned int sym_next; /* running index in symbol buffer */
unsigned int sym_end; /* symbol table full when sym_next reaches this */

unsigned long opt_len; /* bit length of current block with optimal trees */
@@ -273,8 +310,11 @@ struct internal_state {
unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */
unsigned long bits_sent; /* bit length of compressed data sent mod 2^32 */

/* Reserved for future use and alignment purposes */
char *reserved_p;
deflate_allocs *alloc_bufs;

#ifdef HAVE_ARCH_DEFLATE_STATE
arch_deflate_state arch; /* architecture-specific extensions */
#endif

uint64_t bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least significant bits). */
@@ -284,7 +324,7 @@ struct internal_state {

/* Reserved for future use and alignment purposes */
int32_t reserved[11];
} ALIGNED_(8);
};

typedef enum {
need_more, /* block not completed, need more input or more output */
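The new `deflate_allocs` struct above records a single allocation (`buf_start`), the matching `zfree` callback, and the pointers carved out of it, which is what lets deflateEnd() collapse into one `free_deflate(strm)` call in the deflate.c hunks earlier. The actual `alloc_deflate()`/`free_deflate()` implementations are not part of this diff, so the following is only a minimal sketch of such a single-allocation scheme; the sizes, the missing alignment padding, and the `demo_*` helper names are assumptions for illustration.

```c
/* Sketch of a one-malloc / one-free buffer scheme in the spirit of the
 * deflate_allocs bookkeeping shown above.  Not zlib-ng's alloc_deflate(). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint16_t Pos;

typedef struct demo_allocs_s {
    char *buf_start;            /* start of the single allocation */
    unsigned char *window;
    unsigned char *pending_buf;
    Pos *prev;
    Pos *head;
} demo_allocs;

static demo_allocs *demo_alloc(size_t w_size, size_t lit_bufsize, size_t hash_size) {
    /* One block holds the bookkeeping struct followed by every buffer. */
    size_t total = sizeof(demo_allocs)
                 + 2 * w_size                 /* sliding window          */
                 + lit_bufsize * 4            /* pending/symbol buffers  */
                 + w_size * sizeof(Pos)       /* prev chain              */
                 + hash_size * sizeof(Pos);   /* hash head table         */
    char *buf = malloc(total);
    if (buf == NULL)
        return NULL;

    demo_allocs *alloc = (demo_allocs *)buf;
    char *p = buf + sizeof(demo_allocs);
    alloc->buf_start   = buf;
    alloc->window      = (unsigned char *)p;  p += 2 * w_size;
    alloc->pending_buf = (unsigned char *)p;  p += lit_bufsize * 4;
    alloc->prev        = (Pos *)p;            p += w_size * sizeof(Pos);
    alloc->head        = (Pos *)p;
    return alloc;
}

static void demo_free(demo_allocs *alloc) {
    free(alloc->buf_start);                   /* one free releases everything */
}

int main(void) {
    demo_allocs *a = demo_alloc(32768, 16384, 65536);
    if (a == NULL)
        return 1;
    printf("window=%p head=%p\n", (void *)a->window, (void *)a->head);
    demo_free(a);
    return 0;
}
```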
10
3rdparty/zlib-ng/deflate_fast.c
vendored
@@ -1,6 +1,6 @@
/* deflate_fast.c -- compress data using the fast strategy of deflation algorithm
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/

@@ -41,7 +41,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
* dictionary, and set hash_head to the head of the hash chain:
*/
if (s->lookahead >= WANT_MIN_MATCH) {
hash_head = functable.quick_insert_string(s, s->strstart);
hash_head = quick_insert_string(s, s->strstart);
dist = (int64_t)s->strstart - hash_head;

/* Find the longest match, discarding those <= prev_length.
@@ -52,7 +52,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
match_len = functable.longest_match(s, hash_head);
match_len = FUNCTABLE_CALL(longest_match)(s, hash_head);
/* longest_match() sets match_start */
}
}
@@ -71,11 +71,11 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
match_len--; /* string at strstart already in table */
s->strstart++;

functable.insert_string(s, s->strstart, match_len);
insert_string(s, s->strstart, match_len);
s->strstart += match_len;
} else {
s->strstart += match_len;
functable.quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH);
quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH);

/* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not
* matter since it will be recomputed at next deflate call.
2
3rdparty/zlib-ng/deflate_huff.c
vendored
@@ -1,6 +1,6 @@
/* deflate_huff.c -- compress data using huffman encoding only strategy
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/

57
3rdparty/zlib-ng/deflate_medium.c
vendored
@@ -45,16 +45,18 @@ static void insert_match(deflate_state *s, struct match match) {
if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH)))
return;

/* string at strstart already in table */
match.strstart++;
match.match_length--;

/* matches that are not long enough we need to emit as literals */
if (LIKELY(match.match_length < WANT_MIN_MATCH)) {
match.strstart++;
match.match_length--;
if (LIKELY(match.match_length < WANT_MIN_MATCH - 1)) {
if (UNLIKELY(match.match_length > 0)) {
if (match.strstart >= match.orgstart) {
if (match.strstart + match.match_length - 1 >= match.orgstart) {
functable.insert_string(s, match.strstart, match.match_length);
insert_string(s, match.strstart, match.match_length);
} else {
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
}
match.strstart += match.match_length;
match.match_length = 0;
@@ -63,35 +65,18 @@ static void insert_match(deflate_state *s, struct match match) {
return;
}

/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
if (match.match_length <= 16 * s->max_insert_length && s->lookahead >= WANT_MIN_MATCH) {
match.match_length--; /* string at strstart already in table */
match.strstart++;

if (LIKELY(match.strstart >= match.orgstart)) {
if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
functable.insert_string(s, match.strstart, match.match_length);
} else {
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
}
} else if (match.orgstart < match.strstart + match.match_length) {
functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
/* Insert into hash table. */
if (LIKELY(match.strstart >= match.orgstart)) {
if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
insert_string(s, match.strstart, match.match_length);
} else {
insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
}
match.strstart += match.match_length;
match.match_length = 0;
} else {
match.strstart += match.match_length;
match.match_length = 0;

if (match.strstart >= (STD_MIN_MATCH - 2))
functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH);

/* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not
* matter since it will be recomputed at next deflate call.
*/
} else if (match.orgstart < match.strstart + match.match_length) {
insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
}
match.strstart += match.match_length;
match.match_length = 0;
}

static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
@@ -199,7 +184,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
} else {
hash_head = 0;
if (s->lookahead >= WANT_MIN_MATCH) {
hash_head = functable.quick_insert_string(s, s->strstart);
hash_head = quick_insert_string(s, s->strstart);
}

current_match.strstart = (uint16_t)s->strstart;
@@ -215,7 +200,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
current_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head);
current_match.match_start = (uint16_t)s->match_start;
if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH))
current_match.match_length = 1;
@@ -235,7 +220,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
/* now, look ahead one */
if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
s->strstart = current_match.strstart + current_match.match_length;
hash_head = functable.quick_insert_string(s, s->strstart);
hash_head = quick_insert_string(s, s->strstart);

next_match.strstart = (uint16_t)s->strstart;
next_match.orgstart = next_match.strstart;
@@ -250,7 +235,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
next_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head);
next_match.match_start = (uint16_t)s->match_start;
if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
/* this can happen due to some restarts */
16
3rdparty/zlib-ng/deflate_p.h
vendored
@@ -1,7 +1,7 @@
/* deflate_p.h -- Private inline functions and macros shared with more than
* one deflate method
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
*/
@@ -60,27 +60,37 @@ extern const unsigned char Z_INTERNAL zng_dist_code[];

static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
/* c is the unmatched char */
#ifdef LIT_MEM
s->d_buf[s->sym_next] = 0;
s->l_buf[s->sym_next++] = c;
#else
s->sym_buf[s->sym_next++] = 0;
s->sym_buf[s->sym_next++] = 0;
s->sym_buf[s->sym_next++] = c;
#endif
s->dyn_ltree[c].Freq++;
Tracevv((stderr, "%c", c));
Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal");
return (s->sym_next == s->sym_end);
}

static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) {
static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t len) {
/* dist: distance of matched string */
/* len: match length-STD_MIN_MATCH */
#ifdef LIT_MEM
s->d_buf[s->sym_next] = dist;
s->l_buf[s->sym_next++] = len;
#else
s->sym_buf[s->sym_next++] = (uint8_t)(dist);
s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
s->sym_buf[s->sym_next++] = (uint8_t)len;
#endif
s->matches++;
dist--;
Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES,
"zng_tr_tally: bad match");

s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++;
s->dyn_ltree[zng_length_code[len] + LITERALS + 1].Freq++;
s->dyn_dtree[d_code(dist)].Freq++;
return (s->sym_next == s->sym_end);
}
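The zng_tr_tally_* changes above show the practical difference the LIT_MEM option makes: under LIT_MEM each symbol is split across a 16-bit distance array and a byte literal/length array, while the classic path packs the same information into three consecutive sym_buf bytes. The standalone sketch below mirrors just that packing; the `demo_*` names and the cut-down struct are assumptions for illustration, not the real deflate_state.

```c
/* Side-by-side sketch of the two symbol-buffer layouts used by the
 * zng_tr_tally_* helpers above (sizes arbitrary, struct heavily cut down). */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define DEMO_LIT_BUFSIZE 16384

typedef struct {
    uint16_t d_buf[DEMO_LIT_BUFSIZE];        /* LIT_MEM: distances            */
    uint8_t  l_buf[DEMO_LIT_BUFSIZE];        /* LIT_MEM: literals/lengths     */
    uint8_t  sym_buf[DEMO_LIT_BUFSIZE * 3];  /* classic: 3 bytes per symbol   */
    unsigned sym_next_lit;                   /* index into the LIT_MEM arrays */
    unsigned sym_next_classic;               /* byte index into sym_buf       */
} demo_state;

/* LIT_MEM flavour: one 16-bit slot for the distance, one byte for the length. */
static void demo_tally_dist_litmem(demo_state *s, uint32_t dist, uint32_t len) {
    s->d_buf[s->sym_next_lit] = (uint16_t)dist;
    s->l_buf[s->sym_next_lit++] = (uint8_t)len;
}

/* Classic flavour: the same pair packed into three consecutive sym_buf bytes. */
static void demo_tally_dist_classic(demo_state *s, uint32_t dist, uint32_t len) {
    s->sym_buf[s->sym_next_classic++] = (uint8_t)(dist);
    s->sym_buf[s->sym_next_classic++] = (uint8_t)(dist >> 8);
    s->sym_buf[s->sym_next_classic++] = (uint8_t)len;
}

int main(void) {
    demo_state *s = calloc(1, sizeof(*s));
    if (s == NULL)
        return 1;
    demo_tally_dist_litmem(s, 300, 10);
    demo_tally_dist_classic(s, 300, 10);
    printf("LIT_MEM : d_buf[0]=%u l_buf[0]=%u\n",
           (unsigned)s->d_buf[0], (unsigned)s->l_buf[0]);
    printf("classic : sym_buf = %u %u %u\n",
           (unsigned)s->sym_buf[0], (unsigned)s->sym_buf[1], (unsigned)s->sym_buf[2]);
    free(s);
    return 0;
}
```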
4
3rdparty/zlib-ng/deflate_quick.c
vendored
@@ -86,7 +86,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
}

if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
hash_head = functable.quick_insert_string(s, s->strstart);
hash_head = quick_insert_string(s, s->strstart);
dist = (int64_t)s->strstart - hash_head;

if (dist <= MAX_DIST(s) && dist > 0) {
@@ -94,7 +94,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
const uint8_t *match_start = s->window + hash_head;

if (zng_memcmp_2(str_start, match_start) == 0) {
match_len = functable.compare256(str_start+2, match_start+2) + 2;
match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2;

if (match_len >= WANT_MIN_MATCH) {
if (UNLIKELY(match_len > s->lookahead))
2
3rdparty/zlib-ng/deflate_rle.c
vendored
@@ -1,6 +1,6 @@
/* deflate_rle.c -- compress data using RLE strategy of deflation algorithm
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/

12
3rdparty/zlib-ng/deflate_slow.c
vendored
@@ -1,6 +1,6 @@
/* deflate_slow.c -- compress data using the slow strategy of deflation algorithm
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/

@@ -19,12 +19,12 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
int bflush; /* set if current block must be flushed */
int64_t dist;
uint32_t match_len;
match_func *longest_match;
match_func longest_match;

if (s->max_chain_length <= 1024)
longest_match = &functable.longest_match;
longest_match = FUNCTABLE_FPTR(longest_match);
else
longest_match = &functable.longest_match_slow;
longest_match = FUNCTABLE_FPTR(longest_match_slow);

/* Process the input block. */
for (;;) {
@@ -61,7 +61,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
match_len = (*longest_match)(s, hash_head);
match_len = longest_match(s, hash_head);
/* longest_match() sets match_start */

if (match_len <= 5 && (s->strategy == Z_FILTERED)) {
@@ -129,7 +129,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
}
Assert(flush != Z_NO_FLUSH, "no flush?");
if (UNLIKELY(s->match_available)) {
(void) zng_tr_tally_lit(s, s->window[s->strstart-1]);
Z_UNUSED(zng_tr_tally_lit(s, s->window[s->strstart-1]));
s->match_available = 0;
}
s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1);
4
3rdparty/zlib-ng/deflate_stored.c
vendored
@@ -1,6 +1,6 @@
/* deflate_stored.c -- store data without compression using deflation algorithm
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/

@@ -22,7 +22,7 @@
*
* deflate_stored() is written to minimize the number of times an input byte is
* copied. It is most efficient with large input and output buffers, which
* maximizes the opportunites to have a single copy from next_in to next_out.
* maximizes the opportunities to have a single copy from next_in to next_out.
*/
Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) {
/* Smallest worthy block size when not flushing or finishing. By default
3
3rdparty/zlib-ng/fallback_builtins.h
vendored
@@ -5,9 +5,6 @@
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)

#include <intrin.h>
#ifdef X86_FEATURES
# include "arch/x86/x86_features.h"
#endif

/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0.
* Because of that assumption trailing_zero is not initialized and the return value is not checked.
71
3rdparty/zlib-ng/functable.c
vendored
@@ -2,14 +2,12 @@
* Copyright (C) 2017 Hans Kristian Rosbach
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef DISABLE_RUNTIME_CPU_DETECTION

#include "zbuild.h"
#include "zendian.h"
#include "crc32_braid_p.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
#include "cpu_features.h"
#include "arch_functions.h"

#if defined(_MSC_VER)
# include <intrin.h>
@@ -61,31 +59,10 @@ static void init_functable(void) {
ft.crc32_fold_final = &crc32_fold_final_c;
ft.crc32_fold_reset = &crc32_fold_reset_c;
ft.inflate_fast = &inflate_fast_c;
ft.insert_string = &insert_string_c;
ft.quick_insert_string = &quick_insert_string_c;
ft.slide_hash = &slide_hash_c;
ft.update_hash = &update_hash_c;

#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
ft.longest_match = &longest_match_unaligned_64;
ft.longest_match_slow = &longest_match_slow_unaligned_64;
ft.compare256 = &compare256_unaligned_64;
# elif defined(HAVE_BUILTIN_CTZ)
ft.longest_match = &longest_match_unaligned_32;
ft.longest_match_slow = &longest_match_slow_unaligned_32;
ft.compare256 = &compare256_unaligned_32;
# else
ft.longest_match = &longest_match_unaligned_16;
ft.longest_match_slow = &longest_match_slow_unaligned_16;
ft.compare256 = &compare256_unaligned_16;
# endif
#else
ft.longest_match = &longest_match_c;
ft.longest_match_slow = &longest_match_slow_c;
ft.compare256 = &compare256_c;
#endif

ft.longest_match = &longest_match_generic;
ft.longest_match_slow = &longest_match_slow_generic;
ft.compare256 = &compare256_generic;

// Select arch-optimized functions

@@ -110,19 +87,14 @@ static void init_functable(void) {
#ifdef X86_SSSE3
if (cf.x86.has_ssse3) {
ft.adler32 = &adler32_ssse3;
# ifdef X86_SSE2
ft.chunkmemset_safe = &chunkmemset_safe_ssse3;
ft.inflate_fast = &inflate_fast_ssse3;
# endif
}
#endif
// X86 - SSE4.2
#ifdef X86_SSE42
if (cf.x86.has_sse42) {
ft.adler32_fold_copy = &adler32_fold_copy_sse42;
ft.insert_string = &insert_string_sse42;
ft.quick_insert_string = &quick_insert_string_sse42;
ft.update_hash = &update_hash_sse42;
}
#endif
// X86 - PCLMUL
@@ -151,8 +123,9 @@ static void init_functable(void) {
# endif
}
#endif
// X86 - AVX512 (F,DQ,BW,Vl)
#ifdef X86_AVX512
if (cf.x86.has_avx512) {
if (cf.x86.has_avx512_common) {
ft.adler32 = &adler32_avx512;
ft.adler32_fold_copy = &adler32_fold_copy_avx512;
}
@@ -164,8 +137,8 @@ static void init_functable(void) {
}
#endif
// X86 - VPCLMULQDQ
#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC)
if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) {
#ifdef X86_VPCLMULQDQ_CRC
if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) {
ft.crc32 = &crc32_vpclmulqdq;
ft.crc32_fold = &crc32_fold_vpclmulqdq;
ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy;
@@ -206,9 +179,6 @@ static void init_functable(void) {
#ifdef ARM_ACLE
if (cf.arm.has_crc32) {
ft.crc32 = &crc32_acle;
ft.insert_string = &insert_string_acle;
ft.quick_insert_string = &quick_insert_string_acle;
ft.update_hash = &update_hash_acle;
}
#endif

@@ -279,12 +249,9 @@ static void init_functable(void) {
FUNCTABLE_ASSIGN(ft, crc32_fold_final);
FUNCTABLE_ASSIGN(ft, crc32_fold_reset);
FUNCTABLE_ASSIGN(ft, inflate_fast);
FUNCTABLE_ASSIGN(ft, insert_string);
FUNCTABLE_ASSIGN(ft, longest_match);
FUNCTABLE_ASSIGN(ft, longest_match_slow);
FUNCTABLE_ASSIGN(ft, quick_insert_string);
FUNCTABLE_ASSIGN(ft, slide_hash);
FUNCTABLE_ASSIGN(ft, update_hash);

// Memory barrier for weak memory order CPUs
FUNCTABLE_BARRIER();
@@ -350,11 +317,6 @@ static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) {
functable.inflate_fast(strm, start);
}

static void insert_string_stub(deflate_state* const s, uint32_t str, uint32_t count) {
init_functable();
functable.insert_string(s, str, count);
}

static uint32_t longest_match_stub(deflate_state* const s, Pos cur_match) {
init_functable();
return functable.longest_match(s, cur_match);
@@ -365,21 +327,11 @@ static uint32_t longest_match_slow_stub(deflate_state* const s, Pos cur_match) {
return functable.longest_match_slow(s, cur_match);
}

static Pos quick_insert_string_stub(deflate_state* const s, const uint32_t str) {
init_functable();
return functable.quick_insert_string(s, str);
}

static void slide_hash_stub(deflate_state* s) {
init_functable();
functable.slide_hash(s);
}

static uint32_t update_hash_stub(deflate_state* const s, uint32_t h, uint32_t val) {
init_functable();
return functable.update_hash(s, h, val);
}

/* functable init */
Z_INTERNAL struct functable_s functable = {
force_init_stub,
@@ -394,10 +346,9 @@ Z_INTERNAL struct functable_s functable = {
crc32_fold_final_stub,
crc32_fold_reset_stub,
inflate_fast_stub,
insert_string_stub,
longest_match_stub,
longest_match_slow_stub,
quick_insert_string_stub,
slide_hash_stub,
update_hash_stub
};

#endif
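The stub functions kept in functable.c above (longest_match_stub, slide_hash_stub, and so on) all follow the same first-call-initializes pattern: the exported table starts out filled with stubs, the first call through any entry runs init_functable() and overwrites the table with the CPU-selected implementations, and every later call dispatches directly. The sketch below reproduces only that pattern in isolation; the `demo_*` names are illustrative, not zlib-ng symbols.

```c
/* Minimal sketch of the lazy functable-initialization pattern above. */
#include <stdio.h>

struct demo_table_s {
    int (*square)(int v);
};

static struct demo_table_s demo_table;           /* initialized with a stub below */

static int square_generic(int v) {
    return v * v;                                /* the "portable C" variant */
}

static void demo_init_table(void) {
    /* Runtime CPU-feature detection would pick an optimized variant here. */
    demo_table.square = &square_generic;
}

/* Stub installed as the initial table entry: initialize, then forward the call. */
static int square_stub(int v) {
    demo_init_table();
    return demo_table.square(v);
}

static struct demo_table_s demo_table = { square_stub };

int main(void) {
    printf("%d\n", demo_table.square(7));        /* first call goes via the stub  */
    printf("%d\n", demo_table.square(9));        /* later calls dispatch directly */
    return 0;
}
```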
31
3rdparty/zlib-ng/functable.h
vendored
@@ -7,14 +7,21 @@
#define FUNCTABLE_H_

#include "deflate.h"
#include "crc32_fold.h"
#include "adler32_fold.h"
#include "crc32.h"

#ifdef DISABLE_RUNTIME_CPU_DETECTION

# include "arch_functions.h"

/* When compiling with native instructions it is not necessary to use functable.
* Instead we use native_ macro indicating the best available variant of arch-specific
* functions for the current platform.
*/
# define FUNCTABLE_INIT ((void)0)
# define FUNCTABLE_CALL(name) native_ ## name
# define FUNCTABLE_FPTR(name) &native_ ## name

#ifdef ZLIB_COMPAT
typedef struct z_stream_s z_stream;
#else
typedef struct zng_stream_s zng_stream;
#endif

struct functable_s {
void (* force_init) (void);
@@ -29,14 +36,20 @@ struct functable_s {
uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc);
uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc);
void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start);
void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count);
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match);
Pos (* quick_insert_string)(deflate_state *const s, uint32_t str);
void (* slide_hash) (deflate_state *s);
uint32_t (* update_hash) (deflate_state *const s, uint32_t h, uint32_t val);
};

Z_INTERNAL extern struct functable_s functable;


/* Explicitly indicate functions are conditionally dispatched.
*/
# define FUNCTABLE_INIT functable.force_init()
# define FUNCTABLE_CALL(name) functable.name
# define FUNCTABLE_FPTR(name) functable.name

#endif

#endif
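The functable.h hunks above define the macros used throughout this patch: with DISABLE_RUNTIME_CPU_DETECTION the call sites resolve at compile time to `native_`-prefixed functions, otherwise they go through the runtime-selected function table, and FUNCTABLE_FPTR covers the cases (such as deflate_slow.c) where a function pointer is stored rather than called. The self-contained sketch below mimics that dual-mode macro arrangement; the `DEMO_*` and `demo_*` names are illustrative, not zlib-ng symbols.

```c
/* Sketch of the two dispatch modes selected by macros like FUNCTABLE_CALL
 * above.  Compile with -DDEMO_STATIC_DISPATCH for the direct-call flavour,
 * without it for the table flavour. */
#include <stdio.h>

static void native_demo_hello(void) {
    puts("hello from the native variant");
}

#ifdef DEMO_STATIC_DISPATCH
/* Compile-time dispatch: the macro names the native_ symbol directly. */
#  define DEMO_CALL(name) native_demo_ ## name
#  define DEMO_FPTR(name) (&native_demo_ ## name)
#else
/* Runtime dispatch: the macro routes the call through a function table. */
struct demo_functable_s {
    void (*hello)(void);
};
static struct demo_functable_s demo_functable = { native_demo_hello };
#  define DEMO_CALL(name) demo_functable.name
#  define DEMO_FPTR(name) demo_functable.name
#endif

int main(void) {
    DEMO_CALL(hello)();                      /* same call site in both modes */
    void (*fp)(void) = DEMO_FPTR(hello);     /* taking a pointer, as deflate_slow.c does */
    fp();
    return 0;
}
```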
Some files were not shown because too many files have changed in this diff.