From 85923c8f3054bc52c9c30c2a8e932dbd96cc26fc Mon Sep 17 00:00:00 2001 From: FantasqueX Date: Thu, 12 Sep 2024 21:05:24 +0800 Subject: [PATCH] Merge pull request #26113 from FantasqueX:zlib-ng-2-2-1 Update zlib-ng to 2.2.1 #26113 Release: https://github.com/zlib-ng/zlib-ng/releases/tag/2.2.1 ARM diagnostics patch: https://github.com/zlib-ng/zlib-ng/pull/1774 ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake --- 3rdparty/zlib-ng/CMakeLists.txt | 1714 +++++++++++------ 3rdparty/zlib-ng/LICENSE.md | 2 +- 3rdparty/zlib-ng/README.md | 30 +- 3rdparty/zlib-ng/adler32.c | 54 +- 3rdparty/zlib-ng/adler32_fold.h | 11 - 3rdparty/zlib-ng/arch/.gitignore | 2 - 3rdparty/zlib-ng/arch/arm/Makefile.in | 7 - 3rdparty/zlib-ng/arch/arm/adler32_neon.c | 4 +- 3rdparty/zlib-ng/arch/arm/arm_features.c | 17 +- 3rdparty/zlib-ng/arch/arm/arm_features.h | 6 +- 3rdparty/zlib-ng/arch/arm/arm_functions.h | 65 + 3rdparty/zlib-ng/arch/arm/chunkset_neon.c | 4 +- 3rdparty/zlib-ng/arch/arm/compare256_neon.c | 5 +- 3rdparty/zlib-ng/arch/arm/crc32_acle.c | 2 +- .../zlib-ng/arch/arm/insert_string_acle.c | 24 - 3rdparty/zlib-ng/arch/arm/neon_intrins.h | 7 + 3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c | 4 +- 3rdparty/zlib-ng/arch/arm/slide_hash_neon.c | 4 +- 3rdparty/zlib-ng/arch/generic/Makefile.in | 57 +- 3rdparty/zlib-ng/arch/generic/adler32_c.c | 54 + .../generic/adler32_fold_c.c} | 3 +- .../{chunkset.c => arch/generic/chunkset_c.c} | 0 .../generic/compare256_c.c} | 1 + .../generic/crc32_braid_c.c} | 54 +- .../generic/crc32_fold_c.c} | 10 +- .../zlib-ng/arch/generic/generic_functions.h | 106 + .../generic/slide_hash_c.c} | 2 +- 3rdparty/zlib-ng/arch/power/chunkset_power8.c | 2 +- .../zlib-ng/arch/power/compare256_power9.c | 6 +- 3rdparty/zlib-ng/arch/power/power_features.c | 7 +- 3rdparty/zlib-ng/arch/power/power_features.h | 6 +- 3rdparty/zlib-ng/arch/power/power_functions.h | 67 + 3rdparty/zlib-ng/arch/riscv/adler32_rvv.c | 4 +- 3rdparty/zlib-ng/arch/riscv/compare256_rvv.c | 4 +- 3rdparty/zlib-ng/arch/riscv/riscv_features.c | 11 +- 3rdparty/zlib-ng/arch/riscv/riscv_features.h | 6 +- 3rdparty/zlib-ng/arch/riscv/riscv_functions.h | 49 + 3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c | 10 +- 3rdparty/zlib-ng/arch/s390/Makefile.in | 48 + 3rdparty/zlib-ng/arch/s390/README.md | 277 +++ 3rdparty/zlib-ng/arch/s390/crc32-vx.c | 222 +++ 3rdparty/zlib-ng/arch/s390/dfltcc_common.h | 119 ++ 3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c | 383 ++++ 3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h | 58 + 3rdparty/zlib-ng/arch/s390/dfltcc_detail.h | 275 +++ 3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c | 191 ++ 3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h | 67 + 3rdparty/zlib-ng/arch/s390/s390_features.c | 14 + 3rdparty/zlib-ng/arch/s390/s390_features.h | 14 + 3rdparty/zlib-ng/arch/s390/s390_functions.h | 20 + .../actions-runner.Dockerfile | 47 + .../actions-runner.service | 18 + .../self-hosted-builder/runner-global.json | 5 + .../self-hosted-builder/runner-s390x.patch | 243 +++ 3rdparty/zlib-ng/arch/x86/Makefile.in | 11 +- 3rdparty/zlib-ng/arch/x86/adler32_avx2.c | 17 +- 3rdparty/zlib-ng/arch/x86/adler32_avx512.c | 13 +- .../zlib-ng/arch/x86/adler32_avx512_vnni.c | 21 +- 3rdparty/zlib-ng/arch/x86/adler32_sse42.c | 5 +- 3rdparty/zlib-ng/arch/x86/adler32_ssse3.c | 4 +- 3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c | 14 +- 3rdparty/zlib-ng/arch/x86/compare256_avx2.c | 5 +- 3rdparty/zlib-ng/arch/x86/compare256_sse2.c | 5 +- .../arch/x86/crc32_fold_pclmulqdq_tpl.h | 35 +- .../zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h | 26 +- 3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c | 2 +- .../zlib-ng/arch/x86/insert_string_sse42.c | 24 - 3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c | 4 +- 3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c | 4 +- 3rdparty/zlib-ng/arch/x86/x86_features.c | 24 +- 3rdparty/zlib-ng/arch/x86/x86_features.h | 14 +- 3rdparty/zlib-ng/arch/x86/x86_functions.h | 172 ++ 3rdparty/zlib-ng/arch/x86/x86_intrins.h | 4 +- 3rdparty/zlib-ng/arch_functions.h | 29 + 3rdparty/zlib-ng/chunkset_tpl.h | 8 +- 3rdparty/zlib-ng/cmake/detect-arch.c | 115 ++ 3rdparty/zlib-ng/cmake/detect-arch.cmake | 104 + 3rdparty/zlib-ng/cmake/detect-coverage.cmake | 46 + .../zlib-ng/cmake/detect-install-dirs.cmake | 43 + .../zlib-ng/cmake/detect-intrinsics.cmake | 281 +-- 3rdparty/zlib-ng/cmake/detect-sanitizer.cmake | 166 ++ 3rdparty/zlib-ng/cpu_features.h | 264 +-- 3rdparty/zlib-ng/crc32.c | 42 + 3rdparty/zlib-ng/crc32.h | 16 + 3rdparty/zlib-ng/crc32_braid_comb.c | 1 - 3rdparty/zlib-ng/crc32_braid_p.h | 22 +- 3rdparty/zlib-ng/crc32_fold.h | 21 - 3rdparty/zlib-ng/deflate.c | 243 ++- 3rdparty/zlib-ng/deflate.h | 56 +- 3rdparty/zlib-ng/deflate_fast.c | 10 +- 3rdparty/zlib-ng/deflate_huff.c | 2 +- 3rdparty/zlib-ng/deflate_medium.c | 57 +- 3rdparty/zlib-ng/deflate_p.h | 16 +- 3rdparty/zlib-ng/deflate_quick.c | 4 +- 3rdparty/zlib-ng/deflate_rle.c | 2 +- 3rdparty/zlib-ng/deflate_slow.c | 12 +- 3rdparty/zlib-ng/deflate_stored.c | 4 +- 3rdparty/zlib-ng/fallback_builtins.h | 3 - 3rdparty/zlib-ng/functable.c | 71 +- 3rdparty/zlib-ng/functable.h | 31 +- 3rdparty/zlib-ng/gzguts.h | 6 +- 3rdparty/zlib-ng/gzlib.c | 8 +- 3rdparty/zlib-ng/infback.c | 19 +- 3rdparty/zlib-ng/inflate.c | 218 ++- 3rdparty/zlib-ng/inflate.h | 29 +- 3rdparty/zlib-ng/inflate_p.h | 21 +- 3rdparty/zlib-ng/inftrees.c | 6 +- 3rdparty/zlib-ng/insert_string.c | 10 +- 3rdparty/zlib-ng/insert_string_roll.c | 4 +- 3rdparty/zlib-ng/insert_string_tpl.h | 15 +- 3rdparty/zlib-ng/match_tpl.h | 19 +- .../zlib-ng-2.2.1-detect-intrinsics.patch | 13 + 3rdparty/zlib-ng/patches/zlib-ng-2.2.1.patch | 148 ++ 3rdparty/zlib-ng/trees.c | 66 +- 3rdparty/zlib-ng/win32/Makefile.a64 | 252 +++ 3rdparty/zlib-ng/win32/Makefile.arm | 272 +++ 3rdparty/zlib-ng/win32/Makefile.msc | 292 +++ 3rdparty/zlib-ng/win32/depcheck.cpp | 321 +++ 3rdparty/zlib-ng/win32/replace.vbs | 15 + 3rdparty/zlib-ng/win32/zlib-ng.def.in | 60 + 3rdparty/zlib-ng/win32/zlib-ng1.rc | 36 + 3rdparty/zlib-ng/win32/zlib.def.in | 64 + 3rdparty/zlib-ng/win32/zlib1.rc | 36 + 3rdparty/zlib-ng/win32/zlibcompat.def.in | 97 + 3rdparty/zlib-ng/zbuild.h | 67 + 3rdparty/zlib-ng/zconf-ng.h.in | 176 ++ 3rdparty/zlib-ng/zconf.h.in | 2 +- 3rdparty/zlib-ng/zlib.h.in | 24 +- 3rdparty/zlib-ng/zlib.map | 98 + 3rdparty/zlib-ng/zutil.c | 50 +- 3rdparty/zlib-ng/zutil.h | 14 +- 3rdparty/zlib-ng/zutil_p.h | 12 +- 132 files changed, 7214 insertions(+), 1751 deletions(-) delete mode 100644 3rdparty/zlib-ng/adler32_fold.h delete mode 100644 3rdparty/zlib-ng/arch/.gitignore create mode 100644 3rdparty/zlib-ng/arch/arm/arm_functions.h delete mode 100644 3rdparty/zlib-ng/arch/arm/insert_string_acle.c create mode 100644 3rdparty/zlib-ng/arch/generic/adler32_c.c rename 3rdparty/zlib-ng/{adler32_fold.c => arch/generic/adler32_fold_c.c} (83%) rename 3rdparty/zlib-ng/{chunkset.c => arch/generic/chunkset_c.c} (100%) rename 3rdparty/zlib-ng/{compare256.c => arch/generic/compare256_c.c} (99%) rename 3rdparty/zlib-ng/{crc32_braid.c => arch/generic/crc32_braid_c.c} (79%) rename 3rdparty/zlib-ng/{crc32_fold.c => arch/generic/crc32_fold_c.c} (86%) create mode 100644 3rdparty/zlib-ng/arch/generic/generic_functions.h rename 3rdparty/zlib-ng/{slide_hash.c => arch/generic/slide_hash_c.c} (96%) create mode 100644 3rdparty/zlib-ng/arch/power/power_functions.h create mode 100644 3rdparty/zlib-ng/arch/riscv/riscv_functions.h create mode 100644 3rdparty/zlib-ng/arch/s390/Makefile.in create mode 100644 3rdparty/zlib-ng/arch/s390/README.md create mode 100644 3rdparty/zlib-ng/arch/s390/crc32-vx.c create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_common.h create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_detail.h create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c create mode 100644 3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h create mode 100644 3rdparty/zlib-ng/arch/s390/s390_features.c create mode 100644 3rdparty/zlib-ng/arch/s390/s390_features.h create mode 100644 3rdparty/zlib-ng/arch/s390/s390_functions.h create mode 100644 3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.Dockerfile create mode 100644 3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.service create mode 100644 3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-global.json create mode 100644 3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-s390x.patch delete mode 100644 3rdparty/zlib-ng/arch/x86/insert_string_sse42.c create mode 100644 3rdparty/zlib-ng/arch/x86/x86_functions.h create mode 100644 3rdparty/zlib-ng/arch_functions.h create mode 100644 3rdparty/zlib-ng/cmake/detect-arch.c create mode 100644 3rdparty/zlib-ng/cmake/detect-arch.cmake create mode 100644 3rdparty/zlib-ng/cmake/detect-coverage.cmake create mode 100644 3rdparty/zlib-ng/cmake/detect-install-dirs.cmake create mode 100644 3rdparty/zlib-ng/cmake/detect-sanitizer.cmake create mode 100644 3rdparty/zlib-ng/crc32.c create mode 100644 3rdparty/zlib-ng/crc32.h delete mode 100644 3rdparty/zlib-ng/crc32_fold.h create mode 100644 3rdparty/zlib-ng/patches/zlib-ng-2.2.1-detect-intrinsics.patch create mode 100644 3rdparty/zlib-ng/patches/zlib-ng-2.2.1.patch create mode 100644 3rdparty/zlib-ng/win32/Makefile.a64 create mode 100644 3rdparty/zlib-ng/win32/Makefile.arm create mode 100644 3rdparty/zlib-ng/win32/Makefile.msc create mode 100644 3rdparty/zlib-ng/win32/depcheck.cpp create mode 100644 3rdparty/zlib-ng/win32/replace.vbs create mode 100644 3rdparty/zlib-ng/win32/zlib-ng.def.in create mode 100644 3rdparty/zlib-ng/win32/zlib-ng1.rc create mode 100644 3rdparty/zlib-ng/win32/zlib.def.in create mode 100644 3rdparty/zlib-ng/win32/zlib1.rc create mode 100644 3rdparty/zlib-ng/win32/zlibcompat.def.in create mode 100644 3rdparty/zlib-ng/zconf-ng.h.in create mode 100644 3rdparty/zlib-ng/zlib.map diff --git a/3rdparty/zlib-ng/CMakeLists.txt b/3rdparty/zlib-ng/CMakeLists.txt index c05511ca87..83e6dac542 100644 --- a/3rdparty/zlib-ng/CMakeLists.txt +++ b/3rdparty/zlib-ng/CMakeLists.txt @@ -1,12 +1,38 @@ -project(${ZLIB_LIBRARY} LANGUAGES C) - -if("c_std_11" IN_LIST CMAKE_C_COMPILE_FEATURES) - set(CMAKE_C_STANDARD 11) # The C standard whose features are requested to build this target +cmake_minimum_required(VERSION 3.5.1) +if(CMAKE_VERSION VERSION_LESS 3.12) + cmake_policy(VERSION ${CMAKE_VERSION}) else() - set(CMAKE_C_STANDARD 99) + cmake_policy(VERSION 3.5.1...3.29.0) endif() -set(CMAKE_C_STANDARD_REQUIRED ON) # Boolean describing whether the value of C_STANDARD is a requirement -set(CMAKE_C_EXTENSIONS OFF) # Boolean specifying whether compiler specific extensions are requested +message(STATUS "Using CMake version ${CMAKE_VERSION}") + +# If not specified on the command line, enable C11 as the default +# Configuration items that affect the global compiler environment standards +# should be issued before the "project" command. +if(NOT CMAKE_C_STANDARD) + set(CMAKE_C_STANDARD 11) # The C standard whose features are requested to build this target +endif() +if(NOT CMAKE_C_STANDARD_REQUIRED) + set(CMAKE_C_STANDARD_REQUIRED ON) # Boolean describing whether the value of C_STANDARD is a requirement +endif() +if(NOT CMAKE_C_EXTENSIONS) + set(CMAKE_C_EXTENSIONS OFF) # Boolean specifying whether compiler specific extensions are requested +endif() +set(VALID_C_STANDARDS "99" "11") +if(NOT CMAKE_C_STANDARD IN_LIST VALID_C_STANDARDS) + MESSAGE(FATAL_ERROR "CMAKE_C_STANDARD:STRING=${CMAKE_C_STANDARD} not in known standards list\n ${VALID_C_STANDARDS}") +endif() + +# Parse the full version number from zlib.h.in and include in ZLIB_FULL_VERSION +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h.in _zlib_h_contents) +string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([0-9]+.[0-9]+.[0-9]+).*\".*" + "\\1" ZLIB_HEADER_VERSION ${_zlib_h_contents}) +string(REGEX REPLACE ".*#define[ \t]+ZLIBNG_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*" + "\\1" ZLIBNG_HEADER_VERSION ${_zlib_h_contents}) +message(STATUS "ZLIB_HEADER_VERSION: ${ZLIB_HEADER_VERSION}") +message(STATUS "ZLIBNG_HEADER_VERSION: ${ZLIBNG_HEADER_VERSION}") + +project(zlib VERSION ${ZLIB_HEADER_VERSION} LANGUAGES C) include(CheckTypeSize) include(CheckSymbolExists) @@ -16,142 +42,325 @@ include(CheckCSourceCompiles) include(CheckCSourceRuns) include(CheckCCompilerFlag) include(CMakeDependentOption) +include(CMakePackageConfigHelpers) +include(FeatureSummary) -if(X86_64 OR X86) - set(BASEARCH_X86_FOUND TRUE) -endif() -if(AARCH64 OR ARM) - set(BASEARCH_ARM_FOUND TRUE) -endif() -if(PPC64LE OR PPC64) - set(BASEARCH_PPC_FOUND TRUE) -endif() -if(RISCV) - set(BASEARCH_RISCV_FOUND TRUE) -endif() - +include(cmake/detect-arch.cmake) +include(cmake/detect-install-dirs.cmake) +include(cmake/detect-coverage.cmake) include(cmake/detect-intrinsics.cmake) +include(cmake/detect-sanitizer.cmake) include(cmake/fallback-macros.cmake) -set(ZLIB_SYMBOL_PREFIX "") - -if(BASEARCH_X86_FOUND) - set(WITH_AVX2 ON) - set(WITH_AVX512 ON) - set(WITH_AVX512VNNI ON) - set(WITH_SSE2 ON) - set(WITH_SSSE3 ON) - set(WITH_SSE42 ON) - set(WITH_PCLMULQDQ ON) - set(WITH_VPCLMULQDQ ON) +if(CMAKE_TOOLCHAIN_FILE) + message(STATUS "Using CMake toolchain: ${CMAKE_TOOLCHAIN_FILE}") endif() + +# Make sure we use an appropriate BUILD_TYPE by default, "Release" to be exact +# this should select the maximum generic optimisation on the current platform (i.e. -O3 for gcc/clang) +get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT GENERATOR_IS_MULTI_CONFIG) + if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING + "Choose the type of build, standard options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) + add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (default)") + else() + add_feature_info(CMAKE_BUILD_TYPE 1 "Build type: ${CMAKE_BUILD_TYPE} (selected)") + endif() +endif() + +# +# Options parsing +# +option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON) +option(ZLIB_COMPAT "Compile with zlib compatible API" ON) +option(ZLIB_ENABLE_TESTS "Build test binaries" OFF) +option(ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API" OFF) +option(WITH_GTEST "Build gtest_zlib" OFF) +option(WITH_FUZZERS "Build test/fuzz" OFF) +option(WITH_BENCHMARKS "Build test/benchmarks" OFF) +option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF) +option(WITH_OPTIM "Build with optimisation" ON) +option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF) +option(WITH_NEW_STRATEGIES "Use new strategies" ON) +option(WITH_NATIVE_INSTRUCTIONS + "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF) +option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON) +option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF) +option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF) +option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF) +option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF) +option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON) + +set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols. +Useful when embedding into a larger library. +Default is no prefix (empty prefix).") + +# Add multi-choice option +set(WITH_SANITIZER AUTO CACHE STRING "Enable sanitizer support") +set_property(CACHE WITH_SANITIZER PROPERTY STRINGS "Memory" "Address" "Undefined" "Thread") + if(BASEARCH_ARM_FOUND) - set(WITH_ACLE ON) - set(WITH_NEON ON) - if(ARM) - set(WITH_ARMV6 ON) - else() - set(WITH_ARMV6 OFF) - endif() -endif() -if(BASEARCH_PPC_FOUND) - set(WITH_ALTIVEC ON) - set(WITH_POWER8 ON) - set(WITH_POWER9 ON) -endif() -if(BASEARCH_RISCV_FOUND) - set(WITH_RVV ON) + option(WITH_ACLE "Build with ACLE" ON) + option(WITH_NEON "Build with NEON intrinsics" ON) + cmake_dependent_option(WITH_ARMV6 "Build with ARMv6 SIMD" ON "NOT ARCH STREQUAL \"aarch64\"" OFF) +elseif(BASEARCH_PPC_FOUND) + option(WITH_ALTIVEC "Build with AltiVec (VMX) optimisations for PowerPC" ON) + option(WITH_POWER8 "Build with optimisations for POWER8" ON) + option(WITH_POWER9 "Build with optimisations for POWER9" ON) +elseif(BASEARCH_RISCV_FOUND) + option(WITH_RVV "Build with RVV intrinsics" ON) +elseif(BASEARCH_S360_FOUND) + option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) + option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) + option(WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z" ON) +elseif(BASEARCH_X86_FOUND) + option(WITH_SSE2 "Build with SSE2" ON) + cmake_dependent_option(WITH_SSSE3 "Build with SSSE3" ON "WITH_SSE2" OFF) + cmake_dependent_option(WITH_SSE42 "Build with SSE42" ON "WITH_SSSE3" OFF) + cmake_dependent_option(WITH_PCLMULQDQ "Build with PCLMULQDQ" ON "WITH_SSE42" OFF) + cmake_dependent_option(WITH_AVX2 "Build with AVX2" ON "WITH_SSE42" OFF) + cmake_dependent_option(WITH_AVX512 "Build with AVX512" ON "WITH_AVX2" OFF) + cmake_dependent_option(WITH_AVX512VNNI "Build with AVX512 VNNI extensions" ON "WITH_AVX512" OFF) + cmake_dependent_option(WITH_VPCLMULQDQ "Build with VPCLMULQDQ" ON "WITH_PCLMULQDQ;WITH_AVX512" OFF) endif() +option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF) -add_definitions(-DZLIB_COMPAT) +set(ZLIB_BUILD_SHARED_LIBS OFF) +set(SKIP_INSTALL_ALL ON) +ocv_warnings_disable(CMAKE_C_FLAGS -Wmissing-prototypes -Wmissing-declarations -Wundef -Wstrict-prototypes -Wtype-limits) +ocv_warnings_disable(CMAKE_C_FLAGS /wd4819 /wd4244 /wd4334) -add_definitions(-DWITH_GZFILEOP) +mark_as_advanced(FORCE + ZLIB_SYMBOL_PREFIX + WITH_REDUCED_MEM + WITH_ACLE WITH_NEON + WITH_ARMV6 + WITH_DFLTCC_DEFLATE + WITH_DFLTCC_INFLATE + WITH_CRC32_VX + WITH_AVX2 WITH_SSE2 + WITH_SSSE3 WITH_SSE42 + WITH_PCLMULQDQ + WITH_ALTIVEC + WITH_POWER8 + WITH_POWER9 + WITH_RVV + WITH_INFLATE_STRICT + WITH_INFLATE_ALLOW_INVALID_DIST + WITH_UNALIGNED + INSTALL_UTILS + ) + +if(ZLIB_COMPAT) + add_definitions(-DZLIB_COMPAT) + set(WITH_GZFILEOP ON) + set(SUFFIX "") + set(ZLIB_FULL_VERSION ${ZLIB_HEADER_VERSION}.zlib-ng) + set(EXPORT_NAME ZLIB) +else() + set(SUFFIX "-ng") + set(ZLIB_FULL_VERSION ${ZLIBNG_HEADER_VERSION}) + set(EXPORT_NAME zlib-ng) +endif() + +if(WITH_GZFILEOP) + add_definitions(-DWITH_GZFILEOP) +endif() if(CMAKE_C_COMPILER_ID MATCHES "^Intel") - set(WARNFLAGS_DISABLE) + if(CMAKE_HOST_UNIX) + set(WARNFLAGS -Wall) + set(WARNFLAGS_MAINTAINER -Wall -Wcheck -Wremarks) + set(WARNFLAGS_DISABLE) + else() + set(WARNFLAGS /Wall) + set(WARNFLAGS_MAINTAINER /W5) + set(WARNFLAGS_DISABLE) + endif() + check_c_compiler_flag(-diag-disable=10441 HAVE_DIAG_10441) + if(HAVE_DIAG_10441) + list(APPEND WARNFLAGS_DISABLE "-diag-disable=10441") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -diag-disable=10441") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -diag-disable=10441") + endif() elseif(MSVC) - # Minimum supported MSVC version is 1800 = Visual Studio 12.0/2013 - # See also https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html - if(MSVC_VERSION VERSION_LESS 1800) - message(SEND_ERROR "Unsupported Visual Studio compiler version (requires 2013 or later).") - endif() - # TODO. ICC can be used through MSVC. I'm not sure if we'd ever see that combination - # (who'd use cmake from an IDE...) but checking for ICC before checking for MSVC should - # avoid mistakes. - # /Oi ? - set(WARNFLAGS_DISABLE) - if(BASEARCH_ARM_FOUND) - add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) - if(NOT "${ARCH}" MATCHES "aarch64") - set(NEONFLAG "/arch:VFPv4") - endif() - endif() -elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - set(WARNFLAGS_DISABLE) - # Check whether -fno-lto is available - set(CMAKE_REQUIRED_FLAGS "-fno-lto") - check_c_source_compiles( - "int main() { return 0; }" - FNO_LTO_AVAILABLE FAIL_REGEX "not supported") - set(CMAKE_REQUIRED_FLAGS) - if(FNO_LTO_AVAILABLE) - set(ZNOLTOFLAG "-fno-lto") - endif() - if(BASEARCH_ARM_FOUND) - if(ARM AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi") - # Auto-detect support for ARM floating point ABI - check_include_file(features.h HAVE_FEATURES_H) - if(HAVE_FEATURES_H) - set(CMAKE_REQUIRED_FLAGS -mfloat-abi=softfp) - check_c_source_compiles( - "#include - int main() { return 0; }" - HAVE_FLOATABI_SOFTFP) - if(HAVE_FLOATABI_SOFTFP) - set(FLOATABI -mfloat-abi=softfp) - else() - set(CMAKE_REQUIRED_FLAGS -mfloat-abi=hard) - check_c_source_compiles( - "#include - int main() { return 0; }" - HAVE_FLOATABI_HARD) - if(HAVE_FLOATABI_HARD) - set(FLOATABI -mfloat-abi=hard) - endif() + # Minimum supported MSVC version is 1800 = Visual Studio 12.0/2013 + # See also https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html + if(MSVC_VERSION VERSION_LESS 1800) + message(SEND_ERROR "Unsupported Visual Studio compiler version (requires 2013 or later).") + endif() + # TODO. ICC can be used through MSVC. I'm not sure if we'd ever see that combination + # (who'd use cmake from an IDE...) but checking for ICC before checking for MSVC should + # avoid mistakes. + # /Oi ? + set(WARNFLAGS /W3) + set(WARNFLAGS_MAINTAINER /W4) + set(WARNFLAGS_DISABLE) + if(BASEARCH_ARM_FOUND) + add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) + if(NOT "${ARCH}" MATCHES "aarch64") + set(NEONFLAG "/arch:VFPv4") endif() - set(CMAKE_REQUIRED_FLAGS) - endif() - if(FLOATABI) - message(STATUS "${ZLIB_LIBRARY} ARM floating point arch: ${FLOATABI}") - add_compile_options(${FLOATABI}) - else() - message(STATUS "${ZLIB_LIBRARY} ARM floating point arch not auto-detected") - endif() endif() - endif() - if(FNO_LTO_AVAILABLE) - set(NOLTOFLAG ${ZNOLTOFLAG}) - endif() - if(MINGW) - # Add `-Wno-pedantic-ms-format` only if the toolchain supports it - check_c_compiler_flag(-Wno-pedantic-ms-format HAVE_NO_PEDANTIC_MS_FORMAT) - if(HAVE_NO_PEDANTIC_MS_FORMAT) - list(APPEND WARNFLAGS_DISABLE -Wno-pedantic-ms-format) +elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + # Enable warnings in GCC and Clang + set(WARNFLAGS -Wall) + set(WARNFLAGS_MAINTAINER -Wextra) + set(WARNFLAGS_DISABLE) + # Check whether -fno-lto is available + set(CMAKE_REQUIRED_FLAGS "-fno-lto") + check_c_source_compiles( + "int main() { return 0; }" + FNO_LTO_AVAILABLE FAIL_REGEX "not supported") + set(CMAKE_REQUIRED_FLAGS) + if(FNO_LTO_AVAILABLE) + set(ZNOLTOFLAG "-fno-lto") + endif() + if(NOT WITH_NATIVE_INSTRUCTIONS) + if(BASEARCH_ARM_FOUND) + if("${ARCH}" MATCHES "arm" AND NOT CMAKE_C_FLAGS MATCHES "-mfloat-abi") + # Auto-detect support for ARM floating point ABI + check_include_file(features.h HAVE_FEATURES_H) + if(HAVE_FEATURES_H) + set(CMAKE_REQUIRED_FLAGS -mfloat-abi=softfp) + check_c_source_compiles( + "#include + int main() { return 0; }" + HAVE_FLOATABI_SOFTFP) + if(HAVE_FLOATABI_SOFTFP) + set(FLOATABI -mfloat-abi=softfp) + else() + set(CMAKE_REQUIRED_FLAGS -mfloat-abi=hard) + check_c_source_compiles( + "#include + int main() { return 0; }" + HAVE_FLOATABI_HARD) + if(HAVE_FLOATABI_HARD) + set(FLOATABI -mfloat-abi=hard) + endif() + endif() + set(CMAKE_REQUIRED_FLAGS) + endif() + if(FLOATABI) + message(STATUS "ARM floating point arch: ${FLOATABI}") + add_compile_options(${FLOATABI}) + else() + message(STATUS "ARM floating point arch not auto-detected") + endif() + endif() + endif() + # Disable LTO unless Native Instructions are enabled + if(FNO_LTO_AVAILABLE) + set(NOLTOFLAG ${ZNOLTOFLAG}) + endif() + endif() + if(MINGW) + # Add `-Wno-pedantic-ms-format` only if the toolchain supports it + check_c_compiler_flag(-Wno-pedantic-ms-format HAVE_NO_PEDANTIC_MS_FORMAT) + if(HAVE_NO_PEDANTIC_MS_FORMAT) + list(APPEND WARNFLAGS_DISABLE -Wno-pedantic-ms-format) + endif() endif() - endif() endif() -# Force disable LTO -set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) +# Set native march/mcpu +if(WITH_NATIVE_INSTRUCTIONS) + if(NATIVE_ARCH_OVERRIDE) + message(STATUS "WARNING: WITH_NATIVE_INSTRUCTIONS enabled, but running with NATIVE_ARCH_OVERRIDE: ${NATIVE_ARCH_OVERRIDE}") + set(NATIVEFLAG "${NATIVE_ARCH_OVERRIDE}") + else() + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + check_c_compiler_flag(-march=native HAVE_MARCH_NATIVE) + if(HAVE_MARCH_NATIVE) + set(NATIVEFLAG "-march=native") + else() + check_c_compiler_flag(-mcpu=native HAVE_MCPU_NATIVE) + if(HAVE_MCPU_NATIVE) + set(NATIVEFLAG "-mcpu=native") + endif() + endif() + # Fall through + endif() + endif() + if(NATIVEFLAG) + # Apply flags to all source files and compilation checks + if(WIN32) + separate_arguments(NATIVEOPTIONS WINDOWS_COMMAND "${NATIVEFLAG}") + else() + separate_arguments(NATIVEOPTIONS UNIX_COMMAND "${NATIVEFLAG}") + endif() + add_compile_options(${NATIVEOPTIONS}) + set(WITH_RUNTIME_CPU_DETECTION OFF) + else() + message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") + set(WITH_NATIVE_INSTRUCTIONS OFF) + endif() +endif() + +# Compile without functable or CPU detection +if(NOT WITH_RUNTIME_CPU_DETECTION) + if(MSVC AND BASEARCH_X86_FOUND) + message(STATUS "WARNING: Microsoft Visual Studio does not support compile time detection of CPU features for \"/arch\" before \"AVX\"") + # Workaround for MSVC. By default MSVC does not define the __SSE*__ macros. + # Fix it if AVX is enabled. + set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}") + check_c_source_compiles( + "#ifndef __AVX__ + # error \"AVX is not enabled.\" + #endif + int main(void) { return 0; }" + MSVC_IS_ENABLED_AVX + ) + set(CMAKE_REQUIRED_FLAGS) + if(MSVC_IS_ENABLED_AVX) + add_definitions( + -D__SSE__=1 + -D__SSE2__=1 + -D__SSE3__=1 + -D__SSSE3__=1 + -D__SSE4_1__=1 + -D__SSE4_2__=1 + -D__PCLMUL__=1 + ) + endif() + endif() + add_definitions(-DDISABLE_RUNTIME_CPU_DETECTION) +endif() + +# Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active +if(NOT WITH_NATIVE_INSTRUCTIONS) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF) + foreach(_cfg_name IN LISTS CMAKE_CONFIGURATION_TYPES) + string(TOUPPER "${_cfg_name}" _cfg_name_uc) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_${_cfg_name_uc} OFF) + endforeach() +endif() + +# Set architecture alignment requirements +if(NOT WITH_UNALIGNED) + add_definitions(-DNO_UNALIGNED) + message(STATUS "Unaligned reads manually disabled") +endif() # Apply warning compiler flags -add_compile_options(${WARNFLAGS_DISABLE}) +if(WITH_MAINTAINER_WARNINGS) + add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE}) +else() + add_compile_options(${WARNFLAGS} ${WARNFLAGS_DISABLE}) +endif() + +# Set code coverage compiler flags +if(WITH_CODE_COVERAGE) + add_code_coverage() +endif() # Replace optimization level 3 added by default with level 2 -if(NOT MSVC AND NOT CMAKE_C_FLAGS MATCHES "([\\/\\-]O)3") - string(REGEX REPLACE "([\\/\\-]O)3" "\\12" - CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") +if(NOT WITH_CODE_COVERAGE AND NOT MSVC AND NOT CMAKE_C_FLAGS MATCHES "([\\/\\-]O)3") + string(REGEX REPLACE "([\\/\\-]O)3" "\\12" + CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") endif() # @@ -159,32 +368,40 @@ endif() # check_include_file(arm_acle.h HAVE_ARM_ACLE_H) if(HAVE_ARM_ACLE_H) - add_definitions(-DHAVE_ARM_ACLE_H) + add_definitions(-DHAVE_ARM_ACLE_H) endif() check_include_file(sys/auxv.h HAVE_SYS_AUXV_H) if(HAVE_SYS_AUXV_H) - add_definitions(-DHAVE_SYS_AUXV_H) + add_definitions(-DHAVE_SYS_AUXV_H) endif() check_include_file(sys/sdt.h HAVE_SYS_SDT_H) if(HAVE_SYS_SDT_H) - add_definitions(-DHAVE_SYS_SDT_H) + add_definitions(-DHAVE_SYS_SDT_H) endif() check_include_file(unistd.h HAVE_UNISTD_H) +# +# Check for Linux includes +# +check_include_file(linux/auxvec.h HAVE_LINUX_AUXVEC_H) +if(HAVE_LINUX_AUXVEC_H) + add_definitions(-DHAVE_LINUX_AUXVEC_H) +endif() + # # Check to see if we have large file support # set(CMAKE_REQUIRED_DEFINITIONS -D_LARGEFILE64_SOURCE=1 -D__USE_LARGEFILE64) check_type_size(off64_t OFF64_T) if(HAVE_OFF64_T) - add_definitions(-D_LARGEFILE64_SOURCE=1 -D__USE_LARGEFILE64) -else() - check_type_size(_off64_t _OFF64_T) - if(HAVE__OFF64_T) add_definitions(-D_LARGEFILE64_SOURCE=1 -D__USE_LARGEFILE64) - else() - check_type_size(__off64_t __OFF64_T) - endif() +else() + check_type_size(_off64_t _OFF64_T) + if(HAVE__OFF64_T) + add_definitions(-D_LARGEFILE64_SOURCE=1 -D__USE_LARGEFILE64) + else() + check_type_size(__off64_t __OFF64_T) + endif() endif() set(CMAKE_REQUIRED_DEFINITIONS) # clear variable @@ -193,499 +410,676 @@ set(CMAKE_REQUIRED_DEFINITIONS) # clear variable # check_function_exists(fseeko HAVE_FSEEKO) if(NOT HAVE_FSEEKO) - add_definitions(-DNO_FSEEKO) + add_definitions(-DNO_FSEEKO) endif() check_function_exists(strerror HAVE_STRERROR) if(NOT HAVE_STRERROR) - add_definitions(-DNO_STRERROR) + add_definitions(-DNO_STRERROR) endif() set(CMAKE_REQUIRED_DEFINITIONS -D_POSIX_C_SOURCE=200112L) check_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) if(HAVE_POSIX_MEMALIGN) - add_definitions(-DHAVE_POSIX_MEMALIGN) + add_definitions(-DHAVE_POSIX_MEMALIGN) endif() set(CMAKE_REQUIRED_DEFINITIONS) set(CMAKE_REQUIRED_DEFINITIONS -D_ISOC11_SOURCE=1) check_symbol_exists(aligned_alloc stdlib.h HAVE_ALIGNED_ALLOC) if(HAVE_ALIGNED_ALLOC) - add_definitions(-DHAVE_ALIGNED_ALLOC) + add_definitions(-DHAVE_ALIGNED_ALLOC) endif() set(CMAKE_REQUIRED_DEFINITIONS) +if(WITH_SANITIZER STREQUAL "Address") + add_address_sanitizer() +elseif(WITH_SANITIZER STREQUAL "Memory") + add_memory_sanitizer() +elseif(WITH_SANITIZER STREQUAL "Thread") + add_thread_sanitizer() +elseif(WITH_SANITIZER STREQUAL "Undefined") + add_undefined_sanitizer() +endif() + +# +# Check whether compiler supports -fno-semantic-interposition parameter +# +check_c_compiler_flag(-fno-semantic-interposition HAVE_NO_INTERPOSITION) + # # Check if we can hide zlib internal symbols that are linked between separate source files using hidden # check_c_source_compiles( - "#define Z_INTERNAL __attribute__((visibility (\"hidden\"))) - int Z_INTERNAL foo; - int main() { - return 0; - }" - HAVE_ATTRIBUTE_VISIBILITY_HIDDEN FAIL_REGEX "visibility") + "#define Z_INTERNAL __attribute__((visibility (\"hidden\"))) + int Z_INTERNAL foo; + int main() { + return 0; + }" + HAVE_ATTRIBUTE_VISIBILITY_HIDDEN FAIL_REGEX "visibility") if(HAVE_ATTRIBUTE_VISIBILITY_HIDDEN) - add_definitions(-DHAVE_VISIBILITY_HIDDEN) + add_definitions(-DHAVE_VISIBILITY_HIDDEN) endif() # # Check if we can hide zlib internal symbols that are linked between separate source files using internal # check_c_source_compiles( - "#define Z_INTERNAL __attribute__((visibility (\"internal\"))) - int Z_INTERNAL foo; - int main() { - return 0; - }" - HAVE_ATTRIBUTE_VISIBILITY_INTERNAL FAIL_REGEX "visibility") + "#define Z_INTERNAL __attribute__((visibility (\"internal\"))) + int Z_INTERNAL foo; + int main() { + return 0; + }" + HAVE_ATTRIBUTE_VISIBILITY_INTERNAL FAIL_REGEX "visibility") if(HAVE_ATTRIBUTE_VISIBILITY_INTERNAL) - add_definitions(-DHAVE_VISIBILITY_INTERNAL) + add_definitions(-DHAVE_VISIBILITY_INTERNAL) endif() # # Check for __attribute__((aligned(x))) support in the compiler # check_c_source_compiles( - "int main(void) { - __attribute__((aligned(8))) int test = 0; - (void)test; - return 0; - }" - HAVE_ATTRIBUTE_ALIGNED FAIL_REGEX "aligned") + "int main(void) { + __attribute__((aligned(8))) int test = 0; + (void)test; + return 0; + }" + HAVE_ATTRIBUTE_ALIGNED FAIL_REGEX "aligned") if(HAVE_ATTRIBUTE_ALIGNED) - add_definitions(-DHAVE_ATTRIBUTE_ALIGNED) + add_definitions(-DHAVE_ATTRIBUTE_ALIGNED) +endif() + +# +# Check for __builtin_assume_aligned(x,n) support in the compiler +# +check_c_source_compiles( + "char *test(char *buffer) { + char *abuffer = __builtin_assume_aligned(buffer,64); + return abuffer; + } + int main() { + return 0; + }" + HAVE_BUILTIN_ASSUME_ALIGNED) +if(HAVE_BUILTIN_ASSUME_ALIGNED) + add_definitions(-DHAVE_BUILTIN_ASSUME_ALIGNED) endif() # # check for __builtin_ctz() support in the compiler # check_c_source_compiles( - "int main(void) { - unsigned int zero = 0; - long test = __builtin_ctz(zero); - (void)test; - return 0; - }" - HAVE_BUILTIN_CTZ + "int main(void) { + unsigned int zero = 0; + long test = __builtin_ctz(zero); + (void)test; + return 0; + }" + HAVE_BUILTIN_CTZ ) if(HAVE_BUILTIN_CTZ) - add_definitions(-DHAVE_BUILTIN_CTZ) + add_definitions(-DHAVE_BUILTIN_CTZ) endif() # # check for __builtin_ctzll() support in the compiler # check_c_source_compiles( - "int main(void) { - unsigned int zero = 0; - long test = __builtin_ctzll(zero); - (void)test; - return 0; - }" - HAVE_BUILTIN_CTZLL + "int main(void) { + unsigned int zero = 0; + long test = __builtin_ctzll(zero); + (void)test; + return 0; + }" + HAVE_BUILTIN_CTZLL ) if(HAVE_BUILTIN_CTZLL) - add_definitions(-DHAVE_BUILTIN_CTZLL) + add_definitions(-DHAVE_BUILTIN_CTZLL) endif() # # check for ptrdiff_t support # check_c_source_compiles( - "#include - int main() { - ptrdiff_t *a; - (void)a; - return 0; - }" - HAVE_PTRDIFF_T + "#include + int main() { + ptrdiff_t *a; + (void)a; + return 0; + }" + HAVE_PTRDIFF_T ) if(NOT HAVE_PTRDIFF_T) - set(NEED_PTRDIFF_T 1) + set(NEED_PTRDIFF_T 1) - check_type_size("void *" SIZEOF_DATA_PTR) - message(STATUS "sizeof(void *) is ${SIZEOF_DATA_PTR} bytes") + check_type_size("void *" SIZEOF_DATA_PTR) + message(STATUS "sizeof(void *) is ${SIZEOF_DATA_PTR} bytes") - if(${SIZEOF_DATA_PTR} MATCHES "4") - set(PTRDIFF_TYPE "uint32_t") - elseif(${SIZEOF_DATA_PTR} MATCHES "8") - set(PTRDIFF_TYPE "uint64_t") - else() - message(FATAL_ERROR "sizeof(void *) is neither 32 nor 64 bit") - endif() + if(${SIZEOF_DATA_PTR} MATCHES "4") + set(PTRDIFF_TYPE "uint32_t") + elseif(${SIZEOF_DATA_PTR} MATCHES "8") + set(PTRDIFF_TYPE "uint64_t") + else() + message(FATAL_ERROR "sizeof(void *) is neither 32 nor 64 bit") + endif() endif() +add_compile_options($<$:-DZLIB_DEBUG>) + if(MSVC) - add_definitions(-D_CRT_SECURE_NO_DEPRECATE) - add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) + set(CMAKE_DEBUG_POSTFIX "d") + add_definitions(-D_CRT_SECURE_NO_DEPRECATE) + add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) endif() +if(BASEARCH_X86_FOUND) + # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true + if("${ARCH}" MATCHES "i[3-6]86") + cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF) + endif() +endif() + +# +# Enable deflate_quick at level 1 +# +if(NOT WITH_NEW_STRATEGIES) + add_definitions(-DNO_QUICK_STRATEGY) +endif() +# +# Enable deflate_medium at level 4-6 +# +if(NOT WITH_NEW_STRATEGIES) + add_definitions(-DNO_MEDIUM_STRATEGY) +endif() +# +# Enable inflate compilation options +# +if(WITH_INFLATE_STRICT) + add_definitions(-DINFLATE_STRICT) + message(STATUS "Inflate strict distance checking enabled") +endif() +if(WITH_INFLATE_ALLOW_INVALID_DIST) + add_definitions(-DINFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR) + message(STATUS "Inflate zero data for invalid distances enabled") +endif() +# +# Enable reduced memory configuration +# +if(WITH_REDUCED_MEM) + add_definitions(-DHASH_SIZE=32768u -DGZBUFSIZE=8192 -DNO_LIT_MEM) + message(STATUS "Configured for reduced memory environment") +endif() + +set(GENERIC_ARCHDIR "arch/generic") + set(ZLIB_ARCH_SRCS) -set(ZLIB_ARCH_HDRS) -set(ARCHDIR "arch/generic") -if(BASEARCH_X86_FOUND) - set(ARCHDIR "arch/x86") -endif() +set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_functions.h) + if(BASEARCH_ARM_FOUND) - set(ARCHDIR "arch/arm") -endif() -if(BASEARCH_PPC_FOUND) - set(ARCHDIR "arch/power") -endif() -if(BASEARCH_RISCV_FOUND) - set(ARCHDIR "arch/riscv") + set(ARCHDIR "arch/arm") +elseif(BASEARCH_PPC_FOUND) + set(ARCHDIR "arch/power") +elseif(BASEARCH_RISCV_FOUND) + set(ARCHDIR "arch/riscv") +elseif(BASEARCH_S360_FOUND) + set(ARCHDIR "arch/s390") +elseif(BASEARCH_X86_FOUND) + set(ARCHDIR "arch/x86") + if(NOT ${ARCH} MATCHES "x86_64") + add_feature_info(SSE2 1 "Support the SSE2 instruction set, using \"${SSE2FLAG}\"") + endif() +else() + set(ARCHDIR ${GENERIC_ARCHDIR}) + message(STATUS "No optimized architecture: using ${ARCHDIR}") endif() -if(NOT CV_DISABLE_OPTIMIZATION) - if(BASEARCH_ARM_FOUND) - add_definitions(-DARM_FEATURES) - if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if("${ARCH}" MATCHES "aarch64") - check_c_source_compiles( - "#include - int main() { - return (getauxval(AT_HWCAP) & HWCAP_CRC32); - }" - ARM_AUXV_HAS_CRC32 - ) - if(ARM_AUXV_HAS_CRC32) - add_definitions(-DARM_AUXV_HAS_CRC32) - else() - message(STATUS "HWCAP_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") +if(WITH_OPTIM) + if(BASEARCH_ARM_FOUND) + add_definitions(-DARM_FEATURES) + if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + if("${ARCH}" MATCHES "aarch64") + check_c_source_compiles( + "#include + int main() { + return (getauxval(AT_HWCAP) & HWCAP_CRC32); + }" + ARM_AUXV_HAS_CRC32 + ) + if(ARM_AUXV_HAS_CRC32) + add_definitions(-DARM_AUXV_HAS_CRC32) + else() + message(STATUS "HWCAP_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") + endif() + else() + check_c_source_compiles( + "#include + int main() { + return (getauxval(AT_HWCAP2) & HWCAP2_CRC32); + }" + ARM_AUXV_HAS_CRC32 + ) + if(ARM_AUXV_HAS_CRC32) + add_definitions(-DARM_AUXV_HAS_CRC32) + else() + check_c_source_compiles( + "#include + #include + int main() { + return (getauxval(AT_HWCAP2) & HWCAP2_CRC32); + }" + ARM_HWCAP_HAS_CRC32 + ) + if (ARM_HWCAP_HAS_CRC32) + add_definitions(-DARM_AUXV_HAS_CRC32 -DARM_ASM_HWCAP) + else() + message(STATUS "HWCAP2_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") + endif() + endif() + check_c_source_compiles( + "#include + int main() { + return (getauxval(AT_HWCAP) & HWCAP_ARM_NEON); + }" + ARM_AUXV_HAS_NEON + ) + if(ARM_AUXV_HAS_NEON) + add_definitions(-DARM_AUXV_HAS_NEON) + else() + check_c_source_compiles( + "#include + int main() { + return (getauxval(AT_HWCAP) & HWCAP_NEON); + }" + ARM_AUXV_HAS_NEON + ) + if (ARM_AUXV_HAS_NEON) + add_definitions(-DARM_AUXV_HAS_NEON) + else() + message(STATUS "Neither HWCAP_ARM_NEON or HWCAP_NEON present in sys/auxv.h; cannot detect support at runtime.") + endif() + endif() + endif() endif() - else() - check_c_source_compiles( - "#include - int main() { - return (getauxval(AT_HWCAP2) & HWCAP2_CRC32); - }" - ARM_AUXV_HAS_CRC32 - ) - if(ARM_AUXV_HAS_CRC32) - add_definitions(-DARM_AUXV_HAS_CRC32) - else() - check_c_source_compiles( - "#include - #include - int main() { - return (getauxval(AT_HWCAP2) & HWCAP2_CRC32); - }" - ARM_HWCAP_HAS_CRC32 - ) - if(ARM_HWCAP_HAS_CRC32) - add_definitions(-DARM_AUXV_HAS_CRC32 -DARM_ASM_HWCAP) - else() - message(STATUS "HWCAP2_CRC32 not present in sys/auxv.h; cannot detect support at runtime.") - endif() + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c) endif() - check_c_source_compiles( - "#include - int main() { - return (getauxval(AT_HWCAP) & HWCAP_ARM_NEON); - }" - ARM_AUXV_HAS_NEON - ) - if(ARM_AUXV_HAS_NEON) - add_definitions(-DARM_AUXV_HAS_NEON) + + if(WITH_ACLE) + check_acle_compiler_flag() + if(HAVE_ACLE_FLAG) + add_definitions(-DARM_ACLE) + set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c) + set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}") + list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS}) + add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"") + else() + set(WITH_ACLE OFF) + endif() else() - check_c_source_compiles( - "#include - int main() { - return (getauxval(AT_HWCAP) & HWCAP_NEON); - }" - ARM_AUXV_HAS_NEON - ) - if (ARM_AUXV_HAS_NEON) - add_definitions(-DARM_AUXV_HAS_NEON) - else() - message(STATUS "Neither HWCAP_ARM_NEON or HWCAP_NEON present in sys/auxv.h; cannot detect support at runtime.") - endif() + set(WITH_ACLE OFF) + endif() + if(WITH_NEON) + check_neon_compiler_flag() + if(NEON_AVAILABLE) + add_definitions(-DARM_NEON) + set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c + ${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c) + list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS}) + set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG} ${NOLTOFLAG}") + if(MSVC) + add_definitions(-D__ARM_NEON__) + endif() + add_feature_info(NEON_ADLER32 1 "Support NEON instructions in adler32, using \"${NEONFLAG}\"") + add_feature_info(NEON_SLIDEHASH 1 "Support NEON instructions in slide_hash, using \"${NEONFLAG}\"") + check_neon_ld4_intrinsics() + if(NEON_HAS_LD4) + add_definitions(-DARM_NEON_HASLD4) + endif() + else() + set(WITH_NEON OFF) + endif() + endif() + if(WITH_ARMV6) + check_armv6_compiler_flag() + if(HAVE_ARMV6_INLINE_ASM OR HAVE_ARMV6_INTRIN) + add_definitions(-DARM_SIMD) + set(ARMV6_SRCS ${ARCHDIR}/slide_hash_armv6.c) + set_property(SOURCE ${ARMV6_SRCS} PROPERTY COMPILE_FLAGS "${ARMV6FLAG} ${NOLTOFLAG}") + list(APPEND ZLIB_ARCH_SRCS ${ARMV6_SRCS}) + add_feature_info(ARMV6 1 "Support ARMv6 SIMD instructions in slide_hash, using \"${ARMV6FLAG}\"") + if(HAVE_ARMV6_INTRIN) + add_definitions(-DARM_SIMD_INTRIN) + endif() + else() + set(WITH_ARMV6 OFF) + endif() + else() + set(WITH_ARMV6 OFF) + endif() + elseif(BASEARCH_PPC_FOUND) + # Common arch detection code + if(WITH_ALTIVEC) + check_ppc_intrinsics() + endif() + if(WITH_POWER8) + check_power8_intrinsics() + endif() + if(WITH_POWER9) + check_power9_intrinsics() + endif() + if(POWER8_NEED_AUXVEC_H OR POWER9_NEED_AUXVEC_H) + add_definitions(-DPOWER_NEED_AUXVEC_H) + endif() + if(HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN) + add_definitions(-DPOWER_FEATURES) + endif() + if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c) + endif() + endif() + # VMX specific options and files + if(WITH_ALTIVEC) + if(HAVE_VMX) + add_definitions(-DPPC_FEATURES) + if(HAVE_ALTIVEC) + add_definitions(-DPPC_VMX) + set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c) + list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS}) + add_feature_info(ALTIVEC 1 "Support the AltiVec instruction set, using \"-maltivec\"") + set_property(SOURCE ${PPC_SRCS} PROPERTY COMPILE_FLAGS "${PPCFLAGS}") + else() + set(WITH_ALTIVEC OFF) + endif() + endif() + endif() + # Power8 specific options and files + if(WITH_POWER8) + if(HAVE_POWER8_INTRIN) + add_definitions(-DPOWER8_VSX) + set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c) + if("${ARCH}" MATCHES "powerpc64(le)?") + add_definitions(-DPOWER8_VSX_CRC32) + list(APPEND POWER8_SRCS ${ARCHDIR}/crc32_power8.c) + endif() + list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS}) + set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}") + else() + set(WITH_POWER8 OFF) + endif() + endif() + # Power9 specific options and files + if(WITH_POWER9) + if(HAVE_POWER9_INTRIN) + add_definitions(-DPOWER9) + set(POWER9_SRCS ${ARCHDIR}/compare256_power9.c) + list(APPEND ZLIB_ARCH_SRCS ${POWER9_SRCS}) + set_property(SOURCE ${POWER9_SRCS} PROPERTY COMPILE_FLAGS "${POWER9FLAG} ${NOLTOFLAG}") + else() + set(WITH_POWER9 OFF) + endif() + endif() + elseif(BASEARCH_RISCV_FOUND) + if(WITH_RVV) + check_rvv_intrinsics() + if(HAVE_RVV_INTRIN) + add_definitions(-DRISCV_FEATURES) + add_definitions(-DRISCV_RVV) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) + endif() + # FIXME: we will not set compile flags for riscv_features.c when + # the kernels update hwcap or hwprobe for riscv + set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c) + endif() + list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) + set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") + else() + set(WITH_RVV OFF) + endif() + endif() + elseif(BASEARCH_S360_FOUND) + check_s390_intrinsics() + if(HAVE_S390_INTRIN) + add_definitions(-DS390_FEATURES) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c) + endif() + endif() + if(WITH_DFLTCC_DEFLATE) + add_definitions(-DS390_DFLTCC_DEFLATE) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_deflate.c) + endif() + if(WITH_DFLTCC_INFLATE) + add_definitions(-DS390_DFLTCC_INFLATE) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_inflate.c) + endif() + if(WITH_CRC32_VX) + check_vgfma_intrinsics() + if(HAVE_VGFMA_INTRIN) + add_definitions(-DS390_CRC32_VX) + set(CRC32_VX_SRCS ${ARCHDIR}/crc32-vx.c) + list(APPEND ZLIB_ARCH_SRCS ${CRC32_VX_SRCS}) + set_property(SOURCE ${CRC32_VX_SRCS} PROPERTY COMPILE_FLAGS "${VGFMAFLAG} ${NOLTOFLAG}") + else() + set(WITH_CRC32_VX OFF) + endif() + endif() + elseif(BASEARCH_X86_FOUND) + add_definitions(-DX86_FEATURES) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c) endif() - endif() - endif() - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c) - if(WITH_ACLE) - check_acle_compiler_flag() - if(HAVE_ACLE_FLAG) - add_definitions(-DARM_ACLE) - set(ACLE_SRCS ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) - set_property(SOURCE ${ACLE_SRCS} PROPERTY COMPILE_FLAGS "${ACLEFLAG} ${NOLTOFLAG}") - list(APPEND ZLIB_ARCH_SRCS ${ACLE_SRCS}) - else() - set(WITH_ACLE OFF) - endif() - else() - set(WITH_ACLE OFF) - endif() - if(WITH_NEON) - check_neon_compiler_flag() - if(NEON_AVAILABLE) - add_definitions(-DARM_NEON) - set(NEON_SRCS ${ARCHDIR}/adler32_neon.c ${ARCHDIR}/chunkset_neon.c - ${ARCHDIR}/compare256_neon.c ${ARCHDIR}/slide_hash_neon.c) - list(APPEND ZLIB_ARCH_SRCS ${NEON_SRCS}) - set_property(SOURCE ${NEON_SRCS} PROPERTY COMPILE_FLAGS "${NEONFLAG} ${NOLTOFLAG}") if(MSVC) - add_definitions(-D__ARM_NEON__) + list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h) endif() - check_neon_ld4_intrinsics() - if(NEON_HAS_LD4) - add_definitions(-DARM_NEON_HASLD4) + check_xsave_intrinsics() + if(HAVE_XSAVE_INTRIN) + add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"") + if(WITH_RUNTIME_CPU_DETECTION) + set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}") + endif() + if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2)) + add_definitions(-DX86_HAVE_XSAVE_INTRIN) + endif() endif() - else() - set(WITH_NEON OFF) - endif() - endif() - if(WITH_ARMV6) - check_armv6_compiler_flag() - if(HAVE_ARMV6_INLINE_ASM OR HAVE_ARMV6_INTRIN) - add_definitions(-DARM_SIMD) - set(ARMV6_SRCS ${ARCHDIR}/slide_hash_armv6.c) - set_property(SOURCE ${ARMV6_SRCS} PROPERTY COMPILE_FLAGS "${ARMV6FLAG} ${NOLTOFLAG}") - list(APPEND ZLIB_ARCH_SRCS ${ARMV6_SRCS}) - if(HAVE_ARMV6_INTRIN) - add_definitions(-DARM_SIMD_INTRIN) + if(WITH_SSE2) + check_sse2_intrinsics() + if(HAVE_SSE2_INTRIN) + add_definitions(-DX86_SSE2) + set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c) + list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS}) + if(NOT ${ARCH} MATCHES "x86_64") + set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}") + add_feature_info(FORCE_SSE2 FORCE_SSE2 "Assume CPU is SSE2 capable") + if(FORCE_SSE2) + add_definitions(-DX86_NOCHECK_SSE2) + endif() + endif() + else() + set(WITH_SSE2 OFF) + endif() endif() - else() - set(WITH_ARMV6 OFF) - endif() - else() - set(WITH_ARMV6 OFF) - endif() - endif() - if(BASEARCH_PPC_FOUND) - # Common arch detection code - if(WITH_ALTIVEC) - check_ppc_intrinsics() - endif() - if(WITH_POWER8) - check_power8_intrinsics() - endif() - if(WITH_POWER9) - check_power9_intrinsics() - endif() - if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN) - add_definitions(-DPOWER_FEATURES) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c) - endif() - # VMX specific options and files - if(WITH_ALTIVEC) - if(HAVE_VMX) - add_definitions(-DPPC_FEATURES) - if(HAVE_ALTIVEC) - add_definitions(-DPPC_VMX) - set(PPC_SRCS ${ARCHDIR}/adler32_vmx.c ${ARCHDIR}/slide_hash_vmx.c) - list(APPEND ZLIB_ARCH_SRCS ${PPC_SRCS}) - set_property(SOURCE ${PPC_SRCS} PROPERTY COMPILE_FLAGS "${PPCFLAGS}") - else() - set(WITH_ALTIVEC OFF) + if(WITH_SSSE3) + check_ssse3_intrinsics() + if(HAVE_SSSE3_INTRIN AND WITH_SSE2) + add_definitions(-DX86_SSSE3) + set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c) + add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) + set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") + else() + set(WITH_SSSE3 OFF) + endif() endif() - endif() - endif() - # Power8 specific options and files - if(WITH_POWER8) - if(HAVE_POWER8_INTRIN) - add_definitions(-DPOWER8_VSX) - set(POWER8_SRCS ${ARCHDIR}/adler32_power8.c ${ARCHDIR}/chunkset_power8.c ${ARCHDIR}/slide_hash_power8.c) - if("${ARCH}" MATCHES "powerpc64(le)?") - add_definitions(-DPOWER8_VSX_CRC32) - list(APPEND POWER8_SRCS ${ARCHDIR}/crc32_power8.c) + if(WITH_SSE42) + check_sse42_intrinsics() + if(HAVE_SSE42_INTRIN AND WITH_SSSE3) + add_definitions(-DX86_SSE42) + set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c) + add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized adler32 hash generation, using \"${SSE42FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) + set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}") + else() + set(WITH_SSE42 OFF) + endif() endif() - list(APPEND ZLIB_ARCH_SRCS ${POWER8_SRCS}) - set_property(SOURCE ${POWER8_SRCS} PROPERTY COMPILE_FLAGS "${POWER8FLAG} ${NOLTOFLAG}") - else() - set(WITH_POWER8 OFF) - endif() - endif() - # Power9 specific options and files - if(WITH_POWER9) - if(HAVE_POWER9_INTRIN) - add_definitions(-DPOWER9) - set(POWER9_SRCS ${ARCHDIR}/compare256_power9.c) - list(APPEND ZLIB_ARCH_SRCS ${POWER9_SRCS}) - set_property(SOURCE ${POWER9_SRCS} PROPERTY COMPILE_FLAGS "${POWER9FLAG} ${NOLTOFLAG}") - else() - set(WITH_POWER9 OFF) - endif() - endif() - endif() - if(BASEARCH_RISCV_FOUND) - if(WITH_RVV) - check_rvv_intrinsics() - if(HAVE_RVV_INTRIN) - add_definitions(-DRISCV_FEATURES) - add_definitions(-DRISCV_RVV) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) - # FIXME: we will not set compile flags for riscv_features.c when - # the kernels update hwcap or hwprobe for riscv - set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) - list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) - set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") - else() - set(WITH_RVV OFF) - endif() - endif() - endif() - if(BASEARCH_X86_FOUND) - add_definitions(-DX86_FEATURES) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c) - if(MSVC) - list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h) - endif() - if(WITH_AVX2) - check_avx2_intrinsics() - if(HAVE_AVX2_INTRIN) - add_definitions(-DX86_AVX2) - set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c) - list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c) - list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c) - list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c) - list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS}) - set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}") - else() - set(WITH_AVX2 OFF) - endif() - endif() - if(WITH_AVX512) - check_avx512_intrinsics() - if(HAVE_AVX512_INTRIN) - add_definitions(-DX86_AVX512) - list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c) - list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS}) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/adler32_avx512_p.h) - if(HAVE_MASK_INTRIN) - add_definitions(-DX86_MASK_INTRIN) + if(WITH_PCLMULQDQ) + check_pclmulqdq_intrinsics() + if(HAVE_PCLMULQDQ_INTRIN AND WITH_SSE42) + add_definitions(-DX86_PCLMULQDQ_CRC) + set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c) + add_feature_info(PCLMUL_CRC 1 "Support CRC hash generation using PCLMULQDQ, using \"${SSE42FLAG} ${PCLMULFLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS}) + set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}") + else() + set(WITH_PCLMULQDQ OFF) + endif() endif() - set_property(SOURCE ${AVX512_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}") - else() - set(WITH_AVX512 OFF) - endif() - endif() - if(WITH_AVX512VNNI) - check_avx512vnni_intrinsics() - if(HAVE_AVX512VNNI_INTRIN) - add_definitions(-DX86_AVX512VNNI) - list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c) - list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS}) - set_property(SOURCE ${AVX512VNNI_SRCS} PROPERTY COMPILE_FLAGS "${AVX512VNNIFLAG} ${NOLTOFLAG}") - else() - set(WITH_AVX512VNNI OFF) - endif() - endif() - if(WITH_SSE42) - check_sse42_intrinsics() - if(HAVE_SSE42_INTRIN) - add_definitions(-DX86_SSE42) - set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c) - list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) - set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}") - else() - set(WITH_SSE42 OFF) - endif() - endif() - if(WITH_SSE2) - check_sse2_intrinsics() - if(HAVE_SSE2_INTRIN) - add_definitions(-DX86_SSE2) - set(SSE2_SRCS ${ARCHDIR}/chunkset_sse2.c ${ARCHDIR}/compare256_sse2.c ${ARCHDIR}/slide_hash_sse2.c) - list(APPEND ZLIB_ARCH_SRCS ${SSE2_SRCS}) - if(NOT ${ARCH} MATCHES "x86_64") - set_property(SOURCE ${SSE2_SRCS} PROPERTY COMPILE_FLAGS "${SSE2FLAG} ${NOLTOFLAG}") - add_definitions(-DX86_NOCHECK_SSE2) + if(WITH_AVX2) + check_avx2_intrinsics() + if(HAVE_AVX2_INTRIN AND WITH_SSE42) + add_definitions(-DX86_AVX2) + set(AVX2_SRCS ${ARCHDIR}/slide_hash_avx2.c) + add_feature_info(AVX2_SLIDEHASH 1 "Support AVX2 optimized slide_hash, using \"${AVX2FLAG}\"") + list(APPEND AVX2_SRCS ${ARCHDIR}/chunkset_avx2.c) + add_feature_info(AVX2_CHUNKSET 1 "Support AVX2 optimized chunkset, using \"${AVX2FLAG}\"") + list(APPEND AVX2_SRCS ${ARCHDIR}/compare256_avx2.c) + add_feature_info(AVX2_COMPARE256 1 "Support AVX2 optimized compare256, using \"${AVX2FLAG}\"") + list(APPEND AVX2_SRCS ${ARCHDIR}/adler32_avx2.c) + add_feature_info(AVX2_ADLER32 1 "Support AVX2-accelerated adler32, using \"${AVX2FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${AVX2_SRCS}) + set_property(SOURCE ${AVX2_SRCS} PROPERTY COMPILE_FLAGS "${AVX2FLAG} ${NOLTOFLAG}") + else() + set(WITH_AVX2 OFF) + endif() endif() - else() - set(WITH_SSE2 OFF) - endif() - endif() - if(WITH_SSSE3) - check_ssse3_intrinsics() - if(HAVE_SSSE3_INTRIN) - add_definitions(-DX86_SSSE3) - set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c ${ARCHDIR}/chunkset_ssse3.c) - list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) - set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") - else() - set(WITH_SSSE3 OFF) - endif() - endif() - if(WITH_PCLMULQDQ AND WITH_SSSE3 AND WITH_SSE42) - check_pclmulqdq_intrinsics() - if(HAVE_PCLMULQDQ_INTRIN AND HAVE_SSSE3_INTRIN) - add_definitions(-DX86_PCLMULQDQ_CRC) - set(PCLMULQDQ_SRCS ${ARCHDIR}/crc32_pclmulqdq.c) - list(APPEND ZLIB_ARCH_SRCS ${PCLMULQDQ_SRCS}) - set_property(SOURCE ${PCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${NOLTOFLAG}") - - if(WITH_VPCLMULQDQ AND WITH_AVX512) - check_vpclmulqdq_intrinsics() - if(HAVE_VPCLMULQDQ_INTRIN AND HAVE_AVX512_INTRIN) - add_definitions(-DX86_VPCLMULQDQ_CRC) - set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c) - list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS}) - set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${SSE42FLAG} ${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}") - else() - set(WITH_VPCLMULQDQ OFF) - endif() - else() - set(WITH_VPCLMULQDQ OFF) + if(WITH_AVX512) + check_avx512_intrinsics() + if(HAVE_AVX512_INTRIN AND WITH_AVX2) + add_definitions(-DX86_AVX512) + list(APPEND AVX512_SRCS ${ARCHDIR}/adler32_avx512.c) + add_feature_info(AVX512_ADLER32 1 "Support AVX512-accelerated adler32, using \"${AVX512FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${AVX512_SRCS}) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/adler32_avx512_p.h) + set_property(SOURCE ${AVX512_SRCS} PROPERTY COMPILE_FLAGS "${AVX512FLAG} ${NOLTOFLAG}") + else() + set(WITH_AVX512 OFF) + endif() + endif() + if(WITH_AVX512VNNI) + check_avx512vnni_intrinsics() + if(HAVE_AVX512VNNI_INTRIN AND WITH_AVX2) + add_definitions(-DX86_AVX512VNNI) + add_feature_info(AVX512VNNI_ADLER32 1 "Support AVX512VNNI adler32, using \"${AVX512VNNIFLAG}\"") + list(APPEND AVX512VNNI_SRCS ${ARCHDIR}/adler32_avx512_vnni.c) + list(APPEND ZLIB_ARCH_SRCS ${AVX512VNNI_SRCS}) + set_property(SOURCE ${AVX512VNNI_SRCS} PROPERTY COMPILE_FLAGS "${AVX512VNNIFLAG} ${NOLTOFLAG}") + else() + set(WITH_AVX512VNNI OFF) + endif() + endif() + if(WITH_VPCLMULQDQ) + check_vpclmulqdq_intrinsics() + if(HAVE_VPCLMULQDQ_INTRIN AND WITH_PCLMULQDQ AND WITH_AVX512) + add_definitions(-DX86_VPCLMULQDQ_CRC) + set(VPCLMULQDQ_SRCS ${ARCHDIR}/crc32_vpclmulqdq.c) + add_feature_info(VPCLMUL_CRC 1 "Support CRC hash generation using VPCLMULQDQ, using \"${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${VPCLMULQDQ_SRCS}) + set_property(SOURCE ${VPCLMULQDQ_SRCS} PROPERTY COMPILE_FLAGS "${PCLMULFLAG} ${VPCLMULFLAG} ${AVX512FLAG} ${NOLTOFLAG}") + else() + set(WITH_VPCLMULQDQ OFF) + endif() endif() - else() - set(WITH_PCLMULQDQ OFF) - set(WITH_VPCLMULQDQ OFF) - endif() - else() - set(WITH_PCLMULQDQ OFF) - set(WITH_VPCLMULQDQ OFF) endif() - check_xsave_intrinsics() - if(HAVE_XSAVE_INTRIN) - set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}") - endif() - endif() endif() +message(STATUS "Architecture-specific source files: ${ZLIB_ARCH_SRCS}") + #============================================================================ # zconf.h #============================================================================ macro(generate_cmakein input output) - file(REMOVE ${output}) - file(STRINGS ${input} _lines) - foreach(_line IN LISTS _lines) - string(REGEX REPLACE "#ifdef HAVE_UNISTD_H.*" "@ZCONF_UNISTD_LINE@" _line "${_line}") - string(REGEX REPLACE "#ifdef NEED_PTRDIFF_T.*" "@ZCONF_PTRDIFF_LINE@" _line "${_line}") - if(NEED_PTRDIFF_T) - string(REGEX REPLACE "typedef PTRDIFF_TYPE" "typedef @PTRDIFF_TYPE@" _line "${_line}") - endif() - file(APPEND ${output} "${_line}\n") - endforeach() + file(REMOVE ${output}) + file(STRINGS ${input} _lines) + foreach(_line IN LISTS _lines) + string(REGEX REPLACE "#ifdef HAVE_UNISTD_H.*" "@ZCONF_UNISTD_LINE@" _line "${_line}") + string(REGEX REPLACE "#ifdef NEED_PTRDIFF_T.*" "@ZCONF_PTRDIFF_LINE@" _line "${_line}") + if(NEED_PTRDIFF_T) + string(REGEX REPLACE "typedef PTRDIFF_TYPE" "typedef @PTRDIFF_TYPE@" _line "${_line}") + endif() + file(APPEND ${output} "${_line}\n") + endforeach() endmacro(generate_cmakein) -generate_cmakein( ${CMAKE_CURRENT_SOURCE_DIR}/zconf.h.in ${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cmakein ) +generate_cmakein( ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.in ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h.cmakein ) + +if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) + # If we're doing an out of source build and the user has a zconf.h + # in their source tree... + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h) + message(STATUS "Renaming") + message(STATUS " ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h") + message(STATUS "to 'zconf${SUFFIX}.h.included' because this file is included with zlib") + message(STATUS "but CMake generates it automatically in the build directory.") + file(RENAME ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.included) + endif() + + # If we're doing an out of source build and the user has a zconf.h.cmakein + # in their source tree... + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.cmakein) + message(STATUS "Renaming") + message(STATUS " ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.cmakein") + message(STATUS "to 'zconf${SUFFIX}.h.cmakeincluded' because this file is included with zlib") + message(STATUS "but CMake generates it automatically in the build directory.") + file(RENAME ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.cmakein ${CMAKE_CURRENT_SOURCE_DIR}/zconf${SUFFIX}.h.cmakeincluded) + endif() +endif() + +# The user is allowed (but discouraged) to set absolute CMAKE_INSTALL_*DIR paths. +# If they do, we copy these non-relocatable paths into the pkg-config file. +if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") + set(PC_INC_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}") +else() + set(PC_INC_INSTALL_DIR "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +endif() + +if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") + set(PC_LIB_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}") +else() + set(PC_LIB_INSTALL_DIR "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") +endif() #============================================================================ # zlib #============================================================================ set(ZLIB_PUBLIC_HDRS - ${CMAKE_CURRENT_BINARY_DIR}/zconf.h - ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling.h - ${CMAKE_CURRENT_BINARY_DIR}/zlib.h + ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h + ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h + ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h ) set(ZLIB_PRIVATE_HDRS adler32_p.h chunkset_tpl.h compare256_rle.h - cpu_features.h + arch_functions.h crc32_braid_p.h crc32_braid_comb_p.h crc32_braid_tbl.h - crc32_fold.h deflate.h deflate_p.h functable.h @@ -704,15 +1098,17 @@ set(ZLIB_PRIVATE_HDRS zutil.h ) set(ZLIB_SRCS + arch/generic/adler32_c.c + arch/generic/adler32_fold_c.c + arch/generic/chunkset_c.c + arch/generic/compare256_c.c + arch/generic/crc32_braid_c.c + arch/generic/crc32_fold_c.c + arch/generic/slide_hash_c.c adler32.c - adler32_fold.c - chunkset.c - compare256.c compress.c - cpu_features.c - crc32_braid.c + crc32.c crc32_braid_comb.c - crc32_fold.c deflate.c deflate_fast.c deflate_huff.c @@ -727,12 +1123,16 @@ set(ZLIB_SRCS inftrees.c insert_string.c insert_string_roll.c - slide_hash.c trees.c uncompr.c zutil.c ) +if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h) + list(APPEND ZLIB_SRCS cpu_features.c) +endif() + set(ZLIB_GZFILE_PRIVATE_HDRS gzguts.h ) @@ -743,13 +1143,124 @@ set(ZLIB_GZFILE_SRCS ) set(ZLIB_ALL_SRCS ${ZLIB_SRCS} ${ZLIB_ARCH_HDRS} ${ZLIB_ARCH_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS}) -list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS}) +if(WITH_GZFILEOP) + list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS}) +endif() -add_library(zlib STATIC ${ZLIB_ALL_SRCS}) +if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) + set(ZLIB_DLL_SRCS win32/zlib${SUFFIX}1.rc) +endif() -target_include_directories(zlib PUBLIC - "$" - "$") +if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS) + add_library(zlib SHARED ${ZLIB_ALL_SRCS} ${ZLIB_DLL_SRCS}) + add_library(zlibstatic STATIC ${ZLIB_ALL_SRCS}) + + set(ZLIB_INSTALL_LIBRARIES zlib zlibstatic) +else() + + if(ZLIB_BUILD_SHARED_LIBS) + add_library(zlib SHARED ${ZLIB_ALL_SRCS} ${ZLIB_DLL_SRCS}) + target_sources(zlib PRIVATE ${ZLIB_DLL_SRCS}) + else() + add_library(zlib STATIC ${ZLIB_ALL_SRCS}) + add_library(zlibstatic ALIAS zlib) + endif() + + set(ZLIB_INSTALL_LIBRARIES zlib) +endif() + +# INFO: Mimics official zlib CMake target +# Generates ZLIB.cmake in case ZLIB_COMPAT=ON and always exports the CMake target ZLIB::ZLIB +# In case ZLIB_COMPAT=OFF, the CMake target and file follows zlib-ng naming convention +if (ZLIB_COMPAT) + if (TARGET zlib) + set_target_properties(zlib PROPERTIES EXPORT_NAME ZLIB) + else() + set_target_properties(zlibstatic PROPERTIES EXPORT_NAME ZLIB) + endif() +endif() + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${ARCHDIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/arch/generic) + +foreach(ZLIB_INSTALL_LIBRARY ${ZLIB_INSTALL_LIBRARIES}) + if(NOT ZLIB_COMPAT) + target_compile_definitions(${ZLIB_INSTALL_LIBRARY} PUBLIC ZLIBNG_NATIVE_API) + endif() + target_include_directories(${ZLIB_INSTALL_LIBRARY} PUBLIC + "$${CMAKE_CURRENT_SOURCE_DIR}>" + "$") +endforeach() + +if(WIN32) + # Shared library + if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) + set_target_properties(zlib PROPERTIES OUTPUT_NAME zlib${SUFFIX}) + endif() + # Static library + if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS) + if(MSVC) + set_target_properties(zlibstatic PROPERTIES OUTPUT_NAME zlibstatic${SUFFIX}) + else() + set_target_properties(zlibstatic PROPERTIES OUTPUT_NAME z${SUFFIX}) + endif() + elseif(NOT ZLIB_BUILD_SHARED_LIBS) + if(MSVC) + set_target_properties(zlib PROPERTIES OUTPUT_NAME zlibstatic${SUFFIX}) + else() + set_target_properties(zlib PROPERTIES OUTPUT_NAME z${SUFFIX}) + endif() + endif() +else() + # On unix-like platforms the library is almost always called libz + set_target_properties(${ZLIB_INSTALL_LIBRARIES} PROPERTIES OUTPUT_NAME z${SUFFIX}) +endif() + +if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) + set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL) + + if(ZLIB_COMPAT) + set_target_properties(zlib PROPERTIES SOVERSION 1) + else() + set_target_properties(zlib PROPERTIES SOVERSION 2) + endif() + + if(NOT CYGWIN) + # This property causes shared libraries on Linux to have the full version + # encoded into their final filename. We disable this on Cygwin because + # it causes cygz-${ZLIB_FULL_VERSION}.dll to be created when cygz.dll + # seems to be the default. + # + # This has no effect with MSVC, on that platform the version info for + # the DLL comes from the resource file win32/zlib1.rc + set_target_properties(zlib PROPERTIES VERSION ${ZLIB_FULL_VERSION}) + endif() + + if(UNIX) + if(HAVE_NO_INTERPOSITION) + set_target_properties(zlib PROPERTIES COMPILE_FLAGS "-fno-semantic-interposition") + endif() + if(NOT APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL AIX) + if(NOT ZLIB_COMPAT) + add_definitions(-DHAVE_SYMVER) + endif() + set_target_properties(zlib PROPERTIES LINK_FLAGS + "-Wl,--version-script,\"${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.map\"") + endif() + endif() + if(MSYS) + # Suppress version number from shared library name + set(CMAKE_SHARED_LIBRARY_NAME_WITH_VERSION 0) + elseif(WIN32) + # Creates zlib1.dll when building shared library version + if(ZLIB_COMPAT) + set_target_properties(zlib PROPERTIES SUFFIX "1.dll") + else() + set_target_properties(zlib PROPERTIES SUFFIX "2.dll") + endif() + endif() +endif() if(HAVE_UNISTD_H) SET(ZCONF_UNISTD_LINE "#if 1 /* was set to #if 1 by configure/cmake/etc */") @@ -757,40 +1268,143 @@ else() SET(ZCONF_UNISTD_LINE "#if 0 /* was set to #if 0 by configure/cmake/etc */") endif() if(NEED_PTRDIFF_T) - SET(ZCONF_PTRDIFF_LINE "#if 1 /* was set to #if 1 by configure/cmake/etc */") + SET(ZCONF_PTRDIFF_LINE "#if 1 /* was set to #if 1 by configure/cmake/etc */") else() - SET(ZCONF_PTRDIFF_LINE "#ifdef NEED_PTRDIFF_T /* may be set to #if 1 by configure/cmake/etc */") + SET(ZCONF_PTRDIFF_LINE "#ifdef NEED_PTRDIFF_T /* may be set to #if 1 by configure/cmake/etc */") endif() -configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf.h.cmakein - ${CMAKE_CURRENT_BINARY_DIR}/zconf.h @ONLY) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib.h.in - ${CMAKE_CURRENT_BINARY_DIR}/zlib.h @ONLY) +set(ZLIB_PC ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.pc) +if(WITH_GZFILEOP) + set(PKG_CONFIG_CFLAGS "-DWITH_GZFILEOP") +endif() +configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h.cmakein + ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.h.in + ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gzread.c.in - ${CMAKE_CURRENT_BINARY_DIR}/gzread.c @ONLY) + ${CMAKE_CURRENT_BINARY_DIR}/gzread.c @ONLY) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib_name_mangling.h.empty - ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h COPYONLY) +if (NOT ZLIB_SYMBOL_PREFIX STREQUAL "") + add_feature_info(ZLIB_SYMBOL_PREFIX ON "Publicly exported symbols have a custom prefix") + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib_name_mangling${SUFFIX}.h.in + ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h @ONLY) +else() + add_feature_info(ZLIB_SYMBOL_PREFIX OFF "Publicly exported symbols DO NOT have a custom prefix") + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib_name_mangling.h.empty + ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h COPYONLY) +endif() +# add_definitions(-DZLIB_SYMBOL_PREFIX=${ZLIB_SYMBOL_PREFIX}) # not needed -ocv_warnings_disable(CMAKE_C_FLAGS -Wmissing-prototypes - -Wundef - -Wmissing-declarations -) -set_target_properties(${ZLIB_LIBRARY} PROPERTIES - OUTPUT_NAME ${ZLIB_LIBRARY} - DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}" - COMPILE_PDB_NAME ${ZLIB_LIBRARY} - COMPILE_PDB_NAME_DEBUG "${ZLIB_LIBRARY}${OPENCV_DEBUG_POSTFIX}" - ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH} -) +if(NOT SKIP_INSTALL_LIBRARIES AND NOT SKIP_INSTALL_ALL) + install(TARGETS ${ZLIB_INSTALL_LIBRARIES} + EXPORT ${EXPORT_NAME} + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() +if(NOT SKIP_INSTALL_HEADERS AND NOT SKIP_INSTALL_ALL) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zlib${SUFFIX}.h) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zlib_name_mangling${SUFFIX}.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zlib_name_mangling${SUFFIX}.h) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" RENAME zconf${SUFFIX}.h) +endif() +if(NOT SKIP_INSTALL_FILES AND NOT SKIP_INSTALL_ALL) + install(FILES ${ZLIB_PC} DESTINATION "${PKGCONFIG_INSTALL_DIR}") + install(EXPORT ${EXPORT_NAME} + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME}" + NAMESPACE ${EXPORT_NAME}::) + # Use GNU-style variable names + set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) + set(LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}) + if (ZLIB_COMPAT) + set(PACKAGE_CONFIGNAME zlib) + set(PACKAGE_VERSION ${ZLIB_HEADER_VERSION}) + else() + set(PACKAGE_CONFIGNAME zlib-ng) + set(PACKAGE_VERSION ${ZLIBNG_HEADER_VERSION}) + endif() +endif() + +#============================================================================ +# Example binaries +#============================================================================ + +if(ZLIB_ENABLE_TESTS) + enable_testing() + + if(ZLIB_BUILD_SHARED_LIBS) + if(ZLIBNG_ENABLE_TESTS) + message(STATUS "Disabling zlib-ng tests because shared libraries are enabled") + set(ZLIBNG_ENABLE_TESTS OFF) + endif() + + if(WITH_BENCHMARKS OR WITH_BENCHMARK_APPS) + message(STATUS "Disabling benchmarks because shared libraries are enabled") + set(WITH_BENCHMARKS OFF) + set(WITH_BENCHMARK_APPS OFF) + endif() + endif() + + add_subdirectory(test) +endif() + +add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile related functions") +add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Compile with zlib compatible API") +add_feature_info(ZLIB_ENABLE_TESTS ZLIB_ENABLE_TESTS "Build test binaries") +add_feature_info(ZLIBNG_ENABLE_TESTS ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API") +add_feature_info(WITH_SANITIZER WITH_SANITIZER "Enable sanitizer support") +add_feature_info(WITH_GTEST WITH_GTEST "Build gtest_zlib") +add_feature_info(WITH_FUZZERS WITH_FUZZERS "Build test/fuzz") +add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks") +add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks") +add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation") +add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") +add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS + "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)") +add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection") +add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings") +add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting") +add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking") +add_feature_info(WITH_INFLATE_ALLOW_INVALID_DIST WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances") + +if(BASEARCH_ARM_FOUND) + add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE") + add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics") + add_feature_info(WITH_ARMV6 WITH_ARMV6 "Build with ARMv6 SIMD") +elseif(BASEARCH_PPC_FOUND) + add_feature_info(WITH_ALTIVEC WITH_ALTIVEC "Build with AltiVec optimisations") + add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8") + add_feature_info(WITH_POWER9 WITH_POWER9 "Build with optimisations for POWER9") +elseif(BASEARCH_RISCV_FOUND) + add_feature_info(WITH_RVV WITH_RVV "Build with RVV intrinsics") +elseif(BASEARCH_S360_FOUND) + add_feature_info(WITH_DFLTCC_DEFLATE WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z") + add_feature_info(WITH_DFLTCC_INFLATE WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z") + add_feature_info(WITH_CRC32_VX WITH_CRC32_VX "Build with vectorized CRC32 on IBM Z") +elseif(BASEARCH_X86_FOUND) + add_feature_info(WITH_AVX2 WITH_AVX2 "Build with AVX2") + add_feature_info(WITH_AVX512 WITH_AVX512 "Build with AVX512") + add_feature_info(WITH_AVX512VNNI WITH_AVX512VNNI "Build with AVX512 VNNI") + add_feature_info(WITH_SSE2 WITH_SSE2 "Build with SSE2") + add_feature_info(WITH_SSSE3 WITH_SSSE3 "Build with SSSE3") + add_feature_info(WITH_SSE42 WITH_SSE42 "Build with SSE42") + add_feature_info(WITH_PCLMULQDQ WITH_PCLMULQDQ "Build with PCLMULQDQ") + add_feature_info(WITH_VPCLMULQDQ WITH_VPCLMULQDQ "Build with VPCLMULQDQ") +endif() + +add_feature_info(INSTALL_UTILS INSTALL_UTILS "Copy minigzip and minideflate during install") + +FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES) if(ENABLE_SOLUTION_FOLDERS) - set_target_properties(${ZLIB_LIBRARY} PROPERTIES FOLDER "3rdparty") + set_target_properties(${ZLIB_INSTALL_LIBRARIES} PROPERTIES FOLDER "3rdparty") endif() if(NOT BUILD_SHARED_LIBS) - ocv_install_target(${ZLIB_LIBRARY} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) + ocv_install_target(${ZLIB_INSTALL_LIBRARIES} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) endif() -ocv_install_3rdparty_licenses(${ZLIB_LIBRARY} LICENSE.md) +ocv_install_3rdparty_licenses(${ZLIB_INSTALL_LIBRARIES} LICENSE.md) diff --git a/3rdparty/zlib-ng/LICENSE.md b/3rdparty/zlib-ng/LICENSE.md index adb48d4729..e866d7ac18 100644 --- a/3rdparty/zlib-ng/LICENSE.md +++ b/3rdparty/zlib-ng/LICENSE.md @@ -1,4 +1,4 @@ -(C) 1995-2013 Jean-loup Gailly and Mark Adler +(C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/3rdparty/zlib-ng/README.md b/3rdparty/zlib-ng/README.md index 4f9fe09c69..411621b52f 100644 --- a/3rdparty/zlib-ng/README.md +++ b/3rdparty/zlib-ng/README.md @@ -21,7 +21,6 @@ Features * Support for CPU intrinsics when available * Adler32 implementation using SSSE3, AVX2, AVX512, AVX512-VNNI, Neon, VMX & VSX * CRC32-B implementation using PCLMULQDQ, VPCLMULQDQ, ACLE, & IBM Z - * Hash table implementation using CRC32-C intrinsics on x86 and ARM * Slide hash implementations using SSE2, AVX2, ARMv6, Neon, VMX & VSX * Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV * Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX @@ -95,20 +94,21 @@ make test Build Options ------------- -| CMake | configure | Description | Default | -|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------| -| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF | -| ZLIB_ENABLE_TESTS | | Build test binaries | ON | -| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON | -| WITH_OPTIM | --without-optimizations | Build with optimisations | ON | -| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON | -| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | -| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF | -| WITH_GTEST | | Build gtest_zlib | ON | -| WITH_FUZZERS | | Build test/fuzz | OFF | -| WITH_BENCHMARKS | | Build test/benchmarks | OFF | -| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF | -| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF | +| CMake | configure | Description | Default | +|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------| +| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF | +| ZLIB_ENABLE_TESTS | | Build test binaries | ON | +| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON | +| WITH_OPTIM | --without-optimizations | Build with optimisations | ON | +| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON | +| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF | +| WITH_RUNTIME_CPU_DETECTION | | Compiles with runtime CPU detection | ON | +| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF | +| WITH_GTEST | | Build gtest_zlib | ON | +| WITH_FUZZERS | | Build test/fuzz | OFF | +| WITH_BENCHMARKS | | Build test/benchmarks | OFF | +| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF | +| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF | Install diff --git a/3rdparty/zlib-ng/adler32.c b/3rdparty/zlib-ng/adler32.c index 95ac13c304..1a643ed53b 100644 --- a/3rdparty/zlib-ng/adler32.c +++ b/3rdparty/zlib-ng/adler32.c @@ -7,70 +7,24 @@ #include "functable.h" #include "adler32_p.h" -/* ========================================================================= */ -Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { - uint32_t sum2; - unsigned n; - - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - - /* in case user likes doing a byte at a time, keep it fast */ - if (UNLIKELY(len == 1)) - return adler32_len_1(adler, buf, sum2); - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (UNLIKELY(buf == NULL)) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if (UNLIKELY(len < 16)) - return adler32_len_16(adler, buf, len, sum2); - - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; -#ifdef UNROLL_MORE - n = NMAX / 16; /* NMAX is divisible by 16 */ -#else - n = NMAX / 8; /* NMAX is divisible by 8 */ -#endif - do { -#ifdef UNROLL_MORE - DO16(adler, sum2, buf); /* 16 sums unrolled */ - buf += 16; -#else - DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ - buf += 8; -#endif - } while (--n); - adler %= BASE; - sum2 %= BASE; - } - - /* do remaining bytes (less than NMAX, still just one modulo) */ - return adler32_len_64(adler, buf, len, sum2); -} - #ifdef ZLIB_COMPAT unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) { - return (unsigned long)functable.adler32((uint32_t)adler, buf, len); + return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len); } #else uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) { - return functable.adler32(adler, buf, len); + return FUNCTABLE_CALL(adler32)(adler, buf, len); } #endif /* ========================================================================= */ #ifdef ZLIB_COMPAT unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) { - return (unsigned long)functable.adler32((uint32_t)adler, buf, len); + return (unsigned long)FUNCTABLE_CALL(adler32)((uint32_t)adler, buf, len); } #else uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) { - return functable.adler32(adler, buf, len); + return FUNCTABLE_CALL(adler32)(adler, buf, len); } #endif diff --git a/3rdparty/zlib-ng/adler32_fold.h b/3rdparty/zlib-ng/adler32_fold.h deleted file mode 100644 index 20aa1c7400..0000000000 --- a/3rdparty/zlib-ng/adler32_fold.h +++ /dev/null @@ -1,11 +0,0 @@ -/* adler32_fold.h -- adler32 folding interface - * Copyright (C) 2022 Adam Stylinski - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifndef ADLER32_FOLD_H_ -#define ADLER32_FOLD_H_ - -Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); - -#endif diff --git a/3rdparty/zlib-ng/arch/.gitignore b/3rdparty/zlib-ng/arch/.gitignore deleted file mode 100644 index 2c3af0a08c..0000000000 --- a/3rdparty/zlib-ng/arch/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# ignore Makefiles; they're all automatically generated -Makefile diff --git a/3rdparty/zlib-ng/arch/arm/Makefile.in b/3rdparty/zlib-ng/arch/arm/Makefile.in index 9d05b00b54..b6f0aaf211 100644 --- a/3rdparty/zlib-ng/arch/arm/Makefile.in +++ b/3rdparty/zlib-ng/arch/arm/Makefile.in @@ -25,7 +25,6 @@ all: \ crc32_acle.o crc32_acle.lo \ slide_hash_neon.o slide_hash_neon.lo \ slide_hash_armv6.o slide_hash_armv6.lo \ - insert_string_acle.o insert_string_acle.lo adler32_neon.o: $(CC) $(CFLAGS) $(NEONFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c @@ -69,12 +68,6 @@ slide_hash_armv6.o: slide_hash_armv6.lo: $(CC) $(SFLAGS) $(ARMV6FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_armv6.c -insert_string_acle.o: - $(CC) $(CFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c - -insert_string_acle.lo: - $(CC) $(SFLAGS) $(ACLEFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c - mostlyclean: clean clean: rm -f *.o *.lo *~ diff --git a/3rdparty/zlib-ng/arch/arm/adler32_neon.c b/3rdparty/zlib-ng/arch/arm/adler32_neon.c index f1c43ff047..8e46b38017 100644 --- a/3rdparty/zlib-ng/arch/arm/adler32_neon.c +++ b/3rdparty/zlib-ng/arch/arm/adler32_neon.c @@ -7,8 +7,8 @@ */ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) { static const uint16_t ALIGNED_(16) taps[64] = { diff --git a/3rdparty/zlib-ng/arch/arm/arm_features.c b/3rdparty/zlib-ng/arch/arm/arm_features.c index a0e070ba95..d0d49764f4 100644 --- a/3rdparty/zlib-ng/arch/arm/arm_features.c +++ b/3rdparty/zlib-ng/arch/arm/arm_features.c @@ -1,4 +1,4 @@ -#include "../../zbuild.h" +#include "zbuild.h" #include "arm_features.h" #if defined(__linux__) && defined(HAVE_SYS_AUXV_H) @@ -11,6 +11,11 @@ # ifndef ID_AA64ISAR0_CRC32_VAL # define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32 # endif +#elif defined(__OpenBSD__) && defined(__aarch64__) +# include +# include +# include +# include #elif defined(__APPLE__) # if !defined(_DARWIN_C_SOURCE) # define _DARWIN_C_SOURCE /* enable types aliases (eg u_int) */ @@ -30,6 +35,16 @@ static int arm_has_crc32() { #elif defined(__FreeBSD__) && defined(__aarch64__) return getenv("QEMU_EMULATING") == NULL && ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE; +#elif defined(__OpenBSD__) && defined(__aarch64__) + int hascrc32 = 0; + int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 }; + uint64_t isar0 = 0; + size_t len = sizeof(isar0); + if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) != -1) { + if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE) + hascrc32 = 1; + } + return hascrc32; #elif defined(__APPLE__) int hascrc32; size_t size = sizeof(hascrc32); diff --git a/3rdparty/zlib-ng/arch/arm/arm_features.h b/3rdparty/zlib-ng/arch/arm/arm_features.h index eca078e310..d968e02fbb 100644 --- a/3rdparty/zlib-ng/arch/arm/arm_features.h +++ b/3rdparty/zlib-ng/arch/arm/arm_features.h @@ -2,8 +2,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef ARM_H_ -#define ARM_H_ +#ifndef ARM_FEATURES_H_ +#define ARM_FEATURES_H_ struct arm_cpu_features { int has_simd; @@ -13,4 +13,4 @@ struct arm_cpu_features { void Z_INTERNAL arm_check_features(struct arm_cpu_features *features); -#endif /* ARM_H_ */ +#endif /* ARM_FEATURES_H_ */ diff --git a/3rdparty/zlib-ng/arch/arm/arm_functions.h b/3rdparty/zlib-ng/arch/arm/arm_functions.h new file mode 100644 index 0000000000..61c682710a --- /dev/null +++ b/3rdparty/zlib-ng/arch/arm/arm_functions.h @@ -0,0 +1,65 @@ +/* arm_functions.h -- ARM implementations for arch-specific functions. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ARM_FUNCTIONS_H_ +#define ARM_FUNCTIONS_H_ + +#ifdef ARM_NEON +uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t chunksize_neon(void); +uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZLL +uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); +uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); +# endif +void slide_hash_neon(deflate_state *s); +void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef ARM_ACLE +uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); +#endif + +#ifdef ARM_SIMD +void slide_hash_armv6(deflate_state *s); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// ARM - SIMD +# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD) +# undef native_slide_hash +# define native_slide_hash slide_hash_armv6 +# endif +// ARM - NEON +# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON +# undef native_adler32 +# define native_adler32 adler32_neon +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_neon +# undef native_chunksize +# define native_chunksize chunksize_neon +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_neon +# undef native_slide_hash +# define native_slide_hash slide_hash_neon +# ifdef HAVE_BUILTIN_CTZLL +# undef native_compare256 +# define native_compare256 compare256_neon +# undef native_longest_match +# define native_longest_match longest_match_neon +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_neon +# endif +# endif +// ARM - ACLE +# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32) +# undef native_crc32 +# define native_crc32 crc32_acle +# endif +#endif + +#endif /* ARM_FUNCTIONS_H_ */ diff --git a/3rdparty/zlib-ng/arch/arm/chunkset_neon.c b/3rdparty/zlib-ng/arch/arm/chunkset_neon.c index f9a444b068..1c49ef5612 100644 --- a/3rdparty/zlib-ng/arch/arm/chunkset_neon.c +++ b/3rdparty/zlib-ng/arch/arm/chunkset_neon.c @@ -4,8 +4,8 @@ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../generic/chunk_permute_table.h" +#include "zbuild.h" +#include "arch/generic/chunk_permute_table.h" typedef uint8x16_t chunk_t; diff --git a/3rdparty/zlib-ng/arch/arm/compare256_neon.c b/3rdparty/zlib-ng/arch/arm/compare256_neon.c index 7daeba411e..87d14c89c0 100644 --- a/3rdparty/zlib-ng/arch/arm/compare256_neon.c +++ b/3rdparty/zlib-ng/arch/arm/compare256_neon.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) diff --git a/3rdparty/zlib-ng/arch/arm/crc32_acle.c b/3rdparty/zlib-ng/arch/arm/crc32_acle.c index ac7d6ff66b..116bcab1c2 100644 --- a/3rdparty/zlib-ng/arch/arm/crc32_acle.c +++ b/3rdparty/zlib-ng/arch/arm/crc32_acle.c @@ -7,7 +7,7 @@ #ifdef ARM_ACLE #include "acle_intrins.h" -#include "../../zbuild.h" +#include "zbuild.h" Z_INTERNAL Z_TARGET_CRC uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len) { Z_REGISTER uint32_t c; diff --git a/3rdparty/zlib-ng/arch/arm/insert_string_acle.c b/3rdparty/zlib-ng/arch/arm/insert_string_acle.c deleted file mode 100644 index aa8385c712..0000000000 --- a/3rdparty/zlib-ng/arch/arm/insert_string_acle.c +++ /dev/null @@ -1,24 +0,0 @@ -/* insert_string_acle.c -- insert_string integer hash variant using ACLE's CRC instructions - * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - */ - -#ifdef ARM_ACLE -#include "acle_intrins.h" -#include "../../zbuild.h" -#include "../../deflate.h" - -#define HASH_CALC(s, h, val) \ - h = __crc32w(0, val) - -#define HASH_CALC_VAR h -#define HASH_CALC_VAR_INIT uint32_t h = 0 - -#define UPDATE_HASH Z_TARGET_CRC update_hash_acle -#define INSERT_STRING Z_TARGET_CRC insert_string_acle -#define QUICK_INSERT_STRING Z_TARGET_CRC quick_insert_string_acle - -#include "../../insert_string_tpl.h" -#endif diff --git a/3rdparty/zlib-ng/arch/arm/neon_intrins.h b/3rdparty/zlib-ng/arch/arm/neon_intrins.h index 51df77dbe6..a9e99ec88a 100644 --- a/3rdparty/zlib-ng/arch/arm/neon_intrins.h +++ b/3rdparty/zlib-ng/arch/arm/neon_intrins.h @@ -25,6 +25,13 @@ out.val[3] = vqsubq_u16(a.val[3], b); \ } while (0) +# if defined(__clang__) && defined(__arm__) && defined(__ANDROID__) +/* Clang for 32-bit Android has too strict alignment requirement (:256) for x4 NEON intrinsics */ +# undef ARM_NEON_HASLD4 +# undef vld1q_u16_x4 +# undef vld1q_u8_x4 +# undef vst1q_u16_x4 +# endif # ifndef ARM_NEON_HASLD4 diff --git a/3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c b/3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c index 0a2eeccf92..07f71b59eb 100644 --- a/3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c +++ b/3rdparty/zlib-ng/arch/arm/slide_hash_armv6.c @@ -5,8 +5,8 @@ #if defined(ARM_SIMD) #include "acle_intrins.h" -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" /* SIMD version of hash_chain rebase */ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { diff --git a/3rdparty/zlib-ng/arch/arm/slide_hash_neon.c b/3rdparty/zlib-ng/arch/arm/slide_hash_neon.c index a96ca11799..a601e6099a 100644 --- a/3rdparty/zlib-ng/arch/arm/slide_hash_neon.c +++ b/3rdparty/zlib-ng/arch/arm/slide_hash_neon.c @@ -10,8 +10,8 @@ #ifdef ARM_NEON #include "neon_intrins.h" -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" /* SIMD version of hash_chain rebase */ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { diff --git a/3rdparty/zlib-ng/arch/generic/Makefile.in b/3rdparty/zlib-ng/arch/generic/Makefile.in index c717026f86..32c8242d02 100644 --- a/3rdparty/zlib-ng/arch/generic/Makefile.in +++ b/3rdparty/zlib-ng/arch/generic/Makefile.in @@ -1,5 +1,6 @@ -# Makefile for zlib +# Makefile for zlib-ng # Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# Copyright (C) 2024 Hans Kristian Rosbach # For conditions of distribution and use, see copyright notice in zlib.h CC= @@ -11,12 +12,62 @@ SRCDIR=. SRCTOP=../.. TOPDIR=$(SRCTOP) -all: +all: \ + adler32_c.o adler32_c.lo \ + adler32_fold_c.o adler32_fold_c.lo \ + chunkset_c.o chunkset_c.lo \ + compare256_c.o compare256_c.lo \ + crc32_braid_c.o crc32_braid_c.lo \ + crc32_fold_c.o crc32_fold_c.lo \ + slide_hash_c.o slide_hash_c.lo + + +adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c + +adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c + +adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c + +chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c + +chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c + +compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c + +compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zutil_p.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c + +crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c + +crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c + +crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c + +crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c + +slide_hash_c.o: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c + +slide_hash_c.lo: $(SRCDIR)/slide_hash_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_c.c mostlyclean: clean clean: - rm -f *.o *.lo *~ \ + rm -f *.o *.lo *~ rm -rf objs rm -f *.gcda *.gcno *.gcov diff --git a/3rdparty/zlib-ng/arch/generic/adler32_c.c b/3rdparty/zlib-ng/arch/generic/adler32_c.c new file mode 100644 index 0000000000..64258c89b4 --- /dev/null +++ b/3rdparty/zlib-ng/arch/generic/adler32_c.c @@ -0,0 +1,54 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zbuild.h" +#include "functable.h" +#include "adler32_p.h" + +/* ========================================================================= */ +Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) { + uint32_t sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (UNLIKELY(len == 1)) + return adler32_len_1(adler, buf, sum2); + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (UNLIKELY(buf == NULL)) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (UNLIKELY(len < 16)) + return adler32_len_16(adler, buf, len, sum2); + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; +#ifdef UNROLL_MORE + n = NMAX / 16; /* NMAX is divisible by 16 */ +#else + n = NMAX / 8; /* NMAX is divisible by 8 */ +#endif + do { +#ifdef UNROLL_MORE + DO16(adler, sum2, buf); /* 16 sums unrolled */ + buf += 16; +#else + DO8(adler, sum2, buf, 0); /* 8 sums unrolled */ + buf += 8; +#endif + } while (--n); + adler %= BASE; + sum2 %= BASE; + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + return adler32_len_64(adler, buf, len, sum2); +} diff --git a/3rdparty/zlib-ng/adler32_fold.c b/3rdparty/zlib-ng/arch/generic/adler32_fold_c.c similarity index 83% rename from 3rdparty/zlib-ng/adler32_fold.c rename to 3rdparty/zlib-ng/arch/generic/adler32_fold_c.c index e2f6f9ac7d..397dd10400 100644 --- a/3rdparty/zlib-ng/adler32_fold.c +++ b/3rdparty/zlib-ng/arch/generic/adler32_fold_c.c @@ -5,12 +5,11 @@ #include "zbuild.h" #include "functable.h" -#include "adler32_fold.h" #include Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) { - adler = functable.adler32(adler, src, len); + adler = FUNCTABLE_CALL(adler32)(adler, src, len); memcpy(dst, src, len); return adler; } diff --git a/3rdparty/zlib-ng/chunkset.c b/3rdparty/zlib-ng/arch/generic/chunkset_c.c similarity index 100% rename from 3rdparty/zlib-ng/chunkset.c rename to 3rdparty/zlib-ng/arch/generic/chunkset_c.c diff --git a/3rdparty/zlib-ng/compare256.c b/3rdparty/zlib-ng/arch/generic/compare256_c.c similarity index 99% rename from 3rdparty/zlib-ng/compare256.c rename to 3rdparty/zlib-ng/arch/generic/compare256_c.c index 82551cdd57..0c12cb3a4e 100644 --- a/3rdparty/zlib-ng/compare256.c +++ b/3rdparty/zlib-ng/arch/generic/compare256_c.c @@ -5,6 +5,7 @@ #include "zbuild.h" #include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" /* ALIGNED, byte comparison */ diff --git a/3rdparty/zlib-ng/crc32_braid.c b/3rdparty/zlib-ng/arch/generic/crc32_braid_c.c similarity index 79% rename from 3rdparty/zlib-ng/crc32_braid.c rename to 3rdparty/zlib-ng/arch/generic/crc32_braid_c.c index 96754b53df..7d8028f6d7 100644 --- a/3rdparty/zlib-ng/crc32_braid.c +++ b/3rdparty/zlib-ng/arch/generic/crc32_braid_c.c @@ -8,43 +8,9 @@ */ #include "zbuild.h" -#include "zutil.h" -#include "functable.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" -/* ========================================================================= */ - -const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { - return (const uint32_t *)crc_table; -} - -#ifdef ZLIB_COMPAT -unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { - if (buf == NULL) return 0; - - return (unsigned long)functable.crc32((uint32_t)crc, buf, len); -} -#else -uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { - if (buf == NULL) return 0; - - return functable.crc32(crc, buf, len); -} -#endif - -#ifdef ZLIB_COMPAT -unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { - return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); -} -#else -uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { - return PREFIX(crc32_z)(crc, buf, len); -} -#endif - -/* ========================================================================= */ - /* A CRC of a message is computed on N braids of words in the message, where each word consists of W bytes (4 or 8). If N is 3, for example, then three @@ -66,24 +32,6 @@ uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t level. Your mileage may vary. */ -/* ========================================================================= */ - -#if BYTE_ORDER == LITTLE_ENDIAN -# define ZSWAPWORD(word) (word) -# define BRAID_TABLE crc_braid_table -#elif BYTE_ORDER == BIG_ENDIAN -# if W == 8 -# define ZSWAPWORD(word) ZSWAP64(word) -# elif W == 4 -# define ZSWAPWORD(word) ZSWAP32(word) -# endif -# define BRAID_TABLE crc_braid_big_table -#else -# error "No endian defined" -#endif -#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - /* ========================================================================= */ #ifdef W /* @@ -112,7 +60,7 @@ static z_word_t crc_word(z_word_t data) { /* ========================================================================= */ Z_INTERNAL uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len) { - Z_REGISTER uint32_t c; + uint32_t c; /* Pre-condition the CRC */ c = (~crc) & 0xffffffff; diff --git a/3rdparty/zlib-ng/crc32_fold.c b/3rdparty/zlib-ng/arch/generic/crc32_fold_c.c similarity index 86% rename from 3rdparty/zlib-ng/crc32_fold.c rename to 3rdparty/zlib-ng/arch/generic/crc32_fold_c.c index 5b3c7c459f..43930e97c6 100644 --- a/3rdparty/zlib-ng/crc32_fold.c +++ b/3rdparty/zlib-ng/arch/generic/crc32_fold_c.c @@ -3,11 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ #include "zbuild.h" +#include "zutil.h" #include "functable.h" - -#include "crc32_fold.h" - -#include +#include "crc32.h" Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) { crc->value = CRC32_INITIAL_VALUE; @@ -15,7 +13,7 @@ Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) { } Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len) { - crc->value = functable.crc32(crc->value, src, len); + crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len); memcpy(dst, src, len); } @@ -25,7 +23,7 @@ Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, ui * same arguments for the versions that _do_ do a folding CRC but we don't want a copy. The * init_crc is an unused argument in this context */ Z_UNUSED(init_crc); - crc->value = functable.crc32(crc->value, src, len); + crc->value = FUNCTABLE_CALL(crc32)(crc->value, src, len); } Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc) { diff --git a/3rdparty/zlib-ng/arch/generic/generic_functions.h b/3rdparty/zlib-ng/arch/generic/generic_functions.h new file mode 100644 index 0000000000..997dd4d01e --- /dev/null +++ b/3rdparty/zlib-ng/arch/generic/generic_functions.h @@ -0,0 +1,106 @@ +/* generic_functions.h -- generic C implementations for arch-specific functions. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef GENERIC_FUNCTIONS_H_ +#define GENERIC_FUNCTIONS_H_ + +#include "zendian.h" + +Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); +Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); + +Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); + + +typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); +typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); +typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); + +uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); + +uint32_t chunksize_c(void); +uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); +void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); + +uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); + +uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); +#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN +uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); +# endif +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) + uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); +# endif +#endif + +typedef void (*slide_hash_func)(deflate_state *s); + +void slide_hash_c(deflate_state *s); + +uint32_t longest_match_c(deflate_state *const s, Pos cur_match); +# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN + uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); +# ifdef HAVE_BUILTIN_CTZ + uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); +# endif +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) + uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); +# endif +# endif + +uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); +# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN + uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); +# ifdef UNALIGNED64_OK + uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); +# endif +# endif + + +// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions. +#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN +# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) +# define longest_match_generic longest_match_unaligned_64 +# define longest_match_slow_generic longest_match_slow_unaligned_64 +# define compare256_generic compare256_unaligned_64 +# elif defined(HAVE_BUILTIN_CTZ) +# define longest_match_generic longest_match_unaligned_32 +# define longest_match_slow_generic longest_match_slow_unaligned_32 +# define compare256_generic compare256_unaligned_32 +# else +# define longest_match_generic longest_match_unaligned_16 +# define longest_match_slow_generic longest_match_slow_unaligned_16 +# define compare256_generic compare256_unaligned_16 +# endif +#else +# define longest_match_generic longest_match_c +# define longest_match_slow_generic longest_match_slow_c +# define compare256_generic compare256_c +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// Generic code +# define native_adler32 adler32_c +# define native_adler32_fold_copy adler32_fold_copy_c +# define native_chunkmemset_safe chunkmemset_safe_c +# define native_chunksize chunksize_c +# define native_crc32 PREFIX(crc32_braid) +# define native_crc32_fold crc32_fold_c +# define native_crc32_fold_copy crc32_fold_copy_c +# define native_crc32_fold_final crc32_fold_final_c +# define native_crc32_fold_reset crc32_fold_reset_c +# define native_inflate_fast inflate_fast_c +# define native_slide_hash slide_hash_c +# define native_longest_match longest_match_generic +# define native_longest_match_slow longest_match_slow_generic +# define native_compare256 compare256_generic +#endif + +#endif diff --git a/3rdparty/zlib-ng/slide_hash.c b/3rdparty/zlib-ng/arch/generic/slide_hash_c.c similarity index 96% rename from 3rdparty/zlib-ng/slide_hash.c rename to 3rdparty/zlib-ng/arch/generic/slide_hash_c.c index b9fbbdb69f..8345b9e36b 100644 --- a/3rdparty/zlib-ng/slide_hash.c +++ b/3rdparty/zlib-ng/arch/generic/slide_hash_c.c @@ -1,6 +1,6 @@ /* slide_hash.c -- slide hash table C implementation * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/3rdparty/zlib-ng/arch/power/chunkset_power8.c b/3rdparty/zlib-ng/arch/power/chunkset_power8.c index 7cbb8029b3..aef1973273 100644 --- a/3rdparty/zlib-ng/arch/power/chunkset_power8.c +++ b/3rdparty/zlib-ng/arch/power/chunkset_power8.c @@ -4,7 +4,7 @@ #ifdef POWER8_VSX #include -#include "../../zbuild.h" +#include "zbuild.h" typedef vector unsigned char chunk_t; diff --git a/3rdparty/zlib-ng/arch/power/compare256_power9.c b/3rdparty/zlib-ng/arch/power/compare256_power9.c index 9b0ddaf800..c8be498e4f 100644 --- a/3rdparty/zlib-ng/arch/power/compare256_power9.c +++ b/3rdparty/zlib-ng/arch/power/compare256_power9.c @@ -5,8 +5,10 @@ #ifdef POWER9 #include -#include "../../zbuild.h" -#include "../../zendian.h" +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" +#include "zendian.h" /* Older versions of GCC misimplemented semantics for these bit counting builtins. * https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3f30f2d1dbb3228b8468b26239fe60c2974ce2ac */ diff --git a/3rdparty/zlib-ng/arch/power/power_features.c b/3rdparty/zlib-ng/arch/power/power_features.c index f73503734b..4939d1c18f 100644 --- a/3rdparty/zlib-ng/arch/power/power_features.c +++ b/3rdparty/zlib-ng/arch/power/power_features.c @@ -1,16 +1,19 @@ /* power_features.c - POWER feature check * Copyright (C) 2020 Matheus Castanho , IBM - * Copyright (C) 2021-2022 Mika T. Lindqvist + * Copyright (C) 2021-2024 Mika T. Lindqvist * For conditions of distribution and use, see copyright notice in zlib.h */ #ifdef HAVE_SYS_AUXV_H # include #endif +#ifdef POWER_NEED_AUXVEC_H +# include +#endif #ifdef __FreeBSD__ # include #endif -#include "../../zbuild.h" +#include "zbuild.h" #include "power_features.h" void Z_INTERNAL power_check_features(struct power_cpu_features *features) { diff --git a/3rdparty/zlib-ng/arch/power/power_features.h b/3rdparty/zlib-ng/arch/power/power_features.h index 9252364cc4..1ff51de5dd 100644 --- a/3rdparty/zlib-ng/arch/power/power_features.h +++ b/3rdparty/zlib-ng/arch/power/power_features.h @@ -4,8 +4,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef POWER_H_ -#define POWER_H_ +#ifndef POWER_FEATURES_H_ +#define POWER_FEATURES_H_ struct power_cpu_features { int has_altivec; @@ -15,4 +15,4 @@ struct power_cpu_features { void Z_INTERNAL power_check_features(struct power_cpu_features *features); -#endif /* POWER_H_ */ +#endif /* POWER_FEATURES_H_ */ diff --git a/3rdparty/zlib-ng/arch/power/power_functions.h b/3rdparty/zlib-ng/arch/power/power_functions.h new file mode 100644 index 0000000000..cb6b7650ec --- /dev/null +++ b/3rdparty/zlib-ng/arch/power/power_functions.h @@ -0,0 +1,67 @@ +/* power_functions.h -- POWER implementations for arch-specific functions. + * Copyright (C) 2020 Matheus Castanho , IBM + * Copyright (C) 2021 Mika T. Lindqvist + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef POWER_FUNCTIONS_H_ +#define POWER_FUNCTIONS_H_ + +#ifdef PPC_VMX +uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); +void slide_hash_vmx(deflate_state *s); +#endif + +#ifdef POWER8_VSX +uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t chunksize_power8(void); +uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); +uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); +void slide_hash_power8(deflate_state *s); +void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef POWER9 +uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); +uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// Power - VMX +# if defined(PPC_VMX) && defined(__ALTIVEC__) +# undef native_adler32 +# define native_adler32 adler32_vmx +# undef native_slide_hash +# define native_slide_hash slide_hash_vmx +# endif +// Power8 - VSX +# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__) +# undef native_adler32 +# define native_adler32 adler32_power8 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_power8 +# undef native_chunksize +# define native_chunksize chunksize_power8 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_power8 +# undef native_slide_hash +# define native_slide_hash slide_hash_power8 +# endif +# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__) +# undef native_crc32 +# define native_crc32 crc32_power8 +# endif +// Power9 +# if defined(POWER9) && defined(_ARCH_PWR9) +# undef native_compare256 +# define native_compare256 compare256_power9 +# undef native_longest_match +# define native_longest_match longest_match_power9 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_power9 +# endif +#endif + +#endif /* POWER_FUNCTIONS_H_ */ diff --git a/3rdparty/zlib-ng/arch/riscv/adler32_rvv.c b/3rdparty/zlib-ng/arch/riscv/adler32_rvv.c index da46f37e73..d0f9aaa567 100644 --- a/3rdparty/zlib-ng/arch/riscv/adler32_rvv.c +++ b/3rdparty/zlib-ng/arch/riscv/adler32_rvv.c @@ -9,8 +9,8 @@ #include #include -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) { /* split Adler-32 into component sums */ diff --git a/3rdparty/zlib-ng/arch/riscv/compare256_rvv.c b/3rdparty/zlib-ng/arch/riscv/compare256_rvv.c index 0fd6082c44..3d6c3e3aa5 100644 --- a/3rdparty/zlib-ng/arch/riscv/compare256_rvv.c +++ b/3rdparty/zlib-ng/arch/riscv/compare256_rvv.c @@ -6,7 +6,9 @@ #ifdef RISCV_RVV -#include "../../zbuild.h" +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #include diff --git a/3rdparty/zlib-ng/arch/riscv/riscv_features.c b/3rdparty/zlib-ng/arch/riscv/riscv_features.c index b066f427e0..1e3f45e0a7 100644 --- a/3rdparty/zlib-ng/arch/riscv/riscv_features.c +++ b/3rdparty/zlib-ng/arch/riscv/riscv_features.c @@ -1,10 +1,13 @@ #include #include #include -#include #include -#include "../../zbuild.h" +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) +# include +#endif + +#include "zbuild.h" #include "riscv_features.h" #define ISA_V_HWCAP (1 << ('v' - 'a')) @@ -33,7 +36,11 @@ void Z_INTERNAL riscv_check_features_compile_time(struct riscv_cpu_features *fea } void Z_INTERNAL riscv_check_features_runtime(struct riscv_cpu_features *features) { +#if defined(__linux__) && defined(HAVE_SYS_AUXV_H) unsigned long hw_cap = getauxval(AT_HWCAP); +#else + unsigned long hw_cap = 0; +#endif features->has_rvv = hw_cap & ISA_V_HWCAP; } diff --git a/3rdparty/zlib-ng/arch/riscv/riscv_features.h b/3rdparty/zlib-ng/arch/riscv/riscv_features.h index c76e967c36..b1593acc25 100644 --- a/3rdparty/zlib-ng/arch/riscv/riscv_features.h +++ b/3rdparty/zlib-ng/arch/riscv/riscv_features.h @@ -6,8 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#ifndef RISCV_H_ -#define RISCV_H_ +#ifndef RISCV_FEATURES_H_ +#define RISCV_FEATURES_H_ struct riscv_cpu_features { int has_rvv; @@ -15,4 +15,4 @@ struct riscv_cpu_features { void Z_INTERNAL riscv_check_features(struct riscv_cpu_features *features); -#endif /* RISCV_H_ */ +#endif /* RISCV_FEATURES_H_ */ diff --git a/3rdparty/zlib-ng/arch/riscv/riscv_functions.h b/3rdparty/zlib-ng/arch/riscv/riscv_functions.h new file mode 100644 index 0000000000..015b2fbd75 --- /dev/null +++ b/3rdparty/zlib-ng/arch/riscv/riscv_functions.h @@ -0,0 +1,49 @@ +/* riscv_functions.h -- RISCV implementations for arch-specific functions. + * + * Copyright (C) 2023 SiFive, Inc. All rights reserved. + * Contributed by Alex Chiang + * + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef RISCV_FUNCTIONS_H_ +#define RISCV_FUNCTIONS_H_ + +#ifdef RISCV_RVV +uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +uint32_t chunksize_rvv(void); +uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); +uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); + +uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); +uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); +void slide_hash_rvv(deflate_state *s); +void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// RISCV - RVV +# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__) +# undef native_adler32 +# define native_adler32 adler32_rvv +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_rvv +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_rvv +# undef native_chunksize +# define native_chunksize chunksize_rvv +# undef native_compare256 +# define native_compare256 compare256_rvv +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_rvv +# undef native_longest_match +# define native_longest_match longest_match_rvv +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_rvv +# undef native_slide_hash +# define native_slide_hash slide_hash_rvv +# endif +#endif + +#endif /* RISCV_FUNCTIONS_H_ */ diff --git a/3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c b/3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c index 1164e89ba2..ac28bbd9f2 100644 --- a/3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c +++ b/3rdparty/zlib-ng/arch/riscv/slide_hash_rvv.c @@ -8,18 +8,16 @@ #include -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize) { size_t vl; while (entries > 0) { vl = __riscv_vsetvl_e16m4(entries); vuint16m4_t v_tab = __riscv_vle16_v_u16m4(table, vl); - vuint16m4_t v_diff = __riscv_vsub_vx_u16m4(v_tab, wsize, vl); - vbool4_t mask = __riscv_vmsltu_vx_u16m4_b4(v_tab, wsize, vl); - v_tab = __riscv_vmerge_vxm_u16m4(v_diff, 0, mask, vl); - __riscv_vse16_v_u16m4(table, v_tab, vl); + vuint16m4_t v_diff = __riscv_vssubu_vx_u16m4(v_tab, wsize, vl); + __riscv_vse16_v_u16m4(table, v_diff, vl); table += vl, entries -= vl; } } diff --git a/3rdparty/zlib-ng/arch/s390/Makefile.in b/3rdparty/zlib-ng/arch/s390/Makefile.in new file mode 100644 index 0000000000..e994157df2 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/Makefile.in @@ -0,0 +1,48 @@ +# Makefile for zlib-ng +# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= +SUFFIX= +VGFMAFLAG= +NOLTOFLAG= + +SRCDIR=. +SRCTOP=../.. +TOPDIR=$(SRCTOP) + +s390_features.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c + +s390_features.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/s390_features.c + +dfltcc_deflate.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c + +dfltcc_deflate.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_deflate.c + +dfltcc_inflate.o: + $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c + +dfltcc_inflate.lo: + $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/dfltcc_inflate.c + +crc32-vx.o: + $(CC) $(CFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c + +crc32-vx.lo: + $(CC) $(SFLAGS) $(VGFMAFLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32-vx.c + +mostlyclean: clean +clean: + rm -f *.o *.lo *~ + rm -rf objs + rm -f *.gcda *.gcno *.gcov + +distclean: clean + rm -f Makefile diff --git a/3rdparty/zlib-ng/arch/s390/README.md b/3rdparty/zlib-ng/arch/s390/README.md new file mode 100644 index 0000000000..7b383cc998 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/README.md @@ -0,0 +1,277 @@ +# Introduction + +This directory contains SystemZ deflate hardware acceleration support. +It can be enabled using the following build commands: + + $ ./configure --with-dfltcc-deflate --with-dfltcc-inflate + $ make + +or + + $ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 . + $ make + +When built like this, zlib-ng would compress using hardware on level 1, +and using software on all other levels. Decompression will always happen +in hardware. In order to enable hardware compression for levels 1-6 +(i.e. to make it used by default) one could add +`-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng. + +SystemZ deflate hardware acceleration is available on [IBM z15]( +https://www.ibm.com/products/z15) and newer machines under the name [ +"Integrated Accelerator for zEnterprise Data Compression"]( +https://www.ibm.com/support/z-content-solutions/compression/). The +programming interface to it is a machine instruction called DEFLATE +CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of [Principles +of Operation](https://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf). Both +the code and the rest of this document refer to this feature simply as +"DFLTCC". + +# Performance + +Performance figures are published [here]( +https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine +). The compression speed-up can be as high as 110x and the decompression +speed-up can be as high as 15x. + +# Limitations + +Two DFLTCC compression calls with identical inputs are not guaranteed to +produce identical outputs. Therefore care should be taken when using +hardware compression when reproducible results are desired. In +particular, zlib-ng-specific `zng_deflateSetParams` call allows setting +`Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC support for a +particular stream. + +DFLTCC does not support every single zlib-ng feature, in particular: + +* `inflate(Z_BLOCK)` and `inflate(Z_TREES)` +* `inflateMark()` +* `inflatePrime()` +* `inflateSyncPoint()` + +When used, these functions will either switch to software, or, in case +this is not possible, gracefully fail. + +# Code structure + +All SystemZ-specific code lives in `arch/s390` directory and is +integrated with the rest of zlib-ng using hook macros. + +## Hook macros + +DFLTCC takes as arguments a parameter block, an input buffer, an output +buffer, and a window. Parameter blocks are stored alongside zlib states; +buffers are forwarded from the caller; and window - which must be +4k-aligned and is always 64k large, is managed using the `PAD_WINDOW()`, +`WINDOW_PAD_SIZE`, `HINT_ALIGNED_WINDOW` and `DEFLATE_ADJUST_WINDOW_SIZE()` +and `INFLATE_ADJUST_WINDOW_SIZE()` hooks. + +Software and hardware window formats do not match, therefore, +`deflateSetDictionary()`, `deflateGetDictionary()`, `inflateSetDictionary()` +and `inflateGetDictionary()` need special handling, which is triggered using +`DEFLATE_SET_DICTIONARY_HOOK()`, `DEFLATE_GET_DICTIONARY_HOOK()`, +`INFLATE_SET_DICTIONARY_HOOK()` and `INFLATE_GET_DICTIONARY_HOOK()` macros. + +`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC +parameter block using `DEFLATE_RESET_KEEP_HOOK()` and +`INFLATE_RESET_KEEP_HOOK()` macros. + +`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and +`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported +calls gracefully fail. + +`DEFLATE_PARAMS_HOOK()` implements switching between hardware and +software compression mid-stream using `deflateParams()`. Switching +normally entails flushing the current block, which might not be possible +in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook +in order to detect and gracefully handle such situations. + +The algorithm implemented in hardware has different compression ratio +than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()` +and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()` +return the correct results for the hardware implementation. + +Actual compression and decompression are handled by `DEFLATE_HOOK()` and +`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the +window on its own, calling `updatewindow()` is suppressed using +`INFLATE_NEED_UPDATEWINDOW()` macro. + +In addition to compression, DFLTCC computes CRC-32 and Adler-32 +checksums, therefore, whenever it's used, software checksumming is +suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()` +macros. + +While software always produces reproducible compression results, this +is not the case for DFLTCC. Therefore, zlib-ng users are given the +ability to specify whether or not reproducible compression results +are required. While it is always possible to specify this setting +before the compression begins, it is not always possible to do so in +the middle of a deflate stream - the exact conditions for that are +determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro. + +## SystemZ-specific code + +When zlib-ng is built with DFLTCC, the hooks described above are +converted to calls to functions, which are implemented in +`arch/s390/dfltcc_*` files. The functions can be grouped in three broad +categories: + +* Base DFLTCC support, e.g. wrapping the machine instruction - `dfltcc()`. +* Translating between software and hardware data formats, e.g. + `dfltcc_deflate_set_dictionary()`. +* Translating between software and hardware state machines, e.g. + `dfltcc_deflate()` and `dfltcc_inflate()`. + +The functions from the first two categories are fairly simple, however, +various quirks in both software and hardware state machines make the +functions from the third category quite complicated. + +### `dfltcc_deflate()` function + +This function is called by `deflate()` and has the following +responsibilities: + +* Checking whether DFLTCC can be used with the current stream. If this + is not the case, then it returns `0`, making `deflate()` use some + other function in order to compress in software. Otherwise it returns + `1`. +* Block management and Huffman table generation. DFLTCC ends blocks only + when explicitly instructed to do so by the software. Furthermore, + whether to use fixed or dynamic Huffman tables must also be determined + by the software. Since looking at data in order to gather statistics + would negate performance benefits, the following approach is used: the + first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed + block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into + dynamic blocks. +* Writing EOBS. Block Closing Control bit in the parameter block + instructs DFLTCC to write EOBS, however, certain conditions need to be + met: input data length must be non-zero or Continuation Flag must be + set. To put this in simpler terms, DFLTCC will silently refuse to + write EOBS if this is the only thing that it is asked to do. Since the + code has to be able to emit EOBS in software anyway, in order to avoid + tricky corner cases Block Closing Control is never used. Whether to + write EOBS is instead controlled by `soft_bcc` variable. +* Triggering block post-processing. Depending on flush mode, `deflate()` + must perform various additional actions when a block or a stream ends. + `dfltcc_deflate()` informs `deflate()` about this using + `block_state *result` parameter. +* Converting software state fields into hardware parameter block fields, + and vice versa. For example, `wrap` and Check Value Type or `bi_valid` + and Sub-Byte Boundary. Certain fields cannot be translated and must + persist untouched in the parameter block between calls, for example, + Continuation Flag or Continuation State Buffer. +* Handling flush modes and low-memory situations. These aspects are + quite intertwined and pervasive. The general idea here is that the + code must not do anything in software - whether explicitly by e.g. + calling `send_eobs()`, or implicitly - by returning to `deflate()` + with certain return and `*result` values, when Continuation Flag is + set. +* Ending streams. When a new block is started and flush mode is + `Z_FINISH`, Block Header Final parameter block bit is used to mark + this block as final. However, sometimes an empty final block is + needed, and, unfortunately, just like with EOBS, DFLTCC will silently + refuse to do this. The general idea of DFLTCC implementation is to + rely as much as possible on the existing code. Here in order to do + this, the code pretends that it does not support DFLTCC, which makes + `deflate()` call a software compression function, which writes an + empty final block. Whether this is required is controlled by + `need_empty_block` variable. +* Error handling. This is simply converting + Operation-Ending-Supplemental Code to string. Errors can only happen + due to things like memory corruption, and therefore they don't affect + the `deflate()` return code. + +### `dfltcc_inflate()` function + +This function is called by `inflate()` from the `TYPEDO` state (that is, +when all the metadata is parsed and the stream is positioned at the type +bits of deflate block header) and it's responsible for the following: + +* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`. + Unfortunately, there is no way to ask DFLTCC to stop decompressing on + block or tree boundary. +* `inflate()` decompression loop management. This is controlled using + the return value, which can be either `DFLTCC_INFLATE_BREAK` or + `DFLTCC_INFLATE_CONTINUE`. +* Converting software state fields into hardware parameter block fields, + and vice versa. For example, `whave` and History Length or `wnext` and + History Offset. +* Ending streams. This instructs `inflate()` to return `Z_STREAM_END` + and is controlled by `last` state field. +* Error handling. Like deflate, error handling comprises + Operation-Ending-Supplemental Code to string conversion. Unlike + deflate, errors may happen due to bad inputs, therefore they are + propagated to `inflate()` by setting `mode` field to `MEM` or `BAD`. + +# Testing + +Given complexity of DFLTCC machine instruction, it is not clear whether +QEMU TCG will ever support it. At the time of writing, one has to have +access to an IBM z15+ VM or LPAR in order to test DFLTCC support. Since +DFLTCC is a non-privileged instruction, neither special VM/LPAR +configuration nor root are required. + +zlib-ng CI uses an IBM-provided z15 self-hosted builder for the DFLTCC +testing. There is no official IBM Z GitHub Actions runner, so we build +one inspired by `anup-kodlekere/gaplib`. +Future updates to actions-runner might need an updated patch. The .net +version number patch has been separated into a separate file to avoid a +need for constantly changing the patch. + +## Configuring the builder. + +### Install prerequisites. + +``` +sudo dnf install podman +``` + +### Add actions-runner service. + +``` +sudo cp self-hosted-builder/actions-runner.service /etc/systemd/system/ +sudo systemctl daemon-reload +``` + +### Create a config file, needs github personal access token. + +``` +# Create file /etc/actions-runner +repo=/ +access_token= +``` + +Access token should have the repo scope, consult +https://docs.github.com/en/rest/reference/actions#create-a-registration-token-for-a-repository +for details. + +### Autostart actions-runner. + +``` +$ sudo systemctl enable --now actions-runner +``` + +## Rebuilding the container + +In order to update the `gaplib-actions-runner` podman container, e.g. to get the +latest OS security fixes, follow these steps: +``` +# Stop actions-runner service +sudo systemctl stop actions-runner + +# Delete old container +sudo podman container rm gaplib-actions-runner + +# Delete old image +sudo podman image rm localhost/zlib-ng/actions-runner + +# Build image +sudo podman build --squash -f Dockerfile.zlib-ng --tag zlib-ng/actions-runner --build-arg . + +# Build container +sudo podman create --name=gaplib-actions-runner --env-file=/etc/actions-runner --init --interactive --volume=actions-runner-temp:/home/actions-runner zlib-ng/actions-runner + +# Start actions-runner service +sudo systemctl start actions-runner +``` diff --git a/3rdparty/zlib-ng/arch/s390/crc32-vx.c b/3rdparty/zlib-ng/arch/s390/crc32-vx.c new file mode 100644 index 0000000000..b3dcbf7030 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/crc32-vx.c @@ -0,0 +1,222 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * This code was originally written by Hendrik Brueckner + * for use in the Linux kernel and has been + * relicensed under the zlib license. + */ + +#include "zbuild.h" +#include "arch_functions.h" + +#include + +typedef unsigned char uv16qi __attribute__((vector_size(16))); +typedef unsigned int uv4si __attribute__((vector_size(16))); +typedef unsigned long long uv2di __attribute__((vector_size(16))); + +static uint32_t crc32_le_vgfm_16(uint32_t crc, const uint8_t *buf, size_t len) { + /* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. + * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1 + * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1 + * R3 = [(x128+32 mod P'(x) << 32)]' << 1 + * R4 = [(x128-32 mod P'(x) << 32)]' << 1 + * R5 = [(x64 mod P'(x) << 32)]' << 1 + * R6 = [(x32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barret reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */ + const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ + const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ + const uv2di r5 = {0, 0x163CD6124}; /* R5 */ + const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ + const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. + */ + uv2di v0 = {0, 0}; + v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); + + /* Load a 64-byte data chunk and XOR with CRC */ + uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); + uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); + uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); + uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); + + v1 ^= v0; + buf += 64; + len -= 64; + + while (len >= 64) { + /* Load the next 64-byte data chunk */ + uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); + uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); + uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); + uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate result + * is then folded (accumulated) with the next data chunk in PART1 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); + v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); + v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); + v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); + + buf += 64; + len -= 64; + } + + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. + */ + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); + + while (len >= 16) { + /* Load next data chunk */ + v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); + + /* Fold next data chunk */ + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); + + buf += 16; + len -= 16; + } + + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + v9 = vec_insert((unsigned char)0x40, v9, 7); + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + v0 = vec_srb(r4r3, (uv2di)v9); + v0[0] = 1; + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + v1 = (uv2di)vec_gfmsum_128(v0, v1); + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. + * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. + */ + v9 = vec_insert((unsigned char)0x20, v9, 7); + v2 = vec_srb(v1, (uv2di)v9); + v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ + v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); + + /* + * Apply a Barret reduction to compute the final 32-bit CRC value. + * + * The input values to the Barret reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barret reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barret reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of vector register containing + * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + v2 = vec_unpackl((uv4si)v1); + v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. + */ + v2 = vec_unpackl((uv4si)v2); + v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); + + return ((uv4si)v2)[2]; +} + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +uint32_t Z_INTERNAL crc32_s390_vx(uint32_t crc, const unsigned char *buf, size_t len) { + size_t prealign, aligned, remaining; + + if (len < VX_MIN_LEN + VX_ALIGN_MASK) + return PREFIX(crc32_braid)(crc, buf, len); + + if ((uintptr_t)buf & VX_ALIGN_MASK) { + prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); + len -= prealign; + crc = PREFIX(crc32_braid)(crc, buf, prealign); + buf += prealign; + } + aligned = len & ~VX_ALIGN_MASK; + remaining = len & VX_ALIGN_MASK; + + crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, aligned) ^ 0xffffffff; + + if (remaining) + crc = PREFIX(crc32_braid)(crc, buf + aligned, remaining); + + return crc; +} diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_common.h b/3rdparty/zlib-ng/arch/s390/dfltcc_common.h new file mode 100644 index 0000000000..a6527ab5df --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_common.h @@ -0,0 +1,119 @@ +#ifndef DFLTCC_COMMON_H +#define DFLTCC_COMMON_H + +#include "zutil.h" + +/* + Parameter Block for Query Available Functions. + */ +struct dfltcc_qaf_param { + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +} ALIGNED_(8); + +/* + Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. + */ +struct dfltcc_param_v0 { + uint16_t pbvn; /* Parameter-Block-Version Number */ + uint8_t mvn; /* Model-Version Number */ + uint8_t ribm; /* Reserved for IBM use */ + uint32_t reserved32 : 31; + uint32_t cf : 1; /* Continuation Flag */ + uint8_t reserved64[8]; + uint32_t nt : 1; /* New Task */ + uint32_t reserved129 : 1; + uint32_t cvt : 1; /* Check Value Type */ + uint32_t reserved131 : 1; + uint32_t htt : 1; /* Huffman-Table Type */ + uint32_t bcf : 1; /* Block-Continuation Flag */ + uint32_t bcc : 1; /* Block Closing Control */ + uint32_t bhf : 1; /* Block Header Final */ + uint32_t reserved136 : 1; + uint32_t reserved137 : 1; + uint32_t dhtgc : 1; /* DHT Generation Control */ + uint32_t reserved139 : 5; + uint32_t reserved144 : 5; + uint32_t sbb : 3; /* Sub-Byte Boundary */ + uint8_t oesc; /* Operation-Ending-Supplemental Code */ + uint32_t reserved160 : 12; + uint32_t ifs : 4; /* Incomplete-Function Status */ + uint16_t ifl; /* Incomplete-Function Length */ + uint8_t reserved192[8]; + uint8_t reserved256[8]; + uint8_t reserved320[4]; + uint16_t hl; /* History Length */ + uint32_t reserved368 : 1; + uint16_t ho : 15; /* History Offset */ + uint32_t cv; /* Check Value */ + uint32_t eobs : 15; /* End-of-block Symbol */ + uint32_t reserved431: 1; + uint8_t eobl : 4; /* End-of-block Length */ + uint32_t reserved436 : 12; + uint32_t reserved448 : 4; + uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + uint8_t reserved464[6]; + uint8_t cdht[288]; /* Compressed-Dynamic-Huffman Table */ + uint8_t reserved[24]; + uint8_t ribm2[8]; /* Reserved for IBM use */ + uint8_t csb[1152]; /* Continuation-State Buffer */ +} ALIGNED_(8); + +/* + Extension of inflate_state and deflate_state. + */ +struct dfltcc_state { + struct dfltcc_param_v0 param; /* Parameter block. */ + struct dfltcc_qaf_param af; /* Available functions. */ + char msg[64]; /* Buffer for strm->msg */ +}; + +typedef struct { + struct dfltcc_state common; + uint16_t level_mask; /* Levels on which to use DFLTCC */ + uint32_t block_size; /* New block each X bytes */ + size_t block_threshold; /* New block after total_in > X */ + uint32_t dht_threshold; /* New block only if avail_in >= X */ +} arch_deflate_state; + +typedef struct { + struct dfltcc_state common; +} arch_inflate_state; + +/* + History buffer size. + */ +#define HB_BITS 15 +#define HB_SIZE (1 << HB_BITS) + +/* + Sizes of deflate block parts. + */ +#define DFLTCC_BLOCK_HEADER_BITS 3 +#define DFLTCC_HLITS_COUNT_BITS 5 +#define DFLTCC_HDISTS_COUNT_BITS 5 +#define DFLTCC_HCLENS_COUNT_BITS 4 +#define DFLTCC_MAX_HCLENS 19 +#define DFLTCC_HCLEN_BITS 3 +#define DFLTCC_MAX_HLITS 286 +#define DFLTCC_MAX_HDISTS 30 +#define DFLTCC_MAX_HLIT_HDIST_BITS 7 +#define DFLTCC_MAX_SYMBOL_BITS 16 +#define DFLTCC_MAX_EOBS_BITS 15 +#define DFLTCC_MAX_PADDING_BITS 7 + +#define DEFLATE_BOUND_COMPLEN(source_len) \ + ((DFLTCC_BLOCK_HEADER_BITS + \ + DFLTCC_HLITS_COUNT_BITS + \ + DFLTCC_HDISTS_COUNT_BITS + \ + DFLTCC_HCLENS_COUNT_BITS + \ + DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \ + (DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \ + (source_len) * DFLTCC_MAX_SYMBOL_BITS + \ + DFLTCC_MAX_EOBS_BITS + \ + DFLTCC_MAX_PADDING_BITS) >> 3) + +#endif diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c b/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c new file mode 100644 index 0000000000..90b4b96e9c --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.c @@ -0,0 +1,383 @@ +/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL compression support. */ + +/* + Use the following commands to build zlib-ng with DFLTCC compression support: + + $ ./configure --with-dfltcc-deflate + or + + $ cmake -DWITH_DFLTCC_DEFLATE=1 . + + and then + + $ make +*/ + +#include "zbuild.h" +#include "deflate.h" +#include "trees_emit.h" +#include "dfltcc_deflate.h" +#include "dfltcc_detail.h" + +void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + arch_deflate_state *dfltcc_state = &state->arch; + + dfltcc_reset_state(&dfltcc_state->common); + + /* Initialize tuning parameters */ + dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; + dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; + dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; + dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; +} + +static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy, + int reproducible) { + deflate_state *state = (deflate_state *)strm->state; + arch_deflate_state *dfltcc_state = &state->arch; + + /* Unsupported compression settings */ + if ((dfltcc_state->level_mask & (1 << level)) == 0) + return 0; + if (window_bits != HB_BITS) + return 0; + if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY) + return 0; + if (reproducible) + return 0; + + /* Unsupported hardware */ + if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) || + !is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) || + !is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0)) + return 0; + + return 1; +} + +int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + + return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible); +} + +static inline void dfltcc_gdht(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + size_t avail_in = strm->avail_in; + + dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL); +} + +static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->total_in += (strm->avail_in - avail_in); + strm->total_out += (strm->avail_out - avail_out); + strm->avail_in = avail_in; + strm->avail_out = avail_out; + return cc; +} + +static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) { + deflate_state *state = (deflate_state *)strm->state; + + send_bits(state, PREFIX(bi_reverse)(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid); + PREFIX(flush_pending)(strm); + if (state->pending != 0) { + /* The remaining data is located in pending_out[0:pending]. If someone + * calls put_byte() - this might happen in deflate() - the byte will be + * placed into pending_buf[pending], which is incorrect. Move the + * remaining data to the beginning of pending_buf so that put_byte() is + * usable again. + */ + memmove(state->pending_buf, state->pending_out, state->pending); + state->pending_out = state->pending_buf; + } +#ifdef ZLIB_DEBUG + state->compressed_len += param->eobl; +#endif +} + +int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result) { + deflate_state *state = (deflate_state *)strm->state; + arch_deflate_state *dfltcc_state = &state->arch; + struct dfltcc_param_v0 *param = &dfltcc_state->common.param; + uInt masked_avail_in; + dfltcc_cc cc; + int need_empty_block; + int soft_bcc; + int no_flush; + + if (!PREFIX(dfltcc_can_deflate)(strm)) { + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; + return 0; + } + +again: + masked_avail_in = 0; + soft_bcc = 0; + no_flush = flush == Z_NO_FLUSH; + + /* No input data. Return, except when Continuation Flag is set, which means + * that DFLTCC has buffered some output in the parameter block and needs to + * be called again in order to flush it. + */ + if (strm->avail_in == 0 && !param->cf) { + /* A block is still open, and the hardware does not support closing + * blocks without adding data. Thus, close it manually. + */ + if (!no_flush && param->bcf) { + send_eobs(strm, param); + param->bcf = 0; + } + /* Let one of deflate_* functions write a trailing empty block. */ + if (flush == Z_FINISH) + return 0; + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; + /* Trigger block post-processing if necessary. */ + *result = no_flush ? need_more : block_done; + return 1; + } + + /* There is an open non-BFINAL block, we are not going to close it just + * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see + * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new + * DHT in order to adapt to a possibly changed input data distribution. + */ + if (param->bcf && no_flush && + strm->total_in > dfltcc_state->block_threshold && + strm->avail_in >= dfltcc_state->dht_threshold) { + if (param->cf) { + /* We need to flush the DFLTCC buffer before writing the + * End-of-block Symbol. Mask the input data and proceed as usual. + */ + masked_avail_in += strm->avail_in; + strm->avail_in = 0; + no_flush = 0; + } else { + /* DFLTCC buffer is empty, so we can manually write the + * End-of-block Symbol right away. + */ + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; + } + } + + /* No space for compressed data. If we proceed, dfltcc_cmpr() will return + * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still + * set BCF=1, which is wrong. Avoid complications and return early. + */ + if (strm->avail_out == 0) { + *result = need_more; + return 1; + } + + /* The caller gave us too much data. Pass only one block worth of + * uncompressed data to DFLTCC and mask the rest, so that on the next + * iteration we start a new block. + */ + if (no_flush && strm->avail_in > dfltcc_state->block_size) { + masked_avail_in += (strm->avail_in - dfltcc_state->block_size); + strm->avail_in = dfltcc_state->block_size; + } + + /* When we have an open non-BFINAL deflate block and caller indicates that + * the stream is ending, we need to close an open deflate block and open a + * BFINAL one. + */ + need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; + + /* Translate stream to parameter block */ + param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; + if (!no_flush) + /* We need to close a block. Always do this in software - when there is + * no input data, the hardware will not honor BCC. */ + soft_bcc = 1; + if (flush == Z_FINISH && !param->bcf) + /* We are about to open a BFINAL block, set Block Header Final bit + * until the stream ends. + */ + param->bhf = 1; + /* DFLTCC-CMPR will write to next_out, so make sure that buffers with + * higher precedence are empty. + */ + Assert(state->pending == 0, "There must be no pending bytes"); + Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); + param->sbb = (unsigned int)state->bi_valid; + if (param->sbb > 0) + *strm->next_out = (unsigned char)state->bi_buf; + /* Honor history and check value */ + param->nt = 0; + if (state->wrap == 1) + param->cv = strm->adler; + else if (state->wrap == 2) + param->cv = ZSWAP32(state->crc_fold.value); + + /* When opening a block, choose a Huffman-Table Type */ + if (!param->bcf) { + if (state->strategy == Z_FIXED || (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) + param->htt = HTT_FIXED; + else { + param->htt = HTT_DYNAMIC; + dfltcc_gdht(strm); + } + } + + /* Deflate */ + do { + cc = dfltcc_cmpr(strm); + if (strm->avail_in < 4096 && masked_avail_in > 0) + /* We are about to call DFLTCC with a small input buffer, which is + * inefficient. Since there is masked data, there will be at least + * one more DFLTCC call, so skip the current one and make the next + * one handle more data. + */ + break; + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc); + state->bi_valid = param->sbb; + if (state->bi_valid == 0) + state->bi_buf = 0; /* Avoid accessing next_out */ + else + state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); + if (state->wrap == 1) + strm->adler = param->cv; + else if (state->wrap == 2) + state->crc_fold.value = ZSWAP32(param->cv); + + /* Unmask the input data */ + strm->avail_in += masked_avail_in; + masked_avail_in = 0; + + /* If we encounter an error, it means there is a bug in DFLTCC call */ + Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); + + /* Update Block-Continuation Flag. It will be used to check whether to call + * GDHT the next time. + */ + if (cc == DFLTCC_CC_OK) { + if (soft_bcc) { + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; + } else + param->bcf = 1; + if (flush == Z_FINISH) { + if (need_empty_block) + /* Make the current deflate() call also close the stream */ + return 0; + else { + bi_windup(state); + *result = finish_done; + } + } else { + if (flush == Z_FULL_FLUSH) + param->hl = 0; /* Clear history */ + *result = flush == Z_NO_FLUSH ? need_more : block_done; + } + } else { + param->bcf = 1; + *result = need_more; + } + if (strm->avail_in != 0 && strm->avail_out != 0) + goto again; /* deflate() must use all input or all output */ + return 1; +} + +/* + Switching between hardware and software compression. + + DFLTCC does not support all zlib settings, e.g. generation of non-compressed + blocks or alternative window sizes. When such settings are applied on the + fly with deflateParams, we need to convert between hardware and software + window formats. +*/ +static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + return strm->total_in > 0 || param->nt == 0 || param->hl > 0; +} + +int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush) { + deflate_state *state = (deflate_state *)strm->state; + int could_deflate = PREFIX(dfltcc_can_deflate)(strm); + int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible); + + if (can_deflate == could_deflate) + /* We continue to work in the same mode - no changes needed */ + return Z_OK; + + if (!dfltcc_was_deflate_used(strm)) + /* DFLTCC was not used yet - no changes needed */ + return Z_OK; + + /* For now, do not convert between window formats - simply get rid of the old data instead */ + *flush = Z_FULL_FLUSH; + return Z_OK; +} + +int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might + * close the block without resetting the compression state. Detect this + * situation and return that deflation is not done. + */ + if (flush == Z_FULL_FLUSH && strm->avail_out == 0) + return 0; + + /* Return that deflation is not done if DFLTCC is used and either it + * buffered some data (Continuation Flag is set), or has not written EOBS + * yet (Block-Continuation Flag is set). + */ + return !PREFIX(dfltcc_can_deflate)(strm) || (!param->cf && !param->bcf); +} + +int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible) { + deflate_state *state = (deflate_state *)strm->state; + + return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm); +} + +/* + Preloading history. +*/ +int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + append_history(param, state->window, dictionary, dict_length); + state->strstart = 1; /* Add FDICT to zlib header */ + state->block_start = state->strstart; /* Make deflate_stored happy */ + return Z_OK; +} + +int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + if (dictionary) + get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h b/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h new file mode 100644 index 0000000000..35e2fd3f62 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_deflate.h @@ -0,0 +1,58 @@ +#ifndef DFLTCC_DEFLATE_H +#define DFLTCC_DEFLATE_H + +#include "deflate.h" +#include "dfltcc_common.h" + +void Z_INTERNAL PREFIX(dfltcc_reset_deflate_state)(PREFIX3(streamp)); +int Z_INTERNAL PREFIX(dfltcc_can_deflate)(PREFIX3(streamp) strm); +int Z_INTERNAL PREFIX(dfltcc_deflate)(PREFIX3(streamp) strm, int flush, block_state *result); +int Z_INTERNAL PREFIX(dfltcc_deflate_params)(PREFIX3(streamp) strm, int level, int strategy, int *flush); +int Z_INTERNAL PREFIX(dfltcc_deflate_done)(PREFIX3(streamp) strm, int flush); +int Z_INTERNAL PREFIX(dfltcc_can_set_reproducible)(PREFIX3(streamp) strm, int reproducible); +int Z_INTERNAL PREFIX(dfltcc_deflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length); +int Z_INTERNAL PREFIX(dfltcc_deflate_get_dictionary)(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length); + +#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_deflate)((strm))) \ + return PREFIX(dfltcc_deflate_set_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_deflate)((strm))) \ + return PREFIX(dfltcc_deflate_get_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +#define DEFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_deflate_state) + +#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ + do { \ + int err; \ +\ + err = PREFIX(dfltcc_deflate_params)((strm), (level), (strategy), (hook_flush)); \ + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) + +#define DEFLATE_DONE PREFIX(dfltcc_deflate_done) + +#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ + if (deflateStateCheck((strm)) || PREFIX(dfltcc_can_deflate)((strm))) \ + (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ + } while (0) + +#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (PREFIX(dfltcc_can_deflate)((strm))) + +#define DEFLATE_HOOK PREFIX(dfltcc_deflate) + +#define DEFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_deflate)((strm))) + +#define DEFLATE_CAN_SET_REPRODUCIBLE PREFIX(dfltcc_can_set_reproducible) + +#define DEFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) + +#endif diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_detail.h b/3rdparty/zlib-ng/arch/s390/dfltcc_detail.h new file mode 100644 index 0000000000..ae6001ba38 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_detail.h @@ -0,0 +1,275 @@ +#include "zbuild.h" +#include + +#ifdef HAVE_SYS_SDT_H +#include +#endif + +/* + Tuning parameters. + */ +#ifndef DFLTCC_LEVEL_MASK +#define DFLTCC_LEVEL_MASK 0x2 +#endif +#ifndef DFLTCC_BLOCK_SIZE +#define DFLTCC_BLOCK_SIZE 1048576 +#endif +#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +#endif +#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE +#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 +#endif +#ifndef DFLTCC_RIBM +#define DFLTCC_RIBM 0 +#endif + +#define static_assert(c, msg) __attribute__((unused)) static char static_assert_failed_ ## msg[c ? 1 : -1] + +#define DFLTCC_SIZEOF_QAF 32 +static_assert(sizeof(struct dfltcc_qaf_param) == DFLTCC_SIZEOF_QAF, qaf); + +static inline int is_bit_set(const char *bits, int n) { + return bits[n / 8] & (1 << (7 - (n % 8))); +} + +static inline void clear_bit(char *bits, int n) { + bits[n / 8] &= ~(1 << (7 - (n % 8))); +} + +#define DFLTCC_FACILITY 151 + +static inline int is_dfltcc_enabled(void) { + uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; + Z_REGISTER uint8_t r0 __asm__("r0"); + + memset(facilities, 0, sizeof(facilities)); + r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; + /* STFLE is supported since z9-109 and only in z/Architecture mode. When + * compiling with -m31, gcc defaults to ESA mode, however, since the kernel + * is 64-bit, it's always z/Architecture mode at runtime. + */ + __asm__ volatile( +#ifndef __clang__ + ".machinemode push\n" + ".machinemode zarch\n" +#endif + "stfle %[facilities]\n" +#ifndef __clang__ + ".machinemode pop\n" +#endif + : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc"); + return is_bit_set((const char *)facilities, DFLTCC_FACILITY); +} + +#define DFLTCC_FMT0 0 + +#define CVT_CRC32 0 +#define CVT_ADLER32 1 +#define HTT_FIXED 0 +#define HTT_DYNAMIC 1 + +#define DFLTCC_SIZEOF_GDHT_V0 384 +#define DFLTCC_SIZEOF_CMPR_XPND_V0 1536 +static_assert(offsetof(struct dfltcc_param_v0, csb) == DFLTCC_SIZEOF_GDHT_V0, gdht_v0); +static_assert(sizeof(struct dfltcc_param_v0) == DFLTCC_SIZEOF_CMPR_XPND_V0, cmpr_xpnd_v0); + +static inline z_const char *oesc_msg(char *buf, int oesc) { + if (oesc == 0x00) + return NULL; /* Successful completion */ + else { + sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); + return buf; + } +} + +/* + C wrapper for the DEFLATE CONVERSION CALL instruction. + */ +typedef enum { + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, +} dfltcc_cc; + +#define DFLTCC_QAF 0 +#define DFLTCC_GDHT 1 +#define DFLTCC_CMPR 2 +#define DFLTCC_XPND 4 +#define HBT_CIRCULAR (1 << 7) +#define DFLTCC_FN_MASK ((1 << 7) - 1) + +/* Return lengths of high (starting at param->ho) and low (starting at 0) fragments of the circular history buffer. */ +static inline void get_history_lengths(struct dfltcc_param_v0 *param, size_t *hl_high, size_t *hl_low) { + *hl_high = MIN(param->hl, HB_SIZE - param->ho); + *hl_low = param->hl - *hl_high; +} + +/* Notify instrumentation about an upcoming read/write access to the circular history buffer. */ +static inline void instrument_read_write_hist(struct dfltcc_param_v0 *param, void *hist) { + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + instrument_read_write(hist + param->ho, hl_high); + instrument_read_write(hist, hl_low); +} + +/* Notify MSan about a completed write to the circular history buffer. */ +static inline void msan_unpoison_hist(struct dfltcc_param_v0 *param, void *hist) { + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + __msan_unpoison(hist + param->ho, hl_high); + __msan_unpoison(hist, hl_low); +} + +static inline dfltcc_cc dfltcc(int fn, void *param, + unsigned char **op1, size_t *len1, + z_const unsigned char **op2, size_t *len2, void *hist) { + unsigned char *t2 = op1 ? *op1 : NULL; + unsigned char *orig_t2 = t2; + size_t t3 = len1 ? *len1 : 0; + z_const unsigned char *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? *len2 : 0; + Z_REGISTER int r0 __asm__("r0"); + Z_REGISTER void *r1 __asm__("r1"); + Z_REGISTER unsigned char *r2 __asm__("r2"); + Z_REGISTER size_t r3 __asm__("r3"); + Z_REGISTER z_const unsigned char *r4 __asm__("r4"); + Z_REGISTER size_t r5 __asm__("r5"); + int cc; + + /* Insert pre-instrumentation for DFLTCC. */ + switch (fn & DFLTCC_FN_MASK) { + case DFLTCC_QAF: + instrument_write(param, DFLTCC_SIZEOF_QAF); + break; + case DFLTCC_GDHT: + instrument_read_write(param, DFLTCC_SIZEOF_GDHT_V0); + instrument_read(t4, t5); + break; + case DFLTCC_CMPR: + case DFLTCC_XPND: + instrument_read_write(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + instrument_read(t4, t5); + instrument_write(t2, t3); + instrument_read_write_hist(param, hist); + break; + } + + r0 = fn; r1 = param; r2 = t2; r3 = t3; r4 = t4; r5 = t5; + __asm__ volatile( +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_entry, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_exit, STAP_PROBE_ASM_TEMPLATE(5)) +#endif + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) +#ifdef HAVE_SYS_SDT_H + , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +#endif + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + + /* Insert post-instrumentation for DFLTCC. */ + switch (fn & DFLTCC_FN_MASK) { + case DFLTCC_QAF: + __msan_unpoison(param, DFLTCC_SIZEOF_QAF); + break; + case DFLTCC_GDHT: + __msan_unpoison(param, DFLTCC_SIZEOF_GDHT_V0); + break; + case DFLTCC_CMPR: + __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + __msan_unpoison(orig_t2, t2 - orig_t2 + (((struct dfltcc_param_v0 *)param)->sbb == 0 ? 0 : 1)); + msan_unpoison_hist(param, hist); + break; + case DFLTCC_XPND: + __msan_unpoison(param, DFLTCC_SIZEOF_CMPR_XPND_V0); + __msan_unpoison(orig_t2, t2 - orig_t2); + msan_unpoison_hist(param, hist); + break; + } + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; +} + +#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) + +static inline void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { + /* Initialize available functions */ + if (is_dfltcc_enabled()) { + dfltcc(DFLTCC_QAF, &dfltcc_state->param, NULL, NULL, NULL, NULL, NULL); + memmove(&dfltcc_state->af, &dfltcc_state->param, sizeof(dfltcc_state->af)); + } else + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + + /* Initialize parameter block */ + memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); + dfltcc_state->param.nt = 1; + dfltcc_state->param.ribm = DFLTCC_RIBM; +} + +static inline void dfltcc_copy_state(void *dst, const void *src, uInt size, uInt extension_size) { + memcpy(dst, src, ALIGN_UP(size, 8) + extension_size); +} + +static inline void append_history(struct dfltcc_param_v0 *param, unsigned char *history, + const unsigned char *buf, uInt count) { + size_t offset; + size_t n; + + /* Do not use more than 32K */ + if (count > HB_SIZE) { + buf += count - HB_SIZE; + count = HB_SIZE; + } + offset = (param->ho + param->hl) % HB_SIZE; + if (offset + count <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(history + offset, buf, count); + else { + /* Circular history buffer wraps - copy two chunks */ + n = HB_SIZE - offset; + memcpy(history + offset, buf, n); + memcpy(history, buf + n, count - n); + } + n = param->hl + count; + if (n <= HB_SIZE) + /* All history fits into buffer - no need to discard anything */ + param->hl = n; + else { + /* History does not fit into buffer - discard extra bytes */ + param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; + param->hl = HB_SIZE; + } +} + +static inline void get_history(struct dfltcc_param_v0 *param, const unsigned char *history, + unsigned char *buf) { + size_t hl_high, hl_low; + + get_history_lengths(param, &hl_high, &hl_low); + memcpy(buf, history + param->ho, hl_high); + memcpy(buf + hl_high, history, hl_low); +} diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c b/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c new file mode 100644 index 0000000000..cc3cb39781 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.c @@ -0,0 +1,191 @@ +/* dfltcc_inflate.c - IBM Z DEFLATE CONVERSION CALL decompression support. */ + +/* + Use the following commands to build zlib-ng with DFLTCC decompression support: + + $ ./configure --with-dfltcc-inflate + or + + $ cmake -DWITH_DFLTCC_INFLATE=1 . + + and then + + $ make +*/ + +#include "zbuild.h" +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "dfltcc_inflate.h" +#include "dfltcc_detail.h" + +void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + + dfltcc_reset_state(&state->arch.common); +} + +int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = &state->arch.common; + + /* Unsupported hardware */ + return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); +} + +static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->avail_in = avail_in; + strm->avail_out = avail_out; + return cc; +} + +dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = &state->arch.common; + struct dfltcc_param_v0 *param = &dfltcc_state->param; + dfltcc_cc cc; + + if (flush == Z_BLOCK || flush == Z_TREES) { + /* DFLTCC does not support stopping on block boundaries */ + if (PREFIX(dfltcc_inflate_disable)(strm)) { + *ret = Z_STREAM_ERROR; + return DFLTCC_INFLATE_BREAK; + } else + return DFLTCC_INFLATE_SOFTWARE; + } + + if (state->last) { + if (state->bits != 0) { + strm->next_in++; + strm->avail_in--; + state->bits = 0; + } + state->mode = CHECK; + return DFLTCC_INFLATE_CONTINUE; + } + + if (strm->avail_in == 0 && !param->cf) + return DFLTCC_INFLATE_BREAK; + + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; + + /* Translate stream to parameter block */ + param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; + param->sbb = state->bits; + if (param->hl) + param->nt = 0; /* Honor history for the first block */ + if (state->wrap & 4) + param->cv = state->flags ? ZSWAP32(state->check) : state->check; + + /* Inflate */ + do { + cc = dfltcc_xpnd(strm); + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->last = cc == DFLTCC_CC_OK; + state->bits = param->sbb; + if (state->wrap & 4) + strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ + state->mode = BAD; + return DFLTCC_INFLATE_CONTINUE; + } + state->mode = TYPEDO; + /* Break if operands are exhausted, otherwise continue looping */ + return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? + DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; +} + +int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + + return !state->arch.common.param.nt; +} + +/* + Rotates a circular buffer. + The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ + */ +static void rotate(unsigned char *start, unsigned char *pivot, unsigned char *end) { + unsigned char *p = pivot; + unsigned char tmp; + + while (p != start) { + tmp = *start; + *start = *p; + *p = tmp; + + start++; + p++; + + if (p == end) + p = pivot; + else if (start == pivot) + pivot = p; + } +} + +int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = &state->arch.common; + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (!PREFIX(dfltcc_can_inflate)(strm)) + return 0; + if (PREFIX(dfltcc_was_inflate_used)(strm)) + /* DFLTCC has already decompressed some data. Since there is not + * enough information to resume decompression in software, the call + * must fail. + */ + return 1; + /* DFLTCC was not used yet - decompress in software */ + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + /* Convert the window from the hardware to the software format */ + rotate(state->window, state->window + param->ho, state->window + HB_SIZE); + state->whave = state->wnext = MIN(param->hl, state->wsize); + return 0; +} + +/* + Preloading history. +*/ +int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; + + append_history(param, state->window, dictionary, dict_length); + state->havedict = 1; + return Z_OK; +} + +int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, + unsigned char *dictionary, uInt *dict_length) { + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &state->arch.common.param; + + if (dictionary && state->window) + get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} diff --git a/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h b/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h new file mode 100644 index 0000000000..3623f8ed7f --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/dfltcc_inflate.h @@ -0,0 +1,67 @@ +#ifndef DFLTCC_INFLATE_H +#define DFLTCC_INFLATE_H + +#include "dfltcc_common.h" + +void Z_INTERNAL PREFIX(dfltcc_reset_inflate_state)(PREFIX3(streamp) strm); +int Z_INTERNAL PREFIX(dfltcc_can_inflate)(PREFIX3(streamp) strm); +typedef enum { + DFLTCC_INFLATE_CONTINUE, + DFLTCC_INFLATE_BREAK, + DFLTCC_INFLATE_SOFTWARE, +} dfltcc_inflate_action; +dfltcc_inflate_action Z_INTERNAL PREFIX(dfltcc_inflate)(PREFIX3(streamp) strm, int flush, int *ret); +int Z_INTERNAL PREFIX(dfltcc_was_inflate_used)(PREFIX3(streamp) strm); +int Z_INTERNAL PREFIX(dfltcc_inflate_disable)(PREFIX3(streamp) strm); +int Z_INTERNAL PREFIX(dfltcc_inflate_set_dictionary)(PREFIX3(streamp) strm, + const unsigned char *dictionary, uInt dict_length); +int Z_INTERNAL PREFIX(dfltcc_inflate_get_dictionary)(PREFIX3(streamp) strm, + unsigned char *dictionary, uInt* dict_length); + +#define INFLATE_RESET_KEEP_HOOK PREFIX(dfltcc_reset_inflate_state) + +#define INFLATE_PRIME_HOOK(strm, bits, value) \ + do { if (PREFIX(dfltcc_inflate_disable)((strm))) return Z_STREAM_ERROR; } while (0) + +#define INFLATE_TYPEDO_HOOK(strm, flush) \ + if (PREFIX(dfltcc_can_inflate)((strm))) { \ + dfltcc_inflate_action action; \ +\ + RESTORE(); \ + action = PREFIX(dfltcc_inflate)((strm), (flush), &ret); \ + LOAD(); \ + if (action == DFLTCC_INFLATE_CONTINUE) \ + break; \ + else if (action == DFLTCC_INFLATE_BREAK) \ + goto inf_leave; \ + } + +#define INFLATE_NEED_CHECKSUM(strm) (!PREFIX(dfltcc_can_inflate)((strm))) + +#define INFLATE_NEED_UPDATEWINDOW(strm) (!PREFIX(dfltcc_can_inflate)((strm))) + +#define INFLATE_MARK_HOOK(strm) \ + do { \ + if (PREFIX(dfltcc_was_inflate_used)((strm))) return -(1L << 16); \ + } while (0) + +#define INFLATE_SYNC_POINT_HOOK(strm) \ + do { \ + if (PREFIX(dfltcc_was_inflate_used)((strm))) return Z_STREAM_ERROR; \ + } while (0) + +#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_inflate)((strm))) \ + return PREFIX(dfltcc_inflate_set_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (PREFIX(dfltcc_can_inflate)((strm))) \ + return PREFIX(dfltcc_inflate_get_dictionary)((strm), (dict), (dict_len)); \ + } while (0) + +#define INFLATE_ADJUST_WINDOW_SIZE(n) MAX(n, HB_SIZE) + +#endif diff --git a/3rdparty/zlib-ng/arch/s390/s390_features.c b/3rdparty/zlib-ng/arch/s390/s390_features.c new file mode 100644 index 0000000000..629025d5bb --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/s390_features.c @@ -0,0 +1,14 @@ +#include "zbuild.h" +#include "s390_features.h" + +#ifdef HAVE_SYS_AUXV_H +# include +#endif + +#ifndef HWCAP_S390_VXRS +#define HWCAP_S390_VXRS HWCAP_S390_VX +#endif + +void Z_INTERNAL s390_check_features(struct s390_cpu_features *features) { + features->has_vx = getauxval(AT_HWCAP) & HWCAP_S390_VXRS; +} diff --git a/3rdparty/zlib-ng/arch/s390/s390_features.h b/3rdparty/zlib-ng/arch/s390/s390_features.h new file mode 100644 index 0000000000..fb2ac14b26 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/s390_features.h @@ -0,0 +1,14 @@ +/* s390_features.h -- check for s390 features. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef S390_FEATURES_H_ +#define S390_FEATURES_H_ + +struct s390_cpu_features { + int has_vx; +}; + +void Z_INTERNAL s390_check_features(struct s390_cpu_features *features); + +#endif diff --git a/3rdparty/zlib-ng/arch/s390/s390_functions.h b/3rdparty/zlib-ng/arch/s390/s390_functions.h new file mode 100644 index 0000000000..e9c67978f0 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/s390_functions.h @@ -0,0 +1,20 @@ +/* s390_functions.h -- s390 implementations for arch-specific functions. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef S390_FUNCTIONS_H_ +#define S390_FUNCTIONS_H_ + +#ifdef S390_CRC32_VX +uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__) +# undef native_crc32 +# define native_crc32 = crc32_s390_vx +# endif +#endif + +#endif diff --git a/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.Dockerfile b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.Dockerfile new file mode 100644 index 0000000000..cf5c3e7271 --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.Dockerfile @@ -0,0 +1,47 @@ +# Self-Hosted IBM Z Github Actions Runner. + +FROM almalinux:9 + +RUN dnf update -y -q && \ + dnf install -y -q --enablerepo=crb wget git which sudo jq \ + cmake make automake autoconf m4 libtool ninja-build python3-pip \ + gcc gcc-c++ clang llvm-toolset glibc-all-langpacks langpacks-en \ + glibc-static libstdc++-static libstdc++-devel libxslt-devel libxml2-devel + +RUN dnf install -y -q dotnet-sdk-6.0 && \ + echo "Using SDK - `dotnet --version`" + +COPY runner-s390x.patch /tmp/runner.patch +COPY runner-global.json /tmp/global.json + +RUN cd /tmp && \ + git clone -q https://github.com/actions/runner && \ + cd runner && \ + git checkout $(git describe --tags $(git rev-list --tags --max-count=1)) -b build && \ + git apply /tmp/runner.patch && \ + cp -f /tmp/global.json src/global.json + + +RUN cd /tmp/runner/src && \ + ./dev.sh layout && \ + ./dev.sh package && \ + rm -rf /root/.dotnet /root/.nuget + +RUN useradd -c "Action Runner" -m actions-runner && \ + usermod -L actions-runner + +RUN tar -xf /tmp/runner/_package/*.tar.gz -C /home/actions-runner && \ + chown -R actions-runner:actions-runner /home/actions-runner + +#VOLUME /home/actions-runner + +RUN rm -rf /tmp/runner /var/cache/dnf/* /tmp/runner.patch /tmp/global.json && \ + dnf clean all + +USER actions-runner + +# Scripts. +COPY fs/ / +WORKDIR /home/actions-runner +ENTRYPOINT ["/usr/bin/entrypoint"] +CMD ["/usr/bin/actions-runner"] diff --git a/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.service b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.service new file mode 100644 index 0000000000..b6c20b65ec --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/actions-runner.service @@ -0,0 +1,18 @@ +[Unit] +Description=Podman container: Gaplib Github Actions Runner +Wants=network-online.target +After=network-online.target +StartLimitIntervalSec=1 +RequiresMountsFor=/run/user/1001/containers + +[Service] +Environment=PODMAN_SYSTEMD_UNIT=%n +Restart=always +TimeoutStopSec=61 +ExecStart=/usr/bin/podman start gaplib-actions-runner +ExecStop=/usr/bin/podman stop -t 1 gaplib-actions-runner +ExecStopPost=/usr/bin/podman stop -t 1 gaplib-actions-runner +Type=forking + +[Install] +WantedBy=default.target diff --git a/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-global.json b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-global.json new file mode 100644 index 0000000000..e7028fe0dd --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-global.json @@ -0,0 +1,5 @@ +{ + "sdk": { + "version": "6.0.421" + } +} diff --git a/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-s390x.patch b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-s390x.patch new file mode 100644 index 0000000000..8260f3ccdd --- /dev/null +++ b/3rdparty/zlib-ng/arch/s390/self-hosted-builder/runner-s390x.patch @@ -0,0 +1,243 @@ +diff --git a/src/Directory.Build.props b/src/Directory.Build.props +index 9db5fac..f02e235 100644 +--- a/src/Directory.Build.props ++++ b/src/Directory.Build.props +@@ -44,6 +44,9 @@ + + $(DefineConstants);ARM64 + ++ ++ $(DefineConstants);S390X ++ + + + +diff --git a/src/Misc/externals.sh b/src/Misc/externals.sh +index 383221e..1555f67 100755 +--- a/src/Misc/externals.sh ++++ b/src/Misc/externals.sh +@@ -189,3 +189,8 @@ if [[ "$PACKAGERUNTIME" == "linux-arm" ]]; then + acquireExternalTool "$NODE_URL/v${NODE16_VERSION}/node-v${NODE16_VERSION}-linux-armv7l.tar.gz" node16 fix_nested_dir + acquireExternalTool "$NODE_URL/v${NODE20_VERSION}/node-v${NODE20_VERSION}-linux-armv7l.tar.gz" node20 fix_nested_dir + fi ++ ++if [[ "$PACKAGERUNTIME" == "linux-s390x" ]]; then ++ acquireExternalTool "$NODE_URL/v${NODE16_VERSION}/node-v${NODE16_VERSION}-linux-s390x.tar.gz" node16 fix_nested_dir ++ acquireExternalTool "$NODE_URL/v${NODE20_VERSION}/node-v${NODE20_VERSION}-linux-s390x.tar.gz" node20 fix_nested_dir ++fi +diff --git a/src/Misc/layoutroot/config.sh b/src/Misc/layoutroot/config.sh +index 14cc6ba..9b5b8e6 100755 +--- a/src/Misc/layoutroot/config.sh ++++ b/src/Misc/layoutroot/config.sh +@@ -20,25 +20,29 @@ then + + message="Execute sudo ./bin/installdependencies.sh to install any missing Dotnet Core 6.0 dependencies." + +- ldd ./bin/libcoreclr.so | grep 'not found' +- if [ $? -eq 0 ]; then +- echo "Dependencies is missing for Dotnet Core 6.0" +- echo $message +- exit 1 +- fi ++ ARCH=`uname -m` ++ if [ "${ARCH}" != "s390x" -a "${ARCH}" != "ppc64le" ] ++ then ++ ldd ./bin/libcoreclr.so | grep 'not found' ++ if [ $? -eq 0 ]; then ++ echo "Dependencies is missing for Dotnet Core 6.0" ++ echo $message ++ exit 1 ++ fi + +- ldd ./bin/libSystem.Security.Cryptography.Native.OpenSsl.so | grep 'not found' +- if [ $? -eq 0 ]; then +- echo "Dependencies is missing for Dotnet Core 6.0" +- echo $message +- exit 1 +- fi ++ ldd ./bin/libSystem.Security.Cryptography.Native.OpenSsl.so | grep 'not found' ++ if [ $? -eq 0 ]; then ++ echo "Dependencies is missing for Dotnet Core 6.0" ++ echo $message ++ exit 1 ++ fi + +- ldd ./bin/libSystem.IO.Compression.Native.so | grep 'not found' +- if [ $? -eq 0 ]; then +- echo "Dependencies is missing for Dotnet Core 6.0" +- echo $message +- exit 1 ++ ldd ./bin/libSystem.IO.Compression.Native.so | grep 'not found' ++ if [ $? -eq 0 ]; then ++ echo "Dependencies is missing for Dotnet Core 6.0" ++ echo $message ++ exit 1 ++ fi + fi + + if ! [ -x "$(command -v ldconfig)" ]; then +diff --git a/src/Runner.Common/Constants.cs b/src/Runner.Common/Constants.cs +index 177e3c9..9545981 100644 +--- a/src/Runner.Common/Constants.cs ++++ b/src/Runner.Common/Constants.cs +@@ -58,7 +58,8 @@ namespace GitHub.Runner.Common + X86, + X64, + Arm, +- Arm64 ++ Arm64, ++ S390x + } + + public static class Runner +@@ -81,6 +82,8 @@ namespace GitHub.Runner.Common + public static readonly Architecture PlatformArchitecture = Architecture.Arm; + #elif ARM64 + public static readonly Architecture PlatformArchitecture = Architecture.Arm64; ++#elif S390X ++ public static readonly Architecture PlatformArchitecture = Architecture.S390x; + #else + public static readonly Architecture PlatformArchitecture = Architecture.X64; + #endif +diff --git a/src/Runner.Common/Util/VarUtil.cs b/src/Runner.Common/Util/VarUtil.cs +index 97273a1..2a34430 100644 +--- a/src/Runner.Common/Util/VarUtil.cs ++++ b/src/Runner.Common/Util/VarUtil.cs +@@ -53,6 +53,8 @@ namespace GitHub.Runner.Common.Util + return "ARM"; + case Constants.Architecture.Arm64: + return "ARM64"; ++ case Constants.Architecture.S390x: ++ return "S390X"; + default: + throw new NotSupportedException(); // Should never reach here. + } +diff --git a/src/Test/L0/ConstantGenerationL0.cs b/src/Test/L0/ConstantGenerationL0.cs +index 2042485..a9d8b46 100644 +--- a/src/Test/L0/ConstantGenerationL0.cs ++++ b/src/Test/L0/ConstantGenerationL0.cs +@@ -20,6 +20,7 @@ namespace GitHub.Runner.Common.Tests + "linux-x64", + "linux-arm", + "linux-arm64", ++ "linux-s390x", + "osx-x64", + "osx-arm64" + }; +diff --git a/src/Test/L0/Listener/SelfUpdaterL0.cs b/src/Test/L0/Listener/SelfUpdaterL0.cs +index 26ba65e..6791df3 100644 +--- a/src/Test/L0/Listener/SelfUpdaterL0.cs ++++ b/src/Test/L0/Listener/SelfUpdaterL0.cs +@@ -1,4 +1,4 @@ +-#if !(OS_WINDOWS && ARM64) ++#if !(OS_WINDOWS && ARM64) && !S390X + using System; + using System.Collections.Generic; + using System.IO; +@@ -16,6 +16,7 @@ using Xunit; + + namespace GitHub.Runner.Common.Tests.Listener + { ++#if !S390X // Self-update is not currently supported on S390X + public sealed class SelfUpdaterL0 + { + private Mock _runnerServer; +@@ -291,5 +292,6 @@ namespace GitHub.Runner.Common.Tests.Listener + } + } + } ++#endif + } + #endif +diff --git a/src/Test/L0/Listener/SelfUpdaterV2L0.cs b/src/Test/L0/Listener/SelfUpdaterV2L0.cs +index 5115a6b..dd8d198 100644 +--- a/src/Test/L0/Listener/SelfUpdaterV2L0.cs ++++ b/src/Test/L0/Listener/SelfUpdaterV2L0.cs +@@ -1,4 +1,4 @@ +-#if !(OS_WINDOWS && ARM64) ++#if !(OS_WINDOWS && ARM64) && !S390X + using System; + using System.Collections.Generic; + using System.IO; +diff --git a/src/Test/L0/Worker/StepHostL0.cs b/src/Test/L0/Worker/StepHostL0.cs +index f6b5889..26f8e21 100644 +--- a/src/Test/L0/Worker/StepHostL0.cs ++++ b/src/Test/L0/Worker/StepHostL0.cs +@@ -31,7 +31,7 @@ namespace GitHub.Runner.Common.Tests.Worker + return hc; + } + +-#if OS_LINUX ++#if OS_LINUX && !S390X + [Fact] + [Trait("Level", "L0")] + [Trait("Category", "Worker")] +diff --git a/src/dev.sh b/src/dev.sh +index fa637d1..8c66f37 100755 +--- a/src/dev.sh ++++ b/src/dev.sh +@@ -54,6 +54,7 @@ elif [[ "$CURRENT_PLATFORM" == 'linux' ]]; then + case $CPU_NAME in + armv7l) RUNTIME_ID="linux-arm";; + aarch64) RUNTIME_ID="linux-arm64";; ++ s390x) RUNTIME_ID="linux-s390x";; + esac + fi + elif [[ "$CURRENT_PLATFORM" == 'darwin' ]]; then +@@ -80,7 +81,7 @@ if [[ "$CURRENT_PLATFORM" == 'windows' ]]; then + exit 1 + fi + elif [[ "$CURRENT_PLATFORM" == 'linux' ]]; then +- if [[ ("$RUNTIME_ID" != 'linux-x64') && ("$RUNTIME_ID" != 'linux-x86') && ("$RUNTIME_ID" != 'linux-arm64') && ("$RUNTIME_ID" != 'linux-arm') ]]; then ++ if [[ ("$RUNTIME_ID" != 'linux-x64') && ("$RUNTIME_ID" != 'linux-x86') && ("$RUNTIME_ID" != 'linux-arm64') && ("$RUNTIME_ID" != 'linux-arm') && ("$RUNTIME_ID" != 'linux-s390x') ]]; then + echo "Failed: Can't build $RUNTIME_ID package $CURRENT_PLATFORM" >&2 + exit 1 + fi +@@ -199,7 +200,8 @@ function package () + popd > /dev/null + } + +-if [[ (! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}") || (! -e "${DOTNETSDK_INSTALLDIR}/dotnet") ]]; then ++if [[ "${RUNTIME_ID}" != "linux-s390x" && ((! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}") || (! -e "${DOTNETSDK_INSTALLDIR}/dotnet")) ]]; then ++ + + # Download dotnet SDK to ../_dotnetsdk directory + heading "Ensure Dotnet SDK" +@@ -224,8 +226,10 @@ if [[ (! -d "${DOTNETSDK_INSTALLDIR}") || (! -e "${DOTNETSDK_INSTALLDIR}/.${DOTN + echo "${DOTNETSDK_VERSION}" > "${DOTNETSDK_INSTALLDIR}/.${DOTNETSDK_VERSION}" + fi + +-echo "Prepend ${DOTNETSDK_INSTALLDIR} to %PATH%" +-export PATH=${DOTNETSDK_INSTALLDIR}:$PATH ++if [[ -d "${DOTNETSDK_INSTALLDIR}" ]]; then ++ echo "Prepend ${DOTNETSDK_INSTALLDIR} to %PATH%" ++ export PATH=${DOTNETSDK_INSTALLDIR}:$PATH ++fi + + heading "Dotnet SDK Version" + dotnet --version +diff --git a/src/dir.proj b/src/dir.proj +index 056a312..8370922 100644 +--- a/src/dir.proj ++++ b/src/dir.proj +@@ -41,8 +41,18 @@ + + + +- +- ++ ++ ++ RuntimeIdentifier=$(PackageRuntime) ++ ++ SelfContained=false;CopyLocalRuntimeTargetAssets=false ++ ++ ++ ++ + + + diff --git a/3rdparty/zlib-ng/arch/x86/Makefile.in b/3rdparty/zlib-ng/arch/x86/Makefile.in index 7c052469b2..c13cd179c0 100644 --- a/3rdparty/zlib-ng/arch/x86/Makefile.in +++ b/3rdparty/zlib-ng/arch/x86/Makefile.in @@ -35,7 +35,6 @@ all: \ chunkset_ssse3.o chunkset_ssse3.lo \ compare256_avx2.o compare256_avx2.lo \ compare256_sse2.o compare256_sse2.lo \ - insert_string_sse42.o insert_string_sse42.lo \ crc32_pclmulqdq.o crc32_pclmulqdq.lo \ crc32_vpclmulqdq.o crc32_vpclmulqdq.lo \ slide_hash_avx2.o slide_hash_avx2.lo \ @@ -77,12 +76,6 @@ compare256_sse2.o: compare256_sse2.lo: $(CC) $(SFLAGS) $(SSE2FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_sse2.c -insert_string_sse42.o: - $(CC) $(CFLAGS) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c - -insert_string_sse42.lo: - $(CC) $(SFLAGS) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse42.c - crc32_pclmulqdq.o: $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c @@ -90,10 +83,10 @@ crc32_pclmulqdq.lo: $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_pclmulqdq.c crc32_vpclmulqdq.o: - $(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + $(CC) $(CFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c crc32_vpclmulqdq.lo: - $(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE42FLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c + $(CC) $(SFLAGS) $(PCLMULFLAG) $(VPCLMULFLAG) $(AVX512FLAG) $(NOLTOFLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_vpclmulqdq.c slide_hash_avx2.o: $(CC) $(CFLAGS) $(AVX2FLAG) $(NOLTOFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_avx2.c diff --git a/3rdparty/zlib-ng/arch/x86/adler32_avx2.c b/3rdparty/zlib-ng/arch/x86/adler32_avx2.c index e3ac6705ce..38e7f068e3 100644 --- a/3rdparty/zlib-ng/arch/x86/adler32_avx2.c +++ b/3rdparty/zlib-ng/arch/x86/adler32_avx2.c @@ -9,24 +9,15 @@ #ifdef X86_AVX2 -#include "../../zbuild.h" +#include "zbuild.h" #include -#include "../../adler32_fold.h" -#include "../../adler32_p.h" +#include "adler32_p.h" #include "adler32_avx2_p.h" #include "x86_intrins.h" -#ifdef X86_SSE42 extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len); -#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d) -#define sub32(a, b, c) adler32_ssse3(a, b, c) -#else -#define copy_sub32(a, b, c, d) adler32_copy_len_16(adler0, c, b, d, adler1) -#define sub32(a, b, c) adler32_len_16(adler0, b, c, adler1) -#endif - static inline uint32_t adler32_fold_copy_impl(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len, const int COPY) { if (src == NULL) return 1L; if (len == 0) return adler; @@ -44,9 +35,9 @@ rem_peel: } } else if (len < 32) { if (COPY) { - return copy_sub32(adler, dst, src, len); + return adler32_fold_copy_sse42(adler, dst, src, len); } else { - return sub32(adler, src, len); + return adler32_ssse3(adler, src, len); } } diff --git a/3rdparty/zlib-ng/arch/x86/adler32_avx512.c b/3rdparty/zlib-ng/arch/x86/adler32_avx512.c index aa6cc17018..626c4807f8 100644 --- a/3rdparty/zlib-ng/arch/x86/adler32_avx512.c +++ b/3rdparty/zlib-ng/arch/x86/adler32_avx512.c @@ -8,10 +8,9 @@ #ifdef X86_AVX512 -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../adler32_fold.h" -#include "../../cpu_features.h" +#include "zbuild.h" +#include "adler32_p.h" +#include "arch_functions.h" #include #include "x86_intrins.h" #include "adler32_avx512_p.h" @@ -33,13 +32,7 @@ rem_peel: _mm512_mask_storeu_epi8(dst, storemask, copy_vec); } -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } __m512i vbuf, vs1_0, vs3; diff --git a/3rdparty/zlib-ng/arch/x86/adler32_avx512_vnni.c b/3rdparty/zlib-ng/arch/x86/adler32_avx512_vnni.c index 771f7ebe04..4c5cfc1cad 100644 --- a/3rdparty/zlib-ng/arch/x86/adler32_avx512_vnni.c +++ b/3rdparty/zlib-ng/arch/x86/adler32_avx512_vnni.c @@ -9,11 +9,10 @@ #ifdef X86_AVX512VNNI -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../cpu_features.h" +#include "zbuild.h" +#include "adler32_p.h" +#include "arch_functions.h" #include -#include "../../adler32_fold.h" #include "x86_intrins.h" #include "adler32_avx512_p.h" #include "adler32_avx2_p.h" @@ -28,20 +27,10 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size rem_peel: if (len < 32) -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif if (len < 64) -#ifdef X86_AVX2 return adler32_avx2(adler, src, len); -#elif defined(X86_SSE3) - return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif const __m512i dot2v = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, @@ -135,11 +124,7 @@ rem_peel_copy: __m256i copy_vec = _mm256_maskz_loadu_epi8(storemask, src); _mm256_mask_storeu_epi8(dst, storemask, copy_vec); -#if defined(X86_SSSE3) return adler32_ssse3(adler, src, len); -#else - return adler32_len_16(adler0, src, len, adler1); -#endif } const __m256i dot2v = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, diff --git a/3rdparty/zlib-ng/arch/x86/adler32_sse42.c b/3rdparty/zlib-ng/arch/x86/adler32_sse42.c index 257a360982..df0739d165 100644 --- a/3rdparty/zlib-ng/arch/x86/adler32_sse42.c +++ b/3rdparty/zlib-ng/arch/x86/adler32_sse42.c @@ -6,9 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../adler32_p.h" -#include "../../adler32_fold.h" +#include "zbuild.h" +#include "adler32_p.h" #include "adler32_ssse3_p.h" #include diff --git a/3rdparty/zlib-ng/arch/x86/adler32_ssse3.c b/3rdparty/zlib-ng/arch/x86/adler32_ssse3.c index ae819d632e..15e2f78ba3 100644 --- a/3rdparty/zlib-ng/arch/x86/adler32_ssse3.c +++ b/3rdparty/zlib-ng/arch/x86/adler32_ssse3.c @@ -6,8 +6,8 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../adler32_p.h" +#include "zbuild.h" +#include "adler32_p.h" #include "adler32_ssse3_p.h" #ifdef X86_SSSE3 diff --git a/3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c b/3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c index c06d1b37bd..722ecd3d51 100644 --- a/3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c +++ b/3rdparty/zlib-ng/arch/x86/chunkset_ssse3.c @@ -4,10 +4,7 @@ #include "zbuild.h" -/* This requires SSE2 support. While it's implicit with SSSE3, we can minimize - * code size by sharing the chunkcopy functions, which will certainly compile - * to identical machine code */ -#if defined(X86_SSSE3) && defined(X86_SSE2) +#if defined(X86_SSSE3) #include #include "../generic/chunk_permute_table.h" @@ -19,8 +16,6 @@ typedef __m128i chunk_t; #define HAVE_CHUNKMEMSET_4 #define HAVE_CHUNKMEMSET_8 #define HAVE_CHUNK_MAG -#define HAVE_CHUNKCOPY -#define HAVE_CHUNKUNROLL static const lut_rem_pair perm_idx_lut[13] = { {0, 1}, /* 3 */ @@ -83,14 +78,11 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t return ret_vec; } -extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len); -extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len); - #define CHUNKSIZE chunksize_ssse3 #define CHUNKMEMSET chunkmemset_ssse3 #define CHUNKMEMSET_SAFE chunkmemset_safe_ssse3 -#define CHUNKCOPY chunkcopy_sse2 -#define CHUNKUNROLL chunkunroll_sse2 +#define CHUNKCOPY chunkcopy_ssse3 +#define CHUNKUNROLL chunkunroll_ssse3 #include "chunkset_tpl.h" diff --git a/3rdparty/zlib-ng/arch/x86/compare256_avx2.c b/3rdparty/zlib-ng/arch/x86/compare256_avx2.c index 1318a0e333..d2c835e4ee 100644 --- a/3rdparty/zlib-ng/arch/x86/compare256_avx2.c +++ b/3rdparty/zlib-ng/arch/x86/compare256_avx2.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) diff --git a/3rdparty/zlib-ng/arch/x86/compare256_sse2.c b/3rdparty/zlib-ng/arch/x86/compare256_sse2.c index aad4bd240d..216bb3a705 100644 --- a/3rdparty/zlib-ng/arch/x86/compare256_sse2.c +++ b/3rdparty/zlib-ng/arch/x86/compare256_sse2.c @@ -3,8 +3,9 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" - +#include "zbuild.h" +#include "zutil_p.h" +#include "deflate.h" #include "fallback_builtins.h" #if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) diff --git a/3rdparty/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h b/3rdparty/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h index 3e79928317..1ffe201dda 100644 --- a/3rdparty/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h +++ b/3rdparty/zlib-ng/arch/x86/crc32_fold_pclmulqdq_tpl.h @@ -26,27 +26,26 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; __m128i xmm_crc0, xmm_crc1, xmm_crc2, xmm_crc3; __m128i xmm_crc_part = _mm_setzero_si128(); -#ifdef COPY char ALIGNED_(16) partial_buf[16] = { 0 }; -#else +#ifndef COPY __m128i xmm_initial = _mm_cvtsi32_si128(init_crc); int32_t first = init_crc != 0; - /* Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31 - * bytes of input is needed for the aligning load that occurs. If there's an initial CRC, to - * carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available, which - * by definition can be up to 15 bytes + one full vector load. */ - assert(len >= 31 || first == 0); + /* The CRC functions don't call this for input < 16, as a minimum of 16 bytes of input is needed + * for the aligning load that occurs. If there's an initial CRC, to carry it forward through + * the folded CRC there must be 16 - src % 16 + 16 bytes available, which by definition can be + * up to 15 bytes + one full vector load. */ + assert(len >= 16 || first == 0); #endif crc32_fold_load((__m128i *)crc->fold, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); if (len < 16) { -#ifdef COPY if (len == 0) return; memcpy(partial_buf, src, len); xmm_crc_part = _mm_load_si128((const __m128i *)partial_buf); +#ifdef COPY memcpy(dst, partial_buf, len); #endif goto partial; @@ -63,9 +62,23 @@ Z_INTERNAL void CRC32_FOLD(crc32_fold *crc, const uint8_t *src, size_t len, uint if (algn_diff < 4 && init_crc != 0) { xmm_t0 = xmm_crc_part; - xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1); - fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + if (len >= 32) { + xmm_crc_part = _mm_loadu_si128((__m128i*)src + 1); + fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + } else { + memcpy(partial_buf, src + 16, len - 16); + xmm_crc_part = _mm_load_si128((__m128i*)partial_buf); + fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); + xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); + src += 16; + len -= 16; +#ifdef COPY + dst -= algn_diff; +#endif + goto partial; + } + src += 16; len -= 16; } diff --git a/3rdparty/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h b/3rdparty/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h index 05d3b15257..3a4f6af5af 100644 --- a/3rdparty/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h +++ b/3rdparty/zlib-ng/arch/x86/crc32_pclmulqdq_tpl.h @@ -17,7 +17,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" +#include "zbuild.h" #include #include @@ -26,8 +26,9 @@ # include #endif -#include "../../crc32_fold.h" -#include "../../crc32_braid_p.h" +#include "crc32.h" +#include "crc32_braid_p.h" +#include "crc32_braid_tbl.h" #include "x86_intrins.h" #include @@ -350,11 +351,22 @@ Z_INTERNAL uint32_t CRC32_FOLD_FINAL(crc32_fold *crc) { return crc->value; } +static inline uint32_t crc32_small(uint32_t crc, const uint8_t *buf, size_t len) { + uint32_t c = (~crc) & 0xffffffff; + + while (len) { + len--; + DO1; + } + + return c ^ 0xffffffff; +} + Z_INTERNAL uint32_t CRC32(uint32_t crc32, const uint8_t *buf, size_t len) { - /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for - * these short lengths might also prove to be effective */ - if (len < 64) - return PREFIX(crc32_braid)(crc32, buf, len); + /* For lens smaller than ~12, crc32_small method is faster. + * But there are also minimum requirements for the pclmul functions due to alignment */ + if (len < 16) + return crc32_small(crc32, buf, len); crc32_fold ALIGNED_(16) crc_state; CRC32_FOLD_RESET(&crc_state); diff --git a/3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c b/3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c index ec641b4326..cad35b14ee 100644 --- a/3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c +++ b/3rdparty/zlib-ng/arch/x86/crc32_vpclmulqdq.c @@ -3,7 +3,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) +#ifdef X86_VPCLMULQDQ_CRC #define X86_VPCLMULQDQ #define CRC32_FOLD_COPY crc32_fold_vpclmulqdq_copy diff --git a/3rdparty/zlib-ng/arch/x86/insert_string_sse42.c b/3rdparty/zlib-ng/arch/x86/insert_string_sse42.c deleted file mode 100644 index ae092a7e47..0000000000 --- a/3rdparty/zlib-ng/arch/x86/insert_string_sse42.c +++ /dev/null @@ -1,24 +0,0 @@ -/* insert_string_sse42.c -- insert_string integer hash variant using SSE4.2's CRC instructions - * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - */ - -#ifdef X86_SSE42 -#include "../../zbuild.h" -#include -#include "../../deflate.h" - -#define HASH_CALC(s, h, val)\ - h = _mm_crc32_u32(h, val) - -#define HASH_CALC_VAR h -#define HASH_CALC_VAR_INIT uint32_t h = 0 - -#define UPDATE_HASH update_hash_sse42 -#define INSERT_STRING insert_string_sse42 -#define QUICK_INSERT_STRING quick_insert_string_sse42 - -#include "../../insert_string_tpl.h" -#endif diff --git a/3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c b/3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c index 94fe10c7bf..f49ad3331b 100644 --- a/3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c +++ b/3rdparty/zlib-ng/arch/x86/slide_hash_avx2.c @@ -9,8 +9,8 @@ * * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" #include diff --git a/3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c b/3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c index 5daac4a739..cfdf7bee49 100644 --- a/3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c +++ b/3rdparty/zlib-ng/arch/x86/slide_hash_sse2.c @@ -8,8 +8,8 @@ * * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" -#include "../../deflate.h" +#include "zbuild.h" +#include "deflate.h" #include #include diff --git a/3rdparty/zlib-ng/arch/x86/x86_features.c b/3rdparty/zlib-ng/arch/x86/x86_features.c index 8d11564c24..58cb4df341 100644 --- a/3rdparty/zlib-ng/arch/x86/x86_features.c +++ b/3rdparty/zlib-ng/arch/x86/x86_features.c @@ -7,7 +7,7 @@ * For conditions of distribution and use, see copyright notice in zlib.h */ -#include "../../zbuild.h" +#include "zbuild.h" #include "x86_features.h" #ifdef _MSC_VER @@ -15,6 +15,13 @@ #else // Newer versions of GCC and clang come with cpuid.h # include +# ifdef X86_HAVE_XSAVE_INTRIN +# if __GNUC__ == 8 +# include +# else +# include +# endif +# endif #endif #include @@ -29,6 +36,7 @@ static inline void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, *ecx = registers[2]; *edx = registers[3]; #else + *eax = *ebx = *ecx = *edx = 0; __cpuid(info, *eax, *ebx, *ecx, *edx); #endif } @@ -43,12 +51,13 @@ static inline void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, *ecx = registers[2]; *edx = registers[3]; #else + *eax = *ebx = *ecx = *edx = 0; __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx); #endif } static inline uint64_t xgetbv(unsigned int xcr) { -#ifdef _MSC_VER +#if defined(_MSC_VER) || defined(X86_HAVE_XSAVE_INTRIN) return _xgetbv(xcr); #else uint32_t eax, edx; @@ -90,7 +99,16 @@ void Z_INTERNAL x86_check_features(struct x86_cpu_features *features) { // check AVX512 bits if the OS supports saving ZMM registers if (features->has_os_save_zmm) { - features->has_avx512 = ebx & 0x00010000; + features->has_avx512f = ebx & 0x00010000; + if (features->has_avx512f) { + // According to the Intel Software Developer's Manual, AVX512F must be enabled too in order to enable + // AVX512(DQ,BW,VL). + features->has_avx512dq = ebx & 0x00020000; + features->has_avx512bw = ebx & 0x40000000; + features->has_avx512vl = ebx & 0x80000000; + } + features->has_avx512_common = features->has_avx512f && features->has_avx512dq && features->has_avx512bw \ + && features->has_avx512vl; features->has_avx512vnni = ecx & 0x800; } } diff --git a/3rdparty/zlib-ng/arch/x86/x86_features.h b/3rdparty/zlib-ng/arch/x86/x86_features.h index 4a36bde835..6daa5e3828 100644 --- a/3rdparty/zlib-ng/arch/x86/x86_features.h +++ b/3rdparty/zlib-ng/arch/x86/x86_features.h @@ -1,14 +1,18 @@ /* x86_features.h -- check for CPU features -* Copyright (C) 2013 Intel Corporation Jim Kukunas -* For conditions of distribution and use, see copyright notice in zlib.h -*/ + * Copyright (C) 2013 Intel Corporation Jim Kukunas + * For conditions of distribution and use, see copyright notice in zlib.h + */ #ifndef X86_FEATURES_H_ #define X86_FEATURES_H_ struct x86_cpu_features { int has_avx2; - int has_avx512; + int has_avx512f; + int has_avx512dq; + int has_avx512bw; + int has_avx512vl; + int has_avx512_common; // Enabled when AVX512(F,DQ,BW,VL) are all enabled. int has_avx512vnni; int has_sse2; int has_ssse3; @@ -21,4 +25,4 @@ struct x86_cpu_features { void Z_INTERNAL x86_check_features(struct x86_cpu_features *features); -#endif /* CPU_H_ */ +#endif /* X86_FEATURES_H_ */ diff --git a/3rdparty/zlib-ng/arch/x86/x86_functions.h b/3rdparty/zlib-ng/arch/x86/x86_functions.h new file mode 100644 index 0000000000..5aa9b31747 --- /dev/null +++ b/3rdparty/zlib-ng/arch/x86/x86_functions.h @@ -0,0 +1,172 @@ +/* x86_functions.h -- x86 implementations for arch-specific functions. + * Copyright (C) 2013 Intel Corporation Jim Kukunas + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef X86_FUNCTIONS_H_ +#define X86_FUNCTIONS_H_ + +#ifdef X86_SSE2 +uint32_t chunksize_sse2(void); +uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); + uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); + void slide_hash_sse2(deflate_state *s); +# endif + void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start); +#endif + +#ifdef X86_SSSE3 +uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); +uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); +void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); +#endif + +#ifdef X86_SSE42 +uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif + +#ifdef X86_AVX2 +uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +uint32_t chunksize_avx2(void); +uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); + +# ifdef HAVE_BUILTIN_CTZ + uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); + uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); + uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); + void slide_hash_avx2(deflate_state *s); +# endif + void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start); +#endif +#ifdef X86_AVX512 +uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif +#ifdef X86_AVX512VNNI +uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); +uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); +#endif + +#ifdef X86_PCLMULQDQ_CRC +uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); +void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); +uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); +#endif +#ifdef X86_VPCLMULQDQ_CRC +uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); +void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); +void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); +uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); +uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); +#endif + + +#ifdef DISABLE_RUNTIME_CPU_DETECTION +// X86 - SSE2 +# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2) +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_sse2 +# undef native_chunksize +# define native_chunksize chunksize_sse2 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_sse2 +# undef native_slide_hash +# define native_slide_hash slide_hash_sse2 +# ifdef HAVE_BUILTIN_CTZ +# undef native_compare256 +# define native_compare256 compare256_sse2 +# undef native_longest_match +# define native_longest_match longest_match_sse2 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_sse2 +# endif +#endif +// X86 - SSSE3 +# if defined(X86_SSSE3) && defined(__SSSE3__) +# undef native_adler32 +# define native_adler32 adler32_ssse3 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_ssse3 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_ssse3 +# endif +// X86 - SSE4.2 +# if defined(X86_SSE42) && defined(__SSE4_2__) +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_sse42 +# endif + +// X86 - PCLMUL +#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__) +# undef native_crc32 +# define native_crc32 crc32_pclmulqdq +# undef native_crc32_fold +# define native_crc32_fold crc32_fold_pclmulqdq +# undef native_crc32_fold_copy +# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy +# undef native_crc32_fold_final +# define native_crc32_fold_final crc32_fold_pclmulqdq_final +# undef native_crc32_fold_reset +# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset +#endif +// X86 - AVX +# if defined(X86_AVX2) && defined(__AVX2__) +# undef native_adler32 +# define native_adler32 adler32_avx2 +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx2 +# undef native_chunkmemset_safe +# define native_chunkmemset_safe chunkmemset_safe_avx2 +# undef native_chunksize +# define native_chunksize chunksize_avx2 +# undef native_inflate_fast +# define native_inflate_fast inflate_fast_avx2 +# undef native_slide_hash +# define native_slide_hash slide_hash_avx2 +# ifdef HAVE_BUILTIN_CTZ +# undef native_compare256 +# define native_compare256 compare256_avx2 +# undef native_longest_match +# define native_longest_match longest_match_avx2 +# undef native_longest_match_slow +# define native_longest_match_slow longest_match_slow_avx2 +# endif +# endif + +// X86 - AVX512 (F,DQ,BW,Vl) +# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__) +# undef native_adler32 +# define native_adler32 adler32_avx512 +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx512 +// X86 - AVX512 (VNNI) +# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__) +# undef native_adler32 +# define native_adler32 adler32_avx512_vnni +# undef native_adler32_fold_copy +# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni +# endif +// X86 - VPCLMULQDQ +# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__) +# undef native_crc32 +# define native_crc32 crc32_vpclmulqdq +# undef native_crc32_fold +# define native_crc32_fold crc32_fold_vpclmulqdq +# undef native_crc32_fold_copy +# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy +# undef native_crc32_fold_final +# define native_crc32_fold_final crc32_fold_vpclmulqdq_final +# undef native_crc32_fold_reset +# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset +# endif +# endif +#endif + +#endif /* X86_FUNCTIONS_H_ */ diff --git a/3rdparty/zlib-ng/arch/x86/x86_intrins.h b/3rdparty/zlib-ng/arch/x86/x86_intrins.h index 52e1085d66..0e596d18a1 100644 --- a/3rdparty/zlib-ng/arch/x86/x86_intrins.h +++ b/3rdparty/zlib-ng/arch/x86/x86_intrins.h @@ -7,7 +7,7 @@ #ifdef __AVX2__ #include -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 10) \ +#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 10) \ || (defined(__apple_build_version__) && __apple_build_version__ < 9020039) static inline __m256i _mm256_zextsi128_si256(__m128i a) { __m128i r; @@ -29,7 +29,7 @@ static inline __m512i _mm512_zextsi128_si512(__m128i a) { /* GCC <9 is missing some AVX512 intrinsics. */ #ifdef __AVX512F__ -#if (!defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9) +#if (!defined(__clang__) && !defined(__NVCOMPILER) && defined(__GNUC__) && __GNUC__ < 9) #include #define PACK(c0, c1, c2, c3) (((int)(unsigned char)(c0) << 24) | ((int)(unsigned char)(c1) << 16) | \ diff --git a/3rdparty/zlib-ng/arch_functions.h b/3rdparty/zlib-ng/arch_functions.h new file mode 100644 index 0000000000..9a7f8d9379 --- /dev/null +++ b/3rdparty/zlib-ng/arch_functions.h @@ -0,0 +1,29 @@ +/* arch_functions.h -- Arch-specific function prototypes. + * Copyright (C) 2017 Hans Kristian Rosbach + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef CPU_FUNCTIONS_H_ +#define CPU_FUNCTIONS_H_ + +#include "zbuild.h" +#include "zutil.h" +#include "crc32.h" +#include "deflate.h" +#include "fallback_builtins.h" + +#include "arch/generic/generic_functions.h" + +#if defined(X86_FEATURES) +# include "arch/x86/x86_functions.h" +#elif defined(ARM_FEATURES) +# include "arch/arm/arm_functions.h" +#elif defined(PPC_FEATURES) || defined(POWER_FEATURES) +# include "arch/power/power_functions.h" +#elif defined(S390_FEATURES) +# include "arch/s390/s390_functions.h" +#elif defined(RISCV_FEATURES) +# include "arch/riscv/riscv_functions.h" +#endif + +#endif diff --git a/3rdparty/zlib-ng/chunkset_tpl.h b/3rdparty/zlib-ng/chunkset_tpl.h index f909a12557..f5cc5c0450 100644 --- a/3rdparty/zlib-ng/chunkset_tpl.h +++ b/3rdparty/zlib-ng/chunkset_tpl.h @@ -5,7 +5,7 @@ #include "zbuild.h" #include -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len); #endif @@ -25,7 +25,7 @@ Z_INTERNAL uint32_t CHUNKSIZE(void) { without iteration, which will hopefully make the branch prediction more reliable. */ #ifndef HAVE_CHUNKCOPY -Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { +static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { Assert(len > 0, "chunkcopy should never have a length 0"); chunk_t chunk; int32_t align = ((len - 1) % sizeof(chunk_t)) + 1; @@ -54,7 +54,7 @@ Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) { least 258 bytes of output space available (258 being the maximum length output from a single token; see inflate_fast()'s assumptions below). */ #ifndef HAVE_CHUNKUNROLL -Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { +static inline uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) { unsigned char const *from = out - *dist; chunk_t chunk; while (*dist < *len && *dist < sizeof(chunk_t)) { @@ -98,7 +98,7 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ Assert(dist > 0, "chunkmemset cannot have a distance 0"); /* Only AVX2 */ -#if CHUNK_SIZE == 32 && defined(X86_SSSE3) && defined(X86_SSE2) +#if CHUNK_SIZE == 32 && defined(X86_SSSE3) if (len <= 16) { return chunkmemset_ssse3(out, dist, len); } diff --git a/3rdparty/zlib-ng/cmake/detect-arch.c b/3rdparty/zlib-ng/cmake/detect-arch.c new file mode 100644 index 0000000000..92590182c2 --- /dev/null +++ b/3rdparty/zlib-ng/cmake/detect-arch.c @@ -0,0 +1,115 @@ +// archdetect.c -- Detect compiler architecture and raise preprocessor error +// containing a simple arch identifier. +// Copyright (C) 2019 Hans Kristian Rosbach +// Licensed under the Zlib license, see LICENSE.md for details + +// x86_64 +#if defined(__x86_64__) || defined(_M_X64) + #error archfound x86_64 + +// x86 +#elif defined(__i386) || defined(_M_IX86) + #error archfound i686 + +// ARM +#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #error archfound aarch64 +#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM) + #if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__) + #error archfound armv8 + #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) + #error archfound armv7 + #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__) + #error archfound armv6 + #elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) + #error archfound armv5 + #elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__) + #error archfound armv4 + #elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__) + #error archfound armv3 + #elif defined(__ARM_ARCH_2__) + #error archfound armv2 + #endif + +// PowerPC +#elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__) + #if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #error archfound powerpc64le + #else + #error archfound powerpc64 + #endif + #else + #error archfound powerpc + #endif + +// --------------- Less common architectures alphabetically below --------------- + +// ALPHA +#elif defined(__alpha__) || defined(__alpha) + #error archfound alpha + +// Blackfin +#elif defined(__BFIN__) + #error archfound blackfin + +// Itanium +#elif defined(__ia64) || defined(_M_IA64) + #error archfound ia64 + +// MIPS +#elif defined(__mips__) || defined(__mips) + #error archfound mips + +// Motorola 68000-series +#elif defined(__m68k__) + #error archfound m68k + +// SuperH +#elif defined(__sh__) + #error archfound sh + +// SPARC +#elif defined(__sparc__) || defined(__sparc) + #if defined(__sparcv9) || defined(__sparc_v9__) + #error archfound sparc9 + #elif defined(__sparcv8) || defined(__sparc_v8__) + #error archfound sparc8 + #endif + +// SystemZ +#elif defined(__370__) + #error archfound s370 +#elif defined(__s390__) + #error archfound s390 +#elif defined(__s390x) || defined(__zarch__) + #error archfound s390x + +// PARISC +#elif defined(__hppa__) + #error archfound parisc + +// RS-6000 +#elif defined(__THW_RS6000) + #error archfound rs6000 + +// RISC-V +#elif defined(__riscv) + #if __riscv_xlen == 64 + #error archfound riscv64 + #elif __riscv_xlen == 32 + #error archfound riscv32 + #endif + +// LOONGARCH +#elif defined(__loongarch_lp64) + #error archfound loongarch64 + +// Emscripten (WebAssembly) +#elif defined(__EMSCRIPTEN__) + #error archfound wasm32 + +// return 'unrecognized' if we do not know what architecture this is +#else + #error archfound unrecognized +#endif diff --git a/3rdparty/zlib-ng/cmake/detect-arch.cmake b/3rdparty/zlib-ng/cmake/detect-arch.cmake new file mode 100644 index 0000000000..dfdc6013ce --- /dev/null +++ b/3rdparty/zlib-ng/cmake/detect-arch.cmake @@ -0,0 +1,104 @@ +# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH +# Copyright (C) 2019 Hans Kristian Rosbach +# Licensed under the Zlib license, see LICENSE.md for details +set(ARCHDETECT_FOUND TRUE) + +if(CMAKE_OSX_ARCHITECTURES) + # If multiple architectures are requested (universal build), pick only the first + list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH) +elseif(MSVC) + if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86") + set(ARCH "i686") + elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64") + set(ARCH "x86_64") + elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7") + set(ARCH "arm") + elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64EC") + set(ARCH "aarch64") + endif() +elseif(EMSCRIPTEN) + set(ARCH "wasm32") +elseif(CMAKE_CROSSCOMPILING) + set(ARCH ${CMAKE_C_COMPILER_TARGET}) +else() + # Let preprocessor parse archdetect.c and raise an error containing the arch identifier + enable_language(C) + try_run( + run_result_unused + compile_result_unused + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_LIST_DIR}/detect-arch.c + COMPILE_OUTPUT_VARIABLE RAWOUTPUT + CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + ) + + # Find basearch tag, and extract the arch word into BASEARCH variable + string(REGEX REPLACE ".*archfound ([a-zA-Z0-9_]+).*" "\\1" ARCH "${RAWOUTPUT}") + if(NOT ARCH) + set(ARCH unknown) + endif() +endif() + +# Make sure we have ARCH set +if(NOT ARCH OR ARCH STREQUAL "unknown") + set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) + message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'") +else() + message(STATUS "Arch detected: '${ARCH}'") +endif() + +# Base arch detection +if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)") + set(BASEARCH "x86") + set(BASEARCH_X86_FOUND TRUE) +elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64|cortex)") + set(BASEARCH "arm") + set(BASEARCH_ARM_FOUND TRUE) +elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?") + set(BASEARCH "ppc") + set(BASEARCH_PPC_FOUND TRUE) +elseif("${ARCH}" MATCHES "alpha") + set(BASEARCH "alpha") + set(BASEARCH_ALPHA_FOUND TRUE) +elseif("${ARCH}" MATCHES "blackfin") + set(BASEARCH "blackfin") + set(BASEARCH_BLACKFIN_FOUND TRUE) +elseif("${ARCH}" MATCHES "ia64") + set(BASEARCH "ia64") + set(BASEARCH_IA64_FOUND TRUE) +elseif("${ARCH}" MATCHES "mips") + set(BASEARCH "mips") + set(BASEARCH_MIPS_FOUND TRUE) +elseif("${ARCH}" MATCHES "m68k") + set(BASEARCH "m68k") + set(BASEARCH_M68K_FOUND TRUE) +elseif("${ARCH}" MATCHES "sh") + set(BASEARCH "sh") + set(BASEARCH_SH_FOUND TRUE) +elseif("${ARCH}" MATCHES "sparc[89]?") + set(BASEARCH "sparc") + set(BASEARCH_SPARC_FOUND TRUE) +elseif("${ARCH}" MATCHES "s3[679]0x?") + set(BASEARCH "s360") + set(BASEARCH_S360_FOUND TRUE) +elseif("${ARCH}" MATCHES "parisc") + set(BASEARCH "parisc") + set(BASEARCH_PARISC_FOUND TRUE) +elseif("${ARCH}" MATCHES "rs6000") + set(BASEARCH "rs6000") + set(BASEARCH_RS6000_FOUND TRUE) +elseif("${ARCH}" MATCHES "riscv(32|64)") + set(BASEARCH "riscv") + set(BASEARCH_RISCV_FOUND TRUE) +elseif("${ARCH}" MATCHES "loongarch64") + set(BASEARCH "loongarch") + set(BASEARCH_LOONGARCH_FOUND TRUE) +elseif("${ARCH}" MATCHES "wasm32") + set(BASEARCH "wasm32") + set(BASEARCH_WASM32_FOUND TRUE) +else() + set(BASEARCH "x86") + set(BASEARCH_X86_FOUND TRUE) + message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.") +endif() +message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'") diff --git a/3rdparty/zlib-ng/cmake/detect-coverage.cmake b/3rdparty/zlib-ng/cmake/detect-coverage.cmake new file mode 100644 index 0000000000..8e67a085cd --- /dev/null +++ b/3rdparty/zlib-ng/cmake/detect-coverage.cmake @@ -0,0 +1,46 @@ +# detect-coverage.cmake -- Detect supported compiler coverage flags +# Licensed under the Zlib license, see LICENSE.md for details + +macro(add_code_coverage) + # Check for -coverage flag support for Clang/GCC + if(CMAKE_VERSION VERSION_LESS 3.14) + set(CMAKE_REQUIRED_LIBRARIES -lgcov) + else() + set(CMAKE_REQUIRED_LINK_OPTIONS -coverage) + endif() + check_c_compiler_flag(-coverage HAVE_COVERAGE) + set(CMAKE_REQUIRED_LIBRARIES) + set(CMAKE_REQUIRED_LINK_OPTIONS) + + if(HAVE_COVERAGE) + add_compile_options(-coverage) + add_link_options(-coverage) + message(STATUS "Code coverage enabled using: -coverage") + else() + # Some versions of GCC don't support -coverage shorthand + if(CMAKE_VERSION VERSION_LESS 3.14) + set(CMAKE_REQUIRED_LIBRARIES -lgcov) + else() + set(CMAKE_REQUIRED_LINK_OPTIONS -lgcov -fprofile-arcs) + endif() + check_c_compiler_flag("-ftest-coverage -fprofile-arcs -fprofile-values" HAVE_TEST_COVERAGE) + set(CMAKE_REQUIRED_LIBRARIES) + set(CMAKE_REQUIRED_LINK_OPTIONS) + + if(HAVE_TEST_COVERAGE) + add_compile_options(-ftest-coverage -fprofile-arcs -fprofile-values) + add_link_options(-lgcov -fprofile-arcs) + message(STATUS "Code coverage enabled using: -ftest-coverage") + else() + message(WARNING "Compiler does not support code coverage") + set(WITH_CODE_COVERAGE OFF) + endif() + endif() + + # Set optimization level to zero for code coverage builds + if (WITH_CODE_COVERAGE) + # Use CMake compiler flag variables due to add_compile_options failure on Windows GCC + set(CMAKE_C_FLAGS "-O0 ${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "-O0 ${CMAKE_CXX_FLAGS}") + endif() +endmacro() diff --git a/3rdparty/zlib-ng/cmake/detect-install-dirs.cmake b/3rdparty/zlib-ng/cmake/detect-install-dirs.cmake new file mode 100644 index 0000000000..a7c774f474 --- /dev/null +++ b/3rdparty/zlib-ng/cmake/detect-install-dirs.cmake @@ -0,0 +1,43 @@ +# detect-install-dirs.cmake -- Detect install directory parameters +# Copyright (C) 2021 Hans Kristian Rosbach +# Licensed under the Zlib license, see LICENSE.md for details + +# Determine installation directory for executables +if (DEFINED BIN_INSTALL_DIR) + set(BIN_INSTALL_DIR "${BIN_INSTALL_DIR}" CACHE PATH "Installation directory for executables (Deprecated)" FORCE) + set(CMAKE_INSTALL_BINDIR "${BIN_INSTALL_DIR}") +elseif (DEFINED INSTALL_BIN_DIR) + set(CMAKE_INSTALL_BINDIR "${INSTALL_BIN_DIR}") +endif() + +# Determine installation directory for libraries +if (DEFINED LIB_INSTALL_DIR) + set(LIB_INSTALL_DIR "${LIB_INSTALL_DIR}" CACHE PATH "Installation directory for libraries (Deprecated)" FORCE) + set(CMAKE_INSTALL_LIBDIR "${LIB_INSTALL_DIR}") +elseif (DEFINED INSTALL_LIB_DIR) + set(CMAKE_INSTALL_LIBDIR "${INSTALL_LIB_DIR}") +endif() + +# Determine installation directory for include files +if (DEFINED INC_INSTALL_DIR) + set(INC_INSTALL_DIR "${INC_INSTALL_DIR}" CACHE PATH "Installation directory for headers (Deprecated)" FORCE) + set(CMAKE_INSTALL_INCLUDEDIR "${INC_INSTALL_DIR}") +elseif (DEFINED INSTALL_INC_DIR) + set(CMAKE_INSTALL_INCLUDEDIR "${INSTALL_INC_DIR}") +endif() + +# Define GNU standard installation directories +include(GNUInstallDirs) + +# Determine installation directory for pkgconfig files +if (DEFINED PKGCONFIG_INSTALL_DIR) + set(PKGCONFIG_INSTALL_DIR "${PKGCONFIG_INSTALL_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) +elseif (DEFINED INSTALL_PKGCONFIG_DIR) + set(PKGCONFIG_INSTALL_DIR "${INSTALL_PKGCONFIG_DIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) +elseif (DEFINED CMAKE_INSTALL_PKGCONFIGDIR) + set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) +elseif (DEFINED CMAKE_INSTALL_FULL_PKGCONFIGDIR) + set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_FULL_PKGCONFIGDIR}" CACHE PATH "Installation directory for pkgconfig (.pc) files" FORCE) +else() + set(PKGCONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files") +endif() diff --git a/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake b/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake index 74ac3910b8..78e46e14bb 100644 --- a/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake +++ b/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake @@ -2,40 +2,39 @@ # Licensed under the Zlib license, see LICENSE.md for details macro(check_acle_compiler_flag) - if(MSVC) - # Both ARM and ARM64-targeting msvc support intrinsics, but - # ARM msvc is missing some intrinsics introduced with ARMv8, e.g. crc32 - if(MSVC_C_ARCHITECTURE_ID STREQUAL "ARM64") - set(HAVE_ACLE_FLAG TRUE) - endif() - else() + if(NOT NATIVEFLAG) if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + check_c_compiler_flag("-march=armv8-a+crc" HAVE_MARCH_ARMV8_CRC) + if(HAVE_MARCH_ARMV8_CRC) set(ACLEFLAG "-march=armv8-a+crc" CACHE INTERNAL "Compiler option to enable ACLE support") + else() + check_c_compiler_flag("-march=armv8-a+crc+simd" HAVE_MARCH_ARMV8_CRC_SIMD) + if(HAVE_MARCH_ARMV8_CRC_SIMD) + set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support") + endif() endif() endif() - # Check whether compiler supports ACLE flag - set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") - check_c_source_compiles( - "int main() { return 0; }" - HAVE_ACLE_FLAG FAIL_REGEX "not supported") - if(NOT NATIVEFLAG AND NOT HAVE_ACLE_FLAG) - set(ACLEFLAG "-march=armv8-a+crc+simd" CACHE INTERNAL "Compiler option to enable ACLE support" FORCE) - # Check whether compiler supports ACLE flag - set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG}") - check_c_source_compiles( - "int main() { return 0; }" - HAVE_ACLE_FLAG2 FAIL_REGEX "not supported") - set(HAVE_ACLE_FLAG ${HAVE_ACLE_FLAG2} CACHE INTERNAL "Have compiler option to enable ACLE intrinsics" FORCE) - unset(HAVE_ACLE_FLAG2 CACHE) # Don't cache this internal variable - endif() - set(CMAKE_REQUIRED_FLAGS) endif() + # Check whether compiler supports ARMv8 CRC intrinsics + set(CMAKE_REQUIRED_FLAGS "${ACLEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") + check_c_source_compiles( + "#if defined(_MSC_VER) + #include + #else + #include + #endif + unsigned int f(unsigned int a, unsigned int b) { + return __crc32w(a, b); + } + int main(void) { return 0; }" + HAVE_ACLE_FLAG + ) + set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_armv6_compiler_flag) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") check_c_compiler_flag("-march=armv6" HAVE_MARCH_ARMV6) if(HAVE_MARCH_ARMV6) set(ARMV6FLAG "-march=armv6" CACHE INTERNAL "Compiler option to enable ARMv6 support") @@ -67,21 +66,21 @@ macro(check_armv6_compiler_flag) return __uqsub16(a, b); #endif } - int main(void) { return 0; }" + int main(void) { return f(1,2); }" HAVE_ARMV6_INTRIN ) set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_avx512_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") - else() - set(AVX512FLAG "/arch:AVX512") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") + else() + set(AVX512FLAG "/arch:AVX512") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") # For CPUs that can benefit from AVX512, it seems GCC generates suboptimal # instruction scheduling unless you specify a reasonable -mtune= target set(AVX512FLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl") @@ -94,9 +93,9 @@ macro(check_avx512_intrinsics) endif() unset(HAVE_CASCADE_LAKE) endif() + elseif(MSVC) + set(AVX512FLAG "/arch:AVX512") endif() - elseif(MSVC) - set(AVX512FLAG "/arch:AVX512") endif() # Check whether compiler supports AVX512 intrinsics set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") @@ -109,26 +108,17 @@ macro(check_avx512_intrinsics) int main(void) { return 0; }" HAVE_AVX512_INTRIN ) - - # Evidently both GCC and clang were late to implementing these - check_c_source_compiles( - "#include - __mmask16 f(__mmask16 x) { return _knot_mask16(x); } - int main(void) { return 0; }" - HAVE_MASK_INTRIN - ) - set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_avx512vnni_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(AVX512VNNIFLAG "-mavx512f -mavx512bw -mavx512dq -mavx512vl -mavx512vnni") - else() - set(AVX512VNNIFLAG "/arch:AVX512") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") + set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni") + else() + set(AVX512VNNIFLAG "/arch:AVX512") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(AVX512VNNIFLAG "-mavx512f -mavx512dq -mavx512bw -mavx512vl -mavx512vnni") if(NOT MSVC) check_c_compiler_flag("-mtune=cascadelake" HAVE_CASCADE_LAKE) @@ -139,11 +129,10 @@ macro(check_avx512vnni_intrinsics) endif() unset(HAVE_CASCADE_LAKE) endif() + elseif(MSVC) + set(AVX512VNNIFLAG "/arch:AVX512") endif() - elseif(MSVC) - set(AVX512VNNIFLAG "/arch:AVX512") endif() - # Check whether compiler supports AVX512vnni intrinsics set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") check_c_source_compiles( @@ -159,18 +148,18 @@ macro(check_avx512vnni_intrinsics) endmacro() macro(check_avx2_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(AVX2FLAG "-mavx2") + else() + set(AVX2FLAG "/arch:AVX2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(AVX2FLAG "-mavx2") - else() + elseif(MSVC) set(AVX2FLAG "/arch:AVX2") endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) - set(AVX2FLAG "-mavx2") - endif() - elseif(MSVC) - set(AVX2FLAG "/arch:AVX2") endif() # Check whether compiler supports AVX2 intrinics set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") @@ -187,8 +176,8 @@ macro(check_avx2_intrinsics) endmacro() macro(check_neon_compiler_flag) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if("${ARCH}" MATCHES "aarch64") set(NEONFLAG "-march=armv8-a+simd") else() @@ -206,12 +195,52 @@ macro(check_neon_compiler_flag) #endif int main() { return 0; }" NEON_AVAILABLE FAIL_REGEX "not supported") + # Check whether compiler native flag is enough for NEON support + # Some GCC versions don't enable FPU (vector unit) when using -march=native + if(NEON_AVAILABLE AND NATIVEFLAG AND (NOT "${ARCH}" MATCHES "aarch64")) + check_c_source_compiles( + "#include + uint8x16_t f(uint8x16_t x, uint8x16_t y) { + return vaddq_u8(x, y); + } + int main(int argc, char* argv[]) { + uint8x16_t a = vdupq_n_u8(argc); + uint8x16_t b = vdupq_n_u8(argc); + uint8x16_t result = f(a, b); + return result[0]; + }" + ARM_NEON_SUPPORT_NATIVE + ) + if(NOT ARM_NEON_SUPPORT_NATIVE) + set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG} -mfpu=neon ${ZNOLTOFLAG}") + check_c_source_compiles( + "#include + uint8x16_t f(uint8x16_t x, uint8x16_t y) { + return vaddq_u8(x, y); + } + int main(int argc, char* argv[]) { + uint8x16_t a = vdupq_n_u8(argc); + uint8x16_t b = vdupq_n_u8(argc); + uint8x16_t result = f(a, b); + return result[0]; + }" + ARM_NEON_SUPPORT_NATIVE_MFPU + ) + if(ARM_NEON_SUPPORT_NATIVE_MFPU) + set(NEONFLAG "-mfpu=neon") + else() + # Remove local NEON_AVAILABLE variable and overwrite the cache + unset(NEON_AVAILABLE) + set(NEON_AVAILABLE "" CACHE INTERNAL "NEON support available" FORCE) + endif() + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_neon_ld4_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") if("${ARCH}" MATCHES "aarch64") set(NEONFLAG "-march=armv8-a+simd") else() @@ -234,8 +263,8 @@ macro(check_neon_ld4_intrinsics) endmacro() macro(check_pclmulqdq_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") set(PCLMULFLAG "-mpclmul") endif() endif() @@ -257,8 +286,8 @@ macro(check_pclmulqdq_intrinsics) endmacro() macro(check_vpclmulqdq_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID MATCHES "IntelLLVM") set(VPCLMULFLAG "-mvpclmulqdq -mavx512f") endif() endif() @@ -341,8 +370,8 @@ macro(check_ppc_intrinsics) endmacro() macro(check_power8_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(POWER8FLAG "-mcpu=power8") endif() endif() @@ -364,12 +393,27 @@ macro(check_power8_intrinsics) }" HAVE_POWER8_INTRIN ) + if(NOT HAVE_POWER8_INTRIN AND HAVE_LINUX_AUXVEC_H) + check_c_source_compiles( + "#include + #include + int main() { + return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); + }" + HAVE_POWER8_INTRIN2 + ) + if(HAVE_POWER8_INTRIN2) + set(POWER8_NEED_AUXVEC_H 1) + set(HAVE_POWER8_INTRIN ${HAVE_POWER8_INTRIN2} CACHE INTERNAL "Have POWER8 intrinsics" FORCE) + unset(HAVE_POWER8_INTRIN2 CACHE) + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_rvv_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(RISCVFLAG "-march=rv64gcv") endif() endif() @@ -399,8 +443,8 @@ macro(check_s390_intrinsics) endmacro() macro(check_power9_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(POWER9FLAG "-mcpu=power9") endif() endif() @@ -422,22 +466,37 @@ macro(check_power9_intrinsics) }" HAVE_POWER9_INTRIN ) + if(NOT HAVE_POWER9_INTRIN AND HAVE_LINUX_AUXVEC_H) + check_c_source_compiles( + "#include + #include + int main() { + return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00); + }" + HAVE_POWER9_INTRIN2 + ) + if(HAVE_POWER9_INTRIN2) + set(POWER9_NEED_AUXVEC_H 1) + set(HAVE_POWER9_INTRIN ${HAVE_POWER9_INTRIN2} CACHE INTERNAL "Have POWER9 intrinsics" FORCE) + unset(HAVE_POWER9_INTRIN2 CACHE) + endif() + endif() set(CMAKE_REQUIRED_FLAGS) endmacro() macro(check_sse2_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSE2FLAG "-msse2") - else() - set(SSE2FLAG "/arch:SSE2") - endif() - elseif(MSVC) - if(NOT "${ARCH}" MATCHES "x86_64") - set(SSE2FLAG "/arch:SSE2") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSE2FLAG "-msse2") + else() + set(SSE2FLAG "/arch:SSE2") + endif() + elseif(MSVC) + if(NOT "${ARCH}" MATCHES "x86_64") + set(SSE2FLAG "/arch:SSE2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSE2FLAG "-msse2") endif() endif() @@ -453,14 +512,14 @@ macro(check_sse2_intrinsics) endmacro() macro(check_ssse3_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSSE3FLAG "-mssse3") - else() - set(SSSE3FLAG "/arch:SSSE3") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSSE3FLAG "-mssse3") + else() + set(SSSE3FLAG "/arch:SSSE3") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSSE3FLAG "-mssse3") endif() endif() @@ -478,14 +537,14 @@ macro(check_ssse3_intrinsics) endmacro() macro(check_sse42_intrinsics) - if(CMAKE_C_COMPILER_ID MATCHES "Intel") - if(CMAKE_HOST_UNIX OR APPLE) - set(SSE42FLAG "-msse4.2") - else() - set(SSE42FLAG "/arch:SSE4.2") - endif() - elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") - if(NOT NATIVEFLAG) + if(NOT NATIVEFLAG) + if(CMAKE_C_COMPILER_ID MATCHES "Intel") + if(CMAKE_HOST_UNIX OR APPLE) + set(SSE42FLAG "-msse4.2") + else() + set(SSE42FLAG "/arch:SSE4.2") + endif() + elseif(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") set(SSE42FLAG "-msse4.2") endif() endif() @@ -526,15 +585,17 @@ macro(check_vgfma_intrinsics) endmacro() macro(check_xsave_intrinsics) - if(NOT NATIVEFLAG AND NOT MSVC) + if(NOT NATIVEFLAG AND NOT MSVC AND NOT CMAKE_C_COMPILER_ID MATCHES "Intel") set(XSAVEFLAG "-mxsave") endif() set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG} ${ZNOLTOFLAG}") check_c_source_compiles( "#ifdef _MSC_VER # include + #elif __GNUC__ == 8 && __GNUC_MINOR__ > 1 + # include #else - # include + # include #endif unsigned int f(unsigned int a) { return (int) _xgetbv(a); } int main(void) { return 0; }" diff --git a/3rdparty/zlib-ng/cmake/detect-sanitizer.cmake b/3rdparty/zlib-ng/cmake/detect-sanitizer.cmake new file mode 100644 index 0000000000..f9521ec2f5 --- /dev/null +++ b/3rdparty/zlib-ng/cmake/detect-sanitizer.cmake @@ -0,0 +1,166 @@ +# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags +# Licensed under the Zlib license, see LICENSE.md for details + +macro(add_common_sanitizer_flags) + if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") + add_compile_options(-g3) + endif() + check_c_compiler_flag(-fno-omit-frame-pointer HAVE_NO_OMIT_FRAME_POINTER) + if(HAVE_NO_OMIT_FRAME_POINTER) + add_compile_options(-fno-omit-frame-pointer) + add_link_options(-fno-omit-frame-pointer) + endif() + check_c_compiler_flag(-fno-optimize-sibling-calls HAVE_NO_OPTIMIZE_SIBLING_CALLS) + if(HAVE_NO_OPTIMIZE_SIBLING_CALLS) + add_compile_options(-fno-optimize-sibling-calls) + add_link_options(-fno-optimize-sibling-calls) + endif() +endmacro() + +macro(check_sanitizer_support known_checks supported_checks) + set(available_checks "") + + # Build list of supported sanitizer flags by incrementally trying compilation with + # known sanitizer checks + + foreach(check ${known_checks}) + if(available_checks STREQUAL "") + set(compile_checks "${check}") + else() + set(compile_checks "${available_checks},${check}") + endif() + + set(CMAKE_REQUIRED_FLAGS -fsanitize=${compile_checks}) + + check_c_source_compiles("int main() { return 0; }" HAVE_SANITIZER_${check} + FAIL_REGEX "not supported|unrecognized command|unknown option") + + set(CMAKE_REQUIRED_FLAGS) + + if(HAVE_SANITIZER_${check}) + set(available_checks ${compile_checks}) + endif() + endforeach() + + set(${supported_checks} ${available_checks}) +endmacro() + +macro(add_address_sanitizer) + set(known_checks + address + pointer-compare + pointer-subtract + ) + + check_sanitizer_support("${known_checks}" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Address sanitizer is enabled: ${supported_checks}") + add_compile_options(-fsanitize=${supported_checks}) + add_link_options(-fsanitize=${supported_checks}) + add_common_sanitizer_flags() + else() + message(STATUS "Address sanitizer is not supported") + endif() + + if(CMAKE_CROSSCOMPILING_EMULATOR) + # Only check for leak sanitizer if not cross-compiling due to qemu crash + message(WARNING "Leak sanitizer is not supported when cross compiling") + else() + # Leak sanitizer requires address sanitizer + check_sanitizer_support("leak" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Leak sanitizer is enabled: ${supported_checks}") + add_compile_options(-fsanitize=${supported_checks}) + add_link_options(-fsanitize=${supported_checks}) + add_common_sanitizer_flags() + else() + message(STATUS "Leak sanitizer is not supported") + endif() + endif() +endmacro() + +macro(add_memory_sanitizer) + check_sanitizer_support("memory" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Memory sanitizer is enabled: ${supported_checks}") + add_compile_options(-fsanitize=${supported_checks}) + add_link_options(-fsanitize=${supported_checks}) + add_common_sanitizer_flags() + + check_c_compiler_flag(-fsanitize-memory-track-origins HAVE_MEMORY_TRACK_ORIGINS) + if(HAVE_MEMORY_TRACK_ORIGINS) + add_compile_options(-fsanitize-memory-track-origins) + add_link_options(-fsanitize-memory-track-origins) + endif() + else() + message(STATUS "Memory sanitizer is not supported") + endif() +endmacro() + +macro(add_thread_sanitizer) + check_sanitizer_support("thread" supported_checks) + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Thread sanitizer is enabled: ${supported_checks}") + add_compile_options(-fsanitize=${supported_checks}) + add_link_options(-fsanitize=${supported_checks}) + add_common_sanitizer_flags() + else() + message(STATUS "Thread sanitizer is not supported") + endif() +endmacro() + +macro(add_undefined_sanitizer) + set(known_checks + array-bounds + bool + bounds + builtin + enum + float-cast-overflow + float-divide-by-zero + function + integer-divide-by-zero + local-bounds + null + nonnull-attribute + pointer-overflow + return + returns-nonnull-attribute + shift + shift-base + shift-exponent + signed-integer-overflow + undefined + unsigned-integer-overflow + unsigned-shift-base + vla-bound + vptr + ) + + # Only check for alignment sanitizer flag if unaligned access is not supported + if(NOT WITH_UNALIGNED) + list(APPEND known_checks alignment) + endif() + # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled + if(NOT CMAKE_C_FLAGS MATCHES "-O0") + list(APPEND known_checks object-size) + endif() + + check_sanitizer_support("${known_checks}" supported_checks) + + if(NOT ${supported_checks} STREQUAL "") + message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}") + add_compile_options(-fsanitize=${supported_checks}) + add_link_options(-fsanitize=${supported_checks}) + + # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if + # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing. + if(WITH_UNALIGNED) + add_compile_options(-fno-sanitize=alignment) + endif() + + add_common_sanitizer_flags() + else() + message(STATUS "Undefined behavior sanitizer is not supported") + endif() +endmacro() diff --git a/3rdparty/zlib-ng/cpu_features.h b/3rdparty/zlib-ng/cpu_features.h index 00fa6c747c..8708724bc0 100644 --- a/3rdparty/zlib-ng/cpu_features.h +++ b/3rdparty/zlib-ng/cpu_features.h @@ -6,12 +6,10 @@ #ifndef CPU_FEATURES_H_ #define CPU_FEATURES_H_ -#include "adler32_fold.h" -#include "crc32_fold.h" +#ifndef DISABLE_RUNTIME_CPU_DETECTION #if defined(X86_FEATURES) # include "arch/x86/x86_features.h" -# include "fallback_builtins.h" #elif defined(ARM_FEATURES) # include "arch/arm/arm_features.h" #elif defined(PPC_FEATURES) || defined(POWER_FEATURES) @@ -38,266 +36,8 @@ struct cpu_features { #endif }; -extern void cpu_check_features(struct cpu_features *features); +void cpu_check_features(struct cpu_features *features); -/* adler32 */ -typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len); - -extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len); -#ifdef ARM_NEON -extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef PPC_VMX -extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef RISCV_RVV -extern uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_SSSE3 -extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX2 -extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX512 -extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef X86_AVX512VNNI -extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len); -#endif -#ifdef POWER8_VSX -extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len); -#endif - -/* adler32 folding */ -#ifdef RISCV_RVV -extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_SSE42 -extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_AVX2 -extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_AVX512 -extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif -#ifdef X86_AVX512VNNI -extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len); -#endif - -/* CRC32 folding */ -#ifdef X86_PCLMULQDQ_CRC -extern uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc); -extern void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); -extern void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); -extern uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc); -extern uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); -#endif -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) -extern uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc); -extern void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); -extern void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); -extern uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc); -extern uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len); -#endif - -/* memory chunking */ -extern uint32_t chunksize_c(void); -extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#ifdef X86_SSE2 -extern uint32_t chunksize_sse2(void); -extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef X86_SSSE3 -extern uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef X86_AVX2 -extern uint32_t chunksize_avx2(void); -extern uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef ARM_NEON -extern uint32_t chunksize_neon(void); -extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef POWER8_VSX -extern uint32_t chunksize_power8(void); -extern uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif -#ifdef RISCV_RVV -extern uint32_t chunksize_rvv(void); -extern uint8_t* chunkmemset_safe_rvv(uint8_t *out, unsigned dist, unsigned len, unsigned left); -#endif - -#ifdef ZLIB_COMPAT -typedef struct z_stream_s z_stream; -#else -typedef struct zng_stream_s zng_stream; -#endif - -/* inflate fast loop */ -extern void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start); -#ifdef X86_SSE2 -extern void inflate_fast_sse2(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef X86_SSSE3 -extern void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef X86_AVX2 -extern void inflate_fast_avx2(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef ARM_NEON -extern void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef POWER8_VSX -extern void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start); -#endif -#ifdef RISCV_RVV -extern void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start); -#endif - -/* CRC32 */ -typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len); - -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); -#ifdef ARM_ACLE -extern uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len); -#elif defined(POWER8_VSX) -extern uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len); -#elif defined(S390_CRC32_VX) -extern uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len); -#endif - -/* compare256 */ -typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1); - -extern uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1); -#ifdef HAVE_BUILTIN_CTZ -extern uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1); -#endif -#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1); -#endif -#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1); -#endif -#ifdef POWER9 -extern uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1); -#endif -#ifdef RISCV_RVV -extern uint32_t compare256_rvv(const uint8_t *src0, const uint8_t *src1); -#endif - -#ifdef DEFLATE_H_ -/* insert_string */ -extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count); -#ifdef X86_SSE42 -extern void insert_string_sse42(deflate_state *const s, const uint32_t str, uint32_t count); -#elif defined(ARM_ACLE) -extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count); -#endif - -/* longest_match */ -extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match); -#ifdef HAVE_BUILTIN_CTZ -extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match); -#endif -#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match); -#endif -#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_neon(deflate_state *const s, Pos cur_match); -#endif -#ifdef POWER9 -extern uint32_t longest_match_power9(deflate_state *const s, Pos cur_match); -#endif -#ifdef RISCV_RVV -extern uint32_t longest_match_rvv(deflate_state *const s, Pos cur_match); -#endif - -/* longest_match_slow */ -extern uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match); -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -extern uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match); -extern uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match); -#ifdef UNALIGNED64_OK -extern uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match); -#endif -#endif -#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match); -#endif -#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ) -extern uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match); -#endif -#if defined(ARM_NEON) && defined(HAVE_BUILTIN_CTZLL) -extern uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match); -#endif -#ifdef POWER9 -extern uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match); -#endif -#ifdef RISCV_RVV -extern uint32_t longest_match_slow_rvv(deflate_state *const s, Pos cur_match); -#endif - -/* quick_insert_string */ -extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str); -#ifdef X86_SSE42 -extern Pos quick_insert_string_sse42(deflate_state *const s, const uint32_t str); -#elif defined(ARM_ACLE) -extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str); -#endif - -/* slide_hash */ -typedef void (*slide_hash_func)(deflate_state *s); - -#ifdef X86_SSE2 -extern void slide_hash_sse2(deflate_state *s); -#endif -#if defined(ARM_SIMD) -extern void slide_hash_armv6(deflate_state *s); -#endif -#if defined(ARM_NEON) -extern void slide_hash_neon(deflate_state *s); -#endif -#if defined(PPC_VMX) -extern void slide_hash_vmx(deflate_state *s); -#endif -#if defined(POWER8_VSX) -extern void slide_hash_power8(deflate_state *s); -#endif -#if defined(RISCV_RVV) -extern void slide_hash_rvv(deflate_state *s); -#endif -#ifdef X86_AVX2 -extern void slide_hash_avx2(deflate_state *s); -#endif - -/* update_hash */ -extern uint32_t update_hash_c(deflate_state *const s, uint32_t h, uint32_t val); -#ifdef X86_SSE42 -extern uint32_t update_hash_sse42(deflate_state *const s, uint32_t h, uint32_t val); -#elif defined(ARM_ACLE) -extern uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val); -#endif #endif #endif diff --git a/3rdparty/zlib-ng/crc32.c b/3rdparty/zlib-ng/crc32.c new file mode 100644 index 0000000000..54f6ecd420 --- /dev/null +++ b/3rdparty/zlib-ng/crc32.c @@ -0,0 +1,42 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. + */ + +#include "zbuild.h" +#include "functable.h" +#include "crc32_braid_tbl.h" + +/* ========================================================================= */ + +const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) { + return (const uint32_t *)crc_table; +} + +#ifdef ZLIB_COMPAT +unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) { + if (buf == NULL) return 0; + + return (unsigned long)FUNCTABLE_CALL(crc32)((uint32_t)crc, buf, len); +} +#else +uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) { + if (buf == NULL) return 0; + + return FUNCTABLE_CALL(crc32)(crc, buf, len); +} +#endif + +#ifdef ZLIB_COMPAT +unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) { + return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len); +} +#else +uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) { + return PREFIX(crc32_z)(crc, buf, len); +} +#endif diff --git a/3rdparty/zlib-ng/crc32.h b/3rdparty/zlib-ng/crc32.h new file mode 100644 index 0000000000..8c3d7a8a3e --- /dev/null +++ b/3rdparty/zlib-ng/crc32.h @@ -0,0 +1,16 @@ +/* crc32.h -- crc32 folding interface + * Copyright (C) 2021 Nathan Moinvaziri + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#ifndef CRC32_H_ +#define CRC32_H_ + +#define CRC32_FOLD_BUFFER_SIZE (16 * 4) +/* sizeof(__m128i) * (4 folds) */ + +typedef struct crc32_fold_s { + uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; + uint32_t value; +} crc32_fold; + +#endif diff --git a/3rdparty/zlib-ng/crc32_braid_comb.c b/3rdparty/zlib-ng/crc32_braid_comb.c index 75fb474258..f253ae10a2 100644 --- a/3rdparty/zlib-ng/crc32_braid_comb.c +++ b/3rdparty/zlib-ng/crc32_braid_comb.c @@ -7,7 +7,6 @@ * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. */ -#include "zbuild.h" #include "zutil.h" #include "crc32_braid_p.h" #include "crc32_braid_tbl.h" diff --git a/3rdparty/zlib-ng/crc32_braid_p.h b/3rdparty/zlib-ng/crc32_braid_p.h index 1d8a07068a..003bf91920 100644 --- a/3rdparty/zlib-ng/crc32_braid_p.h +++ b/3rdparty/zlib-ng/crc32_braid_p.h @@ -1,7 +1,6 @@ #ifndef CRC32_BRAID_P_H_ #define CRC32_BRAID_P_H_ -#include "zbuild.h" #include "zendian.h" /* Define N */ @@ -25,7 +24,7 @@ # endif #else # ifndef W -# if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) +# if defined(__x86_64__) || defined(_M_AMD64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__) # define W 8 # else # define W 4 @@ -42,9 +41,24 @@ # endif #endif +#if BYTE_ORDER == LITTLE_ENDIAN +# define ZSWAPWORD(word) (word) +# define BRAID_TABLE crc_braid_table +#elif BYTE_ORDER == BIG_ENDIAN +# if W == 8 +# define ZSWAPWORD(word) ZSWAP64(word) +# elif W == 4 +# define ZSWAPWORD(word) ZSWAP32(word) +# endif +# define BRAID_TABLE crc_braid_big_table +#else +# error "No endian defined" +#endif + +#define DO1 c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + /* CRC polynomial. */ #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ -extern uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len); - #endif /* CRC32_BRAID_P_H_ */ diff --git a/3rdparty/zlib-ng/crc32_fold.h b/3rdparty/zlib-ng/crc32_fold.h deleted file mode 100644 index 0d2ff66967..0000000000 --- a/3rdparty/zlib-ng/crc32_fold.h +++ /dev/null @@ -1,21 +0,0 @@ -/* crc32_fold.h -- crc32 folding interface - * Copyright (C) 2021 Nathan Moinvaziri - * For conditions of distribution and use, see copyright notice in zlib.h - */ -#ifndef CRC32_FOLD_H_ -#define CRC32_FOLD_H_ - -#define CRC32_FOLD_BUFFER_SIZE (16 * 4) -/* sizeof(__m128i) * (4 folds) */ - -typedef struct crc32_fold_s { - uint8_t fold[CRC32_FOLD_BUFFER_SIZE]; - uint32_t value; -} crc32_fold; - -Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc); -Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len); -Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc); -Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc); - -#endif diff --git a/3rdparty/zlib-ng/deflate.c b/3rdparty/zlib-ng/deflate.c index 2a0a20e5d2..66b5506a52 100644 --- a/3rdparty/zlib-ng/deflate.c +++ b/3rdparty/zlib-ng/deflate.c @@ -1,5 +1,5 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2023 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -58,7 +58,7 @@ # undef deflateInit2 #endif -const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jean-loup Gailly and Mark Adler "; +const char PREFIX(deflate_copyright)[] = " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -71,14 +71,16 @@ const char PREFIX(deflate_copyright)[] = " deflate 1.3.0 Copyright 1995-2023 Jea */ #ifdef S390_DFLTCC_DEFLATE # include "arch/s390/dfltcc_deflate.h" +/* DFLTCC instructions require window to be page-aligned */ +# define PAD_WINDOW PAD_4096 +# define WINDOW_PAD_SIZE 4096 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096 #else -/* Memory management for the deflate state. Useful for allocating arch-specific extension blocks. */ -# define ZALLOC_DEFLATE_STATE(strm) ((deflate_state *)ZALLOC(strm, 1, sizeof(deflate_state))) -# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -# define ZCOPY_DEFLATE_STATE(dst, src) memcpy(dst, src, sizeof(deflate_state)) -/* Memory management for the window. Useful for allocation the aligned window. */ -# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -# define TRY_FREE_WINDOW(strm, addr) TRY_FREE(strm, addr) +# define PAD_WINDOW PAD_64 +# define WINDOW_PAD_SIZE 64 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64 +/* Adjust the window size for the arch-specific deflate code. */ +# define DEFLATE_ADJUST_WINDOW_SIZE(n) (n) /* Invoked at the beginning of deflateSetDictionary(). Useful for checking arch-specific window data. */ # define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) /* Invoked at the beginning of deflateGetDictionary(). Useful for adjusting arch-specific window data. */ @@ -120,10 +122,6 @@ static void lm_set_level (deflate_state *s, int level); static void lm_init (deflate_state *s); Z_INTERNAL unsigned read_buf (PREFIX3(stream) *strm, unsigned char *buf, unsigned size); -extern uint32_t update_hash_roll (deflate_state *const s, uint32_t h, uint32_t val); -extern void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); -extern Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); - /* =========================================================================== * Local data */ @@ -185,17 +183,111 @@ static const config configuration_table[10] = { memset((unsigned char *)s->head, 0, HASH_SIZE * sizeof(*s->head)); \ } while (0) -/* ========================================================================= */ -/* This function is hidden in ZLIB_COMPAT builds. */ + +#ifdef DEF_ALLOC_DEBUG +# include +# define LOGSZ(name,size) fprintf(stderr, "%s is %d bytes\n", name, size) +# define LOGSZP(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %d, padded %d\n", name, size, loc, pad) +# define LOGSZPL(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %ld, padded %d\n", name, size, loc, pad) +#else +# define LOGSZ(name,size) +# define LOGSZP(name,size,loc,pad) +# define LOGSZPL(name,size,loc,pad) +#endif + +/* =========================================================================== + * Allocate a big buffer and divide it up into the various buffers deflate needs. + * Handles alignment of allocated buffer and alignment of individual buffers. + */ +Z_INTERNAL deflate_allocs* alloc_deflate(PREFIX3(stream) *strm, int windowBits, int lit_bufsize) { + int curr_size = 0; + + /* Define sizes */ + int window_size = DEFLATE_ADJUST_WINDOW_SIZE((1 << windowBits) * 2); + int prev_size = (1 << windowBits) * sizeof(Pos); + int head_size = HASH_SIZE * sizeof(Pos); + int pending_size = lit_bufsize * LIT_BUFS; + int state_size = sizeof(deflate_state); + int alloc_size = sizeof(deflate_allocs); + + /* Calculate relative buffer positions and paddings */ + LOGSZP("window", window_size, PAD_WINDOW(curr_size), PADSZ(curr_size,WINDOW_PAD_SIZE)); + int window_pos = PAD_WINDOW(curr_size); + curr_size = window_pos + window_size; + + LOGSZP("prev", prev_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int prev_pos = PAD_64(curr_size); + curr_size = prev_pos + prev_size; + + LOGSZP("head", head_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int head_pos = PAD_64(curr_size); + curr_size = head_pos + head_size; + + LOGSZP("pending", pending_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int pending_pos = PAD_64(curr_size); + curr_size = pending_pos + pending_size; + + LOGSZP("state", state_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int state_pos = PAD_64(curr_size); + curr_size = state_pos + state_size; + + LOGSZP("alloc", alloc_size, PAD_16(curr_size), PADSZ(curr_size,16)); + int alloc_pos = PAD_16(curr_size); + curr_size = alloc_pos + alloc_size; + + /* Add 64-1 or 4096-1 to allow window alignment, and round size of buffer up to multiple of 64 */ + int total_size = PAD_64(curr_size + (WINDOW_PAD_SIZE - 1)); + + /* Allocate buffer, align to 64-byte cacheline, and zerofill the resulting buffer */ + char *original_buf = strm->zalloc(strm->opaque, 1, total_size); + if (original_buf == NULL) + return NULL; + + char *buff = (char *)HINT_ALIGNED_WINDOW((char *)PAD_WINDOW(original_buf)); + LOGSZPL("Buffer alloc", total_size, PADSZ((uintptr_t)original_buf,WINDOW_PAD_SIZE), PADSZ(curr_size,WINDOW_PAD_SIZE)); + + /* Initialize alloc_bufs */ + deflate_allocs *alloc_bufs = (struct deflate_allocs_s *)(buff + alloc_pos); + alloc_bufs->buf_start = (char *)original_buf; + alloc_bufs->zfree = strm->zfree; + + /* Assign buffers */ + alloc_bufs->window = (unsigned char *)HINT_ALIGNED_WINDOW(buff + window_pos); + alloc_bufs->prev = (Pos *)HINT_ALIGNED_64(buff + prev_pos); + alloc_bufs->head = (Pos *)HINT_ALIGNED_64(buff + head_pos); + alloc_bufs->pending_buf = (unsigned char *)HINT_ALIGNED_64(buff + pending_pos); + alloc_bufs->state = (deflate_state *)HINT_ALIGNED_16(buff + state_pos); + + memset((char *)alloc_bufs->prev, 0, prev_size); + + return alloc_bufs; +} + +/* =========================================================================== + * Free all allocated deflate buffers + */ +static inline void free_deflate(PREFIX3(stream) *strm) { + deflate_state *state = (deflate_state *)strm->state; + + if (state->alloc_bufs != NULL) { + deflate_allocs *alloc_bufs = state->alloc_bufs; + alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start); + strm->state = NULL; + } +} + +/* =========================================================================== + * Initialize deflate state and buffers. + * This function is hidden in ZLIB_COMPAT builds. + */ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level, int32_t method, int32_t windowBits, int32_t memLevel, int32_t strategy) { /* Todo: ignore strm->next_in if we use it as window */ - uint32_t window_padding = 0; deflate_state *s; int wrap = 1; - /* Force initialization functable, because deflate captures function pointers from functable. */ - functable.force_init(); + /* Initialize functable */ + FUNCTABLE_INIT; if (strm == NULL) return Z_STREAM_ERROR; @@ -230,9 +322,19 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = ZALLOC_DEFLATE_STATE(strm); - if (s == NULL) + /* Allocate buffers */ + int lit_bufsize = 1 << (memLevel + 6); + deflate_allocs *alloc_bufs = alloc_deflate(strm, windowBits, lit_bufsize); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + s = alloc_bufs->state; + s->alloc_bufs = alloc_bufs; + s->window = alloc_bufs->window; + s->prev = alloc_bufs->prev; + s->head = alloc_bufs->head; + s->pending_buf = alloc_bufs->pending_buf; + strm->state = (struct internal_state *)s; s->strm = strm; s->status = INIT_STATE; /* to pass state test in deflateReset() */ @@ -243,18 +345,9 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level s->w_size = 1 << s->w_bits; s->w_mask = s->w_size - 1; -#ifdef X86_PCLMULQDQ_CRC - window_padding = 8; -#endif - - s->window = (unsigned char *) ZALLOC_WINDOW(strm, s->w_size + window_padding, 2*sizeof(unsigned char)); - s->prev = (Pos *) ZALLOC(strm, s->w_size, sizeof(Pos)); - memset(s->prev, 0, s->w_size * sizeof(Pos)); - s->head = (Pos *) ZALLOC(strm, HASH_SIZE, sizeof(Pos)); - s->high_water = 0; /* nothing written to s->window yet */ - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + s->lit_bufsize = lit_bufsize; /* 16K elements by default */ /* We overlay pending_buf and sym_buf. This works since the average size * for length/distance pairs over any compressed block is assured to be 31 @@ -295,7 +388,6 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level * symbols from which it is being constructed. */ - s->pending_buf = (unsigned char *) ZALLOC(strm, s->lit_bufsize, 4); s->pending_buf_size = s->lit_bufsize * 4; if (s->window == NULL || s->prev == NULL || s->head == NULL || s->pending_buf == NULL) { @@ -304,8 +396,15 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level PREFIX(deflateEnd)(strm); return Z_MEM_ERROR; } + +#ifdef LIT_MEM + s->d_buf = (uint16_t *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else s->sym_buf = s->pending_buf + s->lit_bufsize; s->sym_end = (s->lit_bufsize - 1) * 3; +#endif /* We avoid equality with lit_bufsize*3 because of wraparound at 64K * on 16 bit machines and because stored blocks are restricted to * 64K-1 bytes. @@ -348,7 +447,7 @@ static int deflateStateCheck(PREFIX3(stream) *strm) { if (strm == NULL || strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) return 1; s = strm->state; - if (s == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE)) + if (s == NULL || s->alloc_bufs == NULL || s->strm != strm || (s->status < INIT_STATE || s->status > MAX_STATE)) return 1; return 0; } @@ -370,7 +469,7 @@ int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) - strm->adler = functable.adler32(strm->adler, dictionary, dictLength); + strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength); DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); /* hook for IBM Z DFLTCC */ s->wrap = 0; /* avoid computing Adler-32 in read_buf */ @@ -457,7 +556,7 @@ int32_t Z_EXPORT PREFIX(deflateResetKeep)(PREFIX3(stream) *strm) { #ifdef GZIP if (s->wrap == 2) { - strm->adler = functable.crc32_fold_reset(&s->crc_fold); + strm->adler = FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); } else #endif strm->adler = ADLER32_INITIAL_VALUE; @@ -506,9 +605,17 @@ int32_t Z_EXPORT PREFIX(deflatePrime)(PREFIX3(stream) *strm, int32_t bits, int32 if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; + +#ifdef LIT_MEM + if (bits < 0 || bits > BIT_BUF_SIZE || + (unsigned char *)s->d_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) + return Z_BUF_ERROR; +#else if (bits < 0 || bits > BIT_BUF_SIZE || bits > (int32_t)(sizeof(value) << 3) || s->sym_buf < s->pending_out + ((BIT_BUF_SIZE + 7) >> 3)) return Z_BUF_ERROR; +#endif + do { put = BIT_BUF_SIZE - s->bi_valid; put = MIN(put, bits); @@ -555,7 +662,7 @@ int32_t Z_EXPORT PREFIX(deflateParams)(PREFIX3(stream) *strm, int32_t level, int if (s->level != level) { if (s->level == 0 && s->matches != 0) { if (s->matches == 1) { - functable.slide_hash(s); + FUNCTABLE_CALL(slide_hash)(s); } else { CLEAR_HASH(s); } @@ -794,7 +901,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { #ifdef GZIP if (s->status == GZIP_STATE) { /* gzip header */ - functable.crc32_fold_reset(&s->crc_fold); + FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); put_byte(s, 31); put_byte(s, 139); put_byte(s, 8); @@ -911,7 +1018,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { } } put_short(s, (uint16_t)strm->adler); - functable.crc32_fold_reset(&s->crc_fold); + FUNCTABLE_CALL(crc32_fold_reset)(&s->crc_fold); } s->status = BUSY_STATE; @@ -982,7 +1089,7 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { /* Write the trailer */ #ifdef GZIP if (s->wrap == 2) { - strm->adler = functable.crc32_fold_final(&s->crc_fold); + strm->adler = FUNCTABLE_CALL(crc32_fold_final)(&s->crc_fold); put_uint32(s, strm->adler); put_uint32(s, (uint32_t)strm->total_in); @@ -1007,21 +1114,13 @@ int32_t Z_EXPORT PREFIX(deflate)(PREFIX3(stream) *strm, int32_t flush) { /* ========================================================================= */ int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) { - int32_t status; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - status = strm->state->status; + int32_t status = strm->state->status; - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, strm->state->pending_buf); - TRY_FREE(strm, strm->state->head); - TRY_FREE(strm, strm->state->prev); - TRY_FREE_WINDOW(strm, strm->state->window); - - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + /* Free allocated buffers */ + free_deflate(strm); return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } @@ -1032,7 +1131,6 @@ int32_t Z_EXPORT PREFIX(deflateEnd)(PREFIX3(stream) *strm) { int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *source) { deflate_state *ds; deflate_state *ss; - uint32_t window_padding = 0; if (deflateStateCheck(source) || dest == NULL) return Z_STREAM_ERROR; @@ -1041,34 +1139,39 @@ int32_t Z_EXPORT PREFIX(deflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); - ds = ZALLOC_DEFLATE_STATE(dest); - if (ds == NULL) + deflate_allocs *alloc_bufs = alloc_deflate(dest, ss->w_bits, ss->lit_bufsize); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + ds = alloc_bufs->state; + dest->state = (struct internal_state *) ds; - ZCOPY_DEFLATE_STATE(ds, ss); + memcpy(ds, ss, sizeof(deflate_state)); ds->strm = dest; -#ifdef X86_PCLMULQDQ_CRC - window_padding = 8; -#endif - - ds->window = (unsigned char *) ZALLOC_WINDOW(dest, ds->w_size + window_padding, 2*sizeof(unsigned char)); - ds->prev = (Pos *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Pos *) ZALLOC(dest, HASH_SIZE, sizeof(Pos)); - ds->pending_buf = (unsigned char *) ZALLOC(dest, ds->lit_bufsize, 4); + ds->alloc_bufs = alloc_bufs; + ds->window = alloc_bufs->window; + ds->prev = alloc_bufs->prev; + ds->head = alloc_bufs->head; + ds->pending_buf = alloc_bufs->pending_buf; if (ds->window == NULL || ds->prev == NULL || ds->head == NULL || ds->pending_buf == NULL) { PREFIX(deflateEnd)(dest); return Z_MEM_ERROR; } - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(unsigned char)); + memcpy(ds->window, ss->window, DEFLATE_ADJUST_WINDOW_SIZE(ds->w_size * 2 * sizeof(unsigned char))); memcpy((void *)ds->prev, (void *)ss->prev, ds->w_size * sizeof(Pos)); memcpy((void *)ds->head, (void *)ss->head, HASH_SIZE * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, ds->pending_buf_size); + memcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +#ifdef LIT_MEM + ds->d_buf = (uint16_t *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif ds->l_desc.dyn_tree = ds->dyn_ltree; ds->d_desc.dyn_tree = ds->dyn_dtree; @@ -1095,10 +1198,10 @@ Z_INTERNAL unsigned PREFIX(read_buf)(PREFIX3(stream) *strm, unsigned char *buf, memcpy(buf, strm->next_in, len); #ifdef GZIP } else if (strm->state->wrap == 2) { - functable.crc32_fold_copy(&strm->state->crc_fold, buf, strm->next_in, len); + FUNCTABLE_CALL(crc32_fold_copy)(&strm->state->crc_fold, buf, strm->next_in, len); #endif } else if (strm->state->wrap == 1) { - strm->adler = functable.adler32_fold_copy(strm->adler, buf, strm->next_in, len); + strm->adler = FUNCTABLE_CALL(adler32_fold_copy)(strm->adler, buf, strm->next_in, len); } else { memcpy(buf, strm->next_in, len); } @@ -1125,9 +1228,9 @@ static void lm_set_level(deflate_state *s, int level) { s->insert_string = &insert_string_roll; s->quick_insert_string = &quick_insert_string_roll; } else { - s->update_hash = functable.update_hash; - s->insert_string = functable.insert_string; - s->quick_insert_string = functable.quick_insert_string; + s->update_hash = update_hash; + s->insert_string = insert_string; + s->quick_insert_string = quick_insert_string; } s->level = level; @@ -1191,7 +1294,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { s->block_start -= (int)wsize; if (s->insert > s->strstart) s->insert = s->strstart; - functable.slide_hash(s); + FUNCTABLE_CALL(slide_hash)(s); more += wsize; } if (s->strm->avail_in == 0) @@ -1217,7 +1320,7 @@ void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) { if (s->lookahead + s->insert >= STD_MIN_MATCH) { unsigned int str = s->strstart - s->insert; if (UNLIKELY(s->max_chain_length > 1024)) { - s->ins_h = s->update_hash(s, s->window[str], s->window[str+1]); + s->ins_h = s->update_hash(s->window[str], s->window[str+1]); } else if (str >= 1) { s->quick_insert_string(s, str + 2 - STD_MIN_MATCH); } diff --git a/3rdparty/zlib-ng/deflate.h b/3rdparty/zlib-ng/deflate.h index 8001b47c99..e122ae1aad 100644 --- a/3rdparty/zlib-ng/deflate.h +++ b/3rdparty/zlib-ng/deflate.h @@ -12,8 +12,12 @@ #include "zutil.h" #include "zendian.h" -#include "adler32_fold.h" -#include "crc32_fold.h" +#include "crc32.h" + +#ifdef S390_DFLTCC_DEFLATE +# include "arch/s390/dfltcc_common.h" +# define HAVE_ARCH_DEFLATE_STATE +#endif /* define NO_GZIP when compiling if you want to disable gzip header and trailer creation by deflate(). NO_GZIP would be used to avoid linking in @@ -23,6 +27,12 @@ # define GZIP #endif +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +#ifndef NO_LIT_MEM +# define LIT_MEM +#endif + /* =========================================================================== * Internal compression state. */ @@ -108,11 +118,30 @@ typedef uint16_t Pos; /* Type definitions for hash callbacks */ typedef struct internal_state deflate_state; -typedef uint32_t (* update_hash_cb) (deflate_state *const s, uint32_t h, uint32_t val); +typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val); typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count); typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str); -struct internal_state { +uint32_t update_hash (uint32_t h, uint32_t val); +void insert_string (deflate_state *const s, uint32_t str, uint32_t count); +Pos quick_insert_string (deflate_state *const s, uint32_t str); + +uint32_t update_hash_roll (uint32_t h, uint32_t val); +void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count); +Pos quick_insert_string_roll(deflate_state *const s, uint32_t str); + +/* Struct for memory allocation handling */ +typedef struct deflate_allocs_s { + char *buf_start; + free_func zfree; + deflate_state *state; + unsigned char *window; + unsigned char *pending_buf; + Pos *prev; + Pos *head; +} deflate_allocs; + +struct ALIGNED_(64) internal_state { PREFIX3(stream) *strm; /* pointer back to this zlib stream */ unsigned char *pending_buf; /* output still pending */ unsigned char *pending_out; /* next pending byte to output to the stream */ @@ -260,8 +289,16 @@ struct internal_state { * - I can't count above 4 */ +#ifdef LIT_MEM +# define LIT_BUFS 5 + uint16_t *d_buf; /* buffer for distances */ + unsigned char *l_buf; /* buffer for literals/lengths */ +#else +# define LIT_BUFS 4 unsigned char *sym_buf; /* buffer for distances and literals/lengths */ - unsigned int sym_next; /* running index in sym_buf */ +#endif + + unsigned int sym_next; /* running index in symbol buffer */ unsigned int sym_end; /* symbol table full when sym_next reaches this */ unsigned long opt_len; /* bit length of current block with optimal trees */ @@ -273,8 +310,11 @@ struct internal_state { unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */ unsigned long bits_sent; /* bit length of compressed data sent mod 2^32 */ - /* Reserved for future use and alignment purposes */ - char *reserved_p; + deflate_allocs *alloc_bufs; + +#ifdef HAVE_ARCH_DEFLATE_STATE + arch_deflate_state arch; /* architecture-specific extensions */ +#endif uint64_t bi_buf; /* Output buffer. bits are inserted starting at the bottom (least significant bits). */ @@ -284,7 +324,7 @@ struct internal_state { /* Reserved for future use and alignment purposes */ int32_t reserved[11]; -} ALIGNED_(8); +}; typedef enum { need_more, /* block not completed, need more input or more output */ diff --git a/3rdparty/zlib-ng/deflate_fast.c b/3rdparty/zlib-ng/deflate_fast.c index 3184aa718c..2d0444cd73 100644 --- a/3rdparty/zlib-ng/deflate_fast.c +++ b/3rdparty/zlib-ng/deflate_fast.c @@ -1,6 +1,6 @@ /* deflate_fast.c -- compress data using the fast strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -41,7 +41,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * dictionary, and set hash_head to the head of the hash chain: */ if (s->lookahead >= WANT_MIN_MATCH) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); dist = (int64_t)s->strstart - hash_head; /* Find the longest match, discarding those <= prev_length. @@ -52,7 +52,7 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - match_len = functable.longest_match(s, hash_head); + match_len = FUNCTABLE_CALL(longest_match)(s, hash_head); /* longest_match() sets match_start */ } } @@ -71,11 +71,11 @@ Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) { match_len--; /* string at strstart already in table */ s->strstart++; - functable.insert_string(s, s->strstart, match_len); + insert_string(s, s->strstart, match_len); s->strstart += match_len; } else { s->strstart += match_len; - functable.quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH); + quick_insert_string(s, s->strstart + 2 - STD_MIN_MATCH); /* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not * matter since it will be recomputed at next deflate call. diff --git a/3rdparty/zlib-ng/deflate_huff.c b/3rdparty/zlib-ng/deflate_huff.c index b197e24d7c..d5a234b114 100644 --- a/3rdparty/zlib-ng/deflate_huff.c +++ b/3rdparty/zlib-ng/deflate_huff.c @@ -1,6 +1,6 @@ /* deflate_huff.c -- compress data using huffman encoding only strategy * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/3rdparty/zlib-ng/deflate_medium.c b/3rdparty/zlib-ng/deflate_medium.c index 47796e3221..2aeebe2026 100644 --- a/3rdparty/zlib-ng/deflate_medium.c +++ b/3rdparty/zlib-ng/deflate_medium.c @@ -45,16 +45,18 @@ static void insert_match(deflate_state *s, struct match match) { if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + WANT_MIN_MATCH))) return; + /* string at strstart already in table */ + match.strstart++; + match.match_length--; + /* matches that are not long enough we need to emit as literals */ - if (LIKELY(match.match_length < WANT_MIN_MATCH)) { - match.strstart++; - match.match_length--; + if (LIKELY(match.match_length < WANT_MIN_MATCH - 1)) { if (UNLIKELY(match.match_length > 0)) { if (match.strstart >= match.orgstart) { if (match.strstart + match.match_length - 1 >= match.orgstart) { - functable.insert_string(s, match.strstart, match.match_length); + insert_string(s, match.strstart, match.match_length); } else { - functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); + insert_string(s, match.strstart, match.orgstart - match.strstart + 1); } match.strstart += match.match_length; match.match_length = 0; @@ -63,35 +65,18 @@ static void insert_match(deflate_state *s, struct match match) { return; } - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ - if (match.match_length <= 16 * s->max_insert_length && s->lookahead >= WANT_MIN_MATCH) { - match.match_length--; /* string at strstart already in table */ - match.strstart++; - - if (LIKELY(match.strstart >= match.orgstart)) { - if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { - functable.insert_string(s, match.strstart, match.match_length); - } else { - functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); - } - } else if (match.orgstart < match.strstart + match.match_length) { - functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); + /* Insert into hash table. */ + if (LIKELY(match.strstart >= match.orgstart)) { + if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { + insert_string(s, match.strstart, match.match_length); + } else { + insert_string(s, match.strstart, match.orgstart - match.strstart + 1); } - match.strstart += match.match_length; - match.match_length = 0; - } else { - match.strstart += match.match_length; - match.match_length = 0; - - if (match.strstart >= (STD_MIN_MATCH - 2)) - functable.quick_insert_string(s, match.strstart + 2 - STD_MIN_MATCH); - - /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ + } else if (match.orgstart < match.strstart + match.match_length) { + insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); } + match.strstart += match.match_length; + match.match_length = 0; } static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) { @@ -199,7 +184,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { } else { hash_head = 0; if (s->lookahead >= WANT_MIN_MATCH) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); } current_match.strstart = (uint16_t)s->strstart; @@ -215,7 +200,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - current_match.match_length = (uint16_t)functable.longest_match(s, hash_head); + current_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head); current_match.match_start = (uint16_t)s->match_start; if (UNLIKELY(current_match.match_length < WANT_MIN_MATCH)) current_match.match_length = 1; @@ -235,7 +220,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { /* now, look ahead one */ if (LIKELY(!early_exit && s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) { s->strstart = current_match.strstart + current_match.match_length; - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); next_match.strstart = (uint16_t)s->strstart; next_match.orgstart = next_match.strstart; @@ -250,7 +235,7 @@ Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - next_match.match_length = (uint16_t)functable.longest_match(s, hash_head); + next_match.match_length = (uint16_t)FUNCTABLE_CALL(longest_match)(s, hash_head); next_match.match_start = (uint16_t)s->match_start; if (UNLIKELY(next_match.match_start >= next_match.strstart)) { /* this can happen due to some restarts */ diff --git a/3rdparty/zlib-ng/deflate_p.h b/3rdparty/zlib-ng/deflate_p.h index dd2021a0f5..7c74ebf5ad 100644 --- a/3rdparty/zlib-ng/deflate_p.h +++ b/3rdparty/zlib-ng/deflate_p.h @@ -1,7 +1,7 @@ /* deflate_p.h -- Private inline functions and macros shared with more than * one deflate method * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -60,27 +60,37 @@ extern const unsigned char Z_INTERNAL zng_dist_code[]; static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) { /* c is the unmatched char */ +#ifdef LIT_MEM + s->d_buf[s->sym_next] = 0; + s->l_buf[s->sym_next++] = c; +#else s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = 0; s->sym_buf[s->sym_next++] = c; +#endif s->dyn_ltree[c].Freq++; Tracevv((stderr, "%c", c)); Assert(c <= (STD_MAX_MATCH-STD_MIN_MATCH), "zng_tr_tally: bad literal"); return (s->sym_next == s->sym_end); } -static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) { +static inline int zng_tr_tally_dist(deflate_state* s, uint32_t dist, uint32_t len) { /* dist: distance of matched string */ /* len: match length-STD_MIN_MATCH */ +#ifdef LIT_MEM + s->d_buf[s->sym_next] = dist; + s->l_buf[s->sym_next++] = len; +#else s->sym_buf[s->sym_next++] = (uint8_t)(dist); s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8); s->sym_buf[s->sym_next++] = (uint8_t)len; +#endif s->matches++; dist--; Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, "zng_tr_tally: bad match"); - s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++; + s->dyn_ltree[zng_length_code[len] + LITERALS + 1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; return (s->sym_next == s->sym_end); } diff --git a/3rdparty/zlib-ng/deflate_quick.c b/3rdparty/zlib-ng/deflate_quick.c index df5a17b9e6..5a1937b679 100644 --- a/3rdparty/zlib-ng/deflate_quick.c +++ b/3rdparty/zlib-ng/deflate_quick.c @@ -86,7 +86,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { } if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) { - hash_head = functable.quick_insert_string(s, s->strstart); + hash_head = quick_insert_string(s, s->strstart); dist = (int64_t)s->strstart - hash_head; if (dist <= MAX_DIST(s) && dist > 0) { @@ -94,7 +94,7 @@ Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) { const uint8_t *match_start = s->window + hash_head; if (zng_memcmp_2(str_start, match_start) == 0) { - match_len = functable.compare256(str_start+2, match_start+2) + 2; + match_len = FUNCTABLE_CALL(compare256)(str_start+2, match_start+2) + 2; if (match_len >= WANT_MIN_MATCH) { if (UNLIKELY(match_len > s->lookahead)) diff --git a/3rdparty/zlib-ng/deflate_rle.c b/3rdparty/zlib-ng/deflate_rle.c index cd08509946..ee442141be 100644 --- a/3rdparty/zlib-ng/deflate_rle.c +++ b/3rdparty/zlib-ng/deflate_rle.c @@ -1,6 +1,6 @@ /* deflate_rle.c -- compress data using RLE strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/3rdparty/zlib-ng/deflate_slow.c b/3rdparty/zlib-ng/deflate_slow.c index 9f1c913467..de70cc1bba 100644 --- a/3rdparty/zlib-ng/deflate_slow.c +++ b/3rdparty/zlib-ng/deflate_slow.c @@ -1,6 +1,6 @@ /* deflate_slow.c -- compress data using the slow strategy of deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -19,12 +19,12 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { int bflush; /* set if current block must be flushed */ int64_t dist; uint32_t match_len; - match_func *longest_match; + match_func longest_match; if (s->max_chain_length <= 1024) - longest_match = &functable.longest_match; + longest_match = FUNCTABLE_FPTR(longest_match); else - longest_match = &functable.longest_match_slow; + longest_match = FUNCTABLE_FPTR(longest_match_slow); /* Process the input block. */ for (;;) { @@ -61,7 +61,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - match_len = (*longest_match)(s, hash_head); + match_len = longest_match(s, hash_head); /* longest_match() sets match_start */ if (match_len <= 5 && (s->strategy == Z_FILTERED)) { @@ -129,7 +129,7 @@ Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) { } Assert(flush != Z_NO_FLUSH, "no flush?"); if (UNLIKELY(s->match_available)) { - (void) zng_tr_tally_lit(s, s->window[s->strstart-1]); + Z_UNUSED(zng_tr_tally_lit(s, s->window[s->strstart-1])); s->match_available = 0; } s->insert = s->strstart < (STD_MIN_MATCH - 1) ? s->strstart : (STD_MIN_MATCH - 1); diff --git a/3rdparty/zlib-ng/deflate_stored.c b/3rdparty/zlib-ng/deflate_stored.c index 6160896b3f..9e5acfbf96 100644 --- a/3rdparty/zlib-ng/deflate_stored.c +++ b/3rdparty/zlib-ng/deflate_stored.c @@ -1,6 +1,6 @@ /* deflate_stored.c -- store data without compression using deflation algorithm * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -22,7 +22,7 @@ * * deflate_stored() is written to minimize the number of times an input byte is * copied. It is most efficient with large input and output buffers, which - * maximizes the opportunites to have a single copy from next_in to next_out. + * maximizes the opportunities to have a single copy from next_in to next_out. */ Z_INTERNAL block_state deflate_stored(deflate_state *s, int flush) { /* Smallest worthy block size when not flushing or finishing. By default diff --git a/3rdparty/zlib-ng/fallback_builtins.h b/3rdparty/zlib-ng/fallback_builtins.h index 79072a1028..8303508fa1 100644 --- a/3rdparty/zlib-ng/fallback_builtins.h +++ b/3rdparty/zlib-ng/fallback_builtins.h @@ -5,9 +5,6 @@ #if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #include -#ifdef X86_FEATURES -# include "arch/x86/x86_features.h" -#endif /* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0. * Because of that assumption trailing_zero is not initialized and the return value is not checked. diff --git a/3rdparty/zlib-ng/functable.c b/3rdparty/zlib-ng/functable.c index 37c4aeef7d..495d11edd2 100644 --- a/3rdparty/zlib-ng/functable.c +++ b/3rdparty/zlib-ng/functable.c @@ -2,14 +2,12 @@ * Copyright (C) 2017 Hans Kristian Rosbach * For conditions of distribution and use, see copyright notice in zlib.h */ +#ifndef DISABLE_RUNTIME_CPU_DETECTION #include "zbuild.h" -#include "zendian.h" -#include "crc32_braid_p.h" -#include "deflate.h" -#include "deflate_p.h" #include "functable.h" #include "cpu_features.h" +#include "arch_functions.h" #if defined(_MSC_VER) # include @@ -61,31 +59,10 @@ static void init_functable(void) { ft.crc32_fold_final = &crc32_fold_final_c; ft.crc32_fold_reset = &crc32_fold_reset_c; ft.inflate_fast = &inflate_fast_c; - ft.insert_string = &insert_string_c; - ft.quick_insert_string = &quick_insert_string_c; ft.slide_hash = &slide_hash_c; - ft.update_hash = &update_hash_c; - -#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN -# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL) - ft.longest_match = &longest_match_unaligned_64; - ft.longest_match_slow = &longest_match_slow_unaligned_64; - ft.compare256 = &compare256_unaligned_64; -# elif defined(HAVE_BUILTIN_CTZ) - ft.longest_match = &longest_match_unaligned_32; - ft.longest_match_slow = &longest_match_slow_unaligned_32; - ft.compare256 = &compare256_unaligned_32; -# else - ft.longest_match = &longest_match_unaligned_16; - ft.longest_match_slow = &longest_match_slow_unaligned_16; - ft.compare256 = &compare256_unaligned_16; -# endif -#else - ft.longest_match = &longest_match_c; - ft.longest_match_slow = &longest_match_slow_c; - ft.compare256 = &compare256_c; -#endif - + ft.longest_match = &longest_match_generic; + ft.longest_match_slow = &longest_match_slow_generic; + ft.compare256 = &compare256_generic; // Select arch-optimized functions @@ -110,19 +87,14 @@ static void init_functable(void) { #ifdef X86_SSSE3 if (cf.x86.has_ssse3) { ft.adler32 = &adler32_ssse3; -# ifdef X86_SSE2 ft.chunkmemset_safe = &chunkmemset_safe_ssse3; ft.inflate_fast = &inflate_fast_ssse3; -# endif } #endif // X86 - SSE4.2 #ifdef X86_SSE42 if (cf.x86.has_sse42) { ft.adler32_fold_copy = &adler32_fold_copy_sse42; - ft.insert_string = &insert_string_sse42; - ft.quick_insert_string = &quick_insert_string_sse42; - ft.update_hash = &update_hash_sse42; } #endif // X86 - PCLMUL @@ -151,8 +123,9 @@ static void init_functable(void) { # endif } #endif + // X86 - AVX512 (F,DQ,BW,Vl) #ifdef X86_AVX512 - if (cf.x86.has_avx512) { + if (cf.x86.has_avx512_common) { ft.adler32 = &adler32_avx512; ft.adler32_fold_copy = &adler32_fold_copy_avx512; } @@ -164,8 +137,8 @@ static void init_functable(void) { } #endif // X86 - VPCLMULQDQ -#if defined(X86_PCLMULQDQ_CRC) && defined(X86_VPCLMULQDQ_CRC) - if (cf.x86.has_pclmulqdq && cf.x86.has_avx512 && cf.x86.has_vpclmulqdq) { +#ifdef X86_VPCLMULQDQ_CRC + if (cf.x86.has_pclmulqdq && cf.x86.has_avx512_common && cf.x86.has_vpclmulqdq) { ft.crc32 = &crc32_vpclmulqdq; ft.crc32_fold = &crc32_fold_vpclmulqdq; ft.crc32_fold_copy = &crc32_fold_vpclmulqdq_copy; @@ -206,9 +179,6 @@ static void init_functable(void) { #ifdef ARM_ACLE if (cf.arm.has_crc32) { ft.crc32 = &crc32_acle; - ft.insert_string = &insert_string_acle; - ft.quick_insert_string = &quick_insert_string_acle; - ft.update_hash = &update_hash_acle; } #endif @@ -279,12 +249,9 @@ static void init_functable(void) { FUNCTABLE_ASSIGN(ft, crc32_fold_final); FUNCTABLE_ASSIGN(ft, crc32_fold_reset); FUNCTABLE_ASSIGN(ft, inflate_fast); - FUNCTABLE_ASSIGN(ft, insert_string); FUNCTABLE_ASSIGN(ft, longest_match); FUNCTABLE_ASSIGN(ft, longest_match_slow); - FUNCTABLE_ASSIGN(ft, quick_insert_string); FUNCTABLE_ASSIGN(ft, slide_hash); - FUNCTABLE_ASSIGN(ft, update_hash); // Memory barrier for weak memory order CPUs FUNCTABLE_BARRIER(); @@ -350,11 +317,6 @@ static void inflate_fast_stub(PREFIX3(stream) *strm, uint32_t start) { functable.inflate_fast(strm, start); } -static void insert_string_stub(deflate_state* const s, uint32_t str, uint32_t count) { - init_functable(); - functable.insert_string(s, str, count); -} - static uint32_t longest_match_stub(deflate_state* const s, Pos cur_match) { init_functable(); return functable.longest_match(s, cur_match); @@ -365,21 +327,11 @@ static uint32_t longest_match_slow_stub(deflate_state* const s, Pos cur_match) { return functable.longest_match_slow(s, cur_match); } -static Pos quick_insert_string_stub(deflate_state* const s, const uint32_t str) { - init_functable(); - return functable.quick_insert_string(s, str); -} - static void slide_hash_stub(deflate_state* s) { init_functable(); functable.slide_hash(s); } -static uint32_t update_hash_stub(deflate_state* const s, uint32_t h, uint32_t val) { - init_functable(); - return functable.update_hash(s, h, val); -} - /* functable init */ Z_INTERNAL struct functable_s functable = { force_init_stub, @@ -394,10 +346,9 @@ Z_INTERNAL struct functable_s functable = { crc32_fold_final_stub, crc32_fold_reset_stub, inflate_fast_stub, - insert_string_stub, longest_match_stub, longest_match_slow_stub, - quick_insert_string_stub, slide_hash_stub, - update_hash_stub }; + +#endif diff --git a/3rdparty/zlib-ng/functable.h b/3rdparty/zlib-ng/functable.h index 9f78188e10..173a030c66 100644 --- a/3rdparty/zlib-ng/functable.h +++ b/3rdparty/zlib-ng/functable.h @@ -7,14 +7,21 @@ #define FUNCTABLE_H_ #include "deflate.h" -#include "crc32_fold.h" -#include "adler32_fold.h" +#include "crc32.h" + +#ifdef DISABLE_RUNTIME_CPU_DETECTION + +# include "arch_functions.h" + +/* When compiling with native instructions it is not necessary to use functable. + * Instead we use native_ macro indicating the best available variant of arch-specific + * functions for the current platform. + */ +# define FUNCTABLE_INIT ((void)0) +# define FUNCTABLE_CALL(name) native_ ## name +# define FUNCTABLE_FPTR(name) &native_ ## name -#ifdef ZLIB_COMPAT -typedef struct z_stream_s z_stream; #else -typedef struct zng_stream_s zng_stream; -#endif struct functable_s { void (* force_init) (void); @@ -29,14 +36,20 @@ struct functable_s { uint32_t (* crc32_fold_final) (struct crc32_fold_s *crc); uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc); void (* inflate_fast) (PREFIX3(stream) *strm, uint32_t start); - void (* insert_string) (deflate_state *const s, uint32_t str, uint32_t count); uint32_t (* longest_match) (deflate_state *const s, Pos cur_match); uint32_t (* longest_match_slow) (deflate_state *const s, Pos cur_match); - Pos (* quick_insert_string)(deflate_state *const s, uint32_t str); void (* slide_hash) (deflate_state *s); - uint32_t (* update_hash) (deflate_state *const s, uint32_t h, uint32_t val); }; Z_INTERNAL extern struct functable_s functable; + +/* Explicitly indicate functions are conditionally dispatched. + */ +# define FUNCTABLE_INIT functable.force_init() +# define FUNCTABLE_CALL(name) functable.name +# define FUNCTABLE_FPTR(name) functable.name + +#endif + #endif diff --git a/3rdparty/zlib-ng/gzguts.h b/3rdparty/zlib-ng/gzguts.h index a663844b69..14f2391152 100644 --- a/3rdparty/zlib-ng/gzguts.h +++ b/3rdparty/zlib-ng/gzguts.h @@ -1,7 +1,7 @@ #ifndef GZGUTS_H_ #define GZGUTS_H_ /* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004-2019 Mark Adler + * Copyright (C) 2004-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -135,7 +135,9 @@ typedef gz_state *gz_statep; /* shared functions */ void Z_INTERNAL gz_error(gz_state *, int, const char *); - +#ifdef ZLIB_COMPAT +unsigned Z_INTERNAL gz_intmax(void); +#endif /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t value -- needed when comparing unsigned to z_off64_t, which is signed (possible z_off64_t types off_t, off64_t, and long are all signed) */ diff --git a/3rdparty/zlib-ng/gzlib.c b/3rdparty/zlib-ng/gzlib.c index e613837efb..b8a506b6a5 100644 --- a/3rdparty/zlib-ng/gzlib.c +++ b/3rdparty/zlib-ng/gzlib.c @@ -1,5 +1,5 @@ /* gzlib.c -- zlib functions common to reading and writing gzip files - * Copyright (C) 2004-2019 Mark Adler + * Copyright (C) 2004-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -523,3 +523,9 @@ void Z_INTERNAL gz_error(gz_state *state, int err, const char *msg) { } (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, "%s%s%s", state->path, ": ", msg); } + +#ifdef ZLIB_COMPAT +unsigned Z_INTERNAL gz_intmax(void) { + return INT_MAX; +} +#endif diff --git a/3rdparty/zlib-ng/infback.c b/3rdparty/zlib-ng/infback.c index 9f5042b4d3..307d05ca3c 100644 --- a/3rdparty/zlib-ng/infback.c +++ b/3rdparty/zlib-ng/infback.c @@ -43,10 +43,15 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t wi } if (strm->zfree == NULL) strm->zfree = PREFIX(zcfree); - state = ZALLOC_INFLATE_STATE(strm); - if (state == NULL) + + inflate_allocs *alloc_bufs = alloc_inflate(strm); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + state = alloc_bufs->state; + state->alloc_bufs = alloc_bufs; Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state *)state; state->dmax = 32768U; state->wbits = (unsigned int)windowBits; @@ -55,7 +60,7 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateBackInit)(PREFIX3(stream) *strm, int32_t wi state->wnext = 0; state->whave = 0; state->sane = 1; - state->chunksize = functable.chunksize(); + state->chunksize = FUNCTABLE_CALL(chunksize)(); return Z_OK; } @@ -357,7 +362,7 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in RESTORE(); if (state->whave < state->wsize) state->whave = state->wsize - left; - functable.inflate_fast(strm, state->wsize); + FUNCTABLE_CALL(inflate_fast)(strm, state->wsize); LOAD(); break; } @@ -504,8 +509,10 @@ int32_t Z_EXPORT PREFIX(inflateBack)(PREFIX3(stream) *strm, in_func in, void *in int32_t Z_EXPORT PREFIX(inflateBackEnd)(PREFIX3(stream) *strm) { if (strm == NULL || strm->state == NULL || strm->zfree == NULL) return Z_STREAM_ERROR; - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + + /* Free allocated buffers */ + free_inflate(strm); + Tracev((stderr, "inflate: end\n")); return Z_OK; } diff --git a/3rdparty/zlib-ng/inflate.c b/3rdparty/zlib-ng/inflate.c index fe55c498e3..956f37db7d 100644 --- a/3rdparty/zlib-ng/inflate.c +++ b/3rdparty/zlib-ng/inflate.c @@ -19,7 +19,7 @@ /* function prototypes */ static int inflateStateCheck(PREFIX3(stream) *strm); -static int updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum); +static void updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum); static uint32_t syncsearch(uint32_t *have, const unsigned char *buf, uint32_t len); static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst, @@ -28,11 +28,11 @@ static inline void inf_chksum_cpy(PREFIX3(stream) *strm, uint8_t *dst, struct inflate_state *state = (struct inflate_state*)strm->state; #ifdef GUNZIP if (state->flags) { - functable.crc32_fold_copy(&state->crc_fold, dst, src, copy); + FUNCTABLE_CALL(crc32_fold_copy)(&state->crc_fold, dst, src, copy); } else #endif { - strm->adler = state->check = functable.adler32_fold_copy(state->check, dst, src, copy); + strm->adler = state->check = FUNCTABLE_CALL(adler32_fold_copy)(state->check, dst, src, copy); } } @@ -40,11 +40,11 @@ static inline void inf_chksum(PREFIX3(stream) *strm, const uint8_t *src, uint32_ struct inflate_state *state = (struct inflate_state*)strm->state; #ifdef GUNZIP if (state->flags) { - functable.crc32_fold(&state->crc_fold, src, len, 0); + FUNCTABLE_CALL(crc32_fold)(&state->crc_fold, src, len, 0); } else #endif { - strm->adler = state->check = functable.adler32(state->check, src, len); + strm->adler = state->check = FUNCTABLE_CALL(adler32)(state->check, src, len); } } @@ -53,7 +53,7 @@ static int inflateStateCheck(PREFIX3(stream) *strm) { if (strm == NULL || strm->zalloc == NULL || strm->zfree == NULL) return 1; state = (struct inflate_state *)strm->state; - if (state == NULL || state->strm != strm || state->mode < HEAD || state->mode > SYNC) + if (state == NULL || state->alloc_bufs == NULL || state->strm != strm || state->mode < HEAD || state->mode > SYNC) return 1; return 0; } @@ -120,13 +120,9 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits #endif } - /* set number of window bits, free window if different */ + /* set number of window bits */ if (windowBits && (windowBits < MIN_WBITS || windowBits > MAX_WBITS)) return Z_STREAM_ERROR; - if (state->window != NULL && state->wbits != (unsigned)windowBits) { - ZFREE_WINDOW(strm, state->window); - state->window = NULL; - } /* update state and reset the rest of it */ state->wrap = wrap; @@ -134,13 +130,94 @@ int32_t Z_EXPORT PREFIX(inflateReset2)(PREFIX3(stream) *strm, int32_t windowBits return PREFIX(inflateReset)(strm); } -/* This function is hidden in ZLIB_COMPAT builds. */ +#ifdef INF_ALLOC_DEBUG +# include +# define LOGSZ(name,size) fprintf(stderr, "%s is %d bytes\n", name, size) +# define LOGSZP(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %d, padded %d\n", name, size, loc, pad) +# define LOGSZPL(name,size,loc,pad) fprintf(stderr, "%s is %d bytes, offset %ld, padded %d\n", name, size, loc, pad) +#else +# define LOGSZ(name,size) +# define LOGSZP(name,size,loc,pad) +# define LOGSZPL(name,size,loc,pad) +#endif + +/* =========================================================================== + * Allocate a big buffer and divide it up into the various buffers inflate needs. + * Handles alignment of allocated buffer and alignment of individual buffers. + */ +Z_INTERNAL inflate_allocs* alloc_inflate(PREFIX3(stream) *strm) { + int curr_size = 0; + + /* Define sizes */ + int window_size = INFLATE_ADJUST_WINDOW_SIZE((1 << MAX_WBITS) + 64); /* 64B padding for chunksize */ + int state_size = sizeof(inflate_state); + int alloc_size = sizeof(inflate_allocs); + + /* Calculate relative buffer positions and paddings */ + LOGSZP("window", window_size, PAD_WINDOW(curr_size), PADSZ(curr_size,WINDOW_PAD_SIZE)); + int window_pos = PAD_WINDOW(curr_size); + curr_size = window_pos + window_size; + + LOGSZP("state", state_size, PAD_64(curr_size), PADSZ(curr_size,64)); + int state_pos = PAD_64(curr_size); + curr_size = state_pos + state_size; + + LOGSZP("alloc", alloc_size, PAD_16(curr_size), PADSZ(curr_size,16)); + int alloc_pos = PAD_16(curr_size); + curr_size = alloc_pos + alloc_size; + + /* Add 64-1 or 4096-1 to allow window alignment, and round size of buffer up to multiple of 64 */ + int total_size = PAD_64(curr_size + (WINDOW_PAD_SIZE - 1)); + + /* Allocate buffer, align to 64-byte cacheline, and zerofill the resulting buffer */ + char *original_buf = strm->zalloc(strm->opaque, 1, total_size); + if (original_buf == NULL) + return NULL; + + char *buff = (char *)HINT_ALIGNED_WINDOW((char *)PAD_WINDOW(original_buf)); + LOGSZPL("Buffer alloc", total_size, PADSZ((uintptr_t)original_buf,WINDOW_PAD_SIZE), PADSZ(curr_size,WINDOW_PAD_SIZE)); + + /* Initialize alloc_bufs */ + inflate_allocs *alloc_bufs = (struct inflate_allocs_s *)(buff + alloc_pos); + alloc_bufs->buf_start = (char *)original_buf; + alloc_bufs->zfree = strm->zfree; + + alloc_bufs->window = (unsigned char *)HINT_ALIGNED_WINDOW((buff + window_pos)); + alloc_bufs->state = (inflate_state *)HINT_ALIGNED_64((buff + state_pos)); + +#ifdef Z_MEMORY_SANITIZER + /* This is _not_ to subvert the memory sanitizer but to instead unposion some + data we willingly and purposefully load uninitialized into vector registers + in order to safely read the last < chunksize bytes of the window. */ + __msan_unpoison(alloc_bufs->window + window_size, 64); +#endif + + return alloc_bufs; +} + +/* =========================================================================== + * Free all allocated inflate buffers + */ +Z_INTERNAL void free_inflate(PREFIX3(stream) *strm) { + struct inflate_state *state = (struct inflate_state *)strm->state; + + if (state->alloc_bufs != NULL) { + inflate_allocs *alloc_bufs = state->alloc_bufs; + alloc_bufs->zfree(strm->opaque, alloc_bufs->buf_start); + strm->state = NULL; + } +} + +/* =========================================================================== + * Initialize inflate state and buffers. + * This function is hidden in ZLIB_COMPAT builds. + */ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windowBits) { int32_t ret; struct inflate_state *state; - /* Initialize functable earlier. */ - functable.force_init(); + /* Initialize functable */ + FUNCTABLE_INIT; if (strm == NULL) return Z_STREAM_ERROR; @@ -151,19 +228,23 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo } if (strm->zfree == NULL) strm->zfree = PREFIX(zcfree); - state = ZALLOC_INFLATE_STATE(strm); - if (state == NULL) + + inflate_allocs *alloc_bufs = alloc_inflate(strm); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + + state = alloc_bufs->state; + state->window = alloc_bufs->window; + state->alloc_bufs = alloc_bufs; Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state *)state; state->strm = strm; - state->window = NULL; state->mode = HEAD; /* to pass state test in inflateReset2() */ - state->chunksize = functable.chunksize(); + state->chunksize = FUNCTABLE_CALL(chunksize)(); ret = PREFIX(inflateReset2)(strm, windowBits); if (ret != Z_OK) { - ZFREE_STATE(strm, state); - strm->state = NULL; + free_inflate(strm); } return ret; } @@ -222,31 +303,6 @@ void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state) { state->distbits = 5; } -int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state) { - /* if it hasn't been done already, allocate space for the window */ - if (state->window == NULL) { - unsigned wsize = 1U << state->wbits; - state->window = (unsigned char *)ZALLOC_WINDOW(state->strm, wsize + state->chunksize, sizeof(unsigned char)); - if (state->window == NULL) - return Z_MEM_ERROR; -#ifdef Z_MEMORY_SANITIZER - /* This is _not_ to subvert the memory sanitizer but to instead unposion some - data we willingly and purposefully load uninitialized into vector registers - in order to safely read the last < chunksize bytes of the window. */ - __msan_unpoison(state->window + wsize, state->chunksize); -#endif - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } - - return Z_OK; -} - /* Update the window with the last wsize (normally 32K) bytes written before returning. If window does not exist yet, create it. This is only called @@ -261,20 +317,20 @@ int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state) { output will fall in the output data, making match copies simpler and faster. The advantage may be dependent on the size of the processor's data caches. */ -static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum) { +static void updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t len, int32_t cksum) { struct inflate_state *state; uint32_t dist; state = (struct inflate_state *)strm->state; - if (PREFIX(inflate_ensure_window)(state)) return 1; + /* if window not in use yet, initialize */ + if (state->wsize == 0) + state->wsize = 1U << state->wbits; /* len state->wsize or less output bytes into the circular window */ if (len >= state->wsize) { /* Only do this if the caller specifies to checksum bytes AND the platform requires - * it (s/390 being the primary exception to this. Also, for now, do the adler checksums - * if not a gzip based header. The inline adler checksums will come in the near future, - * possibly the next commit */ + * it (s/390 being the primary exception to this) */ if (INFLATE_NEED_CHECKSUM(strm) && cksum) { /* We have to split the checksum over non-copied and copied bytes */ if (len > state->wsize) @@ -314,7 +370,6 @@ static int32_t updatewindow(PREFIX3(stream) *strm, const uint8_t *end, uint32_t state->whave += dist; } } - return 0; } /* @@ -636,7 +691,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } /* compute crc32 checksum if not in raw mode */ if ((state->wrap & 4) && state->flags) - strm->adler = state->check = functable.crc32_fold_reset(&state->crc_fold); + strm->adler = state->check = FUNCTABLE_CALL(crc32_fold_reset)(&state->crc_fold); state->mode = TYPE; break; #endif @@ -867,7 +922,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { /* use inflate_fast() if we have enough input and output */ if (have >= INFLATE_FAST_MIN_HAVE && left >= INFLATE_FAST_MIN_LEFT) { RESTORE(); - functable.inflate_fast(strm, out); + FUNCTABLE_CALL(inflate_fast)(strm, out); LOAD(); if (state->mode == TYPE) state->back = -1; @@ -1026,7 +1081,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } else { copy = MIN(state->length, left); - put = functable.chunkmemset_safe(put, state->offset, copy, left); + put = FUNCTABLE_CALL(chunkmemset_safe)(put, state->offset, copy, left); } left -= copy; state->length -= copy; @@ -1056,7 +1111,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } #ifdef GUNZIP if (state->flags) - strm->adler = state->check = functable.crc32_fold_final(&state->crc_fold); + strm->adler = state->check = FUNCTABLE_CALL(crc32_fold_final)(&state->crc_fold); #endif } out = left; @@ -1098,9 +1153,6 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { ret = Z_DATA_ERROR; goto inf_leave; - case MEM: - return Z_MEM_ERROR; - case SYNC: default: /* can't happen, but makes compilers happy */ @@ -1111,7 +1163,6 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { Return from inflate(), updating the total counts and the check value. If there was no progress during the inflate() call, return a buffer error. Call updatewindow() to create and/or update the window state. - Note: a memory error from inflate() is non-recoverable. */ inf_leave: RESTORE(); @@ -1120,10 +1171,7 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { (state->wsize || (out != strm->avail_out && state->mode < BAD && (state->mode < CHECK || flush != Z_FINISH)))) { /* update sliding window with respective checksum if not in "raw" mode */ - if (updatewindow(strm, strm->next_out, check_bytes, state->wrap & 4)) { - state->mode = MEM; - return Z_MEM_ERROR; - } + updatewindow(strm, strm->next_out, check_bytes, state->wrap & 4); } in -= strm->avail_in; out -= strm->avail_out; @@ -1144,14 +1192,12 @@ int32_t Z_EXPORT PREFIX(inflate)(PREFIX3(stream) *strm, int32_t flush) { } int32_t Z_EXPORT PREFIX(inflateEnd)(PREFIX3(stream) *strm) { - struct inflate_state *state; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state *)strm->state; - if (state->window != NULL) - ZFREE_WINDOW(strm, state->window); - ZFREE_STATE(strm, strm->state); - strm->state = NULL; + + /* Free allocated buffers */ + free_inflate(strm); + Tracev((stderr, "inflate: end\n")); return Z_OK; } @@ -1179,7 +1225,6 @@ int32_t Z_EXPORT PREFIX(inflateGetDictionary)(PREFIX3(stream) *strm, uint8_t *di int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) { struct inflate_state *state; unsigned long dictid; - int32_t ret; /* check state */ if (inflateStateCheck(strm)) @@ -1190,7 +1235,7 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* check for correct dictionary identifier */ if (state->mode == DICT) { - dictid = functable.adler32(ADLER32_INITIAL_VALUE, dictionary, dictLength); + dictid = FUNCTABLE_CALL(adler32)(ADLER32_INITIAL_VALUE, dictionary, dictLength); if (dictid != state->check) return Z_DATA_ERROR; } @@ -1199,11 +1244,8 @@ int32_t Z_EXPORT PREFIX(inflateSetDictionary)(PREFIX3(stream) *strm, const uint8 /* copy dictionary to window using updatewindow(), which will amend the existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength, 0); - if (ret) { - state->mode = MEM; - return Z_MEM_ERROR; - } + updatewindow(strm, dictionary + dictLength, dictLength, 0); + state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; @@ -1271,7 +1313,7 @@ int32_t Z_EXPORT PREFIX(inflateSync)(PREFIX3(stream) *strm) { /* if first time, start search in bit buffer */ if (state->mode != SYNC) { state->mode = SYNC; - state->hold <<= state->bits & 7; + state->hold >>= state->bits & 7; state->bits -= state->bits & 7; len = 0; while (state->bits >= 8) { @@ -1334,30 +1376,28 @@ int32_t Z_EXPORT PREFIX(inflateCopy)(PREFIX3(stream) *dest, PREFIX3(stream) *sou return Z_STREAM_ERROR; state = (struct inflate_state *)source->state; + /* copy stream */ + memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); + /* allocate space */ - copy = ZALLOC_INFLATE_STATE(source); - if (copy == NULL) + inflate_allocs *alloc_bufs = alloc_inflate(dest); + if (alloc_bufs == NULL) return Z_MEM_ERROR; + copy = alloc_bufs->state; /* copy state */ - memcpy((void *)dest, (void *)source, sizeof(PREFIX3(stream))); - ZCOPY_INFLATE_STATE(copy, state); + memcpy(copy, state, sizeof(struct inflate_state)); copy->strm = dest; if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { copy->lencode = copy->codes + (state->lencode - state->codes); copy->distcode = copy->codes + (state->distcode - state->codes); } copy->next = copy->codes + (state->next - state->codes); + copy->window = alloc_bufs->window; + copy->alloc_bufs = alloc_bufs; /* window */ - copy->window = NULL; - if (state->window != NULL) { - if (PREFIX(inflate_ensure_window)(copy)) { - ZFREE_STATE(source, copy); - return Z_MEM_ERROR; - } - ZCOPY_WINDOW(copy->window, state->window, (size_t)state->wsize); - } + memcpy(copy->window, state->window, INFLATE_ADJUST_WINDOW_SIZE((size_t)state->wsize)); dest->state = (struct internal_state *)copy; return Z_OK; diff --git a/3rdparty/zlib-ng/inflate.h b/3rdparty/zlib-ng/inflate.h index 39cdf5d683..536da7d1f8 100644 --- a/3rdparty/zlib-ng/inflate.h +++ b/3rdparty/zlib-ng/inflate.h @@ -11,8 +11,12 @@ #ifndef INFLATE_H_ #define INFLATE_H_ -#include "adler32_fold.h" -#include "crc32_fold.h" +#include "crc32.h" + +#ifdef S390_DFLTCC_INFLATE +# include "arch/s390/dfltcc_common.h" +# define HAVE_ARCH_INFLATE_STATE +#endif /* define NO_GZIP when compiling if you want to disable gzip header and trailer decoding by inflate(). NO_GZIP would be used to avoid linking in the crc code when it is not needed. @@ -53,14 +57,13 @@ typedef enum { LENGTH, /* i: waiting for 32-bit length (gzip) */ DONE, /* finished check, done -- remain here until reset */ BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ SYNC /* looking for synchronization bytes to restart inflate() */ } inflate_mode; /* State transitions between above modes - - (most modes can go to BAD or MEM on error -- not shown for clarity) + (most modes can go to BAD on error -- not shown for clarity) Process header: HEAD -> (gzip) or (zlib) or (raw) @@ -81,10 +84,19 @@ typedef enum { Process trailer: CHECK -> LENGTH -> DONE */ +typedef struct inflate_state inflate_state; + +/* Struct for memory allocation handling */ +typedef struct inflate_allocs_s { + char *buf_start; + free_func zfree; + inflate_state *state; + unsigned char *window; +} inflate_allocs; /* State maintained between inflate() calls -- approximately 7K bytes, not including the allocated sliding window, which is up to 32K bytes. */ -struct inflate_state { +struct ALIGNED_(64) inflate_state { PREFIX3(stream) *strm; /* pointer back to this zlib stream */ inflate_mode mode; /* current inflate mode */ int last; /* true if processing last block */ @@ -132,9 +144,14 @@ struct inflate_state { int back; /* bits back of last unprocessed length/lit */ unsigned was; /* initial length of match */ uint32_t chunksize; /* size of memory copying chunk */ + inflate_allocs *alloc_bufs; /* struct for handling memory allocations */ +#ifdef HAVE_ARCH_INFLATE_STATE + arch_inflate_state arch; /* architecture-specific extensions */ +#endif }; -int Z_INTERNAL PREFIX(inflate_ensure_window)(struct inflate_state *state); void Z_INTERNAL PREFIX(fixedtables)(struct inflate_state *state); +Z_INTERNAL inflate_allocs* alloc_inflate(PREFIX3(stream) *strm); +Z_INTERNAL void free_inflate(PREFIX3(stream) *strm); #endif /* INFLATE_H_ */ diff --git a/3rdparty/zlib-ng/inflate_p.h b/3rdparty/zlib-ng/inflate_p.h index eff73876da..c324b0486a 100644 --- a/3rdparty/zlib-ng/inflate_p.h +++ b/3rdparty/zlib-ng/inflate_p.h @@ -10,15 +10,16 @@ /* Architecture-specific hooks. */ #ifdef S390_DFLTCC_INFLATE # include "arch/s390/dfltcc_inflate.h" +/* DFLTCC instructions require window to be page-aligned */ +# define PAD_WINDOW PAD_4096 +# define WINDOW_PAD_SIZE 4096 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_4096 #else -/* Memory management for the inflate state. Useful for allocating arch-specific extension blocks. */ -# define ZALLOC_INFLATE_STATE(strm) ((struct inflate_state *)ZALLOC(strm, 1, sizeof(struct inflate_state))) -# define ZFREE_STATE(strm, addr) ZFREE(strm, addr) -# define ZCOPY_INFLATE_STATE(dst, src) memcpy(dst, src, sizeof(struct inflate_state)) -/* Memory management for the window. Useful for allocation the aligned window. */ -# define ZALLOC_WINDOW(strm, items, size) ZALLOC(strm, items, size) -# define ZCOPY_WINDOW(dest, src, n) memcpy(dest, src, n) -# define ZFREE_WINDOW(strm, addr) ZFREE(strm, addr) +# define PAD_WINDOW PAD_64 +# define WINDOW_PAD_SIZE 64 +# define HINT_ALIGNED_WINDOW HINT_ALIGNED_64 +/* Adjust the window size for the arch-specific inflate code. */ +# define INFLATE_ADJUST_WINDOW_SIZE(n) (n) /* Invoked at the end of inflateResetKeep(). Useful for initializing arch-specific extension blocks. */ # define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) /* Invoked at the beginning of inflatePrime(). Useful for updating arch-specific buffers. */ @@ -46,9 +47,9 @@ /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP # define UPDATE(check, buf, len) \ - (state->flags ? PREFIX(crc32)(check, buf, len) : functable.adler32(check, buf, len)) + (state->flags ? PREFIX(crc32)(check, buf, len) : FUNCTABLE_CALL(adler32)(check, buf, len)) #else -# define UPDATE(check, buf, len) functable.adler32(check, buf, len) +# define UPDATE(check, buf, len) FUNCTABLE_CALL(adler32)(check, buf, len) #endif /* check macros for header crc */ diff --git a/3rdparty/zlib-ng/inftrees.c b/3rdparty/zlib-ng/inftrees.c index 423f7b461d..5234fe7ae0 100644 --- a/3rdparty/zlib-ng/inftrees.c +++ b/3rdparty/zlib-ng/inftrees.c @@ -1,5 +1,5 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2023 Mark Adler + * Copyright (C) 1995-2024 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -7,7 +7,7 @@ #include "zutil.h" #include "inftrees.h" -const char PREFIX(inflate_copyright)[] = " inflate 1.3.0 Copyright 1995-2023 Mark Adler "; +const char PREFIX(inflate_copyright)[] = " inflate 1.3.1 Copyright 1995-2024 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -54,7 +54,7 @@ int Z_INTERNAL zng_inflate_table(codetype type, uint16_t *lens, unsigned codes, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const uint16_t lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77}; static const uint16_t dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, diff --git a/3rdparty/zlib-ng/insert_string.c b/3rdparty/zlib-ng/insert_string.c index cfe39837f8..11a5b97ffe 100644 --- a/3rdparty/zlib-ng/insert_string.c +++ b/3rdparty/zlib-ng/insert_string.c @@ -1,6 +1,6 @@ /* insert_string.c -- insert_string integer hash variant * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -10,12 +10,12 @@ #define HASH_SLIDE 16 -#define HASH_CALC(s, h, val) h = ((val * 2654435761U) >> HASH_SLIDE); +#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE); #define HASH_CALC_VAR h #define HASH_CALC_VAR_INIT uint32_t h = 0 -#define UPDATE_HASH update_hash_c -#define INSERT_STRING insert_string_c -#define QUICK_INSERT_STRING quick_insert_string_c +#define UPDATE_HASH update_hash +#define INSERT_STRING insert_string +#define QUICK_INSERT_STRING quick_insert_string #include "insert_string_tpl.h" diff --git a/3rdparty/zlib-ng/insert_string_roll.c b/3rdparty/zlib-ng/insert_string_roll.c index dfea347bcc..8693f96f59 100644 --- a/3rdparty/zlib-ng/insert_string_roll.c +++ b/3rdparty/zlib-ng/insert_string_roll.c @@ -1,6 +1,6 @@ /* insert_string_roll.c -- insert_string rolling hash variant * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * */ @@ -10,7 +10,7 @@ #define HASH_SLIDE 5 -#define HASH_CALC(s, h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) +#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val)) #define HASH_CALC_VAR s->ins_h #define HASH_CALC_VAR_INIT #define HASH_CALC_READ val = strstart[0] diff --git a/3rdparty/zlib-ng/insert_string_tpl.h b/3rdparty/zlib-ng/insert_string_tpl.h index c84617730a..281c013463 100644 --- a/3rdparty/zlib-ng/insert_string_tpl.h +++ b/3rdparty/zlib-ng/insert_string_tpl.h @@ -1,10 +1,10 @@ #ifndef INSERT_STRING_H_ #define INSERT_STRING_H_ -/* insert_string.h -- Private insert_string functions shared with more than - * one insert string implementation +/* insert_string_tpl.h -- Private insert_string functions shared with more than + * one insert string implementation * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * * Copyright (C) 2013 Intel Corporation. All rights reserved. * Authors: @@ -47,9 +47,8 @@ * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */ -Z_INTERNAL uint32_t UPDATE_HASH(deflate_state *const s, uint32_t h, uint32_t val) { - (void)s; - HASH_CALC(s, h, val); +Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) { + HASH_CALC(h, val); return h & HASH_CALC_MASK; } @@ -65,7 +64,7 @@ Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) { HASH_CALC_VAR_INIT; HASH_CALC_READ; - HASH_CALC(s, HASH_CALC_VAR, val); + HASH_CALC(HASH_CALC_VAR, val); HASH_CALC_VAR &= HASH_CALC_MASK; hm = HASH_CALC_VAR; @@ -94,7 +93,7 @@ Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t cou HASH_CALC_VAR_INIT; HASH_CALC_READ; - HASH_CALC(s, HASH_CALC_VAR, val); + HASH_CALC(HASH_CALC_VAR, val); HASH_CALC_VAR &= HASH_CALC_MASK; hm = HASH_CALC_VAR; diff --git a/3rdparty/zlib-ng/match_tpl.h b/3rdparty/zlib-ng/match_tpl.h index d076798520..9c258242cd 100644 --- a/3rdparty/zlib-ng/match_tpl.h +++ b/3rdparty/zlib-ng/match_tpl.h @@ -1,6 +1,6 @@ /* match_tpl.h -- find longest match template for compare256 variants * - * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * * Portions copyright (C) 2014-2021 Konstantin Nosov @@ -8,11 +8,6 @@ * https://github.com/gildor2/fast_zlib */ -#include "zbuild.h" -#include "zutil_p.h" -#include "deflate.h" -#include "functable.h" - #ifndef MATCH_TPL_H #define MATCH_TPL_H @@ -107,11 +102,11 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because * these strings are not yet inserted into the hash table. */ - hash = s->update_hash(s, 0, scan[1]); - hash = s->update_hash(s, hash, scan[2]); + hash = s->update_hash(0, scan[1]); + hash = s->update_hash(hash, scan[2]); for (i = 3; i <= best_len; i++) { - hash = s->update_hash(s, hash, scan[i]); + hash = s->update_hash(hash, scan[i]); /* If we're starting with best_len >= 3, we can use offset search. */ pos = s->head[hash]; @@ -241,9 +236,9 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { */ scan_endstr = scan + len - (STD_MIN_MATCH+1); - hash = s->update_hash(s, 0, scan_endstr[0]); - hash = s->update_hash(s, hash, scan_endstr[1]); - hash = s->update_hash(s, hash, scan_endstr[2]); + hash = s->update_hash(0, scan_endstr[0]); + hash = s->update_hash(hash, scan_endstr[1]); + hash = s->update_hash(hash, scan_endstr[2]); pos = s->head[hash]; if (pos < cur_match) { diff --git a/3rdparty/zlib-ng/patches/zlib-ng-2.2.1-detect-intrinsics.patch b/3rdparty/zlib-ng/patches/zlib-ng-2.2.1-detect-intrinsics.patch new file mode 100644 index 0000000000..237770d204 --- /dev/null +++ b/3rdparty/zlib-ng/patches/zlib-ng-2.2.1-detect-intrinsics.patch @@ -0,0 +1,13 @@ +diff --git a/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake b/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake +index 14f82fc..78e46e1 100644 +--- a/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake ++++ b/3rdparty/zlib-ng/cmake/detect-intrinsics.cmake +@@ -66,7 +66,7 @@ macro(check_armv6_compiler_flag) + return __uqsub16(a, b); + #endif + } +- int main(void) { return 0; }" ++ int main(void) { return f(1,2); }" + HAVE_ARMV6_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) diff --git a/3rdparty/zlib-ng/patches/zlib-ng-2.2.1.patch b/3rdparty/zlib-ng/patches/zlib-ng-2.2.1.patch new file mode 100644 index 0000000000..fb3699dcff --- /dev/null +++ b/3rdparty/zlib-ng/patches/zlib-ng-2.2.1.patch @@ -0,0 +1,148 @@ +--- ./CMakeLists.txt 2024-09-11 12:28:30.597680661 +0300 ++++ ../../../zlib-ng/CMakeLists.txt 2024-09-11 12:29:10.013644583 +0300 +@@ -74,10 +74,10 @@ + # Options parsing + # + option(WITH_GZFILEOP "Compile with support for gzFile related functions" ON) +-option(ZLIB_COMPAT "Compile with zlib compatible API" ON) +-option(ZLIB_ENABLE_TESTS "Build test binaries" OFF) +-option(ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API" OFF) +-option(WITH_GTEST "Build gtest_zlib" OFF) ++option(ZLIB_COMPAT "Compile with zlib compatible API" OFF) ++option(ZLIB_ENABLE_TESTS "Build test binaries" ON) ++option(ZLIBNG_ENABLE_TESTS "Test zlib-ng specific API" ON) ++option(WITH_GTEST "Build gtest_zlib" ON) + option(WITH_FUZZERS "Build test/fuzz" OFF) + option(WITH_BENCHMARKS "Build test/benchmarks" OFF) + option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF) +@@ -128,11 +128,6 @@ + + option(INSTALL_UTILS "Copy minigzip and minideflate during install" OFF) + +-set(ZLIB_BUILD_SHARED_LIBS OFF) +-set(SKIP_INSTALL_ALL ON) +-ocv_warnings_disable(CMAKE_C_FLAGS -Wmissing-prototypes -Wmissing-declarations -Wundef -Wstrict-prototypes -Wtype-limits) +-ocv_warnings_disable(CMAKE_C_FLAGS /wd4819 /wd4244 /wd4334) +- + mark_as_advanced(FORCE + ZLIB_SYMBOL_PREFIX + WITH_REDUCED_MEM +@@ -1147,22 +1142,21 @@ + list(APPEND ZLIB_ALL_SRCS ${ZLIB_GZFILE_PRIVATE_HDRS} ${ZLIB_GZFILE_SRCS}) + endif() + +-if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) ++if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + set(ZLIB_DLL_SRCS win32/zlib${SUFFIX}1.rc) + endif() + +-if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS) ++if(NOT DEFINED BUILD_SHARED_LIBS) + add_library(zlib SHARED ${ZLIB_ALL_SRCS} ${ZLIB_DLL_SRCS}) + add_library(zlibstatic STATIC ${ZLIB_ALL_SRCS}) + + set(ZLIB_INSTALL_LIBRARIES zlib zlibstatic) + else() ++ add_library(zlib ${ZLIB_ALL_SRCS}) + +- if(ZLIB_BUILD_SHARED_LIBS) +- add_library(zlib SHARED ${ZLIB_ALL_SRCS} ${ZLIB_DLL_SRCS}) ++ if(BUILD_SHARED_LIBS) + target_sources(zlib PRIVATE ${ZLIB_DLL_SRCS}) + else() +- add_library(zlib STATIC ${ZLIB_ALL_SRCS}) + add_library(zlibstatic ALIAS zlib) + endif() + +@@ -1195,17 +1189,17 @@ + + if(WIN32) + # Shared library +- if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) ++ if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + set_target_properties(zlib PROPERTIES OUTPUT_NAME zlib${SUFFIX}) + endif() + # Static library +- if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS) ++ if(NOT DEFINED BUILD_SHARED_LIBS) + if(MSVC) + set_target_properties(zlibstatic PROPERTIES OUTPUT_NAME zlibstatic${SUFFIX}) + else() + set_target_properties(zlibstatic PROPERTIES OUTPUT_NAME z${SUFFIX}) + endif() +- elseif(NOT ZLIB_BUILD_SHARED_LIBS) ++ elseif(NOT BUILD_SHARED_LIBS) + if(MSVC) + set_target_properties(zlib PROPERTIES OUTPUT_NAME zlibstatic${SUFFIX}) + else() +@@ -1217,7 +1211,7 @@ + set_target_properties(${ZLIB_INSTALL_LIBRARIES} PROPERTIES OUTPUT_NAME z${SUFFIX}) + endif() + +-if(NOT DEFINED ZLIB_BUILD_SHARED_LIBS OR ZLIB_BUILD_SHARED_LIBS) ++if(NOT DEFINED BUILD_SHARED_LIBS OR BUILD_SHARED_LIBS) + set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL) + + if(ZLIB_COMPAT) +@@ -1277,6 +1271,8 @@ + if(WITH_GZFILEOP) + set(PKG_CONFIG_CFLAGS "-DWITH_GZFILEOP") + endif() ++configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib.pc.cmakein ++ ${ZLIB_PC} @ONLY) + configure_file(${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h.cmakein + ${CMAKE_CURRENT_BINARY_DIR}/zconf${SUFFIX}.h @ONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/zlib${SUFFIX}.h.in +@@ -1326,6 +1322,17 @@ + set(PACKAGE_CONFIGNAME zlib-ng) + set(PACKAGE_VERSION ${ZLIBNG_HEADER_VERSION}) + endif() ++ configure_package_config_file(${PACKAGE_CONFIGNAME}-config.cmake.in ++ ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config.cmake ++ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME} ++ PATH_VARS INCLUDE_INSTALL_DIR LIB_INSTALL_DIR) ++ write_basic_package_version_file( ++ ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config-version.cmake ++ VERSION ${PACKAGE_VERSION} ++ COMPATIBILITY AnyNewerVersion) ++ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config.cmake ++ ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_CONFIGNAME}-config-version.cmake ++ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${EXPORT_NAME}) + endif() + + #============================================================================ +@@ -1335,7 +1342,7 @@ + if(ZLIB_ENABLE_TESTS) + enable_testing() + +- if(ZLIB_BUILD_SHARED_LIBS) ++ if(BUILD_SHARED_LIBS) + if(ZLIBNG_ENABLE_TESTS) + message(STATUS "Disabling zlib-ng tests because shared libraries are enabled") + set(ZLIBNG_ENABLE_TESTS OFF) +@@ -1399,12 +1406,19 @@ + + FEATURE_SUMMARY(WHAT ALL INCLUDE_QUIET_PACKAGES) + +-if(ENABLE_SOLUTION_FOLDERS) +- set_target_properties(${ZLIB_INSTALL_LIBRARIES} PROPERTIES FOLDER "3rdparty") +-endif() ++#============================================================================ ++# CPack ++#============================================================================ ++set(CPACK_GENERATOR "TGZ") ++set(CPACK_SOURCE_GENERATOR "TGZ") ++set(CPACK_SOURCE_IGNORE_FILES .git/ _CPack_Packages/ "${PROJECT_BINARY_DIR}/") ++ ++set(CPACK_PACKAGE_NAME "zlib${SUFFIX}") ++set(CPACK_PACKAGE_VERSION ${ZLIB_FULL_VERSION}) ++set(CPACK_PACKAGE_DIRECTORY "${PROJECT_BINARY_DIR}/package") + +-if(NOT BUILD_SHARED_LIBS) +- ocv_install_target(${ZLIB_INSTALL_LIBRARIES} EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev) ++if("${PROJECT_BINARY_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") ++ message(WARNING "Building to source folder is not recommended. Cpack will be unable to generate source package.") + endif() + +-ocv_install_3rdparty_licenses(${ZLIB_INSTALL_LIBRARIES} LICENSE.md) ++include(CPack) diff --git a/3rdparty/zlib-ng/trees.c b/3rdparty/zlib-ng/trees.c index 5bb88389ba..9f2f49137f 100644 --- a/3rdparty/zlib-ng/trees.c +++ b/3rdparty/zlib-ng/trees.c @@ -1,5 +1,5 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2021 Jean-loup Gailly + * Copyright (C) 1995-2024 Jean-loup Gailly * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -75,7 +75,6 @@ static int build_bl_tree (deflate_state *s); static void send_all_trees (deflate_state *s, int lcodes, int dcodes, int blcodes); static void compress_block (deflate_state *s, const ct_data *ltree, const ct_data *dtree); static int detect_data_type (deflate_state *s); -static void bi_flush (deflate_state *s); /* =========================================================================== * Initialize the tree data structures for a new zlib stream. @@ -609,13 +608,6 @@ void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored } } -/* =========================================================================== - * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) - */ -void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { - bi_flush(s); -} - /* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. @@ -623,7 +615,7 @@ void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { void Z_INTERNAL zng_tr_align(deflate_state *s) { zng_tr_emit_tree(s, STATIC_TREES, 0); zng_tr_emit_end_block(s, static_ltree, 0); - bi_flush(s); + zng_tr_flush_bits(s); } /* =========================================================================== @@ -718,21 +710,30 @@ static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data /* dtree: distance tree */ unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned sx = 0; /* running index in sym_buf */ + unsigned sx = 0; /* running index in symbol buffers */ if (s->sym_next != 0) { do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else dist = s->sym_buf[sx++] & 0xff; dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; lc = s->sym_buf[sx++]; +#endif if (dist == 0) { zng_emit_lit(s, ltree, lc); } else { zng_emit_dist(s, ltree, dtree, lc, dist); } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and sym_buf is ok: */ + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < 2 * (s->lit_bufsize + sx), "pending_buf overflow"); +#else Assert(s->pending < s->lit_bufsize + sx, "pending_buf overflow"); +#endif } while (sx < s->sym_next); } @@ -781,27 +782,26 @@ static int detect_data_type(deflate_state *s) { /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. */ -static void bi_flush(deflate_state *s) { - if (s->bi_valid == 64) { - put_uint64(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else { - if (s->bi_valid >= 32) { - put_uint32(s, (uint32_t)s->bi_buf); - s->bi_buf >>= 32; - s->bi_valid -= 32; - } - if (s->bi_valid >= 16) { - put_short(s, (uint16_t)s->bi_buf); - s->bi_buf >>= 16; - s->bi_valid -= 16; - } - if (s->bi_valid >= 8) { - put_byte(s, s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } +void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) { + if (s->bi_valid >= 48) { + put_uint32(s, (uint32_t)s->bi_buf); + put_short(s, (uint16_t)(s->bi_buf >> 32)); + s->bi_buf >>= 48; + s->bi_valid -= 48; + } else if (s->bi_valid >= 32) { + put_uint32(s, (uint32_t)s->bi_buf); + s->bi_buf >>= 32; + s->bi_valid -= 32; + } + if (s->bi_valid >= 16) { + put_short(s, (uint16_t)s->bi_buf); + s->bi_buf >>= 16; + s->bi_valid -= 16; + } + if (s->bi_valid >= 8) { + put_byte(s, s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; } } diff --git a/3rdparty/zlib-ng/win32/Makefile.a64 b/3rdparty/zlib-ng/win32/Makefile.a64 new file mode 100644 index 0000000000..9f8d6fb7fa --- /dev/null +++ b/3rdparty/zlib-ng/win32/Makefile.a64 @@ -0,0 +1,252 @@ +# Makefile for zlib using Microsoft (Visual) C +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler +# +# Usage: +# nmake -f win32/Makefile.a64 (standard build) +# nmake -f win32/Makefile.a64 LOC=-DFOO (nonstandard build) + +# The toplevel directory of the source tree. +# +TOP = . + +# optional build flags +LOC = + +# variables +STATICLIB = zlib.lib +SHAREDLIB = zlib1.dll +IMPLIB = zdll.lib +SYMBOL_PREFIX = + +CC = cl +LD = link +AR = lib +RC = rc +CP = copy /y +INCLUDES = -I$(TOP) -I$(TOP)/arch/arm -I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) +WFLAGS = \ + -D_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ + -D_CRT_SECURE_NO_DEPRECATE \ + -D_CRT_NONSTDC_NO_DEPRECATE \ + -DARM_FEATURES \ + -DARM_NEON_HASLD4 \ + # +LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest +ARFLAGS = -nologo +RCFLAGS = /dARM64 /r +DEFFILE = zlib.def +RCFILE = zlib1.rc +RESFILE = zlib1.res +WITH_GZFILEOP = yes +ZLIB_COMPAT = +SUFFIX = + +OBJS = \ + adler32.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ + arm_features.obj \ + chunkset_c.obj \ + compare256_c.obj \ + compress.obj \ + cpu_features.obj \ + crc32.obj \ + crc32_braid_c.obj \ + crc32_braid_comb.obj \ + crc32_fold_c.obj \ + deflate.obj \ + deflate_fast.obj \ + deflate_huff.obj \ + deflate_medium.obj \ + deflate_quick.obj \ + deflate_rle.obj \ + deflate_slow.obj \ + deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ + insert_string.obj \ + insert_string_roll.obj \ + slide_hash_c.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ + # +!if "$(ZLIB_COMPAT)" != "" +WITH_GZFILEOP = yes +WFLAGS = $(WFLAGS) -DZLIB_COMPAT +DEFFILE = zlibcompat.def +!else +STATICLIB = zlib-ng.lib +SHAREDLIB = zlib-ng1.dll +IMPLIB = zngdll.lib +DEFFILE = zlib-ng.def +RCFILE = zlib-ng1.rc +RESFILE = zlib-ng1.res +SUFFIX = -ng +!endif + +!if "$(WITH_GZFILEOP)" != "" +WFLAGS = $(WFLAGS) -DWITH_GZFILEOP +OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj +!endif + +WFLAGS = $(WFLAGS) \ + -DARM_ACLE \ + -D__ARM_NEON__=1 \ + -DARM_NEON \ + -DARM_NOCHECK_NEON \ + # +OBJS = $(OBJS) crc32_acle.obj adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj + +# targets +all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +!if "$(SYMBOL_PREFIX)" != "" +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" +!else +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty + $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h +!endif + +zlib$(SUFFIX).h: zlib$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +gzread.c: gzread.c.in + cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h + $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h + +$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in + cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +$(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) + +$(IMPLIB): $(SHAREDLIB) + +$(SHAREDLIB): zconf $(TOP)/win32/$(DEFFILE) $(OBJS) $(RESFILE) + $(LD) $(LDFLAGS) -def:$(TOP)/win32/$(DEFFILE) -dll -implib:$(IMPLIB) \ + -out:$@ -base:0x55A4C0000 $(OBJS) $(RESFILE) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;2 + +example.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +example_d.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +{$(TOP)}.c.obj: + $(CC) -c $(WFLAGS) $(CFLAGS) $< + +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c + +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c + +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c + +{$(TOP)/arch/arm}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/test}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< + +$(TOP)/zconf$(SUFFIX).h: zconf + +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: $(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/arm/arm_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h +inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h +inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_neon.obj: $(TOP)/arch/arm/slide_hash_neon.c $(TOP)/arch/arm/neon_intrins.h $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h + +$(RESFILE): $(TOP)/win32/$(RCFILE) + $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +testdll: example_d.exe minigzip_d.exe + example_d + echo hello world | minigzip_d | minigzip_d -d + +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + + +# cleanup +clean: + -del $(STATICLIB) + -del $(SHAREDLIB) + -del $(IMPLIB) + -del *.obj + -del *.res + -del *.exp + -del *.exe + -del *.pdb + -del *.manifest + +distclean: clean + -del zconf$(SUFFIX).h + -del zlib$(SUFFIX).h + -del zlib_name_mangling$(SUFFIX).h + -del $(TOP)\win32\zlib.def + -del $(TOP)\win32\zlibcompat.def + -del $(TOP)\win32\zlib-ng.def + -del gzread.c diff --git a/3rdparty/zlib-ng/win32/Makefile.arm b/3rdparty/zlib-ng/win32/Makefile.arm new file mode 100644 index 0000000000..cab999dfe0 --- /dev/null +++ b/3rdparty/zlib-ng/win32/Makefile.arm @@ -0,0 +1,272 @@ +# Makefile for zlib using Microsoft (Visual) C +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler +# +# Usage: +# nmake -f win32/Makefile.arm (standard build) +# nmake -f win32/Makefile.arm LOC=-DFOO (nonstandard build) + +# The toplevel directory of the source tree. +# +TOP = . + +# optional build flags +LOC = + +# variables +STATICLIB = zlib.lib +SHAREDLIB = zlib1.dll +IMPLIB = zdll.lib +SYMBOL_PREFIX = + +CC = cl +LD = link +AR = lib +RC = rc +CP = copy /y +INCLUDES = -I$(TOP) -I$(TOP)/arch/arm -I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) +WFLAGS = \ + -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1 \ + -D_CRT_SECURE_NO_DEPRECATE \ + -D_CRT_NONSTDC_NO_DEPRECATE \ + -DARM_FEATURES \ + -DARM_NEON_HASLD4 \ + # +LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest +ARFLAGS = -nologo +RCFLAGS = /dARM /r +DEFFILE = zlib.def +RCFILE = zlib1.rc +RESFILE = zlib1.res +WITH_GZFILEOP = yes +ZLIB_COMPAT = +WITH_ACLE = +WITH_NEON = +WITH_ARMV6 = +WITH_VFPV3 = +NEON_ARCH = /arch:VFPv4 +SUFFIX = + +OBJS = \ + adler32.obj \ + adler32_c.obj \ + adler32_fold_c.obj \ + arm_features.obj \ + chunkset_c.obj \ + compare256_c.obj \ + compress.obj \ + cpu_features.obj \ + crc32.obj \ + crc32_braid_c.obj \ + crc32_braid_comb.obj \ + crc32_fold_c.obj \ + deflate.obj \ + deflate_fast.obj \ + deflate_huff.obj \ + deflate_medium.obj \ + deflate_quick.obj \ + deflate_rle.obj \ + deflate_slow.obj \ + deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ + insert_string.obj \ + insert_string_roll.obj \ + slide_hash_c.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ + # +!if "$(ZLIB_COMPAT)" != "" +WITH_GZFILEOP = yes +WFLAGS = $(WFLAGS) -DZLIB_COMPAT +DEFFILE = zlibcompat.def +!else +STATICLIB = zlib-ng.lib +SHAREDLIB = zlib-ng1.dll +IMPLIB = zngdll.lib +DEFFILE = zlib-ng.def +RCFILE = zlib-ng1.rc +RESFILE = zlib-ng1.res +SUFFIX = -ng +!endif + +!if "$(WITH_GZFILEOP)" != "" +WFLAGS = $(WFLAGS) -DWITH_GZFILEOP +OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj +!endif + +!if "$(WITH_ACLE)" != "" +WFLAGS = $(WFLAGS) -DARM_ACLE +OBJS = $(OBJS) crc32_acle.obj +!endif +!if "$(WITH_VFPV3)" != "" +NEON_ARCH = /arch:VFPv3 +!endif +!if "$(WITH_NEON)" != "" +CFLAGS = $(CFLAGS) $(NEON_ARCH) +WFLAGS = $(WFLAGS) \ + -D__ARM_NEON__=1 \ + -DARM_NEON \ + -DARM_NOCHECK_NEON \ + # +OBJS = $(OBJS) adler32_neon.obj chunkset_neon.obj compare256_neon.obj slide_hash_neon.obj +!endif +!if "$(WITH_ARMV6)" != "" +WFLAGS = $(WFLAGS) \ + -DARM_SIMD \ + -DARM_NOCHECK_SIMD \ + # +OBJS = $(OBJS) slide_hash_armv6.obj +!endif + +# targets +all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +!if "$(SYMBOL_PREFIX)" != "" +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" +!else +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty + $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h +!endif + +zlib$(SUFFIX).h: zlib$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +gzread.c: gzread.c.in + cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h + $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h + +$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in + cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +$(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) + +$(IMPLIB): $(SHAREDLIB) + +$(SHAREDLIB): zconf $(TOP)/win32/$(DEFFILE) $(OBJS) $(RESFILE) + $(LD) $(LDFLAGS) -def:$(TOP)/win32/$(DEFFILE) -dll -implib:$(IMPLIB) \ + -out:$@ -base:0x5A4C0000 $(OBJS) $(RESFILE) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;2 + +example.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +example_d.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +{$(TOP)}.c.obj: + $(CC) -c $(WFLAGS) $(CFLAGS) $< + +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c + +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c + +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c + +{$(TOP)/arch/arm}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/test}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< + +$(TOP)/zconf$(SUFFIX).h: zconf + +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: $(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/arm/arm_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h +inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h +inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h + +$(RESFILE): $(TOP)/win32/$(RCFILE) + $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) + +# testing +test: example.exe minigzip.exe + example + echo hello world | minigzip | minigzip -d + +testdll: example_d.exe minigzip_d.exe + example_d + echo hello world | minigzip_d | minigzip_d -d + +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + + +# cleanup +clean: + -del $(STATICLIB) + -del $(SHAREDLIB) + -del $(IMPLIB) + -del *.obj + -del *.res + -del *.exp + -del *.exe + -del *.pdb + -del *.manifest + +distclean: clean + -del zconf$(SUFFIX).h + -del zlib$(SUFFIX).h + -del zlib_name_mangling$(SUFFIX).h + -del $(TOP)\win32\zlib.def + -del $(TOP)\win32\zlibcompat.def + -del $(TOP)\win32\zlib-ng.def + -del gzread.c diff --git a/3rdparty/zlib-ng/win32/Makefile.msc b/3rdparty/zlib-ng/win32/Makefile.msc new file mode 100644 index 0000000000..8392fe46e7 --- /dev/null +++ b/3rdparty/zlib-ng/win32/Makefile.msc @@ -0,0 +1,292 @@ +# Makefile for zlib using Microsoft (Visual) C +# zlib is copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler +# +# Usage: +# nmake -f win32/Makefile.msc (standard build) +# nmake -f win32/Makefile.msc LOC=-DFOO (nonstandard build) + +# The toplevel directory of the source tree. +# +TOP = . + +# optional build flags +LOC = + +# variables +STATICLIB = zlib.lib +SHAREDLIB = zlib1.dll +IMPLIB = zdll.lib +SYMBOL_PREFIX = + +CC = cl +CXX = cl +LD = link +AR = lib +RC = rc +CP = copy /y +INCLUDES = -I$(TOP) -I$(TOP)/arch/x86 -I$(TOP)/arch/generic +CFLAGS = -nologo -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) +CXXFLAGS = -nologo -EHsc -MD -W3 -O2 -Oy- -Zi -Fd"zlib" $(LOC) $(INCLUDES) +WFLAGS = \ + -D_CRT_SECURE_NO_DEPRECATE \ + -D_CRT_NONSTDC_NO_DEPRECATE \ + -DX86_FEATURES \ + -DX86_PCLMULQDQ_CRC \ + -DX86_SSE2 \ + -DX86_SSE42 \ + -DX86_SSSE3 \ + -DX86_AVX2 + +LDFLAGS = -nologo -debug -incremental:no -opt:ref -manifest +ARFLAGS = -nologo +RCFLAGS = /dWIN32 /r +DEFFILE = zlib.def +RCFILE = zlib1.rc +RESFILE = zlib1.res +WITH_GZFILEOP = yes +ZLIB_COMPAT = +SUFFIX = + +OBJS = \ + adler32.obj \ + adler32_c.obj \ + adler32_avx2.obj \ + adler32_avx512.obj \ + adler32_avx512_vnni.obj \ + adler32_sse42.obj \ + adler32_ssse3.obj \ + adler32_fold_c.obj \ + chunkset_c.obj \ + chunkset_avx2.obj \ + chunkset_sse2.obj \ + chunkset_ssse3.obj \ + compare256_c.obj \ + compare256_avx2.obj \ + compare256_sse2.obj \ + compress.obj \ + cpu_features.obj \ + crc32.obj \ + crc32_braid_c.obj \ + crc32_braid_comb.obj \ + crc32_fold_c.obj \ + crc32_pclmulqdq.obj \ + deflate.obj \ + deflate_fast.obj \ + deflate_huff.obj \ + deflate_medium.obj \ + deflate_quick.obj \ + deflate_rle.obj \ + deflate_slow.obj \ + deflate_stored.obj \ + functable.obj \ + infback.obj \ + inflate.obj \ + inftrees.obj \ + insert_string.obj \ + insert_string_roll.obj \ + slide_hash_c.obj \ + slide_hash_avx2.obj \ + slide_hash_sse2.obj \ + trees.obj \ + uncompr.obj \ + zutil.obj \ + x86_features.obj \ + # +!if "$(ZLIB_COMPAT)" != "" +WITH_GZFILEOP = yes +WFLAGS = $(WFLAGS) -DZLIB_COMPAT +DEFFILE = zlibcompat.def +!else +STATICLIB = zlib-ng.lib +SHAREDLIB = zlib-ng1.dll +IMPLIB = zngdll.lib +DEFFILE = zlib-ng.def +RCFILE = zlib-ng1.rc +RESFILE = zlib-ng1.res +SUFFIX = -ng +!endif + +!if "$(WITH_GZFILEOP)" != "" +WFLAGS = $(WFLAGS) -DWITH_GZFILEOP +OBJS = $(OBJS) gzlib.obj gzread.obj gzwrite.obj +!endif + +# targets +all: $(STATICLIB) $(SHAREDLIB) $(IMPLIB) \ + example.exe minigzip.exe example_d.exe minigzip_d.exe + +!if "$(SYMBOL_PREFIX)" != "" +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib_name_mangling$(SUFFIX).h.in zlib_name_mangling$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" +!else +zlib_name_mangling$(SUFFIX).h: zlib_name_mangling.h.empty + $(CP) $(TOP)\zlib_name_mangling.h.empty zlib_name_mangling$(SUFFIX).h +!endif + +zlib$(SUFFIX).h: zlib$(SUFFIX).h.in + cscript $(TOP)\win32\replace.vbs $(TOP)\zlib$(SUFFIX).h.in zlib$(SUFFIX).h "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +gzread.c: gzread.c.in + cscript $(TOP)\win32\replace.vbs $(TOP)\gzread.c.in gzread.c "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +zconf: $(TOP)/zconf$(SUFFIX).h.in $(TOP)/zlib$(SUFFIX).h $(TOP)/zlib_name_mangling$(SUFFIX).h + $(CP) $(TOP)\zconf$(SUFFIX).h.in $(TOP)\zconf$(SUFFIX).h + +$(TOP)/win32/$(DEFFILE): $(TOP)/win32/$(DEFFILE).in + cscript $(TOP)\win32\replace.vbs $(TOP)/win32/$(DEFFILE).in $(TOP)/win32/$(DEFFILE) "@ZLIB_SYMBOL_PREFIX@" "$(SYMBOL_PREFIX)" + +$(STATICLIB): zconf $(OBJS) + $(AR) $(ARFLAGS) -out:$@ $(OBJS) + +$(IMPLIB): $(SHAREDLIB) + +$(SHAREDLIB): zconf $(TOP)/win32/$(DEFFILE) $(OBJS) $(RESFILE) + $(LD) $(LDFLAGS) -def:$(TOP)/win32/$(DEFFILE) -dll -implib:$(IMPLIB) \ + -out:$@ $(OBJS) $(RESFILE) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;2 + +depcheck.exe: depcheck.obj + $(LD) $(LDFLAGS) depcheck.obj + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +example.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + $(LD) $(LDFLAGS) minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(STATICLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +example_d.exe: example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ example.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +minigzip_d.exe: minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + $(LD) $(LDFLAGS) -out:$@ minigzip.obj gzlib2.obj gzread2.obj gzwrite2.obj $(IMPLIB) + if exist $@.manifest \ + mt -nologo -manifest $@.manifest -outputresource:$@;1 + +{$(TOP)}.c.obj: + $(CC) -c $(WFLAGS) $(CFLAGS) $< + +gzlib2.obj: gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzlib2.obj gzlib.c + +gzread2.obj: gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzread2.obj gzread.c + +gzwrite2.obj: gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h + $(CC) -c $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP -Fogzwrite2.obj gzwrite.c + +{$(TOP)/arch/x86}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/arch/generic}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) $< + +{$(TOP)/test}.c.obj: + $(CC) -c -I$(TOP) $(WFLAGS) $(CFLAGS) -DWITH_GZFILEOP $< + +$(TOP)/zconf$(SUFFIX).h: zconf + +{$(TOP)/win32}.cpp.obj: + $(CXX) -c -I$(TOP) $(WFLAGS) $(CXXFLAGS) $< + +adler32.obj: $(TOP)/adler32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_c.obj: $(TOP)/arch/generic/adler32_c.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/adler32_p.h +adler32_avx2.obj: $(TOP)/arch/x86/adler32_avx2.c $(TOP)/zbuild.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx2_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_avx512.obj: $(TOP)/arch/x86/adler32_avx512.c $(TOP)/zbuild.h $(TOP)/arch_functions.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx512_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_avx512_vnni.obj: $(TOP)/arch/x86/adler32_avx512_vnni.c $(TOP)/zbuild.h $(TOP)/arch_functions.h $(TOP)/adler32_p.h $(TOP)/arch/x86/adler32_avx512_p.h \ + $(TOP)/arch/x86/adler32_avx2_p.h $(TOP)/arch/x86/x86_intrins.h +adler32_sse42.obj: $(TOP)/arch/x86/adler32_sse42.c $(TOP)/zbuild.h $(TOP)/adler32_p.h \ + $(TOP)/arch/x86/adler32_ssse3_p.h +adler32_ssse3.obj: $(TOP)/arch/x86/adler32_ssse3.c $(TOP)/zbuild.h $(TOP)/adler32_p.h \ + $(TOP)/arch/x86/adler32_ssse3_p.h +adler32_fold_c.obj: $(TOP)/arch/generic/adler32_fold_c.c $(TOP)/zbuild.h $(TOP)/functable.h +chunkset_c.obj: $(TOP)/arch/generic/chunkset_c.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +chunkset_avx2.obj: $(TOP)/arch/x86/chunkset_avx2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h +chunkset_sse2.obj: $(TOP)/arch/x86/chunkset_sse2.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h +chunkset_ssse3.obj: $(TOP)/arch/x86/chunkset_ssse3.c $(TOP)/zbuild.h $(TOP)/chunkset_tpl.h $(TOP)/inffast_tpl.h $(TOP)/arch/generic/chunk_permute_table.h +compare256_c.obj: $(TOP)/arch/generic/compare256_c.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compare256_avx2.obj: $(TOP)/arch/x86/compare256_avx2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compare256_sse2.obj: $(TOP)/arch/x86/compare256_sse2.c $(TOP)/zbuild.h $(TOP)/zutil_p.h $(TOP)/deflate.h $(TOP)/fallback_builtins.h $(TOP)/match_tpl.h +compress.obj: $(TOP)/compress.c $(TOP)/zbuild.h $(TOP)/zutil.h +cpu_features.obj: $(TOP)/cpu_features.c $(TOP)/cpu_features.h $(TOP)/zbuild.h +crc32.obj: $(TOP)/crc32.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/crc32_braid_tbl.h +crc32_braid_c.obj: $(TOP)/arch/generic/crc32_braid_c.c $(TOP)/zbuild.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h +crc32_braid_comb.obj: $(TOP)/crc32_braid_comb.c $(TOP)/zutil.h $(TOP)/crc32_braid_p.h $(TOP)/crc32_braid_tbl.h $(TOP)/crc32_braid_comb_p.h +crc32_fold_c.obj: $(TOP)/arch/generic/crc32_fold_c.c $(TOP)/zbuild.h $(TOP)/crc32.h $(TOP)/functable.h $(TOP)/zutil.h +crc32_pclmulqdq.obj: $(TOP)/arch/x86/crc32_pclmulqdq.c $(TOP)/arch/x86/crc32_pclmulqdq_tpl.h +deflate.obj: $(TOP)/deflate.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_fast.obj: $(TOP)/deflate_fast.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_huff.obj: $(TOP)/deflate_huff.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_medium.obj: $(TOP)/deflate_medium.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_quick.obj: $(TOP)/deflate_quick.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/trees_emit.h $(TOP)/zutil_p.h +deflate_rle.obj: $(TOP)/deflate_rle.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h $(TOP)/compare256_rle.h +deflate_slow.obj: $(TOP)/deflate_slow.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +deflate_stored.obj: $(TOP)/deflate_stored.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/deflate_p.h $(TOP)/functable.h +functable.obj: $(TOP)/functable.c $(TOP)/zbuild.h $(TOP)/functable.h $(TOP)/cpu_features.h $(TOP)/arch/x86/x86_features.h $(TOP)/arch_functions.h +gzlib.obj: $(TOP)/gzlib.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzread.obj: $(TOP)/gzread.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +gzwrite.obj: $(TOP)/gzwrite.c $(TOP)/zbuild.h $(TOP)/gzguts.h $(TOP)/zutil_p.h +infback.obj: $(TOP)/infback.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h +inflate.obj: $(TOP)/inflate.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h $(TOP)/inflate.h $(TOP)/inflate_p.h $(TOP)/functable.h $(TOP)/inffixed_tbl.h +inftrees.obj: $(TOP)/inftrees.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/inftrees.h +insert_string.obj: $(TOP)/insert_string.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +insert_string_roll.obj: $(TOP)/insert_string_roll.c $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/insert_string_tpl.h +slide_hash_c.obj: $(TOP)/arch/generic/slide_hash_c.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_avx2.obj: $(TOP)/arch/x86/slide_hash_avx2.c $(TOP)/zbuild.h $(TOP)/deflate.h +slide_hash_sse2.obj: $(TOP)/arch/x86/slide_hash_sse2.c $(TOP)/zbuild.h $(TOP)/deflate.h +trees.obj: $(TOP)/trees.c $(TOP)/trees.h $(TOP)/trees_emit.h $(TOP)/zbuild.h $(TOP)/deflate.h $(TOP)/trees_tbl.h +uncompr.obj: $(TOP)/uncompr.c $(TOP)/zbuild.h $(TOP)/zutil.h +zutil.obj: $(TOP)/zutil.c $(TOP)/zbuild.h $(TOP)/zutil.h $(TOP)/zutil_p.h + +$(RESFILE): $(TOP)/win32/$(RCFILE) + $(RC) $(RCFLAGS) /fo$@ $(TOP)/win32/$(RCFILE) + +# testing +depcheck: depcheck.exe + depcheck win32\Makefile.msc . + depcheck win32\Makefile.arm . + depcheck win32\Makefile.a64 . + +test: example.exe minigzip.exe depcheck + example + echo hello world | minigzip | minigzip -d + +testdll: example_d.exe minigzip_d.exe + example_d + echo hello world | minigzip_d | minigzip_d -d + +depcheck.obj: $(TOP)/win32/depcheck.cpp + +example.obj: $(TOP)/test/example.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h $(TOP)/deflate.h $(TOP)/test/test_shared_ng.h + +minigzip.obj: $(TOP)/test/minigzip.c $(TOP)/zbuild.h $(TOP)/zlib$(SUFFIX).h + + +# cleanup +clean: + -del $(STATICLIB) + -del $(SHAREDLIB) + -del $(IMPLIB) + -del *.obj + -del *.res + -del *.exp + -del *.exe + -del *.pdb + -del *.manifest + +distclean: clean + -del zconf$(SUFFIX).h + -del zlib$(SUFFIX).h + -del zlib_name_mangling$(SUFFIX).h + -del $(TOP)\win32\zlib.def + -del $(TOP)\win32\zlibcompat.def + -del $(TOP)\win32\zlib-ng.def + -del gzread.c diff --git a/3rdparty/zlib-ng/win32/depcheck.cpp b/3rdparty/zlib-ng/win32/depcheck.cpp new file mode 100644 index 0000000000..f83bdd6852 --- /dev/null +++ b/3rdparty/zlib-ng/win32/depcheck.cpp @@ -0,0 +1,321 @@ +/* depcheck.cpp - Dependency checker for NMake Makefiles + * Copyright (c) 2024 Mika T. Lindqvist + */ + +#include +#include +#include +#include +#include + +int main(int argc, char* argv[]) { + if (argc != 3) { + printf("Usage: depcheck Makefile \n"); + return -1; + } + std::filebuf fb; + if (fb.open (argv[1],std::ios::in)) { + std::istream is(&fb); + std::string makefile = argv[1]; + std::string l, tmp, tmp2; + while (is) { + std::getline(is, l); + while (l.back() == '\\') { + std::getline(is, tmp); + l.replace(l.length() - 1, 1, tmp); + } + size_t pos = l.find("obj:"); + if (pos != std::string::npos) { + std::string objfile = l.substr(0, pos+3); + printf("File: %s\n", objfile.c_str()); + std::vector files; + std::stringstream ss(l.substr(pos+4)); + while(getline(ss, tmp, ' ')){ + if (tmp != "" && tmp != "/") { + files.push_back(tmp); + } + } + for (auto it = files.begin(); it != files.end(); ++it) { + printf("Dependency: %s\n", (*it).c_str()); + } + if (!files.empty()) { + std::filebuf fb2; + std::string src = files[0]; + size_t pos2 = src.find("$(TOP)"); + if (pos2 != std::string::npos) { + src.replace(pos2, 6, argv[2]); + } + printf("Source: %s\n", src.c_str()); + if (fb2.open(src.c_str(),std::ios::in)) { + std::istream is2(&fb2); + std::vector includes; + while (is2) { + std::getline(is2, l); + pos = l.find("#"); + if (pos != std::string::npos) { + pos2 = l.find("include"); + size_t pos3 = l.find("\""); + if (pos2 != std::string::npos && pos3 != std::string::npos && pos2 > pos && pos3 > pos2) { + tmp = l.substr(pos3 + 1); + pos2 = tmp.find("\""); + if (pos2 != std::string::npos) { + tmp = tmp.substr(0, pos2); + } + pos2 = tmp.find("../"); + if (pos2 != std::string::npos) { + tmp = tmp.substr(3); + } + printf("Line: %s\n", tmp.c_str()); + int found = 0; + for (size_t i = 1; i < files.size(); i++) { + pos3 = files[i].find("$(SUFFIX)"); + if (pos3 != std::string::npos) { + tmp2 = files[i].substr(0, pos3).append(files[i].substr(pos3 + 9)); + printf("Comparing dependency \"%s\" and \"%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == tmp) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + + tmp2 = files[i].substr(0, pos3).append("-ng").append(files[i].substr(pos3 + 9)); + printf("Comparing dependency \"%s\" and \"%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == tmp) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", tmp2.c_str(), tmp.c_str()); + if (tmp2 == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + } else { + printf("Comparing dependency \"%s\" and \"%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == tmp) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/generic/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/generic/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/arm/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/arm/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/arch/x86/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/arch/x86/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + printf("Comparing dependency \"%s\" and \"$(TOP)/test/%s\"\n", files[i].c_str(), tmp.c_str()); + if (files[i] == std::string("$(TOP)/test/").append(tmp)) { + printf("Dependency %s OK\n", tmp.c_str()); + found = 1; + includes.push_back(tmp); + break; + } + } + } + // Skip irrelevant dependencies + if (tmp.substr(0, 9) == "arch/s390") found = 1; + if (tmp == "zlib-ng.h" && std::find(includes.begin(), includes.end(), "zlib.h") != includes.end()) found = 1; + if (found == 0) { + printf("%s: Dependency %s missing for %s!\n", makefile.c_str(), tmp.c_str(), objfile.c_str()); + return -1; + } + } + } + } + for (size_t i = 1; i < files.size(); i++) { + int found = 0; + tmp = files[i]; + printf("Dependency: %s\n", tmp.c_str()); + pos2 = tmp.find("$(TOP)"); + if (pos2 != std::string::npos) { + tmp = tmp.substr(7); + } + for (size_t j = 0; j < includes.size(); j++) { + pos2 = tmp.find("$(SUFFIX)"); + if (pos2 != std::string::npos) { + std::string tmp1 = tmp.substr(0, pos2).append(tmp.substr(pos2 + 9)); + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/arm/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + tmp1 = tmp.substr(0, pos2).append("-ng").append(tmp.substr(pos2 + 9)); + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/arm/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp1.c_str(), includes[j].c_str()); + if (tmp1 == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + } else { + printf("[%zd/%zd] Comparing dependency \"%s\" and \"%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == includes[j]) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/generic/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/generic/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/arm/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/arm/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"arch/x86/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("arch/x86/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + printf("[%zd/%zd] Comparing dependency \"%s\" and \"test/%s\"\n", j, includes.size(), tmp.c_str(), includes[j].c_str()); + if (tmp == std::string("test/").append(includes[j])) { + printf("Dependency %s OK\n", files[i].c_str()); + found = 1; + break; + } + } + } + // Skip indirect dependencies + if (tmp.find("arm_features.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "cpu_features.h") != includes.end() + && (makefile.find(".arm") != std::string::npos + || makefile.find(".a64") != std::string::npos)) found = 1; + if (tmp.find("x86_features.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "cpu_features.h") != includes.end() + && makefile.find(".msc") != std::string::npos) found = 1; + // + if (tmp.find("generic_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end()) found = 1; + if (tmp.find("arm_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end() + && (makefile.find(".arm") != std::string::npos + || makefile.find(".a64") != std::string::npos)) found = 1; + if (tmp.find("x86_functions.h") != std::string::npos + && std::find(includes.begin(), includes.end(), "arch_functions.h") != includes.end() + && makefile.find(".msc") != std::string::npos) found = 1; + if (found == 0) { + printf("%s: Dependency %s not needed for %s\n", makefile.c_str(), files[i].c_str(), objfile.c_str()); + return -1; + } + } + fb2.close(); + } + } + } + } + fb.close(); + } + return 0; +} diff --git a/3rdparty/zlib-ng/win32/replace.vbs b/3rdparty/zlib-ng/win32/replace.vbs new file mode 100644 index 0000000000..6779971d07 --- /dev/null +++ b/3rdparty/zlib-ng/win32/replace.vbs @@ -0,0 +1,15 @@ +strInputFileName = Wscript.Arguments(0) +strOutputFileName = Wscript.Arguments(1) +strOldText = Wscript.Arguments(2) +strNewText = Wscript.Arguments(3) + +Set objFSO = CreateObject("Scripting.FileSystemObject") +Set objFile = objFSO.OpenTextFile(strInputFileName, 1) + +strText = objFile.ReadAll +objFile.Close +strNewText = Replace(strText, strOldText, strNewText) + +Set objFile = objFSO.OpenTextFile(strOutputFileName, 2, True) +objFile.Write strNewText +objFile.Close diff --git a/3rdparty/zlib-ng/win32/zlib-ng.def.in b/3rdparty/zlib-ng/win32/zlib-ng.def.in new file mode 100644 index 0000000000..53b2bc21f7 --- /dev/null +++ b/3rdparty/zlib-ng/win32/zlib-ng.def.in @@ -0,0 +1,60 @@ +; zlib-ng data compression library +EXPORTS +; basic functions + @ZLIB_SYMBOL_PREFIX@zlibng_version + @ZLIB_SYMBOL_PREFIX@zng_deflate + @ZLIB_SYMBOL_PREFIX@zng_deflateEnd + @ZLIB_SYMBOL_PREFIX@zng_deflateInit + @ZLIB_SYMBOL_PREFIX@zng_deflateInit2 + @ZLIB_SYMBOL_PREFIX@zng_inflate + @ZLIB_SYMBOL_PREFIX@zng_inflateEnd + @ZLIB_SYMBOL_PREFIX@zng_inflateInit + @ZLIB_SYMBOL_PREFIX@zng_inflateInit2 + @ZLIB_SYMBOL_PREFIX@zng_inflateBackInit +; advanced functions + @ZLIB_SYMBOL_PREFIX@zng_deflateSetDictionary + @ZLIB_SYMBOL_PREFIX@zng_deflateGetDictionary + @ZLIB_SYMBOL_PREFIX@zng_deflateCopy + @ZLIB_SYMBOL_PREFIX@zng_deflateReset + @ZLIB_SYMBOL_PREFIX@zng_deflateParams + @ZLIB_SYMBOL_PREFIX@zng_deflateTune + @ZLIB_SYMBOL_PREFIX@zng_deflateBound + @ZLIB_SYMBOL_PREFIX@zng_deflatePending + @ZLIB_SYMBOL_PREFIX@zng_deflatePrime + @ZLIB_SYMBOL_PREFIX@zng_deflateSetHeader + @ZLIB_SYMBOL_PREFIX@zng_deflateSetParams + @ZLIB_SYMBOL_PREFIX@zng_deflateGetParams + @ZLIB_SYMBOL_PREFIX@zng_inflateSetDictionary + @ZLIB_SYMBOL_PREFIX@zng_inflateGetDictionary + @ZLIB_SYMBOL_PREFIX@zng_inflateSync + @ZLIB_SYMBOL_PREFIX@zng_inflateCopy + @ZLIB_SYMBOL_PREFIX@zng_inflateReset + @ZLIB_SYMBOL_PREFIX@zng_inflateReset2 + @ZLIB_SYMBOL_PREFIX@zng_inflatePrime + @ZLIB_SYMBOL_PREFIX@zng_inflateMark + @ZLIB_SYMBOL_PREFIX@zng_inflateGetHeader + @ZLIB_SYMBOL_PREFIX@zng_inflateBack + @ZLIB_SYMBOL_PREFIX@zng_inflateBackEnd + @ZLIB_SYMBOL_PREFIX@zng_zlibCompileFlags +; utility functions + @ZLIB_SYMBOL_PREFIX@zng_compress + @ZLIB_SYMBOL_PREFIX@zng_compress2 + @ZLIB_SYMBOL_PREFIX@zng_compressBound + @ZLIB_SYMBOL_PREFIX@zng_uncompress + @ZLIB_SYMBOL_PREFIX@zng_uncompress2 +; checksum functions + @ZLIB_SYMBOL_PREFIX@zng_adler32 + @ZLIB_SYMBOL_PREFIX@zng_adler32_z + @ZLIB_SYMBOL_PREFIX@zng_crc32 + @ZLIB_SYMBOL_PREFIX@zng_crc32_z + @ZLIB_SYMBOL_PREFIX@zng_adler32_combine + @ZLIB_SYMBOL_PREFIX@zng_crc32_combine +; various hacks, don't look :) + @ZLIB_SYMBOL_PREFIX@zng_zError + @ZLIB_SYMBOL_PREFIX@zng_inflateSyncPoint + @ZLIB_SYMBOL_PREFIX@zng_get_crc_table + @ZLIB_SYMBOL_PREFIX@zng_inflateUndermine + @ZLIB_SYMBOL_PREFIX@zng_inflateValidate + @ZLIB_SYMBOL_PREFIX@zng_inflateCodesUsed + @ZLIB_SYMBOL_PREFIX@zng_inflateResetKeep + @ZLIB_SYMBOL_PREFIX@zng_deflateResetKeep diff --git a/3rdparty/zlib-ng/win32/zlib-ng1.rc b/3rdparty/zlib-ng/win32/zlib-ng1.rc new file mode 100644 index 0000000000..f65cfa254e --- /dev/null +++ b/3rdparty/zlib-ng/win32/zlib-ng1.rc @@ -0,0 +1,36 @@ +#include +#include "zlib-ng.h" + +VS_VERSION_INFO VERSIONINFO + FILEVERSION ZLIBNG_VER_MAJOR,ZLIBNG_VER_MINOR,ZLIBNG_VER_REVISION,0 + PRODUCTVERSION ZLIBNG_VER_MAJOR,ZLIBNG_VER_MINOR,ZLIBNG_VER_REVISION,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS 1 +#else + FILEFLAGS 0 +#endif + FILEOS VOS__WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 // not used +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + //language ID = U.S. English, char set = Windows, Multilingual + BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", ZLIBNG_VERSION "\0" + VALUE "InternalName", "zlib-ng1.dll\0" + VALUE "LegalCopyright", "(C) 1995-2024 Jean-loup Gailly & Mark Adler\0" + VALUE "OriginalFilename", "zlib-ng1.dll\0" + VALUE "ProductName", "zlib\0" + VALUE "ProductVersion", ZLIBNG_VERSION "\0" + VALUE "Comments", "For more information visit https://www.zlib.net/\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1252 + END +END diff --git a/3rdparty/zlib-ng/win32/zlib.def.in b/3rdparty/zlib-ng/win32/zlib.def.in new file mode 100644 index 0000000000..561a42f7f8 --- /dev/null +++ b/3rdparty/zlib-ng/win32/zlib.def.in @@ -0,0 +1,64 @@ +; zlib data compression library +EXPORTS +; basic functions + @ZLIB_SYMBOL_PREFIX@zlibVersion + @ZLIB_SYMBOL_PREFIX@deflate + @ZLIB_SYMBOL_PREFIX@deflateEnd + @ZLIB_SYMBOL_PREFIX@inflate + @ZLIB_SYMBOL_PREFIX@inflateEnd +; advanced functions + @ZLIB_SYMBOL_PREFIX@deflateSetDictionary + @ZLIB_SYMBOL_PREFIX@deflateGetDictionary + @ZLIB_SYMBOL_PREFIX@deflateCopy + @ZLIB_SYMBOL_PREFIX@deflateReset + @ZLIB_SYMBOL_PREFIX@deflateParams + @ZLIB_SYMBOL_PREFIX@deflateTune + @ZLIB_SYMBOL_PREFIX@deflateBound + @ZLIB_SYMBOL_PREFIX@deflatePending + @ZLIB_SYMBOL_PREFIX@deflatePrime + @ZLIB_SYMBOL_PREFIX@deflateSetHeader + @ZLIB_SYMBOL_PREFIX@inflateSetDictionary + @ZLIB_SYMBOL_PREFIX@inflateGetDictionary + @ZLIB_SYMBOL_PREFIX@inflateSync + @ZLIB_SYMBOL_PREFIX@inflateCopy + @ZLIB_SYMBOL_PREFIX@inflateReset + @ZLIB_SYMBOL_PREFIX@inflateReset2 + @ZLIB_SYMBOL_PREFIX@inflatePrime + @ZLIB_SYMBOL_PREFIX@inflateMark + @ZLIB_SYMBOL_PREFIX@inflateGetHeader + @ZLIB_SYMBOL_PREFIX@inflateBack + @ZLIB_SYMBOL_PREFIX@inflateBackEnd + @ZLIB_SYMBOL_PREFIX@zlibCompileFlags +; utility functions + @ZLIB_SYMBOL_PREFIX@compress + @ZLIB_SYMBOL_PREFIX@compress2 + @ZLIB_SYMBOL_PREFIX@compressBound + @ZLIB_SYMBOL_PREFIX@uncompress + @ZLIB_SYMBOL_PREFIX@uncompress2 +; large file functions + @ZLIB_SYMBOL_PREFIX@adler32_combine64 + @ZLIB_SYMBOL_PREFIX@crc32_combine64 + @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 +; checksum functions + @ZLIB_SYMBOL_PREFIX@adler32 + @ZLIB_SYMBOL_PREFIX@adler32_z + @ZLIB_SYMBOL_PREFIX@crc32 + @ZLIB_SYMBOL_PREFIX@crc32_z + @ZLIB_SYMBOL_PREFIX@adler32_combine + @ZLIB_SYMBOL_PREFIX@crc32_combine + @ZLIB_SYMBOL_PREFIX@crc32_combine_gen + @ZLIB_SYMBOL_PREFIX@crc32_combine_op +; various hacks, don't look :) + @ZLIB_SYMBOL_PREFIX@deflateInit_ + @ZLIB_SYMBOL_PREFIX@deflateInit2_ + @ZLIB_SYMBOL_PREFIX@inflateInit_ + @ZLIB_SYMBOL_PREFIX@inflateInit2_ + @ZLIB_SYMBOL_PREFIX@inflateBackInit_ + @ZLIB_SYMBOL_PREFIX@zError + @ZLIB_SYMBOL_PREFIX@inflateSyncPoint + @ZLIB_SYMBOL_PREFIX@get_crc_table + @ZLIB_SYMBOL_PREFIX@inflateUndermine + @ZLIB_SYMBOL_PREFIX@inflateValidate + @ZLIB_SYMBOL_PREFIX@inflateCodesUsed + @ZLIB_SYMBOL_PREFIX@inflateResetKeep + @ZLIB_SYMBOL_PREFIX@deflateResetKeep diff --git a/3rdparty/zlib-ng/win32/zlib1.rc b/3rdparty/zlib-ng/win32/zlib1.rc new file mode 100644 index 0000000000..9bb9c18654 --- /dev/null +++ b/3rdparty/zlib-ng/win32/zlib1.rc @@ -0,0 +1,36 @@ +#include +#include "zlib.h" + +VS_VERSION_INFO VERSIONINFO + FILEVERSION ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0 + PRODUCTVERSION ZLIB_VER_MAJOR,ZLIB_VER_MINOR,ZLIB_VER_REVISION,0 + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +#ifdef _DEBUG + FILEFLAGS 1 +#else + FILEFLAGS 0 +#endif + FILEOS VOS__WINDOWS32 + FILETYPE VFT_DLL + FILESUBTYPE 0 // not used +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + //language ID = U.S. English, char set = Windows, Multilingual + BEGIN + VALUE "FileDescription", "zlib data compression library\0" + VALUE "FileVersion", ZLIB_VERSION "\0" + VALUE "InternalName", "zlib1.dll\0" + VALUE "LegalCopyright", "(C) 1995-2024 Jean-loup Gailly & Mark Adler\0" + VALUE "OriginalFilename", "zlib1.dll\0" + VALUE "ProductName", "zlib\0" + VALUE "ProductVersion", ZLIB_VERSION "\0" + VALUE "Comments", "For more information visit https://www.zlib.net/\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 1252 + END +END diff --git a/3rdparty/zlib-ng/win32/zlibcompat.def.in b/3rdparty/zlib-ng/win32/zlibcompat.def.in new file mode 100644 index 0000000000..52a713cf03 --- /dev/null +++ b/3rdparty/zlib-ng/win32/zlibcompat.def.in @@ -0,0 +1,97 @@ +; zlib data compression library +EXPORTS +; basic functions + @ZLIB_SYMBOL_PREFIX@zlibVersion + @ZLIB_SYMBOL_PREFIX@deflate + @ZLIB_SYMBOL_PREFIX@deflateEnd + @ZLIB_SYMBOL_PREFIX@inflate + @ZLIB_SYMBOL_PREFIX@inflateEnd +; advanced functions + @ZLIB_SYMBOL_PREFIX@deflateSetDictionary + @ZLIB_SYMBOL_PREFIX@deflateGetDictionary + @ZLIB_SYMBOL_PREFIX@deflateCopy + @ZLIB_SYMBOL_PREFIX@deflateReset + @ZLIB_SYMBOL_PREFIX@deflateParams + @ZLIB_SYMBOL_PREFIX@deflateTune + @ZLIB_SYMBOL_PREFIX@deflateBound + @ZLIB_SYMBOL_PREFIX@deflatePending + @ZLIB_SYMBOL_PREFIX@deflatePrime + @ZLIB_SYMBOL_PREFIX@deflateSetHeader + @ZLIB_SYMBOL_PREFIX@inflateSetDictionary + @ZLIB_SYMBOL_PREFIX@inflateGetDictionary + @ZLIB_SYMBOL_PREFIX@inflateSync + @ZLIB_SYMBOL_PREFIX@inflateCopy + @ZLIB_SYMBOL_PREFIX@inflateReset + @ZLIB_SYMBOL_PREFIX@inflateReset2 + @ZLIB_SYMBOL_PREFIX@inflatePrime + @ZLIB_SYMBOL_PREFIX@inflateMark + @ZLIB_SYMBOL_PREFIX@inflateGetHeader + @ZLIB_SYMBOL_PREFIX@inflateBack + @ZLIB_SYMBOL_PREFIX@inflateBackEnd + @ZLIB_SYMBOL_PREFIX@zlibCompileFlags +; utility functions + @ZLIB_SYMBOL_PREFIX@compress + @ZLIB_SYMBOL_PREFIX@compress2 + @ZLIB_SYMBOL_PREFIX@compressBound + @ZLIB_SYMBOL_PREFIX@uncompress + @ZLIB_SYMBOL_PREFIX@uncompress2 + @ZLIB_SYMBOL_PREFIX@gzopen + @ZLIB_SYMBOL_PREFIX@gzdopen + @ZLIB_SYMBOL_PREFIX@gzbuffer + @ZLIB_SYMBOL_PREFIX@gzsetparams + @ZLIB_SYMBOL_PREFIX@gzread + @ZLIB_SYMBOL_PREFIX@gzfread + @ZLIB_SYMBOL_PREFIX@gzwrite + @ZLIB_SYMBOL_PREFIX@gzfwrite + @ZLIB_SYMBOL_PREFIX@gzprintf + @ZLIB_SYMBOL_PREFIX@gzvprintf + @ZLIB_SYMBOL_PREFIX@gzputs + @ZLIB_SYMBOL_PREFIX@gzgets + @ZLIB_SYMBOL_PREFIX@gzputc + @ZLIB_SYMBOL_PREFIX@gzgetc + @ZLIB_SYMBOL_PREFIX@gzungetc + @ZLIB_SYMBOL_PREFIX@gzflush + @ZLIB_SYMBOL_PREFIX@gzseek + @ZLIB_SYMBOL_PREFIX@gzrewind + @ZLIB_SYMBOL_PREFIX@gztell + @ZLIB_SYMBOL_PREFIX@gzoffset + @ZLIB_SYMBOL_PREFIX@gzeof + @ZLIB_SYMBOL_PREFIX@gzdirect + @ZLIB_SYMBOL_PREFIX@gzclose + @ZLIB_SYMBOL_PREFIX@gzclose_r + @ZLIB_SYMBOL_PREFIX@gzclose_w + @ZLIB_SYMBOL_PREFIX@gzerror + @ZLIB_SYMBOL_PREFIX@gzclearerr +; large file functions + @ZLIB_SYMBOL_PREFIX@gzopen64 + @ZLIB_SYMBOL_PREFIX@gzseek64 + @ZLIB_SYMBOL_PREFIX@gztell64 + @ZLIB_SYMBOL_PREFIX@gzoffset64 + @ZLIB_SYMBOL_PREFIX@adler32_combine64 + @ZLIB_SYMBOL_PREFIX@crc32_combine64 + @ZLIB_SYMBOL_PREFIX@crc32_combine_gen64 +; checksum functions + @ZLIB_SYMBOL_PREFIX@adler32 + @ZLIB_SYMBOL_PREFIX@adler32_z + @ZLIB_SYMBOL_PREFIX@crc32 + @ZLIB_SYMBOL_PREFIX@crc32_z + @ZLIB_SYMBOL_PREFIX@adler32_combine + @ZLIB_SYMBOL_PREFIX@crc32_combine + @ZLIB_SYMBOL_PREFIX@crc32_combine_gen + @ZLIB_SYMBOL_PREFIX@crc32_combine_op +; various hacks, don't look :) + @ZLIB_SYMBOL_PREFIX@deflateInit_ + @ZLIB_SYMBOL_PREFIX@deflateInit2_ + @ZLIB_SYMBOL_PREFIX@inflateInit_ + @ZLIB_SYMBOL_PREFIX@inflateInit2_ + @ZLIB_SYMBOL_PREFIX@inflateBackInit_ + @ZLIB_SYMBOL_PREFIX@gzgetc_ + @ZLIB_SYMBOL_PREFIX@zError + @ZLIB_SYMBOL_PREFIX@inflateSyncPoint + @ZLIB_SYMBOL_PREFIX@get_crc_table + @ZLIB_SYMBOL_PREFIX@inflateUndermine + @ZLIB_SYMBOL_PREFIX@inflateValidate + @ZLIB_SYMBOL_PREFIX@inflateCodesUsed + @ZLIB_SYMBOL_PREFIX@inflateResetKeep + @ZLIB_SYMBOL_PREFIX@deflateResetKeep + @ZLIB_SYMBOL_PREFIX@gzopen_w diff --git a/3rdparty/zlib-ng/zbuild.h b/3rdparty/zlib-ng/zbuild.h index d550b4c582..9157eef9e3 100644 --- a/3rdparty/zlib-ng/zbuild.h +++ b/3rdparty/zlib-ng/zbuild.h @@ -202,6 +202,24 @@ # define ALIGNED_(x) __declspec(align(x)) #endif +#ifdef HAVE_BUILTIN_ASSUME_ALIGNED +# define HINT_ALIGNED(p,n) __builtin_assume_aligned((void *)(p),(n)) +#else +# define HINT_ALIGNED(p,n) (p) +#endif +#define HINT_ALIGNED_16(p) HINT_ALIGNED((p),16) +#define HINT_ALIGNED_64(p) HINT_ALIGNED((p),64) +#define HINT_ALIGNED_4096(p) HINT_ALIGNED((p),4096) + +/* PADSZ returns needed bytes to pad bpos to pad size + * PAD_NN calculates pad size and adds it to bpos, returning the result. + * All take an integer or a pointer as bpos input. + */ +#define PADSZ(bpos, pad) (((pad) - ((uintptr_t)(bpos) % (pad))) % (pad)) +#define PAD_16(bpos) ((bpos) + PADSZ((bpos),16)) +#define PAD_64(bpos) ((bpos) + PADSZ((bpos),64)) +#define PAD_4096(bpos) ((bpos) + PADSZ((bpos),4096)) + /* Diagnostic functions */ #ifdef ZLIB_DEBUG # include @@ -246,6 +264,31 @@ # endif #endif +#if defined(__has_feature) +# if __has_feature(address_sanitizer) +# define Z_ADDRESS_SANITIZER 1 +# endif +#elif defined(__SANITIZE_ADDRESS__) +# define Z_ADDRESS_SANITIZER 1 +#endif + +/* + * __asan_loadN() and __asan_storeN() calls are inserted by compilers in order to check memory accesses. + * They can be called manually too, with the following caveats: + * gcc says: "warning: implicit declaration of function ‘...’" + * g++ says: "error: new declaration ‘...’ ambiguates built-in declaration ‘...’" + * Accommodate both. + */ +#ifdef Z_ADDRESS_SANITIZER +#ifndef __cplusplus +void __asan_loadN(void *, long); +void __asan_storeN(void *, long); +#endif +#else +# define __asan_loadN(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) +# define __asan_storeN(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) +#endif + #if defined(__has_feature) # if __has_feature(memory_sanitizer) # define Z_MEMORY_SANITIZER 1 @@ -254,7 +297,31 @@ #endif #ifndef Z_MEMORY_SANITIZER +# define __msan_check_mem_is_initialized(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) # define __msan_unpoison(a, size) do { Z_UNUSED(a); Z_UNUSED(size); } while (0) #endif +/* Notify sanitizer runtime about an upcoming read access. */ +#define instrument_read(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_loadN(__a, __size); \ + __msan_check_mem_is_initialized(__a, __size); \ +} while (0) + +/* Notify sanitizer runtime about an upcoming write access. */ +#define instrument_write(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_storeN(__a, __size); \ +} while (0) + +/* Notify sanitizer runtime about an upcoming read/write access. */ +#define instrument_read_write(a, size) do { \ + void *__a = (void *)(a); \ + long __size = size; \ + __asan_storeN(__a, __size); \ + __msan_check_mem_is_initialized(__a, __size); \ +} while (0) + #endif diff --git a/3rdparty/zlib-ng/zconf-ng.h.in b/3rdparty/zlib-ng/zconf-ng.h.in new file mode 100644 index 0000000000..a1b5311b85 --- /dev/null +++ b/3rdparty/zlib-ng/zconf-ng.h.in @@ -0,0 +1,176 @@ +/* zconf-ng.h -- configuration of the zlib-ng compression library + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef ZCONFNG_H +#define ZCONFNG_H + +#include "zlib_name_mangling-ng.h" + +#if !defined(_WIN32) && defined(__WIN32__) +# define _WIN32 +#endif + +/* Clang macro for detecting declspec support + * https://clang.llvm.org/docs/LanguageExtensions.html#has-declspec-attribute + */ +#ifndef __has_declspec_attribute +# define __has_declspec_attribute(x) 0 +#endif + +/* Always define z_const as const */ +#define z_const const + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# define MAX_MEM_LEVEL 9 +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MIN_WBITS +# define MIN_WBITS 8 /* 256 LZ77 window */ +#endif +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + +/* Type declarations */ + +#ifdef ZLIB_INTERNAL +# define Z_INTERNAL ZLIB_INTERNAL +#endif + +/* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +#if defined(ZLIB_DLL) && (defined(_WIN32) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport))) +# ifdef Z_INTERNAL +# define Z_EXTERN extern __declspec(dllexport) +# else +# define Z_EXTERN extern __declspec(dllimport) +# endif +#endif + +/* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +#if defined(ZLIB_WINAPI) && defined(_WIN32) +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define Z_EXPORT WINAPI +# define Z_EXPORTVA WINAPIV +#endif + +#ifndef Z_EXTERN +# define Z_EXTERN extern +#endif +#ifndef Z_EXPORT +# define Z_EXPORT +#endif +#ifndef Z_EXPORTVA +# define Z_EXPORTVA +#endif + +/* Conditional exports */ +#define ZNG_CONDEXPORT Z_EXPORT + +/* Fallback for something that includes us. */ +typedef unsigned char Byte; +typedef Byte Bytef; + +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +typedef char charf; +typedef int intf; +typedef uInt uIntf; +typedef uLong uLongf; + +typedef void const *voidpc; +typedef void *voidpf; +typedef void *voidp; + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by configure/cmake/etc */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef NEED_PTRDIFF_T /* may be set to #if 1 by configure/cmake/etc */ +typedef PTRDIFF_TYPE ptrdiff_t; +#endif + +#include /* for off_t */ + +#include /* for wchar_t and NULL */ + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifndef z_off_t +# define z_off_t off_t +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && defined(WITH_GZFILEOP) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(__MSYS__) +# define z_off64_t _off64_t +# elif defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +#endif /* ZCONFNG_H */ diff --git a/3rdparty/zlib-ng/zconf.h.in b/3rdparty/zlib-ng/zconf.h.in index 7a6e281e84..be8221fd86 100644 --- a/3rdparty/zlib-ng/zconf.h.in +++ b/3rdparty/zlib-ng/zconf.h.in @@ -1,5 +1,5 @@ /* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ diff --git a/3rdparty/zlib-ng/zlib.h.in b/3rdparty/zlib-ng/zlib.h.in index eabb94afe0..3dceaa3344 100644 --- a/3rdparty/zlib-ng/zlib.h.in +++ b/3rdparty/zlib-ng/zlib.h.in @@ -1,9 +1,9 @@ #ifndef ZLIB_H_ #define ZLIB_H_ /* zlib.h -- interface of the 'zlib-ng' compression library - Forked from and compatible with zlib 1.2.13 + Forked from and compatible with zlib 1.3.1 - Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -49,20 +49,20 @@ extern "C" { #endif -#define ZLIBNG_VERSION "2.1.6" -#define ZLIBNG_VERNUM 0x020106F0L /* MMNNRRSM: major minor revision status modified */ +#define ZLIBNG_VERSION "2.2.1" +#define ZLIBNG_VERNUM 0x020201F0L /* MMNNRRSM: major minor revision status modified */ #define ZLIBNG_VER_MAJOR 2 -#define ZLIBNG_VER_MINOR 1 -#define ZLIBNG_VER_REVISION 6 +#define ZLIBNG_VER_MINOR 2 +#define ZLIBNG_VER_REVISION 1 #define ZLIBNG_VER_STATUS F /* 0=devel, 1-E=beta, F=Release (DEPRECATED) */ #define ZLIBNG_VER_STATUSH 0xF /* Hex values: 0=devel, 1-E=beta, F=Release */ #define ZLIBNG_VER_MODIFIED 0 /* non-zero if modified externally from zlib-ng */ -#define ZLIB_VERSION "1.3.0.zlib-ng" -#define ZLIB_VERNUM 0x130f +#define ZLIB_VERSION "1.3.1.zlib-ng" +#define ZLIB_VERNUM 0x131f #define ZLIB_VER_MAJOR 1 #define ZLIB_VER_MINOR 3 -#define ZLIB_VER_REVISION 0 +#define ZLIB_VER_REVISION 1 #define ZLIB_VER_SUBREVISION 15 /* 15=fork (0xf) */ /* @@ -220,7 +220,7 @@ typedef gz_header *gz_headerp; #define Z_DEFLATED 8 /* The deflate compression method (the only one supported in this version) */ -#define Z_NULL NULL /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ +#define Z_NULL 0 /* for compatibility with zlib, was for initializing zalloc, zfree, opaque */ #define zlib_version zlibVersion() /* for compatibility with versions < 1.0.2 */ @@ -1732,14 +1732,14 @@ Z_EXTERN unsigned long Z_EXPORT crc32_combine(unsigned long crc1, unsigned long seq1 and seq2 with lengths len1 and len2, CRC-32 check values were calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and - len2. + len2. len2 must be non-negative. */ /* Z_EXTERN unsigned long Z_EXPORT crc32_combine_gen(z_off_t len2); Return the operator corresponding to length len2, to be used with - crc32_combine_op(). + crc32_combine_op(). len2 must be non-negative. */ Z_EXTERN unsigned long Z_EXPORT crc32_combine_op(unsigned long crc1, unsigned long crc2, diff --git a/3rdparty/zlib-ng/zlib.map b/3rdparty/zlib-ng/zlib.map new file mode 100644 index 0000000000..293e803729 --- /dev/null +++ b/3rdparty/zlib-ng/zlib.map @@ -0,0 +1,98 @@ +ZLIB_1.2.0 { + global: + compressBound; + deflateBound; + inflateBack; + inflateBackEnd; + inflateBackInit_; + inflateCopy; + local: + deflate_copyright; + inflate_copyright; + zcalloc; + zcfree; + z_errmsg; + gz_error; + gz_intmax; + _*; +}; + +ZLIB_1.2.0.2 { + gzclearerr; + gzungetc; + zlibCompileFlags; +} ZLIB_1.2.0; + +ZLIB_1.2.0.8 { + deflatePrime; +} ZLIB_1.2.0.2; + +ZLIB_1.2.2 { + adler32_combine; + crc32_combine; + deflateSetHeader; + inflateGetHeader; +} ZLIB_1.2.0.8; + +ZLIB_1.2.2.3 { + deflateTune; + gzdirect; +} ZLIB_1.2.2; + +ZLIB_1.2.2.4 { + inflatePrime; +} ZLIB_1.2.2.3; + +ZLIB_1.2.3.3 { + adler32_combine64; + crc32_combine64; + gzopen64; + gzseek64; + gztell64; + inflateUndermine; +} ZLIB_1.2.2.4; + +ZLIB_1.2.3.4 { + inflateReset2; + inflateMark; +} ZLIB_1.2.3.3; + +ZLIB_1.2.3.5 { + gzbuffer; + gzoffset; + gzoffset64; + gzclose_r; + gzclose_w; +} ZLIB_1.2.3.4; + +ZLIB_1.2.5.1 { + deflatePending; +} ZLIB_1.2.3.5; + +ZLIB_1.2.5.2 { + deflateResetKeep; + gzgetc_; + inflateResetKeep; +} ZLIB_1.2.5.1; + +ZLIB_1.2.7.1 { + inflateGetDictionary; + gzvprintf; +} ZLIB_1.2.5.2; + +ZLIB_1.2.9 { + inflateCodesUsed; + inflateValidate; + uncompress2; + gzfread; + gzfwrite; + deflateGetDictionary; + adler32_z; + crc32_z; +} ZLIB_1.2.7.1; + +ZLIB_1.2.12 { + crc32_combine_gen; + crc32_combine_gen64; + crc32_combine_op; +} ZLIB_1.2.9; diff --git a/3rdparty/zlib-ng/zutil.c b/3rdparty/zlib-ng/zutil.c index 270a28c742..39fbceb4a0 100644 --- a/3rdparty/zlib-ng/zutil.c +++ b/3rdparty/zlib-ng/zutil.c @@ -21,7 +21,7 @@ z_const char * const PREFIX(z_errmsg)[10] = { }; const char PREFIX3(vstring)[] = - " zlib-ng 2.1.6"; + " zlib-ng 2.2.1"; #ifdef ZLIB_COMPAT const char * Z_EXPORT zlibVersion(void) { @@ -109,51 +109,3 @@ void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr) { Z_UNUSED(opaque); zng_free(ptr); } - -/* Since we support custom memory allocators, some which might not align memory as we expect, - * we have to ask for extra memory and return an aligned pointer. */ -void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align) { - uintptr_t return_ptr, original_ptr; - uint32_t alloc_size, align_diff; - void *ptr; - - /* If no custom calloc function used then call zlib-ng's aligned calloc */ - if (zalloc == PREFIX(zcalloc)) - return PREFIX(zcalloc)(opaque, items, size); - - /* Allocate enough memory for proper alignment and to store the original memory pointer */ - alloc_size = sizeof(void *) + (items * size) + align; - ptr = zalloc(opaque, 1, alloc_size); - if (!ptr) - return NULL; - - /* Calculate return pointer address with space enough to store original pointer */ - align_diff = align - ((uintptr_t)ptr % align); - return_ptr = (uintptr_t)ptr + align_diff; - if (align_diff < sizeof(void *)) - return_ptr += align; - - /* Store the original pointer for free() */ - original_ptr = return_ptr - sizeof(void *); - memcpy((void *)original_ptr, &ptr, sizeof(void *)); - - /* Return properly aligned pointer in allocation */ - return (void *)return_ptr; -} - -void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr) { - /* If no custom cfree function used then call zlib-ng's aligned cfree */ - if (zfree == PREFIX(zcfree)) { - PREFIX(zcfree)(opaque, ptr); - return; - } - if (!ptr) - return; - - /* Calculate offset to original memory allocation pointer */ - void *original_ptr = (void *)((uintptr_t)ptr - sizeof(void *)); - void *free_ptr = *(void **)original_ptr; - - /* Free original memory allocation */ - zfree(opaque, free_ptr); -} diff --git a/3rdparty/zlib-ng/zutil.h b/3rdparty/zlib-ng/zutil.h index 663616b44d..a6284502d0 100644 --- a/3rdparty/zlib-ng/zutil.h +++ b/3rdparty/zlib-ng/zutil.h @@ -1,7 +1,7 @@ #ifndef ZUTIL_H_ #define ZUTIL_H_ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2022 Jean-loup Gailly, Mark Adler + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ @@ -24,7 +24,7 @@ typedef unsigned long ulg; extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ -#define ERR_MSG(err) PREFIX(z_errmsg)[Z_NEED_DICT-(err)] +#define ERR_MSG(err) PREFIX(z_errmsg)[(err) < -6 || (err) > 2 ? 9 : 2 - (err)] #define ERR_RETURN(strm, err) return (strm->msg = ERR_MSG(err), (err)) /* To be used only when the state is known to be valid */ @@ -103,7 +103,7 @@ extern z_const char * const PREFIX(z_errmsg)[10]; /* indexed by 2-zlib_error */ # define OS_CODE 6 #endif -#if defined(MACOS) || defined(TARGET_OS_MAC) +#if defined(MACOS) # define OS_CODE 7 #endif @@ -137,12 +137,4 @@ void Z_INTERNAL PREFIX(zcfree)(void *opaque, void *ptr); typedef void *zng_calloc_func(void *opaque, unsigned items, unsigned size); typedef void zng_cfree_func(void *opaque, void *ptr); -void Z_INTERNAL *PREFIX3(alloc_aligned)(zng_calloc_func zalloc, void *opaque, unsigned items, unsigned size, unsigned align); -void Z_INTERNAL PREFIX3(free_aligned)(zng_cfree_func zfree, void *opaque, void *ptr); - -#define ZALLOC(strm, items, size) PREFIX3(alloc_aligned)((strm)->zalloc, (strm)->opaque, (items), (size), 64) -#define ZFREE(strm, addr) PREFIX3(free_aligned)((strm)->zfree, (strm)->opaque, (void *)(addr)) - -#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} - #endif /* ZUTIL_H_ */ diff --git a/3rdparty/zlib-ng/zutil_p.h b/3rdparty/zlib-ng/zutil_p.h index caec91d50d..97799f0ce3 100644 --- a/3rdparty/zlib-ng/zutil_p.h +++ b/3rdparty/zlib-ng/zutil_p.h @@ -16,15 +16,19 @@ /* Function to allocate 16 or 64-byte aligned memory */ static inline void *zng_alloc(size_t size) { -#ifdef HAVE_POSIX_MEMALIGN +#ifdef HAVE_ALIGNED_ALLOC + /* Size must be a multiple of alignment */ + size = (size + (64 - 1)) & ~(64 - 1); + return (void *)aligned_alloc(64, size); /* Defined in C11 */ +#elif defined(HAVE_POSIX_MEMALIGN) void *ptr; return posix_memalign(&ptr, 64, size) ? NULL : ptr; #elif defined(_WIN32) return (void *)_aligned_malloc(size, 64); #elif defined(__APPLE__) - return (void *)malloc(size); /* MacOS always aligns to 16 bytes */ -#elif defined(HAVE_ALIGNED_ALLOC) - return (void *)aligned_alloc(64, size); + /* Fallback for when posix_memalign and aligned_alloc are not available. + * On macOS, it always aligns to 16 bytes. */ + return (void *)malloc(size); #else return (void *)memalign(64, size); #endif