/* x86_functions.h -- x86 implementations for arch-specific functions.
 * Copyright (C) 2013 Intel Corporation Jim Kukunas
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

#ifndef X86_FUNCTIONS_H_
#define X86_FUNCTIONS_H_

#ifdef X86_SSE2
uint32_t chunksize_sse2(void);
uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);

#  ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_sse2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_sse2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_sse2(deflate_state *const s, Pos cur_match);
void slide_hash_sse2(deflate_state *s);
#  endif
void inflate_fast_sse2(PREFIX3(stream)* strm, uint32_t start);
#endif

#ifdef X86_SSSE3
uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
uint8_t* chunkmemset_safe_ssse3(uint8_t *out, unsigned dist, unsigned len, unsigned left);
void inflate_fast_ssse3(PREFIX3(stream) *strm, uint32_t start);
#endif

#ifdef X86_SSE42
uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_AVX2
uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t chunksize_avx2(void);
uint8_t* chunkmemset_safe_avx2(uint8_t *out, unsigned dist, unsigned len, unsigned left);

#  ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_avx2(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_avx2(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_avx2(deflate_state *const s, Pos cur_match);
void slide_hash_avx2(deflate_state *s);
#  endif
void inflate_fast_avx2(PREFIX3(stream)* strm, uint32_t start);
#endif

#ifdef X86_AVX512
uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_AVX512VNNI
uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

#ifdef X86_PCLMULQDQ_CRC
uint32_t crc32_fold_pclmulqdq_reset(crc32_fold *crc);
void crc32_fold_pclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
void crc32_fold_pclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
uint32_t crc32_fold_pclmulqdq_final(crc32_fold *crc);
uint32_t crc32_pclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif

#ifdef X86_VPCLMULQDQ_CRC
uint32_t crc32_fold_vpclmulqdq_reset(crc32_fold *crc);
void crc32_fold_vpclmulqdq_copy(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
void crc32_fold_vpclmulqdq(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif
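/* When runtime CPU detection is disabled, the native_* aliases below are
 * bound at compile time: each block re-defines the aliases for one
 * instruction set, and later (wider) ISA blocks override earlier ones, so
 * the most capable implementation enabled for this build wins.
 */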
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// X86 - SSE2
#  if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_sse2
#    undef native_chunksize
#    define native_chunksize chunksize_sse2
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_sse2
#    undef native_slide_hash
#    define native_slide_hash slide_hash_sse2
#    ifdef HAVE_BUILTIN_CTZ
#      undef native_compare256
#      define native_compare256 compare256_sse2
#      undef native_longest_match
#      define native_longest_match longest_match_sse2
#      undef native_longest_match_slow
#      define native_longest_match_slow longest_match_slow_sse2
#    endif
#  endif
// X86 - SSSE3
#  if defined(X86_SSSE3) && defined(__SSSE3__)
#    undef native_adler32
#    define native_adler32 adler32_ssse3
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_ssse3
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_ssse3
#  endif
// X86 - SSE4.2
#  if defined(X86_SSE42) && defined(__SSE4_2__)
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_sse42
#  endif
// X86 - PCLMUL
#  if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
#    undef native_crc32
#    define native_crc32 crc32_pclmulqdq
#    undef native_crc32_fold
#    define native_crc32_fold crc32_fold_pclmulqdq
#    undef native_crc32_fold_copy
#    define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
#    undef native_crc32_fold_final
#    define native_crc32_fold_final crc32_fold_pclmulqdq_final
#    undef native_crc32_fold_reset
#    define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
#  endif
// X86 - AVX2
#  if defined(X86_AVX2) && defined(__AVX2__)
#    undef native_adler32
#    define native_adler32 adler32_avx2
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_avx2
#    undef native_chunkmemset_safe
#    define native_chunkmemset_safe chunkmemset_safe_avx2
#    undef native_chunksize
#    define native_chunksize chunksize_avx2
#    undef native_inflate_fast
#    define native_inflate_fast inflate_fast_avx2
#    undef native_slide_hash
#    define native_slide_hash slide_hash_avx2
#    ifdef HAVE_BUILTIN_CTZ
#      undef native_compare256
#      define native_compare256 compare256_avx2
#      undef native_longest_match
#      define native_longest_match longest_match_avx2
#      undef native_longest_match_slow
#      define native_longest_match_slow longest_match_slow_avx2
#    endif
#  endif
// X86 - AVX512 (F,DQ,BW,VL)
#  if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
#    undef native_adler32
#    define native_adler32 adler32_avx512
#    undef native_adler32_fold_copy
#    define native_adler32_fold_copy adler32_fold_copy_avx512
// X86 - AVX512 (VNNI)
#    if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
#      undef native_adler32
#      define native_adler32 adler32_avx512_vnni
#      undef native_adler32_fold_copy
#      define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
#    endif
// X86 - VPCLMULQDQ
#    if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
#      undef native_crc32
#      define native_crc32 crc32_vpclmulqdq
#      undef native_crc32_fold
#      define native_crc32_fold crc32_fold_vpclmulqdq
#      undef native_crc32_fold_copy
#      define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
#      undef native_crc32_fold_final
#      define native_crc32_fold_final crc32_fold_vpclmulqdq_final
#      undef native_crc32_fold_reset
#      define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
#    endif
#  endif
#endif

#endif /* X86_FUNCTIONS_H_ */
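/* Usage sketch (illustrative, not part of the upstream header): in a build
 * with DISABLE_RUNTIME_CPU_DETECTION and AVX2 enabled, a caller reaches the
 * SIMD kernels through the native_* aliases, e.g.:
 *
 *     uint32_t checksum = native_adler32(1, buf, len);  // resolves to adler32_avx2
 *
 * The seed of 1 is the standard adler32 initial value; the surrounding
 * calling code is assumed here, and only the alias and its prototype come
 * from this header.
 */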