mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2025-01-20 07:25:05 +08:00
Merge pull request #3792 from stweil/avx512f
Add initial support for Intel AVX512F
This commit is contained in:
commit
864ab537bb
@ -167,6 +167,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
|
||||
set(AVX2_COMPILE_FLAGS "/arch:AVX2")
|
||||
add_definitions("-DHAVE_AVX2")
|
||||
|
||||
set(HAVE_AVX512F ON)
|
||||
set(AVX512_COMPILE_FLAGS "/arch:AVX512")
|
||||
add_definitions("-DHAVE_AVX512F")
|
||||
|
||||
set(HAVE_FMA ON)
|
||||
set(FMA_COMPILE_FLAGS "-D__FMA__")
|
||||
add_definitions("-DHAVE_FMA")
|
||||
@ -198,6 +202,12 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|x86_64|AMD64|amd64|i386|i686")
|
||||
add_definitions("-DHAVE_AVX2")
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag("-mavx512f" HAVE_AVX512F)
|
||||
if(HAVE_AVX512F)
|
||||
set(AVX512F_COMPILE_FLAGS "-mavx512f")
|
||||
add_definitions("-DHAVE_AVX512F")
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag("-mfma" HAVE_FMA)
|
||||
if(HAVE_FMA)
|
||||
set(FMA_COMPILE_FLAGS "-mfma")
|
||||
@ -215,6 +225,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64.*|AARCH64.*")
|
||||
|
||||
set(HAVE_AVX FALSE)
|
||||
set(HAVE_AVX2 FALSE)
|
||||
set(HAVE_AVX512F FALSE)
|
||||
set(HAVE_FMA FALSE)
|
||||
set(HAVE_SSE4_1 FALSE)
|
||||
|
||||
@ -225,6 +236,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*")
|
||||
|
||||
set(HAVE_AVX FALSE)
|
||||
set(HAVE_AVX2 FALSE)
|
||||
set(HAVE_AVX512F FALSE)
|
||||
set(HAVE_FMA FALSE)
|
||||
set(HAVE_SSE4_1 FALSE)
|
||||
|
||||
@ -238,6 +250,7 @@ else()
|
||||
|
||||
set(HAVE_AVX FALSE)
|
||||
set(HAVE_AVX2 FALSE)
|
||||
set(HAVE_AVX512F FALSE)
|
||||
set(HAVE_FMA FALSE)
|
||||
set(HAVE_NEON FALSE)
|
||||
set(HAVE_SSE4_1 FALSE)
|
||||
@ -480,6 +493,7 @@ message(STATUS "Linker options: ${CMAKE_EXE_LINKER_FLAGS} "
|
||||
message(STATUS "Install directory: ${CMAKE_INSTALL_PREFIX}")
|
||||
message(STATUS "HAVE_AVX: ${HAVE_AVX}")
|
||||
message(STATUS "HAVE_AVX2: ${HAVE_AVX2}")
|
||||
message(STATUS "HAVE_AVX512F: ${HAVE_AVX512F}")
|
||||
message(STATUS "HAVE_FMA: ${HAVE_FMA}")
|
||||
message(STATUS "HAVE_SSE4_1: ${HAVE_SSE4_1}")
|
||||
message(STATUS "MARCH_NATIVE_OPT: ${MARCH_NATIVE_OPT}")
|
||||
@ -641,6 +655,11 @@ if(HAVE_AVX2)
|
||||
set_source_files_properties(src/arch/intsimdmatrixavx2.cpp
|
||||
PROPERTIES COMPILE_FLAGS ${AVX2_COMPILE_FLAGS})
|
||||
endif(HAVE_AVX2)
|
||||
if(HAVE_AVX512F)
|
||||
list(APPEND arch_files_opt src/arch/dotproductavx512.cpp)
|
||||
set_source_files_properties(src/arch/dotproductavx512.cpp
|
||||
PROPERTIES COMPILE_FLAGS ${AVX512F_COMPILE_FLAGS})
|
||||
endif(HAVE_AVX512F)
|
||||
if(HAVE_FMA)
|
||||
list(APPEND arch_files_opt src/arch/dotproductfma.cpp)
|
||||
set_source_files_properties(src/arch/dotproductfma.cpp
|
||||
|
@ -160,6 +160,14 @@ libtesseract_la_LIBADD += libtesseract_avx2.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx2.la
|
||||
endif
|
||||
|
||||
if HAVE_AVX512F
|
||||
libtesseract_avx512_la_CXXFLAGS = -mavx512f
|
||||
libtesseract_avx512_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
libtesseract_avx512_la_SOURCES = src/arch/dotproductavx512.cpp
|
||||
libtesseract_la_LIBADD += libtesseract_avx512.la
|
||||
noinst_LTLIBRARIES += libtesseract_avx512.la
|
||||
endif
|
||||
|
||||
if HAVE_FMA
|
||||
libtesseract_fma_la_CXXFLAGS = -mfma
|
||||
libtesseract_fma_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil
|
||||
|
@ -129,6 +129,7 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un
|
||||
|
||||
AM_CONDITIONAL([HAVE_AVX], false)
|
||||
AM_CONDITIONAL([HAVE_AVX2], false)
|
||||
AM_CONDITIONAL([HAVE_AVX512F], false)
|
||||
AM_CONDITIONAL([HAVE_FMA], false)
|
||||
AM_CONDITIONAL([HAVE_SSE4_1], false)
|
||||
AM_CONDITIONAL([HAVE_NEON], false)
|
||||
@ -149,6 +150,12 @@ case "${host_cpu}" in
|
||||
AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
|
||||
fi
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-mavx512f], [avx512f=true], [avx512f=false], [$WERROR])
|
||||
AM_CONDITIONAL([HAVE_AVX512F], $avx512f)
|
||||
if $avx512f; then
|
||||
AC_DEFINE([HAVE_AVX512F], [1], [Enable AVX512F instructions])
|
||||
fi
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
|
||||
AM_CONDITIONAL([HAVE_FMA], $fma)
|
||||
if $fma; then
|
||||
|
@ -27,6 +27,9 @@ TFloat DotProductNative(const TFloat *u, const TFloat *v, int n);
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
TFloat DotProductAVX(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Uses Intel AVX512F intrinsics to access the SIMD instruction set.
|
||||
TFloat DotProductAVX512F(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
// Use Intel FMA.
|
||||
TFloat DotProductFMA(const TFloat *u, const TFloat *v, int n);
|
||||
|
||||
|
70
src/arch/dotproductavx512.cpp
Normal file
70
src/arch/dotproductavx512.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductavx512.cpp
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Stefan Weil
|
||||
//
|
||||
// (C) Copyright 2022
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if !defined(__AVX__)
|
||||
# if defined(__i686__) || defined(__x86_64__)
|
||||
# error Implementation only for AVX capable architectures
|
||||
# endif
|
||||
#else
|
||||
|
||||
# include <immintrin.h>
|
||||
# include <cstdint>
|
||||
# include "dotproduct.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel AVX intrinsics to access the SIMD instruction set.
|
||||
# if defined(FAST_FLOAT)
|
||||
float DotProductAVX512F(const float *u, const float *v, int n) {
|
||||
const unsigned quot = n / 16;
|
||||
const unsigned rem = n % 16;
|
||||
__m512 t0 = _mm512_setzero_ps();
|
||||
for (unsigned k = 0; k < quot; k++) {
|
||||
__m512 f0 = _mm512_loadu_ps(u);
|
||||
__m512 f1 = _mm512_loadu_ps(v);
|
||||
t0 = _mm512_fmadd_ps(f0, f1, t0);
|
||||
u += 16;
|
||||
v += 16;
|
||||
}
|
||||
float result = _mm512_reduce_add_ps(t0);
|
||||
for (unsigned k = 0; k < rem; k++) {
|
||||
result += *u++ * *v++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
# else
|
||||
double DotProductAVX512F(const double *u, const double *v, int n) {
|
||||
const unsigned quot = n / 8;
|
||||
const unsigned rem = n % 8;
|
||||
__m512d t0 = _mm512_setzero_pd();
|
||||
for (unsigned k = 0; k < quot; k++) {
|
||||
t0 = _mm512_fmadd_pd(_mm512_loadu_pd(u), _mm512_loadu_pd(v), t0);
|
||||
u += 8;
|
||||
v += 8;
|
||||
}
|
||||
double result = _mm512_reduce_add_pd(t0);
|
||||
for (unsigned k = 0; k < rem; k++) {
|
||||
result += *u++ * *v++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
# endif
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif
|
@ -225,6 +225,11 @@ SIMDDetect::SIMDDetect() {
|
||||
// Select code for calculation of dot product based on autodetection.
|
||||
if (false) {
|
||||
// This is a dummy to support conditional compilation.
|
||||
#if defined(HAVE_AVX512F)
|
||||
} else if (avx512F_available_) {
|
||||
// AVX512F detected.
|
||||
SetDotProduct(DotProductAVX512F, &IntSimdMatrix::intSimdMatrixAVX2);
|
||||
#endif
|
||||
#if defined(HAVE_AVX2)
|
||||
} else if (avx2_available_) {
|
||||
// AVX2 detected.
|
||||
|
Loading…
Reference in New Issue
Block a user