mirror of
https://github.com/tesseract-ocr/tesseract.git
synced 2024-11-27 20:59:36 +08:00
Add dot product implementation for Intel FMA (double = tessdata_best)
Signed-off-by: Stefan Weil <sw@weilnetz.de>
This commit is contained in:
parent
cce26fa197
commit
2d5b166876
@ -220,6 +220,7 @@ endforeach()
|
||||
# add definition as expected in src/arch/simddetect.cpp
|
||||
set(AVX_OPT OFF)
|
||||
set(AVX2_OPT OFF)
|
||||
set(FMA_OPT OFF)
|
||||
set(SSE41_OPT OFF)
|
||||
set(MARCH_NATIVE_OPT OFF)
|
||||
foreach(flag ${_enable_vector_unit_list}) # from OptimizeForArchitecture()
|
||||
@ -227,7 +228,7 @@ foreach(flag ${_enable_vector_unit_list}) # from OptimizeForArchitecture()
|
||||
string(REPLACE "\." "_" flag "${flag}")
|
||||
set(sim_flags "${sim_flags} -D${flag}")
|
||||
string(REPLACE "_" "" flag "${flag}")
|
||||
if("${flag}" MATCHES "AVX|AVX2|SSE41")
|
||||
if("${flag}" MATCHES "AVX|AVX2|FMA|SSE41")
|
||||
set("${flag}_OPT" ON)
|
||||
endif()
|
||||
endforeach(flag)
|
||||
@ -285,6 +286,7 @@ message( STATUS "Architecture flags: ${Vc_ARCHITECTURE_FLAGS}")
|
||||
message( STATUS "Vector unit list: ${_enable_vector_unit_list}")
|
||||
message( STATUS "AVX_OPT: ${AVX_OPT}")
|
||||
message( STATUS "AVX2_OPT: ${AVX2_OPT}")
|
||||
message( STATUS "FMA_OPT: ${FMA_OPT}")
|
||||
message( STATUS "SSE41_OPT: ${SSE41_OPT}")
|
||||
message( STATUS "MARCH_NATIVE_OPT: ${MARCH_NATIVE_OPT}")
|
||||
message( STATUS "sim_flags: ${sim_flags}")
|
||||
@ -364,6 +366,9 @@ endif(AVX_OPT)
|
||||
if(AVX2_OPT)
|
||||
list(APPEND tesseract_src src/arch/intsimdmatrixavx2.cpp)
|
||||
endif(AVX2_OPT)
|
||||
if(FMA_OPT)
|
||||
list(APPEND tesseract_src src/arch/dotproductfma.cpp)
|
||||
endif(AVX_OPT)
|
||||
if(SSE41_OPT)
|
||||
list(APPEND tesseract_src src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp)
|
||||
endif(SSE41_OPT)
|
||||
|
@ -126,6 +126,9 @@ AM_CONDITIONAL([AVX_OPT], ${avx})
|
||||
AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false], [$WERROR])
|
||||
AM_CONDITIONAL([AVX2_OPT], $avx2)
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
|
||||
AM_CONDITIONAL([FMA_OPT], $fma)
|
||||
|
||||
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false], [$WERROR])
|
||||
AM_CONDITIONAL([SSE41_OPT], $sse41)
|
||||
|
||||
|
@ -68,6 +68,9 @@ endif
|
||||
if AVX2_OPT
|
||||
libtesseract_la_LIBADD += ../arch/libtesseract_avx2.la
|
||||
endif
|
||||
if FMA_OPT
|
||||
libtesseract_la_LIBADD += ../arch/libtesseract_fma.la
|
||||
endif
|
||||
if SSE41_OPT
|
||||
libtesseract_la_LIBADD += ../arch/libtesseract_sse.la
|
||||
endif
|
||||
|
@ -136,6 +136,7 @@ static void PrintVersionInfo() {
|
||||
if (tesseract::SIMDDetect::IsAVX512FAvailable()) printf(" Found AVX512F\n");
|
||||
if (tesseract::SIMDDetect::IsAVX2Available()) printf(" Found AVX2\n");
|
||||
if (tesseract::SIMDDetect::IsAVXAvailable()) printf(" Found AVX\n");
|
||||
if (tesseract::SIMDDetect::IsFMAAvailable()) printf(" Found FMA\n");
|
||||
if (tesseract::SIMDDetect::IsSSEAvailable()) printf(" Found SSE\n");
|
||||
#ifdef _OPENMP
|
||||
printf(" Found OpenMP %d\n", _OPENMP);
|
||||
|
@ -20,6 +20,9 @@ endif
|
||||
if AVX2_OPT
|
||||
noinst_LTLIBRARIES += libtesseract_avx2.la
|
||||
endif
|
||||
if FMA_OPT
|
||||
noinst_LTLIBRARIES += libtesseract_fma.la
|
||||
endif
|
||||
if SSE41_OPT
|
||||
noinst_LTLIBRARIES += libtesseract_sse.la
|
||||
endif
|
||||
@ -34,6 +37,10 @@ if AVX2_OPT
|
||||
libtesseract_arch_la_CPPFLAGS += -DAVX2
|
||||
libtesseract_avx2_la_CXXFLAGS = -mavx2
|
||||
endif
|
||||
if FMA_OPT
|
||||
libtesseract_arch_la_CPPFLAGS += -DFMA
|
||||
libtesseract_fma_la_CXXFLAGS = -mfma
|
||||
endif
|
||||
if SSE41_OPT
|
||||
libtesseract_arch_la_CPPFLAGS += -DSSE4_1
|
||||
libtesseract_sse_la_CXXFLAGS = -msse4.1
|
||||
@ -55,6 +62,10 @@ if AVX2_OPT
|
||||
libtesseract_avx2_la_SOURCES = intsimdmatrixavx2.cpp
|
||||
endif
|
||||
|
||||
if FMA_OPT
|
||||
libtesseract_fma_la_SOURCES = dotproductfma.cpp
|
||||
endif
|
||||
|
||||
if SSE41_OPT
|
||||
libtesseract_sse_la_SOURCES = dotproductsse.cpp intsimdmatrixsse.cpp
|
||||
endif
|
||||
|
@ -22,6 +22,9 @@ namespace tesseract {
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
double DotProductNative(const double* u, const double* v, int n);
|
||||
|
||||
// Use Intel FMA.
|
||||
double DotProductFMA(const double* u, const double* v, int n);
|
||||
|
||||
} // namespace tesseract.
|
||||
|
||||
#endif // TESSERACT_ARCH_DOTPRODUCT_H_
|
||||
|
57
src/arch/dotproductfma.cpp
Normal file
57
src/arch/dotproductfma.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// File: dotproductfma.cpp
|
||||
// Description: Architecture-specific dot-product function.
|
||||
// Author: Stefan Weil
|
||||
//
|
||||
// (C) Copyright 2015, Google Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if !defined(__FMA__)
|
||||
#error Implementation only for FMA capable architectures
|
||||
#endif
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <cstdint>
|
||||
#include "dotproduct.h"
|
||||
|
||||
namespace tesseract {
|
||||
|
||||
// Computes and returns the dot product of the n-vectors u and v.
|
||||
// Uses Intel FMA intrinsics to access the SIMD instruction set.
|
||||
double DotProductFMA(const double* u, const double* v, int n) {
|
||||
const unsigned quot = n / 8;
|
||||
const unsigned rem = n % 8;
|
||||
__m256d t0 = _mm256_setzero_pd();
|
||||
__m256d t1 = _mm256_setzero_pd();
|
||||
for (unsigned k = 0; k < quot; k++) {
|
||||
__m256d f0 = _mm256_loadu_pd(u);
|
||||
__m256d f1 = _mm256_loadu_pd(v);
|
||||
t0 = _mm256_fmadd_pd(f0, f1, t0);
|
||||
u += 4;
|
||||
v += 4;
|
||||
__m256d f2 = _mm256_loadu_pd(u);
|
||||
__m256d f3 = _mm256_loadu_pd(v);
|
||||
t1 = _mm256_fmadd_pd(f2, f3, t1);
|
||||
u += 4;
|
||||
v += 4;
|
||||
}
|
||||
t0 = _mm256_hadd_pd(t0, t1);
|
||||
alignas(32) double tmp[4];
|
||||
_mm256_store_pd(tmp, t0);
|
||||
double result = tmp[0] + tmp[1] + tmp[2] + tmp[3];
|
||||
for (unsigned k = 0; k < rem; k++) {
|
||||
result += *u++ * *v++;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace tesseract.
|
@ -24,7 +24,7 @@
|
||||
#include "params.h" // for STRING_VAR
|
||||
#include "tprintf.h" // for tprintf
|
||||
|
||||
#if defined(AVX) || defined(AVX2) || defined(SSE4_1)
|
||||
#if defined(AVX) || defined(AVX2) || defined(FMA) || defined(SSE4_1)
|
||||
# define HAS_CPUID
|
||||
#endif
|
||||
|
||||
@ -60,6 +60,8 @@ bool SIMDDetect::avx_available_;
|
||||
bool SIMDDetect::avx2_available_;
|
||||
bool SIMDDetect::avx512F_available_;
|
||||
bool SIMDDetect::avx512BW_available_;
|
||||
// If true, then FMA has been detected.
|
||||
bool SIMDDetect::fma_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
bool SIMDDetect::sse_available_;
|
||||
|
||||
@ -98,6 +100,9 @@ SIMDDetect::SIMDDetect() {
|
||||
#if defined(SSE4_1)
|
||||
sse_available_ = (ecx & 0x00080000) != 0;
|
||||
#endif
|
||||
#if defined(FMA)
|
||||
fma_available_ = (ecx & 0x00001000) != 0;
|
||||
#endif
|
||||
#if defined(AVX)
|
||||
avx_available_ = (ecx & 0x10000000) != 0;
|
||||
if (avx_available_) {
|
||||
@ -121,6 +126,9 @@ SIMDDetect::SIMDDetect() {
|
||||
#if defined(SSE4_1)
|
||||
sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
|
||||
#endif
|
||||
#if defined(FMA)
|
||||
fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
|
||||
#endif
|
||||
#if defined(AVX)
|
||||
avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
|
||||
#endif
|
||||
@ -185,6 +193,12 @@ void SIMDDetect::Update() {
|
||||
SetDotProduct(DotProductAVX, &IntSimdMatrix::intSimdMatrixSSE);
|
||||
dotproduct_method = "avx";
|
||||
#endif
|
||||
#if defined(FMA)
|
||||
} else if (!strcmp(dotproduct.string(), "fma")) {
|
||||
// FMA selected by config variable.
|
||||
SetDotProduct(DotProductFMA, IntSimdMatrix::intSimdMatrix);
|
||||
dotproduct_method = "fma";
|
||||
#endif
|
||||
#if defined(SSE4_1)
|
||||
} else if (!strcmp(dotproduct.string(), "sse")) {
|
||||
// SSE selected by config variable.
|
||||
|
@ -46,6 +46,10 @@ class SIMDDetect {
|
||||
static inline bool IsAVX512BWAvailable() {
|
||||
return detector.avx512BW_available_;
|
||||
}
|
||||
// Returns true if FMA is available on this system.
|
||||
static inline bool IsFMAAvailable() {
|
||||
return detector.fma_available_;
|
||||
}
|
||||
// Returns true if SSE4.1 is available on this system.
|
||||
static inline bool IsSSEAvailable() {
|
||||
return detector.sse_available_;
|
||||
@ -66,6 +70,8 @@ class SIMDDetect {
|
||||
static TESS_API bool avx2_available_;
|
||||
static TESS_API bool avx512F_available_;
|
||||
static TESS_API bool avx512BW_available_;
|
||||
// If true, then FMA has been detected.
|
||||
static TESS_API bool fma_available_;
|
||||
// If true, then SSe4.1 has been detected.
|
||||
static TESS_API bool sse_available_;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user