From f0fb6809e30986af8b127f9b68841f72c993c022 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 14 Jul 2021 19:07:05 +0200 Subject: [PATCH] Use SIMD instructions for DotProductNative Signed-off-by: Stefan Weil --- Makefile.am | 3 +++ configure.ac | 4 +++- src/arch/dotproduct.cpp | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index e95f18ea1..d56f6e48b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -144,6 +144,9 @@ noinst_HEADERS += src/arch/simddetect.h noinst_LTLIBRARIES += libtesseract_native.la libtesseract_native_la_CXXFLAGS = -O3 -ffast-math +if OPENMP_SIMD +libtesseract_native_la_CXXFLAGS += -fopenmp-simd -DOPENMP_SIMD +endif if MARCH_NATIVE_OPT libtesseract_native_la_CXXFLAGS += -march=native -mtune=native endif diff --git a/configure.ac b/configure.ac index 37a6bf452..bc5bb745e 100644 --- a/configure.ac +++ b/configure.ac @@ -186,6 +186,8 @@ esac AX_CHECK_COMPILE_FLAG([-march=native], [arch_native=true], [arch_native=false], [$WERROR]) AM_CONDITIONAL([MARCH_NATIVE_OPT], $arch_native) +AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR]) +AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd) AC_ARG_WITH([extra-includes], [AS_HELP_STRING([--with-extra-includes=DIR], @@ -284,7 +286,7 @@ m4_define([MY_CHECK_FRAMEWORK], ]) if test "$my_cv_framework_$1"="yes"; then AC_DEFINE(AS_TR_CPP([HAVE_FRAMEWORK_$1]), 1, - [Define if you have the $1 framework]) + [Define if you have the $1 framework]) AS_TR_CPP([FRAMEWORK_$1])="-framework $1" AC_SUBST(AS_TR_CPP([FRAMEWORK_$1])) fi] diff --git a/src/arch/dotproduct.cpp b/src/arch/dotproduct.cpp index 62bcc00ce..07bf10d68 100644 --- a/src/arch/dotproduct.cpp +++ b/src/arch/dotproduct.cpp @@ -21,6 +21,9 @@ namespace tesseract { // Computes and returns the dot product of the two n-vectors u and v. double DotProductNative(const double *u, const double *v, int n) { double total = 0.0; +#if defined(OPENMP_SIMD) +#pragma omp simd reduction(+:total) +#endif for (int k = 0; k < n; ++k) { total += u[k] * v[k]; }