From 7fc7d28dd0d86ae06c66904db631b627a5428028 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 12 Jan 2019 21:30:45 +0100 Subject: [PATCH] Compile files for AVX, AVX2 or SSE only when needed Signed-off-by: Stefan Weil --- src/api/Makefile.am | 13 ++++++++++--- src/arch/Makefile.am | 15 ++++++++++++++- src/arch/dotproductavx.cpp | 8 ++++---- src/arch/dotproductsse.cpp | 6 +++--- src/arch/intsimdmatrix.cpp | 11 +++++------ src/arch/intsimdmatrixavx2.cpp | 12 ++++-------- src/arch/intsimdmatrixsse.cpp | 11 ++++------- unittest/Makefile.am | 7 +++++++ unittest/intsimdmatrix_test.cc | 12 ++++++++++-- 9 files changed, 61 insertions(+), 34 deletions(-) diff --git a/src/api/Makefile.am b/src/api/Makefile.am index 8125c848..ca2215fb 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -53,9 +53,6 @@ libtesseract_la_LIBADD = \ ../dict/libtesseract_dict.la \ ../arch/libtesseract_arch.la \ ../arch/libtesseract_native.la \ - ../arch/libtesseract_avx.la \ - ../arch/libtesseract_avx2.la \ - ../arch/libtesseract_sse.la \ ../lstm/libtesseract_lstm.la \ ../ccstruct/libtesseract_ccstruct.la \ ../cutil/libtesseract_cutil.la \ @@ -63,6 +60,16 @@ libtesseract_la_LIBADD = \ ../ccutil/libtesseract_ccutil.la \ ../opencl/libtesseract_opencl.la +if AVX_OPT +libtesseract_la_LIBADD += ../arch/libtesseract_avx.la +endif +if AVX2_OPT +libtesseract_la_LIBADD += ../arch/libtesseract_avx2.la +endif +if SSE41_OPT +libtesseract_la_LIBADD += ../arch/libtesseract_sse.la +endif + libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) $(NOUNDEFINED) bin_PROGRAMS = tesseract diff --git a/src/arch/Makefile.am b/src/arch/Makefile.am index 0640a379..e8abcc20 100644 --- a/src/arch/Makefile.am +++ b/src/arch/Makefile.am @@ -15,8 +15,15 @@ noinst_HEADERS += intsimdmatrix.h noinst_HEADERS += simddetect.h noinst_LTLIBRARIES = libtesseract_native.la -noinst_LTLIBRARIES += libtesseract_avx.la libtesseract_avx2.la +if AVX_OPT +noinst_LTLIBRARIES += libtesseract_avx.la +endif +if AVX2_OPT +noinst_LTLIBRARIES += libtesseract_avx2.la +endif +if SSE41_OPT noinst_LTLIBRARIES += libtesseract_sse.la +endif noinst_LTLIBRARIES += libtesseract_arch.la libtesseract_arch_la_CPPFLAGS = $(AM_CPPFLAGS) @@ -41,8 +48,14 @@ libtesseract_native_la_SOURCES = dotproduct.cpp libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp +if AVX_OPT libtesseract_avx_la_SOURCES = dotproductavx.cpp +endif +if AVX2_OPT libtesseract_avx2_la_SOURCES = intsimdmatrixavx2.cpp +endif +if SSE41_OPT libtesseract_sse_la_SOURCES = dotproductsse.cpp intsimdmatrixsse.cpp +endif diff --git a/src/arch/dotproductavx.cpp b/src/arch/dotproductavx.cpp index 85a02c1b..ac2c2f60 100644 --- a/src/arch/dotproductavx.cpp +++ b/src/arch/dotproductavx.cpp @@ -16,8 +16,10 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// -#if defined(__AVX__) -// Implementation for avx capable archs. +#if !defined(__AVX__) +#error Implementation only for AVX capable architectures +#endif + #include #include #include "dotproductavx.h" @@ -96,5 +98,3 @@ double DotProductAVX(const double* u, const double* v, int n) { } } // namespace tesseract. - -#endif // __AVX__ diff --git a/src/arch/dotproductsse.cpp b/src/arch/dotproductsse.cpp index e0cbcdce..fefc8e40 100644 --- a/src/arch/dotproductsse.cpp +++ b/src/arch/dotproductsse.cpp @@ -16,7 +16,9 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// -#if defined(__SSE4_1__) +#if !defined(__SSE4_1__) +#error Implementation only for SSE 4.1 capable architectures +#endif #include #include @@ -117,5 +119,3 @@ int32_t IntDotProductSSE(const int8_t* u, const int8_t* v, int n) { } } // namespace tesseract. - -#endif // __SSE4_1__ diff --git a/src/arch/intsimdmatrix.cpp b/src/arch/intsimdmatrix.cpp index 222ae786..cd831ce7 100644 --- a/src/arch/intsimdmatrix.cpp +++ b/src/arch/intsimdmatrix.cpp @@ -25,12 +25,11 @@ namespace tesseract { const IntSimdMatrix* IntSimdMatrix::intSimdMatrix = nullptr; -// Computes a reshaped copy of the weight matrix w. If there are no -// partial_funcs_, it does nothing. -void IntSimdMatrix::Init(const GENERIC_2D_ARRAY& w, std::vector& shaped_w) const { - if (partial_funcs_.empty()) return; - int num_out = w.dim1(); - int num_in = w.dim2() - 1; +// Computes a reshaped copy of the weight matrix w. +void IntSimdMatrix::Init(const GENERIC_2D_ARRAY& w, + std::vector& shaped_w) const { + const int num_out = w.dim1(); + const int num_in = w.dim2() - 1; // The rounded-up sizes of the reshaped weight matrix, excluding biases. int rounded_num_in = Roundup(num_in, num_inputs_per_group_); int rounded_num_out = RoundOutputs(num_out); diff --git a/src/arch/intsimdmatrixavx2.cpp b/src/arch/intsimdmatrixavx2.cpp index 2dc97c43..0943ba9b 100644 --- a/src/arch/intsimdmatrixavx2.cpp +++ b/src/arch/intsimdmatrixavx2.cpp @@ -16,9 +16,12 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// +#if !defined(__AVX2__) +#error Implementation only for AVX2 capable architectures +#endif + #include "intsimdmatrix.h" -#ifdef __AVX2__ #include #include #include @@ -265,16 +268,9 @@ static void PartialMatrixDotVector8(const int8_t* wi, const double* scales, } ExtractResults(result0, shift_id, wi, scales, num_out, v); } -#else -namespace tesseract { -#endif // __AVX2__ -#ifdef __AVX2__ const IntSimdMatrix IntSimdMatrix::IntSimdMatrixAVX2 = IntSimdMatrix(kNumOutputsPerRegister, kMaxOutputRegisters, kNumInputsPerRegister, kNumInputsPerGroup, kNumInputGroups, {PartialMatrixDotVector64, PartialMatrixDotVector32, PartialMatrixDotVector16, PartialMatrixDotVector8}); -#else -const IntSimdMatrix IntSimdMatrix::IntSimdMatrixAVX2 = IntSimdMatrix(1, 1, 1, 1, 1, {}); -#endif // __AVX2__ } // namespace tesseract. diff --git a/src/arch/intsimdmatrixsse.cpp b/src/arch/intsimdmatrixsse.cpp index 65ae7b19..4f5d4d04 100644 --- a/src/arch/intsimdmatrixsse.cpp +++ b/src/arch/intsimdmatrixsse.cpp @@ -15,6 +15,10 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// +#if !defined(__SSE4_1__) +#error Implementation only for SSE 4.1 capable architectures +#endif + #include "intsimdmatrix.h" #include @@ -22,7 +26,6 @@ namespace tesseract { -#ifdef __SSE4_1__ // Computes part of matrix.vector v = Wu. Computes 1 result. static void PartialMatrixDotVector1(const int8_t* wi, const double* scales, const int8_t* u, int num_in, int num_out, @@ -31,14 +34,8 @@ static void PartialMatrixDotVector1(const int8_t* wi, const double* scales, // Add in the bias and correct for integer values. *v = (total / INT8_MAX + wi[num_in]) * *scales; } -#endif // __SSE4_1__ -#ifdef __SSE4_1__ const IntSimdMatrix IntSimdMatrix::IntSimdMatrixSSE = IntSimdMatrix(1, 1, 1, 1, 1, {PartialMatrixDotVector1}); -#else -const IntSimdMatrix IntSimdMatrix::IntSimdMatrixSSE = - IntSimdMatrix(1, 1, 1, 1, 1, {}); -#endif // __SSE4_1__ } // namespace tesseract. diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 38bc4c5c..359bbdb1 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -175,6 +175,13 @@ intfeaturemap_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) intsimdmatrix_test_SOURCES = intsimdmatrix_test.cc intsimdmatrix_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) +intsimdmatrix_test_CPPFLAGS = $(AM_CPPFLAGS) +if AVX2_OPT +intsimdmatrix_test_CPPFLAGS += -DAVX2 +endif +if SSE41_OPT +intsimdmatrix_test_CPPFLAGS += -DSSE4_1 +endif lang_model_test_SOURCES = lang_model_test.cc lang_model_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS) diff --git a/unittest/intsimdmatrix_test.cc b/unittest/intsimdmatrix_test.cc index 70bc7eb6..17e154d6 100644 --- a/unittest/intsimdmatrix_test.cc +++ b/unittest/intsimdmatrix_test.cc @@ -92,24 +92,32 @@ TEST_F(IntSimdMatrixTest, C) { // Tests that the SSE implementation gets the same result as the vanilla. TEST_F(IntSimdMatrixTest, SSE) { +#if defined(SSE4_1) if (SIMDDetect::IsSSEAvailable()) { tprintf("SSE found! Continuing..."); } else { - tprintf("No SSE found! Not Tested!"); + tprintf("No SSE found! Not tested!"); return; } ExpectEqualResults(IntSimdMatrix::IntSimdMatrixSSE); +#else + tprintf("SSE unsupported! Not tested!"); +#endif } // Tests that the AVX2 implementation gets the same result as the vanilla. TEST_F(IntSimdMatrixTest, AVX2) { +#if defined(AVX2) if (SIMDDetect::IsAVX2Available()) { tprintf("AVX2 found! Continuing..."); } else { - tprintf("No AVX2 found! Not Tested!"); + tprintf("No AVX2 found! Not tested!"); return; } ExpectEqualResults(IntSimdMatrix::IntSimdMatrixAVX2); +#else + tprintf("AVX2 unsupported! Not tested!"); +#endif } } // namespace