diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1910b490..bb929106 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -236,18 +236,18 @@ foreach(flag ${Vc_ARCHITECTURE_FLAGS})
 endforeach()
 
 # add definition as expected in src/arch/simddetect.cpp
-set(AVX_OPT OFF)
-set(AVX2_OPT OFF)
-set(FMA_OPT OFF)
-set(SSE41_OPT OFF)
+set(HAVE_AVX OFF)
+set(HAVE_AVX2 OFF)
+set(HAVE_FMA OFF)
+set(HAVE_SSE4_1 OFF)
 set(MARCH_NATIVE_OPT OFF)
 foreach(flag ${_enable_vector_unit_list}) # from OptimizeForArchitecture()
     string(TOUPPER "${flag}" flag)
     string(REPLACE "\." "_" flag "${flag}")
-    set(simd_flags "${simd_flags} -D${flag}")
-    string(REPLACE "_" "" flag "${flag}")
-    if("${flag}" MATCHES "AVX|AVX2|FMA|SSE41")
-        set("${flag}_OPT" ON)
+    # Anchor the regex: an unanchored match would also accept names that merely contain AVX or FMA (e.g. AVX512F).
+    if("${flag}" MATCHES "^(AVX|AVX2|FMA|SSE4_1)$")
+        set(simd_flags "${simd_flags} -DHAVE_${flag}")
+        set("HAVE_${flag}" ON)
     endif()
 endforeach(flag)
 if (NOT MSVC)
@@ -306,10 +306,10 @@ message( STATUS "Linker options: ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FL
 message( STATUS "Install directory: ${CMAKE_INSTALL_PREFIX}")
 message( STATUS "Architecture flags: ${Vc_ARCHITECTURE_FLAGS}")
 message( STATUS "Vector unit list: ${_enable_vector_unit_list}")
-message( STATUS "AVX_OPT: ${AVX_OPT}")
-message( STATUS "AVX2_OPT: ${AVX2_OPT}")
-message( STATUS "FMA_OPT: ${FMA_OPT}")
-message( STATUS "SSE41_OPT: ${SSE41_OPT}")
+message( STATUS "HAVE_AVX: ${HAVE_AVX}")
+message( STATUS "HAVE_AVX2: ${HAVE_AVX2}")
+message( STATUS "HAVE_FMA: ${HAVE_FMA}")
+message( STATUS "HAVE_SSE4_1: ${HAVE_SSE4_1}")
 message( STATUS "MARCH_NATIVE_OPT: ${MARCH_NATIVE_OPT}")
 message( STATUS "simd_flags: ${simd_flags}")
 message( STATUS "--------------------------------------------------------")
@@ -383,22 +383,22 @@ list(APPEND arch_files
     )
 set_source_files_properties(${arch_files} PROPERTIES COMPILE_FLAGS "${simd_flags}")
 set_source_files_properties(src/arch/dotproduct.cpp PROPERTIES COMPILE_FLAGS "${MARCH_NATIVE_FLAGS} ${Vc_CXX_FLAGS}")
-if(AVX_OPT)
+if(HAVE_AVX)
     list(APPEND arch_files_opt src/arch/dotproductavx.cpp)
-    set_source_files_properties(src/arch/dotproductavx.cpp PROPERTIES COMPILE_FLAGS "-DAVX")
-endif(AVX_OPT)
-if(AVX2_OPT)
+    set_source_files_properties(src/arch/dotproductavx.cpp PROPERTIES COMPILE_FLAGS "-mavx")
+endif(HAVE_AVX)
+if(HAVE_AVX2)
     list(APPEND arch_files_opt src/arch/intsimdmatrixavx2.cpp)
-    set_source_files_properties(src/arch/intsimdmatrixavx2.cpp PROPERTIES COMPILE_FLAGS "-DAVX2")
-endif(AVX2_OPT)
-if(FMA_OPT)
+    set_source_files_properties(src/arch/intsimdmatrixavx2.cpp PROPERTIES COMPILE_FLAGS "-mavx2")
+endif(HAVE_AVX2)
+if(HAVE_FMA)
     list(APPEND arch_files_opt src/arch/dotproductfma.cpp)
     set_source_files_properties(src/arch/dotproductfma.cpp PROPERTIES COMPILE_FLAGS "-mfma")
-endif(FMA_OPT)
-if(SSE41_OPT)
+endif(HAVE_FMA)
+if(HAVE_SSE4_1)
     list(APPEND arch_files_opt src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp)
-    set_source_files_properties(src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp PROPERTIES COMPILE_FLAGS "-DSSE4_1 -msse4.1")
-endif(SSE41_OPT)
+    set_source_files_properties(src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp PROPERTIES COMPILE_FLAGS "-msse4.1")
+endif(HAVE_SSE4_1)
 set_source_files_properties(${arch_files_opt} PROPERTIES COMPILE_FLAGS "${Vc_CXX_FLAGS}")
 
 file(GLOB tesseract_hdr
diff --git a/Makefile.am b/Makefile.am
index 444cf0fe..58146b4b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -150,19 +150,6 @@ libtesseract_la_LIBADD += libtesseract_textord.la
 libtesseract_la_LIBADD += libtesseract_viewer.la
 libtesseract_la_LIBADD += libtesseract_wordrec.la
 
-if AVX_OPT
-libtesseract_la_LIBADD += libtesseract_avx.la
-endif
-if AVX2_OPT
-libtesseract_la_LIBADD += libtesseract_avx2.la
-endif
-if FMA_OPT
-libtesseract_la_LIBADD += libtesseract_fma.la
-endif
-if SSE41_OPT
-libtesseract_la_LIBADD += libtesseract_sse.la
-endif
-
 libtesseract_la_LDFLAGS += -version-info $(GENERIC_LIBRARY_VERSION) $(NOUNDEFINED)
 
 # Rules for src/arch.
@@ -177,37 +164,8 @@ noinst_HEADERS += src/arch/intsimdmatrix.h
 noinst_HEADERS += src/arch/simddetect.h
 
 noinst_LTLIBRARIES += libtesseract_native.la
-if AVX_OPT
-noinst_LTLIBRARIES += libtesseract_avx.la
-endif
-if AVX2_OPT
-noinst_LTLIBRARIES += libtesseract_avx2.la
-endif
-if FMA_OPT
-noinst_LTLIBRARIES += libtesseract_fma.la
-endif
-if SSE41_OPT
-noinst_LTLIBRARIES += libtesseract_sse.la
-endif
 noinst_LTLIBRARIES += libtesseract_arch.la
 
-if AVX_OPT
-libtesseract_arch_la_CPPFLAGS += -DAVX
-libtesseract_avx_la_CXXFLAGS = -mavx
-endif
-if AVX2_OPT
-libtesseract_arch_la_CPPFLAGS += -DAVX2
-libtesseract_avx2_la_CXXFLAGS = -mavx2
-endif
-if FMA_OPT
-libtesseract_arch_la_CPPFLAGS += -DFMA
-libtesseract_fma_la_CXXFLAGS = -mfma
-endif
-if SSE41_OPT
-libtesseract_arch_la_CPPFLAGS += -DSSE4_1
-libtesseract_sse_la_CXXFLAGS = -msse4.1
-endif
-
 libtesseract_native_la_CXXFLAGS = -O3 -ffast-math
 if MARCH_NATIVE_OPT
 libtesseract_native_la_CXXFLAGS += -march=native -mtune=native
@@ -216,20 +174,32 @@ libtesseract_native_la_SOURCES = src/arch/dotproduct.cpp
 
 libtesseract_arch_la_SOURCES = src/arch/intsimdmatrix.cpp src/arch/simddetect.cpp
 
-if AVX_OPT
+if HAVE_AVX
+libtesseract_avx_la_CXXFLAGS = -mavx
 libtesseract_avx_la_SOURCES = src/arch/dotproductavx.cpp
+libtesseract_la_LIBADD += libtesseract_avx.la
+noinst_LTLIBRARIES += libtesseract_avx.la
 endif
 
-if AVX2_OPT
+if HAVE_AVX2
+libtesseract_avx2_la_CXXFLAGS = -mavx2
 libtesseract_avx2_la_SOURCES = src/arch/intsimdmatrixavx2.cpp
+libtesseract_la_LIBADD += libtesseract_avx2.la
+noinst_LTLIBRARIES += libtesseract_avx2.la
 endif
 
-if FMA_OPT
+if HAVE_FMA
+libtesseract_fma_la_CXXFLAGS = -mfma
 libtesseract_fma_la_SOURCES = src/arch/dotproductfma.cpp
+libtesseract_la_LIBADD += libtesseract_fma.la
+noinst_LTLIBRARIES += libtesseract_fma.la
 endif
 
-if SSE41_OPT
+if HAVE_SSE4_1
+libtesseract_sse_la_CXXFLAGS = -msse4.1
 libtesseract_sse_la_SOURCES = src/arch/dotproductsse.cpp src/arch/intsimdmatrixsse.cpp
+libtesseract_la_LIBADD += libtesseract_sse.la
+noinst_LTLIBRARIES += libtesseract_sse.la
 endif
 
 # Rules for src/ccmain.
diff --git a/configure.ac b/configure.ac
index 951c2491..0550e1c1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -125,16 +125,28 @@ AX_CHECK_COMPILE_FLAG([-Werror=unused-command-line-argument], [WERROR=-Werror=un
 ## Checks for supported compiler options.
 
 AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false], [$WERROR])
-AM_CONDITIONAL([AVX_OPT], ${avx})
+AM_CONDITIONAL([HAVE_AVX], ${avx})
+if $avx; then
+  AC_DEFINE([HAVE_AVX], [1], [Enable AVX instructions])
+fi
 
 AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false], [$WERROR])
-AM_CONDITIONAL([AVX2_OPT], $avx2)
+AM_CONDITIONAL([HAVE_AVX2], $avx2)
+if $avx2; then
+  AC_DEFINE([HAVE_AVX2], [1], [Enable AVX2 instructions])
+fi
 
 AX_CHECK_COMPILE_FLAG([-mfma], [fma=true], [fma=false], [$WERROR])
-AM_CONDITIONAL([FMA_OPT], $fma)
+AM_CONDITIONAL([HAVE_FMA], $fma)
+if $fma; then
+  AC_DEFINE([HAVE_FMA], [1], [Enable FMA instructions])
+fi
 
 AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false], [$WERROR])
-AM_CONDITIONAL([SSE41_OPT], $sse41)
+AM_CONDITIONAL([HAVE_SSE4_1], $sse41)
+if $sse41; then
+  AC_DEFINE([HAVE_SSE4_1], [1], [Enable SSE 4.1 instructions])
+fi
 
 AX_CHECK_COMPILE_FLAG([-march=native], [arch_native=true], [arch_native=false], [$WERROR])
 AM_CONDITIONAL([MARCH_NATIVE_OPT], $arch_native)
diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp
index b580fa1c..127764e0 100644
--- a/src/arch/simddetect.cpp
+++ b/src/arch/simddetect.cpp
@@ -15,6 +15,7 @@
 // limitations under the License.
 ///////////////////////////////////////////////////////////////////////
 
+#include "config_auto.h" // for HAVE_AVX, ...
 #include <numeric> // for std::inner_product
 #include "simddetect.h"
 #include "dotproduct.h"
@@ -22,7 +23,7 @@
 #include "params.h" // for STRING_VAR
 #include "tprintf.h" // for tprintf
 
-#if defined(AVX) || defined(AVX2) || defined(FMA) || defined(SSE4_1)
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA) || defined(HAVE_SSE4_1)
 # define HAS_CPUID
 #endif
 
@@ -95,13 +96,13 @@ SIMDDetect::SIMDDetect() {
   if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
     // Note that these tests all use hex because the older compilers don't have
     // the newer flags.
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
     sse_available_ = (ecx & 0x00080000) != 0;
 #endif
-#if defined(FMA)
+#if defined(HAVE_FMA)
     fma_available_ = (ecx & 0x00001000) != 0;
 #endif
-#if defined(AVX)
+#if defined(HAVE_AVX)
     avx_available_ = (ecx & 0x10000000) != 0;
     if (avx_available_) {
       // There is supposed to be a __get_cpuid_count function, but this is all
@@ -121,19 +122,19 @@ SIMDDetect::SIMDDetect() {
   max_function_id = cpuInfo[0];
   if (max_function_id >= 1) {
     __cpuid(cpuInfo, 1);
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
     sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
 #endif
-#if defined(AVX) || defined(AVX2) || defined(FMA)
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_FMA)
     if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
       // OSXSAVE bit is set, XMM state and YMM state are fine.
-#if defined(FMA)
+#if defined(HAVE_FMA)
       fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
 #endif
-#if defined(AVX)
+#if defined(HAVE_AVX)
       avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
 #endif
-#if defined(AVX2)
+#if defined(HAVE_AVX2)
       if (max_function_id >= 7) {
         __cpuid(cpuInfo, 7);
         avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
@@ -152,17 +153,17 @@ SIMDDetect::SIMDDetect() {
   // Select code for calculation of dot product based on autodetection.
   if (false) {
     // This is a dummy to support conditional compilation.
-#if defined(AVX2)
+#if defined(HAVE_AVX2)
   } else if (avx2_available_) {
     // AVX2 detected.
     SetDotProduct(DotProductAVX, &IntSimdMatrix::intSimdMatrixAVX2);
 #endif
-#if defined(AVX)
+#if defined(HAVE_AVX)
   } else if (avx_available_) {
     // AVX detected.
     SetDotProduct(DotProductAVX, &IntSimdMatrix::intSimdMatrixSSE);
 #endif
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
   } else if (sse_available_) {
     // SSE detected.
     SetDotProduct(DotProductSSE, &IntSimdMatrix::intSimdMatrixSSE);
@@ -184,25 +185,25 @@ void SIMDDetect::Update() {
     // Native optimized code selected by config variable.
     SetDotProduct(DotProductNative);
     dotproduct_method = "native";
-#if defined(AVX2)
+#if defined(HAVE_AVX2)
   } else if (!strcmp(dotproduct.c_str(), "avx2")) {
     // AVX2 selected by config variable.
     SetDotProduct(DotProductAVX, &IntSimdMatrix::intSimdMatrixAVX2);
     dotproduct_method = "avx2";
 #endif
-#if defined(AVX)
+#if defined(HAVE_AVX)
   } else if (!strcmp(dotproduct.c_str(), "avx")) {
     // AVX selected by config variable.
     SetDotProduct(DotProductAVX, &IntSimdMatrix::intSimdMatrixSSE);
     dotproduct_method = "avx";
 #endif
-#if defined(FMA)
+#if defined(HAVE_FMA)
   } else if (!strcmp(dotproduct.c_str(), "fma")) {
     // FMA selected by config variable.
     SetDotProduct(DotProductFMA, IntSimdMatrix::intSimdMatrix);
     dotproduct_method = "fma";
 #endif
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
   } else if (!strcmp(dotproduct.c_str(), "sse")) {
     // SSE selected by config variable.
     SetDotProduct(DotProductSSE, &IntSimdMatrix::intSimdMatrixSSE);
@@ -217,10 +218,10 @@ void SIMDDetect::Update() {
     tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
             dotproduct.c_str());
     tprintf("Support values for dotproduct: auto generic native"
-#if defined(AVX)
+#if defined(HAVE_AVX)
             " avx"
 #endif
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
             " sse"
 #endif
             " std::inner_product.\n");
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 0623848b..3037f649 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -271,11 +271,11 @@ intfeaturemap_test_LDADD = $(TRAINING_LIBS)
 intsimdmatrix_test_SOURCES = intsimdmatrix_test.cc
 intsimdmatrix_test_LDADD = $(TESS_LIBS)
 intsimdmatrix_test_CPPFLAGS = $(AM_CPPFLAGS)
-if AVX2_OPT
-intsimdmatrix_test_CPPFLAGS += -DAVX2
+if HAVE_AVX2
+intsimdmatrix_test_CPPFLAGS += -DHAVE_AVX2
 endif
-if SSE41_OPT
-intsimdmatrix_test_CPPFLAGS += -DSSE4_1
+if HAVE_SSE4_1
+intsimdmatrix_test_CPPFLAGS += -DHAVE_SSE4_1
 endif
 
 lang_model_test_SOURCES = lang_model_test.cc
diff --git a/unittest/intsimdmatrix_test.cc b/unittest/intsimdmatrix_test.cc
index 09326246..8ddd8998 100644
--- a/unittest/intsimdmatrix_test.cc
+++ b/unittest/intsimdmatrix_test.cc
@@ -98,7 +98,7 @@ TEST_F(IntSimdMatrixTest, C) {
 
 // Tests that the SSE implementation gets the same result as the vanilla.
 TEST_F(IntSimdMatrixTest, SSE) {
-#if defined(SSE4_1)
+#if defined(HAVE_SSE4_1)
   if (SIMDDetect::IsSSEAvailable()) {
     tprintf("SSE found! Continuing...");
   } else {
@@ -113,7 +113,7 @@ TEST_F(IntSimdMatrixTest, SSE) {
 
 // Tests that the AVX2 implementation gets the same result as the vanilla.
 TEST_F(IntSimdMatrixTest, AVX2) {
-#if defined(AVX2)
+#if defined(HAVE_AVX2)
   if (SIMDDetect::IsAVX2Available()) {
     tprintf("AVX2 found! Continuing...");
   } else {