Merge pull request #738 from stweil/avx

Support AVX for 32 bit platforms
This commit is contained in:
zdenop 2017-02-27 19:35:19 +01:00 committed by GitHub
commit e68e48772d
2 changed files with 9 additions and 14 deletions

View File

@ -16,9 +16,8 @@
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#if !defined(__AVX__) || defined(__i386__)
#if !defined(__AVX__)
// Implementation for non-avx archs.
// Also used for 32 bit AVX archs because of missing _mm256_extract_epi64.
#include "dotproductavx.h"
#include <stdio.h>
@ -92,13 +91,13 @@ double DotProductAVX(const double* u, const double* v, int n) {
// fool the intrinsics into thinking we are extracting the bottom int64.
auto cast_sum = _mm256_castpd_si256(sum);
*(reinterpret_cast<inT64*>(&result)) =
#ifndef _WIN32
_mm256_extract_epi64(cast_sum, 0)
#else
// this is a very simple workaround that probably could be activated
// for all other platforms that do not have _mm256_extract_epi64
#if defined(_WIN32) || defined(__i386__)
// This is a very simple workaround that is activated
// for all platforms that do not have _mm256_extract_epi64.
// _mm256_extract_epi64(X, Y) == ((uint64_t*)&X)[Y]
((uint64_t*)&cast_sum)[0]
#else
_mm256_extract_epi64(cast_sum, 0)
#endif
;
while (offset < n) {

View File

@ -119,13 +119,9 @@ esac
AM_CONDITIONAL([AVX_OPT], false)
AM_CONDITIONAL([SSE41_OPT], false)
# The current implementation for AVX uses 64 bit code.
AC_CHECK_SIZEOF([void *])
if test "$ac_cv_sizeof_void_p" = "8"; then
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
if $avx; then
AM_CONDITIONAL([AVX_OPT], true)
fi
AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false])
if $avx; then
AM_CONDITIONAL([AVX_OPT], true)
fi
AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false])