From b465c826963f9ea01d466f0281b568196e6ef933 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Thu, 12 Sep 2019 09:45:56 -0500 Subject: [PATCH] core: workaround old gcc vec_mul{e,o} (Issue #15506) ISA 2.07 (aka POWER8) effectively extended the expanding multiply operation to word types. The altivec intrinsics prior to gcc 8 did not get the update. Workaround this deficiency similar to other fixes. This was exposed by commit 33fb253a66275abaa5060ef318c9a5cc87c5fd6e which leverages the int -> dword expanding multiply. This fixes Issue #15506 --- .../core/include/opencv2/core/vsx_utils.hpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp index c60da3cbbc..0f1029de83 100644 --- a/modules/core/include/opencv2/core/vsx_utils.hpp +++ b/modules/core/include/opencv2/core/vsx_utils.hpp @@ -124,6 +124,33 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) \ #define VSX_IMPL_2VRG(rt, rg, opc, fnm) VSX_IMPL_2VRG_F(rt, rg, #opc" %0,%1,%2", fnm) +#if __GNUG__ < 8 + + // Support for int4 -> dword2 expanding multiply was added in GCC 8. + #ifdef vec_mule + #undef vec_mule + #endif + #ifdef vec_mulo + #undef vec_mulo + #endif + + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mule, __builtin_vec_mule) + VSX_REDIRECT_2RG(vec_ushort8, vec_uchar16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_short8, vec_char16, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_int4, vec_short8, vec_mulo, __builtin_vec_mulo) + VSX_REDIRECT_2RG(vec_uint4, vec_ushort8, vec_mulo, __builtin_vec_mulo) + + // dword2 support arrived in ISA 2.07 and GCC 8+ + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulesw, vec_mule) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmuleuw, vec_mule) + VSX_IMPL_2VRG(vec_dword2, vec_int4, vmulosw, vec_mulo) + VSX_IMPL_2VRG(vec_udword2, vec_uint4, vmulouw, vec_mulo) + +#endif + #if __GNUG__ < 7 // up to GCC 6 vec_mul only supports precisions and llong # ifdef vec_mul