mirror of
https://github.com/opencv/opencv.git
synced 2025-01-19 15:04:01 +08:00
Merge pull request #9007 from alalek:issue_9001
This commit is contained in:
commit
1c4fb418cf
@ -647,12 +647,15 @@ macro(ocv_compiler_optimization_fill_cpu_config)
|
||||
if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x")
|
||||
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
|
||||
# define CV_TRY_${OPT} 1
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} 1
|
||||
# define CV_CPU_CALL_${OPT}(fn, args) return (opt_${OPT}::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
|
||||
# define CV_TRY_${OPT} 1
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
|
||||
# define CV_CPU_CALL_${OPT}(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args)
|
||||
#else
|
||||
# define CV_TRY_${OPT} 0
|
||||
# define CV_CPU_HAS_SUPPORT_${OPT} 0
|
||||
# define CV_CPU_CALL_${OPT}(fn, args)
|
||||
#endif
|
||||
|
@ -1,144 +1,180 @@
|
||||
// AUTOGENERATED, DO NOT EDIT
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
|
||||
# define CV_TRY_SSE 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 1
|
||||
# define CV_CPU_CALL_SSE(fn, args) return (opt_SSE::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
|
||||
# define CV_TRY_SSE 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
|
||||
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE 0
|
||||
# define CV_CPU_CALL_SSE(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
|
||||
# define CV_TRY_SSE2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 1
|
||||
# define CV_CPU_CALL_SSE2(fn, args) return (opt_SSE2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
|
||||
# define CV_TRY_SSE2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
|
||||
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE2 0
|
||||
# define CV_CPU_CALL_SSE2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
|
||||
# define CV_TRY_SSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 1
|
||||
# define CV_CPU_CALL_SSE3(fn, args) return (opt_SSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
|
||||
# define CV_TRY_SSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
|
||||
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE3 0
|
||||
# define CV_CPU_CALL_SSE3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
|
||||
# define CV_TRY_SSSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 1
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) return (opt_SSSE3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
|
||||
# define CV_TRY_SSSE3 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
|
||||
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSSE3 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSSE3 0
|
||||
# define CV_CPU_CALL_SSSE3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
|
||||
# define CV_TRY_SSE4_1 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) return (opt_SSE4_1::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
|
||||
# define CV_TRY_SSE4_1 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE4_1 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
|
||||
# define CV_CPU_CALL_SSE4_1(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
|
||||
# define CV_TRY_SSE4_2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) return (opt_SSE4_2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
|
||||
# define CV_TRY_SSE4_2 1
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
|
||||
#else
|
||||
# define CV_TRY_SSE4_2 0
|
||||
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
|
||||
# define CV_CPU_CALL_SSE4_2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
|
||||
# define CV_TRY_POPCNT 1
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 1
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) return (opt_POPCNT::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
|
||||
# define CV_TRY_POPCNT 1
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
|
||||
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
|
||||
#else
|
||||
# define CV_TRY_POPCNT 0
|
||||
# define CV_CPU_HAS_SUPPORT_POPCNT 0
|
||||
# define CV_CPU_CALL_POPCNT(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
|
||||
# define CV_TRY_AVX 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 1
|
||||
# define CV_CPU_CALL_AVX(fn, args) return (opt_AVX::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
|
||||
# define CV_TRY_AVX 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
|
||||
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX 0
|
||||
# define CV_CPU_CALL_AVX(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
|
||||
# define CV_TRY_FP16 1
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 1
|
||||
# define CV_CPU_CALL_FP16(fn, args) return (opt_FP16::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
|
||||
# define CV_TRY_FP16 1
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
|
||||
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
|
||||
#else
|
||||
# define CV_TRY_FP16 0
|
||||
# define CV_CPU_HAS_SUPPORT_FP16 0
|
||||
# define CV_CPU_CALL_FP16(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
|
||||
# define CV_TRY_AVX2 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 1
|
||||
# define CV_CPU_CALL_AVX2(fn, args) return (opt_AVX2::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
|
||||
# define CV_TRY_AVX2 1
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
|
||||
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
|
||||
#else
|
||||
# define CV_TRY_AVX2 0
|
||||
# define CV_CPU_HAS_SUPPORT_AVX2 0
|
||||
# define CV_CPU_CALL_AVX2(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
|
||||
# define CV_TRY_FMA3 1
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 1
|
||||
# define CV_CPU_CALL_FMA3(fn, args) return (opt_FMA3::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
|
||||
# define CV_TRY_FMA3 1
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
|
||||
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
|
||||
#else
|
||||
# define CV_TRY_FMA3 0
|
||||
# define CV_CPU_HAS_SUPPORT_FMA3 0
|
||||
# define CV_CPU_CALL_FMA3(fn, args)
|
||||
#endif
|
||||
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
|
||||
|
||||
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
|
||||
# define CV_TRY_NEON 1
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 1
|
||||
# define CV_CPU_CALL_NEON(fn, args) return (opt_NEON::fn args)
|
||||
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
|
||||
# define CV_TRY_NEON 1
|
||||
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
|
||||
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
|
||||
#else
|
||||
# define CV_TRY_NEON 0
|
||||
# define CV_CPU_HAS_SUPPORT_NEON 0
|
||||
# define CV_CPU_CALL_NEON(fn, args)
|
||||
#endif
|
||||
|
@ -315,7 +315,7 @@ public:
|
||||
int inpCnAll = input.size[1], width = input.size[3], height = input.size[2];
|
||||
int inpCn = inpCnAll / ngroups;
|
||||
p.is1x1_ = kernel == Size(0,0) && pad == Size(0, 0);
|
||||
p.useAVX2 = checkHardwareSupport(CPU_AVX2);
|
||||
p.useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
|
||||
|
||||
int ncn = std::min(inpCn, (int)BLK_SIZE_CN);
|
||||
p.ofstab_.resize(kernel.width*kernel.height*ncn);
|
||||
@ -486,7 +486,7 @@ public:
|
||||
// now compute dot product of the weights
|
||||
// and im2row-transformed part of the tensor
|
||||
int bsz = ofs1 - ofs0;
|
||||
#if CV_DNN_TRY_AVX2
|
||||
#if CV_TRY_AVX2
|
||||
if(useAVX2)
|
||||
fastConv_avx2(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0,
|
||||
outShape, bsz, vsz, vsz_a, relu, cn0 == 0);
|
||||
@ -776,7 +776,7 @@ public:
|
||||
b_ = &b;
|
||||
c_ = &c;
|
||||
nstripes_ = nstripes;
|
||||
useAVX2 = checkHardwareSupport(CPU_AVX2);
|
||||
useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
|
||||
}
|
||||
|
||||
void operator()(const Range& range_) const
|
||||
@ -794,7 +794,7 @@ public:
|
||||
size_t bstep = b_->step1();
|
||||
size_t cstep = c_->step1();
|
||||
|
||||
#if CV_DNN_TRY_AVX2
|
||||
#if CV_TRY_AVX2
|
||||
if( useAVX2 )
|
||||
fastGEMM_avx2( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax );
|
||||
else
|
||||
|
@ -127,7 +127,7 @@ public:
|
||||
biasMat_ = &biasMat;
|
||||
dstMat_ = &dstMat;
|
||||
nstripes_ = nstripes;
|
||||
useAVX2_ = checkHardwareSupport(CPU_AVX2);
|
||||
useAVX2_ = CV_CPU_HAS_SUPPORT_AVX2;
|
||||
}
|
||||
|
||||
void operator()(const Range& r) const
|
||||
@ -161,7 +161,7 @@ public:
|
||||
|
||||
memcpy(sptr, sptr_, vecsize*sizeof(sptr[0]));
|
||||
|
||||
#if CV_DNN_TRY_AVX2
|
||||
#if CV_TRY_AVX2
|
||||
if( useAVX2_ )
|
||||
fastGEMM1T_avx2( sptr, wptr, wstep, biasptr, dptr, nw, vecsize);
|
||||
else
|
||||
|
@ -43,10 +43,6 @@
|
||||
#include "layers_common.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
#if CV_DNN_TRY_AVX2
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
namespace cv {
|
||||
namespace dnn {
|
||||
|
||||
@ -334,7 +330,6 @@ void fastGEMM_avx2( const float* aptr, size_t astep, const float* bptr,
|
||||
_mm256_storeu_ps(cptr3 + n + 8, d31);
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
|
||||
for( ; n < nb; n++ )
|
||||
{
|
||||
@ -350,9 +345,8 @@ void fastGEMM_avx2( const float* aptr, size_t astep, const float* bptr,
|
||||
cptr0[n] = d0;
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -63,9 +63,7 @@ void getConvPoolPaddings(const Size& inp, const Size& out,
|
||||
const Size &kernel, const Size &stride,
|
||||
const String &padMode, Size &pad);
|
||||
|
||||
#if CV_SSE2
|
||||
#define CV_DNN_TRY_AVX2 1
|
||||
|
||||
#if CV_TRY_AVX2
|
||||
void fastConv_avx2(const float* weights, size_t wstep, const float* bias,
|
||||
const float* rowbuf, float* output, const int* outShape,
|
||||
int blockSize, int vecsize, int vecsize_aligned,
|
||||
@ -76,9 +74,6 @@ void fastGEMM1T_avx2( const float* vec, const float* weights,
|
||||
void fastGEMM_avx2( const float* aptr, size_t astep, const float* bptr0,
|
||||
size_t bstep, float* cptr, size_t cstep,
|
||||
int ma, int na, int nb );
|
||||
|
||||
#else
|
||||
#define CV_DNN_TRY_AVX2 0
|
||||
#endif
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user