core: dispatch matmul

- gemm: keep baseline only (lapack is 10x+ faster, let's reduce binary size) - transform / distTransform - scaleAdd (32f/64f only) - Mahalanobis: keep baseline only (no perf tests) - mulTransposed: keep baseline only (no perf tests) - dot
2025-08-06 14:36:36 +08:00 · 2019-02-14 16:00:17 +03:00 · 2019-02-14 16:00:17 +03:00 · cd66f6e3db
commit cd66f6e3db
parent fbde57dba8
4 changed files with 308 additions and 3575 deletions
--- a/modules/core/CMakeLists.txt
+++ b/modules/core/CMakeLists.txt
@@ -6,6 +6,7 @@ ocv_add_dispatched_file(arithm SSE2 SSE4_1 AVX2 VSX3)
 ocv_add_dispatched_file(convert SSE2 AVX2)
 ocv_add_dispatched_file(convert_scale SSE2 AVX2)
 ocv_add_dispatched_file(count_non_zero SSE2 AVX2)
+ocv_add_dispatched_file(matmul SSE2 AVX2)
 ocv_add_dispatched_file(sum SSE2 AVX2)

 # dispatching for accuracy tests
--- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h
+++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h
@@ -15,6 +15,7 @@
 #define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
 #define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
 #define CV_CPU_OPTIMIZATION_NAMESPACE_END }
+#define CV_CPU_BASELINE_MODE 1
 #endif


--- a/modules/core/src/matmul.dispatch.cpp
+++ b/modules/core/src/matmul.dispatch.cpp
--- a/modules/core/src/matmul.simd.hpp
+++ b/modules/core/src/matmul.simd.hpp