Mirror of https://github.com/opencv/opencv.git (synced 2025-07-31 09:57:28 +08:00)
Backport to 4.x: patchNaNs() SIMD acceleration #24480

backport from #23098

connected PR in extra: [#1118@extra](https://github.com/opencv/opencv_extra/pull/1118)

### This PR contains:

* new SIMD code for `patchNaNs()`
* CPU perf test

<details>
<summary>Performance comparison</summary>

Geometric mean (ms)

|Name of Test|noopt|sse2|avx2|sse2 vs noopt (x-factor)|avx2 vs noopt (x-factor)|
|---|:-:|:-:|:-:|:-:|:-:|
|PatchNaNs::OCL_PatchNaNsFixture::(640x480, 32FC1)|0.019|0.017|0.018|1.11|1.07|
|PatchNaNs::OCL_PatchNaNsFixture::(640x480, 32FC4)|0.037|0.037|0.033|1.00|1.10|
|PatchNaNs::OCL_PatchNaNsFixture::(1280x720, 32FC1)|0.032|0.032|0.033|0.99|0.98|
|PatchNaNs::OCL_PatchNaNsFixture::(1280x720, 32FC4)|0.072|0.072|0.070|1.00|1.03|
|PatchNaNs::OCL_PatchNaNsFixture::(1920x1080, 32FC1)|0.051|0.051|0.050|1.00|1.01|
|PatchNaNs::OCL_PatchNaNsFixture::(1920x1080, 32FC4)|0.137|0.138|0.128|0.99|1.06|
|PatchNaNs::OCL_PatchNaNsFixture::(3840x2160, 32FC1)|0.137|0.128|0.129|1.07|1.06|
|PatchNaNs::OCL_PatchNaNsFixture::(3840x2160, 32FC4)|0.450|0.450|0.448|1.00|1.01|
|PatchNaNs::PatchNaNsFixture::(640x480, 32FC1)|0.149|0.029|0.020|5.13|7.44|
|PatchNaNs::PatchNaNsFixture::(640x480, 32FC2)|0.304|0.058|0.040|5.25|7.65|
|PatchNaNs::PatchNaNsFixture::(640x480, 32FC3)|0.448|0.086|0.059|5.22|7.55|
|PatchNaNs::PatchNaNsFixture::(640x480, 32FC4)|0.601|0.133|0.083|4.51|7.23|
|PatchNaNs::PatchNaNsFixture::(1280x720, 32FC1)|0.451|0.093|0.060|4.83|7.52|
|PatchNaNs::PatchNaNsFixture::(1280x720, 32FC2)|0.892|0.184|0.126|4.85|7.06|
|PatchNaNs::PatchNaNsFixture::(1280x720, 32FC3)|1.345|0.311|0.230|4.32|5.84|
|PatchNaNs::PatchNaNsFixture::(1280x720, 32FC4)|1.831|0.546|0.436|3.35|4.20|
|PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC1)|1.017|0.250|0.160|4.06|6.35|
|PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC2)|2.077|0.646|0.605|3.21|3.43|
|PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC3)|3.134|1.053|0.961|2.97|3.26|
|PatchNaNs::PatchNaNsFixture::(1920x1080, 32FC4)|4.222|1.436|1.288|2.94|3.28|
|PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC1)|4.225|1.401|1.277|3.01|3.31|
|PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC2)|8.310|2.953|2.635|2.81|3.15|
|PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC3)|12.396|4.455|4.252|2.78|2.92|
|PatchNaNs::PatchNaNsFixture::(3840x2160, 32FC4)|17.174|5.831|5.824|2.95|2.95|

</details>

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
||
---|---|---|
.. | ||
cuda | ||
opencl | ||
parallel | ||
utils | ||
algorithm.cpp | ||
alloc.cpp | ||
arithm_ipp.hpp | ||
arithm.cpp | ||
arithm.dispatch.cpp | ||
arithm.simd.hpp | ||
array.cpp | ||
async.cpp | ||
batch_distance.cpp | ||
bindings_utils.cpp | ||
buffer_area.cpp | ||
bufferpool.impl.hpp | ||
channels.cpp | ||
check.cpp | ||
command_line_parser.cpp | ||
conjugate_gradient.cpp | ||
convert_c.cpp | ||
convert_scale.dispatch.cpp | ||
convert_scale.simd.hpp | ||
convert.dispatch.cpp | ||
convert.hpp | ||
convert.simd.hpp | ||
copy.cpp | ||
count_non_zero.dispatch.cpp | ||
count_non_zero.simd.hpp | ||
cuda_gpu_mat_nd.cpp | ||
cuda_gpu_mat.cpp | ||
cuda_host_mem.cpp | ||
cuda_info.cpp | ||
cuda_stream.cpp | ||
datastructs.cpp | ||
directx.cpp | ||
directx.inc.hpp | ||
downhill_simplex.cpp | ||
dxt.cpp | ||
gl_core_3_1.cpp | ||
gl_core_3_1.hpp | ||
glob.cpp | ||
hal_internal.cpp | ||
hal_internal.hpp | ||
hal_replacement.hpp | ||
has_non_zero.dispatch.cpp | ||
has_non_zero.simd.hpp | ||
intel_gpu_gemm.inl.hpp | ||
kmeans.cpp | ||
lapack.cpp | ||
lda.cpp | ||
logger.cpp | ||
lpsolver.cpp | ||
lut.cpp | ||
mathfuncs_core.dispatch.cpp | ||
mathfuncs_core.simd.hpp | ||
mathfuncs.cpp | ||
mathfuncs.hpp | ||
matmul.dispatch.cpp | ||
matmul.simd.hpp | ||
matrix_c.cpp | ||
matrix_decomp.cpp | ||
matrix_expressions.cpp | ||
matrix_iterator.cpp | ||
matrix_operations.cpp | ||
matrix_sparse.cpp | ||
matrix_transform.cpp | ||
matrix_wrap.cpp | ||
matrix.cpp | ||
mean.dispatch.cpp | ||
mean.simd.hpp | ||
merge.dispatch.cpp | ||
merge.simd.hpp | ||
minmax.cpp | ||
norm.cpp | ||
ocl_disabled.impl.hpp | ||
ocl.cpp | ||
opengl.cpp | ||
out.cpp | ||
ovx.cpp | ||
parallel_impl.cpp | ||
parallel_impl.hpp | ||
parallel.cpp | ||
pca.cpp | ||
persistence_base64_encoding.cpp | ||
persistence_base64_encoding.hpp | ||
persistence_impl.hpp | ||
persistence_json.cpp | ||
persistence_types.cpp | ||
persistence_xml.cpp | ||
persistence_yml.cpp | ||
persistence.cpp | ||
persistence.hpp | ||
precomp.hpp | ||
rand.cpp | ||
softfloat.cpp | ||
split.dispatch.cpp | ||
split.simd.hpp | ||
stat_c.cpp | ||
stat.dispatch.cpp | ||
stat.hpp | ||
stat.simd.hpp | ||
stl.cpp | ||
sum.dispatch.cpp | ||
sum.simd.hpp | ||
system.cpp | ||
tables.cpp | ||
trace.cpp | ||
types.cpp | ||
umatrix.cpp | ||
umatrix.hpp | ||
va_intel.cpp | ||
va_wrapper.impl.hpp |