mirror of
https://github.com/opencv/opencv.git
synced 2024-12-30 04:58:54 +08:00
b0a3b42287
Enable SSE optimization
106 lines
4.4 KiB
Diff
106 lines
4.4 KiB
Diff
diff --git a/3rdparty/libpng/CMakeLists.txt b/3rdparty/libpng/CMakeLists.txt
|
|
index fee9c99..eaf2095 100644
|
|
--- a/3rdparty/libpng/CMakeLists.txt
|
|
+++ b/3rdparty/libpng/CMakeLists.txt
|
|
@@ -33,6 +33,11 @@ if(ENABLE_NEON)
|
|
add_definitions(-DPNG_ARM_NEON_OPT=2)
|
|
endif()
|
|
|
|
+if(ENABLE_SSE)
|
|
+ list(APPEND lib_srcs contrib/intel/intel_init.c contrib/intel/filter_sse2_intrinsics.c)
|
|
+ add_definitions(-DPNG_INTEL_SSE)
|
|
+endif()
|
|
+
|
|
# ----------------------------------------------------------------------------------
|
|
# Define the library target:
|
|
# ----------------------------------------------------------------------------------
|
|
diff --git a/3rdparty/libpng/pngpriv.h b/3rdparty/libpng/pngpriv.h
|
|
index fe3355d..c0aa785 100644
|
|
--- a/3rdparty/libpng/pngpriv.h
|
|
+++ b/3rdparty/libpng/pngpriv.h
|
|
@@ -182,6 +182,42 @@
|
|
# endif
|
|
#endif /* PNG_ARM_NEON_OPT > 0 */
|
|
|
|
+#ifndef PNG_INTEL_SSE_OPT
|
|
+# ifdef PNG_INTEL_SSE
|
|
+ /* Only check for SSE when the build configuration defines PNG_INTEL_SSE
|
|
+ * to request the SSE optimizations. Without that definition they stay
|
|
+ * off by default. See contrib/intel for more details.
|
|
+ */
|
|
+# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
|
|
+ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
|
|
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
|
+# define PNG_INTEL_SSE_OPT 1
|
|
+# endif
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#if defined(PNG_INTEL_SSE_OPT) && PNG_INTEL_SSE_OPT > 0
|
|
+# ifndef PNG_INTEL_SSE_IMPLEMENTATION
|
|
+# if defined(__SSE4_1__) || defined(__AVX__)
|
|
+ /* No AVX instructions are used here; __AVX__ is tested only because
|
|
+ * it is the best way we can detect SSE4.1 and SSSE3 on MSVC.
|
|
+ */
|
|
+# define PNG_INTEL_SSE_IMPLEMENTATION 3
|
|
+# elif defined(__SSSE3__)
|
|
+# define PNG_INTEL_SSE_IMPLEMENTATION 2
|
|
+# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
|
|
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
|
|
+# define PNG_INTEL_SSE_IMPLEMENTATION 1
|
|
+# else
|
|
+# define PNG_INTEL_SSE_IMPLEMENTATION 0
|
|
+# endif
|
|
+# endif
|
|
+
|
|
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
|
|
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
|
|
+# endif
|
|
+#endif
|
|
+
|
|
/* Is this a build of a DLL where compilation of the object modules requires
|
|
* different preprocessor settings to those required for a simple library? If
|
|
* so PNG_BUILD_DLL must be set.
|
|
@@ -457,7 +493,7 @@
|
|
|
|
/* Memory model/platform independent fns */
|
|
#ifndef PNG_ABORT
|
|
-# ifdef _WINDOWS_
|
|
+# if defined(_WINDOWS_) && !defined(WINRT)
|
|
# define PNG_ABORT() ExitProcess(0)
|
|
# else
|
|
# define PNG_ABORT() abort()
|
|
@@ -1190,6 +1226,21 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop
|
|
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop
|
|
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
#endif
|
|
+
|
|
+#if defined(PNG_INTEL_SSE_IMPLEMENTATION) && PNG_INTEL_SSE_IMPLEMENTATION > 0
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
|
|
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
|
|
+#endif
|
|
|
|
/* Choose the best filter to use and filter the row data */
|
|
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
|
|
@@ -1919,6 +1970,10 @@ PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr,
|
|
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon,
|
|
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
|
|
# endif
|
|
+# if defined(PNG_INTEL_SSE_IMPLEMENTATION) && PNG_INTEL_SSE_IMPLEMENTATION > 0
|
|
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
|
|
+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
|
|
+# endif
|
|
#endif
|
|
|
|
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
|