From 657c17bb8c1af5b36a807b517c11a475c27eca5a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 1 Nov 2019 15:07:49 +0300 Subject: [PATCH 01/14] cmake: fix ITT define condition --- modules/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index c8dfbc039d..1d6da94e20 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -39,7 +39,7 @@ if(HAVE_CUDA) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function -Wshadow) endif() -if(CV_TRACE AND HAVE_ITT AND BUILD_ITT) +if(CV_TRACE AND HAVE_ITT) add_definitions(-DOPENCV_WITH_ITT=1) endif() From 21c38bbdaf772dd899e441b009187382ca64b212 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 2 Nov 2019 10:32:23 +0000 Subject: [PATCH 02/14] cmake(cpu optmizations): fix cleanup of OPENCV_DEPENDANT_TARGETS_* vars --- cmake/OpenCVCompilerOptimizations.cmake | 5 ++++- cmake/OpenCVModule.cmake | 1 - cmake/OpenCVUtils.cmake | 13 +++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake index de34aff4dd..9377cfc0b9 100644 --- a/cmake/OpenCVCompilerOptimizations.cmake +++ b/cmake/OpenCVCompilerOptimizations.cmake @@ -714,7 +714,10 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T foreach(OPT ${CPU_DISPATCH_FINAL}) if(__result_${OPT}) #message("${OPT}: ${__result_${OPT}}") - if(CMAKE_GENERATOR MATCHES "^Visual") + if(CMAKE_GENERATOR MATCHES "^Visual" + OR OPENCV_CMAKE_CPU_OPTIMIZATIONS_FORCE_TARGETS + ) + # MSVS generator is not able to properly order compilation flags: # extra flags are added before common flags, so switching between optimizations doesn't work correctly # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required) add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}}) diff --git 
a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 8b1267d951..f83ae6fec4 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -63,7 +63,6 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE) unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE) - unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE) endforeach() # clean modules info which needs to be recalculated diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index a876d9c02c..b61a301b6e 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -288,9 +288,22 @@ function(ocv_append_target_property target prop) endif() endfunction() +if(DEFINED OPENCV_DEPENDANT_TARGETS_LIST) + foreach(v ${OPENCV_DEPENDANT_TARGETS_LIST}) + unset(${v} CACHE) + endforeach() + unset(OPENCV_DEPENDANT_TARGETS_LIST CACHE) +endif() + function(ocv_append_dependant_targets target) #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})") _ocv_fix_target(target) + list(FIND OPENCV_DEPENDANT_TARGETS_LIST "OPENCV_DEPENDANT_TARGETS_${target}" __id) + if(__id EQUAL -1) + list(APPEND OPENCV_DEPENDANT_TARGETS_LIST "OPENCV_DEPENDANT_TARGETS_${target}") + list(SORT OPENCV_DEPENDANT_TARGETS_LIST) + set(OPENCV_DEPENDANT_TARGETS_LIST "${OPENCV_DEPENDANT_TARGETS_LIST}" CACHE INTERNAL "") + endif() set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE) endfunction() From 6d5b900324ecea347f6518edeb15019579390c62 Mon Sep 17 00:00:00 2001 From: Igor Murzov Date: Wed, 30 Oct 2019 18:35:38 +0300 Subject: [PATCH 03/14] Simplify OpenCL info dumping code: * Reduce code nesting * Drop redundant .c_str() calls --- .../opencv2/core/opencl/opencl_info.hpp | 45 +++++++++---------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp 
b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index b5d37394a7..cbb5ec02ac 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -64,33 +64,30 @@ static void dumpOpenCLInformation() std::vector platforms; cv::ocl::getPlatfomsInfo(platforms); - if (platforms.size() > 0) - { - DUMP_MESSAGE_STDOUT("OpenCL Platforms: "); - for (size_t i = 0; i < platforms.size(); i++) - { - const PlatformInfo* platform = &platforms[i]; - DUMP_MESSAGE_STDOUT(" " << platform->name().c_str()); - Device current_device; - for (int j = 0; j < platform->deviceNumber(); j++) - { - platform->getDevice(current_device, j); - const char* deviceTypeStr = current_device.type() == Device::TYPE_CPU - ? ("CPU") : (current_device.type() == Device::TYPE_GPU ? current_device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown"); - DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name().c_str() << " (" << current_device.version().c_str() << ")"); - DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, (int)j ), - cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)", - platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) ); - } - } - } - else + if (platforms.empty()) { DUMP_MESSAGE_STDOUT("OpenCL is not available"); DUMP_CONFIG_PROPERTY("cv_ocl", "not available"); return; } + DUMP_MESSAGE_STDOUT("OpenCL Platforms: "); + for (size_t i = 0; i < platforms.size(); i++) + { + const PlatformInfo* platform = &platforms[i]; + DUMP_MESSAGE_STDOUT(" " << platform->name()); + Device current_device; + for (int j = 0; j < platform->deviceNumber(); j++) + { + platform->getDevice(current_device, j); + const char* deviceTypeStr = (current_device.type() == Device::TYPE_CPU) ? "CPU" : + (current_device.type() == Device::TYPE_GPU ? current_device.hostUnifiedMemory() ? 
"iGPU" : "dGPU" : "unknown"); + DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")"); + DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ), + cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)", + platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) ); + } + } const Device& device = Device::getDefault(); if (!device.available()) CV_Error(Error::OpenCLInitError, "OpenCL device is not available"); @@ -102,8 +99,8 @@ static void dumpOpenCLInformation() DUMP_CONFIG_PROPERTY("cv_ocl_current_platformName", device.getPlatform().name()); #endif - const char* deviceTypeStr = device.type() == Device::TYPE_CPU - ? ("CPU") : (device.type() == Device::TYPE_GPU ? device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown"); + const char* deviceTypeStr = (device.type() == Device::TYPE_CPU) ? "CPU" : + (device.type() == Device::TYPE_GPU ? device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown"); DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr); DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr); @@ -156,7 +153,7 @@ static void dumpOpenCLInformation() } pos = pos2 + 1; } - DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr.c_str()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr); const char* haveAmdBlasStr = haveAmdBlas() ? 
"Yes" : "No"; DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr); From 2c2716de0f18e6a19fa4589d51fb2337054cbcd9 Mon Sep 17 00:00:00 2001 From: TH3CHARLie Date: Mon, 4 Nov 2019 21:27:48 +0800 Subject: [PATCH 04/14] core(test): add test for YAML parse multiple documents - added removal of temporary file --- modules/core/test/test_io.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 55e8f3499e..9744d5b425 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1674,4 +1674,33 @@ TEST(Core_InputOutput, FileStorage_free_file_after_exception) ASSERT_EQ(0, std::remove(fileName.c_str())); } +TEST(Core_InputOutput, FileStorage_YAML_parse_multiple_documents) +{ + const std::string filename = "FileStorage_YAML_parse_multiple_documents.yml"; + FileStorage fs; + + fs.open(filename, FileStorage::WRITE); + fs << "a" << 42; + fs.release(); + + fs.open(filename, FileStorage::APPEND); + fs << "b" << 1988; + fs.release(); + + fs.open(filename, FileStorage::READ); + + EXPECT_EQ(42, (int)fs["a"]); + EXPECT_EQ(1988, (int)fs["b"]); + + EXPECT_EQ(42, (int)fs.root(0)["a"]); + EXPECT_TRUE(fs.root(0)["b"].empty()); + + EXPECT_TRUE(fs.root(1)["a"].empty()); + EXPECT_EQ(1988, (int)fs.root(1)["b"]); + + fs.release(); + + ASSERT_EQ(0, std::remove(filename.c_str())); +} + }} // namespace From 2112aa31e6f9d2687b590c2d98cee5ab5972ecfb Mon Sep 17 00:00:00 2001 From: Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com> Date: Tue, 5 Nov 2019 10:52:35 -0500 Subject: [PATCH 05/14] Merge pull request #15828 from ChipKerchner:momentsToHal * Convert moments in tile algorithms to HAL (1.3x faster for VSX). * Adding NEON code back in for non 64-bit platforms. * Remove floats from post processing. 
--- modules/imgproc/src/moments.cpp | 160 +++++++------------------------- 1 file changed, 35 insertions(+), 125 deletions(-) diff --git a/modules/imgproc/src/moments.cpp b/modules/imgproc/src/moments.cpp index 1f1e5922cd..9e7e6d2dfd 100644 --- a/modules/imgproc/src/moments.cpp +++ b/modules/imgproc/src/moments.cpp @@ -38,8 +38,10 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ + #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" +#include "opencv2/core/hal/intrin.hpp" namespace cv { @@ -211,7 +213,7 @@ struct MomentsInTile_SIMD } }; -#if CV_SSE2 +#if CV_SIMD128 template <> struct MomentsInTile_SIMD @@ -226,115 +228,33 @@ struct MomentsInTile_SIMD int x = 0; { - __m128i dx = _mm_set1_epi16(8); - __m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); + v_int16x8 dx = v_setall_s16(8), qx = v_int16x8(0, 1, 2, 3, 4, 5, 6, 7); + v_uint32x4 z = v_setzero_u32(), qx0 = z, qx1 = z, qx2 = z, qx3 = z; for( ; x <= len - 8; x += 8 ) { - __m128i p = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr + x)), z); - __m128i sx = _mm_mullo_epi16(qx, qx); + v_int16x8 p = v_reinterpret_as_s16(v_load_expand(ptr + x)); + v_int16x8 sx = v_mul_wrap(qx, qx); - qx0 = _mm_add_epi16(qx0, p); - qx1 = _mm_add_epi32(qx1, _mm_madd_epi16(p, qx)); - qx2 = _mm_add_epi32(qx2, _mm_madd_epi16(p, sx)); - qx3 = _mm_add_epi32(qx3, _mm_madd_epi16( _mm_mullo_epi16(p, qx), sx)); + qx0 += v_reinterpret_as_u32(p); + qx1 = v_reinterpret_as_u32(v_dotprod(p, qx, v_reinterpret_as_s32(qx1))); + qx2 = v_reinterpret_as_u32(v_dotprod(p, sx, v_reinterpret_as_s32(qx2))); + qx3 = v_reinterpret_as_u32(v_dotprod(v_mul_wrap(p, qx), sx, v_reinterpret_as_s32(qx3))); - qx = _mm_add_epi16(qx, dx); + qx += dx; } - __m128i qx01_lo = _mm_unpacklo_epi32(qx0, qx1); - __m128i qx23_lo = _mm_unpacklo_epi32(qx2, qx3); - __m128i qx01_hi = _mm_unpackhi_epi32(qx0, qx1); - __m128i qx23_hi = _mm_unpackhi_epi32(qx2, qx3); - 
qx01_lo = _mm_add_epi32(qx01_lo, qx01_hi); - qx23_lo = _mm_add_epi32(qx23_lo, qx23_hi); - __m128i qx0123_lo = _mm_unpacklo_epi64(qx01_lo, qx23_lo); - __m128i qx0123_hi = _mm_unpackhi_epi64(qx01_lo, qx23_lo); - qx0123_lo = _mm_add_epi32(qx0123_lo, qx0123_hi); - _mm_store_si128((__m128i*)buf, qx0123_lo); - - x0 = (buf[0] & 0xffff) + (buf[0] >> 16); - x1 = buf[1]; - x2 = buf[2]; - x3 = buf[3]; + x0 = v_reduce_sum(qx0); + x0 = (x0 & 0xffff) + (x0 >> 16); + x1 = v_reduce_sum(qx1); + x2 = v_reduce_sum(qx2); + x3 = v_reduce_sum(qx3); } return x; } - - int CV_DECL_ALIGNED(16) buf[4]; }; -#elif CV_NEON - -template <> -struct MomentsInTile_SIMD -{ - MomentsInTile_SIMD() - { - ushort CV_DECL_ALIGNED(8) init[4] = { 0, 1, 2, 3 }; - qx_init = vld1_u16(init); - v_step = vdup_n_u16(4); - } - - int operator() (const uchar * ptr, int len, int & x0, int & x1, int & x2, int & x3) - { - int x = 0; - - uint32x4_t v_z = vdupq_n_u32(0), v_x0 = v_z, v_x1 = v_z, - v_x2 = v_z, v_x3 = v_z; - uint16x4_t qx = qx_init; - - for( ; x <= len - 8; x += 8 ) - { - uint16x8_t v_src = vmovl_u8(vld1_u8(ptr + x)); - - // first part - uint32x4_t v_qx = vmovl_u16(qx); - uint16x4_t v_p = vget_low_u16(v_src); - uint32x4_t v_px = vmull_u16(qx, v_p); - - v_x0 = vaddw_u16(v_x0, v_p); - v_x1 = vaddq_u32(v_x1, v_px); - v_px = vmulq_u32(v_px, v_qx); - v_x2 = vaddq_u32(v_x2, v_px); - v_x3 = vaddq_u32(v_x3, vmulq_u32(v_px, v_qx)); - qx = vadd_u16(qx, v_step); - - // second part - v_qx = vmovl_u16(qx); - v_p = vget_high_u16(v_src); - v_px = vmull_u16(qx, v_p); - - v_x0 = vaddw_u16(v_x0, v_p); - v_x1 = vaddq_u32(v_x1, v_px); - v_px = vmulq_u32(v_px, v_qx); - v_x2 = vaddq_u32(v_x2, v_px); - v_x3 = vaddq_u32(v_x3, vmulq_u32(v_px, v_qx)); - - qx = vadd_u16(qx, v_step); - } - - vst1q_u32(buf, v_x0); - x0 = buf[0] + buf[1] + buf[2] + buf[3]; - vst1q_u32(buf, v_x1); - x1 = buf[0] + buf[1] + buf[2] + buf[3]; - vst1q_u32(buf, v_x2); - x2 = buf[0] + buf[1] + buf[2] + buf[3]; - vst1q_u32(buf, v_x3); - x3 = buf[0] + buf[1] + 
buf[2] + buf[3]; - - return x; - } - - uint CV_DECL_ALIGNED(16) buf[4]; - uint16x4_t qx_init, v_step; -}; - -#endif - -#if CV_SSE4_1 - template <> struct MomentsInTile_SIMD { @@ -348,49 +268,39 @@ struct MomentsInTile_SIMD int x = 0; { - __m128i v_delta = _mm_set1_epi32(4), v_zero = _mm_setzero_si128(), v_x0 = v_zero, - v_x1 = v_zero, v_x2 = v_zero, v_x3 = v_zero, v_ix0 = _mm_setr_epi32(0, 1, 2, 3); + v_int32x4 v_delta = v_setall_s32(4), v_ix0 = v_int32x4(0, 1, 2, 3); + v_uint32x4 z = v_setzero_u32(), v_x0 = z, v_x1 = z, v_x2 = z; + v_uint64x2 v_x3 = v_reinterpret_as_u64(z); for( ; x <= len - 4; x += 4 ) { - __m128i v_src = _mm_loadl_epi64((const __m128i *)(ptr + x)); - v_src = _mm_unpacklo_epi16(v_src, v_zero); + v_int32x4 v_src = v_reinterpret_as_s32(v_load_expand(ptr + x)); - v_x0 = _mm_add_epi32(v_x0, v_src); - v_x1 = _mm_add_epi32(v_x1, _mm_mullo_epi32(v_src, v_ix0)); + v_x0 += v_reinterpret_as_u32(v_src); + v_x1 += v_reinterpret_as_u32(v_src * v_ix0); - __m128i v_ix1 = _mm_mullo_epi32(v_ix0, v_ix0); - v_x2 = _mm_add_epi32(v_x2, _mm_mullo_epi32(v_src, v_ix1)); + v_int32x4 v_ix1 = v_ix0 * v_ix0; + v_x2 += v_reinterpret_as_u32(v_src * v_ix1); - v_ix1 = _mm_mullo_epi32(v_ix0, v_ix1); - v_src = _mm_mullo_epi32(v_src, v_ix1); - v_x3 = _mm_add_epi64(v_x3, _mm_add_epi64(_mm_unpacklo_epi32(v_src, v_zero), _mm_unpackhi_epi32(v_src, v_zero))); + v_ix1 = v_ix0 * v_ix1; + v_src = v_src * v_ix1; + v_uint64x2 v_lo, v_hi; + v_expand(v_reinterpret_as_u32(v_src), v_lo, v_hi); + v_x3 += v_lo + v_hi; - v_ix0 = _mm_add_epi32(v_ix0, v_delta); + v_ix0 += v_delta; } - __m128i v_x01_lo = _mm_unpacklo_epi32(v_x0, v_x1); - __m128i v_x22_lo = _mm_unpacklo_epi32(v_x2, v_x2); - __m128i v_x01_hi = _mm_unpackhi_epi32(v_x0, v_x1); - __m128i v_x22_hi = _mm_unpackhi_epi32(v_x2, v_x2); - v_x01_lo = _mm_add_epi32(v_x01_lo, v_x01_hi); - v_x22_lo = _mm_add_epi32(v_x22_lo, v_x22_hi); - __m128i v_x0122_lo = _mm_unpacklo_epi64(v_x01_lo, v_x22_lo); - __m128i v_x0122_hi = _mm_unpackhi_epi64(v_x01_lo, 
v_x22_lo); - v_x0122_lo = _mm_add_epi32(v_x0122_lo, v_x0122_hi); - _mm_store_si128((__m128i*)buf64, v_x3); - _mm_store_si128((__m128i*)buf, v_x0122_lo); - - x0 = buf[0]; - x1 = buf[1]; - x2 = buf[2]; + x0 = v_reduce_sum(v_x0); + x1 = v_reduce_sum(v_x1); + x2 = v_reduce_sum(v_x2); + v_store_aligned(buf64, v_reinterpret_as_s64(v_x3)); x3 = buf64[0] + buf64[1]; } return x; } - int CV_DECL_ALIGNED(16) buf[4]; int64 CV_DECL_ALIGNED(16) buf64[2]; }; From cdbfdcc36327f593b1c96b8f6a1f4a21ecc0fca7 Mon Sep 17 00:00:00 2001 From: Igor Murzov Date: Wed, 30 Oct 2019 18:24:32 +0300 Subject: [PATCH 06/14] Fix OpenCL device detection when some OpenCL platform has no devices It's not an error if some OpenCL platform has no devices. This makes OpenCL device detection work correctly in the following scenario: $ OPENCV_OPENCL_DEVICE=:GPU: ./opencv_test_dnn OpenCV version: 4.1.2-dev OpenCV VCS version: 4.1.2-80-g467748ee98-dirty Build type: Debug Compiler: /usr/bin/g++ (ver 7.4.0) Parallel framework: pthreads CPU features: SSE SSE2 SSE3 *SSE4.1 *SSE4.2 *FP16 *AVX *AVX2 *AVX512-SKX? 
Intel(R) IPP version: ippIP AVX2 (l9) 2019.0.0 Gold (-) Jul 24 2018 OpenCL Platforms: AMD Accelerated Parallel Processing Portable Computing Language CPU: pthread-AMD Ryzen 7 2700X Eight-Core Processor (OpenCL 1.2 pocl HSTR: pthread-x86_64-pc-linux-gnu-znver1) NVIDIA CUDA dGPU: GeForce GTX 1080 (OpenCL 1.2 CUDA) Current OpenCL device: Type = dGPU Name = GeForce GTX 1080 Version = OpenCL 1.2 CUDA Driver version = 430.26 --- modules/core/src/ocl.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 8ebb0064a9..7780364f1c 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -2037,16 +2037,25 @@ struct Context::Impl 0 }; - cl_uint i, nd0 = 0, nd = 0; + cl_uint nd0 = 0; int dtype = dtype0 & 15; - CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, 0, 0, &nd0)); + cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0); + if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices + { + CV_OCL_DBG_CHECK_RESULT(status, + cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str()); + } + + if (nd0 == 0) + return; AutoBuffer dlistbuf(nd0*2+1); cl_device_id* dlist = (cl_device_id*)dlistbuf.data(); cl_device_id* dlist_new = dlist + nd0; CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0)); - String name0; + cl_uint i, nd = 0; + String name0; for(i = 0; i < nd0; i++) { Device d(dlist[i]); @@ -5944,7 +5953,12 @@ void convertFromImage(void* cl_mem_image, UMat& dst) static void getDevices(std::vector& devices, cl_platform_id platform) { cl_uint numDevices = 0; - CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices)); + cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices); + if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices + { + CV_OCL_DBG_CHECK_RESULT(status, + 
cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str()); + } if (numDevices == 0) { From 2e20f06f8edddfa5961f65e260ec8157a0aa8de8 Mon Sep 17 00:00:00 2001 From: SSE4 Date: Tue, 5 Nov 2019 16:13:21 +0700 Subject: [PATCH 07/14] - fix FindOpenEXR to respect OPENEXR_ROOT Signed-off-by: SSE4 --- cmake/OpenCVFindOpenEXR.cmake | 114 ++++++++++++++++++++++------------ 1 file changed, 74 insertions(+), 40 deletions(-) diff --git a/cmake/OpenCVFindOpenEXR.cmake b/cmake/OpenCVFindOpenEXR.cmake index c0a46806e1..39a0c7dbce 100644 --- a/cmake/OpenCVFindOpenEXR.cmake +++ b/cmake/OpenCVFindOpenEXR.cmake @@ -20,55 +20,89 @@ if(WIN32) elseif(MSVC) SET(OPENEXR_LIBSEARCH_SUFFIXES Win32/Release Win32 Win32/Debug) endif() -else() - set(OPENEXR_ROOT "") endif() -SET(LIBRARY_PATHS - /usr/lib - /usr/local/lib - /sw/lib - /opt/local/lib - "${ProgramFiles_ENV_PATH}/OpenEXR/lib/static" - "${OPENEXR_ROOT}/lib") +SET(SEARCH_PATHS + "${OPENEXR_ROOT}" + /usr + /usr/local + /sw + /opt + "${ProgramFiles_ENV_PATH}/OpenEXR") -FIND_PATH(OPENEXR_INCLUDE_PATH ImfRgbaFile.h - PATH_SUFFIXES OpenEXR - PATHS - /usr/include - /usr/local/include - /sw/include - /opt/local/include - "${ProgramFiles_ENV_PATH}/OpenEXR/include" - "${OPENEXR_ROOT}/include") +MACRO(FIND_OPENEXR_LIBRARY LIBRARY_NAME LIBRARY_SUFFIX) + string(TOUPPER "${LIBRARY_NAME}" LIBRARY_NAME_UPPER) + FIND_LIBRARY(OPENEXR_${LIBRARY_NAME_UPPER}_LIBRARY + NAMES ${LIBRARY_NAME}${LIBRARY_SUFFIX} + PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES} + NO_DEFAULT_PATH + PATHS "${SEARCH_PATH}/lib" "${SEARCH_PATH}/lib/static") +ENDMACRO() -FIND_LIBRARY(OPENEXR_HALF_LIBRARY - NAMES Half - PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES} - PATHS ${LIBRARY_PATHS}) +FOREACH(SEARCH_PATH ${SEARCH_PATHS}) + FIND_PATH(OPENEXR_INCLUDE_PATH ImfRgbaFile.h + PATH_SUFFIXES OpenEXR + NO_DEFAULT_PATH + PATHS + "${SEARCH_PATH}/include") -FIND_LIBRARY(OPENEXR_IEX_LIBRARY - NAMES Iex - PATH_SUFFIXES 
${OPENEXR_LIBSEARCH_SUFFIXES} - PATHS ${LIBRARY_PATHS}) + IF (OPENEXR_INCLUDE_PATH) + SET(OPENEXR_VERSION_FILE "${OPENEXR_INCLUDE_PATH}/OpenEXRConfig.h") + IF (EXISTS ${OPENEXR_VERSION_FILE}) + FILE (STRINGS ${OPENEXR_VERSION_FILE} contents REGEX "#define OPENEXR_VERSION_MAJOR ") + IF (${contents} MATCHES "#define OPENEXR_VERSION_MAJOR ([0-9]+)") + SET(OPENEXR_VERSION_MAJOR "${CMAKE_MATCH_1}") + ENDIF () + FILE (STRINGS ${OPENEXR_VERSION_FILE} contents REGEX "#define OPENEXR_VERSION_MINOR ") + IF (${contents} MATCHES "#define OPENEXR_VERSION_MINOR ([0-9]+)") + SET(OPENEXR_VERSION_MINOR "${CMAKE_MATCH_1}") + ENDIF () + ENDIF () + ENDIF () -FIND_LIBRARY(OPENEXR_IMATH_LIBRARY - NAMES Imath - PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES} - PATHS ${LIBRARY_PATHS}) + IF (OPENEXR_VERSION_MAJOR AND OPENEXR_VERSION_MINOR) + set(OPENEXR_VERSION "${OPENEXR_VERSION_MAJOR}_${OPENEXR_VERSION_MINOR}") + ENDIF () -FIND_LIBRARY(OPENEXR_ILMIMF_LIBRARY - NAMES IlmImf - PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES} - PATHS ${LIBRARY_PATHS}) + SET(LIBRARY_SUFFIXES + "-${OPENEXR_VERSION}" + "-${OPENEXR_VERSION}_s" + "-${OPENEXR_VERSION}_d" + "-${OPENEXR_VERSION}_s_d" + "" + "_s" + "_d" + "_s_d") -FIND_LIBRARY(OPENEXR_ILMTHREAD_LIBRARY - NAMES IlmThread - PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES} - PATHS ${LIBRARY_PATHS}) + FOREACH(LIBRARY_SUFFIX ${LIBRARY_SUFFIXES}) + FIND_OPENEXR_LIBRARY("Half" ${LIBRARY_SUFFIX}) + FIND_OPENEXR_LIBRARY("Iex" ${LIBRARY_SUFFIX}) + FIND_OPENEXR_LIBRARY("Imath" ${LIBRARY_SUFFIX}) + FIND_OPENEXR_LIBRARY("IlmImf" ${LIBRARY_SUFFIX}) + FIND_OPENEXR_LIBRARY("IlmThread" ${LIBRARY_SUFFIX}) + IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY) + SET(OPENEXR_FOUND TRUE) + BREAK() + ENDIF() + UNSET(OPENEXR_IMATH_LIBRARY) + UNSET(OPENEXR_ILMIMF_LIBRARY) + UNSET(OPENEXR_IEX_LIBRARY) + UNSET(OPENEXR_ILMTHREAD_LIBRARY) + UNSET(OPENEXR_HALF_LIBRARY) + ENDFOREACH() -IF (OPENEXR_INCLUDE_PATH AND 
OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY) - SET(OPENEXR_FOUND TRUE) + IF (OPENEXR_FOUND) + BREAK() + ENDIF() + + UNSET(OPENEXR_INCLUDE_PATH) + UNSET(OPENEXR_VERSION_FILE) + UNSET(OPENEXR_VERSION_MAJOR) + UNSET(OPENEXR_VERSION_MINOR) + UNSET(OPENEXR_VERSION) +ENDFOREACH() + +IF (OPENEXR_FOUND) SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE PATH "The include paths needed to use OpenEXR") SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR" FORCE) ENDIF () From 07ef08e966d3fb44eafa270a6e8a613777406086 Mon Sep 17 00:00:00 2001 From: czgdp1807 Date: Wed, 6 Nov 2019 17:15:31 +0530 Subject: [PATCH 08/14] removed typo --- modules/cudaoptflow/src/pyrlk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudaoptflow/src/pyrlk.cpp b/modules/cudaoptflow/src/pyrlk.cpp index 881c620143..1020977903 100644 --- a/modules/cudaoptflow/src/pyrlk.cpp +++ b/modules/cudaoptflow/src/pyrlk.cpp @@ -333,7 +333,7 @@ namespace GpuMat& nextPts = _nextPts.getGpuMatRef(); GpuMat& status = _status.getGpuMatRef(); GpuMat* err = _err.needed() ? 
&(_err.getGpuMatRef()) : NULL; - if (_prevImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT && _prevImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT) + if (_prevImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT && _nextImg.kind() == _InputArray::STD_VECTOR_CUDA_GPU_MAT) { std::vector prevPyr, nextPyr; _prevImg.getGpuMatVector(prevPyr); From 7c4158d8c2efc302f58e84f43e13743172d8020a Mon Sep 17 00:00:00 2001 From: Dimitri Gerin Date: Wed, 6 Nov 2019 21:05:35 +0300 Subject: [PATCH 09/14] Fix dnn::getLayerInputs --- modules/dnn/src/dnn.cpp | 9 +++------ modules/dnn/test/test_misc.cpp | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index ad2cd73807..f134cf6813 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -3189,14 +3189,11 @@ Ptr Net::getLayer(LayerId layerId) std::vector > Net::getLayerInputs(LayerId layerId) { LayerData &ld = impl->getLayerData(layerId); - if (!ld.layerInstance) - CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str())); std::vector > inputLayers; - inputLayers.reserve(ld.inputLayersId.size()); - std::set::iterator it; - for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) { - inputLayers.push_back(getLayer(*it)); + inputLayers.reserve(ld.inputBlobsId.size()); + for (size_t i = 0; i < ld.inputBlobsId.size(); ++i) { + inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid)); } return inputLayers; } diff --git a/modules/dnn/test/test_misc.cpp b/modules/dnn/test/test_misc.cpp index e2a6af735b..3e72cfe58e 100644 --- a/modules/dnn/test/test_misc.cpp +++ b/modules/dnn/test/test_misc.cpp @@ -86,6 +86,8 @@ TEST_P(dump, Regression) Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"), findDataFile("dnn/squeezenet_v1.1.caffemodel", false)); + ASSERT_EQ(net.getLayerInputs(net.getLayerId("fire2/concat")).size(), 2); + int size[] = {1, 3, 227, 227}; Mat input = cv::Mat::ones(4, size, CV_32F); 
net.setInput(input); From 35cebbd1675f8e64e3babc4b45db024dfd2725b7 Mon Sep 17 00:00:00 2001 From: collinbrake <42008700+collinbrake@users.noreply.github.com> Date: Fri, 8 Nov 2019 04:27:37 -0500 Subject: [PATCH 10/14] Merge pull request #15832 from collinbrake:feature_grammar_fixes_4 * Grammar fixes for python core operations docs * fixed whitespace error * reverted changes --- .../py_basic_ops/py_basic_ops.markdown | 43 ++++++------ .../py_image_arithmetics.markdown | 34 +++++---- .../py_optimization/py_optimization.markdown | 70 +++++++++---------- 3 files changed, 71 insertions(+), 76 deletions(-) diff --git a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown index 1d0ebb3967..4c6aa4bb92 100644 --- a/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown +++ b/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.markdown @@ -8,13 +8,13 @@ Learn to: - Access pixel values and modify them - Access image properties -- Setting Region of Interest (ROI) -- Splitting and Merging images +- Set a Region of Interest (ROI) +- Split and merge images -Almost all the operations in this section is mainly related to Numpy rather than OpenCV. A good +Almost all the operations in this section are mainly related to Numpy rather than OpenCV. A good knowledge of Numpy is required to write better optimized code with OpenCV. -*( Examples will be shown in Python terminal since most of them are just single line codes )* +*( Examples will be shown in a Python terminal, since most of them are just single lines of code )* Accessing and Modifying pixel values ------------------------------------ @@ -45,15 +45,15 @@ You can modify the pixel values the same way. [255 255 255] @endcode -**warning** +**Warning** -Numpy is a optimized library for fast array calculations. So simply accessing each and every pixel -values and modifying it will be very slow and it is discouraged. 
+Numpy is an optimized library for fast array calculations. So simply accessing each and every pixel +value and modifying it will be very slow and it is discouraged. @note The above method is normally used for selecting a region of an array, say the first 5 rows and last 3 columns. For individual pixel access, the Numpy array methods, array.item() and -array.itemset() are considered better, however they always return a scalar. If you want to access -all B,G,R values, you need to call array.item() separately for all. +array.itemset() are considered better. They always return a scalar, however, so if you want to access +all the B,G,R values, you will need to call array.item() separately for each value. Better pixel accessing and editing method : @code{.py} @@ -70,11 +70,10 @@ Better pixel accessing and editing method : Accessing Image Properties -------------------------- -Image properties include number of rows, columns and channels, type of image data, number of pixels -etc. +Image properties include number of rows, columns, and channels; type of image data; number of pixels; etc. -The shape of an image is accessed by img.shape. It returns a tuple of number of rows, columns, and channels -(if image is color): +The shape of an image is accessed by img.shape. It returns a tuple of the number of rows, columns, and channels +(if the image is color): @code{.py} >>> print( img.shape ) (342, 548, 3) @@ -95,13 +94,13 @@ uint8 @endcode @note img.dtype is very important while debugging because a large number of errors in OpenCV-Python -code is caused by invalid datatype. +code are caused by invalid datatype. Image ROI --------- -Sometimes, you will have to play with certain region of images. For eye detection in images, first -face detection is done all over the image. When a face is obtained, we select the face region alone +Sometimes, you will have to play with certain regions of images. For eye detection in images, first +face detection is done over the entire image. 
When a face is obtained, we select the face region alone and search for eyes inside it instead of searching the whole image. It improves accuracy (because eyes are always on faces :D ) and performance (because we search in a small area). @@ -118,9 +117,9 @@ Check the results below: Splitting and Merging Image Channels ------------------------------------ -Sometimes you will need to work separately on B,G,R channels of image. In this case, you need -to split the BGR images to single channels. In other cases, you may need to join these individual -channels to a BGR image. You can do it simply by: +Sometimes you will need to work separately on the B,G,R channels of an image. In this case, you need +to split the BGR image into single channels. In other cases, you may need to join these individual +channels to create a BGR image. You can do this simply by: @code{.py} >>> b,g,r = cv.split(img) >>> img = cv.merge((b,g,r)) @@ -129,7 +128,7 @@ Or @code >>> b = img[:,:,0] @endcode -Suppose you want to set all the red pixels to zero, you do not need to split the channels first. +Suppose you want to set all the red pixels to zero - you do not need to split the channels first. Numpy indexing is faster: @code{.py} >>> img[:,:,2] = 0 @@ -137,13 +136,13 @@ Numpy indexing is faster: **Warning** -cv.split() is a costly operation (in terms of time). So do it only if you need it. Otherwise go +cv.split() is a costly operation (in terms of time). So use it only if necessary. Otherwise go for Numpy indexing. Making Borders for Images (Padding) ----------------------------------- -If you want to create a border around the image, something like a photo frame, you can use +If you want to create a border around an image, something like a photo frame, you can use **cv.copyMakeBorder()**. But it has more applications for convolution operation, zero padding etc. 
This function takes following arguments: diff --git a/doc/py_tutorials/py_core/py_image_arithmetics/py_image_arithmetics.markdown b/doc/py_tutorials/py_core/py_image_arithmetics/py_image_arithmetics.markdown index b90982a4c3..d08d974c2f 100644 --- a/doc/py_tutorials/py_core/py_image_arithmetics/py_image_arithmetics.markdown +++ b/doc/py_tutorials/py_core/py_image_arithmetics/py_image_arithmetics.markdown @@ -4,21 +4,20 @@ Arithmetic Operations on Images {#tutorial_py_image_arithmetics} Goal ---- -- Learn several arithmetic operations on images like addition, subtraction, bitwise operations - etc. -- You will learn these functions : **cv.add()**, **cv.addWeighted()** etc. +- Learn several arithmetic operations on images, like addition, subtraction, bitwise operations, etc. +- Learn these functions: **cv.add()**, **cv.addWeighted()**, etc. Image Addition -------------- -You can add two images by OpenCV function, cv.add() or simply by numpy operation, -res = img1 + img2. Both images should be of same depth and type, or second image can just be a +You can add two images with the OpenCV function, cv.add(), or simply by the numpy operation +res = img1 + img2. Both images should be of the same depth and type, or the second image can just be a scalar value. @note There is a difference between OpenCV addition and Numpy addition. OpenCV addition is a saturated operation while Numpy addition is a modulo operation. -For example, consider below sample: +For example, consider the sample below: @code{.py} >>> x = np.uint8([250]) >>> y = np.uint8([10]) @@ -29,13 +28,12 @@ For example, consider below sample: >>> print( x+y ) # 250+10 = 260 % 256 = 4 [4] @endcode -It will be more visible when you add two images. OpenCV function will provide a better result. So -always better stick to OpenCV functions. +This will be more visible when you add two images. Stick with OpenCV functions, because they will provide a better result. 
Image Blending -------------- -This is also image addition, but different weights are given to images so that it gives a feeling of +This is also image addition, but different weights are given to images in order to give a feeling of blending or transparency. Images are added as per the equation below: \f[g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)\f] @@ -43,8 +41,8 @@ blending or transparency. Images are added as per the equation below: By varying \f$\alpha\f$ from \f$0 \rightarrow 1\f$, you can perform a cool transition between one image to another. -Here I took two images to blend them together. First image is given a weight of 0.7 and second image -is given 0.3. cv.addWeighted() applies following equation on the image. +Here I took two images to blend together. The first image is given a weight of 0.7 and the second image +is given 0.3. cv.addWeighted() applies the following equation to the image: \f[dst = \alpha \cdot img1 + \beta \cdot img2 + \gamma\f] @@ -66,14 +64,14 @@ Check the result below: Bitwise Operations ------------------ -This includes bitwise AND, OR, NOT and XOR operations. They will be highly useful while extracting +This includes the bitwise AND, OR, NOT, and XOR operations. They will be highly useful while extracting any part of the image (as we will see in coming chapters), defining and working with non-rectangular -ROI etc. Below we will see an example on how to change a particular region of an image. +ROIs, etc. Below we will see an example of how to change a particular region of an image. -I want to put OpenCV logo above an image. If I add two images, it will change color. If I blend it, -I get an transparent effect. But I want it to be opaque. If it was a rectangular region, I could use -ROI as we did in last chapter. But OpenCV logo is a not a rectangular shape. So you can do it with -bitwise operations as below: +I want to put the OpenCV logo above an image. If I add two images, it will change the color.
If I blend them, +I get a transparent effect. But I want it to be opaque. If it was a rectangular region, I could use +ROI as we did in the last chapter. But the OpenCV logo is not a rectangular shape. So you can do it with +bitwise operations as shown below: @code{.py} # Load two images img1 = cv.imread('messi5.jpg') @@ -81,7 +79,7 @@ img2 = cv.imread('opencv-logo-white.png') # I want to put logo on top-left corner, So I create a ROI rows,cols,channels = img2.shape -roi = img1[0:rows, 0:cols ] +roi = img1[0:rows, 0:cols] # Now create a mask of logo and create its inverse mask also img2gray = cv.cvtColor(img2,cv.COLOR_BGR2GRAY) diff --git a/doc/py_tutorials/py_core/py_optimization/py_optimization.markdown b/doc/py_tutorials/py_core/py_optimization/py_optimization.markdown index 4631d3bf58..61f403bf54 100644 --- a/doc/py_tutorials/py_core/py_optimization/py_optimization.markdown +++ b/doc/py_tutorials/py_core/py_optimization/py_optimization.markdown @@ -4,28 +4,27 @@ Performance Measurement and Improvement Techniques {#tutorial_py_optimization} Goal ---- -In image processing, since you are dealing with large number of operations per second, it is -mandatory that your code is not only providing the correct solution, but also in the fastest manner. -So in this chapter, you will learn +In image processing, since you are dealing with a large number of operations per second, it is mandatory that your code is not only providing the correct solution, but that it is also providing it in the fastest manner. +So in this chapter, you will learn: - To measure the performance of your code. - Some tips to improve the performance of your code. -- You will see these functions : **cv.getTickCount**, **cv.getTickFrequency** etc. +- You will see these functions: **cv.getTickCount**, **cv.getTickFrequency**, etc. Apart from OpenCV, Python also provides a module **time** which is helpful in measuring the time of -execution.
Another module **profile** helps to get detailed report on the code, like how much time -each function in the code took, how many times the function was called etc. But, if you are using +execution. Another module **profile** helps to get a detailed report on the code, like how much time +each function in the code took, how many times the function was called, etc. But, if you are using IPython, all these features are integrated in an user-friendly manner. We will see some important -ones, and for more details, check links in **Additional Resources** section. +ones, and for more details, check links in the **Additional Resources** section. Measuring Performance with OpenCV --------------------------------- -**cv.getTickCount** function returns the number of clock-cycles after a reference event (like the -moment machine was switched ON) to the moment this function is called. So if you call it before and -after the function execution, you get number of clock-cycles used to execute a function. +The **cv.getTickCount** function returns the number of clock-cycles after a reference event (like the +moment the machine was switched ON) to the moment this function is called. So if you call it before and +after the function execution, you get the number of clock-cycles used to execute a function. -**cv.getTickFrequency** function returns the frequency of clock-cycles, or the number of +The **cv.getTickFrequency** function returns the frequency of clock-cycles, or the number of clock-cycles per second. So to find the time of execution in seconds, you can do following: @code{.py} e1 = cv.getTickCount() @@ -33,8 +32,8 @@ e1 = cv.getTickCount() e2 = cv.getTickCount() time = (e2 - e1)/ cv.getTickFrequency() @endcode -We will demonstrate with following example. Following example apply median filtering with a kernel -of odd size ranging from 5 to 49. (Don't worry about what will the result look like, that is not our +We will demonstrate with following example. 
The following example applies median filtering with kernels +of odd sizes ranging from 5 to 49. (Don't worry about what the result will look like - that is not our goal): @code{.py} img1 = cv.imread('messi5.jpg') @@ -48,16 +47,16 @@ print( t ) # Result I got is 0.521107655 seconds @endcode -@note You can do the same with time module. Instead of cv.getTickCount, use time.time() function. -Then take the difference of two times. +@note You can do the same thing with the time module. Instead of cv.getTickCount, use the time.time() function. +Then take the difference of the two times. Default Optimization in OpenCV ------------------------------ -Many of the OpenCV functions are optimized using SSE2, AVX etc. It contains unoptimized code also. +Many of the OpenCV functions are optimized using SSE2, AVX, etc. It contains the unoptimized code also. So if our system support these features, we should exploit them (almost all modern day processors support them). It is enabled by default while compiling. So OpenCV runs the optimized code if it is -enabled, else it runs the unoptimized code. You can use **cv.useOptimized()** to check if it is +enabled, otherwise it runs the unoptimized code. You can use **cv.useOptimized()** to check if it is enabled/disabled and **cv.setUseOptimized()** to enable/disable it. Let's see a simple example. @code{.py} # check if optimization is enabled @@ -76,8 +75,8 @@ Out[8]: False In [9]: %timeit res = cv.medianBlur(img,49) 10 loops, best of 3: 64.1 ms per loop @endcode -See, optimized median filtering is \~2x faster than unoptimized version. If you check its source, -you can see median filtering is SIMD optimized. So you can use this to enable optimization at the +As you can see, optimized median filtering is \~2x faster than the unoptimized version. If you check its source, +you can see that median filtering is SIMD optimized. So you can use this to enable optimization at the top of your code (remember it is enabled by default). 
Measuring Performance in IPython @@ -85,10 +84,10 @@ Measuring Performance in IPython Sometimes you may need to compare the performance of two similar operations. IPython gives you a magic command %timeit to perform this. It runs the code several times to get more accurate results. -Once again, they are suitable to measure single line codes. +Once again, it is suitable for measuring single lines of code. -For example, do you know which of the following addition operation is better, x = 5; y = x\*\*2, -x = 5; y = x\*x, x = np.uint8([5]); y = x\*x or y = np.square(x) ? We will find it with %timeit in +For example, do you know which of the following addition operations is better, x = 5; y = x\*\*2, +x = 5; y = x\*x, x = np.uint8([5]); y = x\*x, or y = np.square(x)? We will find out with %timeit in the IPython shell. @code{.py} In [10]: x = 5 @@ -108,15 +107,15 @@ In [19]: %timeit y=np.square(z) 1000000 loops, best of 3: 1.16 us per loop @endcode You can see that, x = 5 ; y = x\*x is fastest and it is around 20x faster compared to Numpy. If you -consider the array creation also, it may reach upto 100x faster. Cool, right? *(Numpy devs are +consider the array creation also, it may reach up to 100x faster. Cool, right? *(Numpy devs are working on this issue)* @note Python scalar operations are faster than Numpy scalar operations. So for operations including -one or two elements, Python scalar is better than Numpy arrays. Numpy takes advantage when size of -array is a little bit bigger. +one or two elements, Python scalar is better than Numpy arrays. Numpy has the advantage when the size of +the array is a little bit bigger. We will try one more example. This time, we will compare the performance of **cv.countNonZero()** -and **np.count_nonzero()** for same image. +and **np.count_nonzero()** for the same image.
@code{.py} In [35]: %timeit z = cv.countNonZero(img) @@ -125,7 +124,7 @@ In [35]: %timeit z = cv.countNonZero(img) In [36]: %timeit z = np.count_nonzero(img) 1000 loops, best of 3: 370 us per loop @endcode -See, OpenCV function is nearly 25x faster than Numpy function. +See, the OpenCV function is nearly 25x faster than the Numpy function. @note Normally, OpenCV functions are faster than Numpy functions. So for same operation, OpenCV functions are preferred. But, there can be exceptions, especially when Numpy works with views @@ -134,8 +133,8 @@ instead of copies. More IPython magic commands --------------------------- -There are several other magic commands to measure the performance, profiling, line profiling, memory -measurement etc. They all are well documented. So only links to those docs are provided here. +There are several other magic commands to measure performance, profiling, line profiling, memory +measurement, etc. They all are well documented. So only links to those docs are provided here. Interested readers are recommended to try them out. Performance Optimization Techniques @@ -143,19 +142,18 @@ Performance Optimization Techniques There are several techniques and coding methods to exploit maximum performance of Python and Numpy. Only relevant ones are noted here and links are given to important sources. The main thing to be -noted here is that, first try to implement the algorithm in a simple manner. Once it is working, -profile it, find the bottlenecks and optimize them. +noted here is, first try to implement the algorithm in a simple manner. Once it is working, +profile it, find the bottlenecks, and optimize them. --# Avoid using loops in Python as far as possible, especially double/triple loops etc. They are +-# Avoid using loops in Python as much as possible, especially double/triple loops etc. They are inherently slow. -2. Vectorize the algorithm/code to the maximum possible extent because Numpy and OpenCV are +2.
Vectorize the algorithm/code to the maximum extent possible, because Numpy and OpenCV are optimized for vector operations. 3. Exploit the cache coherence. -4. Never make copies of array unless it is needed. Try to use views instead. Array copying is a +4. Never make copies of an array unless it is necessary. Try to use views instead. Array copying is a costly operation. -Even after doing all these operations, if your code is still slow, or use of large loops are -inevitable, use additional libraries like Cython to make it faster. +If your code is still slow after doing all of these operations, or if the use of large loops is inevitable, use additional libraries like Cython to make it faster. Additional Resources -------------------- From dd9262c318845eade5d6ab67216482bd2e79304c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Gur=C3=ADn?= Date: Thu, 15 Aug 2019 23:09:37 -0300 Subject: [PATCH 11/14] expose FS --- modules/js/src/helpers.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/js/src/helpers.js b/modules/js/src/helpers.js index 08d1a89b2b..a2f3101b08 100644 --- a/modules/js/src/helpers.js +++ b/modules/js/src/helpers.js @@ -38,6 +38,10 @@ // the use of this software, even if advised of the possibility of such damage. // +if (typeof Module.FS === 'undefined' && typeof FS !== 'undefined') { + Module.FS = FS; +} + Module['imread'] = function(imageSource) { var img = null; if (typeof imageSource === 'string') { From 3755099bd469a71317f5fff4f7655733dc207555 Mon Sep 17 00:00:00 2001 From: Adam Gyarmati <40522904+gyadam@users.noreply.github.com> Date: Fri, 8 Nov 2019 19:05:46 -0800 Subject: [PATCH 12/14] Fix Windows installation script error Fix an error during Windows installation caused by trying to create the already existing Build directory. Also excluding intermediate steps for Install directory creation. 
--- .../windows_install/windows_install.markdown | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/tutorials/introduction/windows_install/windows_install.markdown b/doc/tutorials/introduction/windows_install/windows_install.markdown index c8f46cbdce..7a506b042a 100644 --- a/doc/tutorials/introduction/windows_install/windows_install.markdown +++ b/doc/tutorials/introduction/windows_install/windows_install.markdown @@ -48,10 +48,8 @@ CMAKE_CONFIG_GENERATOR="Visual Studio 14 2015 Win64" if [ ! -d "$myRepo/opencv" ]; then echo "cloning opencv" git clone https://github.com/opencv/opencv.git - mkdir Build - mkdir Build/opencv - mkdir Install - mkdir Install/opencv + mkdir -p Build/opencv + mkdir -p Install/opencv else cd opencv git pull --rebase @@ -60,8 +58,7 @@ fi if [ ! -d "$myRepo/opencv_contrib" ]; then echo "cloning opencv_contrib" git clone https://github.com/opencv/opencv_contrib.git - mkdir Build - mkdir Build/opencv_contrib + mkdir -p Build/opencv_contrib else cd opencv_contrib git pull --rebase From cfc781949df01d899a85172fdf3c917a43d437ca Mon Sep 17 00:00:00 2001 From: Lubov Batanina Date: Sat, 9 Nov 2019 14:11:09 +0300 Subject: [PATCH 13/14] Merge pull request #15811 from l-bat:eltwise_div Supported ONNX Squeeze, ReduceL2 and Eltwise::DIV * Support eltwise div * Fix test * OpenCL support added * refactoring * fix code style * Only squeeze with axes supported --- modules/dnn/src/layers/eltwise_layer.cpp | 22 ++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 52 ++++++++++++++++++++---- modules/dnn/test/test_onnx_importer.cpp | 32 +++++++++++++++ 3 files changed, 97 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index e248b1622d..f077866bdc 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -62,6 +62,7 @@ public: PROD = 0, SUM = 1, MAX = 2, + DIV = 3 } op; std::vector coeffs; bool variableChannels; 
@@ -79,6 +80,8 @@ public: op = SUM; else if (operation == "max") op = MAX; + else if (operation == "div") + op = DIV; else CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\""); } @@ -271,6 +274,18 @@ public: srcptr0 = (const float*)dstptr; } } + else if( op == DIV ) + { + for( k = 1; k < n; k++ ) + { + const float* srcptr1 = srcs[k]->ptr() + globalDelta; + for( j = 0; j < blockSize; j++ ) + { + dstptr[j] = srcptr0[j]/srcptr1[j]; + } + srcptr0 = (const float*)dstptr; + } + } else if( op == MAX ) { for( k = 1; k < n; k++ ) @@ -393,6 +408,11 @@ public: for (int i = 2; i < inputs.size(); ++i) multiply(inputs[i], outputs[0], outputs[0]); break; + case DIV: + divide(inputs[0], inputs[1], outputs[0]); + for (int i = 2; i < inputs.size(); ++i) + divide(outputs[0], inputs[i], outputs[0]); + break; case MAX: max(inputs[0], inputs[1], outputs[0]); for (int i = 2; i < inputs.size(); ++i) @@ -486,6 +506,8 @@ public: ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM); else if (op == PROD) ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL); + else if (op == DIV) + ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV); else if (op == MAX) ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX); else diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 51f9affe3b..ed7474f9db 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -520,19 +520,27 @@ void ONNXImporter::populateNet(Net dstNet) } else if (layer_type == "Div") { - Mat blob = getBlob(node_proto, constBlobs, 1); - CV_Assert_N(blob.type() == CV_32F, blob.total()); - if (blob.total() == 1) + if (constBlobs.find(node_proto.input(1)) == constBlobs.end()) { - layerParams.set("scale", 1.0f / blob.at(0)); - layerParams.type = "Power"; + layerParams.type = "Eltwise"; + layerParams.set("operation", "div"); 
} else { - layerParams.type = "Scale"; - divide(1.0, blob, blob); - layerParams.blobs.push_back(blob); - layerParams.set("bias_term", false); + Mat blob = getBlob(node_proto, constBlobs, 1); + CV_Assert_N(blob.type() == CV_32F, blob.total()); + if (blob.total() == 1) + { + layerParams.set("scale", 1.0f / blob.at(0)); + layerParams.type = "Power"; + } + else + { + layerParams.type = "Scale"; + divide(1.0, blob, blob); + layerParams.blobs.push_back(blob); + layerParams.set("bias_term", false); + } } } else if (layer_type == "Neg") @@ -771,6 +779,32 @@ void ONNXImporter::populateNet(Net dstNet) continue; } } + else if (layer_type == "ReduceL2") + { + CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); + CV_Assert(graph_proto.node_size() > li + 1 && graph_proto.node(li + 1).op_type() == "Div"); + ++li; + layerParams.type = "Normalize"; + + DictValue axes_dict = layerParams.get("axes"); + if (axes_dict.size() != 1) + CV_Error(Error::StsNotImplemented, "Multidimensional reduceL2"); + int axis = axes_dict.getIntValue(0); + layerParams.set("axis",axis); + layerParams.set("end_axis", axis); + } + else if (layer_type == "Squeeze") + { + CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); + DictValue axes_dict = layerParams.get("axes"); + if (axes_dict.size() != 1) + CV_Error(Error::StsNotImplemented, "Multidimensional squeeze"); + + int axis = axes_dict.getIntValue(0); + layerParams.set("axis", axis - 1); + layerParams.set("end_axis", axis); + layerParams.type = "Flatten"; + } else if (layer_type == "Unsqueeze") { CV_Assert(node_proto.input_size() == 1); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index f54ce77d59..b96f408f26 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -318,6 +318,28 @@ TEST_P(Test_ONNX_layers, MultyInputs) expectNoFallbacksFromIE(net); } +TEST_P(Test_ONNX_layers, Div) +{ + const String model = _tf("models/div.onnx"); + 
Net net = readNetFromONNX(model); + ASSERT_FALSE(net.empty()); + + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + Mat inp1 = blobFromNPY(_tf("data/input_div_0.npy")); + Mat inp2 = blobFromNPY(_tf("data/input_div_1.npy")); + Mat ref = blobFromNPY(_tf("data/output_div.npy")); + checkBackend(&inp1, &ref); + + net.setInput(inp1, "0"); + net.setInput(inp2, "1"); + Mat out = net.forward(); + + normAssert(ref, out, "", default_l1, default_lInf); + expectNoFallbacksFromIE(net); +} + TEST_P(Test_ONNX_layers, DynamicReshape) { if (backend == DNN_BACKEND_INFERENCE_ENGINE) @@ -333,6 +355,16 @@ TEST_P(Test_ONNX_layers, Reshape) testONNXModels("unsqueeze"); } +TEST_P(Test_ONNX_layers, Squeeze) +{ + testONNXModels("squeeze"); +} + +TEST_P(Test_ONNX_layers, ReduceL2) +{ + testONNXModels("reduceL2"); +} + TEST_P(Test_ONNX_layers, Slice) { #if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000) From fccf28408818a112d64fe9c5d0ad4a392359d61c Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Sat, 9 Nov 2019 11:19:23 +0000 Subject: [PATCH 14/14] Fixed relative paths handling in cap_gstreamer: --- modules/videoio/src/cap_gstreamer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index 585f43959d..f0a4f8eddb 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -748,18 +748,20 @@ bool GStreamerCapture::open(const String &filename_) // else, we might have a file or a manual pipeline. // if gstreamer cannot parse the manual pipeline, we assume we were given and // ordinary file path. 
+ CV_LOG_INFO(NULL, "OpenCV | GStreamer: " << filename); if (!gst_uri_is_valid(filename)) { if (utils::fs::exists(filename_)) { - uri.attach(g_filename_to_uri(filename, NULL, NULL)); + GSafePtr err; + uri.attach(gst_filename_to_uri(filename, err.getRef())); if (uri) { file = true; } else { - CV_WARN("Error opening file: " << filename << " (" << uri.get() << ")"); + CV_WARN("Error opening file: " << filename << " (" << err->message << ")"); return false; } } @@ -779,7 +781,7 @@ bool GStreamerCapture::open(const String &filename_) { uri.attach(g_strdup(filename)); } - + CV_LOG_INFO(NULL, "OpenCV | GStreamer: mode - " << (file ? "FILE" : manualpipeline ? "MANUAL" : "URI")); bool element_from_uri = false; if (!uridecodebin) {