mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 17:44:04 +08:00
Merge remote-tracking branch 'upstream/3.4' into merge-3.4
This commit is contained in:
commit
b6a58818bb
@ -714,7 +714,10 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
|
||||
foreach(OPT ${CPU_DISPATCH_FINAL})
|
||||
if(__result_${OPT})
|
||||
#message("${OPT}: ${__result_${OPT}}")
|
||||
if(CMAKE_GENERATOR MATCHES "^Visual")
|
||||
if(CMAKE_GENERATOR MATCHES "^Visual"
|
||||
OR OPENCV_CMAKE_CPU_OPTIMIZATIONS_FORCE_TARGETS
|
||||
)
|
||||
# MSVS generator is not able to properly order compilation flags:
|
||||
# extra flags are added before common flags, so switching between optimizations doesn't work correctly
|
||||
# Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
|
||||
add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
|
||||
|
@ -20,55 +20,89 @@ if(WIN32)
|
||||
elseif(MSVC)
|
||||
SET(OPENEXR_LIBSEARCH_SUFFIXES Win32/Release Win32 Win32/Debug)
|
||||
endif()
|
||||
else()
|
||||
set(OPENEXR_ROOT "")
|
||||
endif()
|
||||
|
||||
SET(LIBRARY_PATHS
|
||||
/usr/lib
|
||||
/usr/local/lib
|
||||
/sw/lib
|
||||
/opt/local/lib
|
||||
"${ProgramFiles_ENV_PATH}/OpenEXR/lib/static"
|
||||
"${OPENEXR_ROOT}/lib")
|
||||
SET(SEARCH_PATHS
|
||||
"${OPENEXR_ROOT}"
|
||||
/usr
|
||||
/usr/local
|
||||
/sw
|
||||
/opt
|
||||
"${ProgramFiles_ENV_PATH}/OpenEXR")
|
||||
|
||||
# Look up a single OpenEXR component library.
#
#   LIBRARY_NAME   - base name of the library (for example "Half").
#   LIBRARY_SUFFIX - text appended to the base name to form the file name
#                    that is searched for.
#
# The result is stored by find_library() in
# OPENEXR_<LIBRARY_NAME in upper case>_LIBRARY.
#
# Only "${SEARCH_PATH}/lib" and "${SEARCH_PATH}/lib/static" (plus the
# OPENEXR_LIBSEARCH_SUFFIXES sub-directories) are examined, because
# NO_DEFAULT_PATH disables the default search locations. SEARCH_PATH is not
# a parameter: it is read from the caller's scope when the macro is expanded.
# Being a macro, LIBRARY_NAME_UPPER is also left set in the caller's scope.
macro(FIND_OPENEXR_LIBRARY LIBRARY_NAME LIBRARY_SUFFIX)
  string(TOUPPER "${LIBRARY_NAME}" LIBRARY_NAME_UPPER)
  find_library(OPENEXR_${LIBRARY_NAME_UPPER}_LIBRARY
    NAMES ${LIBRARY_NAME}${LIBRARY_SUFFIX}
    PATHS
      "${SEARCH_PATH}/lib"
      "${SEARCH_PATH}/lib/static"
    PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
    NO_DEFAULT_PATH
  )
endmacro()
|
||||
|
||||
FOREACH(SEARCH_PATH ${SEARCH_PATHS})
|
||||
FIND_PATH(OPENEXR_INCLUDE_PATH ImfRgbaFile.h
|
||||
PATH_SUFFIXES OpenEXR
|
||||
NO_DEFAULT_PATH
|
||||
PATHS
|
||||
/usr/include
|
||||
/usr/local/include
|
||||
/sw/include
|
||||
/opt/local/include
|
||||
"${ProgramFiles_ENV_PATH}/OpenEXR/include"
|
||||
"${OPENEXR_ROOT}/include")
|
||||
"${SEARCH_PATH}/include")
|
||||
|
||||
FIND_LIBRARY(OPENEXR_HALF_LIBRARY
|
||||
NAMES Half
|
||||
PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
|
||||
PATHS ${LIBRARY_PATHS})
|
||||
IF (OPENEXR_INCLUDE_PATH)
|
||||
SET(OPENEXR_VERSION_FILE "${OPENEXR_INCLUDE_PATH}/OpenEXRConfig.h")
|
||||
IF (EXISTS ${OPENEXR_VERSION_FILE})
|
||||
FILE (STRINGS ${OPENEXR_VERSION_FILE} contents REGEX "#define OPENEXR_VERSION_MAJOR ")
|
||||
IF (${contents} MATCHES "#define OPENEXR_VERSION_MAJOR ([0-9]+)")
|
||||
SET(OPENEXR_VERSION_MAJOR "${CMAKE_MATCH_1}")
|
||||
ENDIF ()
|
||||
FILE (STRINGS ${OPENEXR_VERSION_FILE} contents REGEX "#define OPENEXR_VERSION_MINOR ")
|
||||
IF (${contents} MATCHES "#define OPENEXR_VERSION_MINOR ([0-9]+)")
|
||||
SET(OPENEXR_VERSION_MINOR "${CMAKE_MATCH_1}")
|
||||
ENDIF ()
|
||||
ENDIF ()
|
||||
ENDIF ()
|
||||
|
||||
FIND_LIBRARY(OPENEXR_IEX_LIBRARY
|
||||
NAMES Iex
|
||||
PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
|
||||
PATHS ${LIBRARY_PATHS})
|
||||
IF (OPENEXR_VERSION_MAJOR AND OPENEXR_VERSION_MINOR)
|
||||
set(OPENEXR_VERSION "${OPENEXR_VERSION_MAJOR}_${OPENEXR_VERSION_MINOR}")
|
||||
ENDIF ()
|
||||
|
||||
FIND_LIBRARY(OPENEXR_IMATH_LIBRARY
|
||||
NAMES Imath
|
||||
PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
|
||||
PATHS ${LIBRARY_PATHS})
|
||||
|
||||
FIND_LIBRARY(OPENEXR_ILMIMF_LIBRARY
|
||||
NAMES IlmImf
|
||||
PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
|
||||
PATHS ${LIBRARY_PATHS})
|
||||
|
||||
FIND_LIBRARY(OPENEXR_ILMTHREAD_LIBRARY
|
||||
NAMES IlmThread
|
||||
PATH_SUFFIXES ${OPENEXR_LIBSEARCH_SUFFIXES}
|
||||
PATHS ${LIBRARY_PATHS})
|
||||
# Candidate suffixes appended to each OpenEXR library base name
# (see FIND_OPENEXR_LIBRARY). Versioned names come first; OPENEXR_VERSION has
# the form "<major>_<minor>" when OpenEXRConfig.h was parsed successfully.
# The "_s" / "_d" variants are presumably static / debug builds - confirm
# against the OpenEXR packaging you target.
#
# Fix: the fourth entry previously referenced the misspelled variable
# OPEXEXR_VERSION, which is never defined and therefore expanded to an empty
# string, so the versioned "_s_d" library name could never be matched.
#
# NOTE(review): the "" entry is an empty list element; it is dropped when the
# list is expanded unquoted, as in FOREACH(LIBRARY_SUFFIX ${LIBRARY_SUFFIXES}).
# Confirm whether un-suffixed library names are meant to be tried.
SET(LIBRARY_SUFFIXES
  "-${OPENEXR_VERSION}"
  "-${OPENEXR_VERSION}_s"
  "-${OPENEXR_VERSION}_d"
  "-${OPENEXR_VERSION}_s_d"
  ""
  "_s"
  "_d"
  "_s_d")
|
||||
|
||||
FOREACH(LIBRARY_SUFFIX ${LIBRARY_SUFFIXES})
|
||||
FIND_OPENEXR_LIBRARY("Half" ${LIBRARY_SUFFIX})
|
||||
FIND_OPENEXR_LIBRARY("Iex" ${LIBRARY_SUFFIX})
|
||||
FIND_OPENEXR_LIBRARY("Imath" ${LIBRARY_SUFFIX})
|
||||
FIND_OPENEXR_LIBRARY("IlmImf" ${LIBRARY_SUFFIX})
|
||||
FIND_OPENEXR_LIBRARY("IlmThread" ${LIBRARY_SUFFIX})
|
||||
IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
|
||||
SET(OPENEXR_FOUND TRUE)
|
||||
BREAK()
|
||||
ENDIF()
|
||||
UNSET(OPENEXR_IMATH_LIBRARY)
|
||||
UNSET(OPENEXR_ILMIMF_LIBRARY)
|
||||
UNSET(OPENEXR_IEX_LIBRARY)
|
||||
UNSET(OPENEXR_ILMTHREAD_LIBRARY)
|
||||
UNSET(OPENEXR_HALF_LIBRARY)
|
||||
ENDFOREACH()
|
||||
|
||||
IF (OPENEXR_FOUND)
|
||||
BREAK()
|
||||
ENDIF()
|
||||
|
||||
UNSET(OPENEXR_INCLUDE_PATH)
|
||||
UNSET(OPENEXR_VERSION_FILE)
|
||||
UNSET(OPENEXR_VERSION_MAJOR)
|
||||
UNSET(OPENEXR_VERSION_MINOR)
|
||||
UNSET(OPENEXR_VERSION)
|
||||
ENDFOREACH()
|
||||
|
||||
IF (OPENEXR_FOUND)
|
||||
SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE PATH "The include paths needed to use OpenEXR")
|
||||
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR" FORCE)
|
||||
ENDIF ()
|
||||
|
@ -63,7 +63,6 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD
|
||||
unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE)
|
||||
unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE)
|
||||
unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE)
|
||||
unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE)
|
||||
endforeach()
|
||||
|
||||
# clean modules info which needs to be recalculated
|
||||
|
@ -288,9 +288,22 @@ function(ocv_append_target_property target prop)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Reset the dependant-targets bookkeeping kept in the CMake cache.
# OPENCV_DEPENDANT_TARGETS_LIST holds the names of the per-target cache
# variables written by ocv_append_dependant_targets(); remove each of them
# from the cache and then remove the registry itself.
# (Presumably this keeps stale entries from a previous configure run from
# leaking into the current one - confirm against the callers.)
if(DEFINED OPENCV_DEPENDANT_TARGETS_LIST)
  foreach(v ${OPENCV_DEPENDANT_TARGETS_LIST})
    unset(${v} CACHE)
  endforeach()
  unset(OPENCV_DEPENDANT_TARGETS_LIST CACHE)
endif()
|
||||
|
||||
# Append the extra arguments (ARGN) to the cached list
# OPENCV_DEPENDANT_TARGETS_<target>.
#
#   target - target name; it is first passed through _ocv_fix_target(), which
#            is defined elsewhere (presumably it normalizes the name in
#            place - confirm).
#   ARGN   - entries appended to the per-target list.
#
# The name of the per-target cache variable is also registered once in the
# sorted cache list OPENCV_DEPENDANT_TARGETS_LIST, so that all such variables
# can be enumerated later.
function(ocv_append_dependant_targets target)
  #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})")
  _ocv_fix_target(target)

  # Must be computed after _ocv_fix_target(), which may have rewritten `target`.
  set(__cache_key "OPENCV_DEPENDANT_TARGETS_${target}")

  # Register the per-target variable name the first time it is seen.
  list(FIND OPENCV_DEPENDANT_TARGETS_LIST "${__cache_key}" __pos)
  if(__pos EQUAL -1)
    list(APPEND OPENCV_DEPENDANT_TARGETS_LIST "${__cache_key}")
    list(SORT OPENCV_DEPENDANT_TARGETS_LIST)
    set(OPENCV_DEPENDANT_TARGETS_LIST "${OPENCV_DEPENDANT_TARGETS_LIST}" CACHE INTERNAL "")
  endif()

  # Previous value, a separator, then the new entries.
  set(${__cache_key} "${${__cache_key}};${ARGN}" CACHE INTERNAL "" FORCE)
endfunction()
|
||||
|
||||
|
@ -8,13 +8,13 @@ Learn to:
|
||||
|
||||
- Access pixel values and modify them
|
||||
- Access image properties
|
||||
- Setting Region of Interest (ROI)
|
||||
- Splitting and Merging images
|
||||
- Set a Region of Interest (ROI)
|
||||
- Split and merge images
|
||||
|
||||
Almost all the operations in this section is mainly related to Numpy rather than OpenCV. A good
|
||||
Almost all the operations in this section are mainly related to Numpy rather than OpenCV. A good
|
||||
knowledge of Numpy is required to write better optimized code with OpenCV.
|
||||
|
||||
*( Examples will be shown in Python terminal since most of them are just single line codes )*
|
||||
*( Examples will be shown in a Python terminal, since most of them are just single lines of code )*
|
||||
|
||||
Accessing and Modifying pixel values
|
||||
------------------------------------
|
||||
@ -45,15 +45,15 @@ You can modify the pixel values the same way.
|
||||
[255 255 255]
|
||||
@endcode
|
||||
|
||||
**warning**
|
||||
**Warning**
|
||||
|
||||
Numpy is a optimized library for fast array calculations. So simply accessing each and every pixel
|
||||
values and modifying it will be very slow and it is discouraged.
|
||||
Numpy is an optimized library for fast array calculations. So simply accessing each and every pixel
|
||||
value and modifying it will be very slow and it is discouraged.
|
||||
|
||||
@note The above method is normally used for selecting a region of an array, say the first 5 rows
|
||||
and last 3 columns. For individual pixel access, the Numpy array methods, array.item() and
|
||||
array.itemset() are considered better, however they always return a scalar. If you want to access
|
||||
all B,G,R values, you need to call array.item() separately for all.
|
||||
array.itemset() are considered better. They always return a scalar, however, so if you want to access
|
||||
all the B,G,R values, you will need to call array.item() separately for each value.
|
||||
|
||||
Better pixel accessing and editing method :
|
||||
@code{.py}
|
||||
@ -70,11 +70,10 @@ Better pixel accessing and editing method :
|
||||
Accessing Image Properties
|
||||
--------------------------
|
||||
|
||||
Image properties include number of rows, columns and channels, type of image data, number of pixels
|
||||
etc.
|
||||
Image properties include number of rows, columns, and channels; type of image data; number of pixels; etc.
|
||||
|
||||
The shape of an image is accessed by img.shape. It returns a tuple of number of rows, columns, and channels
|
||||
(if image is color):
|
||||
The shape of an image is accessed by img.shape. It returns a tuple of the number of rows, columns, and channels
|
||||
(if the image is color):
|
||||
@code{.py}
|
||||
>>> print( img.shape )
|
||||
(342, 548, 3)
|
||||
@ -95,13 +94,13 @@ uint8
|
||||
@endcode
|
||||
|
||||
@note img.dtype is very important while debugging because a large number of errors in OpenCV-Python
|
||||
code is caused by invalid datatype.
|
||||
code are caused by invalid datatype.
|
||||
|
||||
Image ROI
|
||||
---------
|
||||
|
||||
Sometimes, you will have to play with certain region of images. For eye detection in images, first
|
||||
face detection is done all over the image. When a face is obtained, we select the face region alone
|
||||
Sometimes, you will have to play with certain regions of images. For eye detection in images, first
|
||||
face detection is done over the entire image. When a face is obtained, we select the face region alone
|
||||
and search for eyes inside it instead of searching the whole image. It improves accuracy (because eyes
|
||||
are always on faces :D ) and performance (because we search in a small area).
|
||||
|
||||
@ -118,9 +117,9 @@ Check the results below:
|
||||
Splitting and Merging Image Channels
|
||||
------------------------------------
|
||||
|
||||
Sometimes you will need to work separately on B,G,R channels of image. In this case, you need
|
||||
to split the BGR images to single channels. In other cases, you may need to join these individual
|
||||
channels to a BGR image. You can do it simply by:
|
||||
Sometimes you will need to work separately on the B,G,R channels of an image. In this case, you need
|
||||
to split the BGR image into single channels. In other cases, you may need to join these individual
|
||||
channels to create a BGR image. You can do this simply by:
|
||||
@code{.py}
|
||||
>>> b,g,r = cv.split(img)
|
||||
>>> img = cv.merge((b,g,r))
|
||||
@ -129,7 +128,7 @@ Or
|
||||
@code
|
||||
>>> b = img[:,:,0]
|
||||
@endcode
|
||||
Suppose you want to set all the red pixels to zero, you do not need to split the channels first.
|
||||
Suppose you want to set all the red pixels to zero - you do not need to split the channels first.
|
||||
Numpy indexing is faster:
|
||||
@code{.py}
|
||||
>>> img[:,:,2] = 0
|
||||
@ -137,13 +136,13 @@ Numpy indexing is faster:
|
||||
|
||||
**Warning**
|
||||
|
||||
cv.split() is a costly operation (in terms of time). So do it only if you need it. Otherwise go
|
||||
cv.split() is a costly operation (in terms of time). So use it only if necessary. Otherwise go
|
||||
for Numpy indexing.
|
||||
|
||||
Making Borders for Images (Padding)
|
||||
-----------------------------------
|
||||
|
||||
If you want to create a border around the image, something like a photo frame, you can use
|
||||
If you want to create a border around an image, something like a photo frame, you can use
|
||||
**cv.copyMakeBorder()**. But it has more applications for convolution operation, zero
|
||||
padding, etc. This function takes the following arguments:
|
||||
|
||||
|
@ -4,21 +4,20 @@ Arithmetic Operations on Images {#tutorial_py_image_arithmetics}
|
||||
Goal
|
||||
----
|
||||
|
||||
- Learn several arithmetic operations on images like addition, subtraction, bitwise operations
|
||||
etc.
|
||||
- You will learn these functions : **cv.add()**, **cv.addWeighted()** etc.
|
||||
- Learn several arithmetic operations on images, like addition, subtraction, bitwise operations, etc.
|
||||
- Learn these functions: **cv.add()**, **cv.addWeighted()**, etc.
|
||||
|
||||
Image Addition
|
||||
--------------
|
||||
|
||||
You can add two images by OpenCV function, cv.add() or simply by numpy operation,
|
||||
res = img1 + img2. Both images should be of same depth and type, or second image can just be a
|
||||
You can add two images with the OpenCV function, cv.add(), or simply by the numpy operation
|
||||
res = img1 + img2. Both images should be of the same depth and type, or the second image can just be a
|
||||
scalar value.
|
||||
|
||||
@note There is a difference between OpenCV addition and Numpy addition. OpenCV addition is a
|
||||
saturated operation while Numpy addition is a modulo operation.
|
||||
|
||||
For example, consider below sample:
|
||||
For example, consider the below sample:
|
||||
@code{.py}
|
||||
>>> x = np.uint8([250])
|
||||
>>> y = np.uint8([10])
|
||||
@ -29,13 +28,12 @@ For example, consider below sample:
|
||||
>>> print( x+y ) # 250+10 = 260 % 256 = 4
|
||||
[4]
|
||||
@endcode
|
||||
It will be more visible when you add two images. OpenCV function will provide a better result. So
|
||||
always better stick to OpenCV functions.
|
||||
This will be more visible when you add two images. Stick with OpenCV functions, because they will provide a better result.
|
||||
|
||||
Image Blending
|
||||
--------------
|
||||
|
||||
This is also image addition, but different weights are given to images so that it gives a feeling of
|
||||
This is also image addition, but different weights are given to images in order to give a feeling of
|
||||
blending or transparency. Images are added as per the equation below:
|
||||
|
||||
\f[g(x) = (1 - \alpha)f_{0}(x) + \alpha f_{1}(x)\f]
|
||||
@ -43,8 +41,8 @@ blending or transparency. Images are added as per the equation below:
|
||||
By varying \f$\alpha\f$ from \f$0 \rightarrow 1\f$, you can perform a cool transition from one image to
|
||||
another.
|
||||
|
||||
Here I took two images to blend them together. First image is given a weight of 0.7 and second image
|
||||
is given 0.3. cv.addWeighted() applies following equation on the image.
|
||||
Here I took two images to blend together. The first image is given a weight of 0.7 and the second image
|
||||
is given 0.3. cv.addWeighted() applies the following equation to the image:
|
||||
|
||||
\f[dst = \alpha \cdot img1 + \beta \cdot img2 + \gamma\f]
|
||||
|
||||
@ -66,14 +64,14 @@ Check the result below:
|
||||
Bitwise Operations
|
||||
------------------
|
||||
|
||||
This includes bitwise AND, OR, NOT and XOR operations. They will be highly useful while extracting
|
||||
This includes the bitwise AND, OR, NOT, and XOR operations. They will be highly useful while extracting
|
||||
any part of the image (as we will see in coming chapters), defining and working with non-rectangular
|
||||
ROI etc. Below we will see an example on how to change a particular region of an image.
|
||||
ROIs, etc. Below we will see an example of how to change a particular region of an image.
|
||||
|
||||
I want to put OpenCV logo above an image. If I add two images, it will change color. If I blend it,
|
||||
I get an transparent effect. But I want it to be opaque. If it was a rectangular region, I could use
|
||||
ROI as we did in last chapter. But OpenCV logo is a not a rectangular shape. So you can do it with
|
||||
bitwise operations as below:
|
||||
I want to put the OpenCV logo above an image. If I add two images, it will change the color. If I blend them,
|
||||
I get a transparent effect. But I want it to be opaque. If it was a rectangular region, I could use
|
||||
ROI as we did in the last chapter. But the OpenCV logo is not a rectangular shape. So you can do it with
|
||||
bitwise operations as shown below:
|
||||
@code{.py}
|
||||
# Load two images
|
||||
img1 = cv.imread('messi5.jpg')
|
||||
|
@ -4,28 +4,27 @@ Performance Measurement and Improvement Techniques {#tutorial_py_optimization}
|
||||
Goal
|
||||
----
|
||||
|
||||
In image processing, since you are dealing with large number of operations per second, it is
|
||||
mandatory that your code is not only providing the correct solution, but also in the fastest manner.
|
||||
So in this chapter, you will learn
|
||||
In image processing, since you are dealing with a large number of operations per second, it is mandatory that your code is not only providing the correct solution, but that it is also providing it in the fastest manner.
|
||||
So in this chapter, you will learn:
|
||||
|
||||
- To measure the performance of your code.
|
||||
- Some tips to improve the performance of your code.
|
||||
- You will see these functions : **cv.getTickCount**, **cv.getTickFrequency** etc.
|
||||
- You will see these functions: **cv.getTickCount**, **cv.getTickFrequency**, etc.
|
||||
|
||||
Apart from OpenCV, Python also provides a module **time** which is helpful in measuring the time of
|
||||
execution. Another module **profile** helps to get detailed report on the code, like how much time
|
||||
each function in the code took, how many times the function was called etc. But, if you are using
|
||||
execution. Another module **profile** helps to get a detailed report on the code, like how much time
|
||||
each function in the code took, how many times the function was called, etc. But, if you are using
|
||||
IPython, all these features are integrated in a user-friendly manner. We will see some important
|
||||
ones, and for more details, check links in **Additional Resources** section.
|
||||
ones, and for more details, check links in the **Additional Resources** section.
|
||||
|
||||
Measuring Performance with OpenCV
|
||||
---------------------------------
|
||||
|
||||
**cv.getTickCount** function returns the number of clock-cycles after a reference event (like the
|
||||
moment machine was switched ON) to the moment this function is called. So if you call it before and
|
||||
after the function execution, you get number of clock-cycles used to execute a function.
|
||||
The **cv.getTickCount** function returns the number of clock-cycles after a reference event (like the
|
||||
moment the machine was switched ON) to the moment this function is called. So if you call it before and
|
||||
after the function execution, you get the number of clock-cycles used to execute a function.
|
||||
|
||||
**cv.getTickFrequency** function returns the frequency of clock-cycles, or the number of
|
||||
The **cv.getTickFrequency** function returns the frequency of clock-cycles, or the number of
|
||||
clock-cycles per second. So to find the time of execution in seconds, you can do the following:
|
||||
@code{.py}
|
||||
e1 = cv.getTickCount()
|
||||
@ -33,8 +32,8 @@ e1 = cv.getTickCount()
|
||||
e2 = cv.getTickCount()
|
||||
time = (e2 - e1)/ cv.getTickFrequency()
|
||||
@endcode
|
||||
We will demonstrate with following example. Following example apply median filtering with a kernel
|
||||
of odd size ranging from 5 to 49. (Don't worry about what will the result look like, that is not our
|
||||
We will demonstrate with the following example. The following example applies median filtering with kernels
|
||||
of odd sizes ranging from 5 to 49. (Don't worry about what the result will look like - that is not our
|
||||
goal):
|
||||
@code{.py}
|
||||
img1 = cv.imread('messi5.jpg')
|
||||
@ -48,16 +47,16 @@ print( t )
|
||||
|
||||
# Result I got is 0.521107655 seconds
|
||||
@endcode
|
||||
@note You can do the same with time module. Instead of cv.getTickCount, use time.time() function.
|
||||
Then take the difference of two times.
|
||||
@note You can do the same thing with the time module. Instead of cv.getTickCount, use the time.time() function.
|
||||
Then take the difference of the two times.
|
||||
|
||||
Default Optimization in OpenCV
|
||||
------------------------------
|
||||
|
||||
Many of the OpenCV functions are optimized using SSE2, AVX etc. It contains unoptimized code also.
|
||||
Many of the OpenCV functions are optimized using SSE2, AVX, etc. It contains the unoptimized code also.
|
||||
So if our system supports these features, we should exploit them (almost all modern day processors
|
||||
support them). It is enabled by default while compiling. So OpenCV runs the optimized code if it is
|
||||
enabled, else it runs the unoptimized code. You can use **cv.useOptimized()** to check if it is
|
||||
enabled, otherwise it runs the unoptimized code. You can use **cv.useOptimized()** to check if it is
|
||||
enabled/disabled and **cv.setUseOptimized()** to enable/disable it. Let's see a simple example.
|
||||
@code{.py}
|
||||
# check if optimization is enabled
|
||||
@ -76,8 +75,8 @@ Out[8]: False
|
||||
In [9]: %timeit res = cv.medianBlur(img,49)
|
||||
10 loops, best of 3: 64.1 ms per loop
|
||||
@endcode
|
||||
See, optimized median filtering is \~2x faster than unoptimized version. If you check its source,
|
||||
you can see median filtering is SIMD optimized. So you can use this to enable optimization at the
|
||||
As you can see, optimized median filtering is \~2x faster than the unoptimized version. If you check its source,
|
||||
you can see that median filtering is SIMD optimized. So you can use this to enable optimization at the
|
||||
top of your code (remember it is enabled by default).
|
||||
|
||||
Measuring Performance in IPython
|
||||
@ -85,10 +84,10 @@ Measuring Performance in IPython
|
||||
|
||||
Sometimes you may need to compare the performance of two similar operations. IPython gives you a
|
||||
magic command %timeit to perform this. It runs the code several times to get more accurate results.
|
||||
Once again, they are suitable to measure single line codes.
|
||||
Once again, it is suitable for measuring single lines of code.
|
||||
|
||||
For example, do you know which of the following addition operation is better, x = 5; y = x\*\*2,
|
||||
x = 5; y = x\*x, x = np.uint8([5]); y = x\*x or y = np.square(x) ? We will find it with %timeit in
|
||||
For example, do you know which of the following addition operations is better, x = 5; y = x\*\*2,
|
||||
x = 5; y = x\*x, x = np.uint8([5]); y = x\*x, or y = np.square(x)? We will find out with %timeit in the
|
||||
IPython shell.
|
||||
@code{.py}
|
||||
In [10]: x = 5
|
||||
@ -112,11 +111,11 @@ consider the array creation also, it may reach upto 100x faster. Cool, right? *(
|
||||
working on this issue)*
|
||||
|
||||
@note Python scalar operations are faster than Numpy scalar operations. So for operations including
|
||||
one or two elements, Python scalar is better than Numpy arrays. Numpy takes advantage when size of
|
||||
array is a little bit bigger.
|
||||
one or two elements, Python scalar is better than Numpy arrays. Numpy has the advantage when the size of
|
||||
the array is a little bit bigger.
|
||||
|
||||
We will try one more example. This time, we will compare the performance of **cv.countNonZero()**
|
||||
and **np.count_nonzero()** for same image.
|
||||
and **np.count_nonzero()** for the same image.
|
||||
|
||||
@code{.py}
|
||||
In [35]: %timeit z = cv.countNonZero(img)
|
||||
@ -125,7 +124,7 @@ In [35]: %timeit z = cv.countNonZero(img)
|
||||
In [36]: %timeit z = np.count_nonzero(img)
|
||||
1000 loops, best of 3: 370 us per loop
|
||||
@endcode
|
||||
See, OpenCV function is nearly 25x faster than Numpy function.
|
||||
See, the OpenCV function is nearly 25x faster than the Numpy function.
|
||||
|
||||
@note Normally, OpenCV functions are faster than Numpy functions. So for the same operation, OpenCV
|
||||
functions are preferred. But, there can be exceptions, especially when Numpy works with views
|
||||
@ -134,8 +133,8 @@ instead of copies.
|
||||
More IPython magic commands
|
||||
---------------------------
|
||||
|
||||
There are several other magic commands to measure the performance, profiling, line profiling, memory
|
||||
measurement etc. They all are well documented. So only links to those docs are provided here.
|
||||
There are several other magic commands to measure performance, profiling, line profiling, memory
|
||||
measurement, etc. They are all well documented. So only links to those docs are provided here.
|
||||
Interested readers are recommended to try them out.
|
||||
|
||||
Performance Optimization Techniques
|
||||
@ -143,19 +142,18 @@ Performance Optimization Techniques
|
||||
|
||||
There are several techniques and coding methods to exploit maximum performance of Python and Numpy.
|
||||
Only relevant ones are noted here and links are given to important sources. The main thing to be
|
||||
noted here is that, first try to implement the algorithm in a simple manner. Once it is working,
|
||||
profile it, find the bottlenecks and optimize them.
|
||||
noted here is, first try to implement the algorithm in a simple manner. Once it is working,
|
||||
profile it, find the bottlenecks, and optimize them.
|
||||
|
||||
-# Avoid using loops in Python as far as possible, especially double/triple loops etc. They are
|
||||
-# Avoid using loops in Python as much as possible, especially double/triple loops etc. They are
|
||||
inherently slow.
|
||||
2. Vectorize the algorithm/code to the maximum possible extent because Numpy and OpenCV are
|
||||
2. Vectorize the algorithm/code to the maximum extent possible, because Numpy and OpenCV are
|
||||
optimized for vector operations.
|
||||
3. Exploit the cache coherence.
|
||||
4. Never make copies of array unless it is needed. Try to use views instead. Array copying is a
|
||||
4. Never make copies of an array unless it is necessary. Try to use views instead. Array copying is a
|
||||
costly operation.
|
||||
|
||||
Even after doing all these operations, if your code is still slow, or use of large loops are
|
||||
inevitable, use additional libraries like Cython to make it faster.
|
||||
If your code is still slow after doing all of these operations, or if the use of large loops is inevitable, use additional libraries like Cython to make it faster.
|
||||
|
||||
Additional Resources
|
||||
--------------------
|
||||
|
@ -48,10 +48,8 @@ CMAKE_CONFIG_GENERATOR="Visual Studio 14 2015 Win64"
|
||||
if [ ! -d "$myRepo/opencv" ]; then
|
||||
echo "cloning opencv"
|
||||
git clone https://github.com/opencv/opencv.git
|
||||
mkdir Build
|
||||
mkdir Build/opencv
|
||||
mkdir Install
|
||||
mkdir Install/opencv
|
||||
mkdir -p Build/opencv
|
||||
mkdir -p Install/opencv
|
||||
else
|
||||
cd opencv
|
||||
git pull --rebase
|
||||
@ -60,8 +58,7 @@ fi
|
||||
if [ ! -d "$myRepo/opencv_contrib" ]; then
|
||||
echo "cloning opencv_contrib"
|
||||
git clone https://github.com/opencv/opencv_contrib.git
|
||||
mkdir Build
|
||||
mkdir Build/opencv_contrib
|
||||
mkdir -p Build/opencv_contrib
|
||||
else
|
||||
cd opencv_contrib
|
||||
git pull --rebase
|
||||
|
@ -42,7 +42,7 @@ if(HAVE_CUDA)
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wenum-compare -Wunused-function -Wshadow)
|
||||
endif()
|
||||
|
||||
if(CV_TRACE AND HAVE_ITT AND BUILD_ITT)
|
||||
if(CV_TRACE AND HAVE_ITT)
|
||||
add_definitions(-DOPENCV_WITH_ITT=1)
|
||||
endif()
|
||||
|
||||
|
@ -64,33 +64,30 @@ static void dumpOpenCLInformation()
|
||||
|
||||
std::vector<PlatformInfo> platforms;
|
||||
cv::ocl::getPlatfomsInfo(platforms);
|
||||
if (platforms.size() > 0)
|
||||
{
|
||||
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
|
||||
for (size_t i = 0; i < platforms.size(); i++)
|
||||
{
|
||||
const PlatformInfo* platform = &platforms[i];
|
||||
DUMP_MESSAGE_STDOUT(" " << platform->name().c_str());
|
||||
Device current_device;
|
||||
for (int j = 0; j < platform->deviceNumber(); j++)
|
||||
{
|
||||
platform->getDevice(current_device, j);
|
||||
const char* deviceTypeStr = current_device.type() == Device::TYPE_CPU
|
||||
? ("CPU") : (current_device.type() == Device::TYPE_GPU ? current_device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
|
||||
DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name().c_str() << " (" << current_device.version().c_str() << ")");
|
||||
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, (int)j ),
|
||||
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
|
||||
platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
if (platforms.empty())
|
||||
{
|
||||
DUMP_MESSAGE_STDOUT("OpenCL is not available");
|
||||
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
|
||||
return;
|
||||
}
|
||||
|
||||
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
|
||||
for (size_t i = 0; i < platforms.size(); i++)
|
||||
{
|
||||
const PlatformInfo* platform = &platforms[i];
|
||||
DUMP_MESSAGE_STDOUT(" " << platform->name());
|
||||
Device current_device;
|
||||
for (int j = 0; j < platform->deviceNumber(); j++)
|
||||
{
|
||||
platform->getDevice(current_device, j);
|
||||
const char* deviceTypeStr = (current_device.type() == Device::TYPE_CPU) ? "CPU" :
|
||||
(current_device.type() == Device::TYPE_GPU ? current_device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
|
||||
DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name() << " (" << current_device.version() << ")");
|
||||
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, j ),
|
||||
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
|
||||
platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) );
|
||||
}
|
||||
}
|
||||
const Device& device = Device::getDefault();
|
||||
if (!device.available())
|
||||
CV_Error(Error::OpenCLInitError, "OpenCL device is not available");
|
||||
@ -102,8 +99,8 @@ static void dumpOpenCLInformation()
|
||||
DUMP_CONFIG_PROPERTY("cv_ocl_current_platformName", device.getPlatform().name());
|
||||
#endif
|
||||
|
||||
const char* deviceTypeStr = device.type() == Device::TYPE_CPU
|
||||
? ("CPU") : (device.type() == Device::TYPE_GPU ? device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
|
||||
const char* deviceTypeStr = (device.type() == Device::TYPE_CPU) ? "CPU" :
|
||||
(device.type() == Device::TYPE_GPU ? device.hostUnifiedMemory() ? "iGPU" : "dGPU" : "unknown");
|
||||
DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr);
|
||||
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);
|
||||
|
||||
@ -156,7 +153,7 @@ static void dumpOpenCLInformation()
|
||||
}
|
||||
pos = pos2 + 1;
|
||||
}
|
||||
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr.c_str());
|
||||
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr);
|
||||
|
||||
const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
|
||||
DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr);
|
||||
|
@ -2032,16 +2032,25 @@ struct Context::Impl
|
||||
0
|
||||
};
|
||||
|
||||
cl_uint i, nd0 = 0, nd = 0;
|
||||
cl_uint nd0 = 0;
|
||||
int dtype = dtype0 & 15;
|
||||
CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, 0, 0, &nd0));
|
||||
cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0);
|
||||
if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
|
||||
{
|
||||
CV_OCL_DBG_CHECK_RESULT(status,
|
||||
cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str());
|
||||
}
|
||||
|
||||
if (nd0 == 0)
|
||||
return;
|
||||
|
||||
AutoBuffer<void*> dlistbuf(nd0*2+1);
|
||||
cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
|
||||
cl_device_id* dlist_new = dlist + nd0;
|
||||
CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
|
||||
String name0;
|
||||
|
||||
cl_uint i, nd = 0;
|
||||
String name0;
|
||||
for(i = 0; i < nd0; i++)
|
||||
{
|
||||
Device d(dlist[i]);
|
||||
@ -5941,7 +5950,12 @@ void convertFromImage(void* cl_mem_image, UMat& dst)
|
||||
static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
|
||||
{
|
||||
cl_uint numDevices = 0;
|
||||
CV_OCL_DBG_CHECK(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices));
|
||||
cl_int status = clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 0, NULL, &numDevices);
|
||||
if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
|
||||
{
|
||||
CV_OCL_DBG_CHECK_RESULT(status,
|
||||
cv::format("clGetDeviceIDs(platform, Device::TYPE_ALL, num_entries=0, devices=NULL, numDevices=%p)", &numDevices).c_str());
|
||||
}
|
||||
|
||||
if (numDevices == 0)
|
||||
{
|
||||
|
@ -6,7 +6,7 @@
|
||||
#define OPENCV_DNN_VERSION_HPP
|
||||
|
||||
/// Use with major OpenCV version only.
|
||||
#define OPENCV_DNN_API_VERSION 20191024
|
||||
#define OPENCV_DNN_API_VERSION 20191111
|
||||
|
||||
#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
|
||||
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)
|
||||
|
@ -3450,14 +3450,11 @@ Ptr<Layer> Net::getLayer(LayerId layerId)
|
||||
std::vector<Ptr<Layer> > Net::getLayerInputs(LayerId layerId)
|
||||
{
|
||||
LayerData &ld = impl->getLayerData(layerId);
|
||||
if (!ld.layerInstance)
|
||||
CV_Error(Error::StsNullPtr, format("Requested layer \"%s\" was not initialized", ld.name.c_str()));
|
||||
|
||||
std::vector<Ptr<Layer> > inputLayers;
|
||||
inputLayers.reserve(ld.inputLayersId.size());
|
||||
std::set<int>::iterator it;
|
||||
for (it = ld.inputLayersId.begin(); it != ld.inputLayersId.end(); ++it) {
|
||||
inputLayers.push_back(getLayer(*it));
|
||||
inputLayers.reserve(ld.inputBlobsId.size());
|
||||
for (int i = 0; i < ld.inputBlobsId.size(); ++i) {
|
||||
inputLayers.push_back(getLayer(ld.inputBlobsId[i].lid));
|
||||
}
|
||||
return inputLayers;
|
||||
}
|
||||
|
@ -68,6 +68,7 @@ public:
|
||||
PROD = 0,
|
||||
SUM = 1,
|
||||
MAX = 2,
|
||||
DIV = 3
|
||||
} op;
|
||||
std::vector<float> coeffs;
|
||||
bool variableChannels;
|
||||
@ -85,6 +86,8 @@ public:
|
||||
op = SUM;
|
||||
else if (operation == "max")
|
||||
op = MAX;
|
||||
else if (operation == "div")
|
||||
op = DIV;
|
||||
else
|
||||
CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\"");
|
||||
}
|
||||
@ -104,8 +107,8 @@ public:
|
||||
virtual bool supportBackend(int backendId) CV_OVERRIDE
|
||||
{
|
||||
return backendId == DNN_BACKEND_OPENCV ||
|
||||
backendId == DNN_BACKEND_CUDA ||
|
||||
backendId == DNN_BACKEND_HALIDE ||
|
||||
(backendId == DNN_BACKEND_CUDA && op != DIV) || // TODO: not implemented, see PR #15811
|
||||
(backendId == DNN_BACKEND_HALIDE && op != DIV) || // TODO: not implemented, see PR #15811
|
||||
(backendId == DNN_BACKEND_INFERENCE_ENGINE && !variableChannels &&
|
||||
(preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()));
|
||||
}
|
||||
@ -278,6 +281,18 @@ public:
|
||||
srcptr0 = (const float*)dstptr;
|
||||
}
|
||||
}
|
||||
else if( op == DIV )
|
||||
{
|
||||
for( k = 1; k < n; k++ )
|
||||
{
|
||||
const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
|
||||
for( j = 0; j < blockSize; j++ )
|
||||
{
|
||||
dstptr[j] = srcptr0[j]/srcptr1[j];
|
||||
}
|
||||
srcptr0 = (const float*)dstptr;
|
||||
}
|
||||
}
|
||||
else if( op == MAX )
|
||||
{
|
||||
for( k = 1; k < n; k++ )
|
||||
@ -400,6 +415,11 @@ public:
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
multiply(inputs[i], outputs[0], outputs[0]);
|
||||
break;
|
||||
case DIV:
|
||||
divide(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
divide(outputs[0], inputs[i], outputs[0]);
|
||||
break;
|
||||
case MAX:
|
||||
max(inputs[0], inputs[1], outputs[0]);
|
||||
for (int i = 2; i < inputs.size(); ++i)
|
||||
@ -515,6 +535,8 @@ public:
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::SUM);
|
||||
else if (op == PROD)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MUL);
|
||||
else if (op == DIV)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::DIV);
|
||||
else if (op == MAX)
|
||||
ieLayer.setEltwiseType(InferenceEngine::Builder::EltwiseLayer::EltwiseType::MAX);
|
||||
else
|
||||
|
@ -519,6 +519,13 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
}
|
||||
}
|
||||
else if (layer_type == "Div")
|
||||
{
|
||||
if (constBlobs.find(node_proto.input(1)) == constBlobs.end())
|
||||
{
|
||||
layerParams.type = "Eltwise";
|
||||
layerParams.set("operation", "div");
|
||||
}
|
||||
else
|
||||
{
|
||||
Mat blob = getBlob(node_proto, constBlobs, 1);
|
||||
CV_Assert_N(blob.type() == CV_32F, blob.total());
|
||||
@ -535,6 +542,7 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
layerParams.set("bias_term", false);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (layer_type == "Neg")
|
||||
{
|
||||
layerParams.type = "Power";
|
||||
@ -771,6 +779,32 @@ void ONNXImporter::populateNet(Net dstNet)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (layer_type == "ReduceL2")
|
||||
{
|
||||
CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
|
||||
CV_Assert(graph_proto.node_size() > li + 1 && graph_proto.node(li + 1).op_type() == "Div");
|
||||
++li;
|
||||
layerParams.type = "Normalize";
|
||||
|
||||
DictValue axes_dict = layerParams.get("axes");
|
||||
if (axes_dict.size() != 1)
|
||||
CV_Error(Error::StsNotImplemented, "Multidimensional reduceL2");
|
||||
int axis = axes_dict.getIntValue(0);
|
||||
layerParams.set("axis",axis);
|
||||
layerParams.set("end_axis", axis);
|
||||
}
|
||||
else if (layer_type == "Squeeze")
|
||||
{
|
||||
CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
|
||||
DictValue axes_dict = layerParams.get("axes");
|
||||
if (axes_dict.size() != 1)
|
||||
CV_Error(Error::StsNotImplemented, "Multidimensional squeeze");
|
||||
|
||||
int axis = axes_dict.getIntValue(0);
|
||||
layerParams.set("axis", axis - 1);
|
||||
layerParams.set("end_axis", axis);
|
||||
layerParams.type = "Flatten";
|
||||
}
|
||||
else if (layer_type == "Unsqueeze")
|
||||
{
|
||||
CV_Assert(node_proto.input_size() == 1);
|
||||
|
@ -86,6 +86,8 @@ TEST_P(dump, Regression)
|
||||
Net net = readNet(findDataFile("dnn/squeezenet_v1.1.prototxt"),
|
||||
findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
|
||||
|
||||
ASSERT_EQ(net.getLayerInputs(net.getLayerId("fire2/concat")).size(), 2);
|
||||
|
||||
int size[] = {1, 3, 227, 227};
|
||||
Mat input = cv::Mat::ones(4, size, CV_32F);
|
||||
net.setInput(input);
|
||||
|
@ -322,6 +322,28 @@ TEST_P(Test_ONNX_layers, MultyInputs)
|
||||
expectNoFallbacksFromIE(net);
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, Div)
|
||||
{
|
||||
const String model = _tf("models/div.onnx");
|
||||
Net net = readNetFromONNX(model);
|
||||
ASSERT_FALSE(net.empty());
|
||||
|
||||
net.setPreferableBackend(backend);
|
||||
net.setPreferableTarget(target);
|
||||
|
||||
Mat inp1 = blobFromNPY(_tf("data/input_div_0.npy"));
|
||||
Mat inp2 = blobFromNPY(_tf("data/input_div_1.npy"));
|
||||
Mat ref = blobFromNPY(_tf("data/output_div.npy"));
|
||||
checkBackend(&inp1, &ref);
|
||||
|
||||
net.setInput(inp1, "0");
|
||||
net.setInput(inp2, "1");
|
||||
Mat out = net.forward();
|
||||
|
||||
normAssert(ref, out, "", default_l1, default_lInf);
|
||||
expectNoFallbacksFromIE(net);
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, DynamicReshape)
|
||||
{
|
||||
if (backend == DNN_BACKEND_INFERENCE_ENGINE)
|
||||
@ -337,6 +359,16 @@ TEST_P(Test_ONNX_layers, Reshape)
|
||||
testONNXModels("unsqueeze");
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, Squeeze)
|
||||
{
|
||||
testONNXModels("squeeze");
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, ReduceL2)
|
||||
{
|
||||
testONNXModels("reduceL2");
|
||||
}
|
||||
|
||||
TEST_P(Test_ONNX_layers, Slice)
|
||||
{
|
||||
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LT(2019010000)
|
||||
|
@ -38,8 +38,10 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels_imgproc.hpp"
|
||||
#include "opencv2/core/hal/intrin.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -211,7 +213,7 @@ struct MomentsInTile_SIMD
|
||||
}
|
||||
};
|
||||
|
||||
#if CV_SSE2
|
||||
#if CV_SIMD128
|
||||
|
||||
template <>
|
||||
struct MomentsInTile_SIMD<uchar, int, int>
|
||||
@ -226,115 +228,33 @@ struct MomentsInTile_SIMD<uchar, int, int>
|
||||
int x = 0;
|
||||
|
||||
{
|
||||
__m128i dx = _mm_set1_epi16(8);
|
||||
__m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
v_int16x8 dx = v_setall_s16(8), qx = v_int16x8(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
v_uint32x4 z = v_setzero_u32(), qx0 = z, qx1 = z, qx2 = z, qx3 = z;
|
||||
|
||||
for( ; x <= len - 8; x += 8 )
|
||||
{
|
||||
__m128i p = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr + x)), z);
|
||||
__m128i sx = _mm_mullo_epi16(qx, qx);
|
||||
v_int16x8 p = v_reinterpret_as_s16(v_load_expand(ptr + x));
|
||||
v_int16x8 sx = v_mul_wrap(qx, qx);
|
||||
|
||||
qx0 = _mm_add_epi16(qx0, p);
|
||||
qx1 = _mm_add_epi32(qx1, _mm_madd_epi16(p, qx));
|
||||
qx2 = _mm_add_epi32(qx2, _mm_madd_epi16(p, sx));
|
||||
qx3 = _mm_add_epi32(qx3, _mm_madd_epi16( _mm_mullo_epi16(p, qx), sx));
|
||||
qx0 += v_reinterpret_as_u32(p);
|
||||
qx1 = v_reinterpret_as_u32(v_dotprod(p, qx, v_reinterpret_as_s32(qx1)));
|
||||
qx2 = v_reinterpret_as_u32(v_dotprod(p, sx, v_reinterpret_as_s32(qx2)));
|
||||
qx3 = v_reinterpret_as_u32(v_dotprod(v_mul_wrap(p, qx), sx, v_reinterpret_as_s32(qx3)));
|
||||
|
||||
qx = _mm_add_epi16(qx, dx);
|
||||
qx += dx;
|
||||
}
|
||||
|
||||
__m128i qx01_lo = _mm_unpacklo_epi32(qx0, qx1);
|
||||
__m128i qx23_lo = _mm_unpacklo_epi32(qx2, qx3);
|
||||
__m128i qx01_hi = _mm_unpackhi_epi32(qx0, qx1);
|
||||
__m128i qx23_hi = _mm_unpackhi_epi32(qx2, qx3);
|
||||
qx01_lo = _mm_add_epi32(qx01_lo, qx01_hi);
|
||||
qx23_lo = _mm_add_epi32(qx23_lo, qx23_hi);
|
||||
__m128i qx0123_lo = _mm_unpacklo_epi64(qx01_lo, qx23_lo);
|
||||
__m128i qx0123_hi = _mm_unpackhi_epi64(qx01_lo, qx23_lo);
|
||||
qx0123_lo = _mm_add_epi32(qx0123_lo, qx0123_hi);
|
||||
_mm_store_si128((__m128i*)buf, qx0123_lo);
|
||||
|
||||
x0 = (buf[0] & 0xffff) + (buf[0] >> 16);
|
||||
x1 = buf[1];
|
||||
x2 = buf[2];
|
||||
x3 = buf[3];
|
||||
x0 = v_reduce_sum(qx0);
|
||||
x0 = (x0 & 0xffff) + (x0 >> 16);
|
||||
x1 = v_reduce_sum(qx1);
|
||||
x2 = v_reduce_sum(qx2);
|
||||
x3 = v_reduce_sum(qx3);
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
int CV_DECL_ALIGNED(16) buf[4];
|
||||
};
|
||||
|
||||
#elif CV_NEON
|
||||
|
||||
template <>
|
||||
struct MomentsInTile_SIMD<uchar, int, int>
|
||||
{
|
||||
MomentsInTile_SIMD()
|
||||
{
|
||||
ushort CV_DECL_ALIGNED(8) init[4] = { 0, 1, 2, 3 };
|
||||
qx_init = vld1_u16(init);
|
||||
v_step = vdup_n_u16(4);
|
||||
}
|
||||
|
||||
int operator() (const uchar * ptr, int len, int & x0, int & x1, int & x2, int & x3)
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
uint32x4_t v_z = vdupq_n_u32(0), v_x0 = v_z, v_x1 = v_z,
|
||||
v_x2 = v_z, v_x3 = v_z;
|
||||
uint16x4_t qx = qx_init;
|
||||
|
||||
for( ; x <= len - 8; x += 8 )
|
||||
{
|
||||
uint16x8_t v_src = vmovl_u8(vld1_u8(ptr + x));
|
||||
|
||||
// first part
|
||||
uint32x4_t v_qx = vmovl_u16(qx);
|
||||
uint16x4_t v_p = vget_low_u16(v_src);
|
||||
uint32x4_t v_px = vmull_u16(qx, v_p);
|
||||
|
||||
v_x0 = vaddw_u16(v_x0, v_p);
|
||||
v_x1 = vaddq_u32(v_x1, v_px);
|
||||
v_px = vmulq_u32(v_px, v_qx);
|
||||
v_x2 = vaddq_u32(v_x2, v_px);
|
||||
v_x3 = vaddq_u32(v_x3, vmulq_u32(v_px, v_qx));
|
||||
qx = vadd_u16(qx, v_step);
|
||||
|
||||
// second part
|
||||
v_qx = vmovl_u16(qx);
|
||||
v_p = vget_high_u16(v_src);
|
||||
v_px = vmull_u16(qx, v_p);
|
||||
|
||||
v_x0 = vaddw_u16(v_x0, v_p);
|
||||
v_x1 = vaddq_u32(v_x1, v_px);
|
||||
v_px = vmulq_u32(v_px, v_qx);
|
||||
v_x2 = vaddq_u32(v_x2, v_px);
|
||||
v_x3 = vaddq_u32(v_x3, vmulq_u32(v_px, v_qx));
|
||||
|
||||
qx = vadd_u16(qx, v_step);
|
||||
}
|
||||
|
||||
vst1q_u32(buf, v_x0);
|
||||
x0 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
vst1q_u32(buf, v_x1);
|
||||
x1 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
vst1q_u32(buf, v_x2);
|
||||
x2 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
vst1q_u32(buf, v_x3);
|
||||
x3 = buf[0] + buf[1] + buf[2] + buf[3];
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
uint CV_DECL_ALIGNED(16) buf[4];
|
||||
uint16x4_t qx_init, v_step;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if CV_SSE4_1
|
||||
|
||||
template <>
|
||||
struct MomentsInTile_SIMD<ushort, int, int64>
|
||||
{
|
||||
@ -348,49 +268,39 @@ struct MomentsInTile_SIMD<ushort, int, int64>
|
||||
int x = 0;
|
||||
|
||||
{
|
||||
__m128i v_delta = _mm_set1_epi32(4), v_zero = _mm_setzero_si128(), v_x0 = v_zero,
|
||||
v_x1 = v_zero, v_x2 = v_zero, v_x3 = v_zero, v_ix0 = _mm_setr_epi32(0, 1, 2, 3);
|
||||
v_int32x4 v_delta = v_setall_s32(4), v_ix0 = v_int32x4(0, 1, 2, 3);
|
||||
v_uint32x4 z = v_setzero_u32(), v_x0 = z, v_x1 = z, v_x2 = z;
|
||||
v_uint64x2 v_x3 = v_reinterpret_as_u64(z);
|
||||
|
||||
for( ; x <= len - 4; x += 4 )
|
||||
{
|
||||
__m128i v_src = _mm_loadl_epi64((const __m128i *)(ptr + x));
|
||||
v_src = _mm_unpacklo_epi16(v_src, v_zero);
|
||||
v_int32x4 v_src = v_reinterpret_as_s32(v_load_expand(ptr + x));
|
||||
|
||||
v_x0 = _mm_add_epi32(v_x0, v_src);
|
||||
v_x1 = _mm_add_epi32(v_x1, _mm_mullo_epi32(v_src, v_ix0));
|
||||
v_x0 += v_reinterpret_as_u32(v_src);
|
||||
v_x1 += v_reinterpret_as_u32(v_src * v_ix0);
|
||||
|
||||
__m128i v_ix1 = _mm_mullo_epi32(v_ix0, v_ix0);
|
||||
v_x2 = _mm_add_epi32(v_x2, _mm_mullo_epi32(v_src, v_ix1));
|
||||
v_int32x4 v_ix1 = v_ix0 * v_ix0;
|
||||
v_x2 += v_reinterpret_as_u32(v_src * v_ix1);
|
||||
|
||||
v_ix1 = _mm_mullo_epi32(v_ix0, v_ix1);
|
||||
v_src = _mm_mullo_epi32(v_src, v_ix1);
|
||||
v_x3 = _mm_add_epi64(v_x3, _mm_add_epi64(_mm_unpacklo_epi32(v_src, v_zero), _mm_unpackhi_epi32(v_src, v_zero)));
|
||||
v_ix1 = v_ix0 * v_ix1;
|
||||
v_src = v_src * v_ix1;
|
||||
v_uint64x2 v_lo, v_hi;
|
||||
v_expand(v_reinterpret_as_u32(v_src), v_lo, v_hi);
|
||||
v_x3 += v_lo + v_hi;
|
||||
|
||||
v_ix0 = _mm_add_epi32(v_ix0, v_delta);
|
||||
v_ix0 += v_delta;
|
||||
}
|
||||
|
||||
__m128i v_x01_lo = _mm_unpacklo_epi32(v_x0, v_x1);
|
||||
__m128i v_x22_lo = _mm_unpacklo_epi32(v_x2, v_x2);
|
||||
__m128i v_x01_hi = _mm_unpackhi_epi32(v_x0, v_x1);
|
||||
__m128i v_x22_hi = _mm_unpackhi_epi32(v_x2, v_x2);
|
||||
v_x01_lo = _mm_add_epi32(v_x01_lo, v_x01_hi);
|
||||
v_x22_lo = _mm_add_epi32(v_x22_lo, v_x22_hi);
|
||||
__m128i v_x0122_lo = _mm_unpacklo_epi64(v_x01_lo, v_x22_lo);
|
||||
__m128i v_x0122_hi = _mm_unpackhi_epi64(v_x01_lo, v_x22_lo);
|
||||
v_x0122_lo = _mm_add_epi32(v_x0122_lo, v_x0122_hi);
|
||||
_mm_store_si128((__m128i*)buf64, v_x3);
|
||||
_mm_store_si128((__m128i*)buf, v_x0122_lo);
|
||||
|
||||
x0 = buf[0];
|
||||
x1 = buf[1];
|
||||
x2 = buf[2];
|
||||
x0 = v_reduce_sum(v_x0);
|
||||
x1 = v_reduce_sum(v_x1);
|
||||
x2 = v_reduce_sum(v_x2);
|
||||
v_store_aligned(buf64, v_reinterpret_as_s64(v_x3));
|
||||
x3 = buf64[0] + buf64[1];
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
int CV_DECL_ALIGNED(16) buf[4];
|
||||
int64 CV_DECL_ALIGNED(16) buf64[2];
|
||||
};
|
||||
|
||||
|
@ -38,6 +38,10 @@
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
|
||||
if (typeof Module.FS === 'undefined' && typeof FS !== 'undefined') {
|
||||
Module.FS = FS;
|
||||
}
|
||||
|
||||
Module['imread'] = function(imageSource) {
|
||||
var img = null;
|
||||
if (typeof imageSource === 'string') {
|
||||
|
@ -687,18 +687,20 @@ bool GStreamerCapture::open(const String &filename_)
|
||||
// else, we might have a file or a manual pipeline.
|
||||
// if gstreamer cannot parse the manual pipeline, we assume we were given and
|
||||
// ordinary file path.
|
||||
CV_LOG_INFO(NULL, "OpenCV | GStreamer: " << filename);
|
||||
if (!gst_uri_is_valid(filename))
|
||||
{
|
||||
if (utils::fs::exists(filename_))
|
||||
{
|
||||
uri.attach(g_filename_to_uri(filename, NULL, NULL));
|
||||
GSafePtr<GError> err;
|
||||
uri.attach(gst_filename_to_uri(filename, err.getRef()));
|
||||
if (uri)
|
||||
{
|
||||
file = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_WARN("Error opening file: " << filename << " (" << uri.get() << ")");
|
||||
CV_WARN("Error opening file: " << filename << " (" << err->message << ")");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -718,7 +720,7 @@ bool GStreamerCapture::open(const String &filename_)
|
||||
{
|
||||
uri.attach(g_strdup(filename));
|
||||
}
|
||||
|
||||
CV_LOG_INFO(NULL, "OpenCV | GStreamer: mode - " << (file ? "FILE" : manualpipeline ? "MANUAL" : "URI"));
|
||||
bool element_from_uri = false;
|
||||
if (!uridecodebin)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user