Merge remote-tracking branch 'upstream/3.4' into merge-3.4

Author: Alexander Alekhin
Date: 2019-10-24 18:17:40 +00:00
Commit: 055ffc0425
61 changed files with 1438 additions and 394 deletions


@ -346,7 +346,7 @@ elseif(MIPS)
ocv_update(CPU_MSA_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_msa.cpp") ocv_update(CPU_MSA_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_msa.cpp")
ocv_update(CPU_KNOWN_OPTIMIZATIONS "MSA") ocv_update(CPU_KNOWN_OPTIMIZATIONS "MSA")
ocv_update(CPU_MSA_FLAGS_ON "-mmsa") ocv_update(CPU_MSA_FLAGS_ON "-mmsa")
set(CPU_BASELINE "MSA" CACHE STRING "${HELP_CPU_BASELINE}") set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
elseif(PPC64LE) elseif(PPC64LE)
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3") ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX;VSX3")
ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp") ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")


@ -133,7 +133,7 @@ message(STATUS "Found MKL ${MKL_VERSION_STR} at: ${MKL_ROOT_DIR}")
set(HAVE_MKL ON) set(HAVE_MKL ON)
set(MKL_ROOT_DIR "${MKL_ROOT_DIR}" CACHE PATH "Path to MKL directory") set(MKL_ROOT_DIR "${MKL_ROOT_DIR}" CACHE PATH "Path to MKL directory")
set(MKL_INCLUDE_DIRS "${MKL_INCLUDE_DIRS}" CACHE PATH "Path to MKL include directory") set(MKL_INCLUDE_DIRS "${MKL_INCLUDE_DIRS}" CACHE PATH "Path to MKL include directory")
set(MKL_LIBRARIES "${MKL_LIBRARIES}" CACHE STRING "MKL libarries") set(MKL_LIBRARIES "${MKL_LIBRARIES}" CACHE STRING "MKL libraries")
if(UNIX AND NOT MKL_LIBRARIES_DONT_HACK) if(UNIX AND NOT MKL_LIBRARIES_DONT_HACK)
#it's ugly but helps to avoid cyclic lib problem #it's ugly but helps to avoid cyclic lib problem
set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl") set(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_LIBRARIES} ${MKL_LIBRARIES} "-lpthread" "-lm" "-ldl")


@ -1,4 +1,4 @@
include("${CMAKE_CURRENT_LIST_DIR}/OpenCV_WinRT.cmake") include("${CMAKE_CURRENT_LIST_DIR}/OpenCV-WinRT.cmake")
# Adding additional using directory for WindowsPhone 8.0 to get Windows.winmd properly # Adding additional using directory for WindowsPhone 8.0 to get Windows.winmd properly
if(WINRT_8_0) if(WINRT_8_0)


@ -1 +1 @@
include("${CMAKE_CURRENT_LIST_DIR}/OpenCV_WinRT.cmake") include("${CMAKE_CURRENT_LIST_DIR}/OpenCV-WinRT.cmake")


@ -27,7 +27,7 @@ src1.delete(); src2.delete(); dst.delete(); mask.delete();
Image Subtraction Image Subtraction
-------------- --------------
You can subtract two images by OpenCV function, cv.subtract(). res = img1 - img2. Both images should be of same depth and type. You can subtract two images by OpenCV function, cv.subtract(). res = img1 - img2. Both images should be of same depth and type. Note that when used with RGBA images, the alpha channel is also subtracted.
For example, consider below sample: For example, consider below sample:
@code{.js} @code{.js}
@ -59,4 +59,4 @@ Try it
<iframe src="../../js_image_arithmetics_bitwise.html" width="100%" <iframe src="../../js_image_arithmetics_bitwise.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';"> onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">
</iframe> </iframe>
\endhtmlonly \endhtmlonly
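The note added above, that cv.subtract applied to RGBA images also subtracts the alpha channel, follows from the operation being purely per-channel. A minimal C++ sketch of that behaviour (the tutorial itself uses OpenCV.js; this is an illustration, not text from the patch):
@code{.cpp}
#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Two 1x1 four-channel (RGBA-style) images, both fully opaque.
    cv::Mat a(1, 1, CV_8UC4, cv::Scalar(200, 200, 200, 255));
    cv::Mat b(1, 1, CV_8UC4, cv::Scalar( 50,  50,  50, 255));
    cv::Mat res;
    cv::subtract(a, b, res);                // saturating, per-channel subtraction
    cv::Vec4b px = res.at<cv::Vec4b>(0, 0);
    std::cout << (int)px[0] << " " << (int)px[3] << std::endl;  // prints "150 0": alpha 255-255 = 0
    return 0;
}
@endcode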


@ -4,21 +4,21 @@ Gui Features in OpenCV {#tutorial_py_table_of_contents_gui}
- @subpage tutorial_py_image_display - @subpage tutorial_py_image_display
Learn to load an Learn to load an
image, display it and save it back image, display it, and save it back
- @subpage tutorial_py_video_display - @subpage tutorial_py_video_display
Learn to play videos, Learn to play videos,
capture videos from Camera and write it as a video capture videos from a camera, and write videos
- @subpage tutorial_py_drawing_functions - @subpage tutorial_py_drawing_functions
Learn to draw lines, Learn to draw lines,
rectangles, ellipses, circles etc with OpenCV rectangles, ellipses, circles, etc with OpenCV
- @subpage tutorial_py_mouse_handling - @subpage tutorial_py_mouse_handling
Draw stuffs with your Draw stuff with your
mouse mouse
- @subpage tutorial_py_trackbar - @subpage tutorial_py_trackbar


@ -4,19 +4,19 @@ Getting Started with Videos {#tutorial_py_video_display}
Goal Goal
---- ----
- Learn to read video, display video and save video. - Learn to read video, display video, and save video.
- Learn to capture from Camera and display it. - Learn to capture video from a camera and display it.
- You will learn these functions : **cv.VideoCapture()**, **cv.VideoWriter()** - You will learn these functions : **cv.VideoCapture()**, **cv.VideoWriter()**
Capture Video from Camera Capture Video from Camera
------------------------- -------------------------
Often, we have to capture live stream with camera. OpenCV provides a very simple interface to this. Often, we have to capture live stream with a camera. OpenCV provides a very simple interface to do this.
Let's capture a video from the camera (I am using the in-built webcam of my laptop), convert it into Let's capture a video from the camera (I am using the built-in webcam on my laptop), convert it into
grayscale video and display it. Just a simple task to get started. grayscale video and display it. Just a simple task to get started.
To capture a video, you need to create a **VideoCapture** object. Its argument can be either the To capture a video, you need to create a **VideoCapture** object. Its argument can be either the
device index or the name of a video file. Device index is just the number to specify which camera. device index or the name of a video file. A device index is just the number to specify which camera.
Normally one camera will be connected (as in my case). So I simply pass 0 (or -1). You can select Normally one camera will be connected (as in my case). So I simply pass 0 (or -1). You can select
the second camera by passing 1 and so on. After that, you can capture frame-by-frame. But at the the second camera by passing 1 and so on. After that, you can capture frame-by-frame. But at the
end, don't forget to release the capture. end, don't forget to release the capture.
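The tutorial's code is Python; as a hedged C++ equivalent of the capture loop just described (a sketch under the same assumptions, not part of the patch):
@code{.cpp}
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

int main()
{
    cv::VideoCapture cap(0);          // device index 0: the first connected camera
    if (!cap.isOpened())
        return -1;                    // the capture could not be initialized
    cv::Mat frame, gray;
    while (cap.read(frame))           // read() returns false when no frame was grabbed
    {
        cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
        cv::imshow("frame", gray);
        if (cv::waitKey(1) == 'q')
            break;
    }
    cap.release();                    // don't forget to release the capture
    return 0;
}
@endcode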
@ -46,16 +46,16 @@ while True:
# When everything done, release the capture # When everything done, release the capture
cap.release() cap.release()
cv.destroyAllWindows()@endcode cv.destroyAllWindows()@endcode
`cap.read()` returns a bool (`True`/`False`). If frame is read correctly, it will be `True`. So you can `cap.read()` returns a bool (`True`/`False`). If the frame is read correctly, it will be `True`. So you can
check end of the video by checking this return value. check for the end of the video by checking this returned value.
Sometimes, cap may not have initialized the capture. In that case, this code shows error. You can Sometimes, cap may not have initialized the capture. In that case, this code shows an error. You can
check whether it is initialized or not by the method **cap.isOpened()**. If it is `True`, OK. check whether it is initialized or not by the method **cap.isOpened()**. If it is `True`, OK.
Otherwise open it using **cap.open()**. Otherwise open it using **cap.open()**.
You can also access some of the features of this video using **cap.get(propId)** method where propId You can also access some of the features of this video using **cap.get(propId)** method where propId
is a number from 0 to 18. Each number denotes a property of the video (if it is applicable to that is a number from 0 to 18. Each number denotes a property of the video (if it is applicable to that
video) and full details can be seen here: cv::VideoCapture::get(). video). Full details can be seen here: cv::VideoCapture::get().
Some of these values can be modified using **cap.set(propId, value)**. Value is the new value you Some of these values can be modified using **cap.set(propId, value)**. Value is the new value you
want. want.
@ -63,13 +63,13 @@ For example, I can check the frame width and height by `cap.get(cv.CAP_PROP_FRAM
640x480 by default. But I want to modify it to 320x240. Just use `ret = cap.set(cv.CAP_PROP_FRAME_WIDTH,320)` and 640x480 by default. But I want to modify it to 320x240. Just use `ret = cap.set(cv.CAP_PROP_FRAME_WIDTH,320)` and
`ret = cap.set(cv.CAP_PROP_FRAME_HEIGHT,240)`. `ret = cap.set(cv.CAP_PROP_FRAME_HEIGHT,240)`.
@note If you are getting error, make sure camera is working fine using any other camera application @note If you are getting an error, make sure your camera is working fine using any other camera application
(like Cheese in Linux). (like Cheese in Linux).
Playing Video from file Playing Video from file
----------------------- -----------------------
It is same as capturing from Camera, just change camera index with video file name. Also while Playing video from file is the same as capturing it from camera, just change the camera index to a video file name. Also while
displaying the frame, use appropriate time for `cv.waitKey()`. If it is too less, video will be very displaying the frame, use appropriate time for `cv.waitKey()`. If it is too less, video will be very
fast and if it is too high, video will be slow (Well, that is how you can display videos in slow fast and if it is too high, video will be slow (Well, that is how you can display videos in slow
motion). 25 milliseconds will be OK in normal cases. motion). 25 milliseconds will be OK in normal cases.
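A short hedged sketch of the timing point, reusing the includes from the capture sketch above ("vtest.avi" is only a placeholder file name):
@code{.cpp}
// Same loop, but reading from a file and waiting ~25 ms per frame; a larger delay
// plays the video in slow motion, a much smaller one plays it too fast.
cv::VideoCapture cap("vtest.avi");
cv::Mat frame;
while (cap.read(frame))
{
    cv::imshow("video", frame);
    if (cv::waitKey(25) == 'q')
        break;
}
cap.release();
@endcode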
@ -96,23 +96,23 @@ cap.release()
cv.destroyAllWindows() cv.destroyAllWindows()
@endcode @endcode
@note Make sure proper versions of ffmpeg or gstreamer is installed. Sometimes, it is a headache to @note Make sure a proper version of ffmpeg or gstreamer is installed. Sometimes it is a headache to
work with Video Capture mostly due to wrong installation of ffmpeg/gstreamer. work with video capture, mostly due to wrong installation of ffmpeg/gstreamer.
Saving a Video Saving a Video
-------------- --------------
So we capture a video, process it frame-by-frame and we want to save that video. For images, it is So we capture a video and process it frame-by-frame, and we want to save that video. For images, it is
very simple, just use `cv.imwrite()`. Here a little more work is required. very simple: just use `cv.imwrite()`. Here, a little more work is required.
This time we create a **VideoWriter** object. We should specify the output file name (eg: This time we create a **VideoWriter** object. We should specify the output file name (eg:
output.avi). Then we should specify the **FourCC** code (details in next paragraph). Then number of output.avi). Then we should specify the **FourCC** code (details in next paragraph). Then number of
frames per second (fps) and frame size should be passed. And last one is **isColor** flag. If it is frames per second (fps) and frame size should be passed. And the last one is the **isColor** flag. If it is
`True`, encoder expect color frame, otherwise it works with grayscale frame. `True`, the encoder expect color frame, otherwise it works with grayscale frame.
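A hedged C++ sketch of that setup (reusing the earlier includes; "output.avi", the MJPG codec and 20 fps are example choices, not values prescribed by the tutorial):
@code{.cpp}
cv::VideoCapture cap(0);
int w = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
int h = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
cv::VideoWriter out("output.avi",
                    cv::VideoWriter::fourcc('M', 'J', 'P', 'G'),  // FourCC code
                    20.0,                                         // frames per second
                    cv::Size(w, h),                               // frame size
                    true);                                        // isColor flag
cv::Mat frame;
for (int i = 0; i < 100 && cap.read(frame); ++i)
{
    cv::flip(frame, frame, 0);   // flip each frame vertically, as in the tutorial's example
    out.write(frame);
}
out.release();
cap.release();
@endcode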
[FourCC](http://en.wikipedia.org/wiki/FourCC) is a 4-byte code used to specify the video codec. The [FourCC](http://en.wikipedia.org/wiki/FourCC) is a 4-byte code used to specify the video codec. The
list of available codes can be found in [fourcc.org](http://www.fourcc.org/codecs.php). It is list of available codes can be found in [fourcc.org](http://www.fourcc.org/codecs.php). It is
platform dependent. Following codecs works fine for me. platform dependent. The following codecs work fine for me.
- In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is more preferable. MJPG results in high - In Fedora: DIVX, XVID, MJPG, X264, WMV1, WMV2. (XVID is more preferable. MJPG results in high
size video. X264 gives very small size video) size video. X264 gives very small size video)
@ -122,7 +122,7 @@ platform dependent. Following codecs works fine for me.
FourCC code is passed as `cv.VideoWriter_fourcc('M','J','P','G')` or FourCC code is passed as `cv.VideoWriter_fourcc('M','J','P','G')` or
`cv.VideoWriter_fourcc(*'MJPG')` for MJPG. `cv.VideoWriter_fourcc(*'MJPG')` for MJPG.
Below code capture from a Camera, flip every frame in vertical direction and saves it. The below code captures from a camera, flips every frame in the vertical direction, and saves the video.
@code{.py} @code{.py}
import numpy as np import numpy as np
import cv2 as cv import cv2 as cv


@ -216,30 +216,30 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0; dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0;
x = 1; x = 1;
#if CV_SIMD128 #if CV_SIMD
{ {
v_int16x8 ftz = v_setall_s16((short) ftzero); v_int16 ftz = vx_setall_s16((short) ftzero);
v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2)); v_int16 ftz2 = vx_setall_s16((short)(ftzero*2));
v_int16x8 z = v_setzero_s16(); v_int16 z = vx_setzero_s16();
for(; x <= (size.width - 1) - 8; x += 8 ) for(; x <= (size.width - 1) - v_int16::nlanes; x += v_int16::nlanes)
{ {
v_int16x8 s00 = v_reinterpret_as_s16(v_load_expand(srow0 + x + 1)); v_int16 s00 = v_reinterpret_as_s16(vx_load_expand(srow0 + x + 1));
v_int16x8 s01 = v_reinterpret_as_s16(v_load_expand(srow0 + x - 1)); v_int16 s01 = v_reinterpret_as_s16(vx_load_expand(srow0 + x - 1));
v_int16x8 s10 = v_reinterpret_as_s16(v_load_expand(srow1 + x + 1)); v_int16 s10 = v_reinterpret_as_s16(vx_load_expand(srow1 + x + 1));
v_int16x8 s11 = v_reinterpret_as_s16(v_load_expand(srow1 + x - 1)); v_int16 s11 = v_reinterpret_as_s16(vx_load_expand(srow1 + x - 1));
v_int16x8 s20 = v_reinterpret_as_s16(v_load_expand(srow2 + x + 1)); v_int16 s20 = v_reinterpret_as_s16(vx_load_expand(srow2 + x + 1));
v_int16x8 s21 = v_reinterpret_as_s16(v_load_expand(srow2 + x - 1)); v_int16 s21 = v_reinterpret_as_s16(vx_load_expand(srow2 + x - 1));
v_int16x8 s30 = v_reinterpret_as_s16(v_load_expand(srow3 + x + 1)); v_int16 s30 = v_reinterpret_as_s16(vx_load_expand(srow3 + x + 1));
v_int16x8 s31 = v_reinterpret_as_s16(v_load_expand(srow3 + x - 1)); v_int16 s31 = v_reinterpret_as_s16(vx_load_expand(srow3 + x - 1));
v_int16x8 d0 = s00 - s01; v_int16 d0 = s00 - s01;
v_int16x8 d1 = s10 - s11; v_int16 d1 = s10 - s11;
v_int16x8 d2 = s20 - s21; v_int16 d2 = s20 - s21;
v_int16x8 d3 = s30 - s31; v_int16 d3 = s30 - s31;
v_uint16x8 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z)); v_uint16 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z));
v_uint16x8 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z)); v_uint16 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z));
v_pack_store(dptr0 + x, v0); v_pack_store(dptr0 + x, v0);
v_pack_store(dptr1 + x, v1); v_pack_store(dptr1 + x, v1);
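For readers unfamiliar with the wide universal intrinsics this file is being ported to, a standalone sketch of the pattern (not part of the patch): width-agnostic types such as v_int16 and the vx_* loads/broadcasts pick the widest instruction set enabled at build time, the loop steps by ::nlanes instead of a hard-coded 8 or 16, and a scalar tail handles the remainder.
@code{.cpp}
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;

static void add_offset(short* data, int n, short offset)
{
    int i = 0;
#if CV_SIMD
    v_int16 voff = vx_setall_s16(offset);
    for (; i <= n - v_int16::nlanes; i += v_int16::nlanes)
        v_store(data + i, vx_load(data + i) + voff);   // one full vector per iteration
#endif
    for (; i < n; i++)                                  // scalar tail for leftover elements
        data[i] = (short)(data[i] + offset);
}
@endcode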
@ -262,10 +262,10 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
{ {
uchar* dptr = dst.ptr<uchar>(y); uchar* dptr = dst.ptr<uchar>(y);
x = 0; x = 0;
#if CV_SIMD128 #if CV_SIMD
{ {
v_uint8x16 val0_16 = v_setall_u8(val0); v_uint8 val0_16 = vx_setall_u8(val0);
for(; x <= size.width-16; x+=16 ) for(; x <= size.width-v_uint8::nlanes; x+=v_uint8::nlanes)
v_store(dptr + x, val0_16); v_store(dptr + x, val0_16);
} }
#endif #endif
@ -309,13 +309,13 @@ inline int dispDescale(int v1, int v2, int d)
return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float
} }
#if CV_SIMD128 #if CV_SIMD
template <typename dType> template <typename dType>
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right, static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, StereoBMParams& state, Mat& disp, Mat& cost, StereoBMParams& state,
uchar* buf, int _dy0, int _dy1 ) uchar* buf, int _dy0, int _dy1 )
{ {
const int ALIGN = 16; const int ALIGN = CV_SIMD_WIDTH;
int x, y, d; int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2; int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -345,7 +345,9 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0; int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256; const int TABSZ = 256;
uchar tab[TABSZ]; uchar tab[TABSZ];
const v_int16x8 d0_8 = v_int16x8(0,1,2,3,4,5,6,7), dd_8 = v_setall_s16(8); short v_seq[v_int16::nlanes];
for (short i = 0; i < v_int16::nlanes; ++i)
v_seq[i] = i;
sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN); sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
@ -368,20 +370,26 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep ) for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
{ {
int lval = lptr[0]; int lval = lptr[0];
v_uint8x16 lv = v_setall_u8((uchar)lval); v_uint8 lv = vx_setall_u8((uchar)lval);
for( d = 0; d < ndisp; d += 16 ) for( d = 0; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes )
{ {
v_uint8x16 rv = v_load(rptr + d); v_uint8 diff = v_absdiff(lv, vx_load(rptr + d));
v_uint16x8 hsad_l = v_load(hsad + d);
v_uint16x8 hsad_h = v_load(hsad + d + 8);
v_uint8x16 diff = v_absdiff(lv, rv);
v_store(cbuf + d, diff); v_store(cbuf + d, diff);
v_uint16x8 diff0, diff1; v_store(hsad + d, vx_load(hsad + d) + v_expand_low(diff));
v_expand(diff, diff0, diff1); v_store(hsad + d + v_uint16::nlanes, vx_load(hsad + d + v_uint16::nlanes) + v_expand_high(diff));
hsad_l += diff0; }
hsad_h += diff1; if( d <= ndisp - v_uint16::nlanes )
v_store(hsad + d, hsad_l); {
v_store(hsad + d + 8, hsad_h); v_uint8 diff = v_absdiff(lv, vx_load_low(rptr + d));
v_store_low(cbuf + d, diff);
v_store(hsad + d, vx_load(hsad + d) + v_expand_low(diff));
d += v_uint16::nlanes;
}
for( ; d < ndisp; d++ )
{
int diff = abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] += (ushort)diff;
} }
htext[y] += tab[lval]; htext[y] += tab[lval];
} }
@ -412,24 +420,27 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep ) hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
{ {
int lval = lptr[0]; int lval = lptr[0];
v_uint8x16 lv = v_setall_u8((uchar)lval); v_uint8 lv = vx_setall_u8((uchar)lval);
for( d = 0; d < ndisp; d += 16 ) for( d = 0; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes )
{ {
v_uint8x16 rv = v_load(rptr + d); v_uint8 diff = v_absdiff(lv, vx_load(rptr + d));
v_uint16x8 hsad_l = v_load(hsad + d); v_int8 cbs = v_reinterpret_as_s8(vx_load(cbuf_sub + d));
v_uint16x8 hsad_h = v_load(hsad + d + 8);
v_uint8x16 cbs = v_load(cbuf_sub + d);
v_uint8x16 diff = v_absdiff(lv, rv);
v_int16x8 diff_l, diff_h, cbs_l, cbs_h;
v_store(cbuf + d, diff); v_store(cbuf + d, diff);
v_expand(v_reinterpret_as_s8(diff), diff_l, diff_h); v_store(hsad + d, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d) + v_expand_low(diff)) - v_expand_low(cbs)));
v_expand(v_reinterpret_as_s8(cbs), cbs_l, cbs_h); v_store(hsad + d + v_uint16::nlanes, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d + v_uint16::nlanes) + v_expand_high(diff)) - v_expand_high(cbs)));
diff_l -= cbs_l; }
diff_h -= cbs_h; if( d <= ndisp - v_uint16::nlanes)
hsad_h = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_h) + diff_h); {
hsad_l = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_l) + diff_l); v_uint8 diff = v_absdiff(lv, vx_load_low(rptr + d));
v_store(hsad + d, hsad_l); v_store_low(cbuf + d, diff);
v_store(hsad + d + 8, hsad_h); v_store(hsad + d, v_reinterpret_as_u16(v_reinterpret_as_s16(vx_load(hsad + d) + v_expand_low(diff)) - vx_load_expand((schar*)cbuf_sub + d)));
d += v_uint16::nlanes;
}
for( ; d < ndisp; d++ )
{
int diff = abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + (ushort)diff - cbuf_sub[d];
} }
htext[y] += tab[lval] - tab[lptr_sub[0]]; htext[y] += tab[lval] - tab[lptr_sub[0]];
} }
@ -446,17 +457,25 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
hsad = hsad0 + (1 - dy0)*ndisp; hsad = hsad0 + (1 - dy0)*ndisp;
for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp ) for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
for( d = 0; d <= ndisp-16; d += 16 ) {
for( d = 0; d <= ndisp-2*v_uint16::nlanes; d += 2*v_uint16::nlanes )
{ {
v_uint16x8 s0 = v_load(sad + d); v_store(sad + d, vx_load(sad + d) + vx_load(hsad + d));
v_uint16x8 s1 = v_load(sad + d + 8); v_store(sad + d + v_uint16::nlanes, vx_load(sad + d + v_uint16::nlanes) + vx_load(hsad + d + v_uint16::nlanes));
v_uint16x8 t0 = v_load(hsad + d);
v_uint16x8 t1 = v_load(hsad + d + 8);
s0 = s0 + t0;
s1 = s1 + t1;
v_store(sad + d, s0);
v_store(sad + d + 8, s1);
} }
if( d <= ndisp-v_uint16::nlanes )
{
v_store(sad + d, vx_load(sad + d) + vx_load(hsad + d));
d += v_uint16::nlanes;
}
if( d <= ndisp-v_uint16::nlanes/2 )
{
v_store_low(sad + d, vx_load_low(sad + d) + vx_load_low(hsad + d));
d += v_uint16::nlanes/2;
}
for( ; d < ndisp; d++ )
sad[d] = sad[d] + hsad[d];
}
int tsum = 0; int tsum = 0;
for( y = -wsz2-1; y < wsz2; y++ ) for( y = -wsz2-1; y < wsz2; y++ )
tsum += htext[y]; tsum += htext[y];
@ -467,38 +486,41 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
int minsad = INT_MAX, mind = -1; int minsad = INT_MAX, mind = -1;
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp; hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
v_int16x8 minsad8 = v_setall_s16(SHRT_MAX); v_int16 minsad8 = vx_setall_s16(SHRT_MAX);
v_int16x8 mind8 = v_setall_s16(0), d8 = d0_8; v_int16 mind8 = vx_setall_s16(0);
for( d = 0; d < ndisp; d += 16 ) for( d = 0; d <= ndisp - 2*v_int16::nlanes; d += 2*v_int16::nlanes )
{ {
v_int16x8 u0 = v_reinterpret_as_s16(v_load(hsad_sub + d)); v_int16 sad8 = v_reinterpret_as_s16(vx_load(hsad + d)) - v_reinterpret_as_s16(vx_load(hsad_sub + d)) + v_reinterpret_as_s16(vx_load(sad + d));
v_int16x8 u1 = v_reinterpret_as_s16(v_load(hsad + d)); v_store(sad + d, v_reinterpret_as_u16(sad8));
mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d));
minsad8 = v_min(minsad8, sad8);
v_int16x8 v0 = v_reinterpret_as_s16(v_load(hsad_sub + d + 8)); sad8 = v_reinterpret_as_s16(vx_load(hsad + d + v_int16::nlanes)) - v_reinterpret_as_s16(vx_load(hsad_sub + d + v_int16::nlanes)) + v_reinterpret_as_s16(vx_load(sad + d + v_int16::nlanes));
v_int16x8 v1 = v_reinterpret_as_s16(v_load(hsad + d + 8)); v_store(sad + d + v_int16::nlanes, v_reinterpret_as_u16(sad8));
mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d+v_int16::nlanes));
v_int16x8 usad8 = v_reinterpret_as_s16(v_load(sad + d)); minsad8 = v_min(minsad8, sad8);
v_int16x8 vsad8 = v_reinterpret_as_s16(v_load(sad + d + 8)); }
if( d <= ndisp - v_int16::nlanes )
u1 -= u0; {
v1 -= v0; v_int16 sad8 = v_reinterpret_as_s16(vx_load(hsad + d)) - v_reinterpret_as_s16(vx_load(hsad_sub + d)) + v_reinterpret_as_s16(vx_load(sad + d));
usad8 += u1; v_store(sad + d, v_reinterpret_as_u16(sad8));
vsad8 += v1; mind8 = v_max(mind8, (minsad8 > sad8) & vx_setall_s16((short)d));
minsad8 = v_min(minsad8, sad8);
v_int16x8 mask = minsad8 > usad8; d += v_int16::nlanes;
minsad8 = v_min(minsad8, usad8); }
mind8 = v_max(mind8, (mask& d8)); minsad = v_reduce_min(minsad8);
v_int16 v_mask = (vx_setall_s16((short)minsad) == minsad8);
v_store(sad + d, v_reinterpret_as_u16(usad8)); mind = v_reduce_min(((mind8+vx_load(v_seq)) & v_mask) | (vx_setall_s16(SHRT_MAX) & ~v_mask));
v_store(sad + d + 8, v_reinterpret_as_u16(vsad8)); for( ; d < ndisp; d++ )
{
mask = minsad8 > vsad8; int sad8 = (int)(hsad[d]) - hsad_sub[d] + sad[d];
minsad8 = v_min(minsad8, vsad8); sad[d] = (ushort)sad8;
if(minsad > sad8)
d8 = d8 + dd_8; {
mind8 = v_max(mind8, (mask & d8)); mind = d;
d8 = d8 + dd_8; minsad = sad8;
}
} }
tsum += htext[y + wsz2] - htext[y - wsz2 - 1]; tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
@ -508,41 +530,45 @@ static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
continue; continue;
} }
ushort CV_DECL_ALIGNED(16) minsad_buf[8], mind_buf[8];
v_store(minsad_buf, v_reinterpret_as_u16(minsad8));
v_store(mind_buf, v_reinterpret_as_u16(mind8));
for( d = 0; d < 8; d++ )
if(minsad > (int)minsad_buf[d] || (minsad == (int)minsad_buf[d] && mind > mind_buf[d]))
{
minsad = minsad_buf[d];
mind = mind_buf[d];
}
if( uniquenessRatio > 0 ) if( uniquenessRatio > 0 )
{ {
int thresh = minsad + (minsad * uniquenessRatio/100); int thresh = minsad + (minsad * uniquenessRatio/100);
v_int32x4 thresh4 = v_setall_s32(thresh + 1); v_int32 thresh4 = vx_setall_s32(thresh + 1);
v_int32x4 d1 = v_setall_s32(mind-1), d2 = v_setall_s32(mind+1); v_int32 d1 = vx_setall_s32(mind-1), d2 = vx_setall_s32(mind+1);
v_int32x4 dd_4 = v_setall_s32(4); v_int32 dd_4 = vx_setall_s32(v_int32::nlanes);
v_int32x4 d4 = v_int32x4(0,1,2,3); v_int32 d4 = vx_load_expand(v_seq);
v_int32x4 mask4;
for( d = 0; d < ndisp; d += 8 ) for( d = 0; d <= ndisp - v_int16::nlanes; d += v_int16::nlanes )
{ {
v_int16x8 sad8 = v_reinterpret_as_s16(v_load(sad + d)); v_int32 sad4_l, sad4_h;
v_int32x4 sad4_l, sad4_h; v_expand(v_reinterpret_as_s16(vx_load(sad + d)), sad4_l, sad4_h);
v_expand(sad8, sad4_l, sad4_h); if( v_check_any((thresh4 > sad4_l) & ((d1 > d4) | (d4 > d2))) )
mask4 = thresh4 > sad4_l;
mask4 = mask4 & ((d1 > d4) | (d4 > d2));
if( v_check_any(mask4) )
break; break;
d4 += dd_4; d4 += dd_4;
mask4 = thresh4 > sad4_h; if( v_check_any((thresh4 > sad4_h) & ((d1 > d4) | (d4 > d2))) )
mask4 = mask4 & ((d1 > d4) | (d4 > d2));
if( v_check_any(mask4) )
break; break;
d4 += dd_4; d4 += dd_4;
} }
if( d <= ndisp - v_int16::nlanes )
{
dptr[y*dstep] = FILTERED;
continue;
}
if( d <= ndisp - v_int32::nlanes )
{
v_int32 sad4_l = vx_load_expand((short*)sad + d);
if (v_check_any((thresh4 > sad4_l) & ((d1 > d4) | (d4 > d2))))
{
dptr[y*dstep] = FILTERED;
continue;
}
d += v_int16::nlanes;
}
for( ; d < ndisp; d++ )
{
if( (thresh + 1) > sad[d] && ((mind - 1) > d || d > (mind + 1)) )
break;
}
if( d < ndisp ) if( d < ndisp )
{ {
dptr[y*dstep] = FILTERED; dptr[y*dstep] = FILTERED;
@ -571,7 +597,7 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
uchar* buf, int _dy0, int _dy1 ) uchar* buf, int _dy0, int _dy1 )
{ {
const int ALIGN = 16; const int ALIGN = CV_SIMD_WIDTH;
int x, y, d; int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2; int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1); int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
@ -587,12 +613,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
const int disp_shift = dispShiftTemplate<mType>::value; const int disp_shift = dispShiftTemplate<mType>::value;
mType FILTERED = (mType)((mindisp - 1) << disp_shift); mType FILTERED = (mType)((mindisp - 1) << disp_shift);
#if CV_SIMD128
{
CV_Assert (ndisp % 8 == 0);
}
#endif
int *sad, *hsad0, *hsad, *hsad_sub, *htext; int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf; uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.ptr() + lofs; const uchar* lptr0 = left.ptr() + lofs;
@ -607,6 +627,13 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
const int TABSZ = 256; const int TABSZ = 256;
uchar tab[TABSZ]; uchar tab[TABSZ];
#if CV_SIMD
int v_seq[v_int32::nlanes];
for (int i = 0; i < v_int32::nlanes; ++i)
v_seq[i] = i;
v_int32 d0_4 = vx_load(v_seq), dd_4 = vx_setall_s32(v_int32::nlanes);
#endif
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN); sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN); hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN); htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
@ -628,22 +655,22 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
{ {
int lval = lptr[0]; int lval = lptr[0];
d = 0; d = 0;
#if CV_SIMD128 #if CV_SIMD
{ {
v_uint8x16 lv = v_setall_u8((uchar)lval); v_uint8 lv = vx_setall_u8((uchar)lval);
for( ; d <= ndisp - 16; d += 16 ) for( ; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes )
{ {
v_uint8x16 rv = v_load(rptr + d); v_uint8 rv = vx_load(rptr + d);
v_int32x4 hsad_0 = v_load(hsad + d); v_int32 hsad_0 = vx_load(hsad + d);
v_int32x4 hsad_1 = v_load(hsad + d + 4); v_int32 hsad_1 = vx_load(hsad + d + v_int32::nlanes);
v_int32x4 hsad_2 = v_load(hsad + d + 8); v_int32 hsad_2 = vx_load(hsad + d + 2*v_int32::nlanes);
v_int32x4 hsad_3 = v_load(hsad + d + 12); v_int32 hsad_3 = vx_load(hsad + d + 3*v_int32::nlanes);
v_uint8x16 diff = v_absdiff(lv, rv); v_uint8 diff = v_absdiff(lv, rv);
v_store(cbuf + d, diff); v_store(cbuf + d, diff);
v_uint16x8 diff0, diff1; v_uint16 diff0, diff1;
v_uint32x4 diff00, diff01, diff10, diff11; v_uint32 diff00, diff01, diff10, diff11;
v_expand(diff, diff0, diff1); v_expand(diff, diff0, diff1);
v_expand(diff0, diff00, diff01); v_expand(diff0, diff00, diff01);
v_expand(diff1, diff10, diff11); v_expand(diff1, diff10, diff11);
@ -654,9 +681,9 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
hsad_3 += v_reinterpret_as_s32(diff11); hsad_3 += v_reinterpret_as_s32(diff11);
v_store(hsad + d, hsad_0); v_store(hsad + d, hsad_0);
v_store(hsad + d + 4, hsad_1); v_store(hsad + d + v_int32::nlanes, hsad_1);
v_store(hsad + d + 8, hsad_2); v_store(hsad + d + 2*v_int32::nlanes, hsad_2);
v_store(hsad + d + 12, hsad_3); v_store(hsad + d + 3*v_int32::nlanes, hsad_3);
} }
} }
#endif #endif
@ -696,22 +723,22 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
{ {
int lval = lptr[0]; int lval = lptr[0];
d = 0; d = 0;
#if CV_SIMD128 #if CV_SIMD
{ {
v_uint8x16 lv = v_setall_u8((uchar)lval); v_uint8 lv = vx_setall_u8((uchar)lval);
for( ; d <= ndisp - 16; d += 16 ) for( ; d <= ndisp - v_uint8::nlanes; d += v_uint8::nlanes )
{ {
v_uint8x16 rv = v_load(rptr + d); v_uint8 rv = vx_load(rptr + d);
v_int32x4 hsad_0 = v_load(hsad + d); v_int32 hsad_0 = vx_load(hsad + d);
v_int32x4 hsad_1 = v_load(hsad + d + 4); v_int32 hsad_1 = vx_load(hsad + d + v_int32::nlanes);
v_int32x4 hsad_2 = v_load(hsad + d + 8); v_int32 hsad_2 = vx_load(hsad + d + 2*v_int32::nlanes);
v_int32x4 hsad_3 = v_load(hsad + d + 12); v_int32 hsad_3 = vx_load(hsad + d + 3*v_int32::nlanes);
v_uint8x16 cbs = v_load(cbuf_sub + d); v_uint8 cbs = vx_load(cbuf_sub + d);
v_uint8x16 diff = v_absdiff(lv, rv); v_uint8 diff = v_absdiff(lv, rv);
v_store(cbuf + d, diff); v_store(cbuf + d, diff);
v_uint16x8 diff0, diff1, cbs0, cbs1; v_uint16 diff0, diff1, cbs0, cbs1;
v_int32x4 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11; v_int32 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11;
v_expand(diff, diff0, diff1); v_expand(diff, diff0, diff1);
v_expand(cbs, cbs0, cbs1); v_expand(cbs, cbs0, cbs1);
v_expand(v_reinterpret_as_s16(diff0), diff00, diff01); v_expand(v_reinterpret_as_s16(diff0), diff00, diff01);
@ -719,19 +746,19 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
v_expand(v_reinterpret_as_s16(cbs0), cbs00, cbs01); v_expand(v_reinterpret_as_s16(cbs0), cbs00, cbs01);
v_expand(v_reinterpret_as_s16(cbs1), cbs10, cbs11); v_expand(v_reinterpret_as_s16(cbs1), cbs10, cbs11);
v_int32x4 diff_0 = diff00 - cbs00; v_int32 diff_0 = diff00 - cbs00;
v_int32x4 diff_1 = diff01 - cbs01; v_int32 diff_1 = diff01 - cbs01;
v_int32x4 diff_2 = diff10 - cbs10; v_int32 diff_2 = diff10 - cbs10;
v_int32x4 diff_3 = diff11 - cbs11; v_int32 diff_3 = diff11 - cbs11;
hsad_0 += diff_0; hsad_0 += diff_0;
hsad_1 += diff_1; hsad_1 += diff_1;
hsad_2 += diff_2; hsad_2 += diff_2;
hsad_3 += diff_3; hsad_3 += diff_3;
v_store(hsad + d, hsad_0); v_store(hsad + d, hsad_0);
v_store(hsad + d + 4, hsad_1); v_store(hsad + d + v_int32::nlanes, hsad_1);
v_store(hsad + d + 8, hsad_2); v_store(hsad + d + 2*v_int32::nlanes, hsad_2);
v_store(hsad + d + 12, hsad_3); v_store(hsad + d + 3*v_int32::nlanes, hsad_3);
} }
} }
#endif #endif
@ -758,18 +785,18 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp ) for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
{ {
d = 0; d = 0;
#if CV_SIMD128 #if CV_SIMD
{ {
for( d = 0; d <= ndisp-8; d += 8 ) for( d = 0; d <= ndisp-2*v_int32::nlanes; d += 2*v_int32::nlanes )
{ {
v_int32x4 s0 = v_load(sad + d); v_int32 s0 = vx_load(sad + d);
v_int32x4 s1 = v_load(sad + d + 4); v_int32 s1 = vx_load(sad + d + v_int32::nlanes);
v_int32x4 t0 = v_load(hsad + d); v_int32 t0 = vx_load(hsad + d);
v_int32x4 t1 = v_load(hsad + d + 4); v_int32 t1 = vx_load(hsad + d + v_int32::nlanes);
s0 += t0; s0 += t0;
s1 += t1; s1 += t1;
v_store(sad + d, s0); v_store(sad + d, s0);
v_store(sad + d + 4, s1); v_store(sad + d + v_int32::nlanes, s1);
} }
} }
#endif #endif
@ -787,50 +814,31 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp; hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp; hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
d = 0; d = 0;
#if CV_SIMD128 #if CV_SIMD
{ {
v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3); v_int32 minsad4 = vx_setall_s32(INT_MAX);
v_int32x4 dd_4 = v_setall_s32(4); v_int32 mind4 = vx_setall_s32(0), d4 = d0_4;
v_int32x4 minsad4 = v_setall_s32(INT_MAX);
v_int32x4 mind4 = v_setall_s32(0), d4 = d0_4;
for( ; d <= ndisp - 8; d += 8 ) for( ; d <= ndisp - 2*v_int32::nlanes; d += 2*v_int32::nlanes )
{ {
v_int32x4 u0 = v_load(hsad_sub + d); v_int32 sad4 = vx_load(sad + d) + vx_load(hsad + d) - vx_load(hsad_sub + d);
v_int32x4 u1 = v_load(hsad + d); v_store(sad + d, sad4);
mind4 = v_select(minsad4 > sad4, d4, mind4);
v_int32x4 v0 = v_load(hsad_sub + d + 4); minsad4 = v_min(minsad4, sad4);
v_int32x4 v1 = v_load(hsad + d + 4);
v_int32x4 usad4 = v_load(sad + d);
v_int32x4 vsad4 = v_load(sad + d + 4);
u1 -= u0;
v1 -= v0;
usad4 += u1;
vsad4 += v1;
v_store(sad + d, usad4);
v_store(sad + d + 4, vsad4);
v_int32x4 mask = minsad4 > usad4;
minsad4 = v_min(minsad4, usad4);
mind4 = v_select(mask, d4, mind4);
d4 += dd_4; d4 += dd_4;
mask = minsad4 > vsad4; sad4 = vx_load(sad + d + v_int32::nlanes) + vx_load(hsad + d + v_int32::nlanes) - vx_load(hsad_sub + d + v_int32::nlanes);
minsad4 = v_min(minsad4, vsad4); v_store(sad + d + v_int32::nlanes, sad4);
mind4 = v_select(mask, d4, mind4); mind4 = v_select(minsad4 > sad4, d4, mind4);
minsad4 = v_min(minsad4, sad4);
d4 += dd_4; d4 += dd_4;
} }
int CV_DECL_ALIGNED(16) minsad_buf[4], mind_buf[4]; int CV_DECL_ALIGNED(CV_SIMD_WIDTH) minsad_buf[v_int32::nlanes], mind_buf[v_int32::nlanes];
v_store(minsad_buf, minsad4); v_store(minsad_buf, minsad4);
v_store(mind_buf, mind4); v_store(mind_buf, mind4);
if(minsad_buf[0] < minsad || (minsad == minsad_buf[0] && mind_buf[0] < mind)) { minsad = minsad_buf[0]; mind = mind_buf[0]; } for (int i = 0; i < v_int32::nlanes; ++i)
if(minsad_buf[1] < minsad || (minsad == minsad_buf[1] && mind_buf[1] < mind)) { minsad = minsad_buf[1]; mind = mind_buf[1]; } if(minsad_buf[i] < minsad || (minsad == minsad_buf[i] && mind_buf[i] < mind)) { minsad = minsad_buf[i]; mind = mind_buf[i]; }
if(minsad_buf[2] < minsad || (minsad == minsad_buf[2] && mind_buf[2] < mind)) { minsad = minsad_buf[2]; mind = mind_buf[2]; }
if(minsad_buf[3] < minsad || (minsad == minsad_buf[3] && mind_buf[3] < mind)) { minsad = minsad_buf[3]; mind = mind_buf[3]; }
} }
#endif #endif
for( ; d < ndisp; d++ ) for( ; d < ndisp; d++ )
@ -1027,7 +1035,7 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
Mat disp_i = disp->rowRange(row0, row1); Mat disp_i = disp->rowRange(row0, row1);
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat(); Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
#if CV_SIMD128 #if CV_SIMD
if (useShorts) if (useShorts)
{ {
if( disp_i.type() == CV_16S) if( disp_i.type() == CV_16S)


@ -1012,6 +1012,54 @@ OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float32x8, _mm256_castsi256_ps
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd) OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_left, v_float64x4, _mm256_castsi256_pd)
OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd) OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd)
/** Reverse **/
inline v_uint8x32 v_reverse(const v_uint8x32 &a)
{
static const __m256i perm = _mm256_setr_epi8(
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
__m256i vec = _mm256_shuffle_epi8(a.val, perm);
return v_uint8x32(_mm256_permute2x128_si256(vec, vec, 1));
}
inline v_int8x32 v_reverse(const v_int8x32 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x16 v_reverse(const v_uint16x16 &a)
{
static const __m256i perm = _mm256_setr_epi8(
14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1,
14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
__m256i vec = _mm256_shuffle_epi8(a.val, perm);
return v_uint16x16(_mm256_permute2x128_si256(vec, vec, 1));
}
inline v_int16x16 v_reverse(const v_int16x16 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x8 v_reverse(const v_uint32x8 &a)
{
static const __m256i perm = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
return v_uint32x8(_mm256_permutevar8x32_epi32(a.val, perm));
}
inline v_int32x8 v_reverse(const v_int32x8 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x8 v_reverse(const v_float32x8 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x4 v_reverse(const v_uint64x4 &a)
{
return v_uint64x4(_mm256_permute4x64_epi64(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
}
inline v_int64x4 v_reverse(const v_int64x4 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x4 v_reverse(const v_float64x4 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
////////// Reduce and mask ///////// ////////// Reduce and mask /////////
/** Reduce **/ /** Reduce **/


@ -1068,6 +1068,79 @@ OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_int64x8, epi64)
OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps) OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float32x16, ps)
OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd) OPENCV_HAL_IMPL_AVX512_ROTATE_EC(v_float64x8, pd)
/** Reverse **/
inline v_uint8x64 v_reverse(const v_uint8x64 &a)
{
#if CV_AVX_512VBMI
static const __m512i perm = _mm512_set_epi32(
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f,
0x20212223, 0x24252627, 0x28292a2b, 0x2c2d2e2f,
0x30313233, 0x34353637, 0x38393a3b, 0x3c3d3e3f);
return v_uint8x64(_mm512_permutexvar_epi8(perm, a.val));
#else
static const __m512i shuf = _mm512_set_epi32(
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6);
__m512i vec = _mm512_shuffle_epi8(a.val, shuf);
return v_uint8x64(_mm512_permutexvar_epi64(perm, vec));
#endif
}
inline v_int8x64 v_reverse(const v_int8x64 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x32 v_reverse(const v_uint16x32 &a)
{
#if CV_AVX_512VBMI
static const __m512i perm = _mm512_set_epi32(
0x00000001, 0x00020003, 0x00040005, 0x00060007,
0x00080009, 0x000a000b, 0x000c000d, 0x000e000f,
0x00100011, 0x00120013, 0x00140015, 0x00160017,
0x00180019, 0x001a001b, 0x001c001d, 0x001e001f);
return v_uint16x32(_mm512_permutexvar_epi16(perm, a.val));
#else
static const __m512i shuf = _mm512_set_epi32(
0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e,
0x01000302, 0x05040706, 0x09080b0a, 0x0d0c0f0e);
static const __m512i perm = _mm512_set_epi64(1, 0, 3, 2, 5, 4, 7, 6);
__m512i vec = _mm512_shuffle_epi8(a.val, shuf);
return v_uint16x32(_mm512_permutexvar_epi64(perm, vec));
#endif
}
inline v_int16x32 v_reverse(const v_int16x32 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x16 v_reverse(const v_uint32x16 &a)
{
static const __m512i perm = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,14, 15);
return v_uint32x16(_mm512_permutexvar_epi32(perm, a.val));
}
inline v_int32x16 v_reverse(const v_int32x16 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x16 v_reverse(const v_float32x16 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x8 v_reverse(const v_uint64x8 &a)
{
static const __m512i perm = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
return v_uint64x8(_mm512_permutexvar_epi64(perm, a.val));
}
inline v_int64x8 v_reverse(const v_int64x8 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x8 v_reverse(const v_float64x8 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
////////// Reduce ///////// ////////// Reduce /////////
/** Reduce **/ /** Reduce **/


@ -112,6 +112,7 @@ These operations allow to reorder or recombine elements in one or multiple vecto
- Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u, - Pack: @ref v_pack, @ref v_pack_u, @ref v_pack_b, @ref v_rshr_pack, @ref v_rshr_pack_u,
@ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
- Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
- Reverse: @ref v_reverse
- Extract: @ref v_extract - Extract: @ref v_extract
@ -215,6 +216,7 @@ Regular integers:
|cvt_flt32 | | | | | | x | |cvt_flt32 | | | | | | x |
|cvt_flt64 | | | | | | x | |cvt_flt64 | | | | | | x |
|transpose4x4 | | | | | x | x | |transpose4x4 | | | | | x | x |
|reverse | x | x | x | x | x | x |
Big integers: Big integers:
@ -224,6 +226,7 @@ Big integers:
|add, sub | x | x | |add, sub | x | x |
|shift | x | x | |shift | x | x |
|logical | x | x | |logical | x | x |
|reverse | x | x |
|extract | x | x | |extract | x | x |
|rotate (lanes) | x | x | |rotate (lanes) | x | x |
|cvt_flt64 | | x | |cvt_flt64 | | x |
@ -250,6 +253,7 @@ Floating point:
|transpose4x4 | x | | |transpose4x4 | x | |
|extract | x | x | |extract | x | x |
|rotate (lanes) | x | x | |rotate (lanes) | x | x |
|reverse | x | x |
@{ */ @{ */
@ -1724,6 +1728,23 @@ inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
} }
} }
/** @brief Vector reverse order
Reverse the order of the vector
Scheme:
@code
REG {A1 ... An} ==> REG {An ... A1}
@endcode
For all types. */
template<typename _Tp, int n>
inline v_reg<_Tp, n> v_reverse(const v_reg<_Tp, n>& a)
{
v_reg<_Tp, n> c;
for( int i = 0; i < n; i++ )
c.s[i] = a.s[n-i-1];
return c;
}
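A minimal usage sketch for the new operation (not part of the patch; the 128-bit fixed-size type is used only for brevity):
@code{.cpp}
int buf[4] = { 1, 2, 3, 4 };
v_int32x4 a = v_load(buf);
v_int32x4 r = v_reverse(a);   // lanes become {4, 3, 2, 1}
v_store(buf, r);              // buf now holds 4, 3, 2, 1
@endcode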
/** @brief Vector extract /** @brief Vector extract
Scheme: Scheme:


@ -906,6 +906,57 @@ OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_int64x2, int64, s64)
OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32) OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float32x4, float, f32)
OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64) OPENCV_HAL_IMPL_MSA_LOADSTORE_OP(v_float64x2, double, f64)
/** Reverse **/
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{
v_uint8x16 c = v_uint8x16((v16u8)__builtin_msa_vshf_b((v16i8)((v2i64){0x08090A0B0C0D0E0F, 0x0001020304050607}), msa_dupq_n_s8(0), (v16i8)a.val));
return c;
}
inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{
v_uint16x8 c = v_uint16x8((v8u16)__builtin_msa_vshf_h((v8i16)((v2i64){0x0004000500060007, 0x0000000100020003}), msa_dupq_n_s16(0), (v8i16)a.val));
return c;
}
inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{
v_uint32x4 c;
c.val[0] = a.val[3];
c.val[1] = a.val[2];
c.val[2] = a.val[1];
c.val[3] = a.val[0];
return c;
}
inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{
v_uint64x2 c;
c.val[0] = a.val[1];
c.val[1] = a.val[0];
return c;
}
inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
#define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \ #define OPENCV_HAL_IMPL_MSA_REDUCE_OP_8U(func, cfunc) \
inline unsigned short v_reduce_##func(const v_uint16x8& a) \ inline unsigned short v_reduce_##func(const v_uint16x8& a) \
{ \ { \


@ -1585,6 +1585,52 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64) OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
#endif #endif
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{
uint8x16_t vec = vrev64q_u8(a.val);
return v_uint8x16(vextq_u8(vec, vec, 8));
}
inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{
uint16x8_t vec = vrev64q_u16(a.val);
return v_uint16x8(vextq_u16(vec, vec, 4));
}
inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{
uint32x4_t vec = vrev64q_u32(a.val);
return v_uint32x4(vextq_u32(vec, vec, 2));
}
inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{
uint64x2_t vec = a.val;
uint64x1_t vec_lo = vget_low_u64(vec);
uint64x1_t vec_hi = vget_high_u64(vec);
return v_uint64x2(vcombine_u64(vec_hi, vec_lo));
}
inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
#if CV_SIMD128_64F
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
#endif
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \ #define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
template <int s> \ template <int s> \
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \ inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \


@ -1220,14 +1220,23 @@ inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps) OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float32x4, ps)
OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd) OPENCV_HAL_IMPL_SSE_FLT_CMP_OP(v_float64x2, pd)
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec, cast) \ #if CV_SSE4_1
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \ inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) == v_reinterpret_as_f64(b)); } \ { return _Tpvec(_mm_cmpeq_epi64(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \ inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return cast(v_reinterpret_as_f64(a) != v_reinterpret_as_f64(b)); } { return ~(a == b); }
#else
#define OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ __m128i cmp = _mm_cmpeq_epi32(a.val, b.val); \
return _Tpvec(_mm_and_si128(cmp, _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1)))); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return ~(a == b); }
#endif
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2, v_reinterpret_as_u64) OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2, v_reinterpret_as_s64) OPENCV_HAL_IMPL_SSE_64BIT_CMP_OP(v_int64x2)
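The non-SSE4.1 branch above relies on the fact that a 64-bit lane compares equal exactly when both of its 32-bit halves do. A standalone sketch of that trick with plain SSE2 intrinsics (an illustration outside OpenCV's wrappers):
@code{.cpp}
#include <emmintrin.h>   // SSE2

static inline __m128i cmpeq_epi64_sse2(__m128i a, __m128i b)
{
    __m128i cmp     = _mm_cmpeq_epi32(a, b);                            // per-32-bit equality masks
    __m128i swapped = _mm_shuffle_epi32(cmp, _MM_SHUFFLE(2, 3, 0, 1));  // swap the halves inside each 64-bit lane
    return _mm_and_si128(cmp, swapped);                                 // lane is all ones only if both halves matched
}
@endcode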
inline v_float32x4 v_not_nan(const v_float32x4& a) inline v_float32x4 v_not_nan(const v_float32x4& a)
{ return v_float32x4(_mm_cmpord_ps(a.val, a.val)); } { return v_float32x4(_mm_cmpord_ps(a.val, a.val)); }
@ -1914,6 +1923,59 @@ OPENCV_HAL_IMPL_SSE_UNPACKS(v_int32x4, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP)
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps) OPENCV_HAL_IMPL_SSE_UNPACKS(v_float32x4, ps, _mm_castps_si128, _mm_castsi128_ps)
OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd) OPENCV_HAL_IMPL_SSE_UNPACKS(v_float64x2, pd, _mm_castpd_si128, _mm_castsi128_pd)
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{
#if CV_SSSE3
static const __m128i perm = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
return v_uint8x16(_mm_shuffle_epi8(a.val, perm));
#else
uchar CV_DECL_ALIGNED(32) d[16];
v_store_aligned(d, a);
return v_uint8x16(d[15], d[14], d[13], d[12], d[11], d[10], d[9], d[8], d[7], d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
#endif
}
inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{
#if CV_SSSE3
static const __m128i perm = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
return v_uint16x8(_mm_shuffle_epi8(a.val, perm));
#else
__m128i r = _mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3));
r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(2, 3, 0, 1));
return v_uint16x8(r);
#endif
}
inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{
return v_uint32x4(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(0, 1, 2, 3)));
}
inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{
return v_uint64x2(_mm_shuffle_epi32(a.val, _MM_SHUFFLE(1, 0, 3, 2)));
}
inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
template<int s, typename _Tpvec> template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{ {


@ -678,6 +678,53 @@ OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_float64x2)
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2) OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_uint64x2)
OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2) OPENCV_IMPL_VSX_ROTATE_64_2RG_LR(v_int64x2)
/* Reverse */
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{
static const vec_uchar16 perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
vec_uchar16 vec = (vec_uchar16)a.val;
return v_uint8x16(vec_perm(vec, vec, perm));
}
inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{
static const vec_uchar16 perm = {14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1};
vec_uchar16 vec = (vec_uchar16)a.val;
return v_reinterpret_as_u16(v_uint8x16(vec_perm(vec, vec, perm)));
}
inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{
static const vec_uchar16 perm = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3};
vec_uchar16 vec = (vec_uchar16)a.val;
return v_reinterpret_as_u32(v_uint8x16(vec_perm(vec, vec, perm)));
}
inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{
static const vec_uchar16 perm = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
vec_uchar16 vec = (vec_uchar16)a.val;
return v_reinterpret_as_u64(v_uint8x16(vec_perm(vec, vec, perm)));
}
inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
/* Extract */ /* Extract */
template<int s, typename _Tpvec> template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)


@ -21,6 +21,18 @@ namespace cv
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#if (__EMSCRIPTEN_major__ * 1000000 + __EMSCRIPTEN_minor__ * 1000 + __EMSCRIPTEN_tiny__) < (1038046)
// handle renames: https://github.com/emscripten-core/emscripten/pull/9440 (https://github.com/emscripten-core/emscripten/commit/755d5b46cb84d0aa120c10981b11d05646c29673)
#define wasm_i32x4_trunc_saturate_f32x4 wasm_trunc_saturate_i32x4_f32x4
#define wasm_u32x4_trunc_saturate_f32x4 wasm_trunc_saturate_u32x4_f32x4
#define wasm_i64x2_trunc_saturate_f64x2 wasm_trunc_saturate_i64x2_f64x2
#define wasm_u64x2_trunc_saturate_f64x2 wasm_trunc_saturate_u64x2_f64x2
#define wasm_f32x4_convert_i32x4 wasm_convert_f32x4_i32x4
#define wasm_f32x4_convert_u32x4 wasm_convert_f32x4_u32x4
#define wasm_f64x2_convert_i64x2 wasm_convert_f64x2_i64x2
#define wasm_f64x2_convert_u64x2 wasm_convert_f64x2_u64x2
#endif // COMPATIBILITY: <1.38.46
///////// Types /////////// ///////// Types ///////////
struct v_uint8x16 struct v_uint8x16
@ -3111,6 +3123,38 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float32x4, float)
OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double) OPENCV_HAL_IMPL_WASM_LOADSTORE_INT_OP(v_float64x2, double)
/** Reverse **/
inline v_uint8x16 v_reverse(const v_uint8x16 &a)
{ return v_uint8x16(wasm_v8x16_shuffle(a.val, a.val, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); }
inline v_int8x16 v_reverse(const v_int8x16 &a)
{ return v_reinterpret_as_s8(v_reverse(v_reinterpret_as_u8(a))); }
inline v_uint16x8 v_reverse(const v_uint16x8 &a)
{ return v_uint16x8(wasm_v8x16_shuffle(a.val, a.val, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); }
inline v_int16x8 v_reverse(const v_int16x8 &a)
{ return v_reinterpret_as_s16(v_reverse(v_reinterpret_as_u16(a))); }
inline v_uint32x4 v_reverse(const v_uint32x4 &a)
{ return v_uint32x4(wasm_v8x16_shuffle(a.val, a.val, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)); }
inline v_int32x4 v_reverse(const v_int32x4 &a)
{ return v_reinterpret_as_s32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_float32x4 v_reverse(const v_float32x4 &a)
{ return v_reinterpret_as_f32(v_reverse(v_reinterpret_as_u32(a))); }
inline v_uint64x2 v_reverse(const v_uint64x2 &a)
{ return v_uint64x2(wasm_v8x16_shuffle(a.val, a.val, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); }
inline v_int64x2 v_reverse(const v_int64x2 &a)
{ return v_reinterpret_as_s64(v_reverse(v_reinterpret_as_u64(a))); }
inline v_float64x2 v_reverse(const v_float64x2 &a)
{ return v_reinterpret_as_f64(v_reverse(v_reinterpret_as_u64(a))); }
#define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \ #define OPENCV_HAL_IMPL_WASM_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, esuffix) \
inline scalartype v_reduce_sum(const _Tpvec& a) \ inline scalartype v_reduce_sum(const _Tpvec& a) \
{ \ { \
@ -3400,25 +3444,25 @@ inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
inline v_int32x4 v_round(const v_float32x4& a) inline v_int32x4 v_round(const v_float32x4& a)
{ {
v128_t h = wasm_f32x4_splat(0.5); v128_t h = wasm_f32x4_splat(0.5);
return v_int32x4(wasm_trunc_saturate_i32x4_f32x4(wasm_f32x4_add(a.val, h))); return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(wasm_f32x4_add(a.val, h)));
} }
inline v_int32x4 v_floor(const v_float32x4& a) inline v_int32x4 v_floor(const v_float32x4& a)
{ {
v128_t a1 = wasm_trunc_saturate_i32x4_f32x4(a.val); v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
v128_t mask = wasm_f32x4_lt(a.val, wasm_convert_f32x4_i32x4(a1)); v128_t mask = wasm_f32x4_lt(a.val, wasm_f32x4_convert_i32x4(a1));
return v_int32x4(wasm_i32x4_add(a1, mask)); return v_int32x4(wasm_i32x4_add(a1, mask));
} }
inline v_int32x4 v_ceil(const v_float32x4& a) inline v_int32x4 v_ceil(const v_float32x4& a)
{ {
v128_t a1 = wasm_trunc_saturate_i32x4_f32x4(a.val); v128_t a1 = wasm_i32x4_trunc_saturate_f32x4(a.val);
v128_t mask = wasm_f32x4_gt(a.val, wasm_convert_f32x4_i32x4(a1)); v128_t mask = wasm_f32x4_gt(a.val, wasm_f32x4_convert_i32x4(a1));
return v_int32x4(wasm_i32x4_sub(a1, mask)); return v_int32x4(wasm_i32x4_sub(a1, mask));
} }
inline v_int32x4 v_trunc(const v_float32x4& a) inline v_int32x4 v_trunc(const v_float32x4& a)
{ return v_int32x4(wasm_trunc_saturate_i32x4_f32x4(a.val)); } { return v_int32x4(wasm_i32x4_trunc_saturate_f32x4(a.val)); }
#define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \ #define OPENCV_HAL_IMPL_WASM_MATH_FUNC(func, cfunc, _Tpvec, _Tpnvec, _Tp, _Tpn) \
inline _Tpnvec func(const _Tpvec& a) \ inline _Tpnvec func(const _Tpvec& a) \
@ -3924,7 +3968,7 @@ OPENCV_HAL_IMPL_WASM_LOADSTORE_INTERLEAVE(v_float64x2, double, f64, v_uint64x2,
inline v_float32x4 v_cvt_f32(const v_int32x4& a) inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{ {
return v_float32x4(wasm_convert_f32x4_i32x4(a.val)); return v_float32x4(wasm_f32x4_convert_i32x4(a.val));
} }
inline v_float32x4 v_cvt_f32(const v_float64x2& a) inline v_float32x4 v_cvt_f32(const v_float64x2& a)
@ -3943,7 +3987,7 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a)
{ {
#ifdef __wasm_unimplemented_simd128__ #ifdef __wasm_unimplemented_simd128__
v128_t p = v128_cvti32x4_i64x2(a.val); v128_t p = v128_cvti32x4_i64x2(a.val);
return v_float64x2(wasm_convert_f64x2_i64x2(p)); return v_float64x2(wasm_f64x2_convert_i64x2(p));
#else #else
fallback::v_int32x4 a_(a); fallback::v_int32x4 a_(a);
return fallback::v_cvt_f64(a_); return fallback::v_cvt_f64(a_);
@ -3954,7 +3998,7 @@ inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
{ {
#ifdef __wasm_unimplemented_simd128__ #ifdef __wasm_unimplemented_simd128__
v128_t p = v128_cvti32x4_i64x2_high(a.val); v128_t p = v128_cvti32x4_i64x2_high(a.val);
return v_float64x2(wasm_convert_f64x2_i64x2(p)); return v_float64x2(wasm_f64x2_convert_i64x2(p));
#else #else
fallback::v_int32x4 a_(a); fallback::v_int32x4 a_(a);
return fallback::v_cvt_f64_high(a_); return fallback::v_cvt_f64_high(a_);
@ -3976,7 +4020,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
inline v_float64x2 v_cvt_f64(const v_int64x2& a) inline v_float64x2 v_cvt_f64(const v_int64x2& a)
{ {
#ifdef __wasm_unimplemented_simd128__ #ifdef __wasm_unimplemented_simd128__
return v_float64x2(wasm_convert_f64x2_i64x2(a.val)); return v_float64x2(wasm_f64x2_convert_i64x2(a.val));
#else #else
fallback::v_int64x2 a_(a); fallback::v_int64x2 a_(a);
return fallback::v_cvt_f64(a_); return fallback::v_cvt_f64(a_);

View File

@ -112,6 +112,13 @@ bool isAlignedAllocationEnabled()
} }
return useMemalign; return useMemalign;
} }
// do not use variable directly, details: https://github.com/opencv/opencv/issues/15691
static const bool g_force_initialization_memalign_flag
#if defined __GNUC__
__attribute__((unused))
#endif
= isAlignedAllocationEnabled();
#endif #endif
#ifdef OPENCV_ALLOC_ENABLE_STATISTICS #ifdef OPENCV_ALLOC_ENABLE_STATISTICS

View File

@ -711,6 +711,13 @@ static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
#ifdef HAVE_IPP_IW #ifdef HAVE_IPP_IW
CV_INSTRUMENT_REGION_IPP(); CV_INSTRUMENT_REGION_IPP();
// Details: https://github.com/opencv/opencv/issues/12943
if (flip_mode <= 0 /* swap rows */
&& cv::ipp::getIppTopFeatures() != ippCPUID_SSE42
&& (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/
)
return false;
IppiAxis ippMode; IppiAxis ippMode;
if(flip_mode < 0) if(flip_mode < 0)
ippMode = ippAxsBoth; ippMode = ippAxsBoth;

View File

@ -179,7 +179,25 @@ static int countNonZero32f( const float* src, int len )
static int countNonZero64f( const double* src, int len ) static int countNonZero64f( const double* src, int len )
{ {
return countNonZero_(src, len); int nz = 0, i = 0;
#if CV_SIMD_64F
v_int64 sum1 = vx_setzero_s64();
v_int64 sum2 = vx_setzero_s64();
v_float64 zero = vx_setzero_f64();
int step = v_float64::nlanes * 2;
int len0 = len & -step;
for(i = 0; i < len0; i += step )
{
sum1 += v_reinterpret_as_s64(vx_load(&src[i]) == zero);
sum2 += v_reinterpret_as_s64(vx_load(&src[i + step / 2]) == zero);
}
// N.B. the sum is decremented by 1 (0xFF..F) for each zero element, so i + sum gives the non-zero count
nz = i + (int)v_reduce_sum(sum1 + sum2);
v_cleanup();
#endif
return nz + countNonZero_(src + i, len - i);
} }
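A scalar sketch of the counting trick used above (illustrative only, not OpenCV API): a lane that compares equal to zero yields an all-ones mask, which reads as -1 when reinterpreted as a signed integer, so the accumulated sum equals minus the number of zero elements.

#include <cstdint>
#include <cstdio>

static int countNonZeroRef(const double* src, int len)
{
    std::int64_t sum = 0;
    for (int i = 0; i < len; ++i)
        sum += (src[i] == 0.0) ? -1 : 0;   // -1 per zero element, like the SIMD mask
    return len + (int)sum;                 // processed - zeros = non-zeros
}

int main()
{
    const double data[] = {0.0, 1.5, 0.0, -2.0, 0.0};
    std::printf("%d\n", countNonZeroRef(data, 5));   // 2
}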
CountNonZeroFunc getCountNonZeroTab(int depth) CountNonZeroFunc getCountNonZeroTab(int depth)

View File

@ -1115,6 +1115,22 @@ template<typename R> struct TheTest
return *this; return *this;
} }
TheTest & test_reverse()
{
Data<R> dataA;
R a = dataA;
Data<R> resB = v_reverse(a);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(dataA[R::nlanes - i - 1], resB[i]);
}
return *this;
}
template<int s> template<int s>
TheTest & test_extract() TheTest & test_extract()
{ {
@ -1426,6 +1442,50 @@ template<typename R> struct TheTest
return *this; return *this;
} }
#endif #endif
#if CV_SIMD_64F
TheTest & test_cmp64()
{
Data<R> dataA, dataB;
R a = dataA, b = dataB;
for (int i = 0; i < R::nlanes; ++i)
{
dataA[i] = dataB[i];
}
dataA[0]++;
a = dataA, b = dataB;
Data<R> resC = (a == b);
Data<R> resD = (a != b);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
}
for (int i = 0; i < R::nlanes; ++i)
{
dataA[i] = dataB[i] = (LaneType)-1;
}
a = dataA, b = dataB;
resC = (a == b);
resD = (a != b);
for (int i = 0; i < R::nlanes; ++i)
{
SCOPED_TRACE(cv::format("i=%d", i));
EXPECT_EQ(dataA[i] == dataB[i], resC[i] != 0);
EXPECT_EQ(dataA[i] != dataB[i], resD[i] != 0);
}
return *this;
}
#endif
}; };
@ -1459,6 +1519,7 @@ void test_hal_intrin_uint8()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>() .test_pack_u<1>().test_pack_u<2>().test_pack_u<3>().test_pack_u<8>()
.test_pack_b() .test_pack_b()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
; ;
@ -1497,6 +1558,7 @@ void test_hal_intrin_int8()
.test_popcount() .test_popcount()
.test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>() .test_pack<1>().test_pack<2>().test_pack<3>().test_pack<8>()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>() .test_extract<0>().test_extract<1>().test_extract<8>().test_extract<15>()
.test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>() .test_rotate<0>().test_rotate<1>().test_rotate<8>().test_rotate<15>()
; ;
@ -1529,6 +1591,7 @@ void test_hal_intrin_uint16()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>() .test_pack_u<1>().test_pack_u<2>().test_pack_u<7>().test_pack_u<16>()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
; ;
@ -1561,6 +1624,7 @@ void test_hal_intrin_int16()
.test_popcount() .test_popcount()
.test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>() .test_pack<1>().test_pack<2>().test_pack<7>().test_pack<16>()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>() .test_extract<0>().test_extract<1>().test_extract<4>().test_extract<7>()
.test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>() .test_rotate<0>().test_rotate<1>().test_rotate<4>().test_rotate<7>()
; ;
@ -1590,6 +1654,7 @@ void test_hal_intrin_uint32()
.test_popcount() .test_popcount()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_transpose() .test_transpose()
@ -1619,6 +1684,7 @@ void test_hal_intrin_int32()
.test_mask() .test_mask()
.test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>() .test_pack<1>().test_pack<2>().test_pack<15>().test_pack<32>()
.test_unpack() .test_unpack()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
.test_float_cvt32() .test_float_cvt32()
@ -1635,8 +1701,12 @@ void test_hal_intrin_uint64()
TheTest<v_uint64>() TheTest<v_uint64>()
.test_loadstore() .test_loadstore()
.test_addsub() .test_addsub()
#if CV_SIMD_64F
.test_cmp64()
#endif
.test_shift<1>().test_shift<8>() .test_shift<1>().test_shift<8>()
.test_logic() .test_logic()
.test_reverse()
.test_extract<0>().test_extract<1>() .test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>() .test_rotate<0>().test_rotate<1>()
; ;
@ -1648,8 +1718,12 @@ void test_hal_intrin_int64()
TheTest<v_int64>() TheTest<v_int64>()
.test_loadstore() .test_loadstore()
.test_addsub() .test_addsub()
#if CV_SIMD_64F
.test_cmp64()
#endif
.test_shift<1>().test_shift<8>() .test_shift<1>().test_shift<8>()
.test_logic() .test_logic()
.test_reverse()
.test_extract<0>().test_extract<1>() .test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>() .test_rotate<0>().test_rotate<1>()
.test_cvt64_double() .test_cvt64_double()
@ -1680,6 +1754,7 @@ void test_hal_intrin_float32()
.test_matmul() .test_matmul()
.test_transpose() .test_transpose()
.test_reduce_sum4() .test_reduce_sum4()
.test_reverse()
.test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>() .test_extract<0>().test_extract<1>().test_extract<2>().test_extract<3>()
.test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>() .test_rotate<0>().test_rotate<1>().test_rotate<2>().test_rotate<3>()
; ;
@ -1709,6 +1784,7 @@ void test_hal_intrin_float64()
.test_unpack() .test_unpack()
.test_float_math() .test_float_math()
.test_float_cvt32() .test_float_cvt32()
.test_reverse()
.test_extract<0>().test_extract<1>() .test_extract<0>().test_extract<1>()
.test_rotate<0>().test_rotate<1>() .test_rotate<0>().test_rotate<1>()
; ;

View File

@ -2025,4 +2025,17 @@ TEST(Core_Eigen, eigen2cv_check_Mat_type)
} }
#endif // HAVE_EIGEN #endif // HAVE_EIGEN
TEST(Mat, regression_12943) // memory usage: ~4.5 Gb
{
applyTestTag(CV_TEST_TAG_MEMORY_6GB);
const int width = 0x8000;
const int height = 0x10001;
cv::Mat src(height, width, CV_8UC1, Scalar::all(128));
cv::Mat dst;
cv::flip(src, dst, 0);
}
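As a sanity check on the threshold (illustrative arithmetic, not part of the patch): the image above is 0x8000 x 0x10001 single-channel bytes, which just crosses the 2 GiB limit added to ipp_flip earlier in this commit.

#include <cstdint>
#include <cstdio>

int main()
{
    const std::int64_t width = 0x8000, height = 0x10001;   // CV_8UC1 -> 1 byte per pixel
    const std::int64_t bytes = width * height;             // 2147516416
    std::printf("%lld bytes, >= 2 GiB: %d\n",
                (long long)bytes, (int)(bytes >= (std::int64_t)0x80000000));   // 1
}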
}} // namespace }} // namespace

View File

@ -6,7 +6,7 @@
#define OPENCV_DNN_VERSION_HPP #define OPENCV_DNN_VERSION_HPP
/// Use with major OpenCV version only. /// Use with major OpenCV version only.
#define OPENCV_DNN_API_VERSION 20190902 #define OPENCV_DNN_API_VERSION 20191024
#if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS #if !defined CV_DOXYGEN && !defined CV_STATIC_ANALYSIS && !defined CV_DNN_DONT_ADD_INLINE_NS
#define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION) #define CV__DNN_INLINE_NS __CV_CAT(dnn4_v, OPENCV_DNN_API_VERSION)

View File

@ -128,7 +128,7 @@ namespace cv {
void setConvolution(int kernel, int pad, int stride, void setConvolution(int kernel, int pad, int stride,
int filters_num, int channels_num, int use_batch_normalize, int use_relu) int filters_num, int channels_num, int use_batch_normalize)
{ {
cv::dnn::LayerParams conv_param = cv::dnn::LayerParams conv_param =
getParamConvolution(kernel, pad, stride, filters_num); getParamConvolution(kernel, pad, stride, filters_num);
@ -168,27 +168,29 @@ namespace cv {
net->layers.push_back(lp); net->layers.push_back(lp);
} }
if (use_relu)
{
cv::dnn::LayerParams activation_param;
activation_param.set<float>("negative_slope", 0.1f);
activation_param.name = "ReLU-name";
activation_param.type = "ReLU";
darknet::LayerParameter lp;
std::string layer_name = cv::format("relu_%d", layer_id);
lp.layer_name = layer_name;
lp.layer_type = activation_param.type;
lp.layerParams = activation_param;
lp.bottom_indexes.push_back(last_layer);
last_layer = layer_name;
net->layers.push_back(lp);
}
layer_id++; layer_id++;
fused_layer_names.push_back(last_layer); fused_layer_names.push_back(last_layer);
} }
void setReLU()
{
cv::dnn::LayerParams activation_param;
activation_param.set<float>("negative_slope", 0.1f);
activation_param.name = "ReLU-name";
activation_param.type = "ReLU";
darknet::LayerParameter lp;
std::string layer_name = cv::format("relu_%d", layer_id);
lp.layer_name = layer_name;
lp.layer_type = activation_param.type;
lp.layerParams = activation_param;
lp.bottom_indexes.push_back(last_layer);
last_layer = layer_name;
net->layers.push_back(lp);
fused_layer_names.back() = last_layer;
}
void setMaxpool(size_t kernel, size_t pad, size_t stride) void setMaxpool(size_t kernel, size_t pad, size_t stride)
{ {
cv::dnn::LayerParams maxpool_param; cv::dnn::LayerParams maxpool_param;
@ -409,12 +411,19 @@ namespace cv {
fused_layer_names.push_back(last_layer); fused_layer_names.push_back(last_layer);
} }
void setShortcut(int from) void setShortcut(int from, float alpha)
{ {
cv::dnn::LayerParams shortcut_param; cv::dnn::LayerParams shortcut_param;
shortcut_param.name = "Shortcut-name"; shortcut_param.name = "Shortcut-name";
shortcut_param.type = "Eltwise"; shortcut_param.type = "Eltwise";
if (alpha != 1)
{
std::vector<float> coeffs(2, 1);
coeffs[0] = alpha;
shortcut_param.set("coeff", DictValue::arrayReal<float*>(&coeffs[0], coeffs.size()));
}
shortcut_param.set<std::string>("op", "sum"); shortcut_param.set<std::string>("op", "sum");
darknet::LayerParameter lp; darknet::LayerParameter lp;
@ -422,8 +431,8 @@ namespace cv {
lp.layer_name = layer_name; lp.layer_name = layer_name;
lp.layer_type = shortcut_param.type; lp.layer_type = shortcut_param.type;
lp.layerParams = shortcut_param; lp.layerParams = shortcut_param;
lp.bottom_indexes.push_back(fused_layer_names.at(from));
lp.bottom_indexes.push_back(last_layer); lp.bottom_indexes.push_back(last_layer);
lp.bottom_indexes.push_back(fused_layer_names.at(from));
last_layer = layer_name; last_layer = layer_name;
net->layers.push_back(lp); net->layers.push_back(lp);
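Illustrative scalar view of what the weighted shortcut above computes, assuming the Eltwise "sum" semantics with coefficients {alpha, 1}: out = alpha * from_layer + previous layer output, elementwise (the numeric values below are hypothetical).

#include <cstdio>

int main()
{
    const float alpha = 0.5f;                                 // hypothetical darknet "alpha"
    const float from_layer = 4.f, previous = 10.f;            // one element, for brevity
    const float out = alpha * from_layer + 1.f * previous;    // matches coeffs {alpha, 1}
    std::printf("%g\n", out);                                 // 12
}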
@ -548,10 +557,7 @@ namespace cv {
int pad = getParam<int>(layer_params, "pad", 0); int pad = getParam<int>(layer_params, "pad", 0);
int stride = getParam<int>(layer_params, "stride", 1); int stride = getParam<int>(layer_params, "stride", 1);
int filters = getParam<int>(layer_params, "filters", -1); int filters = getParam<int>(layer_params, "filters", -1);
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1; bool batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
if(activation != "linear" && activation != "leaky")
CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
int flipped = getParam<int>(layer_params, "flipped", 0); int flipped = getParam<int>(layer_params, "flipped", 0);
if (flipped == 1) if (flipped == 1)
CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented"); CV_Error(cv::Error::StsNotImplemented, "Transpose the convolutional weights is not implemented");
@ -563,7 +569,7 @@ namespace cv {
CV_Assert(current_channels > 0); CV_Assert(current_channels > 0);
setParams.setConvolution(kernel_size, pad, stride, filters, current_channels, setParams.setConvolution(kernel_size, pad, stride, filters, current_channels,
batch_normalize, activation == "leaky"); batch_normalize);
current_channels = filters; current_channels = filters;
} }
@ -593,7 +599,7 @@ namespace cv {
current_channels = 0; current_channels = 0;
for (size_t k = 0; k < layers_vec.size(); ++k) { for (size_t k = 0; k < layers_vec.size(); ++k) {
layers_vec[k] = layers_vec[k] > 0 ? layers_vec[k] : (layers_vec[k] + layers_counter); layers_vec[k] = layers_vec[k] >= 0 ? layers_vec[k] : (layers_vec[k] + layers_counter);
current_channels += net->out_channels_vec[layers_vec[k]]; current_channels += net->out_channels_vec[layers_vec[k]];
} }
@ -631,13 +637,15 @@ namespace cv {
else if (layer_type == "shortcut") else if (layer_type == "shortcut")
{ {
std::string bottom_layer = getParam<std::string>(layer_params, "from", ""); std::string bottom_layer = getParam<std::string>(layer_params, "from", "");
float alpha = getParam<float>(layer_params, "alpha", 1);
float beta = getParam<float>(layer_params, "beta", 0);
if (beta != 0)
CV_Error(Error::StsNotImplemented, "Non-zero beta");
CV_Assert(!bottom_layer.empty()); CV_Assert(!bottom_layer.empty());
int from = std::atoi(bottom_layer.c_str()); int from = std::atoi(bottom_layer.c_str());
from += layers_counter; from = from < 0 ? from + layers_counter : from;
current_channels = net->out_channels_vec[from]; setParams.setShortcut(from, alpha);
setParams.setShortcut(from);
} }
else if (layer_type == "upsample") else if (layer_type == "upsample")
{ {
@ -667,6 +675,15 @@ namespace cv {
else { else {
CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type); CV_Error(cv::Error::StsParseError, "Unknown layer type: " + layer_type);
} }
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
if (activation == "leaky")
{
setParams.setReLU();
}
else if (activation != "linear")
CV_Error(cv::Error::StsParseError, "Unsupported activation: " + activation);
net->out_channels_vec[layers_counter] = current_channels; net->out_channels_vec[layers_counter] = current_channels;
} }
@ -710,7 +727,6 @@ namespace cv {
{ {
int kernel_size = getParam<int>(layer_params, "size", -1); int kernel_size = getParam<int>(layer_params, "size", -1);
int filters = getParam<int>(layer_params, "filters", -1); int filters = getParam<int>(layer_params, "filters", -1);
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1; bool use_batch_normalize = getParam<int>(layer_params, "batch_normalize", 0) == 1;
CV_Assert(kernel_size > 0 && filters > 0); CV_Assert(kernel_size > 0 && filters > 0);
@ -754,14 +770,16 @@ namespace cv {
bn_blobs.push_back(biasData_mat); bn_blobs.push_back(biasData_mat);
setParams.setLayerBlobs(cv_layers_counter, bn_blobs); setParams.setLayerBlobs(cv_layers_counter, bn_blobs);
} }
if(activation == "leaky")
++cv_layers_counter;
} }
if (layer_type == "region" || layer_type == "yolo") if (layer_type == "region" || layer_type == "yolo")
{ {
++cv_layers_counter; // For permute. ++cv_layers_counter; // For permute.
} }
std::string activation = getParam<std::string>(layer_params, "activation", "linear");
if(activation == "leaky")
++cv_layers_counter; // For ReLU
current_channels = net->out_channels_vec[darknet_layers_counter]; current_channels = net->out_channels_vec[darknet_layers_counter];
} }
return true; return true;

View File

@ -103,6 +103,37 @@ public:
static BackendRegistry impl; static BackendRegistry impl;
return impl; return impl;
} }
static inline bool checkIETarget(int target)
{
#ifndef HAVE_INF_ENGINE
return false;
#else
cv::dnn::Net net;
cv::dnn::LayerParams lp;
lp.set("kernel_size", 1);
lp.set("num_output", 1);
lp.set("bias_term", false);
lp.type = "Convolution";
lp.name = "testLayer";
lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
net.addLayerToPrev(lp.name, lp.type, lp);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(target);
static int inpDims[] = {1, 2, 3, 4};
net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
try
{
net.forward();
}
catch(...)
{
return false;
}
return true;
#endif
}
private: private:
BackendRegistry() BackendRegistry()
{ {
@ -154,35 +185,6 @@ private:
} }
#endif #endif
} }
static inline bool checkIETarget(int target)
{
#ifndef HAVE_INF_ENGINE
return false;
#else
cv::dnn::Net net;
cv::dnn::LayerParams lp;
lp.set("kernel_size", 1);
lp.set("num_output", 1);
lp.set("bias_term", false);
lp.type = "Convolution";
lp.name = "testLayer";
lp.blobs.push_back(Mat({1, 2, 1, 1}, CV_32F, Scalar(1)));
net.addLayerToPrev(lp.name, lp.type, lp);
net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(target);
static int inpDims[] = {1, 2, 3, 4};
net.setInput(cv::Mat(4, &inpDims[0], CV_32FC1, cv::Scalar(0)));
try
{
net.forward();
}
catch(...)
{
return false;
}
return true;
#endif
}
BackendsList backends; BackendsList backends;
}; };
@ -1689,6 +1691,9 @@ struct Net::Impl
// backend. Split a whole model on several Inference Engine networks if // backend. Split a whole model on several Inference Engine networks if
// some of layers are not implemented. // some of layers are not implemented.
bool supportsCPUFallback = preferableTarget == DNN_TARGET_CPU ||
BackendRegistry::checkIETarget(DNN_TARGET_CPU);
// Set of all input and output blobs wrappers for current network. // Set of all input and output blobs wrappers for current network.
std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers; std::map<LayerPin, Ptr<BackendWrapper> > netBlobsWrappers;
for (it = layers.begin(); it != layers.end(); ++it) for (it = layers.begin(); it != layers.end(); ++it)
@ -1702,7 +1707,8 @@ struct Net::Impl
if (!fused && !layer->supportBackend(preferableBackend)) if (!fused && !layer->supportBackend(preferableBackend))
{ {
bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1 && bool customizable = ld.id != 0 && ld.outputBlobs.size() == 1 &&
INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2); INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2019R2) &&
supportsCPUFallback;
// TODO: there is a bug in Myriad plugin with custom layers shape infer. // TODO: there is a bug in Myriad plugin with custom layers shape infer.
if (preferableTarget == DNN_TARGET_MYRIAD) if (preferableTarget == DNN_TARGET_MYRIAD)
{ {

View File

@ -70,6 +70,7 @@ public:
MAX = 2, MAX = 2,
} op; } op;
std::vector<float> coeffs; std::vector<float> coeffs;
bool variableChannels;
EltwiseLayerImpl(const LayerParams& params) EltwiseLayerImpl(const LayerParams& params)
{ {
@ -105,7 +106,7 @@ public:
return backendId == DNN_BACKEND_OPENCV || return backendId == DNN_BACKEND_OPENCV ||
backendId == DNN_BACKEND_CUDA || backendId == DNN_BACKEND_CUDA ||
backendId == DNN_BACKEND_HALIDE || backendId == DNN_BACKEND_HALIDE ||
(backendId == DNN_BACKEND_INFERENCE_ENGINE && (backendId == DNN_BACKEND_INFERENCE_ENGINE && !variableChannels &&
(preferableTarget != DNN_TARGET_OPENCL || coeffs.empty())); (preferableTarget != DNN_TARGET_OPENCL || coeffs.empty()));
} }
@ -115,33 +116,57 @@ public:
std::vector<MatShape> &internals) const CV_OVERRIDE std::vector<MatShape> &internals) const CV_OVERRIDE
{ {
CV_Assert(inputs.size() >= 2); CV_Assert(inputs.size() >= 2);
CV_Assert(inputs[0].size() >= 2);
CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size()); CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size());
CV_Assert(op == SUM || coeffs.size() == 0); CV_Assert(op == SUM || coeffs.size() == 0);
int dims = inputs[0].size();
// Number of channels in output shape is determined by the first input tensor.
int numChannels = inputs[0][1];
for (int i = 1; i < inputs.size(); i++) for (int i = 1; i < inputs.size(); i++)
{ {
CV_Assert(inputs[0] == inputs[i]); CV_Assert(inputs[0][0] == inputs[i][0]);
// The channels axis is allowed to differ between inputs.
for (int j = 2; j < dims; j++)
CV_Assert(inputs[0][j] == inputs[i][j]);
} }
outputs.assign(1, inputs[0]); outputs.assign(1, inputs[0]);
outputs[0][1] = numChannels;
return false; return false;
} }
void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE
{
std::vector<Mat> inputs;
inputs_arr.getMatVector(inputs);
variableChannels = false;
for (int i = 1; i < inputs.size(); ++i)
{
if (inputs[i].size[1] != inputs[0].size[1])
{
variableChannels = true;
break;
}
}
}
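A hypothetical scalar reference of the Eltwise SUM behaviour introduced here (plain C++ for illustration, not OpenCV API): the output keeps the channel count of the first input, and each additional input contributes only to the channels it actually has, which mirrors the sorted-by-channels loop in EltwiseInvoker below.

#include <cstdio>
#include <vector>

int main()
{
    // Three inputs with 4, 5 and 3 channels; one value per channel for brevity.
    std::vector<std::vector<float> > inputs = {{1,1,1,1}, {2,2,2,2,2}, {3,3,3}};
    std::vector<float> coeffs = {1.f, 1.f, 1.f};
    const size_t outChannels = inputs[0].size();   // output shape follows the first input

    std::vector<float> out(outChannels, 0.f);
    for (size_t c = 0; c < outChannels; ++c)
        for (size_t i = 0; i < inputs.size(); ++i)
            if (c < inputs[i].size())              // inputs lacking this channel are skipped
                out[c] += coeffs[i] * inputs[i][c];

    for (size_t c = 0; c < outChannels; ++c)
        std::printf("%g ", out[c]);                // 6 6 6 3
    std::printf("\n");
}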
class EltwiseInvoker : public ParallelLoopBody class EltwiseInvoker : public ParallelLoopBody
{ {
public: public:
const Mat* srcs; std::vector<const Mat*> srcs;
int nsrcs; int nsrcs;
Mat* dst; Mat* dst;
const std::vector<float>* coeffs; std::vector<float> coeffs;
EltwiseOp op; EltwiseOp op;
int nstripes; int nstripes;
const ActivationLayer* activ; const ActivationLayer* activ;
int channels; int channels;
size_t planeSize; size_t planeSize;
EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} EltwiseInvoker() : nsrcs(0), dst(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}
static void run(const Mat* srcs, int nsrcs, Mat& dst, static void run(const Mat* srcs, int nsrcs, Mat& dst,
const std::vector<float>& coeffs, EltwiseOp op, const std::vector<float>& coeffs, EltwiseOp op,
@ -150,15 +175,23 @@ public:
CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous()); CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_32FC1, ""); CV_Assert(dst.isContinuous());
CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs); CV_Assert(coeffs.empty() || coeffs.size() == (size_t)nsrcs);
EltwiseInvoker p;
p.srcs.resize(nsrcs);
p.coeffs = coeffs;
for( int i = 0; i < nsrcs; i++ ) for( int i = 0; i < nsrcs; i++ )
{ {
CV_Assert(srcs[i].size == dst.size && p.srcs[i] = srcs + i;
srcs[i].type() == dst.type() && CV_Assert(srcs[i].type() == dst.type() &&
srcs[i].isContinuous()); srcs[i].isContinuous());
// Sort srcs and coefficients in the order by number of channels
for( int j = i; j >= 1 && p.srcs[j - 1]->size[1] < p.srcs[j]->size[1]; j-- )
{
std::swap(p.srcs[j - 1], p.srcs[j]);
if (!p.coeffs.empty())
std::swap(p.coeffs[j - 1], p.coeffs[j]);
}
} }
EltwiseInvoker p;
p.srcs = srcs;
p.nsrcs = nsrcs; p.nsrcs = nsrcs;
p.dst = &dst; p.dst = &dst;
p.op = op; p.op = op;
@ -180,7 +213,8 @@ public:
break; break;
} }
} }
p.coeffs = simpleCoeffs ? 0 : &coeffs; if (simpleCoeffs)
p.coeffs.clear();
p.activ = activ; p.activ = activ;
parallel_for_(Range(0, nstripes), p, nstripes); parallel_for_(Range(0, nstripes), p, nstripes);
@ -192,8 +226,8 @@ public:
size_t stripeSize = (total + nstripes - 1)/nstripes; size_t stripeSize = (total + nstripes - 1)/nstripes;
size_t stripeStart = r.start*stripeSize; size_t stripeStart = r.start*stripeSize;
size_t stripeEnd = std::min(r.end*stripeSize, total); size_t stripeEnd = std::min(r.end*stripeSize, total);
int c, j, k, n = nsrcs; int c, j, k, n;
const float* coeffsptr = coeffs && !coeffs->empty() ? &coeffs->at(0) : 0; const float* coeffsptr = !coeffs.empty() ? &coeffs[0] : 0;
float* dstptr0 = dst->ptr<float>(); float* dstptr0 = dst->ptr<float>();
int blockSize0 = 1 << 12, blockSize; int blockSize0 = 1 << 12, blockSize;
@ -208,14 +242,35 @@ public:
for( c = 0; c < channels; c++ ) for( c = 0; c < channels; c++ )
{ {
size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize; size_t globalDelta = delta + (sampleIdx*channels + c)*planeSize;
const float* srcptr0 = srcs[0].ptr<float>() + globalDelta; const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
float* dstptr = dstptr0 + globalDelta; float* dstptr = dstptr0 + globalDelta;
if( op == PROD ) // This code assumes that srcs are sorted in descending order by channels.
for (n = 1; n < nsrcs && c < srcs[n]->size[1]; ++n) {}
if (n == 1)
{
if( !coeffsptr )
{
for( j = 0; j < blockSize; j++ )
{
dstptr[j] = srcptr0[j];
}
}
else
{
float c0 = coeffsptr[0];
for( j = 0; j < blockSize; j++ )
{
dstptr[j] = c0*srcptr0[j];
}
}
}
else if( op == PROD )
{ {
for( k = 1; k < n; k++ ) for( k = 1; k < n; k++ )
{ {
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
for( j = 0; j < blockSize; j++ ) for( j = 0; j < blockSize; j++ )
{ {
dstptr[j] = srcptr0[j]*srcptr1[j]; dstptr[j] = srcptr0[j]*srcptr1[j];
@ -227,7 +282,7 @@ public:
{ {
for( k = 1; k < n; k++ ) for( k = 1; k < n; k++ )
{ {
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
for( j = 0; j < blockSize; j++ ) for( j = 0; j < blockSize; j++ )
{ {
dstptr[j] = std::max(srcptr0[j], srcptr1[j]); dstptr[j] = std::max(srcptr0[j], srcptr1[j]);
@ -239,7 +294,7 @@ public:
{ {
for( k = 1; k < n; k++ ) for( k = 1; k < n; k++ )
{ {
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
for( j = 0; j < blockSize; j++ ) for( j = 0; j < blockSize; j++ )
{ {
dstptr[j] = srcptr0[j] + srcptr1[j]; dstptr[j] = srcptr0[j] + srcptr1[j];
@ -252,7 +307,7 @@ public:
float c0 = coeffsptr[0]; float c0 = coeffsptr[0];
for( k = 1; k < n; k++ ) for( k = 1; k < n; k++ )
{ {
const float* srcptr1 = srcs[k].ptr<float>() + globalDelta; const float* srcptr1 = srcs[k]->ptr<float>() + globalDelta;
float c1 = coeffsptr[k]; float c1 = coeffsptr[k];
for( j = 0; j < blockSize; j++ ) for( j = 0; j < blockSize; j++ )
{ {
@ -279,7 +334,7 @@ public:
std::vector<UMat> inputs; std::vector<UMat> inputs;
std::vector<UMat> outputs; std::vector<UMat> outputs;
if (inputs_.depth() == CV_16S && op != SUM) if ((inputs_.depth() == CV_16S && op != SUM) || variableChannels)
return false; return false;
inputs_.getUMatVector(inputs); inputs_.getUMatVector(inputs);

View File

@ -390,12 +390,6 @@ TEST_P(Test_Darknet_nets, YOLOv3)
{ {
applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB));
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD
&& getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
#endif
// batchId, classId, confidence, left, top, right, bottom // batchId, classId, confidence, left, top, right, bottom
Mat ref = (Mat_<float>(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck Mat ref = (Mat_<float>(9, 7) << 0, 7, 0.952983f, 0.614622f, 0.150257f, 0.901369f, 0.289251f, // a truck
0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle 0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.74626f, // a bicycle
@ -413,23 +407,35 @@ TEST_P(Test_Darknet_nets, YOLOv3)
std::string config_file = "yolov3.cfg"; std::string config_file = "yolov3.cfg";
std::string weights_file = "yolov3.weights"; std::string weights_file = "yolov3.weights";
#if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
{
scoreDiff = 0.04;
iouDiff = 0.2;
}
#endif
{ {
SCOPED_TRACE("batch size 1"); SCOPED_TRACE("batch size 1");
testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff);
} }
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2018050000) #if defined(INF_ENGINE_RELEASE)
if (backend == DNN_BACKEND_INFERENCE_ENGINE && target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL) // Test with 'batch size 2' is disabled for DLIE/OpenCL target
#endif
#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019020000)
if (backend == DNN_BACKEND_INFERENCE_ENGINE) if (backend == DNN_BACKEND_INFERENCE_ENGINE)
{ {
if (target == DNN_TARGET_OPENCL) if (INF_ENGINE_VER_MAJOR_LE(2018050000) && target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2019R2); applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2018R5);
if (target == DNN_TARGET_OPENCL_FP16) else if (INF_ENGINE_VER_MAJOR_EQ(2019020000))
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R2); {
if (target == DNN_TARGET_OPENCL)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
if (target == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_2019R2);
}
else if (target == DNN_TARGET_MYRIAD &&
getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X)
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X);
} }
#endif #endif
@ -444,6 +450,9 @@ INSTANTIATE_TEST_CASE_P(/**/, Test_Darknet_nets, dnnBackendsAndTargets());
TEST_P(Test_Darknet_layers, shortcut) TEST_P(Test_Darknet_layers, shortcut)
{ {
testDarknetLayer("shortcut"); testDarknetLayer("shortcut");
testDarknetLayer("shortcut_leaky");
testDarknetLayer("shortcut_unequal");
testDarknetLayer("shortcut_unequal_2");
} }
TEST_P(Test_Darknet_layers, upsample) TEST_P(Test_Darknet_layers, upsample)

View File

@ -1493,4 +1493,62 @@ TEST(Layer_Test_Convolution, relu_fusion)
normAssert(input, output); normAssert(input, output);
} }
typedef testing::TestWithParam<tuple<bool, tuple<Backend, Target> > > Layer_Test_Eltwise_unequal;
TEST_P(Layer_Test_Eltwise_unequal, Accuracy)
{
bool weighted = get<0>(GetParam());
int backendId = get<0>(get<1>(GetParam()));
int targetId = get<1>(get<1>(GetParam()));
if (backendId == DNN_BACKEND_OPENCV && targetId == DNN_TARGET_OPENCL_FP16)
applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
Net net;
LayerParams lp;
lp.type = "Eltwise";
lp.name = "testLayer";
const int inpShapes[][4] = {{1, 4, 2, 2}, {1, 5, 2, 2}, {1, 3, 2, 2}};
std::vector<String> inpNames(3);
std::vector<Mat> inputs(3);
size_t numOutValues = 1*4*2*2; // By the first input
std::vector<float> weights(3, 1);
if (weighted)
{
for (int i = 0; i < inputs.size(); ++i)
randu(Mat(1, 1, CV_32F, &weights[i]), -1, 1);
lp.set("coeff", DictValue::arrayReal<float*>(&weights[0], weights.size()));
}
int eltwiseId = net.addLayer(lp.name, lp.type, lp);
for (int i = 0; i < inputs.size(); ++i)
{
inputs[i].create(4, inpShapes[i], CV_32F);
randu(inputs[i], 0, 255);
inpNames[i] = format("input_%d", i);
net.connect(0, i, eltwiseId, i);
}
Mat ref(1, numOutValues, CV_32F, Scalar(0));
net.setInputsNames(inpNames);
for (int i = 0; i < inputs.size(); ++i)
{
net.setInput(inputs[i], inpNames[i]);
if (numOutValues >= inputs[i].total())
ref.colRange(0, inputs[i].total()) += weights[i] * inputs[i].reshape(1, 1);
else
ref += weights[i] * inputs[i].reshape(1, 1).colRange(0, numOutValues);
}
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
Mat out = net.forward();
normAssert(out.reshape(1, 1), ref);
}
INSTANTIATE_TEST_CASE_P(/**/, Layer_Test_Eltwise_unequal, Combine(
testing::Bool(),
dnnBackendsAndTargets()
));
}} // namespace }} // namespace

View File

@ -394,7 +394,9 @@ enum ConnectedComponentsTypes {
CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box CC_STAT_WIDTH = 2, //!< The horizontal size of the bounding box
CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box CC_STAT_HEIGHT = 3, //!< The vertical size of the bounding box
CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component CC_STAT_AREA = 4, //!< The total area (in pixels) of the connected component
CC_STAT_MAX = 5 #ifndef CV_DOXYGEN
CC_STAT_MAX = 5 //!< Max enumeration value. Used internally only for memory allocation
#endif
}; };
//! connected components algorithm //! connected components algorithm
@ -4008,7 +4010,23 @@ without self-intersections. Otherwise, the function output is undefined.
*/ */
CV_EXPORTS_W bool isContourConvex( InputArray contour ); CV_EXPORTS_W bool isContourConvex( InputArray contour );
//! finds intersection of two convex polygons /** @example samples/cpp/intersectExample.cpp
Examples of how intersectConvexConvex works
*/
/** @brief Finds intersection of two convex polygons
@param _p1 First polygon
@param _p2 Second polygon
@param _p12 Output polygon describing the intersecting area
@param handleNested When true, an intersection is found if one of the polygons is fully enclosed in the other.
When false, no intersection is found. If the polygons share a side or the vertex of one polygon lies on an edge
of the other, they are not considered nested and an intersection will be found regardless of the value of handleNested.
@returns Absolute value of area of intersecting polygon
@note intersectConvexConvex doesn't confirm that both polygons are convex and will return invalid results if they aren't.
*/
CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2, CV_EXPORTS_W float intersectConvexConvex( InputArray _p1, InputArray _p2,
OutputArray _p12, bool handleNested = true ); OutputArray _p12, bool handleNested = true );
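A minimal usage sketch based on the documentation above (the coordinates and printed values are illustrative, not from the patch):

#include <opencv2/imgproc.hpp>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<cv::Point> a = { {10, 10}, {50, 10}, {50, 50}, {10, 50} };   // 40x40 square
    std::vector<cv::Point> b = { {30, 30}, {70, 30}, {70, 70}, {30, 70} };   // overlapping square
    std::vector<cv::Point> inter;
    float area = cv::intersectConvexConvex(a, b, inter, /*handleNested=*/true);
    std::printf("area=%.1f vertices=%d\n", area, (int)inter.size());   // area=400.0 vertices=4
    return 0;
}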

View File

@ -2624,11 +2624,127 @@ void accW_simd_(const uchar* src, float* dst, const uchar* mask, int len, int cn
v_dst10 = v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha); v_dst10 = v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha);
v_dst11 = v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha); v_dst11 = v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha);
v_store(dst + x, v_dst00); v_store(dst + x , v_dst00);
v_store(dst + x + step, v_dst01); v_store(dst + x + step , v_dst01);
v_store(dst + x + step * 2, v_dst10); v_store(dst + x + step * 2, v_dst10);
v_store(dst + x + step * 3, v_dst11); v_store(dst + x + step * 3, v_dst11);
} }
} else {
const v_float32 zero = vx_setall_f32((float)0);
int size = len * cn;
if ( cn == 1 ){
for (; x <= size - cVectorWidth; x += cVectorWidth)
{
v_uint8 v_src = vx_load(src + x);
v_uint8 v_mask = vx_load(mask + x);
v_uint16 v_m0, v_m1;
v_expand(v_mask, v_m0, v_m1);
v_uint32 v_m00, v_m01, v_m10, v_m11;
v_expand(v_m0, v_m00, v_m01);
v_expand(v_m1, v_m10, v_m11);
v_float32 v_mf00, v_mf01, v_mf10, v_mf11;
v_mf00 = v_cvt_f32(v_reinterpret_as_s32(v_m00));
v_mf01 = v_cvt_f32(v_reinterpret_as_s32(v_m01));
v_mf10 = v_cvt_f32(v_reinterpret_as_s32(v_m10));
v_mf11 = v_cvt_f32(v_reinterpret_as_s32(v_m11));
v_uint16 v_src0, v_src1;
v_expand(v_src, v_src0, v_src1);
v_uint32 v_src00, v_src01, v_src10, v_src11;
v_expand(v_src0, v_src00, v_src01);
v_expand(v_src1, v_src10, v_src11);
v_float32 v_dst00 = vx_load(dst + x);
v_float32 v_dst01 = vx_load(dst + x + step);
v_float32 v_dst10 = vx_load(dst + x + step * 2);
v_float32 v_dst11 = vx_load(dst + x + step * 3);
v_mf00 = v_mf00 != zero;
v_mf01 = v_mf01 != zero;
v_mf10 = v_mf10 != zero;
v_mf11 = v_mf11 != zero;
v_dst00 = v_select(v_mf00, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src00)) * v_alpha), v_dst00);
v_dst01 = v_select(v_mf01, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src01)) * v_alpha), v_dst01);
v_dst10 = v_select(v_mf10, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha), v_dst10);
v_dst11 = v_select(v_mf11, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha), v_dst11);
v_store(dst + x , v_dst00);
v_store(dst + x + step , v_dst01);
v_store(dst + x + step * 2, v_dst10);
v_store(dst + x + step * 3, v_dst11);
}
} else if ( cn == 3 )
{
for (; x*cn <= size - cVectorWidth*cn; x += cVectorWidth )
{
v_uint8 v_src0, v_src1, v_src2;
v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2);
v_uint16 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
v_expand(v_src0, v_src00, v_src01);
v_expand(v_src1, v_src10, v_src11);
v_expand(v_src2, v_src20, v_src21);
v_uint32 v_src000, v_src001, v_src010, v_src011, v_src100, v_src101, v_src110, v_src111, v_src200, v_src201, v_src210, v_src211;
v_expand(v_src00, v_src000, v_src001);
v_expand(v_src01, v_src010, v_src011);
v_expand(v_src10, v_src100, v_src101);
v_expand(v_src11, v_src110, v_src111);
v_expand(v_src20, v_src200, v_src201);
v_expand(v_src21, v_src210, v_src211);
v_float32 v_dst00, v_dst01, v_dst02, v_dst03, v_dst10, v_dst11, v_dst12, v_dst13;
v_float32 v_dst20, v_dst21, v_dst22, v_dst23;
v_load_deinterleave(dst + x * cn , v_dst00, v_dst10, v_dst20);
v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21);
v_load_deinterleave(dst + (x + 2 * step) * cn, v_dst02, v_dst12, v_dst22);
v_load_deinterleave(dst + (x + 3 * step) * cn, v_dst03, v_dst13, v_dst23);
v_uint8 v_mask = vx_load(mask + x);
v_uint16 v_m0, v_m1;
v_expand(v_mask, v_m0, v_m1);
v_uint32 v_m00, v_m01, v_m10, v_m11;
v_expand(v_m0, v_m00, v_m01);
v_expand(v_m1, v_m10, v_m11);
v_float32 v_mf00, v_mf01, v_mf10, v_mf11;
v_mf00 = v_cvt_f32(v_reinterpret_as_s32(v_m00));
v_mf01 = v_cvt_f32(v_reinterpret_as_s32(v_m01));
v_mf10 = v_cvt_f32(v_reinterpret_as_s32(v_m10));
v_mf11 = v_cvt_f32(v_reinterpret_as_s32(v_m11));
v_mf00 = v_mf00 != zero;
v_mf01 = v_mf01 != zero;
v_mf10 = v_mf10 != zero;
v_mf11 = v_mf11 != zero;
v_dst00 = v_select(v_mf00, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src000)) * v_alpha), v_dst00);
v_dst01 = v_select(v_mf01, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src001)) * v_alpha), v_dst01);
v_dst02 = v_select(v_mf10, v_fma(v_dst02, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src010)) * v_alpha), v_dst02);
v_dst03 = v_select(v_mf11, v_fma(v_dst03, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src011)) * v_alpha), v_dst03);
v_dst10 = v_select(v_mf00, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src100)) * v_alpha), v_dst10);
v_dst11 = v_select(v_mf01, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src101)) * v_alpha), v_dst11);
v_dst12 = v_select(v_mf10, v_fma(v_dst12, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src110)) * v_alpha), v_dst12);
v_dst13 = v_select(v_mf11, v_fma(v_dst13, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src111)) * v_alpha), v_dst13);
v_dst20 = v_select(v_mf00, v_fma(v_dst20, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src200)) * v_alpha), v_dst20);
v_dst21 = v_select(v_mf01, v_fma(v_dst21, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src201)) * v_alpha), v_dst21);
v_dst22 = v_select(v_mf10, v_fma(v_dst22, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src210)) * v_alpha), v_dst22);
v_dst23 = v_select(v_mf11, v_fma(v_dst23, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src211)) * v_alpha), v_dst23);
v_store_interleave(dst + x * cn , v_dst00, v_dst10, v_dst20);
v_store_interleave(dst + ( x + step ) * cn, v_dst01, v_dst11, v_dst21);
v_store_interleave(dst + ( x + step * 2 ) * cn, v_dst02, v_dst12, v_dst22);
v_store_interleave(dst + ( x + step * 3 ) * cn, v_dst03, v_dst13, v_dst23);
}
}
} }
#endif // CV_SIMD #endif // CV_SIMD
accW_general_(src, dst, mask, len, cn, alpha, x); accW_general_(src, dst, mask, len, cn, alpha, x);
@ -2657,9 +2773,81 @@ void accW_simd_(const ushort* src, float* dst, const uchar* mask, int len, int c
v_dst0 = v_fma(v_dst0, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int0)) * v_alpha); v_dst0 = v_fma(v_dst0, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int0)) * v_alpha);
v_dst1 = v_fma(v_dst1, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int1)) * v_alpha); v_dst1 = v_fma(v_dst1, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_int1)) * v_alpha);
v_store(dst + x, v_dst0); v_store(dst + x , v_dst0);
v_store(dst + x + step, v_dst1); v_store(dst + x + step, v_dst1);
} }
} else {
const v_float32 zero = vx_setall_f32((float)0);
int size = len * cn;
if ( cn == 1 )
{
for (; x <= size - cVectorWidth; x += cVectorWidth)
{
v_uint16 v_src = vx_load(src + x);
v_uint16 v_mask = v_reinterpret_as_u16(vx_load_expand(mask + x));
v_uint32 v_m0, v_m1;
v_expand(v_mask, v_m0, v_m1);
v_float32 v_mf0, v_mf1;
v_mf0 = v_cvt_f32(v_reinterpret_as_s32(v_m0));
v_mf1 = v_cvt_f32(v_reinterpret_as_s32(v_m1));
v_uint32 v_src0, v_src1;
v_expand(v_src, v_src0, v_src1);
v_float32 v_dst0 = vx_load(dst + x);
v_float32 v_dst1 = vx_load(dst + x + step);
v_mf0 = v_mf0 != zero;
v_mf1 = v_mf1 != zero;
v_dst0 = v_select(v_mf0, v_fma(v_dst0, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src0)) * v_alpha), v_dst0);
v_dst1 = v_select(v_mf1, v_fma(v_dst1, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src1)) * v_alpha), v_dst1);
v_store(dst + x , v_dst0);
v_store(dst + x + step, v_dst1);
}
} else if ( cn == 3 )
{
for (; x*cn <= size - cVectorWidth*cn; x += cVectorWidth )
{
v_uint16 v_src0, v_src1, v_src2;
v_load_deinterleave(src + x * cn, v_src0, v_src1, v_src2);
v_uint16 v_mask = v_reinterpret_as_u16(vx_load_expand(mask + x));
v_uint32 v_m0, v_m1;
v_expand(v_mask, v_m0, v_m1);
v_uint32 v_src00, v_src01, v_src10, v_src11, v_src20, v_src21;
v_expand(v_src0, v_src00, v_src01);
v_expand(v_src1, v_src10, v_src11);
v_expand(v_src2, v_src20, v_src21);
v_float32 v_dst00, v_dst01, v_dst02, v_dst10, v_dst11, v_dst20, v_dst21;
v_load_deinterleave(dst + x * cn , v_dst00, v_dst10, v_dst20);
v_load_deinterleave(dst + (x + step) * cn, v_dst01, v_dst11, v_dst21);
v_float32 v_mf0, v_mf1;
v_mf0 = v_cvt_f32(v_reinterpret_as_s32(v_m0));
v_mf1 = v_cvt_f32(v_reinterpret_as_s32(v_m1));
v_mf0 = v_mf0 != zero;
v_mf1 = v_mf1 != zero;
v_dst00 = v_select(v_mf0, v_fma(v_dst00, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src00)) * v_alpha), v_dst00);
v_dst10 = v_select(v_mf0, v_fma(v_dst10, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src10)) * v_alpha), v_dst10);
v_dst20 = v_select(v_mf0, v_fma(v_dst20, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src20)) * v_alpha), v_dst20);
v_dst01 = v_select(v_mf1, v_fma(v_dst01, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src01)) * v_alpha), v_dst01);
v_dst11 = v_select(v_mf1, v_fma(v_dst11, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src11)) * v_alpha), v_dst11);
v_dst21 = v_select(v_mf1, v_fma(v_dst21, v_beta, v_cvt_f32(v_reinterpret_as_s32(v_src21)) * v_alpha), v_dst21);
v_store_interleave(dst + x * cn , v_dst00, v_dst10, v_dst20);
v_store_interleave(dst + ( x + step ) * cn, v_dst01, v_dst11, v_dst21);
}
}
} }
#endif // CV_SIMD #endif // CV_SIMD
accW_general_(src, dst, mask, len, cn, alpha, x); accW_general_(src, dst, mask, len, cn, alpha, x);
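Scalar reference of the masked update the vector code above implements (illustrative only; beta stands for 1 - alpha, as in accumulateWeighted): dst[i] = mask[i] ? dst[i] * beta + src[i] * alpha : dst[i].

#include <cstdio>

static void accW_ref(const unsigned char* src, float* dst, const unsigned char* mask,
                     int len, float alpha)
{
    const float beta = 1.f - alpha;
    for (int i = 0; i < len; ++i)
        if (mask[i])
            dst[i] = dst[i] * beta + (float)src[i] * alpha;   // masked-out elements keep dst
}

int main()
{
    unsigned char src[4]  = {100, 100, 100, 100};
    unsigned char mask[4] = {255, 0, 255, 0};
    float dst[4] = {10.f, 10.f, 10.f, 10.f};
    accW_ref(src, dst, mask, 4, 0.5f);
    std::printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);   // 55 10 55 10
}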

View File

@ -27,6 +27,13 @@ endif()
set(OPENCV_JAVADOC_DESTINATION "${OpenCV_BINARY_DIR}/doc/doxygen/html/javadoc" CACHE STRING "") set(OPENCV_JAVADOC_DESTINATION "${OpenCV_BINARY_DIR}/doc/doxygen/html/javadoc" CACHE STRING "")
# Old Javadoc URL looks like this: https://docs.oracle.com/javase/6/docs/api/
# New Javadoc URL looks like this: https://docs.oracle.com/en/java/javase/11/docs/api/
set(OPENCV_JAVADOC_LINK_URL "" CACHE STRING "See details in modules/java/jar/CMakeLists.txt")
if(OPENCV_JAVADOC_LINK_URL)
set(CMAKE_CONFIG_OPENCV_JAVADOC_LINK "link=\"${OPENCV_JAVADOC_LINK_URL}\"")
endif()
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OPENCV_JAVA_DIR}/build.xml" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/build.xml.in" "${OPENCV_JAVA_DIR}/build.xml" @ONLY)
list(APPEND depends "${OPENCV_JAVA_DIR}/build.xml") list(APPEND depends "${OPENCV_JAVA_DIR}/build.xml")

View File

@ -42,7 +42,7 @@
bottom="Generated on ${timestamp} / OpenCV @OPENCV_VCSVERSION@" bottom="Generated on ${timestamp} / OpenCV @OPENCV_VCSVERSION@"
failonerror="true" failonerror="true"
encoding="UTF-8" charset="UTF-8" docencoding="UTF-8" encoding="UTF-8" charset="UTF-8" docencoding="UTF-8"
link="https://docs.oracle.com/javase/6/docs/api/" @CMAKE_CONFIG_OPENCV_JAVADOC_LINK@
additionalparam="--allow-script-in-comments" additionalparam="--allow-script-in-comments"
> >
<Header> <Header>

View File

@ -96,7 +96,7 @@ class SamplesFindFile(NewOpenCVTests):
def test_MissingFileException(self): def test_MissingFileException(self):
try: try:
res = cv.samples.findFile('non_existed.file', True) _res = cv.samples.findFile('non_existed.file', True)
self.assertEqual("Dead code", 0) self.assertEqual("Dead code", 0)
except cv.error as _e: except cv.error as _e:
pass pass

View File

@ -46,7 +46,8 @@ static std::vector<std::string>& getTestTagsSkipList()
#if OPENCV_32BIT_CONFIGURATION #if OPENCV_32BIT_CONFIGURATION
testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_2GB); testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_2GB);
#else #else
testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_6GB); if (!cvtest::runBigDataTests)
testSkipWithTags.push_back(CV_TEST_TAG_MEMORY_6GB);
#endif #endif
testSkipWithTags.push_back(CV_TEST_TAG_VERYLONG); testSkipWithTags.push_back(CV_TEST_TAG_VERYLONG);
#if defined(_DEBUG) #if defined(_DEBUG)

View File

@ -0,0 +1,161 @@
/*
* Author: Steve Nicholson
*
* A program that illustrates intersectConvexConvex in various scenarios
*/
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
using namespace cv;
using namespace std;
// Create a vector of points describing a rectangle with the given corners
static vector<Point> makeRectangle(Point topLeft, Point bottomRight)
{
vector<Point> rectangle;
rectangle.push_back(topLeft);
rectangle.push_back(Point(bottomRight.x, topLeft.y));
rectangle.push_back(bottomRight);
rectangle.push_back(Point(topLeft.x, bottomRight.y));
return rectangle;
}
static vector<Point> makeTriangle(Point point1, Point point2, Point point3)
{
vector<Point> triangle;
triangle.push_back(point1);
triangle.push_back(point2);
triangle.push_back(point3);
return triangle;
}
// Run intersectConvexConvex on two polygons then draw the polygons and their intersection (if there is one)
// Return the area of the intersection
static float drawIntersection(Mat &image, vector<Point> polygon1, vector<Point> polygon2, bool handleNested = true)
{
vector<Point> intersectionPolygon;
vector<vector<Point> > polygons;
polygons.push_back(polygon1);
polygons.push_back(polygon2);
float intersectArea = intersectConvexConvex(polygon1, polygon2, intersectionPolygon, handleNested);
if (intersectArea > 0)
{
Scalar fillColor(200, 200, 200);
// If the input is invalid, draw the intersection in red
if (!isContourConvex(polygon1) || !isContourConvex(polygon2))
{
fillColor = Scalar(0, 0, 255);
}
vector<vector<Point> > pp;
pp.push_back(intersectionPolygon);
fillPoly(image, pp, fillColor);
}
polylines(image, polygons, true, Scalar(0, 0, 0));
return intersectArea;
}
static void drawDescription(Mat &image, int intersectionArea, string description, Point origin)
{
const size_t bufSize=1024;
char caption[bufSize];
snprintf(caption, bufSize, "Intersection area: %d%s", intersectionArea, description.c_str());
putText(image, caption, origin, FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 0, 0));
}
static void intersectConvexExample()
{
Mat image(610, 550, CV_8UC3, Scalar(255, 255, 255));
float intersectionArea;
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 10), Point(50, 50)),
makeRectangle(Point(20, 20), Point(60, 60)));
drawDescription(image, (int)intersectionArea, "", Point(70, 40));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 70), Point(35, 95)),
makeRectangle(Point(35, 95), Point(60, 120)));
drawDescription(image, (int)intersectionArea, "", Point(70, 100));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 130), Point(60, 180)),
makeRectangle(Point(20, 140), Point(50, 170)),
true);
drawDescription(image, (int)intersectionArea, " (handleNested true)", Point(70, 160));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 190), Point(60, 240)),
makeRectangle(Point(20, 200), Point(50, 230)),
false);
drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 220));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 250), Point(60, 300)),
makeRectangle(Point(20, 250), Point(50, 290)),
true);
drawDescription(image, (int)intersectionArea, " (handleNested true)", Point(70, 280));
// These rectangles share an edge so handleNested can be false and an intersection is still found
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 310), Point(60, 360)),
makeRectangle(Point(20, 310), Point(50, 350)),
false);
drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 340));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 370), Point(60, 420)),
makeRectangle(Point(20, 371), Point(50, 410)),
false);
drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 400));
// A vertex of the triangle lies on an edge of the rectangle so handleNested can be false and an intersection is still found
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 430), Point(60, 480)),
makeTriangle(Point(35, 430), Point(20, 470), Point(50, 470)),
false);
drawDescription(image, (int)intersectionArea, " (handleNested false)", Point(70, 460));
// Show intersection of overlapping rectangle and triangle
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 490), Point(40, 540)),
makeTriangle(Point(25, 500), Point(25, 530), Point(60, 515)),
false);
drawDescription(image, (int)intersectionArea, "", Point(70, 520));
// This concave polygon is invalid input to intersectConvexConvex so it returns an invalid intersection
vector<Point> notConvex;
notConvex.push_back(Point(25, 560));
notConvex.push_back(Point(25, 590));
notConvex.push_back(Point(45, 580));
notConvex.push_back(Point(60, 600));
notConvex.push_back(Point(60, 550));
notConvex.push_back(Point(45, 570));
intersectionArea = drawIntersection(image,
makeRectangle(Point(10, 550), Point(50, 600)),
notConvex,
false);
drawDescription(image, (int)intersectionArea, " (invalid input: not convex)", Point(70, 580));
imshow("Intersections", image);
waitKey(0);
}
int main()
{
intersectConvexExample();
}

View File

@ -116,8 +116,10 @@ double compose_megapix = -1;
float conf_thresh = 1.f; float conf_thresh = 1.f;
#ifdef HAVE_OPENCV_XFEATURES2D #ifdef HAVE_OPENCV_XFEATURES2D
string features_type = "surf"; string features_type = "surf";
float match_conf = 0.65f;
#else #else
string features_type = "orb"; string features_type = "orb";
float match_conf = 0.3f;
#endif #endif
string matcher_type = "homography"; string matcher_type = "homography";
string estimator_type = "homography"; string estimator_type = "homography";
@ -132,7 +134,6 @@ int expos_comp_type = ExposureCompensator::GAIN_BLOCKS;
int expos_comp_nr_feeds = 1; int expos_comp_nr_feeds = 1;
int expos_comp_nr_filtering = 2; int expos_comp_nr_filtering = 2;
int expos_comp_block_size = 32; int expos_comp_block_size = 32;
float match_conf = 0.3f;
string seam_find_type = "gc_color"; string seam_find_type = "gc_color";
int blend_type = Blender::MULTI_BAND; int blend_type = Blender::MULTI_BAND;
int timelapse_type = Timelapser::AS_IS; int timelapse_type = Timelapser::AS_IS;
@ -196,7 +197,7 @@ static int parseCmdArgs(int argc, char** argv)
else if (string(argv[i]) == "--features") else if (string(argv[i]) == "--features")
{ {
features_type = argv[i + 1]; features_type = argv[i + 1];
if (features_type == "orb") if (string(features_type) == "orb")
match_conf = 0.3f; match_conf = 0.3f;
i++; i++;
} }

View File

@ -14,9 +14,9 @@ using namespace cv;
const char* keys = const char* keys =
"{ help h| | Print help message. }" "{ help h| | Print help message. }"
"{ input1 | | Path to input image 1. }" "{ @input1 | | Path to input image 1. }"
"{ input2 | | Path to input image 2. }" "{ @input2 | | Path to input image 2. }"
"{ input3 | | Path to input image 3. }"; "{ @input3 | | Path to input image 3. }";
/** /**
* @function main * @function main

View File

@ -14,7 +14,7 @@ parser.add_argument('--median_filter', default=0, type=int, help='Kernel size of
args = parser.parse_args() args = parser.parse_args()
net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model)) net = cv.dnn.readNetFromTorch(cv.samples.findFile(args.model))
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
if args.input: if args.input:
cap = cv.VideoCapture(args.input) cap = cv.VideoCapture(args.input)

View File

@ -27,7 +27,7 @@ args = parser.parse_args()
### Get OpenCV predictions ##################################################### ### Get OpenCV predictions #####################################################
net = cv.dnn.readNetFromTensorflow(cv.samples.findFile(args.weights), cv.samples.findFile(args.prototxt)) net = cv.dnn.readNetFromTensorflow(cv.samples.findFile(args.weights), cv.samples.findFile(args.prototxt))
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV); net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
detections = [] detections = []
for imgName in os.listdir(args.images): for imgName in os.listdir(args.images):

View File

@ -134,7 +134,7 @@ def main():
for j in range(4): for j in range(4):
p1 = (vertices[j][0], vertices[j][1]) p1 = (vertices[j][0], vertices[j][1])
p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1]) p2 = (vertices[(j + 1) % 4][0], vertices[(j + 1) % 4][1])
cv.line(frame, p1, p2, (0, 255, 0), 1); cv.line(frame, p1, p2, (0, 255, 0), 1)
# Put efficiency information # Put efficiency information
cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

View File

@ -21,7 +21,7 @@ def tokenize(s):
elif token: elif token:
tokens.append(token) tokens.append(token)
token = "" token = ""
isString = (symbol == '\"' or symbol == '\'') ^ isString; isString = (symbol == '\"' or symbol == '\'') ^ isString
elif symbol == '{' or symbol == '}' or symbol == '[' or symbol == ']': elif symbol == '{' or symbol == '}' or symbol == '[' or symbol == ']':
if token: if token:

View File

@ -122,7 +122,7 @@ def createSSDGraph(modelPath, configPath, outputPath):
print('Input image size: %dx%d' % (image_width, image_height)) print('Input image size: %dx%d' % (image_width, image_height))
# Read the graph. # Read the graph.
inpNames = ['image_tensor'] _inpNames = ['image_tensor']
outNames = ['num_detections', 'detection_scores', 'detection_boxes', 'detection_classes'] outNames = ['num_detections', 'detection_scores', 'detection_boxes', 'detection_classes']
writeTextGraph(modelPath, outputPath, outNames) writeTextGraph(modelPath, outputPath, outNames)

View File

@ -45,7 +45,7 @@ def main():
small = img small = img
for i in xrange(3): for _i in xrange(3):
small = cv.pyrDown(small) small = cv.pyrDown(small)
def onmouse(event, x, y, flags, param): def onmouse(event, x, y, flags, param):

View File

@ -97,7 +97,7 @@ def main():
obj_points.append(pattern_points) obj_points.append(pattern_points)
# calculate camera distortion # calculate camera distortion
rms, camera_matrix, dist_coefs, rvecs, tvecs = cv.calibrateCamera(obj_points, img_points, (w, h), None, None) rms, camera_matrix, dist_coefs, _rvecs, _tvecs = cv.calibrateCamera(obj_points, img_points, (w, h), None, None)
print("\nRMS:", rms) print("\nRMS:", rms)
print("camera matrix:\n", camera_matrix) print("camera matrix:\n", camera_matrix)
@ -106,7 +106,7 @@ def main():
# undistort the image with the calibration # undistort the image with the calibration
print('') print('')
for fn in img_names if debug_dir else []: for fn in img_names if debug_dir else []:
path, name, ext = splitfn(fn) _path, name, _ext = splitfn(fn)
img_found = os.path.join(debug_dir, name + '_chess.png') img_found = os.path.join(debug_dir, name + '_chess.png')
outfile = os.path.join(debug_dir, name + '_undistorted.png') outfile = os.path.join(debug_dir, name + '_undistorted.png')
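
The discarded _rvecs/_tvecs above come from cv.calibrateCamera; a compressed sketch of the calibrate-then-undistort flow, assuming a 9x6 chessboard and a couple of hypothetical board images (error handling omitted).
@code{.py}
import cv2 as cv
import numpy as np

pattern_size = (9, 6)                                   # inner corners of the assumed board
pattern_points = np.zeros((np.prod(pattern_size), 3), np.float32)
pattern_points[:, :2] = np.indices(pattern_size).T.reshape(-1, 2)

obj_points, img_points = [], []
for fn in ["board_01.png", "board_02.png"]:             # hypothetical images
    gray = cv.imread(fn, cv.IMREAD_GRAYSCALE)
    found, corners = cv.findChessboardCorners(gray, pattern_size)
    if found:
        obj_points.append(pattern_points)
        img_points.append(corners)

h, w = gray.shape
rms, K, dist, _rvecs, _tvecs = cv.calibrateCamera(obj_points, img_points, (w, h), None, None)
undistorted = cv.undistort(gray, K, dist)               # apply the estimated intrinsics
@endcode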

View File

@ -184,7 +184,7 @@ def main():
extrinsics = fs.getNode('extrinsic_parameters').mat() extrinsics = fs.getNode('extrinsic_parameters').mat()
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D # pylint: disable=unused-variable
fig = plt.figure() fig = plt.figure()
ax = fig.gca(projection='3d') ax = fig.gca(projection='3d')

View File

@ -46,7 +46,7 @@ class App():
cam = video.create_capture(fn, fallback='synth:bg=baboon.jpg:class=chess:noise=0.05') cam = video.create_capture(fn, fallback='synth:bg=baboon.jpg:class=chess:noise=0.05')
while True: while True:
flag, frame = cam.read() _flag, frame = cam.read()
cv.imshow('camera', frame) cv.imshow('camera', frame)
small = cv.pyrDown(frame) small = cv.pyrDown(frame)

View File

@ -38,7 +38,7 @@ def main():
cap = video.create_capture(fn) cap = video.create_capture(fn)
while True: while True:
flag, img = cap.read() _flag, img = cap.read()
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
thrs1 = cv.getTrackbarPos('thrs1', 'edge') thrs1 = cv.getTrackbarPos('thrs1', 'edge')
thrs2 = cv.getTrackbarPos('thrs2', 'edge') thrs2 = cv.getTrackbarPos('thrs2', 'edge')
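
The edge sample above just feeds two trackbar positions into cv.Canny; the core call without the UI, with arbitrary thresholds and a placeholder input.
@code{.py}
import cv2 as cv

img = cv.imread("input.jpg")                  # hypothetical input
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
edges = cv.Canny(gray, 100, 200)              # lower/upper hysteresis thresholds
vis = img.copy()
vis[edges != 0] = (0, 255, 0)                 # overlay detected edges in green
@endcode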

View File

@ -48,7 +48,7 @@ def main():
cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg'))) cam = create_capture(video_src, fallback='synth:bg={}:noise=0.05'.format(cv.samples.findFile('samples/data/lena.jpg')))
while True: while True:
ret, img = cam.read() _ret, img = cam.read()
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
gray = cv.equalizeHist(gray) gray = cv.equalizeHist(gray)
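
The equalized grayscale frame above is what the cascade detector consumes; a bare sketch of the detection step (the cv.data.haarcascades path is how the pip packages ship the cascades and may differ in other installs).
@code{.py}
import cv2 as cv

cascade = cv.CascadeClassifier(cv.data.haarcascades + "haarcascade_frontalface_default.xml")
img = cv.imread("people.jpg")                 # hypothetical input
gray = cv.equalizeHist(cv.cvtColor(img, cv.COLOR_BGR2GRAY))
faces = cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=4, minSize=(30, 30))
for x, y, w, h in faces:
    cv.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
@endcode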

View File

@ -88,6 +88,7 @@ def main():
update() update()
ch = cv.waitKey(0) ch = cv.waitKey(0)
if ch == ord('f'): if ch == ord('f'):
global cur_func_name
if PY3: if PY3:
cur_func_name = next(dist_func_names) cur_func_name = next(dist_func_names)
else: else:

View File

@ -30,7 +30,7 @@ def main():
circles = cv.HoughCircles(img, cv.HOUGH_GRADIENT, 1, 10, np.array([]), 100, 30, 1, 30) circles = cv.HoughCircles(img, cv.HOUGH_GRADIENT, 1, 10, np.array([]), 100, 30, 1, 30)
if circles is not None: # Check if circles have been found and only then iterate over these and add them to the image if circles is not None: # Check if circles have been found and only then iterate over these and add them to the image
a, b, c = circles.shape _a, b, _c = circles.shape
for i in range(b): for i in range(b):
cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), circles[0][i][2], (0, 0, 255), 3, cv.LINE_AA) cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), circles[0][i][2], (0, 0, 255), 3, cv.LINE_AA)
cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), 2, (0, 255, 0), 3, cv.LINE_AA) # draw center of circle cv.circle(cimg, (circles[0][i][0], circles[0][i][1]), 2, (0, 255, 0), 3, cv.LINE_AA) # draw center of circle
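
A condensed version of the detection whose result is unpacked above; parameter values follow the sample, the input file is a placeholder.
@code{.py}
import cv2 as cv

img = cv.imread("coins.png", cv.IMREAD_GRAYSCALE)   # hypothetical input
img = cv.medianBlur(img, 5)
cimg = cv.cvtColor(img, cv.COLOR_GRAY2BGR)

circles = cv.HoughCircles(img, cv.HOUGH_GRADIENT, 1, 10,
                          param1=100, param2=30, minRadius=1, maxRadius=30)
if circles is not None:
    for x, y, r in circles[0]:
        cv.circle(cimg, (int(x), int(y)), int(r), (0, 0, 255), 3, cv.LINE_AA)
@endcode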

View File

@ -29,14 +29,14 @@ def main():
if True: # HoughLinesP if True: # HoughLinesP
lines = cv.HoughLinesP(dst, 1, math.pi/180.0, 40, np.array([]), 50, 10) lines = cv.HoughLinesP(dst, 1, math.pi/180.0, 40, np.array([]), 50, 10)
a,b,c = lines.shape a, b, _c = lines.shape
for i in range(a): for i in range(a):
cv.line(cdst, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]), (0, 0, 255), 3, cv.LINE_AA) cv.line(cdst, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]), (0, 0, 255), 3, cv.LINE_AA)
else: # HoughLines else: # HoughLines
lines = cv.HoughLines(dst, 1, math.pi/180.0, 50, np.array([]), 0, 0) lines = cv.HoughLines(dst, 1, math.pi/180.0, 50, np.array([]), 0, 0)
if lines is not None: if lines is not None:
a,b,c = lines.shape a, b, _c = lines.shape
for i in range(a): for i in range(a):
rho = lines[i][0][0] rho = lines[i][0][0]
theta = lines[i][0][1] theta = lines[i][0][1]
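
The probabilistic variant used in the first branch above, reduced to its essentials (Canny thresholds and line parameters follow the sample; the image is a placeholder).
@code{.py}
import math
import cv2 as cv

src = cv.imread("building.jpg", cv.IMREAD_GRAYSCALE)   # hypothetical input
dst = cv.Canny(src, 50, 200)
cdst = cv.cvtColor(dst, cv.COLOR_GRAY2BGR)

lines = cv.HoughLinesP(dst, 1, math.pi / 180.0, 40, minLineLength=50, maxLineGap=10)
if lines is not None:
    for x1, y1, x2, y2 in lines[:, 0]:
        cv.line(cdst, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 3, cv.LINE_AA)
@endcode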

View File

@ -33,7 +33,7 @@ def main():
points, _ = make_gaussians(cluster_n, img_size) points, _ = make_gaussians(cluster_n, img_size)
term_crit = (cv.TERM_CRITERIA_EPS, 30, 0.1) term_crit = (cv.TERM_CRITERIA_EPS, 30, 0.1)
ret, labels, centers = cv.kmeans(points, cluster_n, None, term_crit, 10, 0) _ret, labels, _centers = cv.kmeans(points, cluster_n, None, term_crit, 10, 0)
img = np.zeros((img_size, img_size, 3), np.uint8) img = np.zeros((img_size, img_size, 3), np.uint8)
for (x, y), label in zip(np.int32(points), labels.ravel()): for (x, y), label in zip(np.int32(points), labels.ravel()):
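
The clustering call whose outputs are partly discarded above; a self-contained sketch on synthetic 2-D points (cluster count and termination criteria are arbitrary).
@code{.py}
import cv2 as cv
import numpy as np

pts = np.float32(np.random.randn(300, 2) * 30 + 128)    # synthetic point cloud
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.1)
_compactness, labels, centers = cv.kmeans(pts, 4, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)
print(centers)                                           # one row per cluster centre
@endcode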

View File

@ -60,7 +60,7 @@ def main():
cv.createTrackbar('%d'%i, 'level control', 5, 50, nothing) cv.createTrackbar('%d'%i, 'level control', 5, 50, nothing)
while True: while True:
ret, frame = cap.read() _ret, frame = cap.read()
pyr = build_lappyr(frame, leveln) pyr = build_lappyr(frame, leveln)
for i in xrange(leveln): for i in xrange(leveln):

View File

@ -64,14 +64,14 @@ def main():
fn = 0 fn = 0
cam = video.create_capture(fn) cam = video.create_capture(fn)
ret, prev = cam.read() _ret, prev = cam.read()
prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY) prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY)
show_hsv = False show_hsv = False
show_glitch = False show_glitch = False
cur_glitch = prev.copy() cur_glitch = prev.copy()
while True: while True:
ret, img = cam.read() _ret, img = cam.read()
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0) flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
prevgray = gray prevgray = gray
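
The dense-flow call inside the loop above, shown once between two consecutive frames (the capture source is an assumption).
@code{.py}
import cv2 as cv

cam = cv.VideoCapture(0)                      # assumed camera index
_ret, prev = cam.read()
_ret, img = cam.read()
prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags
flow = cv.calcOpticalFlowFarneback(prevgray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
print(flow.shape)                             # (h, w, 2): per-pixel x/y displacement
@endcode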

View File

@ -51,7 +51,7 @@ def main():
print('loading error') print('loading error')
continue continue
found, w = hog.detectMultiScale(img, winStride=(8,8), padding=(32,32), scale=1.05) found, _w = hog.detectMultiScale(img, winStride=(8,8), padding=(32,32), scale=1.05)
found_filtered = [] found_filtered = []
for ri, r in enumerate(found): for ri, r in enumerate(found):
for qi, q in enumerate(found): for qi, q in enumerate(found):
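
The detector behind the detectMultiScale call above, set up from scratch (the image path is a placeholder).
@code{.py}
import cv2 as cv

hog = cv.HOGDescriptor()
hog.setSVMDetector(cv.HOGDescriptor_getDefaultPeopleDetector())

img = cv.imread("pedestrians.jpg")            # hypothetical input
found, _weights = hog.detectMultiScale(img, winStride=(8, 8), padding=(32, 32), scale=1.05)
for x, y, w, h in found:
    cv.rectangle(img, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
@endcode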

View File

@ -69,8 +69,8 @@ def main():
out_points = points[mask] out_points = points[mask]
out_colors = colors[mask] out_colors = colors[mask]
out_fn = 'out.ply' out_fn = 'out.ply'
write_ply('out.ply', out_points, out_colors) write_ply(out_fn, out_points, out_colors)
print('%s saved' % 'out.ply') print('%s saved' % out_fn)
cv.imshow('left', imgL) cv.imshow('left', imgL)
cv.imshow('disparity', (disp-min_disp)/num_disp) cv.imshow('disparity', (disp-min_disp)/num_disp)
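
The disparity shown above comes from a semi-global block matcher; a minimal configuration with parameters simplified relative to the sample (image names are placeholders).
@code{.py}
import cv2 as cv
import numpy as np

imgL = cv.imread("aloeL.jpg", cv.IMREAD_GRAYSCALE)    # hypothetical rectified pair
imgR = cv.imread("aloeR.jpg", cv.IMREAD_GRAYSCALE)

min_disp, num_disp = 16, 96                            # num_disp must be divisible by 16
stereo = cv.StereoSGBM_create(minDisparity=min_disp, numDisparities=num_disp, blockSize=5)
disp = stereo.compute(imgL, imgR).astype(np.float32) / 16.0   # fixed-point -> pixel units
cv.imshow("disparity", (disp - min_disp) / num_disp)
cv.waitKey(0)
@endcode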

View File

@ -32,7 +32,7 @@ def main():
w, h = 512, 512 w, h = 512, 512
args, args_list = getopt.getopt(sys.argv[1:], 'o:', []) args, _args_list = getopt.getopt(sys.argv[1:], 'o:', [])
args = dict(args) args = dict(args)
out = None out = None
if '-o' in args: if '-o' in args:

View File

@ -25,13 +25,13 @@ def access_pixel():
y = 0 y = 0
x = 0 x = 0
## [Pixel access 1] ## [Pixel access 1]
intensity = img[y,x] _intensity = img[y,x]
## [Pixel access 1] ## [Pixel access 1]
## [Pixel access 3] ## [Pixel access 3]
blue = img[y,x,0] _blue = img[y,x,0]
green = img[y,x,1] _green = img[y,x,1]
red = img[y,x,2] _red = img[y,x,2]
## [Pixel access 3] ## [Pixel access 3]
## [Pixel access 5] ## [Pixel access 5]
@ -42,12 +42,12 @@ def reference_counting():
# Memory management and reference counting # Memory management and reference counting
## [Reference counting 2] ## [Reference counting 2]
img = cv.imread('image.jpg') img = cv.imread('image.jpg')
img1 = np.copy(img) _img1 = np.copy(img)
## [Reference counting 2] ## [Reference counting 2]
## [Reference counting 3] ## [Reference counting 3]
img = cv.imread('image.jpg') img = cv.imread('image.jpg')
sobelx = cv.Sobel(img, cv.CV_32F, 1, 0); _sobelx = cv.Sobel(img, cv.CV_32F, 1, 0)
## [Reference counting 3] ## [Reference counting 3]
def primitive_operations(): def primitive_operations():
@ -57,17 +57,17 @@ def primitive_operations():
## [Set image to black] ## [Set image to black]
## [Select ROI] ## [Select ROI]
smallImg = img[10:110,10:110] _smallImg = img[10:110,10:110]
## [Select ROI] ## [Select ROI]
## [BGR to Gray] ## [BGR to Gray]
img = cv.imread('image.jpg') img = cv.imread('image.jpg')
grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY) _grey = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
## [BGR to Gray] ## [BGR to Gray]
src = np.ones((4,4), np.uint8) src = np.ones((4,4), np.uint8)
## [Convert to CV_32F] ## [Convert to CV_32F]
dst = src.astype(np.float32) _dst = src.astype(np.float32)
## [Convert to CV_32F] ## [Convert to CV_32F]
def visualize_images(): def visualize_images():

View File

@ -25,8 +25,8 @@ def gammaCorrection():
res = cv.LUT(img_original, lookUpTable) res = cv.LUT(img_original, lookUpTable)
## [changing-contrast-brightness-gamma-correction] ## [changing-contrast-brightness-gamma-correction]
img_gamma_corrected = cv.hconcat([img_original, res]); img_gamma_corrected = cv.hconcat([img_original, res])
cv.imshow("Gamma correction", img_gamma_corrected); cv.imshow("Gamma correction", img_gamma_corrected)
def on_linear_transform_alpha_trackbar(val): def on_linear_transform_alpha_trackbar(val):
global alpha global alpha
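
The lookup table applied above is built once from the gamma curve; a standalone sketch with an arbitrary gamma and a placeholder input file.
@code{.py}
import cv2 as cv
import numpy as np

gamma = 0.5
lut = np.array([np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
                for i in range(256)], dtype=np.uint8)

img = cv.imread("scene.jpg")                  # hypothetical input
res = cv.LUT(img, lut)                        # remap every pixel through the 256-entry table
@endcode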

View File

@ -85,13 +85,13 @@ contours, _ = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)
for i, c in enumerate(contours): for i, c in enumerate(contours):
# Calculate the area of each contour # Calculate the area of each contour
area = cv.contourArea(c); area = cv.contourArea(c)
# Ignore contours that are too small or too large # Ignore contours that are too small or too large
if area < 1e2 or 1e5 < area: if area < 1e2 or 1e5 < area:
continue continue
# Draw each contour only for visualisation purposes # Draw each contour only for visualisation purposes
cv.drawContours(src, contours, i, (0, 0, 255), 2); cv.drawContours(src, contours, i, (0, 0, 255), 2)
# Find the orientation of each shape # Find the orientation of each shape
getOrientation(c, src) getOrientation(c, src)
## [contours] ## [contours]
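
The loop above filters contours by area before estimating orientation; the surrounding calls, condensed (threshold values are assumptions, and the two-value findContours return matches OpenCV 4).
@code{.py}
import cv2 as cv

img = cv.imread("shapes.png")                 # hypothetical input
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
_retval, bw = cv.threshold(gray, 50, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)

contours, _hierarchy = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)
for i, c in enumerate(contours):
    area = cv.contourArea(c)
    if area < 1e2 or area > 1e5:              # skip blobs that are too small or too large
        continue
    cv.drawContours(img, contours, i, (0, 0, 255), 2)
@endcode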

View File

@ -70,7 +70,7 @@ def main():
draw_str(res, (20, 60), "frame interval : %.1f ms" % (frame_interval.value*1000)) draw_str(res, (20, 60), "frame interval : %.1f ms" % (frame_interval.value*1000))
cv.imshow('threaded video', res) cv.imshow('threaded video', res)
if len(pending) < threadn: if len(pending) < threadn:
ret, frame = cap.read() _ret, frame = cap.read()
t = clock() t = clock()
frame_interval.update(t - last_frame_time) frame_interval.update(t - last_frame_time)
last_frame_time = t last_frame_time = t

View File

@ -42,7 +42,7 @@ def main():
cv.createTrackbar("Focus", "Video", focus, 100, lambda v: cap.set(cv.CAP_PROP_FOCUS, v / 100)) cv.createTrackbar("Focus", "Video", focus, 100, lambda v: cap.set(cv.CAP_PROP_FOCUS, v / 100))
while True: while True:
status, img = cap.read() _status, img = cap.read()
fourcc = decode_fourcc(cap.get(cv.CAP_PROP_FOURCC)) fourcc = decode_fourcc(cap.get(cv.CAP_PROP_FOURCC))