diff --git a/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown b/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown index 61abdd4889..ccd6c4ff20 100644 --- a/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown +++ b/doc/py_tutorials/py_video/py_lucas_kanade/py_lucas_kanade.markdown @@ -1,225 +1,4 @@ Optical Flow {#tutorial_py_lucas_kanade} ============ -Goal ----- - -In this chapter, - - We will understand the concepts of optical flow and its estimation using Lucas-Kanade - method. - - We will use functions like **cv.calcOpticalFlowPyrLK()** to track feature points in a - video. - -Optical Flow ------------- - -Optical flow is the pattern of apparent motion of image objects between two consecutive frames -caused by the movemement of object or camera. It is 2D vector field where each vector is a -displacement vector showing the movement of points from first frame to second. Consider the image -below (Image Courtesy: [Wikipedia article on Optical -Flow](http://en.wikipedia.org/wiki/Optical_flow)). - -![image](images/optical_flow_basic1.jpg) - -It shows a ball moving in 5 consecutive frames. The arrow shows its displacement vector. Optical -flow has many applications in areas like : - -- Structure from Motion -- Video Compression -- Video Stabilization ... - -Optical flow works on several assumptions: - --# The pixel intensities of an object do not change between consecutive frames. -2. Neighbouring pixels have similar motion. - -Consider a pixel \f$I(x,y,t)\f$ in first frame (Check a new dimension, time, is added here. Earlier we -were working with images only, so no need of time). It moves by distance \f$(dx,dy)\f$ in next frame -taken after \f$dt\f$ time. So since those pixels are the same and intensity does not change, we can say, - -\f[I(x,y,t) = I(x+dx, y+dy, t+dt)\f] - -Then take taylor series approximation of right-hand side, remove common terms and divide by \f$dt\f$ to -get the following equation: - -\f[f_x u + f_y v + f_t = 0 \;\f] - -where: - -\f[f_x = \frac{\partial f}{\partial x} \; ; \; f_y = \frac{\partial f}{\partial y}\f]\f[u = \frac{dx}{dt} \; ; \; v = \frac{dy}{dt}\f] - -Above equation is called Optical Flow equation. In it, we can find \f$f_x\f$ and \f$f_y\f$, they are image -gradients. Similarly \f$f_t\f$ is the gradient along time. But \f$(u,v)\f$ is unknown. We cannot solve this -one equation with two unknown variables. So several methods are provided to solve this problem and -one of them is Lucas-Kanade. - -### Lucas-Kanade method - -We have seen an assumption before, that all the neighbouring pixels will have similar motion. -Lucas-Kanade method takes a 3x3 patch around the point. So all the 9 points have the same motion. We -can find \f$(f_x, f_y, f_t)\f$ for these 9 points. So now our problem becomes solving 9 equations with -two unknown variables which is over-determined. A better solution is obtained with least square fit -method. Below is the final solution which is two equation-two unknown problem and solve to get the -solution. - -\f[\begin{bmatrix} u \\ v \end{bmatrix} = -\begin{bmatrix} - \sum_{i}{f_{x_i}}^2 & \sum_{i}{f_{x_i} f_{y_i} } \\ - \sum_{i}{f_{x_i} f_{y_i}} & \sum_{i}{f_{y_i}}^2 -\end{bmatrix}^{-1} -\begin{bmatrix} - - \sum_{i}{f_{x_i} f_{t_i}} \\ - - \sum_{i}{f_{y_i} f_{t_i}} -\end{bmatrix}\f] - -( Check similarity of inverse matrix with Harris corner detector. It denotes that corners are better -points to be tracked.) 
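To make the least-squares step concrete, here is a minimal NumPy sketch (an illustration only, not part of the tutorial sources) that solves the same two-unknown system for a single 3x3 patch; the gradient values below are made up.
@code{.py}
import numpy as np

# Hypothetical spatial and temporal gradients sampled at the 9 pixels
# of a 3x3 patch (made-up values, for illustration only).
fx = np.array([0.9, 1.1, 1.0, 0.8, 1.2, 1.0, 0.9, 1.1, 1.0])
fy = np.array([0.4, 0.5, 0.6, 0.5, 0.4, 0.5, 0.6, 0.5, 0.4])
ft = np.array([-1.3, -1.6, -1.5, -1.2, -1.6, -1.4, -1.4, -1.5, -1.3])

# Stack the 9 optical flow equations fx*u + fy*v = -ft and solve them in the
# least-squares sense; this is equivalent to the 2x2 system shown above.
A = np.column_stack((fx, fy))
b = -ft
(u, v), *_ = np.linalg.lstsq(A, b, rcond=None)
print(u, v)
@endcode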
- -So from the user point of view, the idea is simple, we give some points to track, we receive the optical -flow vectors of those points. But again there are some problems. Until now, we were dealing with -small motions, so it fails when there is a large motion. To deal with this we use pyramids. When we go up in -the pyramid, small motions are removed and large motions become small motions. So by applying -Lucas-Kanade there, we get optical flow along with the scale. - -Lucas-Kanade Optical Flow in OpenCV ------------------------------------ - -OpenCV provides all these in a single function, **cv.calcOpticalFlowPyrLK()**. Here, we create a -simple application which tracks some points in a video. To decide the points, we use -**cv.goodFeaturesToTrack()**. We take the first frame, detect some Shi-Tomasi corner points in it, -then we iteratively track those points using Lucas-Kanade optical flow. For the function -**cv.calcOpticalFlowPyrLK()** we pass the previous frame, previous points and next frame. It -returns next points along with some status numbers which has a value of 1 if next point is found, -else zero. We iteratively pass these next points as previous points in next step. See the code -below: -@code{.py} -import numpy as np -import cv2 as cv - -cap = cv.VideoCapture('slow.flv') - -# params for ShiTomasi corner detection -feature_params = dict( maxCorners = 100, - qualityLevel = 0.3, - minDistance = 7, - blockSize = 7 ) - -# Parameters for lucas kanade optical flow -lk_params = dict( winSize = (15,15), - maxLevel = 2, - criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03)) - -# Create some random colors -color = np.random.randint(0,255,(100,3)) - -# Take first frame and find corners in it -ret, old_frame = cap.read() -old_gray = cv.cvtColor(old_frame, cv.COLOR_BGR2GRAY) -p0 = cv.goodFeaturesToTrack(old_gray, mask = None, **feature_params) - -# Create a mask image for drawing purposes -mask = np.zeros_like(old_frame) - -while(1): - ret,frame = cap.read() - frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) - - # calculate optical flow - p1, st, err = cv.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params) - - # Select good points - good_new = p1[st==1] - good_old = p0[st==1] - - # draw the tracks - for i,(new,old) in enumerate(zip(good_new,good_old)): - a,b = new.ravel() - c,d = old.ravel() - mask = cv.line(mask, (a,b),(c,d), color[i].tolist(), 2) - frame = cv.circle(frame,(a,b),5,color[i].tolist(),-1) - img = cv.add(frame,mask) - - cv.imshow('frame',img) - k = cv.waitKey(30) & 0xff - if k == 27: - break - - # Now update the previous frame and previous points - old_gray = frame_gray.copy() - p0 = good_new.reshape(-1,1,2) - -cv.destroyAllWindows() -cap.release() -@endcode -(This code doesn't check how correct are the next keypoints. So even if any feature point disappears -in image, there is a chance that optical flow finds the next point which may look close to it. So -actually for a robust tracking, corner points should be detected in particular intervals. OpenCV -samples comes up with such a sample which finds the feature points at every 5 frames. It also run a -backward-check of the optical flow points got to select only good ones. Check -samples/python/lk_track.py). - -See the results we got: - -![image](images/opticalflow_lk.jpg) - -Dense Optical Flow in OpenCV ----------------------------- - -Lucas-Kanade method computes optical flow for a sparse feature set (in our example, corners detected -using Shi-Tomasi algorithm). 
OpenCV provides another algorithm to find the dense optical flow. It -computes the optical flow for all the points in the frame. It is based on Gunner Farneback's -algorithm which is explained in "Two-Frame Motion Estimation Based on Polynomial Expansion" by -Gunner Farneback in 2003. - -Below sample shows how to find the dense optical flow using above algorithm. We get a 2-channel -array with optical flow vectors, \f$(u,v)\f$. We find their magnitude and direction. We color code the -result for better visualization. Direction corresponds to Hue value of the image. Magnitude -corresponds to Value plane. See the code below: -@code{.py} -import cv2 as cv -import numpy as np -cap = cv.VideoCapture("vtest.avi") - -ret, frame1 = cap.read() -prvs = cv.cvtColor(frame1,cv.COLOR_BGR2GRAY) -hsv = np.zeros_like(frame1) -hsv[...,1] = 255 - -while(1): - ret, frame2 = cap.read() - next = cv.cvtColor(frame2,cv.COLOR_BGR2GRAY) - - flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0) - - mag, ang = cv.cartToPolar(flow[...,0], flow[...,1]) - hsv[...,0] = ang*180/np.pi/2 - hsv[...,2] = cv.normalize(mag,None,0,255,cv.NORM_MINMAX) - bgr = cv.cvtColor(hsv,cv.COLOR_HSV2BGR) - - cv.imshow('frame2',bgr) - k = cv.waitKey(30) & 0xff - if k == 27: - break - elif k == ord('s'): - cv.imwrite('opticalfb.png',frame2) - cv.imwrite('opticalhsv.png',bgr) - prvs = next - -cap.release() -cv.destroyAllWindows() -@endcode -See the result below: - -![image](images/opticalfb.jpg) - -OpenCV comes with a more advanced sample on dense optical flow, please see -samples/python/opt_flow.py. - -Additional Resources --------------------- - -Exercises ---------- - --# Check the code in samples/python/lk_track.py. Try to understand the code. -2. Check the code in samples/python/opt_flow.py. Try to understand the code. +Tutorial content has been moved: @ref tutorial_optical_flow diff --git a/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown b/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown index b246d76067..3c162aa76e 100644 --- a/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown +++ b/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown @@ -1,185 +1,4 @@ Meanshift and Camshift {#tutorial_py_meanshift} ====================== -Goal ----- - -In this chapter, - -- We will learn about Meanshift and Camshift algorithms to find and track objects in videos. - -Meanshift ---------- - -The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel -distribution like histogram backprojection). You are given a small window ( may be a circle) and you -have to move that window to the area of maximum pixel density (or maximum number of points). It is -illustrated in the simple image given below: - -![image](images/meanshift_basics.jpg) - -The initial window is shown in blue circle with the name "C1". Its original center is marked in blue -rectangle, named "C1_o". But if you find the centroid of the points inside that window, you will -get the point "C1_r" (marked in small blue circle) which is the real centroid of window. Surely -they don't match. So move your window such that circle of the new window matches with previous -centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue -the iterations such that center of window and its centroid falls on the same location (or with a -small desired error). So finally what you obtain is a window with maximum pixel distribution. 
It is -marked with green circle, named "C2". As you can see in image, it has maximum number of points. The -whole process is demonstrated on a static image below: - -![image](images/meanshift_face.gif) - -So we normally pass the histogram backprojected image and initial target location. When the object -moves, obviously the movement is reflected in histogram backprojected image. As a result, meanshift -algorithm moves our window to the new location with maximum density. - -### Meanshift in OpenCV - -To use meanshift in OpenCV, first we need to setup the target, find its histogram so that we can -backproject the target on each frame for calculation of meanshift. We also need to provide initial -location of window. For histogram, only Hue is considered here. Also, to avoid false values due to -low light, low light values are discarded using **cv.inRange()** function. -@code{.py} -import numpy as np -import cv2 as cv - -cap = cv.VideoCapture('slow.flv') - -# take first frame of the video -ret,frame = cap.read() - -# setup initial location of window -r,h,c,w = 250,90,400,125 # simply hardcoded the values -track_window = (c,r,w,h) - -# set up the ROI for tracking -roi = frame[r:r+h, c:c+w] -hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) -mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) -roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) -cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) - -# Setup the termination criteria, either 10 iteration or move by atleast 1 pt -term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) - -while(1): - ret ,frame = cap.read() - - if ret == True: - hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) - dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) - - # apply meanshift to get the new location - ret, track_window = cv.meanShift(dst, track_window, term_crit) - - # Draw it on image - x,y,w,h = track_window - img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2) - cv.imshow('img2',img2) - - k = cv.waitKey(60) & 0xff - if k == 27: - break - else: - cv.imwrite(chr(k)+".jpg",img2) - - else: - break - -cv.destroyAllWindows() -cap.release() -@endcode -Three frames in a video I used is given below: - -![image](images/meanshift_result.jpg) - -Camshift --------- - -Did you closely watch the last result? There is a problem. Our window always has the same size when -car is farther away and it is very close to camera. That is not good. We need to adapt the window -size with size and rotation of the target. Once again, the solution came from "OpenCV Labs" and it -is called CAMshift (Continuously Adaptive Meanshift) published by Gary Bradsky in his paper -"Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998. - -It applies meanshift first. Once meanshift converges, it updates the size of the window as, -\f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of best fitting ellipse -to it. Again it applies the meanshift with new scaled search window and previous window location. -The process is continued until required accuracy is met. - -![image](images/camshift_face.gif) - -### Camshift in OpenCV - -It is almost same as meanshift, but it returns a rotated rectangle (that is our result) and box -parameters (used to be passed as search window in next iteration). 
See the code below: -@code{.py} -import numpy as np -import cv2 as cv - -cap = cv.VideoCapture('slow.flv') - -# take first frame of the video -ret,frame = cap.read() - -# setup initial location of window -r,h,c,w = 250,90,400,125 # simply hardcoded the values -track_window = (c,r,w,h) - -# set up the ROI for tracking -roi = frame[r:r+h, c:c+w] -hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) -mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) -roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) -cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) - -# Setup the termination criteria, either 10 iteration or move by atleast 1 pt -term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) - -while(1): - ret ,frame = cap.read() - - if ret == True: - hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) - dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) - - # apply meanshift to get the new location - ret, track_window = cv.CamShift(dst, track_window, term_crit) - - # Draw it on image - pts = cv.boxPoints(ret) - pts = np.int0(pts) - img2 = cv.polylines(frame,[pts],True, 255,2) - cv.imshow('img2',img2) - - k = cv.waitKey(60) & 0xff - if k == 27: - break - else: - cv.imwrite(chr(k)+".jpg",img2) - - else: - break - -cv.destroyAllWindows() -cap.release() -@endcode -Three frames of the result is shown below: - -![image](images/camshift_result.jpg) - -Additional Resources --------------------- - --# French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations - are taken from here) -2. Bradski, G.R., "Real time face and object tracking as a component of a perceptual user - interface," Applications of Computer Vision, 1998. WACV '98. Proceedings., Fourth IEEE Workshop - on , vol., no., pp.214,219, 19-21 Oct 1998 - -Exercises ---------- - --# OpenCV comes with a Python sample on interactive demo of camshift. Use it, hack it, understand - it. +Tutorial content has been moved: @ref tutorial_meanshift diff --git a/doc/py_tutorials/py_video/py_table_of_contents_video.markdown b/doc/py_tutorials/py_video/py_table_of_contents_video.markdown index 3a12d951e9..badb14e200 100644 --- a/doc/py_tutorials/py_video/py_table_of_contents_video.markdown +++ b/doc/py_tutorials/py_video/py_table_of_contents_video.markdown @@ -1,13 +1,13 @@ Video Analysis {#tutorial_py_table_of_contents_video} ============== -- @subpage tutorial_py_meanshift +- @ref tutorial_meanshift We have already seen an example of color-based tracking. It is simpler. This time, we see significantly better algorithms like "Meanshift", and its upgraded version, "Camshift" to find and track them. -- @subpage tutorial_py_lucas_kanade +- @ref tutorial_optical_flow Now let's discuss an important concept, "Optical Flow", which is related to videos and has many applications. 
diff --git a/doc/py_tutorials/py_video/py_meanshift/images/camshift_face.gif b/doc/tutorials/video/meanshift/images/camshift_face.gif similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/camshift_face.gif rename to doc/tutorials/video/meanshift/images/camshift_face.gif diff --git a/doc/py_tutorials/py_video/py_meanshift/images/camshift_result.jpg b/doc/tutorials/video/meanshift/images/camshift_result.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/camshift_result.jpg rename to doc/tutorials/video/meanshift/images/camshift_result.jpg diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_basics.jpg b/doc/tutorials/video/meanshift/images/meanshift_basics.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_basics.jpg rename to doc/tutorials/video/meanshift/images/meanshift_basics.jpg diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_face.gif b/doc/tutorials/video/meanshift/images/meanshift_face.gif similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_face.gif rename to doc/tutorials/video/meanshift/images/meanshift_face.gif diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_result.jpg b/doc/tutorials/video/meanshift/images/meanshift_result.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_result.jpg rename to doc/tutorials/video/meanshift/images/meanshift_result.jpg diff --git a/doc/tutorials/video/meanshift/meanshift.markdown b/doc/tutorials/video/meanshift/meanshift.markdown new file mode 100644 index 0000000000..c0e745824e --- /dev/null +++ b/doc/tutorials/video/meanshift/meanshift.markdown @@ -0,0 +1,118 @@ +Meanshift and Camshift {#tutorial_meanshift} +====================== + +Goal +---- + +In this chapter, + +- We will learn about the Meanshift and Camshift algorithms to track objects in videos. + +Meanshift +--------- + +The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel +distribution like histogram backprojection). You are given a small window (may be a circle) and you +have to move that window to the area of maximum pixel density (or maximum number of points). It is +illustrated in the simple image given below: + +![image](images/meanshift_basics.jpg) + +The initial window is shown in blue circle with the name "C1". Its original center is marked in blue +rectangle, named "C1_o". But if you find the centroid of the points inside that window, you will +get the point "C1_r" (marked in small blue circle) which is the real centroid of the window. Surely +they don't match. So move your window such that the circle of the new window matches with the previous +centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue +the iterations such that the center of window and its centroid falls on the same location (or within a +small desired error). So finally what you obtain is a window with maximum pixel distribution. It is +marked with a green circle, named "C2". As you can see in the image, it has maximum number of points. The +whole process is demonstrated on a static image below: + +![image](images/meanshift_face.gif) + +So we normally pass the histogram backprojected image and initial target location. When the object +moves, obviously the movement is reflected in the histogram backprojected image. 
As a result, the meanshift +algorithm moves our window to the new location with maximum density. + +### Meanshift in OpenCV + +To use meanshift in OpenCV, first we need to setup the target, find its histogram so that we can +backproject the target on each frame for calculation of meanshift. We also need to provide an initial +location of window. For histogram, only Hue is considered here. Also, to avoid false values due to +low light, low light values are discarded using **cv.inRange()** function. + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/meanshift/meanshift.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/video/meanshift/meanshift.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/meanshift/meanshift.py +@end_toggle + +Three frames in a video I used is given below: + +![image](images/meanshift_result.jpg) + +Camshift +-------- + +Did you closely watch the last result? There is a problem. Our window always has the same size whether +the car is very far or very close to the camera. That is not good. We need to adapt the window +size with size and rotation of the target. Once again, the solution came from "OpenCV Labs" and it +is called CAMshift (Continuously Adaptive Meanshift) published by Gary Bradsky in his paper +"Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998 @cite Bradski98 . + +It applies meanshift first. Once meanshift converges, it updates the size of the window as, +\f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of the best fitting ellipse +to it. Again it applies the meanshift with new scaled search window and previous window location. +The process continues until the required accuracy is met. + +![image](images/camshift_face.gif) + +### Camshift in OpenCV + +It is similar to meanshift, but returns a rotated rectangle (that is our result) and box +parameters (used to be passed as search window in next iteration). See the code below: + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/video/meanshift/camshift.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/meanshift/camshift.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/video/meanshift/camshift.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/meanshift/camshift.py +@end_toggle + +Three frames of the result is shown below: + +![image](images/camshift_result.jpg) + +Additional Resources +-------------------- + +-# French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations + are taken from there) +2. Bradski, G.R., "Real time face and object tracking as a component of a perceptual user + interface," Applications of Computer Vision, 1998. WACV '98. Proceedings., Fourth IEEE Workshop + on , vol., no., pp.214,219, 19-21 Oct 1998 + +Exercises +--------- + +-# OpenCV comes with a Python [sample](https://github.com/opencv/opencv/blob/master/samples/python/camshift.py) for an interactive demo of camshift. Use it, hack it, understand + it. 
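The new page pulls its listings in through @include from the sample files, so the code itself does not appear in this patch. As a reading aid, below is a minimal sketch of the Camshift loop the section describes, assuming the video file 'slow.flv' and the hardcoded initial window used by the original Python tutorial; it is an illustration, not the exact content of the referenced samples.
@code{.py}
import numpy as np
import cv2 as cv

cap = cv.VideoCapture('slow.flv')   # assumed input video
ret, frame = cap.read()

# Initial search window (x, y, w, h) and its hue histogram, as in the tutorial
x, y, w, h = 400, 250, 125, 90
track_window = (x, y, w, h)
roi = frame[y:y+h, x:x+w]
hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV)
mask = cv.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
roi_hist = cv.calcHist([hsv_roi], [0], mask, [180], [0, 180])
cv.normalize(roi_hist, roi_hist, 0, 255, cv.NORM_MINMAX)

# Stop after 10 iterations or when the window moves by less than 1 pt
term_crit = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
    dst = cv.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

    # CamShift returns a rotated rectangle plus the updated search window
    ret, track_window = cv.CamShift(dst, track_window, term_crit)
    pts = np.int0(cv.boxPoints(ret))
    img2 = cv.polylines(frame, [pts], True, 255, 2)

    cv.imshow('img2', img2)
    if cv.waitKey(60) & 0xff == 27:
        break

cap.release()
cv.destroyAllWindows()
@endcode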
diff --git a/doc/py_tutorials/py_video/py_lucas_kanade/images/optical_flow_basic1.jpg b/doc/tutorials/video/optical_flow/images/optical_flow_basic1.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_lucas_kanade/images/optical_flow_basic1.jpg rename to doc/tutorials/video/optical_flow/images/optical_flow_basic1.jpg diff --git a/doc/py_tutorials/py_video/py_lucas_kanade/images/opticalfb.jpg b/doc/tutorials/video/optical_flow/images/opticalfb.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_lucas_kanade/images/opticalfb.jpg rename to doc/tutorials/video/optical_flow/images/opticalfb.jpg diff --git a/doc/py_tutorials/py_video/py_lucas_kanade/images/opticalflow_lk.jpg b/doc/tutorials/video/optical_flow/images/opticalflow_lk.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_lucas_kanade/images/opticalflow_lk.jpg rename to doc/tutorials/video/optical_flow/images/opticalflow_lk.jpg diff --git a/doc/tutorials/video/optical_flow/optical_flow.markdown b/doc/tutorials/video/optical_flow/optical_flow.markdown new file mode 100644 index 0000000000..8b1d130238 --- /dev/null +++ b/doc/tutorials/video/optical_flow/optical_flow.markdown @@ -0,0 +1,156 @@ +Optical Flow {#tutorial_optical_flow} +============ + +Goal +---- + +In this chapter, + - We will understand the concepts of optical flow and its estimation using Lucas-Kanade + method. + - We will use functions like **cv.calcOpticalFlowPyrLK()** to track feature points in a + video. + - We will create a dense optical flow field using the **cv.calcOpticalFlowFarneback()** method. + +Optical Flow +------------ + +Optical flow is the pattern of apparent motion of image objects between two consecutive frames +caused by the movemement of object or camera. It is 2D vector field where each vector is a +displacement vector showing the movement of points from first frame to second. Consider the image +below (Image Courtesy: [Wikipedia article on Optical Flow](http://en.wikipedia.org/wiki/Optical_flow)). + +![image](images/optical_flow_basic1.jpg) + +It shows a ball moving in 5 consecutive frames. The arrow shows its displacement vector. Optical +flow has many applications in areas like : + +- Structure from Motion +- Video Compression +- Video Stabilization ... + +Optical flow works on several assumptions: + +-# The pixel intensities of an object do not change between consecutive frames. +2. Neighbouring pixels have similar motion. + +Consider a pixel \f$I(x,y,t)\f$ in first frame (Check a new dimension, time, is added here. Earlier we +were working with images only, so no need of time). It moves by distance \f$(dx,dy)\f$ in next frame +taken after \f$dt\f$ time. So since those pixels are the same and intensity does not change, we can say, + +\f[I(x,y,t) = I(x+dx, y+dy, t+dt)\f] + +Then take taylor series approximation of right-hand side, remove common terms and divide by \f$dt\f$ to +get the following equation: + +\f[f_x u + f_y v + f_t = 0 \;\f] + +where: + +\f[f_x = \frac{\partial f}{\partial x} \; ; \; f_y = \frac{\partial f}{\partial y}\f]\f[u = \frac{dx}{dt} \; ; \; v = \frac{dy}{dt}\f] + +Above equation is called Optical Flow equation. In it, we can find \f$f_x\f$ and \f$f_y\f$, they are image +gradients. Similarly \f$f_t\f$ is the gradient along time. But \f$(u,v)\f$ is unknown. We cannot solve this +one equation with two unknown variables. So several methods are provided to solve this problem and +one of them is Lucas-Kanade. 
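For completeness, the intermediate step behind that equation is the first-order Taylor expansion of the right-hand side (a standard derivation, added here only as a reading aid):

\f[I(x+dx, y+dy, t+dt) \approx I(x,y,t) + \frac{\partial I}{\partial x}dx + \frac{\partial I}{\partial y}dy + \frac{\partial I}{\partial t}dt\f]

Substituting this into the brightness-constancy assumption cancels the common \f$I(x,y,t)\f$ term, and dividing what remains by \f$dt\f$ yields \f$f_x u + f_y v + f_t = 0\f$ with \f$u = dx/dt\f$ and \f$v = dy/dt\f$.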
+ +### Lucas-Kanade method + +We have seen an assumption before, that all the neighbouring pixels will have similar motion. +Lucas-Kanade method takes a 3x3 patch around the point. So all the 9 points have the same motion. We +can find \f$(f_x, f_y, f_t)\f$ for these 9 points. So now our problem becomes solving 9 equations with +two unknown variables which is over-determined. A better solution is obtained with least square fit +method. Below is the final solution which is two equation-two unknown problem and solve to get the +solution. + +\f[\begin{bmatrix} u \\ v \end{bmatrix} = +\begin{bmatrix} + \sum_{i}{f_{x_i}}^2 & \sum_{i}{f_{x_i} f_{y_i} } \\ + \sum_{i}{f_{x_i} f_{y_i}} & \sum_{i}{f_{y_i}}^2 +\end{bmatrix}^{-1} +\begin{bmatrix} + - \sum_{i}{f_{x_i} f_{t_i}} \\ + - \sum_{i}{f_{y_i} f_{t_i}} +\end{bmatrix}\f] + +( Check similarity of inverse matrix with Harris corner detector. It denotes that corners are better +points to be tracked.) + +So from the user point of view, the idea is simple, we give some points to track, we receive the optical +flow vectors of those points. But again there are some problems. Until now, we were dealing with +small motions, so it fails when there is a large motion. To deal with this we use pyramids. When we go up in +the pyramid, small motions are removed and large motions become small motions. So by applying +Lucas-Kanade there, we get optical flow along with the scale. + +Lucas-Kanade Optical Flow in OpenCV +----------------------------------- + +OpenCV provides all these in a single function, **cv.calcOpticalFlowPyrLK()**. Here, we create a +simple application which tracks some points in a video. To decide the points, we use +**cv.goodFeaturesToTrack()**. We take the first frame, detect some Shi-Tomasi corner points in it, +then we iteratively track those points using Lucas-Kanade optical flow. For the function +**cv.calcOpticalFlowPyrLK()** we pass the previous frame, previous points and next frame. It +returns next points along with some status numbers which has a value of 1 if next point is found, +else zero. We iteratively pass these next points as previous points in next step. See the code +below: + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/video/optical_flow/optical_flow.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/optical_flow/optical_flow.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/video/optical_flow/optical_flow.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/optical_flow/optical_flow.py +@end_toggle + +(This code doesn't check how correct are the next keypoints. So even if any feature point disappears +in image, there is a chance that optical flow finds the next point which may look close to it. So +actually for a robust tracking, corner points should be detected in particular intervals. OpenCV +samples comes up with such a sample which finds the feature points at every 5 frames. It also run a +backward-check of the optical flow points got to select only good ones. Check +samples/python/lk_track.py). + +See the results we got: + +![image](images/opticalflow_lk.jpg) + +Dense Optical Flow in OpenCV +---------------------------- + +Lucas-Kanade method computes optical flow for a sparse feature set (in our example, corners detected +using Shi-Tomasi algorithm). 
OpenCV provides another algorithm to find the dense optical flow. It +computes the optical flow for all the points in the frame. It is based on Gunner Farneback's +algorithm which is explained in "Two-Frame Motion Estimation Based on Polynomial Expansion" by +Gunner Farneback in 2003. + +Below sample shows how to find the dense optical flow using above algorithm. We get a 2-channel +array with optical flow vectors, \f$(u,v)\f$. We find their magnitude and direction. We color code the +result for better visualization. Direction corresponds to Hue value of the image. Magnitude +corresponds to Value plane. See the code below: + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/cpp/tutorial_code/video/optical_flow/optical_flow_dense.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/optical_flow/optical_flow_dense.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/master/samples/python/tutorial_code/video/optical_flow/optical_flow_dense.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/optical_flow/optical_flow_dense.py +@end_toggle + + +See the result below: + +![image](images/opticalfb.jpg) diff --git a/doc/tutorials/video/table_of_content_video.markdown b/doc/tutorials/video/table_of_content_video.markdown index 2e30d2cc8b..92a5315355 100644 --- a/doc/tutorials/video/table_of_content_video.markdown +++ b/doc/tutorials/video/table_of_content_video.markdown @@ -14,3 +14,15 @@ tracking and foreground extractions. We will learn how to extract foreground masks from both videos and sequences of images and to show them. + +- @subpage tutorial_meanshift + + *Languages:* C++, Python + + Learn how to use the Meanshift and Camshift algorithms to track objects in videos. + +- @subpage tutorial_optical_flow + + *Languages:* C++, Python + + We will learn how to use optical flow methods to track sparse features or to create a dense representation. 
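As with the sparse example, the dense-flow listing is pulled in via @include rather than shown in this patch. The sketch below is a minimal illustration of the Farneback loop described in the dense optical flow section above, assuming the 'vtest.avi' test video used by the original Python tutorial; it is not the exact content of the referenced samples.
@code{.py}
import numpy as np
import cv2 as cv

cap = cv.VideoCapture('vtest.avi')   # assumed input video
ret, frame1 = cap.read()
prvs = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)

# HSV image used for visualization: hue = flow direction, value = magnitude
hsv = np.zeros_like(frame1)
hsv[..., 1] = 255

while True:
    ret, frame2 = cap.read()
    if not ret:
        break
    next = cv.cvtColor(frame2, cv.COLOR_BGR2GRAY)

    # Dense flow: one (u, v) vector per pixel
    flow = cv.calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang * 180 / np.pi / 2
    hsv[..., 2] = cv.normalize(mag, None, 0, 255, cv.NORM_MINMAX)
    bgr = cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

    cv.imshow('frame2', bgr)
    if cv.waitKey(30) & 0xff == 27:
        break
    prvs = next

cap.release()
cv.destroyAllWindows()
@endcode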
diff --git a/modules/calib3d/src/levmarq.cpp b/modules/calib3d/src/levmarq.cpp index 0d339ccf79..623cc202b9 100644 --- a/modules/calib3d/src/levmarq.cpp +++ b/modules/calib3d/src/levmarq.cpp @@ -80,11 +80,8 @@ namespace cv class LMSolverImpl CV_FINAL : public LMSolver { public: - LMSolverImpl() : maxIters(100) { init(); } - LMSolverImpl(const Ptr& _cb, int _maxIters) : cb(_cb), epsx(FLT_EPSILON), epsf(FLT_EPSILON), maxIters(_maxIters) { init(); } - LMSolverImpl(const Ptr& _cb, int _maxIters, double _eps) : cb(_cb), epsx(_eps), epsf(_eps), maxIters(_maxIters) { init(); } - - void init() + LMSolverImpl(const Ptr& _cb, int _maxIters, double _eps = FLT_EPSILON) + : cb(_cb), epsx(_eps), epsf(_eps), maxIters(_maxIters) { printInterval = 0; } diff --git a/modules/calib3d/src/stereobm.cpp b/modules/calib3d/src/stereobm.cpp index 166578deea..92a0238213 100644 --- a/modules/calib3d/src/stereobm.cpp +++ b/modules/calib3d/src/stereobm.cpp @@ -1130,7 +1130,7 @@ public: CV_Error( Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be not larger than image width or height" ); if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 ) - CV_Error( Error::StsOutOfRange, "numDisparities must be positive and divisble by 16" ); + CV_Error( Error::StsOutOfRange, "numDisparities must be positive and divisible by 16" ); if( params.textureThreshold < 0 ) CV_Error( Error::StsOutOfRange, "texture threshold must be non-negative" ); diff --git a/modules/core/include/opencv2/core/hal/intrin_avx.hpp b/modules/core/include/opencv2/core/hal/intrin_avx.hpp index e98524b9de..91e4483444 100644 --- a/modules/core/include/opencv2/core/hal/intrin_avx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_avx.hpp @@ -1015,6 +1015,34 @@ OPENCV_HAL_IMPL_AVX_ROTATE_CAST(v_rotate_right, v_float64x4, _mm256_castsi256_pd ////////// Reduce and mask ///////// /** Reduce **/ +inline unsigned v_reduce_sum(const v_uint8x32& a) +{ + __m256i half = _mm256_sad_epu8(a.val, _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))); +} +inline int v_reduce_sum(const v_int8x32& a) +{ + __m256i half = _mm256_sad_epu8(_mm256_xor_si256(a.val, _mm256_set1_epi8((schar)-128)), _mm256_setzero_si256()); + __m128i quarter = _mm_add_epi32(_v256_extract_low(half), _v256_extract_high(half)); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(quarter, _mm_unpackhi_epi64(quarter, quarter))) - 4096; +} +#define OPENCV_HAL_IMPL_AVX_REDUCE_32(_Tpvec, sctype, func, intrin) \ + inline sctype v_reduce_##func(const _Tpvec& a) \ + { \ + __m128i val = intrin(_v256_extract_low(a.val), _v256_extract_high(a.val)); \ + val = intrin(val, _mm_srli_si128(val,8)); \ + val = intrin(val, _mm_srli_si128(val,4)); \ + val = intrin(val, _mm_srli_si128(val,2)); \ + val = intrin(val, _mm_srli_si128(val,1)); \ + return (sctype)_mm_cvtsi128_si32(val); \ + } + +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, min, _mm_min_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, min, _mm_min_epi8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_uint8x32, uchar, max, _mm_max_epu8) +OPENCV_HAL_IMPL_AVX_REDUCE_32(v_int8x32, schar, max, _mm_max_epi8) + #define OPENCV_HAL_IMPL_AVX_REDUCE_16(_Tpvec, sctype, func, intrin) \ inline sctype v_reduce_##func(const _Tpvec& a) \ { \ @@ -1062,31 +1090,6 @@ OPENCV_HAL_IMPL_AVX_REDUCE_8(v_int32x8, int, max, _mm_max_epi32) OPENCV_HAL_IMPL_AVX_REDUCE_FLT(min, _mm_min_ps) 
OPENCV_HAL_IMPL_AVX_REDUCE_FLT(max, _mm_max_ps) -inline ushort v_reduce_sum(const v_uint16x16& a) -{ - __m128i a0 = _v256_extract_low(a.val); - __m128i a1 = _v256_extract_high(a.val); - - __m128i s0 = _mm_adds_epu16(a0, a1); - s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8)); - s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4)); - s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 2)); - - return (ushort)_mm_cvtsi128_si32(s0); -} - -inline short v_reduce_sum(const v_int16x16& a) -{ - __m256i s0 = _mm256_hadds_epi16(a.val, a.val); - s0 = _mm256_hadds_epi16(s0, s0); - s0 = _mm256_hadds_epi16(s0, s0); - - __m128i s1 = _v256_extract_high(s0); - s1 = _mm_adds_epi16(_v256_extract_low(s0), s1); - - return (short)_mm_cvtsi128_si32(s1); -} - inline int v_reduce_sum(const v_int32x8& a) { __m256i s0 = _mm256_hadd_epi32(a.val, a.val); @@ -1101,6 +1104,11 @@ inline int v_reduce_sum(const v_int32x8& a) inline unsigned v_reduce_sum(const v_uint32x8& a) { return v_reduce_sum(v_reinterpret_as_s32(a)); } +inline int v_reduce_sum(const v_int16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x16& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + inline float v_reduce_sum(const v_float32x8& a) { __m256 s0 = _mm256_hadd_ps(a.val, a.val); @@ -1112,6 +1120,18 @@ inline float v_reduce_sum(const v_float32x8& a) return _mm_cvtss_f32(s1); } +inline uint64 v_reduce_sum(const v_uint64x4& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x4& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + _mm_store_si128((__m128i*)idx, _mm_add_epi64(_v256_extract_low(a.val), _v256_extract_high(a.val))); + return idx[0] + idx[1]; +} inline double v_reduce_sum(const v_float64x4& a) { __m256d s0 = _mm256_hadd_pd(a.val, a.val); @@ -1166,26 +1186,39 @@ inline float v_reduce_sad(const v_float32x8& a, const v_float32x8& b) } /** Popcount **/ -#define OPENCV_HAL_IMPL_AVX_POPCOUNT(_Tpvec) \ - inline v_uint32x8 v_popcount(const _Tpvec& a) \ - { \ - const v_uint32x8 m1 = v256_setall_u32(0x55555555); \ - const v_uint32x8 m2 = v256_setall_u32(0x33333333); \ - const v_uint32x8 m4 = v256_setall_u32(0x0f0f0f0f); \ - v_uint32x8 p = v_reinterpret_as_u32(a); \ - p = ((p >> 1) & m1) + (p & m1); \ - p = ((p >> 2) & m2) + (p & m2); \ - p = ((p >> 4) & m4) + (p & m4); \ - p.val = _mm256_sad_epu8(p.val, _mm256_setzero_si256()); \ - return p; \ - } - -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint8x32) -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int8x32) -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint16x16) -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int16x16) -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_uint32x8) -OPENCV_HAL_IMPL_AVX_POPCOUNT(v_int32x8) +inline v_uint8x32 v_popcount(const v_uint8x32& a) +{ + __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); + return v_uint8x32(_mm256_add_epi8(_mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256( a.val , _popcnt_mask)), + _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_mm256_srli_epi16(a.val, 4), _popcnt_mask)))); +} +inline v_uint16x16 v_popcount(const v_uint16x16& a) +{ + v_uint8x32 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v256_setall_u16(0x00ff); +} +inline v_uint32x8 v_popcount(const v_uint32x8& a) +{ + v_uint8x32 p = 
v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v256_setall_u32(0x000000ff); +} +inline v_uint64x4 v_popcount(const v_uint64x4& a) +{ + return v_uint64x4(_mm256_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm256_setzero_si256())); +} +inline v_uint8x32 v_popcount(const v_int8x32& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x16 v_popcount(const v_int16x16& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x8 v_popcount(const v_int32x8& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x4 v_popcount(const v_int64x4& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } /** Mask **/ inline int v_signmask(const v_int8x32& a) diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index 757c67b314..a1b914f37f 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -603,27 +603,20 @@ static const unsigned char popCountTable[] = 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -/** @brief Count the 1 bits in the vector and return 4 values +/** @brief Count the 1 bits in the vector lanes and return result as corresponding unsigned type Scheme: @code -{A1 A2 A3 ...} => popcount(A1) +{A1 A2 A3 ...} => {popcount(A1), popcount(A2), popcount(A3), ...} @endcode -Any types but result will be in v_uint32x4*/ -template inline v_uint32x4 v_popcount(const v_reg<_Tp, n>& a) +For all integer types. */ +template +inline v_reg::abs_type, n> v_popcount(const v_reg<_Tp, n>& a) { - v_uint8x16 b; - b = v_reinterpret_as_u8(a); - for( int i = 0; i < v_uint8x16::nlanes; i++ ) - { - b.s[i] = popCountTable[b.s[i]]; - } - v_uint32x4 c; - for( int i = 0; i < v_uint32x4::nlanes; i++ ) - { - c.s[i] = b.s[i*4] + b.s[i*4+1] + b.s[i*4+2] + b.s[i*4+3]; - } - return c; + v_reg::abs_type, n> b = v_reg::abs_type, n>::zero(); + for (int i = 0; i < (int)(n*sizeof(_Tp)); i++) + b.s[i/sizeof(_Tp)] += popCountTable[v_reinterpret_as_u8(a).s[i]]; + return b; } diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index c6da1b42d9..468872a677 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -910,6 +910,31 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) #endif +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + uint32x4_t t0 = vpaddlq_u16(vpaddlq_u8(a.val)); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + int32x4_t t0 = vpaddlq_s16(vpaddlq_s8(a.val)); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ + uint32x4_t t0 = vpaddlq_u16(a.val); + uint32x2_t t1 = vpadd_u32(vget_low_u32(t0), vget_high_u32(t0)); + return vget_lane_u32(vpadd_u32(t1, t1), 0); +} +inline int v_reduce_sum(const v_int16x8& a) +{ + int32x4_t t0 = vpaddlq_s16(a.val); + int32x2_t t1 = vpadd_s32(vget_low_s32(t0), vget_high_s32(t0)); + return vget_lane_s32(vpadd_s32(t1, t1), 0); +} + #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ inline scalartype 
v_reduce_##func(const _Tpvec& a) \ { \ @@ -918,12 +943,10 @@ inline scalartype v_reduce_##func(const _Tpvec& a) \ return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \ } -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16) -OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, max, max, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned int, min, min, u16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, max, max, s16) +OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, int, min, min, s16) #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ @@ -942,6 +965,10 @@ OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32) OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32) OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32) +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ return vget_lane_u64(vadd_u64(vget_low_u64(a.val), vget_high_u64(a.val)),0); } +inline int64 v_reduce_sum(const v_int64x2& a) +{ return vget_lane_s64(vadd_s64(vget_low_s64(a.val), vget_high_s64(a.val)),0); } #if CV_SIMD128_64F inline double v_reduce_sum(const v_float64x2& a) { @@ -1007,21 +1034,22 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) return vget_lane_f32(vpadd_f32(t1, t1), 0); } -#define OPENCV_HAL_IMPL_NEON_POPCOUNT(_Tpvec, cast) \ -inline v_uint32x4 v_popcount(const _Tpvec& a) \ -{ \ - uint8x16_t t = vcntq_u8(cast(a.val)); \ - uint16x8_t t0 = vpaddlq_u8(t); /* 16 -> 8 */ \ - uint32x4_t t1 = vpaddlq_u16(t0); /* 8 -> 4 */ \ - return v_uint32x4(t1); \ -} - -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint8x16, OPENCV_HAL_NOP) -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint16x8, vreinterpretq_u8_u16) -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_uint32x4, vreinterpretq_u8_u32) -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int8x16, vreinterpretq_u8_s8) -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int16x8, vreinterpretq_u8_s16) -OPENCV_HAL_IMPL_NEON_POPCOUNT(v_int32x4, vreinterpretq_u8_s32) +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vcntq_u8(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vcntq_u8(vreinterpretq_u8_s8(a.val))); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u16(a.val)))); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s16(a.val)))); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u32(a.val))))); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s32(a.val))))); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(a.val)))))); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return 
v_uint64x2(vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(vreinterpretq_u8_s64(a.val)))))); } inline int v_signmask(const v_uint8x16& a) { diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 6ab360e0b7..7b7e97c561 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -302,8 +302,8 @@ inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(_mm_set1_##ssuffix((_Tps) template inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0& a) \ { return _Tpvec(cast(a.val)); } -OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, char, OPENCV_HAL_NOP) -OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, char, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_uint8x16, uchar, u8, si128, epi8, schar, OPENCV_HAL_NOP) +OPENCV_HAL_IMPL_SSE_INITVEC(v_int8x16, schar, s8, si128, epi8, schar, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_uint16x8, ushort, u16, si128, epi16, short, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_int16x8, short, s16, si128, epi16, short, OPENCV_HAL_NOP) OPENCV_HAL_IMPL_SSE_INITVEC(v_uint32x4, unsigned, u32, si128, epi32, int, OPENCV_HAL_NOP) @@ -1393,6 +1393,41 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) +inline unsigned v_reduce_sum(const v_uint8x16& a) +{ + __m128i half = _mm_sad_epu8(a.val, _mm_setzero_si128()); + return (unsigned)_mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + __m128i half = _mm_set1_epi8((schar)-128); + half = _mm_sad_epu8(_mm_xor_si128(a.val, half), _mm_setzero_si128()); + return _mm_cvtsi128_si32(_mm_add_epi32(half, _mm_unpackhi_epi64(half, half))) - 2048; +} +#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(func) \ +inline schar v_reduce_##func(const v_int8x16& a) \ +{ \ + __m128i val = a.val; \ + __m128i smask = _mm_set1_epi8((schar)-128); \ + val = _mm_xor_si128(val, smask); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (schar)_mm_cvtsi128_si32(val) ^ (schar)-128; \ +} \ +inline uchar v_reduce_##func(const v_uint8x16& a) \ +{ \ + __m128i val = a.val; \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,8)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,4)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,2)); \ + val = _mm_##func##_epu8(val, _mm_srli_si128(val,1)); \ + return (uchar)_mm_cvtsi128_si32(val); \ +} +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(max) +OPENCV_HAL_IMPL_SSE_REDUCE_OP_16(min) + #define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(_Tpvec, scalartype, func, suffix, sbit) \ inline scalartype v_reduce_##func(const v_##_Tpvec& a) \ { \ @@ -1412,26 +1447,8 @@ inline unsigned scalartype v_reduce_##func(const v_u##_Tpvec& a) \ val = _mm_##func##_##suffix(val, _mm_srli_si128(val,2)); \ return (unsigned scalartype)(_mm_cvtsi128_si32(val) ^ sbit); \ } -#define OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(_Tpvec, scalartype, suffix) \ -inline scalartype v_reduce_sum(const v_##_Tpvec& a) \ -{ \ - __m128i val = a.val; \ - val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 8)); \ - val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 4)); \ - val = _mm_adds_epi##suffix(val, _mm_srli_si128(val, 2)); \ - return 
(scalartype)_mm_cvtsi128_si32(val); \ -} \ -inline unsigned scalartype v_reduce_sum(const v_u##_Tpvec& a) \ -{ \ - __m128i val = a.val; \ - val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 8)); \ - val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 4)); \ - val = _mm_adds_epu##suffix(val, _mm_srli_si128(val, 2)); \ - return (unsigned scalartype)_mm_cvtsi128_si32(val); \ -} OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, max, epi16, (short)-32768) OPENCV_HAL_IMPL_SSE_REDUCE_OP_8(int16x8, short, min, epi16, (short)-32768) -OPENCV_HAL_IMPL_SSE_REDUCE_OP_8_SUM(int16x8, short, 16) #define OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(_Tpvec, scalartype, regtype, suffix, cast_from, cast_to, extract) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ @@ -1456,6 +1473,23 @@ OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_uint32x4, unsigned, __m128i, epi32, OPENCV OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_int32x4, int, __m128i, epi32, OPENCV_HAL_NOP, OPENCV_HAL_NOP, si128_si32) OPENCV_HAL_IMPL_SSE_REDUCE_OP_4_SUM(v_float32x4, float, __m128, ps, _mm_castps_si128, _mm_castsi128_ps, ss_f32) +inline int v_reduce_sum(const v_int16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } +inline unsigned v_reduce_sum(const v_uint16x8& a) +{ return v_reduce_sum(v_expand_low(a) + v_expand_high(a)); } + +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + uint64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + int64 CV_DECL_ALIGNED(32) idx[2]; + v_store_aligned(idx, a); + return idx[0] + idx[1]; +} inline double v_reduce_sum(const v_float64x2& a) { double CV_DECL_ALIGNED(32) idx[2]; @@ -1520,27 +1554,42 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) return v_reduce_sum(v_absdiff(a, b)); } -#define OPENCV_HAL_IMPL_SSE_POPCOUNT(_Tpvec) \ -inline v_uint32x4 v_popcount(const _Tpvec& a) \ -{ \ - __m128i m1 = _mm_set1_epi32(0x55555555); \ - __m128i m2 = _mm_set1_epi32(0x33333333); \ - __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); \ - __m128i p = a.val; \ - p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); \ - p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); \ - p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); \ - p = _mm_adds_epi8(p, _mm_srli_si128(p, 1)); \ - p = _mm_adds_epi8(p, _mm_srli_si128(p, 2)); \ - return v_uint32x4(_mm_and_si128(p, _mm_set1_epi32(0x000000ff))); \ +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ + __m128i m1 = _mm_set1_epi32(0x55555555); + __m128i m2 = _mm_set1_epi32(0x33333333); + __m128i m4 = _mm_set1_epi32(0x0f0f0f0f); + __m128i p = a.val; + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 1), m1), _mm_and_si128(p, m1)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 2), m2), _mm_and_si128(p, m2)); + p = _mm_add_epi32(_mm_and_si128(_mm_srli_epi32(p, 4), m4), _mm_and_si128(p, m4)); + return v_uint8x16(p); } - -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint8x16) -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint16x8) -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_uint32x4) -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int8x16) -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int16x8) -OPENCV_HAL_IMPL_SSE_POPCOUNT(v_int32x4) +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); + return v_reinterpret_as_u16(p) & v_setall_u16(0x00ff); +} +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ + v_uint8x16 p = v_popcount(v_reinterpret_as_u8(a)); + p += v_rotate_right<1>(p); 
+ p += v_rotate_right<2>(p); + return v_reinterpret_as_u32(p) & v_setall_u32(0x000000ff); +} +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ + return v_uint64x2(_mm_sad_epu8(v_popcount(v_reinterpret_as_u8(a)).val, _mm_setzero_si128())); +} +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_popcount(v_reinterpret_as_u8(a)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_popcount(v_reinterpret_as_u16(a)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_popcount(v_reinterpret_as_u32(a)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_popcount(v_reinterpret_as_u64(a)); } #define OPENCV_HAL_IMPL_SSE_CHECK_SIGNS(_Tpvec, suffix, pack_op, and_op, signmask, allmask) \ inline int v_signmask(const _Tpvec& a) \ diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp index 390977b55e..1a118ae270 100644 --- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp @@ -692,15 +692,27 @@ inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) ////////// Reduce and mask ///////// /** Reduce **/ -inline short v_reduce_sum(const v_int16x8& a) +inline uint v_reduce_sum(const v_uint8x16& a) +{ + const vec_uint4 zero4 = vec_uint4_z; + vec_uint4 sum4 = vec_sum4s(a.val, zero4); + return (uint)vec_extract(vec_sums(vec_int4_c(sum4), vec_int4_c(zero4)), 3); +} +inline int v_reduce_sum(const v_int8x16& a) +{ + const vec_int4 zero4 = vec_int4_z; + vec_int4 sum4 = vec_sum4s(a.val, zero4); + return (int)vec_extract(vec_sums(sum4, zero4), 3); +} +inline int v_reduce_sum(const v_int16x8& a) { const vec_int4 zero = vec_int4_z; - return saturate_cast(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3)); + return saturate_cast(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3)); } -inline ushort v_reduce_sum(const v_uint16x8& a) +inline uint v_reduce_sum(const v_uint16x8& a) { const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8)))); - return saturate_cast(vec_extract(vec_sums(v4, vec_int4_z), 3)); + return saturate_cast(vec_extract(vec_sums(v4, vec_int4_z), 3)); } #define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \ @@ -719,6 +731,14 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add) OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max) OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min) +inline uint64 v_reduce_sum(const v_uint64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} +inline int64 v_reduce_sum(const v_int64x2& a) +{ + return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); +} inline double v_reduce_sum(const v_float64x2& a) { return vec_extract(vec_add(a.val, vec_permi(a.val, a.val, 3)), 0); @@ -736,6 +756,19 @@ OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min) OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max) OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min) +#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_16(_Tpvec, _Tpvec2, scalartype, suffix, func) \ +inline scalartype v_reduce_##suffix(const _Tpvec& a) \ +{ \ + _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \ + rs = func(rs, vec_sld(rs, rs, 4)); \ + rs = func(rs, vec_sld(rs, rs, 2)); \ + return vec_extract(func(rs, vec_sld(rs, rs, 1)), 0); \ +} +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint8x16, vec_uchar16, uchar, 
max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint8x16, vec_uchar16, uchar, min, vec_min) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int8x16, vec_char16, schar, max, vec_max) +OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int8x16, vec_char16, schar, min, vec_min) + inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c, const v_float32x4& d) { @@ -792,9 +825,22 @@ inline float v_reduce_sad(const v_float32x4& a, const v_float32x4& b) } /** Popcount **/ -template -inline v_uint32x4 v_popcount(const _Tpvec& a) -{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); } +inline v_uint8x16 v_popcount(const v_uint8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint8x16 v_popcount(const v_int8x16& a) +{ return v_uint8x16(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_uint16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint16x8 v_popcount(const v_int16x8& a) +{ return v_uint16x8(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_uint32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint32x4 v_popcount(const v_int32x4& a) +{ return v_uint32x4(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_uint64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } +inline v_uint64x2 v_popcount(const v_int64x2& a) +{ return v_uint64x2(vec_popcntu(a.val)); } /** Mask **/ inline int v_signmask(const v_uint8x16& a) diff --git a/modules/core/src/stat.simd.hpp b/modules/core/src/stat.simd.hpp index b75100d3f4..34b784e12e 100644 --- a/modules/core/src/stat.simd.hpp +++ b/modules/core/src/stat.simd.hpp @@ -32,28 +32,15 @@ int normHamming(const uchar* a, int n) int i = 0; int result = 0; -#if CV_AVX2 + +#if CV_SIMD && CV_SIMD_WIDTH > 16 { - __m256i _r0 = _mm256_setzero_si256(); - __m256i _0 = _mm256_setzero_si256(); - __m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); - __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); - - for(; i <= n - 32; i+= 32) - { - __m256i _a0 = _mm256_loadu_si256((const __m256i*)(a + i)); - - __m256i _popc0 = _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_a0, _popcnt_mask)); - __m256i _popc1 = _mm256_shuffle_epi8(_popcnt_table, - _mm256_and_si256(_mm256_srli_epi16(_a0, 4), _popcnt_mask)); - - _r0 = _mm256_add_epi32(_r0, _mm256_sad_epu8(_0, _mm256_add_epi8(_popc0, _popc1))); - } - _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); - result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); + v_uint64 t = vx_setzero_u64(); + for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + t += v_popcount(v_reinterpret_as_u64(vx_load(a + i))); + result = (int)v_reduce_sum(t); } -#endif // CV_AVX2 +#endif #if CV_POPCNT { @@ -68,18 +55,14 @@ int normHamming(const uchar* a, int n) result += CV_POPCNT_U32(*(uint*)(a + i)); } } -#endif // CV_POPCNT - -#if CV_SIMD128 +#elif CV_SIMD { - v_uint32x4 t = v_setzero_u32(); + v_uint64x2 t = v_setzero_u64(); for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - { - t += v_popcount(v_load(a + i)); - } - result += v_reduce_sum(t); + t += v_popcount(v_reinterpret_as_u64(v_load(a + i))); + result += (int)v_reduce_sum(t); } -#endif // CV_SIMD128 +#endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) { @@ -100,31 +83,15 @@ int normHamming(const uchar* a, const uchar* b, int n) int i = 0; int result = 0; -#if CV_AVX2 + +#if CV_SIMD && CV_SIMD_WIDTH > 16 { - __m256i _r0 = _mm256_setzero_si256(); - __m256i _0 = _mm256_setzero_si256(); - 
__m256i _popcnt_table = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); - __m256i _popcnt_mask = _mm256_set1_epi8(0x0F); - - for(; i <= n - 32; i+= 32) - { - __m256i _a0 = _mm256_loadu_si256((const __m256i*)(a + i)); - __m256i _b0 = _mm256_loadu_si256((const __m256i*)(b + i)); - - __m256i _xor = _mm256_xor_si256(_a0, _b0); - - __m256i _popc0 = _mm256_shuffle_epi8(_popcnt_table, _mm256_and_si256(_xor, _popcnt_mask)); - __m256i _popc1 = _mm256_shuffle_epi8(_popcnt_table, - _mm256_and_si256(_mm256_srli_epi16(_xor, 4), _popcnt_mask)); - - _r0 = _mm256_add_epi32(_r0, _mm256_sad_epu8(_0, _mm256_add_epi8(_popc0, _popc1))); - } - _r0 = _mm256_add_epi32(_r0, _mm256_shuffle_epi32(_r0, 2)); - result = _mm256_extract_epi32_(_mm256_add_epi32(_r0, _mm256_permute2x128_si256(_r0, _r0, 1)), 0); + v_uint64 t = vx_setzero_u64(); + for (; i <= n - v_uint8::nlanes; i += v_uint8::nlanes) + t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); + result += (int)v_reduce_sum(t); } -#endif // CV_AVX2 +#endif #if CV_POPCNT { @@ -139,18 +106,14 @@ int normHamming(const uchar* a, const uchar* b, int n) result += CV_POPCNT_U32(*(uint*)(a + i) ^ *(uint*)(b + i)); } } -#endif // CV_POPCNT - -#if CV_SIMD128 +#elif CV_SIMD { - v_uint32x4 t = v_setzero_u32(); + v_uint64x2 t = v_setzero_u64(); for(; i <= n - v_uint8x16::nlanes; i += v_uint8x16::nlanes) - { - t += v_popcount(v_load(a + i) ^ v_load(b + i)); - } - result += v_reduce_sum(t); + t += v_popcount(v_reinterpret_as_u64(vx_load(a + i) ^ vx_load(b + i))); + result += (int)v_reduce_sum(t); } -#endif // CV_SIMD128 +#endif #if CV_ENABLE_UNROLLED for(; i <= n - 4; i += 4) { diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 1be4a54959..a10621366c 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1195,7 +1195,7 @@ CV_IMPL const char* cvErrorStr( int status ) case CV_BadDepth : return "Input image depth is not supported by function"; case CV_StsUnmatchedFormats : return "Formats of input arguments do not match"; case CV_StsUnmatchedSizes : return "Sizes of input arguments do not match"; - case CV_StsOutOfRange : return "One of arguments\' values is out of range"; + case CV_StsOutOfRange : return "One of the arguments\' values is out of range"; case CV_StsUnsupportedFormat : return "Unsupported format or combination of formats"; case CV_BadCOI : return "Input COI is not supported"; case CV_BadNumChannels : return "Bad number of channels"; diff --git a/modules/core/test/test_intrin_utils.hpp b/modules/core/test/test_intrin_utils.hpp index 3b85d68dea..6ead0ecc60 100644 --- a/modules/core/test/test_intrin_utils.hpp +++ b/modules/core/test/test_intrin_utils.hpp @@ -686,18 +686,24 @@ template struct TheTest TheTest & test_popcount() { + typedef typename V_RegTraits::u_reg Ru; static unsigned popcountTable[] = { - 0, 1, 2, 4, 5, 7, 9, 12, 13, 15, 17, 20, 22, 25, 28, 32, 33, - 35, 37, 40, 42, 45, 48, 52, 54, 57, 60, 64, 67, 71, 75, 80, 81, - 83, 85, 88, 90, 93, 96, 100, 102, 105, 108, 112, 115, 119, 123, - 128, 130, 133, 136, 140, 143, 147, 151, 156, 159, 163, 167, 172, - 176, 181, 186, 192, 193 + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, //0x00-0x0f + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x10-0x1f + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x20-0x2f + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x30-0x3f + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, //0x40-0x4f + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 
6, //0x50-0x5f + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, //0x60-0x6f + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, //0x70-0x7f + 1 //0x80 }; Data dataA; R a = dataA; - unsigned resB = (unsigned)v_reduce_sum(v_popcount(a)); - EXPECT_EQ(popcountTable[R::nlanes], resB); + Data resB = v_popcount(a); + for (int i = 0; i < Ru::nlanes; ++i) + EXPECT_EQ(popcountTable[i + 1], resB[i]); return *this; } diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 8adaf1c57f..0566ed5a21 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2794,9 +2794,6 @@ AsyncMat Net::forwardAsync(const String& outputName) { CV_TRACE_FUNCTION(); #ifdef CV_CXX11 - if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE) - CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE"); - String layerName = outputName; if (layerName.empty()) @@ -2805,6 +2802,9 @@ AsyncMat Net::forwardAsync(const String& outputName) std::vector pins(1, impl->getPinByAlias(layerName)); impl->setUpNet(pins); + if (impl->preferableBackend != DNN_BACKEND_INFERENCE_ENGINE) + CV_Error(Error::StsNotImplemented, "Asynchronous forward for backend which is different from DNN_BACKEND_INFERENCE_ENGINE"); + impl->isAsync = true; impl->forwardToLayer(impl->getLayerData(layerName)); impl->isAsync = false; diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 6572c9ba4e..421ef0f540 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -312,15 +312,13 @@ public: { std::vector inputs; std::vector outputs; + outs.getUMatVector(outputs); bool use_half = (inps.depth() == CV_16S); if (use_half) { std::vector orig_inputs; - std::vector orig_outputs; - inps.getUMatVector(orig_inputs); - outs.getUMatVector(orig_outputs); inputs.resize(orig_inputs.size()); for (size_t i = 0; i < orig_inputs.size(); i++) @@ -329,7 +327,6 @@ public: else { inps.getUMatVector(inputs); - outs.getUMatVector(outputs); } std::vector allDecodedBBoxes; @@ -362,19 +359,17 @@ public: if (numKept == 0) { - // Set confidences to zeros. - Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)}; - if (use_half) - { - std::vector orig_outputs; - outs.getUMatVector(orig_outputs); - orig_outputs[0](ranges).setTo(0); - } else - outputs[0](ranges).setTo(0); + outputs[0].setTo(0); return true; } - int outputShape[] = {1, 1, (int)numKept, 7}; - UMat umat = UMat(4, outputShape, CV_32F); + + UMat umat = use_half ? UMat::zeros(4, outputs[0].size, CV_32F) : outputs[0]; + + if (!use_half) + umat.setTo(0); + + // If there are valid detections + if (numKept > 0) { Mat mat = umat.getMat(ACCESS_WRITE); float* outputsData = mat.ptr(); @@ -393,16 +388,7 @@ public: { UMat half_umat; convertFp16(umat, half_umat); - - std::vector orig_outputs; - outs.getUMatVector(orig_outputs); - orig_outputs.clear(); - orig_outputs.push_back(half_umat); - outs.assign(orig_outputs); - } else { - outputs.clear(); - outputs.push_back(umat); - outs.assign(outputs); + outs.assign(std::vector(1, half_umat)); } return true; @@ -484,15 +470,12 @@ public: numKept += processDetections_(allDecodedBBoxes[i], allConfidenceScores[i], allIndices); } + outputs[0].setTo(0); + + // If there is no detections if (numKept == 0) - { - // Set confidences to zeros. 
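
The rewritten normHamming above folds per-byte popcounts into 64-bit lanes with v_popcount and collapses them once with v_reduce_sum. A scalar reference for the same quantity is handy when validating that path; the sketch below is illustrative only (the helper names are not part of this patch) and reuses the nibble lookup table that the removed AVX2 code was built around.

@code{.cpp}
#include <cstddef>
#include <cstdint>

// Per-byte popcount via a 4-bit lookup, as in the removed AVX2 path.
static inline int popcount8(uint8_t v)
{
    static const uint8_t tab[16] = {0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4};
    return tab[v & 0x0F] + tab[v >> 4];
}

// Scalar reference for normHamming(a, n): number of set bits in a[0..n).
int normHammingRef(const uint8_t* a, size_t n)
{
    int result = 0;
    for (size_t i = 0; i < n; ++i)
        result += popcount8(a[i]);
    return result;
}

// Scalar reference for normHamming(a, b, n): set bits of a[i] ^ b[i].
int normHammingRef(const uint8_t* a, const uint8_t* b, size_t n)
{
    int result = 0;
    for (size_t i = 0; i < n; ++i)
        result += popcount8(a[i] ^ b[i]);
    return result;
}
@endcode
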
- Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)}; - outputs[0](ranges).setTo(0); return; - } - int outputShape[] = {1, 1, (int)numKept, 7}; - outputs[0].create(4, outputShape, CV_32F); + float* outputsData = outputs[0].ptr(); size_t count = 0; @@ -703,8 +686,6 @@ public: prior_width += 1.0f; prior_height += 1.0f; } - CV_Assert(prior_width > 0); - CV_Assert(prior_height > 0); float prior_center_x = prior_bbox.xmin + prior_width * .5; float prior_center_y = prior_bbox.ymin + prior_height * .5; diff --git a/modules/dnn/src/layers/proposal_layer.cpp b/modules/dnn/src/layers/proposal_layer.cpp index 6514ed3a5c..836fc1831b 100644 --- a/modules/dnn/src/layers/proposal_layer.cpp +++ b/modules/dnn/src/layers/proposal_layer.cpp @@ -131,6 +131,9 @@ public: CV_Assert(layerInternals.empty()); internals.push_back(layerOutputs[0]); + // Detections layer. + internals.push_back(shape(1, 1, keepTopAfterNMS, 7)); + outputs.resize(2); outputs[0] = shape(keepTopAfterNMS, 5); outputs[1] = shape(keepTopAfterNMS, 1); @@ -176,13 +179,14 @@ public: internals_.getUMatVector(internals); CV_Assert(inputs.size() == 3); - CV_Assert(internals.size() == 3); + CV_Assert(internals.size() == 4); const UMat& scores = inputs[0]; const UMat& bboxDeltas = inputs[1]; const UMat& imInfo = inputs[2]; UMat& priorBoxes = internals[0]; UMat& permuttedScores = internals[1]; UMat& permuttedDeltas = internals[2]; + UMat& detections = internals[3]; CV_Assert(imInfo.total() >= 2); // We've chosen the smallest data type because we need just a shape from it. @@ -217,7 +221,7 @@ public: layerInputs[2] = priorBoxes; layerInputs[3] = umat_fakeImageBlob; - layerOutputs[0] = UMat(); + layerOutputs[0] = detections; detectionOutputLayer->forward(layerInputs, layerOutputs, internals); // DetectionOutputLayer produces 1x1xNx7 output where N might be less or @@ -237,10 +241,6 @@ public: dst = outputs[1].rowRange(0, numDets); layerOutputs[0].col(2).copyTo(dst); - if (numDets < keepTopAfterNMS) - for (int i = 0; i < 2; ++i) - outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); - return true; } #endif @@ -266,13 +266,14 @@ public: internals_arr.getMatVector(internals); CV_Assert(inputs.size() == 3); - CV_Assert(internals.size() == 3); + CV_Assert(internals.size() == 4); const Mat& scores = inputs[0]; const Mat& bboxDeltas = inputs[1]; const Mat& imInfo = inputs[2]; Mat& priorBoxes = internals[0]; Mat& permuttedScores = internals[1]; Mat& permuttedDeltas = internals[2]; + Mat& detections = internals[3]; CV_Assert(imInfo.total() >= 2); // We've chosen the smallest data type because we need just a shape from it. @@ -302,7 +303,7 @@ public: layerInputs[2] = priorBoxes; layerInputs[3] = fakeImageBlob; - layerOutputs[0] = Mat(); + layerOutputs[0] = detections; detectionOutputLayer->forward(layerInputs, layerOutputs, internals); // DetectionOutputLayer produces 1x1xNx7 output where N might be less or @@ -319,10 +320,6 @@ public: // The scores. 
dst = outputs[1].rowRange(0, numDets); layerOutputs[0].col(2).copyTo(dst); - - if (numDets < keepTopAfterNMS) - for (int i = 0; i < 2; ++i) - outputs[i].rowRange(numDets, keepTopAfterNMS).setTo(0); } virtual Ptr initInfEngine(const std::vector >&) CV_OVERRIDE diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index 58a9d8578d..a9821eb9c6 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -172,7 +172,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0; float diffSquares = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; - float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : 0.0; + float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.252 : FLT_MIN; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh); expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp index caaadfd897..3b1cc02a85 100644 --- a/modules/dnn/test/test_caffe_importer.cpp +++ b/modules/dnn/test/test_caffe_importer.cpp @@ -204,7 +204,7 @@ TEST(Reproducibility_SSD, Accuracy) Mat out = net.forward("detection_out"); Mat ref = blobFromNPY(_tf("ssd_out.npy")); - normAssertDetections(ref, out); + normAssertDetections(ref, out, "", FLT_MIN); } typedef testing::TestWithParam > Reproducibility_MobileNet_SSD; @@ -225,6 +225,8 @@ TEST_P(Reproducibility_MobileNet_SSD, Accuracy) net.setInput(inp); Mat out = net.forward().clone(); + ASSERT_EQ(out.size[2], 100); + const float scores_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 1.5e-2 : 1e-5; const float boxes_iou_diff = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD) ? 
6.3e-2 : 1e-4; Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); diff --git a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java index cdb03a7108..c22eaa90d5 100644 --- a/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java +++ b/modules/java/generator/android-21/java/org/opencv/android/JavaCamera2View.java @@ -341,11 +341,22 @@ public class JavaCamera2View extends CameraBridgeViewBase { if (chromaPixelStride == 2) { // Chroma channels are interleaved + assert(planes[0].getPixelStride() == 1); + assert(planes[2].getPixelStride() == 2); ByteBuffer y_plane = planes[0].getBuffer(); - ByteBuffer uv_plane = planes[1].getBuffer(); + ByteBuffer uv_plane1 = planes[1].getBuffer(); + ByteBuffer uv_plane2 = planes[2].getBuffer(); Mat y_mat = new Mat(h, w, CvType.CV_8UC1, y_plane); - Mat uv_mat = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane); - Imgproc.cvtColorTwoPlane(y_mat, uv_mat, mRgba, Imgproc.COLOR_YUV2RGBA_NV21); + Mat uv_mat1 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane1); + Mat uv_mat2 = new Mat(h / 2, w / 2, CvType.CV_8UC2, uv_plane2); + long addr_diff = uv_mat2.dataAddr() - uv_mat1.dataAddr(); + if (addr_diff > 0) { + assert(addr_diff == 1); + Imgproc.cvtColorTwoPlane(y_mat, uv_mat1, mRgba, Imgproc.COLOR_YUV2RGBA_NV12); + } else { + assert(addr_diff == -1); + Imgproc.cvtColorTwoPlane(y_mat, uv_mat2, mRgba, Imgproc.COLOR_YUV2RGBA_NV21); + } return mRgba; } else { // Chroma channels are not interleaved byte[] yuv_bytes = new byte[w*(h+h/2)]; diff --git a/samples/CMakeLists.example.in b/samples/CMakeLists.example.in index 1769d4d9cf..cb741b76e1 100644 --- a/samples/CMakeLists.example.in +++ b/samples/CMakeLists.example.in @@ -39,4 +39,4 @@ message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") add_executable(${EXAMPLE_NAME} "${EXAMPLE_FILE}") # Link your application with OpenCV libraries -target_link_libraries(${EXAMPLE_NAME} ${OpenCV_LIBS}) +target_link_libraries(${EXAMPLE_NAME} LINK_PRIVATE ${OpenCV_LIBS}) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 15aff36c39..3b6c1a2443 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,32 +1,3 @@ -# Utility function: adds sample executable target with name "example__" -# Usage: -# ocv_define_sample( ) -function(ocv_define_sample out_target source sub) - get_filename_component(name "${source}" NAME_WE) - set(the_target "example_${sub}_${name}") - add_executable(${the_target} "${source}") - set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}") - if(ENABLE_SOLUTION_FOLDERS) - set_target_properties(${the_target} PROPERTIES FOLDER "samples/${sub}") - endif() - if(WIN32 AND MSVC AND NOT BUILD_SHARED_LIBS) - set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") - endif() - if(WIN32) - install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${sub}" COMPONENT samples) - endif() - # Add single target to build all samples in the group: 'make opencv_samples_cpp' - set(parent_target opencv_samples_${sub}) - if(NOT TARGET ${parent_target}) - add_custom_target(${parent_target}) - if(TARGET opencv_samples) - add_dependencies(opencv_samples ${parent_target}) - endif() - endif() - add_dependencies(${parent_target} ${the_target}) - set(${out_target} ${the_target} PARENT_SCOPE) -endfunction() - if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_LIST_DIR) 
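
Returning to the JavaCamera2View change above: with interleaved chroma the U and V planes alias one buffer at a one-byte offset, and the sign of that offset distinguishes NV12 (U first) from NV21 (V first). A C++ sketch of the conversion it then performs is shown below; the wrapper name is illustrative, and only cv::cvtColorTwoPlane and the two color codes are existing OpenCV API.

@code{.cpp}
#include <opencv2/imgproc.hpp>

// y:  full-resolution CV_8UC1 luma plane
// uv: half-resolution CV_8UC2 interleaved chroma plane
// vFirst: true if the V sample precedes U (NV21), false for NV12
cv::Mat yuvTwoPlaneToRgba(const cv::Mat& y, const cv::Mat& uv, bool vFirst)
{
    cv::Mat rgba;
    cv::cvtColorTwoPlane(y, uv, rgba,
                         vFirst ? cv::COLOR_YUV2RGBA_NV21
                                : cv::COLOR_YUV2RGBA_NV12);
    return rgba;
}
@endcode
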
#=================================================================================================== # @@ -34,6 +5,8 @@ if(NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_LIST_DIR) # #=================================================================================================== +include("${CMAKE_CURRENT_LIST_DIR}/samples_utils.cmake") + function(ocv_install_example_src relpath) if(INSTALL_C_EXAMPLES) file(GLOB files ${ARGN}) @@ -43,6 +16,10 @@ function(ocv_install_example_src relpath) endif() endfunction() +if((TARGET Threads::Threads OR HAVE_PTHREAD OR MSVC OR APPLE) AND NOT OPENCV_EXAMPLES_DISABLE_THREADS) + add_definitions(-DHAVE_THREADS=1) +endif() + add_subdirectory(cpp) add_subdirectory(java/tutorial_code) add_subdirectory(dnn) @@ -98,6 +75,8 @@ option(BUILD_EXAMPLES "Build samples" ON) # │   ├── cpp/ find_package(OpenCV REQUIRED PATHS "..") +include("${CMAKE_CURRENT_LIST_DIR}/samples_utils.cmake") + function(ocv_install_example_src) # not used in this branch endfunction() @@ -129,6 +108,17 @@ endif() add_definitions(-DDISABLE_OPENCV_24_COMPATIBILITY=1) # Avoid C-like legacy API +if(OPENCV_EXAMPLES_DISABLE_THREADS) + # nothing +elseif(MSVC OR APPLE) + set(HAVE_THREADS 1) +else() + find_package(Threads) +endif() +if((TARGET Threads::Threads OR HAVE_THREADS) AND NOT OPENCV_EXAMPLES_DISABLE_THREADS) + add_definitions(-DHAVE_THREADS=1) +endif() + add_subdirectory(cpp) if(WIN32) add_subdirectory(directx) diff --git a/samples/cpp/CMakeLists.txt b/samples/cpp/CMakeLists.txt index 71fe897527..45f886b95e 100644 --- a/samples/cpp/CMakeLists.txt +++ b/samples/cpp/CMakeLists.txt @@ -35,12 +35,12 @@ foreach(sample_filename ${cpp_samples}) set(package "tutorial") endif() ocv_define_sample(tgt ${sample_filename} ${package}) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) if(sample_filename MATCHES "/gpu/" AND HAVE_opencv_cudaarithm AND HAVE_opencv_cuda_filters) - ocv_target_link_libraries(${tgt} opencv_cudaarithm opencv_cudafilters) + ocv_target_link_libraries(${tgt} LINK_PRIVATE opencv_cudaarithm opencv_cudafilters) endif() if(sample_filename MATCHES "/viz/") - ocv_target_link_libraries(${tgt} ${VTK_LIBRARIES}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${VTK_LIBRARIES}) target_compile_definitions(${tgt} PRIVATE -DUSE_VTK) endif() if(HAVE_OPENGL AND sample_filename MATCHES "detect_mser") diff --git a/samples/cpp/example_cmake/CMakeLists.txt b/samples/cpp/example_cmake/CMakeLists.txt index 3a31cb87de..d9fe9ccc6b 100644 --- a/samples/cpp/example_cmake/CMakeLists.txt +++ b/samples/cpp/example_cmake/CMakeLists.txt @@ -27,4 +27,4 @@ message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") add_executable(opencv_example example.cpp) # Link your application with OpenCV libraries -target_link_libraries(opencv_example ${OpenCV_LIBS}) +target_link_libraries(opencv_example LINK_PRIVATE ${OpenCV_LIBS}) diff --git a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/CMakeLists.txt b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/CMakeLists.txt index b0ca6c0ff0..2f63f048a0 100644 --- a/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/CMakeLists.txt +++ b/samples/cpp/tutorial_code/calib3d/real_time_pose_estimation/CMakeLists.txt @@ -17,5 +17,5 @@ ocv_include_modules_recurse(${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) add_executable( ${target}pnp_registration ${sample_dir}main_registration.cpp ${sample_pnplib} ) add_executable( 
${target}pnp_detection ${sample_dir}main_detection.cpp ${sample_pnplib} ) -ocv_target_link_libraries( ${target}pnp_registration ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS} ) -ocv_target_link_libraries( ${target}pnp_detection ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS} ) +ocv_target_link_libraries(${target}pnp_registration LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) +ocv_target_link_libraries(${target}pnp_detection LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_CPP_SAMPLES_REQUIRED_DEPS}) diff --git a/samples/cpp/tutorial_code/video/meanshift/camshift.cpp b/samples/cpp/tutorial_code/video/meanshift/camshift.cpp new file mode 100644 index 0000000000..13965e623f --- /dev/null +++ b/samples/cpp/tutorial_code/video/meanshift/camshift.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main(int argc, char **argv) +{ + const string about = + "This sample demonstrates the camshift algorithm.\n" + "The example file can be downloaded from:\n" + " https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"; + const string keys = + "{ h help | | print this help message }" + "{ @image || path to image file }"; + CommandLineParser parser(argc, argv, keys); + parser.about(about); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string filename = parser.get("@image"); + if (!parser.check()) + { + parser.printErrors(); + return 0; + } + + VideoCapture capture(filename); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" << endl; + return 0; + } + + Mat frame, roi, hsv_roi, mask; + // take first frame of the video + capture >> frame; + + // setup initial location of window + Rect track_window(300, 200, 100, 50); // simply hardcoded the values + + // set up the ROI for tracking + roi = frame(track_window); + cvtColor(roi, hsv_roi, COLOR_BGR2HSV); + inRange(hsv_roi, Scalar(0, 60, 32), Scalar(180, 255, 255), mask); + + float range_[] = {0, 180}; + const float* range[] = {range_}; + Mat roi_hist; + int histSize[] = {180}; + int channels[] = {0}; + calcHist(&hsv_roi, 1, channels, mask, roi_hist, 1, histSize, range); + normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX); + + // Setup the termination criteria, either 10 iteration or move by atleast 1 pt + TermCriteria term_crit(TermCriteria::EPS | TermCriteria::COUNT, 10, 1); + + while(true){ + Mat hsv, dst; + capture >> frame; + if (frame.empty()) + break; + cvtColor(frame, hsv, COLOR_BGR2HSV); + calcBackProject(&hsv, 1, channels, roi_hist, dst, range); + + // apply camshift to get the new location + RotatedRect rot_rect = CamShift(dst, track_window, term_crit); + + // Draw it on image + Point2f points[4]; + rot_rect.points(points); + for (int i = 0; i < 4; i++) + line(frame, points[i], points[(i+1)%4], 255, 2); + imshow("img2", frame); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + } +} diff --git a/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp b/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp new file mode 100644 index 0000000000..0e16442c6d --- /dev/null +++ b/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main(int argc, char **argv) +{ + const string about = + "This sample demonstrates the meanshift algorithm.\n" + "The example 
file can be downloaded from:\n" + " https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"; + const string keys = + "{ h help | | print this help message }" + "{ @image || path to image file }"; + CommandLineParser parser(argc, argv, keys); + parser.about(about); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string filename = parser.get("@image"); + if (!parser.check()) + { + parser.printErrors(); + return 0; + } + + VideoCapture capture(filename); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" << endl; + return 0; + } + + Mat frame, roi, hsv_roi, mask; + // take first frame of the video + capture >> frame; + + // setup initial location of window + Rect track_window(300, 200, 100, 50); // simply hardcoded the values + + // set up the ROI for tracking + roi = frame(track_window); + cvtColor(roi, hsv_roi, COLOR_BGR2HSV); + inRange(hsv_roi, Scalar(0, 60, 32), Scalar(180, 255, 255), mask); + + float range_[] = {0, 180}; + const float* range[] = {range_}; + Mat roi_hist; + int histSize[] = {180}; + int channels[] = {0}; + calcHist(&hsv_roi, 1, channels, mask, roi_hist, 1, histSize, range); + normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX); + + // Setup the termination criteria, either 10 iteration or move by atleast 1 pt + TermCriteria term_crit(TermCriteria::EPS | TermCriteria::COUNT, 10, 1); + + while(true){ + Mat hsv, dst; + capture >> frame; + if (frame.empty()) + break; + cvtColor(frame, hsv, COLOR_BGR2HSV); + calcBackProject(&hsv, 1, channels, roi_hist, dst, range); + + // apply meanshift to get the new location + meanShift(dst, track_window, term_crit); + + // Draw it on image + rectangle(frame, track_window, 255, 2); + imshow("img2", frame); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + } +} diff --git a/samples/cpp/tutorial_code/video/optical_flow/optical_flow.cpp b/samples/cpp/tutorial_code/video/optical_flow/optical_flow.cpp new file mode 100644 index 0000000000..9b03d331a9 --- /dev/null +++ b/samples/cpp/tutorial_code/video/optical_flow/optical_flow.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main(int argc, char **argv) +{ + const string about = + "This sample demonstrates Lucas-Kanade Optical Flow calculation.\n" + "The example file can be downloaded from:\n" + " https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"; + const string keys = + "{ h help | | print this help message }" + "{ @image || path to image file }"; + CommandLineParser parser(argc, argv, keys); + parser.about(about); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string filename = parser.get("@image"); + if (!parser.check()) + { + parser.printErrors(); + return 0; + } + + VideoCapture capture(filename); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" 
<< endl; + return 0; + } + + // Create some random colors + vector colors; + RNG rng; + for(int i = 0; i < 100; i++) + { + int r = rng.uniform(0, 256); + int g = rng.uniform(0, 256); + int b = rng.uniform(0, 256); + colors.push_back(Scalar(r,g,b)); + } + + Mat old_frame, old_gray; + vector p0, p1; + + // Take first frame and find corners in it + capture >> old_frame; + cvtColor(old_frame, old_gray, COLOR_BGR2GRAY); + goodFeaturesToTrack(old_gray, p0, 100, 0.3, 7, Mat(), 7, false, 0.04); + + // Create a mask image for drawing purposes + Mat mask = Mat::zeros(old_frame.size(), old_frame.type()); + + while(true){ + Mat frame, frame_gray; + + capture >> frame; + if (frame.empty()) + break; + cvtColor(frame, frame_gray, COLOR_BGR2GRAY); + + // calculate optical flow + vector status; + vector err; + TermCriteria criteria = TermCriteria((TermCriteria::COUNT) + (TermCriteria::EPS), 10, 0.03); + calcOpticalFlowPyrLK(old_gray, frame_gray, p0, p1, status, err, Size(15,15), 2, criteria); + + vector good_new; + for(uint i = 0; i < p0.size(); i++) + { + // Select good points + if(status[i] == 1) { + good_new.push_back(p1[i]); + // draw the tracks + line(mask,p1[i], p0[i], colors[i], 2); + circle(frame, p1[i], 5, colors[i], -1); + } + } + Mat img; + add(frame, mask, img); + + imshow("Frame", img); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + + // Now update the previous frame and previous points + old_gray = frame_gray.clone(); + p0 = good_new; + } +} diff --git a/samples/cpp/tutorial_code/video/optical_flow/optical_flow_dense.cpp b/samples/cpp/tutorial_code/video/optical_flow/optical_flow_dense.cpp new file mode 100644 index 0000000000..b4c12c359d --- /dev/null +++ b/samples/cpp/tutorial_code/video/optical_flow/optical_flow_dense.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main() +{ + VideoCapture capture(samples::findFile("vtest.avi")); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" 
<< endl; + return 0; + } + + Mat frame1, prvs; + capture >> frame1; + cvtColor(frame1, prvs, COLOR_BGR2GRAY); + + while(true){ + Mat frame2, next; + capture >> frame2; + if (frame2.empty()) + break; + cvtColor(frame2, next, COLOR_BGR2GRAY); + + Mat flow(prvs.size(), CV_32FC2); + calcOpticalFlowFarneback(prvs, next, flow, 0.5, 3, 15, 3, 5, 1.2, 0); + + // visualization + Mat flow_parts[2]; + split(flow, flow_parts); + Mat magnitude, angle, magn_norm; + cartToPolar(flow_parts[0], flow_parts[1], magnitude, angle, true); + normalize(magnitude, magn_norm, 0.0f, 1.0f, NORM_MINMAX); + angle *= ((1.f / 360.f) * (180.f / 255.f)); + + //build hsv image + Mat _hsv[3], hsv, hsv8, bgr; + _hsv[0] = angle; + _hsv[1] = Mat::ones(angle.size(), CV_32F); + _hsv[2] = magn_norm; + merge(_hsv, 3, hsv); + hsv.convertTo(hsv8, CV_8U, 255.0); + cvtColor(hsv8, bgr, COLOR_HSV2BGR); + + imshow("frame2", bgr); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + + prvs = next; + } +} diff --git a/samples/directx/CMakeLists.txt b/samples/directx/CMakeLists.txt index 391ea91af0..961ed68a67 100644 --- a/samples/directx/CMakeLists.txt +++ b/samples/directx/CMakeLists.txt @@ -17,5 +17,5 @@ ocv_include_modules_recurse(${tgt} ${OPENCV_DIRECTX_SAMPLES_REQUIRED_DEPS}) file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} directx) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_DIRECTX_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_DIRECTX_SAMPLES_REQUIRED_DEPS}) endforeach() diff --git a/samples/dnn/CMakeLists.txt b/samples/dnn/CMakeLists.txt index 4af6d40928..d1ec67f4ff 100644 --- a/samples/dnn/CMakeLists.txt +++ b/samples/dnn/CMakeLists.txt @@ -18,5 +18,5 @@ ocv_include_modules_recurse(${OPENCV_DNN_SAMPLES_REQUIRED_DEPS}) file(GLOB_RECURSE dnn_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${dnn_samples}) ocv_define_sample(tgt ${sample_filename} dnn) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_DNN_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_DNN_SAMPLES_REQUIRED_DEPS}) endforeach() diff --git a/samples/dnn/object_detection.cpp b/samples/dnn/object_detection.cpp index c30e2179af..6f6a1ca9e5 100644 --- a/samples/dnn/object_detection.cpp +++ b/samples/dnn/object_detection.cpp @@ -5,6 +5,11 @@ #include #include +#ifdef CV_CXX11 +#include +#include +#endif + #include "common.hpp" std::string keys = @@ -26,8 +31,9 @@ std::string keys = "0: CPU target (by default), " "1: OpenCL, " "2: OpenCL fp16 (half-float precision), " - "3: VPU }"; - + "3: VPU }" + "{ async | 0 | Number of asynchronous forwards at the same time. 
" + "Choose 0 for synchronous mode }"; using namespace cv; using namespace dnn; @@ -35,13 +41,66 @@ using namespace dnn; float confThreshold, nmsThreshold; std::vector classes; +inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, + const Scalar& mean, bool swapRB); + void postprocess(Mat& frame, const std::vector& out, Net& net); void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame); void callback(int pos, void* userdata); -std::vector getOutputsNames(const Net& net); +#ifdef CV_CXX11 +template +class QueueFPS : public std::queue +{ +public: + QueueFPS() : counter(0) {} + + void push(const T& entry) + { + std::lock_guard lock(mutex); + + std::queue::push(entry); + counter += 1; + if (counter == 1) + { + // Start counting from a second frame (warmup). + tm.reset(); + tm.start(); + } + } + + T get() + { + std::lock_guard lock(mutex); + T entry = this->front(); + this->pop(); + return entry; + } + + float getFPS() + { + tm.stop(); + double fps = counter / tm.getTimeSec(); + tm.start(); + return static_cast(fps); + } + + void clear() + { + std::lock_guard lock(mutex); + while (!this->empty()) + this->pop(); + } + + unsigned int counter; + +private: + TickMeter tm; + std::mutex mutex; +}; +#endif // CV_CXX11 int main(int argc, char** argv) { @@ -67,6 +126,7 @@ int main(int argc, char** argv) bool swapRB = parser.get("rgb"); int inpWidth = parser.get("width"); int inpHeight = parser.get("height"); + size_t async = parser.get("async"); CV_Assert(parser.has("model")); std::string modelPath = findFile(parser.get("model")); std::string configPath = findFile(parser.get("config")); @@ -104,6 +164,108 @@ int main(int argc, char** argv) else cap.open(parser.get("device")); +#ifdef CV_CXX11 + bool process = true; + + // Frames capturing thread + QueueFPS framesQueue; + std::thread framesThread([&](){ + Mat frame; + while (process) + { + cap >> frame; + if (!frame.empty()) + framesQueue.push(frame.clone()); + else + break; + } + }); + + // Frames processing thread + QueueFPS processedFramesQueue; + QueueFPS > predictionsQueue; + std::thread processingThread([&](){ + std::queue > futureOutputs; + Mat blob; + while (process) + { + // Get a next frame + Mat frame; + { + if (!framesQueue.empty()) + { + frame = framesQueue.get(); + if (async) + { + if (futureOutputs.size() == async) + frame = Mat(); + } + else + framesQueue.clear(); // Skip the rest of frames + } + } + + // Process the frame + if (!frame.empty()) + { + preprocess(frame, net, Size(inpWidth, inpHeight), scale, mean, swapRB); + processedFramesQueue.push(frame); + + if (async) + { + futureOutputs.push(net.forwardAsync()); + } + else + { + std::vector outs; + net.forward(outs, outNames); + predictionsQueue.push(outs); + } + } + + while (!futureOutputs.empty() && + futureOutputs.front().wait_for(std::chrono::seconds(0)) == std::future_status::ready) + { + Mat out = futureOutputs.front().get(); + predictionsQueue.push({out}); + futureOutputs.pop(); + } + } + }); + + // Postprocessing and rendering loop + while (waitKey(1) < 0) + { + if (predictionsQueue.empty()) + continue; + + std::vector outs = predictionsQueue.get(); + Mat frame = processedFramesQueue.get(); + + postprocess(frame, outs, net); + + if (predictionsQueue.counter > 1) + { + std::string label = format("Camera: %.2f FPS", framesQueue.getFPS()); + putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + + label = format("Network: %.2f FPS", predictionsQueue.getFPS()); + putText(frame, label, 
Point(0, 30), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + + label = format("Skipped frames: %d", framesQueue.counter - predictionsQueue.counter); + putText(frame, label, Point(0, 45), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0)); + } + imshow(kWinName, frame); + } + + process = false; + framesThread.join(); + processingThread.join(); + +#else // CV_CXX11 + if (async) + CV_Error(Error::StsNotImplemented, "Asynchronous forward is supported only with Inference Engine backend."); + // Process frames. Mat frame, blob; while (waitKey(1) < 0) @@ -115,19 +277,8 @@ int main(int argc, char** argv) break; } - // Create a 4D blob from a frame. - Size inpSize(inpWidth > 0 ? inpWidth : frame.cols, - inpHeight > 0 ? inpHeight : frame.rows); - blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false); + preprocess(frame, net, Size(inpWidth, inpHeight), scale, mean, swapRB); - // Run a model. - net.setInput(blob); - if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN - { - resize(frame, frame, inpSize); - Mat imInfo = (Mat_(1, 3) << inpSize.height, inpSize.width, 1.6f); - net.setInput(imInfo, "im_info"); - } std::vector outs; net.forward(outs, outNames); @@ -142,9 +293,29 @@ int main(int argc, char** argv) imshow(kWinName, frame); } +#endif // CV_CXX11 return 0; } +inline void preprocess(const Mat& frame, Net& net, Size inpSize, float scale, + const Scalar& mean, bool swapRB) +{ + static Mat blob; + // Create a 4D blob from a frame. + if (inpSize.width <= 0) inpSize.width = frame.cols; + if (inpSize.height <= 0) inpSize.height = frame.rows; + blobFromImage(frame, blob, 1.0, inpSize, Scalar(), swapRB, false, CV_8U); + + // Run a model. + net.setInput(blob, "", scale, mean); + if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN + { + resize(frame, frame, inpSize); + Mat imInfo = (Mat_(1, 3) << inpSize.height, inpSize.width, 1.6f); + net.setInput(imInfo, "im_info"); + } +} + void postprocess(Mat& frame, const std::vector& outs, Net& net) { static std::vector outLayers = net.getUnconnectedOutLayers(); diff --git a/samples/dnn/object_detection.py b/samples/dnn/object_detection.py index 3f7b0e23d7..f32f76bc53 100644 --- a/samples/dnn/object_detection.py +++ b/samples/dnn/object_detection.py @@ -1,6 +1,13 @@ import cv2 as cv import argparse import numpy as np +import sys +import time +from threading import Thread +if sys.version_info[0] == '2': + import Queue as queue +else: + import queue from common import * from tf_text_graph_common import readTextMessage @@ -35,6 +42,9 @@ parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, '%d: OpenCL, ' '%d: OpenCL fp16 (half-float precision), ' '%d: VPU' % targets) +parser.add_argument('--async', type=int, default=0, + help='Number of asynchronous forwards at the same time. 
' + 'Choose 0 for synchronous mode') args, _ = parser.parse_known_args() add_preproc_args(args.zoo, parser, 'object_detection') parser = argparse.ArgumentParser(parents=[parser], @@ -173,32 +183,125 @@ def callback(pos): cv.createTrackbar('Confidence threshold, %', winName, int(confThreshold * 100), 99, callback) cap = cv.VideoCapture(cv.samples.findFileOrKeep(args.input) if args.input else 0) + +class QueueFPS(queue.Queue): + def __init__(self): + queue.Queue.__init__(self) + self.startTime = 0 + self.counter = 0 + + def put(self, v): + queue.Queue.put(self, v) + self.counter += 1 + if self.counter == 1: + self.startTime = time.time() + + def getFPS(self): + return self.counter / (time.time() - self.startTime) + + +process = True + +# +# Frames capturing thread +# +framesQueue = QueueFPS() +def framesThreadBody(): + global framesQueue, process + + while process: + hasFrame, frame = cap.read() + if not hasFrame: + break + framesQueue.put(frame) + + +# +# Frames processing thread +# +processedFramesQueue = queue.Queue() +predictionsQueue = QueueFPS() +def processingThreadBody(): + global processedFramesQueue, predictionsQueue, args, process + + futureOutputs = [] + while process: + # Get a next frame + frame = None + try: + frame = framesQueue.get_nowait() + + if args.async: + if len(futureOutputs) == args.async: + frame = None # Skip the frame + else: + framesQueue.queue.clear() # Skip the rest of frames + except queue.Empty: + pass + + + if not frame is None: + frameHeight = frame.shape[0] + frameWidth = frame.shape[1] + + # Create a 4D blob from a frame. + inpWidth = args.width if args.width else frameWidth + inpHeight = args.height if args.height else frameHeight + blob = cv.dnn.blobFromImage(frame, size=(inpWidth, inpHeight), swapRB=args.rgb, ddepth=cv.CV_8U) + processedFramesQueue.put(frame) + + # Run a model + net.setInput(blob, scalefactor=args.scale, mean=args.mean) + if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN + frame = cv.resize(frame, (inpWidth, inpHeight)) + net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info') + + if args.async: + futureOutputs.append(net.forwardAsync()) + else: + outs = net.forward(outNames) + predictionsQueue.put(np.copy(outs)) + + while futureOutputs and futureOutputs[0].wait_for(0) == 0: + out = futureOutputs[0].get() + predictionsQueue.put(np.copy([out])) + + del futureOutputs[0] + + +framesThread = Thread(target=framesThreadBody) +framesThread.start() + +processingThread = Thread(target=processingThreadBody) +processingThread.start() + +# +# Postprocessing and rendering loop +# while cv.waitKey(1) < 0: - hasFrame, frame = cap.read() - if not hasFrame: - cv.waitKey() - break + try: + # Request prediction first because they put after frames + outs = predictionsQueue.get_nowait() + frame = processedFramesQueue.get_nowait() - frameHeight = frame.shape[0] - frameWidth = frame.shape[1] + postprocess(frame, outs) - # Create a 4D blob from a frame. - inpWidth = args.width if args.width else frameWidth - inpHeight = args.height if args.height else frameHeight - blob = cv.dnn.blobFromImage(frame, args.scale, (inpWidth, inpHeight), args.mean, args.rgb, crop=False) + # Put efficiency information. 
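
Both the C++ and Python object_detection samples above meter throughput with a QueueFPS helper whose timer starts on the first push (a warm-up step) and whose rate is the item count divided by the time elapsed since that first push. A standalone sketch of that bookkeeping, using an illustrative class name that is not part of this patch, is given below.

@code{.cpp}
#include <chrono>

// Minimal FPS bookkeeping in the spirit of the QueueFPS helpers above:
// the clock starts when the first item arrives, so setup cost before the
// first frame is excluded; the reported rate is items / seconds-since-first.
class FpsCounter
{
public:
    void onItem()
    {
        if (++counter == 1)
            start = std::chrono::steady_clock::now();
    }
    double fps() const
    {
        if (counter < 2)
            return 0.0;
        double sec = std::chrono::duration<double>(
                         std::chrono::steady_clock::now() - start).count();
        return sec > 0.0 ? counter / sec : 0.0;
    }
    unsigned counter = 0;

private:
    std::chrono::steady_clock::time_point start;
};
@endcode
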
+ if predictionsQueue.counter > 1: + label = 'Camera: %.2f FPS' % (framesQueue.getFPS()) + cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) - # Run a model - net.setInput(blob) - if net.getLayer(0).outputNameToIndex('im_info') != -1: # Faster-RCNN or R-FCN - frame = cv.resize(frame, (inpWidth, inpHeight)) - net.setInput(np.array([[inpHeight, inpWidth, 1.6]], dtype=np.float32), 'im_info') - outs = net.forward(outNames) + label = 'Network: %.2f FPS' % (predictionsQueue.getFPS()) + cv.putText(frame, label, (0, 30), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) - postprocess(frame, outs) + label = 'Skipped frames: %d' % (framesQueue.counter - predictionsQueue.counter) + cv.putText(frame, label, (0, 45), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) - # Put efficiency information. - t, _ = net.getPerfProfile() - label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency()) - cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0)) + cv.imshow(winName, frame) + except queue.Empty: + pass - cv.imshow(winName, frame) + +process = False +framesThread.join() +processingThread.join() diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index 96fe897af3..d6f7d43320 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -51,11 +51,11 @@ endif() file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} gpu) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_CUDA_SAMPLES_REQUIRED_DEPS}) if(HAVE_opencv_xfeatures2d) - ocv_target_link_libraries(${tgt} opencv_xfeatures2d) + ocv_target_link_libraries(${tgt} LINK_PRIVATE opencv_xfeatures2d) endif() if(HAVE_opencv_cudacodec) - ocv_target_link_libraries(${tgt} opencv_cudacodec) + ocv_target_link_libraries(${tgt} LINK_PRIVATE opencv_cudacodec) endif() endforeach() diff --git a/samples/opencl/CMakeLists.txt b/samples/opencl/CMakeLists.txt index a614bbe9ee..bb857ef448 100644 --- a/samples/opencl/CMakeLists.txt +++ b/samples/opencl/CMakeLists.txt @@ -31,7 +31,7 @@ ocv_include_directories(${OpenCL_INCLUDE_DIR}) file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} opencl) - ocv_target_link_libraries(${tgt} + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS} ${OpenCL_LIBRARY}) diff --git a/samples/opengl/CMakeLists.txt b/samples/opengl/CMakeLists.txt index 31a075c0cf..adf48a8d16 100644 --- a/samples/opengl/CMakeLists.txt +++ b/samples/opengl/CMakeLists.txt @@ -23,9 +23,9 @@ if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND) endif() foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} opengl) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_OPENGL_SAMPLES_REQUIRED_DEPS}) if(sample_filename STREQUAL "opengl_interop.cpp") - ocv_target_link_libraries(${tgt} ${X11_LIBRARIES}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${X11_LIBRARIES}) ocv_target_include_directories(${tgt} ${X11_INCLUDE_DIR}) endif() endforeach() diff --git a/samples/openvx/CMakeLists.txt b/samples/openvx/CMakeLists.txt index fd9165bec9..ad65f050c9 100644 --- a/samples/openvx/CMakeLists.txt +++ 
b/samples/openvx/CMakeLists.txt @@ -21,5 +21,5 @@ add_definitions(-DIVX_HIDE_INFO_WARNINGS) file(GLOB_RECURSE cpp_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${cpp_samples}) ocv_define_sample(tgt ${sample_filename} openvx) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_OPENVX_SAMPLE_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_OPENVX_SAMPLE_REQUIRED_DEPS}) endforeach() diff --git a/samples/python/tutorial_code/video/meanshift/camshift.py b/samples/python/tutorial_code/video/meanshift/camshift.py new file mode 100644 index 0000000000..d115bdb4d3 --- /dev/null +++ b/samples/python/tutorial_code/video/meanshift/camshift.py @@ -0,0 +1,50 @@ +import numpy as np +import cv2 as cv +import argparse + +parser = argparse.ArgumentParser(description='This sample demonstrates the camshift algorithm. \ + The example file can be downloaded from: \ + https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4') +parser.add_argument('image', type=str, help='path to image file') +args = parser.parse_args() + +cap = cv.VideoCapture(args.image) + +# take first frame of the video +ret,frame = cap.read() + +# setup initial location of window +x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values +track_window = (x, y, w, h) + +# set up the ROI for tracking +roi = frame[y:y+h, x:x+w] +hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) +mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) +roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) +cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) + +# Setup the termination criteria, either 10 iteration or move by atleast 1 pt +term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) + +while(1): + ret, frame = cap.read() + + if ret == True: + hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) + dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) + + # apply camshift to get the new location + ret, track_window = cv.CamShift(dst, track_window, term_crit) + + # Draw it on image + pts = cv.boxPoints(ret) + pts = np.int0(pts) + img2 = cv.polylines(frame,[pts],True, 255,2) + cv.imshow('img2',img2) + + k = cv.waitKey(30) & 0xff + if k == 27: + break + else: + break diff --git a/samples/python/tutorial_code/video/meanshift/meanshift.py b/samples/python/tutorial_code/video/meanshift/meanshift.py new file mode 100644 index 0000000000..f765b023e9 --- /dev/null +++ b/samples/python/tutorial_code/video/meanshift/meanshift.py @@ -0,0 +1,49 @@ +import numpy as np +import cv2 as cv +import argparse + +parser = argparse.ArgumentParser(description='This sample demonstrates the meanshift algorithm. 
\ + The example file can be downloaded from: \ + https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4') +parser.add_argument('image', type=str, help='path to image file') +args = parser.parse_args() + +cap = cv.VideoCapture(args.image) + +# take first frame of the video +ret,frame = cap.read() + +# setup initial location of window +x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values +track_window = (x, y, w, h) + +# set up the ROI for tracking +roi = frame[y:y+h, x:x+w] +hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) +mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) +roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) +cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) + +# Setup the termination criteria, either 10 iteration or move by atleast 1 pt +term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) + +while(1): + ret, frame = cap.read() + + if ret == True: + hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) + dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) + + # apply meanshift to get the new location + ret, track_window = cv.meanShift(dst, track_window, term_crit) + + # Draw it on image + x,y,w,h = track_window + img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2) + cv.imshow('img2',img2) + + k = cv.waitKey(30) & 0xff + if k == 27: + break + else: + break diff --git a/samples/python/tutorial_code/video/optical_flow/optical_flow.py b/samples/python/tutorial_code/video/optical_flow/optical_flow.py new file mode 100644 index 0000000000..c367407e45 --- /dev/null +++ b/samples/python/tutorial_code/video/optical_flow/optical_flow.py @@ -0,0 +1,61 @@ +import numpy as np +import cv2 as cv +import argparse + +parser = argparse.ArgumentParser(description='This sample demonstrates Lucas-Kanade Optical Flow calculation. 
\ + The example file can be downloaded from: \ + https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4') +parser.add_argument('image', type=str, help='path to image file') +args = parser.parse_args() + +cap = cv.VideoCapture(args.image) + +# params for ShiTomasi corner detection +feature_params = dict( maxCorners = 100, + qualityLevel = 0.3, + minDistance = 7, + blockSize = 7 ) + +# Parameters for lucas kanade optical flow +lk_params = dict( winSize = (15,15), + maxLevel = 2, + criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03)) + +# Create some random colors +color = np.random.randint(0,255,(100,3)) + +# Take first frame and find corners in it +ret, old_frame = cap.read() +old_gray = cv.cvtColor(old_frame, cv.COLOR_BGR2GRAY) +p0 = cv.goodFeaturesToTrack(old_gray, mask = None, **feature_params) + +# Create a mask image for drawing purposes +mask = np.zeros_like(old_frame) + +while(1): + ret,frame = cap.read() + frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) + + # calculate optical flow + p1, st, err = cv.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params) + + # Select good points + good_new = p1[st==1] + good_old = p0[st==1] + + # draw the tracks + for i,(new,old) in enumerate(zip(good_new, good_old)): + a,b = new.ravel() + c,d = old.ravel() + mask = cv.line(mask, (a,b),(c,d), color[i].tolist(), 2) + frame = cv.circle(frame,(a,b),5,color[i].tolist(),-1) + img = cv.add(frame,mask) + + cv.imshow('frame',img) + k = cv.waitKey(30) & 0xff + if k == 27: + break + + # Now update the previous frame and previous points + old_gray = frame_gray.copy() + p0 = good_new.reshape(-1,1,2) diff --git a/samples/python/tutorial_code/video/optical_flow/optical_flow_dense.py b/samples/python/tutorial_code/video/optical_flow/optical_flow_dense.py new file mode 100644 index 0000000000..b937b24ea7 --- /dev/null +++ b/samples/python/tutorial_code/video/optical_flow/optical_flow_dense.py @@ -0,0 +1,23 @@ +import numpy as np +import cv2 as cv +cap = cv.VideoCapture(cv.samples.findFile("vtest.avi")) +ret, frame1 = cap.read() +prvs = cv.cvtColor(frame1,cv.COLOR_BGR2GRAY) +hsv = np.zeros_like(frame1) +hsv[...,1] = 255 +while(1): + ret, frame2 = cap.read() + next = cv.cvtColor(frame2,cv.COLOR_BGR2GRAY) + flow = cv.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0) + mag, ang = cv.cartToPolar(flow[...,0], flow[...,1]) + hsv[...,0] = ang*180/np.pi/2 + hsv[...,2] = cv.normalize(mag,None,0,255,cv.NORM_MINMAX) + bgr = cv.cvtColor(hsv,cv.COLOR_HSV2BGR) + cv.imshow('frame2',bgr) + k = cv.waitKey(30) & 0xff + if k == 27: + break + elif k == ord('s'): + cv.imwrite('opticalfb.png',frame2) + cv.imwrite('opticalhsv.png',bgr) + prvs = next diff --git a/samples/samples_utils.cmake b/samples/samples_utils.cmake new file mode 100644 index 0000000000..b39996aea1 --- /dev/null +++ b/samples/samples_utils.cmake @@ -0,0 +1,31 @@ +# Utility function: adds sample executable target with name "example__" +# Usage: +# ocv_define_sample( ) +function(ocv_define_sample out_target source sub) + get_filename_component(name "${source}" NAME_WE) + set(the_target "example_${sub}_${name}") + add_executable(${the_target} "${source}") + if(TARGET Threads::Threads AND NOT OPENCV_EXAMPLES_DISABLE_THREADS) + target_link_libraries(${the_target} LINK_PRIVATE Threads::Threads) + endif() + set_target_properties(${the_target} PROPERTIES PROJECT_LABEL "(sample) ${name}") + if(ENABLE_SOLUTION_FOLDERS) + set_target_properties(${the_target} PROPERTIES FOLDER 
"samples/${sub}") + endif() + if(WIN32 AND MSVC AND NOT BUILD_SHARED_LIBS) + set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG") + endif() + if(WIN32) + install(TARGETS ${the_target} RUNTIME DESTINATION "samples/${sub}" COMPONENT samples) + endif() + # Add single target to build all samples in the group: 'make opencv_samples_cpp' + set(parent_target opencv_samples_${sub}) + if(NOT TARGET ${parent_target}) + add_custom_target(${parent_target}) + if(TARGET opencv_samples) + add_dependencies(opencv_samples ${parent_target}) + endif() + endif() + add_dependencies(${parent_target} ${the_target}) + set(${out_target} ${the_target} PARENT_SCOPE) +endfunction() diff --git a/samples/tapi/CMakeLists.txt b/samples/tapi/CMakeLists.txt index ba0ac69f9d..c44e92dc73 100644 --- a/samples/tapi/CMakeLists.txt +++ b/samples/tapi/CMakeLists.txt @@ -22,5 +22,5 @@ ocv_include_modules_recurse(${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS}) file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} tapi) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_TAPI_SAMPLES_REQUIRED_DEPS}) endforeach() diff --git a/samples/va_intel/CMakeLists.txt b/samples/va_intel/CMakeLists.txt index 04f2ea5fd4..2974d41c8f 100644 --- a/samples/va_intel/CMakeLists.txt +++ b/samples/va_intel/CMakeLists.txt @@ -17,5 +17,5 @@ ocv_include_modules_recurse(${OPENCV_VA_INTEL_SAMPLES_REQUIRED_DEPS}) file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) foreach(sample_filename ${all_samples}) ocv_define_sample(tgt ${sample_filename} va_intel) - ocv_target_link_libraries(${tgt} ${OPENCV_LINKER_LIBS} ${OPENCV_VA_INTEL_SAMPLES_REQUIRED_DEPS} ${VA_LIBRARIES} ${VA_INTEL_LIBRARIES}) + ocv_target_link_libraries(${tgt} LINK_PRIVATE ${OPENCV_LINKER_LIBS} ${OPENCV_VA_INTEL_SAMPLES_REQUIRED_DEPS} ${VA_LIBRARIES} ${VA_INTEL_LIBRARIES}) endforeach()