diff --git a/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown b/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown index b246d76067..3c162aa76e 100644 --- a/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown +++ b/doc/py_tutorials/py_video/py_meanshift/py_meanshift.markdown @@ -1,185 +1,4 @@ Meanshift and Camshift {#tutorial_py_meanshift} ====================== -Goal ----- - -In this chapter, - -- We will learn about Meanshift and Camshift algorithms to find and track objects in videos. - -Meanshift ---------- - -The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel -distribution like histogram backprojection). You are given a small window ( may be a circle) and you -have to move that window to the area of maximum pixel density (or maximum number of points). It is -illustrated in the simple image given below: - -![image](images/meanshift_basics.jpg) - -The initial window is shown in blue circle with the name "C1". Its original center is marked in blue -rectangle, named "C1_o". But if you find the centroid of the points inside that window, you will -get the point "C1_r" (marked in small blue circle) which is the real centroid of window. Surely -they don't match. So move your window such that circle of the new window matches with previous -centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue -the iterations such that center of window and its centroid falls on the same location (or with a -small desired error). So finally what you obtain is a window with maximum pixel distribution. It is -marked with green circle, named "C2". As you can see in image, it has maximum number of points. The -whole process is demonstrated on a static image below: - -![image](images/meanshift_face.gif) - -So we normally pass the histogram backprojected image and initial target location. 
When the object -moves, obviously the movement is reflected in histogram backprojected image. As a result, meanshift -algorithm moves our window to the new location with maximum density. - -### Meanshift in OpenCV - -To use meanshift in OpenCV, first we need to setup the target, find its histogram so that we can -backproject the target on each frame for calculation of meanshift. We also need to provide initial -location of window. For histogram, only Hue is considered here. Also, to avoid false values due to -low light, low light values are discarded using **cv.inRange()** function. -@code{.py} -import numpy as np -import cv2 as cv - -cap = cv.VideoCapture('slow.flv') - -# take first frame of the video -ret,frame = cap.read() - -# setup initial location of window -r,h,c,w = 250,90,400,125 # simply hardcoded the values -track_window = (c,r,w,h) - -# set up the ROI for tracking -roi = frame[r:r+h, c:c+w] -hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) -mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) -roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) -cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) - -# Setup the termination criteria, either 10 iteration or move by atleast 1 pt -term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) - -while(1): - ret ,frame = cap.read() - - if ret == True: - hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) - dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) - - # apply meanshift to get the new location - ret, track_window = cv.meanShift(dst, track_window, term_crit) - - # Draw it on image - x,y,w,h = track_window - img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2) - cv.imshow('img2',img2) - - k = cv.waitKey(60) & 0xff - if k == 27: - break - else: - cv.imwrite(chr(k)+".jpg",img2) - - else: - break - -cv.destroyAllWindows() -cap.release() -@endcode -Three frames in a video I used is given below: - -![image](images/meanshift_result.jpg) - -Camshift --------- - -Did you closely 
watch the last result? There is a problem. Our window always has the same size when -car is farther away and it is very close to camera. That is not good. We need to adapt the window -size with size and rotation of the target. Once again, the solution came from "OpenCV Labs" and it -is called CAMshift (Continuously Adaptive Meanshift) published by Gary Bradsky in his paper -"Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998. - -It applies meanshift first. Once meanshift converges, it updates the size of the window as, -\f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of best fitting ellipse -to it. Again it applies the meanshift with new scaled search window and previous window location. -The process is continued until required accuracy is met. - -![image](images/camshift_face.gif) - -### Camshift in OpenCV - -It is almost same as meanshift, but it returns a rotated rectangle (that is our result) and box -parameters (used to be passed as search window in next iteration). 
See the code below: -@code{.py} -import numpy as np -import cv2 as cv - -cap = cv.VideoCapture('slow.flv') - -# take first frame of the video -ret,frame = cap.read() - -# setup initial location of window -r,h,c,w = 250,90,400,125 # simply hardcoded the values -track_window = (c,r,w,h) - -# set up the ROI for tracking -roi = frame[r:r+h, c:c+w] -hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) -mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) -roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) -cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) - -# Setup the termination criteria, either 10 iteration or move by atleast 1 pt -term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) - -while(1): - ret ,frame = cap.read() - - if ret == True: - hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) - dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) - - # apply meanshift to get the new location - ret, track_window = cv.CamShift(dst, track_window, term_crit) - - # Draw it on image - pts = cv.boxPoints(ret) - pts = np.int0(pts) - img2 = cv.polylines(frame,[pts],True, 255,2) - cv.imshow('img2',img2) - - k = cv.waitKey(60) & 0xff - if k == 27: - break - else: - cv.imwrite(chr(k)+".jpg",img2) - - else: - break - -cv.destroyAllWindows() -cap.release() -@endcode -Three frames of the result is shown below: - -![image](images/camshift_result.jpg) - -Additional Resources --------------------- - --# French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations - are taken from here) -2. Bradski, G.R., "Real time face and object tracking as a component of a perceptual user - interface," Applications of Computer Vision, 1998. WACV '98. Proceedings., Fourth IEEE Workshop - on , vol., no., pp.214,219, 19-21 Oct 1998 - -Exercises ---------- - --# OpenCV comes with a Python sample on interactive demo of camshift. Use it, hack it, understand - it. 
+Tutorial content has been moved: @ref tutorial_meanshift diff --git a/doc/py_tutorials/py_video/py_table_of_contents_video.markdown b/doc/py_tutorials/py_video/py_table_of_contents_video.markdown index 3a12d951e9..a12b5efcb9 100644 --- a/doc/py_tutorials/py_video/py_table_of_contents_video.markdown +++ b/doc/py_tutorials/py_video/py_table_of_contents_video.markdown @@ -1,7 +1,7 @@ Video Analysis {#tutorial_py_table_of_contents_video} ============== -- @subpage tutorial_py_meanshift +- @ref tutorial_meanshift We have already seen an example of color-based tracking. It is simpler. This time, we see significantly better diff --git a/doc/py_tutorials/py_video/py_meanshift/images/camshift_face.gif b/doc/tutorials/video/meanshift/images/camshift_face.gif similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/camshift_face.gif rename to doc/tutorials/video/meanshift/images/camshift_face.gif diff --git a/doc/py_tutorials/py_video/py_meanshift/images/camshift_result.jpg b/doc/tutorials/video/meanshift/images/camshift_result.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/camshift_result.jpg rename to doc/tutorials/video/meanshift/images/camshift_result.jpg diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_basics.jpg b/doc/tutorials/video/meanshift/images/meanshift_basics.jpg similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_basics.jpg rename to doc/tutorials/video/meanshift/images/meanshift_basics.jpg diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_face.gif b/doc/tutorials/video/meanshift/images/meanshift_face.gif similarity index 100% rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_face.gif rename to doc/tutorials/video/meanshift/images/meanshift_face.gif diff --git a/doc/py_tutorials/py_video/py_meanshift/images/meanshift_result.jpg b/doc/tutorials/video/meanshift/images/meanshift_result.jpg similarity index 100% 
rename from doc/py_tutorials/py_video/py_meanshift/images/meanshift_result.jpg rename to doc/tutorials/video/meanshift/images/meanshift_result.jpg diff --git a/doc/tutorials/video/meanshift/meanshift.markdown b/doc/tutorials/video/meanshift/meanshift.markdown new file mode 100644 index 0000000000..d2649f6dc1 --- /dev/null +++ b/doc/tutorials/video/meanshift/meanshift.markdown @@ -0,0 +1,118 @@ +Meanshift and Camshift {#tutorial_meanshift} +====================== + +Goal +---- + +In this chapter, + +- We will learn about the Meanshift and Camshift algorithms to track objects in videos. + +Meanshift +--------- + +The intuition behind the meanshift is simple. Consider you have a set of points. (It can be a pixel +distribution like histogram backprojection). You are given a small window (may be a circle) and you +have to move that window to the area of maximum pixel density (or maximum number of points). It is +illustrated in the simple image given below: + +![image](images/meanshift_basics.jpg) + +The initial window is shown in blue circle with the name "C1". Its original center is marked in blue +rectangle, named "C1_o". But if you find the centroid of the points inside that window, you will +get the point "C1_r" (marked in small blue circle) which is the real centroid of the window. Surely +they don't match. So move your window such that the circle of the new window matches with the previous +centroid. Again find the new centroid. Most probably, it won't match. So move it again, and continue +the iterations such that the center of window and its centroid falls on the same location (or within a +small desired error). So finally what you obtain is a window with maximum pixel distribution. It is +marked with a green circle, named "C2". As you can see in the image, it has maximum number of points. 
The +whole process is demonstrated on a static image below: + +![image](images/meanshift_face.gif) + +So we normally pass the histogram backprojected image and initial target location. When the object +moves, obviously the movement is reflected in the histogram backprojected image. As a result, the meanshift +algorithm moves our window to the new location with maximum density. + +### Meanshift in OpenCV + +To use meanshift in OpenCV, first we need to set up the target, find its histogram so that we can +backproject the target on each frame for calculation of meanshift. We also need to provide an initial +location of window. For histogram, only Hue is considered here. Also, to avoid false values due to +low light, low light values are discarded using **cv.inRange()** function. + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/meanshift/meanshift.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/video/meanshift/meanshift.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/meanshift/meanshift.py +@end_toggle + +Three frames from the video I used are given below: + +![image](images/meanshift_result.jpg) + +Camshift +-------- + +Did you closely watch the last result? There is a problem. Our window always has the same size whether +the car is very far or very close to the camera. That is not good. We need to adapt the window +size with size and rotation of the target. Once again, the solution came from "OpenCV Labs" and it +is called CAMshift (Continuously Adaptive Meanshift) published by Gary Bradski in his paper +"Computer Vision Face Tracking for Use in a Perceptual User Interface" in 1998 @cite Bradski98 . + +It applies meanshift first. 
Once meanshift converges, it updates the size of the window as, +\f$s = 2 \times \sqrt{\frac{M_{00}}{256}}\f$. It also calculates the orientation of the best fitting ellipse +to it. Again it applies the meanshift with the new scaled search window and the previous window location. +The process continues until the required accuracy is met. + +![image](images/camshift_face.gif) + +### Camshift in OpenCV + +It is similar to meanshift, but returns a rotated rectangle (that is our result) and box +parameters (to be passed as the search window in the next iteration). See the code below: + +@add_toggle_cpp +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/3.4/samples/cpp/tutorial_code/video/meanshift/camshift.cpp) + +- **Code at glance:** + @include samples/cpp/tutorial_code/video/meanshift/camshift.cpp +@end_toggle + +@add_toggle_python +- **Downloadable code**: Click + [here](https://github.com/opencv/opencv/tree/3.4/samples/python/tutorial_code/video/meanshift/camshift.py) + +- **Code at glance:** + @include samples/python/tutorial_code/video/meanshift/camshift.py +@end_toggle + +Three frames of the result are shown below: + +![image](images/camshift_result.jpg) + +Additional Resources +-------------------- + +-# French Wikipedia page on [Camshift](http://fr.wikipedia.org/wiki/Camshift). (The two animations + are taken from there) +2. Bradski, G.R., "Real time face and object tracking as a component of a perceptual user + interface," Applications of Computer Vision, 1998. WACV '98. Proceedings., Fourth IEEE Workshop + on , vol., no., pp.214,219, 19-21 Oct 1998 + +Exercises +--------- + +-# OpenCV comes with a Python [sample](https://github.com/opencv/opencv/blob/3.4/samples/python/camshift.py) for an interactive demo of camshift. Use it, hack it, understand + it. 
diff --git a/doc/tutorials/video/table_of_content_video.markdown b/doc/tutorials/video/table_of_content_video.markdown index 2e30d2cc8b..ca37eb09cb 100644 --- a/doc/tutorials/video/table_of_content_video.markdown +++ b/doc/tutorials/video/table_of_content_video.markdown @@ -14,3 +14,9 @@ tracking and foreground extractions. We will learn how to extract foreground masks from both videos and sequences of images and to show them. + +- @subpage tutorial_meanshift + + *Languages:* C++, Python + + Learn how to use the Meanshift and Camshift algorithms to track objects in videos. diff --git a/samples/cpp/tutorial_code/video/meanshift/camshift.cpp b/samples/cpp/tutorial_code/video/meanshift/camshift.cpp new file mode 100644 index 0000000000..13965e623f --- /dev/null +++ b/samples/cpp/tutorial_code/video/meanshift/camshift.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main(int argc, char **argv) +{ + const string about = + "This sample demonstrates the camshift algorithm.\n" + "The example file can be downloaded from:\n" + " https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"; + const string keys = + "{ h help | | print this help message }" + "{ @image || path to image file }"; + CommandLineParser parser(argc, argv, keys); + parser.about(about); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string filename = parser.get("@image"); + if (!parser.check()) + { + parser.printErrors(); + return 0; + } + + VideoCapture capture(filename); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" 
<< endl; + return 0; + } + + Mat frame, roi, hsv_roi, mask; + // take first frame of the video + capture >> frame; + + // setup initial location of window + Rect track_window(300, 200, 100, 50); // simply hardcoded the values + + // set up the ROI for tracking + roi = frame(track_window); + cvtColor(roi, hsv_roi, COLOR_BGR2HSV); + inRange(hsv_roi, Scalar(0, 60, 32), Scalar(180, 255, 255), mask); + + float range_[] = {0, 180}; + const float* range[] = {range_}; + Mat roi_hist; + int histSize[] = {180}; + int channels[] = {0}; + calcHist(&hsv_roi, 1, channels, mask, roi_hist, 1, histSize, range); + normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX); + + // Setup the termination criteria, either 10 iteration or move by atleast 1 pt + TermCriteria term_crit(TermCriteria::EPS | TermCriteria::COUNT, 10, 1); + + while(true){ + Mat hsv, dst; + capture >> frame; + if (frame.empty()) + break; + cvtColor(frame, hsv, COLOR_BGR2HSV); + calcBackProject(&hsv, 1, channels, roi_hist, dst, range); + + // apply camshift to get the new location + RotatedRect rot_rect = CamShift(dst, track_window, term_crit); + + // Draw it on image + Point2f points[4]; + rot_rect.points(points); + for (int i = 0; i < 4; i++) + line(frame, points[i], points[(i+1)%4], 255, 2); + imshow("img2", frame); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + } +} diff --git a/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp b/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp new file mode 100644 index 0000000000..0e16442c6d --- /dev/null +++ b/samples/cpp/tutorial_code/video/meanshift/meanshift.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +using namespace cv; +using namespace std; + +int main(int argc, char **argv) +{ + const string about = + "This sample demonstrates the meanshift algorithm.\n" + "The example file can be downloaded from:\n" + " 
https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4"; + const string keys = + "{ h help | | print this help message }" + "{ @image || path to image file }"; + CommandLineParser parser(argc, argv, keys); + parser.about(about); + if (parser.has("help")) + { + parser.printMessage(); + return 0; + } + string filename = parser.get("@image"); + if (!parser.check()) + { + parser.printErrors(); + return 0; + } + + VideoCapture capture(filename); + if (!capture.isOpened()){ + //error in opening the video input + cerr << "Unable to open file!" << endl; + return 0; + } + + Mat frame, roi, hsv_roi, mask; + // take first frame of the video + capture >> frame; + + // setup initial location of window + Rect track_window(300, 200, 100, 50); // simply hardcoded the values + + // set up the ROI for tracking + roi = frame(track_window); + cvtColor(roi, hsv_roi, COLOR_BGR2HSV); + inRange(hsv_roi, Scalar(0, 60, 32), Scalar(180, 255, 255), mask); + + float range_[] = {0, 180}; + const float* range[] = {range_}; + Mat roi_hist; + int histSize[] = {180}; + int channels[] = {0}; + calcHist(&hsv_roi, 1, channels, mask, roi_hist, 1, histSize, range); + normalize(roi_hist, roi_hist, 0, 255, NORM_MINMAX); + + // Setup the termination criteria, either 10 iteration or move by atleast 1 pt + TermCriteria term_crit(TermCriteria::EPS | TermCriteria::COUNT, 10, 1); + + while(true){ + Mat hsv, dst; + capture >> frame; + if (frame.empty()) + break; + cvtColor(frame, hsv, COLOR_BGR2HSV); + calcBackProject(&hsv, 1, channels, roi_hist, dst, range); + + // apply meanshift to get the new location + meanShift(dst, track_window, term_crit); + + // Draw it on image + rectangle(frame, track_window, 255, 2); + imshow("img2", frame); + + int keyboard = waitKey(30); + if (keyboard == 'q' || keyboard == 27) + break; + } +} diff --git a/samples/python/tutorial_code/video/meanshift/camshift.py b/samples/python/tutorial_code/video/meanshift/camshift.py new file mode 
100644 index 0000000000..d115bdb4d3 --- /dev/null +++ b/samples/python/tutorial_code/video/meanshift/camshift.py @@ -0,0 +1,50 @@ +import numpy as np +import cv2 as cv +import argparse + +parser = argparse.ArgumentParser(description='This sample demonstrates the camshift algorithm. \ + The example file can be downloaded from: \ + https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4') +parser.add_argument('image', type=str, help='path to image file') +args = parser.parse_args() + +cap = cv.VideoCapture(args.image) + +# take first frame of the video +ret,frame = cap.read() + +# setup initial location of window +x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values +track_window = (x, y, w, h) + +# set up the ROI for tracking +roi = frame[y:y+h, x:x+w] +hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) +mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) +roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) +cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) + +# Setup the termination criteria, either 10 iteration or move by atleast 1 pt +term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) + +while(1): + ret, frame = cap.read() + + if ret == True: + hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) + dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) + + # apply camshift to get the new location + ret, track_window = cv.CamShift(dst, track_window, term_crit) + + # Draw it on image + pts = cv.boxPoints(ret) + pts = np.int0(pts) + img2 = cv.polylines(frame,[pts],True, 255,2) + cv.imshow('img2',img2) + + k = cv.waitKey(30) & 0xff + if k == 27: + break + else: + break diff --git a/samples/python/tutorial_code/video/meanshift/meanshift.py b/samples/python/tutorial_code/video/meanshift/meanshift.py new file mode 100644 index 0000000000..f765b023e9 --- /dev/null +++ b/samples/python/tutorial_code/video/meanshift/meanshift.py @@ -0,0 +1,49 @@ +import numpy as np +import cv2 as cv 
+import argparse + +parser = argparse.ArgumentParser(description='This sample demonstrates the meanshift algorithm. \ + The example file can be downloaded from: \ + https://www.bogotobogo.com/python/OpenCV_Python/images/mean_shift_tracking/slow_traffic_small.mp4') +parser.add_argument('image', type=str, help='path to image file') +args = parser.parse_args() + +cap = cv.VideoCapture(args.image) + +# take first frame of the video +ret,frame = cap.read() + +# setup initial location of window +x, y, w, h = 300, 200, 100, 50 # simply hardcoded the values +track_window = (x, y, w, h) + +# set up the ROI for tracking +roi = frame[y:y+h, x:x+w] +hsv_roi = cv.cvtColor(roi, cv.COLOR_BGR2HSV) +mask = cv.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.))) +roi_hist = cv.calcHist([hsv_roi],[0],mask,[180],[0,180]) +cv.normalize(roi_hist,roi_hist,0,255,cv.NORM_MINMAX) + +# Setup the termination criteria, either 10 iteration or move by atleast 1 pt +term_crit = ( cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 1 ) + +while(1): + ret, frame = cap.read() + + if ret == True: + hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV) + dst = cv.calcBackProject([hsv],[0],roi_hist,[0,180],1) + + # apply meanshift to get the new location + ret, track_window = cv.meanShift(dst, track_window, term_crit) + + # Draw it on image + x,y,w,h = track_window + img2 = cv.rectangle(frame, (x,y), (x+w,y+h), 255,2) + cv.imshow('img2',img2) + + k = cv.waitKey(30) & 0xff + if k == 27: + break + else: + break