mirror of
https://github.com/opencv/opencv.git
synced 2024-12-16 02:19:12 +08:00
94d63d4281
The docstring for one of the Python sample programs includes a link to the research paper describing the main algorithm. That link is no longer valid (results in a 404 error) so this update replaces it with another link from the same institution which is currently valid.
199 lines
6.3 KiB
Python
Executable File
199 lines
6.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
'''
|
|
MOSSE tracking sample
|
|
|
|
This sample implements a correlation-based tracking approach, described in [1].
|
|
|
|
Usage:
|
|
mosse.py [--pause] [<video source>]
|
|
|
|
--pause - Start with playback paused at the first video frame.
|
|
Useful for tracking target selection.
|
|
|
|
Draw rectangles around objects with a mouse to track them.
|
|
|
|
Keys:
|
|
SPACE - pause video
|
|
c - clear targets
|
|
|
|
[1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters"
|
|
http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf
|
|
'''
|
|
|
|
# Python 2/3 compatibility
|
|
from __future__ import print_function
|
|
import sys
|
|
PY3 = sys.version_info[0] == 3
|
|
|
|
if PY3:
|
|
xrange = range
|
|
|
|
import numpy as np
|
|
import cv2
|
|
from common import draw_str, RectSelector
|
|
import video
|
|
|
|
def rnd_warp(a):
    '''Return a randomly warped copy of image `a`, same size as the input.

    The warp is an affine map built from a small random rotation plus a
    random perturbation of the 2x2 linear part; the translation is chosen
    so that the point (w/2, h/2) stays fixed. Pixels pulled in from outside
    the image are filled by reflection (cv2.BORDER_REFLECT).
    '''
    height, width = a.shape[:2]
    spread = 0.2

    # Random rotation angle in [-spread/2, spread/2) radians.
    angle = (np.random.rand()-0.5)*spread
    cos_a, sin_a = np.cos(angle), np.sin(angle)

    # Rotation matrix, then jitter every entry by up to +/- spread/2.
    linear = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
    linear += (np.random.rand(2, 2) - 0.5)*spread

    # Translation column that keeps the centre point where it is.
    centre = (width/2, height/2)
    warp = np.zeros((2, 3))
    warp[:, :2] = linear
    warp[:, 2] = centre - np.dot(linear, centre)

    return cv2.warpAffine(a, warp, (width, height), borderMode = cv2.BORDER_REFLECT)
|
|
|
|
def divSpec(A, B):
    '''Element-wise complex division A / B of two-channel spectra.

    A, B -- arrays whose last axis holds (real, imaginary) parts
    Returns a new array in the same (real, imaginary) layout.
    '''
    numerator = A[..., 0] + 1j*A[..., 1]
    denominator = B[..., 0] + 1j*B[..., 1]
    quotient = numerator / denominator
    # Split the complex result back into separate real / imaginary channels.
    return np.dstack([np.real(quotient), np.imag(quotient)]).copy()
|
|
|
|
# Small constant added to standard deviations before dividing by them
# (see MOSSE.preprocess and MOSSE.correlate) so the division cannot hit zero.
eps = 1e-5
|
|
|
|
class MOSSE:
    '''Single-target tracker built around an adaptive correlation filter.

    The filter is stored as a numerator / denominator pair of spectra
    (self.H1, self.H2) in the two-channel (real, imaginary) layout that
    cv2.dft produces with DFT_COMPLEX_OUTPUT. The filter actually used for
    correlation (self.H) is rebuilt from that pair by update_kernel().

    Attributes set while tracking:
      pos       -- (x, y) centre of the tracking window
      size      -- (w, h) of the tracking window
      good      -- True when the last update found the target (PSR > 8.0)
      psr       -- peak-to-sidelobe ratio of the last correlation response
      last_img  -- last image patch cut out at the tracked position
      last_resp -- last correlation response
    '''

    def __init__(self, frame, rect):
        '''Train the initial filter on the part of `frame` selected by `rect`.

        frame -- image the target is cut from (the sample passes a grayscale frame)
        rect  -- (x1, y1, x2, y2) corners of the selected target
        '''
        x1, y1, x2, y2 = rect
        # Use DFT-friendly window dimensions and re-centre the window on the
        # original selection.
        w, h = map(cv2.getOptimalDFTSize, [x2-x1, y2-y1])
        x1, y1 = (x1+x2-w)//2, (y1+y2-h)//2
        self.pos = x, y = x1+0.5*(w-1), y1+0.5*(h-1)
        self.size = w, h
        img = cv2.getRectSubPix(frame, (w, h), (x, y))

        self.win = cv2.createHanningWindow((w, h), cv2.CV_32F)

        # Desired response: a Gaussian bump (sigma 2.0) centred in the window
        # and scaled so that its peak equals 1.
        g = np.zeros((h, w), np.float32)
        g[h//2, w//2] = 1
        g = cv2.GaussianBlur(g, (-1, -1), 2.0)
        g /= g.max()

        self.G = cv2.dft(g, flags=cv2.DFT_COMPLEX_OUTPUT)
        self.H1 = np.zeros_like(self.G)
        self.H2 = np.zeros_like(self.G)
        # Accumulate numerator and denominator over 128 randomly warped
        # copies of the initial patch.
        for _ in range(128):
            a = self.preprocess(rnd_warp(img))
            A = cv2.dft(a, flags=cv2.DFT_COMPLEX_OUTPUT)
            self.H1 += cv2.mulSpectrums(self.G, A, 0, conjB=True)
            self.H2 += cv2.mulSpectrums(A, A, 0, conjB=True)
        self.update_kernel()
        self.update(frame)

    def update(self, frame, rate = 0.125):
        '''Locate the target in `frame` and, if it is found, adapt the filter.

        frame -- new image to search (same kind of image as given to __init__)
        rate  -- weight of the new frame in the running average of H1 and H2

        Always refreshes last_img, last_resp, psr and good. When the PSR is
        not above 8.0 the method returns early, leaving the position and the
        filter unchanged.
        '''
        (x, y), (w, h) = self.pos, self.size
        self.last_img = img = cv2.getRectSubPix(frame, (w, h), (x, y))
        img = self.preprocess(img)
        self.last_resp, (dx, dy), self.psr = self.correlate(img)
        self.good = self.psr > 8.0
        if not self.good:
            return

        # Re-centre on the detected peak and cut a fresh patch there; this
        # patch is what the filter is adapted to.
        self.pos = x+dx, y+dy
        self.last_img = img = cv2.getRectSubPix(frame, (w, h), self.pos)
        img = self.preprocess(img)

        A = cv2.dft(img, flags=cv2.DFT_COMPLEX_OUTPUT)
        H1 = cv2.mulSpectrums(self.G, A, 0, conjB=True)
        H2 = cv2.mulSpectrums(A, A, 0, conjB=True)
        # Exponential running average of numerator and denominator.
        self.H1 = self.H1 * (1.0-rate) + H1 * rate
        self.H2 = self.H2 * (1.0-rate) + H2 * rate
        self.update_kernel()

    @staticmethod
    def _minmax_to_u8(a):
        '''Rescale `a` linearly so its minimum maps to 0 and its maximum to 255 (uint8).'''
        # np.ptp(a) instead of a.ptp(): the ndarray method was removed in
        # NumPy 2.0, the module-level function works on every NumPy version.
        return np.uint8( (a-a.min()) / np.ptp(a)*255 )

    @property
    def state_vis(self):
        '''Image showing, side by side, the last patch, the filter kernel and the last response.'''
        f = cv2.idft(self.H, flags=cv2.DFT_SCALE | cv2.DFT_REAL_OUTPUT )
        h, w = f.shape
        # Shift the spatial-domain kernel along both axes for display.
        f = np.roll(f, -h//2, 0)
        f = np.roll(f, -w//2, 1)
        kernel = self._minmax_to_u8(f)
        resp = self.last_resp
        resp = np.uint8(np.clip(resp/resp.max(), 0, 1)*255)
        vis = np.hstack([self.last_img, kernel, resp])
        return vis

    def draw_state(self, vis):
        '''Draw the tracking window on `vis` (modified in place).

        A filled dot marks the centre while the target is tracked; when it is
        lost the window is crossed out and the PSR value is printed below it.
        '''
        (x, y), (w, h) = self.pos, self.size
        x1, y1, x2, y2 = int(x-0.5*w), int(y-0.5*h), int(x+0.5*w), int(y+0.5*h)
        cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 0, 255))
        if self.good:
            cv2.circle(vis, (int(x), int(y)), 2, (0, 0, 255), -1)
        else:
            cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255))
            cv2.line(vis, (x2, y1), (x1, y2), (0, 0, 255))
        draw_str(vis, (x1, y2+16), 'PSR: %.2f' % self.psr)

    def preprocess(self, img):
        '''Log-transform `img`, normalise it to zero mean / unit deviation and apply the Hanning window.'''
        img = np.log(np.float32(img)+1.0)
        img = (img-img.mean()) / (img.std()+eps)
        return img*self.win

    def correlate(self, img):
        '''Correlate a preprocessed patch with the current filter.

        Returns (resp, (dx, dy), psr):
          resp     -- correlation response
          (dx, dy) -- offset of the response peak from the window centre
          psr      -- peak-to-sidelobe ratio of the response
        '''
        C = cv2.mulSpectrums(cv2.dft(img, flags=cv2.DFT_COMPLEX_OUTPUT), self.H, 0, conjB=True)
        resp = cv2.idft(C, flags=cv2.DFT_SCALE | cv2.DFT_REAL_OUTPUT)
        h, w = resp.shape
        _, mval, _, (mx, my) = cv2.minMaxLoc(resp)
        # Zero out an 11x11 window around the peak before taking the
        # sidelobe statistics.
        side_resp = resp.copy()
        cv2.rectangle(side_resp, (mx-5, my-5), (mx+5, my+5), 0, -1)
        smean, sstd = side_resp.mean(), side_resp.std()
        psr = (mval-smean) / (sstd+eps)
        return resp, (mx-w//2, my-h//2), psr

    def update_kernel(self):
        '''Rebuild self.H as the complex conjugate of H1 / H2.'''
        self.H = divSpec(self.H1, self.H2)
        # Negating the imaginary channel conjugates the filter.
        self.H[...,1] *= -1
|
|
|
|
class App:
    '''Interactive demo: shows the video and runs a MOSSE tracker for every rectangle drawn.'''

    def __init__(self, video_src, paused = False):
        '''Open `video_src`, show its first frame and arm the rectangle selector.

        video_src -- source passed on to video.create_capture
        paused    -- start with playback paused
        '''
        self.paused = paused
        self.trackers = []
        self.cap = video.create_capture(video_src)
        _, self.frame = self.cap.read()
        # The 'frame' window has to exist before the selector is attached to it.
        cv2.imshow('frame', self.frame)
        self.rect_sel = RectSelector('frame', self.onrect)

    def onrect(self, rect):
        '''Selector callback: start a new tracker on `rect` in the current frame.'''
        gray = cv2.cvtColor(self.frame, cv2.COLOR_BGR2GRAY)
        self.trackers.append(MOSSE(gray, rect))

    def run(self):
        '''Main loop; ends when the video runs out or ESC is pressed.'''
        while True:
            if not self.paused:
                grabbed, self.frame = self.cap.read()
                if not grabbed:
                    break
                gray = cv2.cvtColor(self.frame, cv2.COLOR_BGR2GRAY)
                for trk in self.trackers:
                    trk.update(gray)

            canvas = self.frame.copy()
            for trk in self.trackers:
                trk.draw_state(canvas)
            if self.trackers:
                # Only the most recently added tracker gets a state window.
                cv2.imshow('tracker state', self.trackers[-1].state_vis)
            self.rect_sel.draw(canvas)

            cv2.imshow('frame', canvas)
            key = cv2.waitKey(10)
            if key == 27:
                # ESC quits.
                break
            elif key == ord(' '):
                self.paused = not self.paused
            elif key == ord('c'):
                self.trackers = []
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: print the usage text, parse the arguments
    # and run the demo application.
    print(__doc__)
    import getopt
    import sys

    # '--pause' is the only recognised option; any other option makes
    # getopt raise getopt.GetoptError.
    opts, args = getopt.getopt(sys.argv[1:], '', ['pause'])
    opts = dict(opts)

    # Fall back to source '0' when no video source is given. An explicit
    # check replaces the former bare `except:`, which would also have
    # swallowed unrelated errors such as KeyboardInterrupt.
    video_src = args[0] if args else '0'

    App(video_src, paused = '--pause' in opts).run()
|