mirror of
https://github.com/opencv/opencv.git
synced 2025-01-18 22:44:02 +08:00
Merge pull request #22808 from zihaomu:nanotrack
[test data in opencv_extra](https://github.com/opencv/opencv_extra/pull/1016) NanoTrack is an extremely lightweight and fast object-tracking model. The total size is **1.1 MB**. The FPS on an M1 chip is **150**, and on a Raspberry Pi 4 it is about **30**. (Float32 CPU only) With this model, many users can run object tracking on edge devices. The author of NanoTrack is @HonglinChu. The original repo is https://github.com/HonglinChu/NanoTrack. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
b16f76eede
commit
cb8f1dca3b
@ -849,6 +849,43 @@ public:
|
||||
//bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
|
||||
};
|
||||
|
||||
/** @brief The Nano tracker is a super lightweight DNN-based general object tracker.
|
||||
*
|
||||
* Nano tracker is much faster and extremely lightweight due to special model structure, the whole model size is about 1.1 MB.
|
||||
* Nano tracker needs two models: one for feature extraction (backbone) and the other for localization (neckhead).
|
||||
* Please download these two ONNX models from: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack/models/onnx
|
||||
* Original repo is here: https://github.com/HonglinChu/NanoTrack
|
||||
* Author:HongLinChu, 1628464345@qq.com
|
||||
*/
|
||||
class CV_EXPORTS_W TrackerNano : public Tracker
{
protected:
    TrackerNano();  // instances are obtained through ::create()

public:
    virtual ~TrackerNano() CV_OVERRIDE;

    /** @brief Settings consumed by TrackerNano::create(). */
    struct CV_EXPORTS_W_SIMPLE Params
    {
        CV_WRAP Params();

        CV_PROP_RW std::string backbone;  //!< model file of the feature-extraction network
        CV_PROP_RW std::string neckhead;  //!< model file of the localization (neck + head) network
        CV_PROP_RW int backend;           //!< preferable DNN backend applied to both networks
        CV_PROP_RW int target;            //!< preferable DNN target applied to both networks
    };

    /** @brief Constructor
    @param parameters NanoTrack parameters TrackerNano::Params
    */
    static CV_WRAP Ptr<TrackerNano> create(const TrackerNano::Params& parameters = TrackerNano::Params());

    /** @brief Return tracking score
    */
    CV_WRAP virtual float getTrackingScore() = 0;

    // init() and update() are not redeclared here:
    //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
};
|
||||
|
||||
//! @} video_track
|
||||
|
||||
|
@ -2,4 +2,5 @@
|
||||
typedef TrackerMIL::Params TrackerMIL_Params;
|
||||
typedef TrackerGOTURN::Params TrackerGOTURN_Params;
|
||||
typedef TrackerDaSiamRPN::Params TrackerDaSiamRPN_Params;
|
||||
typedef TrackerNano::Params TrackerNano_Params;
|
||||
#endif
|
||||
|
359
modules/video/src/tracking/tracker_nano.cpp
Normal file
359
modules/video/src/tracking/tracker_nano.cpp
Normal file
@ -0,0 +1,359 @@
|
||||
// This file is part of OpenCV project.
|
||||
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
||||
// of this distribution and at http://opencv.org/license.html.
|
||||
|
||||
// This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp
|
||||
// Author, HongLinChu, 1628464345@qq.com
|
||||
// Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com
|
||||
|
||||
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
||||
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
||||
|
||||
#include "../precomp.hpp"
|
||||
#ifdef HAVE_OPENCV_DNN
|
||||
#include "opencv2/dnn.hpp"
|
||||
#endif
|
||||
|
||||
namespace cv {
|
||||
|
||||
// The base interface holds no state of its own, so construction has nothing to do.
TrackerNano::TrackerNano() = default;
|
||||
|
||||
// Nothing to release at this level.
TrackerNano::~TrackerNano() = default;
|
||||
|
||||
// Default parameters: model files named "backbone.onnx" / "neckhead.onnx" and, when the
// dnn module is available, the default backend on the CPU target.
TrackerNano::Params::Params()
    : backbone("backbone.onnx")
    , neckhead("neckhead.onnx")
#ifdef HAVE_OPENCV_DNN
    , backend(dnn::DNN_BACKEND_DEFAULT)
    , target(dnn::DNN_TARGET_CPU)
#else
    , backend(-1)  // invalid value
    , target(-1)   // invalid value
#endif
{
}
|
||||
|
||||
#ifdef HAVE_OPENCV_DNN
|
||||
static void softmax(const Mat& src, Mat& dst)
|
||||
{
|
||||
Mat maxVal;
|
||||
cv::max(src.row(1), src.row(0), maxVal);
|
||||
|
||||
src.row(1) -= maxVal;
|
||||
src.row(0) -= maxVal;
|
||||
|
||||
exp(src, dst);
|
||||
|
||||
Mat sumVal = dst.row(0) + dst.row(1);
|
||||
dst.row(0) = dst.row(0) / sumVal;
|
||||
dst.row(1) = dst.row(1) / sumVal;
|
||||
}
|
||||
|
||||
// Size measure of a w x h box: both sides are enlarged by the mean of w and h and the
// square root of the resulting area is returned.
static float sizeCal(float w, float h)
{
    const float margin = (w + h) * 0.5f;
    return sqrt((w + margin) * (h + margin));
}
|
||||
|
||||
// Element-wise counterpart of the scalar sizeCal(): for every position, both sides are
// enlarged by the mean of w and h and the square root of the padded area is returned.
static Mat sizeCal(const Mat& w, const Mat& h)
{
    const Mat margin = (w + h) * 0.5;

    Mat result = (w + margin).mul((h + margin));
    cv::sqrt(result, result);  // in place
    return result;
}
|
||||
|
||||
// Similar python code: r = np.maximum(r, 1. / r) # r is matrix
|
||||
// Replaces every element v of srcDst with max(v, 1 / v), in place.
// The data is walked as one run of srcDst.total() floats starting at the first row.
// NOTE(review): this presumably expects a continuous CV_32F matrix -- confirm at call sites.
static void elementReciprocalMax(Mat& srcDst)
{
    float* cur = srcDst.ptr<float>(0);
    float* const end = cur + srcDst.total();

    for (; cur != end; ++cur)
    {
        const float v = *cur;
        *cur = std::max(v, 1.0f/v);
    }
}
|
||||
|
||||
class TrackerNanoImpl : public TrackerNano
|
||||
{
|
||||
public:
|
||||
TrackerNanoImpl(const TrackerNano::Params& parameters)
|
||||
: params(parameters)
|
||||
{
|
||||
backbone = dnn::readNet(params.backbone);
|
||||
neckhead = dnn::readNet(params.neckhead);
|
||||
|
||||
CV_Assert(!backbone.empty());
|
||||
CV_Assert(!neckhead.empty());
|
||||
|
||||
backbone.setPreferableBackend(params.backend);
|
||||
backbone.setPreferableTarget(params.target);
|
||||
neckhead.setPreferableBackend(params.backend);
|
||||
neckhead.setPreferableTarget(params.target);
|
||||
}
|
||||
|
||||
void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
|
||||
bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
|
||||
float getTrackingScore() CV_OVERRIDE;
|
||||
|
||||
// Save the target bounding box for each frame.
|
||||
std::vector<float> targetSz = {0, 0}; // H and W of bounding box
|
||||
std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y)
|
||||
float tracking_score;
|
||||
|
||||
TrackerNano::Params params;
|
||||
|
||||
struct trackerConfig
|
||||
{
|
||||
float windowInfluence = 0.455f;
|
||||
float lr = 0.37f;
|
||||
float contextAmount = 0.5;
|
||||
bool swapRB = true;
|
||||
int totalStride = 16;
|
||||
float penaltyK = 0.055f;
|
||||
};
|
||||
|
||||
protected:
|
||||
const int exemplarSize = 127;
|
||||
const int instanceSize = 255;
|
||||
|
||||
trackerConfig trackState;
|
||||
int scoreSize;
|
||||
Size imgSize = {0, 0};
|
||||
Mat hanningWindow;
|
||||
Mat grid2searchX, grid2searchY;
|
||||
|
||||
dnn::Net backbone, neckhead;
|
||||
Mat image;
|
||||
|
||||
void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz);
|
||||
void generateGrids();
|
||||
};
|
||||
|
||||
void TrackerNanoImpl::generateGrids()
|
||||
{
|
||||
int sz = scoreSize;
|
||||
const int sz2 = sz / 2;
|
||||
|
||||
std::vector<float> x1Vec(sz, 0);
|
||||
|
||||
for (int i = 0; i < sz; i++)
|
||||
{
|
||||
x1Vec[i] = i - sz2;
|
||||
}
|
||||
|
||||
Mat x1M(1, sz, CV_32FC1, x1Vec.data());
|
||||
|
||||
cv::repeat(x1M, sz, 1, grid2searchX);
|
||||
cv::repeat(x1M.t(), 1, sz, grid2searchY);
|
||||
|
||||
grid2searchX *= trackState.totalStride;
|
||||
grid2searchY *= trackState.totalStride;
|
||||
|
||||
grid2searchX += instanceSize/2;
|
||||
grid2searchY += instanceSize/2;
|
||||
}
|
||||
|
||||
// Starts tracking the object inside boundingBox_ on image_.
//
// Stores the target center/size, crops the template region (target plus context),
// runs it through the backbone, hands the resulting features to the neck/head as
// "input1", and prepares the Hanning window and coordinate grids used by update().
void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_)
{
    scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8;
    trackState = trackerConfig();

    image = image_.getMat().clone();
    imgSize = image.size();

    // Box given by its top-left corner -> center point + size.
    const float boxW = float(boundingBox_.width);
    const float boxH = float(boundingBox_.height);
    targetPos[0] = float(boundingBox_.x) + boxW * 0.5f;
    targetPos[1] = float(boundingBox_.y) + boxH * 0.5f;
    targetSz[0] = boxW;
    targetSz[1] = boxH;

    // Side of the square template region: the target enlarged by the context margin.
    const float sizeSum = targetSz[0] + targetSz[1];
    const float contextW = targetSz[0] + trackState.contextAmount * (sizeSum);
    const float contextH = targetSz[1] + trackState.contextAmount * (sizeSum);
    const int templateSide = int(cv::sqrt(contextW * contextH));

    Mat templateCrop;
    getSubwindow(templateCrop, image, templateSide, exemplarSize);
    Mat templateBlob = dnn::blobFromImage(templateCrop, 1.0, Size(), Scalar(), trackState.swapRB);

    backbone.setInput(templateBlob);
    Mat templateFeat = backbone.forward(); // Feature extraction.
    neckhead.setInput(templateFeat, "input1");

    createHanningWindow(hanningWindow, Size(scoreSize, scoreSize), CV_32F);
    generateGrids();
}
|
||||
|
||||
// Cuts a square window of side originalSz around the current target position out of
// srcImg and resizes it to resizeSz x resizeSz into dstCrop. Where the window reaches
// beyond the image, the missing area is filled with the per-channel mean of srcImg.
void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
{
    const Scalar fillColor = mean(srcImg);
    const Size srcSize = srcImg.size();
    const int halfSide = (originalSz + 1) / 2;

    // Window corners in image coordinates (float position truncated to int).
    int x0 = static_cast<int>(targetPos[0] - halfSide);
    int y0 = static_cast<int>(targetPos[1] - halfSide);
    int x1 = x0 + originalSz - 1;
    int y1 = y0 + originalSz - 1;

    // How far the window sticks out on each side of the image.
    const int padLeft = std::max(0, -x0);
    const int padTop = std::max(0, -y0);
    const int padRight = std::max(0, x1 - srcSize.width + 1);
    const int padBottom = std::max(0, y1 - srcSize.height + 1);

    // Move the window into the coordinate frame of the (possibly) padded image.
    x0 += padLeft;
    x1 += padLeft;
    y0 += padTop;
    y1 += padTop;
    const cv::Rect window(x0, y0, x1 - x0 + 1, y1 - y0 + 1);

    const bool needsPadding = padLeft != 0 || padTop != 0 || padRight != 0 || padBottom != 0;

    Mat cropped;
    if (needsPadding)
    {
        // Crop image with padding, and the padding value is the image mean.
        cv::Mat padded;
        cv::copyMakeBorder(srcImg, padded, padTop, padBottom, padLeft, padRight, cv::BORDER_CONSTANT, fillColor);
        cropped = padded(window);
    }
    else
    {
        // Crop image without padding.
        cropped = srcImg(window);
    }

    resize(cropped, dstCrop, Size(resizeSz, resizeSz));
}
|
||||
|
||||
// Locates the target in a new frame.
//
// Crops a search region around the previous position, runs backbone + neck/head, scores
// every grid cell (classification score x size/ratio penalty, blended with a Hanning
// window), and turns the best cell's box prediction into the new target state.
// boundingBoxRes receives the new box (top-left corner + size); the function always
// returns true. The score of the chosen cell is kept in tracking_score.
bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes)
{
    image = image_.getMat().clone();

    // ---- Search-region geometry from the previous target size ----
    // NOTE(review): the sum is kept in an int, so its fractional part is dropped here,
    // while init() keeps the equivalent sum as a float -- confirm this is intended.
    const int sizeSum = static_cast<int>(targetSz[0] + targetSz[1]);

    const float contextW = targetSz[0] + trackState.contextAmount * sizeSum;
    const float contextH = targetSz[1] + trackState.contextAmount * sizeSum;
    const float exemplarSide = cv::sqrt(contextW * contextH);
    const float scale_z = exemplarSize / exemplarSide;
    // NOTE(review): both operands are ints, so this ratio is an integer division.
    const float searchSide = exemplarSide * (instanceSize / exemplarSize);

    // Work with the target size in network-input scale until it is restored below.
    targetSz[0] *= scale_z;
    targetSz[1] *= scale_z;

    Mat searchCrop;
    getSubwindow(searchCrop, image, int(searchSide), instanceSize);

    // ---- Inference ----
    Mat searchBlob = dnn::blobFromImage(searchCrop, 1.0, Size(), Scalar(), trackState.swapRB);
    backbone.setInput(searchBlob);
    Mat searchFeat = backbone.forward();
    neckhead.setInput(searchFeat, "input2");

    std::vector<String> headOutNames = {"output1", "output2"};
    std::vector<Mat> headOut;
    neckhead.forward(headOut, headOutNames);

    CV_Assert(headOut.size() == 2);

    // ---- Decode the head outputs ----
    const std::vector<int> mapShape = {scoreSize, scoreSize};
    Mat clsScore = headOut[0].reshape(0, {2, scoreSize, scoreSize}); // class scores
    Mat bboxPred = headOut[1].reshape(0, {4, scoreSize, scoreSize}); // box offsets

    Mat scoreSoftmax;
    softmax(clsScore, scoreSoftmax);

    // Row 1 of the softmax output is used as the per-cell target score.
    Mat score = scoreSoftmax.row(1).reshape(0, mapShape);

    // Box corners per cell: grid coordinate minus/plus the predicted offsets.
    const auto offsetPlane = [&](int k) -> Mat { return bboxPred.row(k).reshape(0, mapShape); };
    Mat predX1 = grid2searchX - offsetPlane(0);
    Mat predY1 = grid2searchY - offsetPlane(1);
    Mat predX2 = grid2searchX + offsetPlane(2);
    Mat predY2 = grid2searchY + offsetPlane(3);

    // ---- Penalties ----
    // Scale penalty.
    // NOTE(review): the denominator is computed from targetPos (the box center); the
    // target size (targetSz) looks like the intended argument -- confirm before changing.
    Mat sc = sizeCal(predX2 - predX1, predY2 - predY1)/sizeCal(targetPos[0], targetPos[1]);
    elementReciprocalMax(sc);

    // Aspect-ratio penalty.
    const float ratioVal = targetSz[0] / targetSz[1];
    Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(ratioVal));
    Mat rc = ratioM / ((predX2 - predX1) / (predY2 - predY1));
    elementReciprocalMax(rc);

    Mat penalty;
    exp(((rc.mul(sc) - 1) * trackState.penaltyK * (-1)), penalty);
    Mat pscore = penalty.mul(score);

    // Window penalty.
    pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence;

    // ---- Pick the best cell ----
    int bestID[2] = { 0, 0 };
    minMaxIdx(pscore, 0, 0, 0, bestID);

    tracking_score = pscore.at<float>(bestID);

    const float x1Val = predX1.at<float>(bestID);
    const float x2Val = predX2.at<float>(bestID);
    const float y1Val = predY1.at<float>(bestID);
    const float y2Val = predY2.at<float>(bestID);

    // Predicted center (search-crop coordinates) and size (back in image scale).
    const float predXs = (x1Val + x2Val)/2;
    const float predYs = (y1Val + y2Val)/2;
    const float predW = (x2Val - x1Val)/scale_z;
    const float predH = (y2Val - y1Val)/scale_z;

    // Displacement of the center relative to the middle of the search crop, in image scale.
    const float diffXs = (predXs - instanceSize / 2) / scale_z;
    const float diffYs = (predYs - instanceSize / 2) / scale_z;

    // Restore the target size to image scale.
    targetSz[0] /= scale_z;
    targetSz[1] /= scale_z;

    // Size smoothing factor, weighted by how trustworthy the chosen cell is.
    const float lr = penalty.at<float>(bestID) * score.at<float>(bestID) * trackState.lr;

    float resX = targetPos[0] + diffXs;
    float resY = targetPos[1] + diffYs;
    float resW = predW * lr + (1 - lr) * targetSz[0];
    float resH = predH * lr + (1 - lr) * targetSz[1];

    // Keep the center inside the frame and the size within [10, frame size].
    resX = std::max(0.f, std::min((float)imgSize.width, resX));
    resY = std::max(0.f, std::min((float)imgSize.height, resY));
    resW = std::max(10.f, std::min((float)imgSize.width, resW));
    resH = std::max(10.f, std::min((float)imgSize.height, resH));

    targetPos[0] = resX;
    targetPos[1] = resY;
    targetSz[0] = resW;
    targetSz[1] = resH;

    // Center + size -> top-left corner + size.
    boundingBoxRes = Rect(int(resX - resW/2), int(resY - resH/2), int(resW), int(resH));
    return true;
}
|
||||
|
||||
float TrackerNanoImpl::getTrackingScore()
|
||||
{
|
||||
return tracking_score;
|
||||
}
|
||||
|
||||
// Factory: builds the dnn-backed implementation from the given parameters.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    Ptr<TrackerNano> tracker = makePtr<TrackerNanoImpl>(parameters);
    return tracker;
}
|
||||
|
||||
#else // OPENCV_HAVE_DNN
|
||||
// Factory stub for builds without the dnn module: always raises StsNotImplemented.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    CV_UNUSED(parameters);
    CV_Error(cv::Error::StsNotImplemented,
             "to use NanoTrack, the tracking module "
             "needs to be built with opencv_dnn !");
}
|
||||
#endif // OPENCV_HAVE_DNN
|
||||
}
|
@ -64,40 +64,67 @@ TEST_P(DistanceAndOverlap, GOTURN)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(Tracking, DistanceAndOverlap, TESTSET_NAMES);
|
||||
|
||||
TEST(GOTURN, memory_usage)
|
||||
static bool checkIOU(const Rect& r0, const Rect& r1, double threshold)
|
||||
{
|
||||
cv::Rect roi(145, 70, 85, 85);
|
||||
int interArea = (r0 & r1).area();
|
||||
double iouVal = (interArea * 1.0 )/ (r0.area() + r1.area() - interArea);;
|
||||
|
||||
if (iouVal > threshold)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
std::cout <<"Unmatched IOU: expect IOU val ("<<iouVal <<") > the IOU threadhold ("<<threshold<<")! Box 0 is "
|
||||
<< r0 <<", and Box 1 is "<<r1<< std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs `tracker` on the six-frame "bag" sequence: initializes it on frame 1 with a fixed
// box, then requires, for each of frames 2-6, that update() succeeds and that the
// returned box overlaps the reference box with an IOU above iouThreshold.
static void checkTrackingAccuracy(cv::Ptr<Tracker>& tracker, double iouThreshold = 0.8)
{
    // Template image
    Mat img0 = imread(findDataFile("tracking/bag/00000001.jpg"), 1);

    // Tracking image sequence.
    const char* const framePaths[] = {
        "tracking/bag/00000002.jpg",
        "tracking/bag/00000003.jpg",
        "tracking/bag/00000004.jpg",
        "tracking/bag/00000005.jpg",
        "tracking/bag/00000006.jpg",
    };
    const size_t frameCount = sizeof(framePaths) / sizeof(framePaths[0]);

    std::vector<Mat> imgs;
    for (size_t k = 0; k < frameCount; ++k)
        imgs.push_back(imread(findDataFile(framePaths[k]), 1));

    cv::Rect roi(325, 164, 100, 100);

    // Reference boxes, one per tracked frame.
    const cv::Rect expectedRois[] = {
        cv::Rect(278, 133, 99, 104),
        cv::Rect(293, 88, 93, 110),
        cv::Rect(287, 76, 89, 116),
        cv::Rect(297, 74, 82, 122),
        cv::Rect(311, 83, 78, 125),
    };
    std::vector<Rect> targetRois(expectedRois, expectedRois + sizeof(expectedRois) / sizeof(expectedRois[0]));

    tracker->init(img0, roi);
    CV_Assert(targetRois.size() == imgs.size());

    const int total = (int)imgs.size();
    for (int i = 0; i < total; i++)
    {
        const bool tracked = tracker->update(imgs[i], roi);
        ASSERT_TRUE(tracked);
        ASSERT_TRUE(checkIOU(roi, targetRois[i], iouThreshold)) << cv::format("Fail at img %d.",i);
    }
}
|
||||
|
||||
TEST(GOTURN, accuracy)
|
||||
{
|
||||
std::string model = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.prototxt");
|
||||
std::string weights = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.caffemodel", false);
|
||||
cv::TrackerGOTURN::Params params;
|
||||
params.modelTxt = model;
|
||||
params.modelBin = weights;
|
||||
cv::Ptr<Tracker> tracker = TrackerGOTURN::create(params);
|
||||
|
||||
string inputVideo = cvtest::findDataFile("tracking/david/data/david.webm");
|
||||
cv::VideoCapture video(inputVideo);
|
||||
ASSERT_TRUE(video.isOpened()) << inputVideo;
|
||||
|
||||
cv::Mat frame;
|
||||
video >> frame;
|
||||
ASSERT_FALSE(frame.empty()) << inputVideo;
|
||||
tracker->init(frame, roi);
|
||||
string ground_truth_bb;
|
||||
for (int nframes = 0; nframes < 15; ++nframes)
|
||||
{
|
||||
std::cout << "Frame: " << nframes << std::endl;
|
||||
video >> frame;
|
||||
bool res = tracker->update(frame, roi);
|
||||
ASSERT_TRUE(res);
|
||||
std::cout << "Predicted ROI: " << roi << std::endl;
|
||||
}
|
||||
// TODO! GOTURN have low accuracy. Try to remove this api at 5.x.
|
||||
checkTrackingAccuracy(tracker, 0.08);
|
||||
}
|
||||
|
||||
TEST(DaSiamRPN, memory_usage)
|
||||
TEST(DaSiamRPN, accuracy)
|
||||
{
|
||||
cv::Rect roi(145, 70, 85, 85);
|
||||
|
||||
std::string model = cvtest::findDataFile("dnn/onnx/models/dasiamrpn_model.onnx", false);
|
||||
std::string kernel_r1 = cvtest::findDataFile("dnn/onnx/models/dasiamrpn_kernel_r1.onnx", false);
|
||||
std::string kernel_cls1 = cvtest::findDataFile("dnn/onnx/models/dasiamrpn_kernel_cls1.onnx", false);
|
||||
@ -106,24 +133,18 @@ TEST(DaSiamRPN, memory_usage)
|
||||
params.kernel_r1 = kernel_r1;
|
||||
params.kernel_cls1 = kernel_cls1;
|
||||
cv::Ptr<Tracker> tracker = TrackerDaSiamRPN::create(params);
|
||||
|
||||
string inputVideo = cvtest::findDataFile("tracking/david/data/david.webm");
|
||||
cv::VideoCapture video(inputVideo);
|
||||
ASSERT_TRUE(video.isOpened()) << inputVideo;
|
||||
|
||||
cv::Mat frame;
|
||||
video >> frame;
|
||||
ASSERT_FALSE(frame.empty()) << inputVideo;
|
||||
tracker->init(frame, roi);
|
||||
string ground_truth_bb;
|
||||
for (int nframes = 0; nframes < 15; ++nframes)
|
||||
{
|
||||
std::cout << "Frame: " << nframes << std::endl;
|
||||
video >> frame;
|
||||
bool res = tracker->update(frame, roi);
|
||||
ASSERT_TRUE(res);
|
||||
std::cout << "Predicted ROI: " << roi << std::endl;
|
||||
}
|
||||
checkTrackingAccuracy(tracker, 0.7);
|
||||
}
|
||||
|
||||
// Accuracy check of TrackerNano on the shared "bag" sequence with the default IOU threshold.
TEST(NanoTrack, accuracy)
{
    cv::TrackerNano::Params params;
    params.backbone = cvtest::findDataFile("dnn/onnx/models/nanotrack_backbone_sim.onnx", false);
    params.neckhead = cvtest::findDataFile("dnn/onnx/models/nanotrack_head_sim.onnx", false);

    cv::Ptr<Tracker> tracker = TrackerNano::create(params);
    checkTrackingAccuracy(tracker);
}
|
||||
}} // namespace opencv_test::
|
||||
|
183
samples/dnn/nanotrack_tracker.cpp
Normal file
183
samples/dnn/nanotrack_tracker.cpp
Normal file
@ -0,0 +1,183 @@
|
||||
// NanoTrack
|
||||
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
||||
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
||||
// backBone model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_backbone_sim.onnx
|
||||
// headNeck model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_head_sim.onnx
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
// Command-line options of the sample, in cv::CommandLineParser syntax
// ("{ name alias | default | help }").
const char *keys =
        "{ help h | | Print help message }"
        "{ input i | | Full path to input video file, or a single-digit camera index. (empty for camera 0) }"
        "{ backbone | backbone.onnx | Path to onnx model of backbone.onnx}"
        "{ headneck | headneck.onnx | Path to onnx model of headneck.onnx }"
        "{ backend | 0 | Choose one of computation backends: "
                            "0: automatically (by default), "
                            "1: Halide language (http://halide-lang.org/), "
                            "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                            "3: OpenCV implementation, "
                            "4: VKCOM, "
                            "5: CUDA }"
        "{ target | 0 | Choose one of target computation devices: "
                            "0: CPU target (by default), "
                            "1: OpenCL, "
                            "2: OpenCL fp16 (half-float precision), "
                            "3: VPU, "
                            "4: Vulkan, "
                            "6: CUDA, "
                            "7: CUDA fp16 (half-float preprocess) }"
;
|
||||
|
||||
static
|
||||
int run(int argc, char** argv)
|
||||
{
|
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string inputName = parser.get<String>("input");
|
||||
std::string backbone = parser.get<String>("backbone");
|
||||
std::string headneck = parser.get<String>("headneck");
|
||||
int backend = parser.get<int>("backend");
|
||||
int target = parser.get<int>("target");
|
||||
|
||||
Ptr<TrackerNano> tracker;
|
||||
try
|
||||
{
|
||||
TrackerNano::Params params;
|
||||
params.backbone = samples::findFile(backbone);
|
||||
params.neckhead = samples::findFile(headneck);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
tracker = TrackerNano::create(params);
|
||||
}
|
||||
catch (const cv::Exception& ee)
|
||||
{
|
||||
std::cerr << "Exception: " << ee.what() << std::endl;
|
||||
std::cout << "Can't load the network by using the following files:" << std::endl;
|
||||
std::cout << "backbone : " << backbone << std::endl;
|
||||
std::cout << "headneck : " << headneck << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
const std::string winName = "NanoTrack";
|
||||
namedWindow(winName, WINDOW_AUTOSIZE);
|
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
|
||||
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1))
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName[0] - '0';
|
||||
std::cout << "Trying to open camera #" << c << " ..." << std::endl;
|
||||
if (!cap.open(c))
|
||||
{
|
||||
std::cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else if (inputName.size())
|
||||
{
|
||||
inputName = samples::findFileOrKeep(inputName);
|
||||
if (!cap.open(inputName))
|
||||
{
|
||||
std::cout << "Could not open: " << inputName << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first image.
|
||||
Mat image;
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame!" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
Mat image_select = image.clone();
|
||||
putText(image_select, "Select initial bounding box you want to track.", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(image_select, "And Press the ENTER key.", Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
|
||||
Rect selectRect = selectROI(winName, image_select);
|
||||
std::cout << "ROI=" << selectRect << std::endl;
|
||||
|
||||
tracker->init(image, selectRect);
|
||||
|
||||
TickMeter tickMeter;
|
||||
|
||||
for (int count = 0; ; ++count)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame " << count << ". End of video stream?" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
Rect rect;
|
||||
|
||||
tickMeter.start();
|
||||
bool ok = tracker->update(image, rect);
|
||||
tickMeter.stop();
|
||||
|
||||
float score = tracker->getTrackingScore();
|
||||
|
||||
std::cout << "frame " << count <<
|
||||
": predicted score=" << score <<
|
||||
" rect=" << rect <<
|
||||
" time=" << tickMeter.getTimeMilli() << "ms" <<
|
||||
std::endl;
|
||||
|
||||
Mat render_image = image.clone();
|
||||
|
||||
if (ok)
|
||||
{
|
||||
rectangle(render_image, rect, Scalar(0, 255, 0), 2);
|
||||
|
||||
std::string timeLabel = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
|
||||
std::string scoreLabel = format("Score: %f", score);
|
||||
putText(render_image, timeLabel, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(render_image, scoreLabel, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
}
|
||||
|
||||
imshow(winName, render_image);
|
||||
|
||||
tickMeter.reset();
|
||||
|
||||
int c = waitKey(1);
|
||||
if (c == 27 /*ESC*/)
|
||||
break;
|
||||
}
|
||||
|
||||
std::cout << "Exit" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
return run(argc, argv);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::cerr << "FATAL: C++ exception: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
@ -9,6 +9,9 @@ For DaSiamRPN:
|
||||
network: https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0
|
||||
kernel_r1: https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0
|
||||
kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0
|
||||
For NanoTrack:
|
||||
nanotrack_backbone: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_backbone_sim.onnx
|
||||
nanotrack_headneck: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_head_sim.onnx
|
||||
|
||||
USAGE:
|
||||
tracker.py [-h] [--input INPUT] [--tracker_algo TRACKER_ALGO]
|
||||
@ -18,6 +21,7 @@ USAGE:
|
||||
[--dasiamrpn_kernel_cls1 DASIAMRPN_KERNEL_CLS1]
|
||||
[--dasiamrpn_backend DASIAMRPN_BACKEND]
|
||||
[--dasiamrpn_target DASIAMRPN_TARGET]
|
||||
[--nanotrack_backbone NANOTRACK_BACKBONE] [--nanotrack_headneck NANOTRACK_HEADNECK]
|
||||
'''
|
||||
|
||||
# Python 2/3 compatibility
|
||||
@ -52,8 +56,13 @@ class App(object):
|
||||
params.kernel_cls1 = self.args.dasiamrpn_kernel_cls1
|
||||
params.kernel_r1 = self.args.dasiamrpn_kernel_r1
|
||||
tracker = cv.TrackerDaSiamRPN_create(params)
|
||||
elif self.trackerAlgorithm == 'nanotrack':
|
||||
params = cv.TrackerNano_Params()
|
||||
params.backbone = args.nanotrack_backbone
|
||||
params.neckhead = args.nanotrack_headneck
|
||||
tracker = cv.TrackerNano_create(params)
|
||||
else:
|
||||
sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn.".format(self.trackerAlgorithm))
|
||||
sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn, nanotrack.".format(self.trackerAlgorithm))
|
||||
return tracker
|
||||
|
||||
def initializeTracker(self, image):
|
||||
@ -117,12 +126,14 @@ if __name__ == '__main__':
|
||||
print(__doc__)
|
||||
parser = argparse.ArgumentParser(description="Run tracker")
|
||||
parser.add_argument("--input", type=str, default="vtest.avi", help="Path to video source")
|
||||
parser.add_argument("--tracker_algo", type=str, default="mil", help="One of available tracking algorithms: mil, goturn, dasiamrpn")
|
||||
parser.add_argument("--tracker_algo", type=str, default="nanotrack", help="One of available tracking algorithms: mil, goturn, dasiamrpn, nanotrack")
|
||||
parser.add_argument("--goturn", type=str, default="goturn.prototxt", help="Path to GOTURN architecture")
|
||||
parser.add_argument("--goturn_model", type=str, default="goturn.caffemodel", help="Path to GOTERN model")
|
||||
parser.add_argument("--dasiamrpn_net", type=str, default="dasiamrpn_model.onnx", help="Path to onnx model of DaSiamRPN net")
|
||||
parser.add_argument("--dasiamrpn_kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Path to onnx model of DaSiamRPN kernel_r1")
|
||||
parser.add_argument("--dasiamrpn_kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Path to onnx model of DaSiamRPN kernel_cls1")
|
||||
parser.add_argument("--nanotrack_backbone", type=str, default="nanotrack_backbone_sim.onnx", help="Path to onnx model of NanoTrack backBone")
|
||||
parser.add_argument("--nanotrack_headneck", type=str, default="nanotrack_head_sim.onnx", help="Path to onnx model of NanoTrack headNeck")
|
||||
|
||||
args = parser.parse_args()
|
||||
App(args).run()
|
||||
|
Loading…
Reference in New Issue
Block a user