mirror of
https://github.com/opencv/opencv.git
synced 2025-06-07 09:25:45 +08:00

[test data in opencv_extra](https://github.com/opencv/opencv_extra/pull/1016) NanoTrack is an extremely lightweight and fast object-tracking model. The total size is **1.1 MB**. It runs at **150** FPS on an M1 chip and at about **30** FPS on a Raspberry Pi 4 (Float32, CPU only). With this model, many users can run object tracking on edge devices. The author of NanoTrack is @HonglinChu. The original repo is https://github.com/HonglinChu/NanoTrack. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is an accuracy test, a performance test and test data in the opencv_extra repository, if applicable. The patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
360 lines
11 KiB
C++
360 lines
11 KiB
C++
// This file is part of OpenCV project.
|
|
// It is subject to the license terms in the LICENSE file found in the top-level directory
|
|
// of this distribution and at http://opencv.org/license.html.
|
|
|
|
// This file is modified from the https://github.com/HonglinChu/NanoTrack/blob/master/ncnn_macos_nanotrack/nanotrack.cpp
|
|
// Author, HongLinChu, 1628464345@qq.com
|
|
// Adapt to OpenCV, ZihaoMu: zihaomu@outlook.com
|
|
|
|
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
|
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
|
|
|
#include "../precomp.hpp"
|
|
#ifdef HAVE_OPENCV_DNN
|
|
#include "opencv2/dnn.hpp"
|
|
#endif
|
|
|
|
namespace cv {
|
|
|
|
// Out-of-line constructor of the public interface class; intentionally empty.
TrackerNano::TrackerNano()
{
}
|
|
|
|
// Out-of-line destructor of the public interface class; intentionally empty.
TrackerNano::~TrackerNano()
{
}
|
|
|
|
/**
 * Fills the parameter set with its defaults.
 *
 * The two model paths default to "backbone.onnx" and "neckhead.onnx".
 * With the dnn module available, inference defaults to DNN_BACKEND_DEFAULT on
 * DNN_TARGET_CPU; without it, backend and target are set to -1 (invalid).
 */
TrackerNano::Params::Params()
{
#ifdef HAVE_OPENCV_DNN
    backend = dnn::DNN_BACKEND_DEFAULT;
    target = dnn::DNN_TARGET_CPU;
#else
    backend = -1;  // invalid value
    target = -1;   // invalid value
#endif

    backbone = "backbone.onnx";
    neckhead = "neckhead.onnx";
}
|
|
|
|
#ifdef HAVE_OPENCV_DNN
|
|
static void softmax(const Mat& src, Mat& dst)
|
|
{
|
|
Mat maxVal;
|
|
cv::max(src.row(1), src.row(0), maxVal);
|
|
|
|
src.row(1) -= maxVal;
|
|
src.row(0) -= maxVal;
|
|
|
|
exp(src, dst);
|
|
|
|
Mat sumVal = dst.row(0) + dst.row(1);
|
|
dst.row(0) = dst.row(0) / sumVal;
|
|
dst.row(1) = dst.row(1) / sumVal;
|
|
}
|
|
|
|
/**
 * Context-padded size of a w x h box: sqrt((w + p) * (h + p)) with
 * p = (w + h) / 2.
 */
static float sizeCal(float w, float h)
{
    const float context = (w + h) * 0.5f;
    return std::sqrt((w + context) * (h + context));
}
|
|
|
|
/**
 * Element-wise version of the scalar sizeCal(): for every element returns
 * sqrt((w + p) * (h + p)) with p = (w + h) / 2.
 */
static Mat sizeCal(const Mat& w, const Mat& h)
{
    const Mat context = (w + h) * 0.5;

    Mat result = (w + context).mul((h + context));
    cv::sqrt(result, result);  // in place

    return result;
}
|
|
|
|
// Similar python code: r = np.maximum(r, 1. / r) # r is matrix
|
|
static void elementReciprocalMax(Mat& srcDst)
|
|
{
|
|
size_t totalV = srcDst.total();
|
|
float* ptr = srcDst.ptr<float>(0);
|
|
for (size_t i = 0; i < totalV; i++)
|
|
{
|
|
float val = *(ptr + i);
|
|
*(ptr + i) = std::max(val, 1.0f/val);
|
|
}
|
|
}
|
|
|
|
class TrackerNanoImpl : public TrackerNano
|
|
{
|
|
public:
|
|
TrackerNanoImpl(const TrackerNano::Params& parameters)
|
|
: params(parameters)
|
|
{
|
|
backbone = dnn::readNet(params.backbone);
|
|
neckhead = dnn::readNet(params.neckhead);
|
|
|
|
CV_Assert(!backbone.empty());
|
|
CV_Assert(!neckhead.empty());
|
|
|
|
backbone.setPreferableBackend(params.backend);
|
|
backbone.setPreferableTarget(params.target);
|
|
neckhead.setPreferableBackend(params.backend);
|
|
neckhead.setPreferableTarget(params.target);
|
|
}
|
|
|
|
void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
|
|
bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
|
|
float getTrackingScore() CV_OVERRIDE;
|
|
|
|
// Save the target bounding box for each frame.
|
|
std::vector<float> targetSz = {0, 0}; // H and W of bounding box
|
|
std::vector<float> targetPos = {0, 0}; // center point of bounding box (x, y)
|
|
float tracking_score;
|
|
|
|
TrackerNano::Params params;
|
|
|
|
struct trackerConfig
|
|
{
|
|
float windowInfluence = 0.455f;
|
|
float lr = 0.37f;
|
|
float contextAmount = 0.5;
|
|
bool swapRB = true;
|
|
int totalStride = 16;
|
|
float penaltyK = 0.055f;
|
|
};
|
|
|
|
protected:
|
|
const int exemplarSize = 127;
|
|
const int instanceSize = 255;
|
|
|
|
trackerConfig trackState;
|
|
int scoreSize;
|
|
Size imgSize = {0, 0};
|
|
Mat hanningWindow;
|
|
Mat grid2searchX, grid2searchY;
|
|
|
|
dnn::Net backbone, neckhead;
|
|
Mat image;
|
|
|
|
void getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz);
|
|
void generateGrids();
|
|
};
|
|
|
|
void TrackerNanoImpl::generateGrids()
|
|
{
|
|
int sz = scoreSize;
|
|
const int sz2 = sz / 2;
|
|
|
|
std::vector<float> x1Vec(sz, 0);
|
|
|
|
for (int i = 0; i < sz; i++)
|
|
{
|
|
x1Vec[i] = i - sz2;
|
|
}
|
|
|
|
Mat x1M(1, sz, CV_32FC1, x1Vec.data());
|
|
|
|
cv::repeat(x1M, sz, 1, grid2searchX);
|
|
cv::repeat(x1M.t(), 1, sz, grid2searchY);
|
|
|
|
grid2searchX *= trackState.totalStride;
|
|
grid2searchY *= trackState.totalStride;
|
|
|
|
grid2searchX += instanceSize/2;
|
|
grid2searchY += instanceSize/2;
|
|
}
|
|
|
|
/**
 * Starts tracking the object inside boundingBox_ on image_.
 *
 * Crops a context-padded template around the box, runs it through the
 * backbone and stores the resulting feature as the "input1" input of the
 * neck/head network. Also prepares the Hanning window and the search grids.
 *
 * @param image_        First frame; must not be empty.
 * @param boundingBox_  Initial target box; width and height must be positive.
 * @throws cv::Exception (via CV_Assert) on an empty frame or a degenerate box.
 */
void TrackerNanoImpl::init(InputArray image_, const Rect &boundingBox_)
{
    // Fail early with a clear message instead of deep inside the crop/resize.
    CV_Assert(!image_.empty());
    CV_Assert(boundingBox_.width > 0 && boundingBox_.height > 0);

    // Reset the configuration first: scoreSize is derived from it.
    trackState = trackerConfig();
    scoreSize = (instanceSize - exemplarSize) / trackState.totalStride + 8;
    tracking_score = 0.f;  // no stale score after a re-initialisation
    image = image_.getMat().clone();

    // Convert the Rect from top-left corner to center.
    targetPos[0] = float(boundingBox_.x) + float(boundingBox_.width) * 0.5f;
    targetPos[1] = float(boundingBox_.y) + float(boundingBox_.height) * 0.5f;

    targetSz[0] = float(boundingBox_.width);
    targetSz[1] = float(boundingBox_.height);

    imgSize = image.size();

    // Extend the bounding box with context on every side.
    float sumSz = targetSz[0] + targetSz[1];
    float wExtent = targetSz[0] + trackState.contextAmount * (sumSz);
    float hExtent = targetSz[1] + trackState.contextAmount * (sumSz);
    int sz = int(cv::sqrt(wExtent * hExtent));

    Mat crop;
    getSubwindow(crop, image, sz, exemplarSize);
    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);

    backbone.setInput(blob);
    Mat out = backbone.forward();  // Feature extraction.
    neckhead.setInput(out, "input1");

    createHanningWindow(hanningWindow, Size(scoreSize, scoreSize), CV_32F);
    generateGrids();
}
|
|
|
|
/**
 * Cuts a square window of side originalSz around targetPos out of srcImg and
 * resizes it to resizeSz x resizeSz.
 *
 * Parts of the window that fall outside the image are filled with the
 * per-channel mean of srcImg.
 *
 * @param dstCrop     Output crop, resizeSz x resizeSz.
 * @param srcImg      Source frame (not modified).
 * @param originalSz  Side of the window in source pixels; must be positive.
 * @param resizeSz    Side of the output crop; must be positive.
 */
void TrackerNanoImpl::getSubwindow(Mat& dstCrop, Mat& srcImg, int originalSz, int resizeSz)
{
    CV_Assert(originalSz > 0 && resizeSz > 0);

    const Size imgSz = srcImg.size();
    const int c = (originalSz + 1) / 2;

    // Window corners in source coordinates (float -> int truncation is intended).
    const int context_xmin = static_cast<int>(targetPos[0] - c);
    const int context_ymin = static_cast<int>(targetPos[1] - c);
    const int context_xmax = context_xmin + originalSz - 1;
    const int context_ymax = context_ymin + originalSz - 1;

    // How far the window sticks out of the image on each side.
    const int left_pad = std::max(0, -context_xmin);
    const int top_pad = std::max(0, -context_ymin);
    const int right_pad = std::max(0, context_xmax - imgSz.width + 1);
    const int bottom_pad = std::max(0, context_ymax - imgSz.height + 1);

    // The window expressed in the (possibly padded) image; its side is
    // always originalSz because both corners shift by the same padding.
    const cv::Rect roi(context_xmin + left_pad, context_ymin + top_pad, originalSz, originalSz);

    Mat canvas;
    if (left_pad == 0 && top_pad == 0 && right_pad == 0 && bottom_pad == 0)
    {
        // Crop image without padding.
        canvas = srcImg;
    }
    else
    {
        // Crop image with padding. The mean colour is only needed here, so
        // the full-image mean() is not paid for when the window fits.
        const Scalar avgChans = mean(srcImg);
        cv::copyMakeBorder(srcImg, canvas, top_pad, bottom_pad, left_pad, right_pad, cv::BORDER_CONSTANT, avgChans);
    }

    resize(canvas(roi), dstCrop, Size(resizeSz, resizeSz));
}
|
|
|
|
/**
 * Locates the target in a new frame.
 *
 * Crops a search window around the previous position, runs backbone and
 * neck/head, re-ranks the score map with scale/ratio and window penalties,
 * and updates targetPos / targetSz from the best cell.
 *
 * @param image_          Current frame.
 * @param boundingBoxRes  Receives the new target box (top-left + size).
 * @return Always true; use getTrackingScore() to judge the result.
 */
bool TrackerNanoImpl::update(InputArray image_, Rect &boundingBoxRes)
{
    image = image_.getMat().clone();

    // Kept in float (as in init()); an int here truncated the sum.
    const float targetSzSum = targetSz[0] + targetSz[1];

    const float wc = targetSz[0] + trackState.contextAmount * targetSzSum;
    const float hc = targetSz[1] + trackState.contextAmount * targetSzSum;
    const float sz = cv::sqrt(wc * hc);
    const float scale_z = exemplarSize / sz;

    // Float ratio: int / int gave 2 instead of 255 / 127, making the search
    // crop scale disagree with scale_z, which maps predictions back below.
    const float sx = sz * (float(instanceSize) / float(exemplarSize));

    // Target size in search-crop pixels. Locals are used instead of scaling
    // targetSz in place, so the state stays valid if inference throws.
    const float scaledW = targetSz[0] * scale_z;
    const float scaledH = targetSz[1] * scale_z;

    Mat crop;
    getSubwindow(crop, image, int(sx), instanceSize);

    Mat blob = dnn::blobFromImage(crop, 1.0, Size(), Scalar(), trackState.swapRB);
    backbone.setInput(blob);
    Mat xf = backbone.forward();
    neckhead.setInput(xf, "input2");
    std::vector<String> outputName = {"output1", "output2"};
    std::vector<Mat> outs;
    neckhead.forward(outs, outputName);

    CV_Assert(outs.size() == 2);

    Mat clsScore = outs[0];  // 1x2x16x16
    Mat bboxPred = outs[1];  // 1x4x16x16

    clsScore = clsScore.reshape(0, {2, scoreSize, scoreSize});
    bboxPred = bboxPred.reshape(0, {4, scoreSize, scoreSize});

    Mat scoreSoftmax;  // 2x16x16
    softmax(clsScore, scoreSoftmax);

    // Second class plane is used as the target score.
    Mat score = scoreSoftmax.row(1);
    score = score.reshape(0, {scoreSize, scoreSize});

    // The four planes are offsets from each grid cell to the box edges.
    Mat predX1 = grid2searchX - bboxPred.row(0).reshape(0, {scoreSize, scoreSize});
    Mat predY1 = grid2searchY - bboxPred.row(1).reshape(0, {scoreSize, scoreSize});
    Mat predX2 = grid2searchX + bboxPred.row(2).reshape(0, {scoreSize, scoreSize});
    Mat predY2 = grid2searchY + bboxPred.row(3).reshape(0, {scoreSize, scoreSize});

    const Mat predWidth = predX2 - predX1;
    const Mat predHeight = predY2 - predY1;

    // size penalty
    // scale penalty: predicted size relative to the current target size
    // (the target position was used here by mistake).
    Mat sc = sizeCal(predWidth, predHeight) / sizeCal(scaledW, scaledH);
    elementReciprocalMax(sc);

    // ratio penalty
    float ratioVal = scaledW / scaledH;

    Mat ratioM(scoreSize, scoreSize, CV_32FC1, Scalar::all(ratioVal));
    Mat rc = ratioM / (predWidth / predHeight);
    elementReciprocalMax(rc);

    Mat penalty;
    exp(((rc.mul(sc) - 1) * trackState.penaltyK * (-1)), penalty);
    Mat pscore = penalty.mul(score);

    // Window penalty
    pscore = pscore * (1.0 - trackState.windowInfluence) + hanningWindow * trackState.windowInfluence;

    // get Max
    int bestID[2] = { 0, 0 };
    minMaxIdx(pscore, 0, 0, 0, bestID);

    tracking_score = pscore.at<float>(bestID);

    float x1Val = predX1.at<float>(bestID);
    float x2Val = predX2.at<float>(bestID);
    float y1Val = predY1.at<float>(bestID);
    float y2Val = predY2.at<float>(bestID);

    // Best box in search-crop pixels, converted back to image pixels.
    float predXs = (x1Val + x2Val)/2;
    float predYs = (y1Val + y2Val)/2;
    float predW = (x2Val - x1Val)/scale_z;
    float predH = (y2Val - y1Val)/scale_z;

    // Displacement of the box centre from the centre of the search crop.
    float diffXs = (predXs - instanceSize / 2) / scale_z;
    float diffYs = (predYs - instanceSize / 2) / scale_z;

    // Size update rate, weighted by penalty and score of the best cell.
    float lr = penalty.at<float>(bestID) * score.at<float>(bestID) * trackState.lr;

    float resX = targetPos[0] + diffXs;
    float resY = targetPos[1] + diffYs;
    float resW = predW * lr + (1 - lr) * targetSz[0];
    float resH = predH * lr + (1 - lr) * targetSz[1];

    // Keep the centre inside the image and the size within [10, image size].
    resX = std::max(0.f, std::min((float)imgSize.width, resX));
    resY = std::max(0.f, std::min((float)imgSize.height, resY));
    resW = std::max(10.f, std::min((float)imgSize.width, resW));
    resH = std::max(10.f, std::min((float)imgSize.height, resH));

    targetPos[0] = resX;
    targetPos[1] = resY;
    targetSz[0] = resW;
    targetSz[1] = resH;

    // convert center to Rect.
    boundingBoxRes = { int(resX - resW/2), int(resY - resH/2), int(resW), int(resH)};
    return true;
}
|
|
|
|
float TrackerNanoImpl::getTrackingScore()
|
|
{
|
|
return tracking_score;
|
|
}
|
|
|
|
// Factory: builds the DNN-based implementation from the given parameters.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    Ptr<TrackerNano> tracker = makePtr<TrackerNanoImpl>(parameters);
    return tracker;
}
|
|
|
|
#else // HAVE_OPENCV_DNN
|
|
// Factory stub for builds without the dnn module: always raises
// StsNotImplemented, so no tracker is ever returned.
Ptr<TrackerNano> TrackerNano::create(const TrackerNano::Params& parameters)
{
    CV_UNUSED(parameters);
    CV_Error(cv::Error::StsNotImplemented,
             "to use NanoTrack, the tracking module needs to be built with opencv_dnn !");
}
|
|
#endif // HAVE_OPENCV_DNN
|
|
}
|