mirror of
https://github.com/opencv/opencv.git
synced 2025-01-09 21:27:59 +08:00
Merge pull request #26202 from gursimarsingh:improved_tracker_samples
Improved Tracker Samples #26202 Relates to #25006 This sample has been rewritten to track a selected target in a video or camera stream. It combines VIT tracker, Nano tracker and Dasiamrpn tracker into one tracker sample ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
This commit is contained in:
parent
0c774c94f9
commit
816851c999
@ -769,6 +769,11 @@ public:
|
||||
*/
|
||||
CV_WRAP virtual
|
||||
bool update(InputArray image, CV_OUT Rect& boundingBox) = 0;
|
||||
|
||||
/** @brief Return tracking score. The default implementation returns -1.
|
||||
*/
|
||||
CV_WRAP virtual float getTrackingScore() { return -1; }
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -834,10 +839,6 @@ public:
|
||||
static CV_WRAP
|
||||
Ptr<TrackerDaSiamRPN> create(const TrackerDaSiamRPN::Params& parameters = TrackerDaSiamRPN::Params());
|
||||
|
||||
/** @brief Return tracking score
|
||||
*/
|
||||
CV_WRAP virtual float getTrackingScore() = 0;
|
||||
|
||||
//void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
|
||||
//bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
|
||||
};
|
||||
@ -872,10 +873,6 @@ public:
|
||||
static CV_WRAP
|
||||
Ptr<TrackerNano> create(const TrackerNano::Params& parameters = TrackerNano::Params());
|
||||
|
||||
/** @brief Return tracking score
|
||||
*/
|
||||
CV_WRAP virtual float getTrackingScore() = 0;
|
||||
|
||||
//void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
|
||||
//bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
|
||||
};
|
||||
@ -910,10 +907,6 @@ public:
|
||||
static CV_WRAP
|
||||
Ptr<TrackerVit> create(const TrackerVit::Params& parameters = TrackerVit::Params());
|
||||
|
||||
/** @brief Return tracking score
|
||||
*/
|
||||
CV_WRAP virtual float getTrackingScore() = 0;
|
||||
|
||||
// void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
|
||||
// bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
|
||||
};
|
||||
|
@ -89,8 +89,12 @@ public:
|
||||
TrackerNanoImpl(const TrackerNano::Params& parameters)
|
||||
: params(parameters)
|
||||
{
|
||||
backbone = dnn::readNet(params.backbone);
|
||||
neckhead = dnn::readNet(params.neckhead);
|
||||
dnn::EngineType engine = dnn::ENGINE_AUTO;
|
||||
if (params.backend != 0 || params.target != 0){
|
||||
engine = dnn::ENGINE_CLASSIC;
|
||||
}
|
||||
backbone = dnn::readNet(params.backbone, "", "", engine);
|
||||
neckhead = dnn::readNet(params.neckhead, "", "", engine);
|
||||
|
||||
CV_Assert(!backbone.empty());
|
||||
CV_Assert(!neckhead.empty());
|
||||
|
@ -44,7 +44,11 @@ public:
|
||||
TrackerVitImpl(const TrackerVit::Params& parameters)
|
||||
: params(parameters)
|
||||
{
|
||||
net = dnn::readNet(params.net);
|
||||
dnn::EngineType engine = dnn::ENGINE_AUTO;
|
||||
if (params.backend != 0 || params.target != 0){
|
||||
engine = dnn::ENGINE_CLASSIC;
|
||||
}
|
||||
net = dnn::readNet(params.net, "", "", engine);
|
||||
CV_Assert(!net.empty());
|
||||
|
||||
net.setPreferableBackend(params.backend);
|
||||
|
@ -1,190 +0,0 @@
|
||||
// DaSiamRPN tracker.
|
||||
// Original paper: https://arxiv.org/abs/1808.06048
|
||||
// Link to original repo: https://github.com/foolwood/DaSiamRPN
|
||||
// Links to onnx models:
|
||||
// - network: https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0
|
||||
// - kernel_r1: https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0
|
||||
// - kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
// Sample-specific command-line options, in cv::CommandLineParser key syntax
// ("{ name alias | default | help text }"). Each model option's description names
// the same model as the option itself.
std::string param_keys =
    "{ help h | | Print help message }"
    "{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }"
    "{ net | dasiamrpn_model.onnx | Path to onnx model of net}"
    "{ kernel_cls1 | dasiamrpn_kernel_cls1.onnx | Path to onnx model of kernel_cls1 }"
    "{ kernel_r1 | dasiamrpn_kernel_r1.onnx | Path to onnx model of kernel_r1 }";
|
||||
std::string backend_keys = cv::format(
|
||||
"{ backend | 0 | Choose one of computation backends: "
|
||||
"%d: automatically (by default), "
|
||||
"%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
|
||||
"%d: OpenCV implementation, "
|
||||
"%d: VKCOM, "
|
||||
"%d: CUDA }", cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_BACKEND_VKCOM, cv::dnn::DNN_BACKEND_CUDA);
|
||||
std::string target_keys = cv::format(
|
||||
"{ target | 0 | Choose one of target computation devices: "
|
||||
"%d: CPU target (by default), "
|
||||
"%d: OpenCL, "
|
||||
"%d: OpenCL fp16 (half-float precision), "
|
||||
"%d: VPU, "
|
||||
"%d: Vulkan, "
|
||||
"%d: CUDA, "
|
||||
"%d: CUDA fp16 (half-float preprocess) }", cv::dnn::DNN_TARGET_CPU, cv::dnn::DNN_TARGET_OPENCL, cv::dnn::DNN_TARGET_OPENCL_FP16, cv::dnn::DNN_TARGET_MYRIAD, cv::dnn::DNN_TARGET_VULKAN, cv::dnn::DNN_TARGET_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16);
|
||||
std::string keys = param_keys + backend_keys + target_keys;
|
||||
|
||||
static
|
||||
int run(int argc, char** argv)
|
||||
{
|
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string inputName = parser.get<String>("input");
|
||||
std::string net = parser.get<String>("net");
|
||||
std::string kernel_cls1 = parser.get<String>("kernel_cls1");
|
||||
std::string kernel_r1 = parser.get<String>("kernel_r1");
|
||||
int backend = parser.get<int>("backend");
|
||||
int target = parser.get<int>("target");
|
||||
|
||||
Ptr<TrackerDaSiamRPN> tracker;
|
||||
try
|
||||
{
|
||||
TrackerDaSiamRPN::Params params;
|
||||
params.model = samples::findFile(net);
|
||||
params.kernel_cls1 = samples::findFile(kernel_cls1);
|
||||
params.kernel_r1 = samples::findFile(kernel_r1);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
tracker = TrackerDaSiamRPN::create(params);
|
||||
}
|
||||
catch (const cv::Exception& ee)
|
||||
{
|
||||
std::cerr << "Exception: " << ee.what() << std::endl;
|
||||
std::cout << "Can't load the network by using the following files:" << std::endl;
|
||||
std::cout << "siamRPN : " << net << std::endl;
|
||||
std::cout << "siamKernelCL1 : " << kernel_cls1 << std::endl;
|
||||
std::cout << "siamKernelR1 : " << kernel_r1 << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
const std::string winName = "DaSiamRPN";
|
||||
namedWindow(winName, WINDOW_AUTOSIZE);
|
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
|
||||
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1))
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName[0] - '0';
|
||||
std::cout << "Trying to open camera #" << c << " ..." << std::endl;
|
||||
if (!cap.open(c))
|
||||
{
|
||||
std::cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else if (inputName.size())
|
||||
{
|
||||
inputName = samples::findFileOrKeep(inputName);
|
||||
if (!cap.open(inputName))
|
||||
{
|
||||
std::cout << "Could not open: " << inputName << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first image.
|
||||
Mat image;
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame!" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
Mat image_select = image.clone();
|
||||
putText(image_select, "Select initial bounding box you want to track.", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(image_select, "And Press the ENTER key.", Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
|
||||
Rect selectRect = selectROI(winName, image_select);
|
||||
std::cout << "ROI=" << selectRect << std::endl;
|
||||
|
||||
tracker->init(image, selectRect);
|
||||
|
||||
TickMeter tickMeter;
|
||||
|
||||
for (int count = 0; ; ++count)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame " << count << ". End of video stream?" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
Rect rect;
|
||||
|
||||
tickMeter.start();
|
||||
bool ok = tracker->update(image, rect);
|
||||
tickMeter.stop();
|
||||
|
||||
float score = tracker->getTrackingScore();
|
||||
|
||||
std::cout << "frame " << count <<
|
||||
": predicted score=" << score <<
|
||||
" rect=" << rect <<
|
||||
" time=" << tickMeter.getTimeMilli() << "ms" <<
|
||||
std::endl;
|
||||
|
||||
Mat render_image = image.clone();
|
||||
|
||||
if (ok)
|
||||
{
|
||||
rectangle(render_image, rect, Scalar(0, 255, 0), 2);
|
||||
|
||||
std::string timeLabel = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
|
||||
std::string scoreLabel = format("Score: %f", score);
|
||||
putText(render_image, timeLabel, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(render_image, scoreLabel, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
}
|
||||
|
||||
imshow(winName, render_image);
|
||||
|
||||
tickMeter.reset();
|
||||
|
||||
int c = waitKey(1);
|
||||
if (c == 27 /*ESC*/)
|
||||
break;
|
||||
}
|
||||
|
||||
std::cout << "Exit" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
return run(argc, argv);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::cerr << "FATAL: C++ exception: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
@ -326,32 +326,37 @@ def parseMetalinkFile(metalink_filepath, save_dir):
|
||||
models.append(produceDownloadInstance(name, fname, hash_sum, url, save_dir))
|
||||
return models
|
||||
|
||||
def parseYAMLFile(yaml_filepath, save_dir, model_name=""):
    """Build download descriptors for the models listed in a YAML zoo file.

    Every top-level entry of the YAML file is scanned for keys ending in
    ``load_info``. The text before that suffix is the entry prefix:

    * ``config_load_info`` takes the file name from the ``config`` key and reads
      the unprefixed fields ``sha1``, ``url``, ``download_sha``,
      ``download_name`` and ``member``.
    * any other ``<prefix>load_info`` (including the plain ``load_info``, whose
      prefix is empty) takes the file name from ``<prefix>model`` and reads the
      fields ``<prefix>sha1``, ``<prefix>url``, ``<prefix>download_sha``,
      ``<prefix>download_name`` and ``<prefix>member``.

    Args:
        yaml_filepath: path of the YAML file to read.
        save_dir: directory passed through to ``produceDownloadInstance``.
        model_name: when non-empty, only the entry with exactly this name is
            processed; an empty value (or ``None``) processes every entry.

    Returns:
        A list with one ``produceDownloadInstance(...)`` result per non-empty
        ``*load_info`` section found.
    """
    models = []
    with open(yaml_filepath, 'r') as stream:
        data_loaded = yaml.safe_load(stream)

    for name, params in data_loaded.items():
        # Truthiness check so that both "" and None mean "no filter".
        if model_name and name != model_name:
            continue
        for key, load_info in params.items():
            if not key.endswith("load_info") or not load_info:
                continue
            prefix = key[:-len("load_info")]
            if prefix == "config_":
                # Config sections keep unprefixed field names.
                fname = os.path.basename(params.get("config"))
                field_prefix = ""
            else:
                fname = os.path.basename(params.get(prefix + "model"))
                field_prefix = prefix
            models.append(produceDownloadInstance(
                name, fname,
                load_info.get(field_prefix + "sha1"),
                load_info.get(field_prefix + "url"),
                save_dir,
                download_name=load_info.get(field_prefix + "download_name"),
                download_sha=load_info.get(field_prefix + "download_sha"),
                archive_member=load_info.get(field_prefix + "member")))

    return models
|
||||
|
||||
@ -367,7 +372,7 @@ if __name__ == '__main__':
|
||||
save_dir = args.save_dir
|
||||
selected_model_name = args.model_name
|
||||
models.extend(parseMetalinkFile('face_detector/weights.meta4', save_dir))
|
||||
models.extend(parseYAMLFile('models.yml', save_dir))
|
||||
models.extend(parseYAMLFile('models.yml', save_dir, selected_model_name))
|
||||
for m in models:
|
||||
print(m)
|
||||
if selected_model_name and not m.name.startswith(selected_model_name):
|
||||
|
@ -390,6 +390,7 @@ reid:
|
||||
sha1: "d4316b100db40f8840aa82626e1cf3f519a7f1ae"
|
||||
model: "person_reid_youtu_2021nov.onnx"
|
||||
yolo_load_info:
|
||||
yolo_url: "https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n.onnx"
|
||||
yolo_sha1: "68f864475d06e2ec4037181052739f268eeac38d"
|
||||
yolo_model: "yolov8n.onnx"
|
||||
mean: [0.485, 0.456, 0.406]
|
||||
@ -403,3 +404,40 @@ reid:
|
||||
rgb: false
|
||||
yolo_rgb: true
|
||||
sample: "person_reid"
|
||||
|
||||
################################################################################
|
||||
# Tracker models.
|
||||
################################################################################
|
||||
|
||||
vit:
|
||||
load_info:
|
||||
url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx"
|
||||
sha1: "50008bb4f6a27b1aa940ad886b1bd1936ac4ed3e"
|
||||
model: "object_tracking_vittrack_2023sep.onnx"
|
||||
sample: "object_tracker"
|
||||
|
||||
nanotrack:
|
||||
nanotrack_head_load_info:
|
||||
nanotrack_head_url: "https://github.com/HonglinChu/SiamTrackers/raw/refs/heads/master/NanoTrack/models/nanotrackv2/nanotrack_head_sim.onnx"
|
||||
nanotrack_head_sha1: "39f168489671700cf739e402dfc67d41ce648aef"
|
||||
nanotrack_head_model: "nanotrack_head_sim.onnx"
|
||||
nanotrack_back_load_info:
|
||||
nanotrack_back_url: "https://github.com/HonglinChu/SiamTrackers/raw/refs/heads/master/NanoTrack/models/nanotrackv2/nanotrack_backbone_sim.onnx"
|
||||
nanotrack_back_sha1: "6e773a364457b78574f9f63a23b0659ee8646f8f"
|
||||
nanotrack_back_model: "nanotrack_backbone_sim.onnx"
|
||||
sample: "object_tracker"
|
||||
|
||||
dasiamrpn:
|
||||
dasiamrpn_load_info:
|
||||
dasiamrpn_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx?download="
|
||||
dasiamrpn_sha1: "91b774fce7df4c0e4918469f0f482d9a27d0e2d4"
|
||||
dasiamrpn_model: "object_tracking_dasiamrpn_model_2021nov.onnx"
|
||||
dasiamrpn_kernel_r1_load_info:
|
||||
dasiamrpn_kernel_r1_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx?download="
|
||||
dasiamrpn_kernel_r1_sha1: "bb64620a54348657133eb28be2d3a2a8c76b84b3"
|
||||
# Must name the r1 kernel fetched by dasiamrpn_kernel_r1_url, not the cls1 kernel file.
dasiamrpn_kernel_r1_model: "object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"
|
||||
dasiamrpn_kernel_cls_load_info:
|
||||
dasiamrpn_kernel_cls_url: "https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx?download="
|
||||
dasiamrpn_kernel_cls_sha1: "e9ccd270ce8059bdf7ed0d1845c03ef4a951ee0f"
|
||||
dasiamrpn_kernel_cls_model: "object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"
|
||||
sample: "object_tracker"
|
@ -1,184 +0,0 @@
|
||||
// NanoTrack
|
||||
// Link to original inference code: https://github.com/HonglinChu/NanoTrack
|
||||
// Link to original training repo: https://github.com/HonglinChu/SiamTrackers/tree/master/NanoTrack
|
||||
// backBone model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_backbone_sim.onnx
|
||||
// headNeck model: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/onnx/nanotrack_head_sim.onnx
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
std::string param_keys =
|
||||
"{ help h | | Print help message }"
|
||||
"{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }"
|
||||
"{ backbone | backbone.onnx | Path to onnx model of backbone.onnx}"
|
||||
"{ headneck | headneck.onnx | Path to onnx model of headneck.onnx }";
|
||||
std::string backend_keys = cv::format(
|
||||
"{ backend | 0 | Choose one of computation backends: "
|
||||
"%d: automatically (by default), "
|
||||
"%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
|
||||
"%d: OpenCV implementation, "
|
||||
"%d: VKCOM, "
|
||||
"%d: CUDA }", cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_BACKEND_VKCOM, cv::dnn::DNN_BACKEND_CUDA);
|
||||
std::string target_keys = cv::format(
|
||||
"{ target | 0 | Choose one of target computation devices: "
|
||||
"%d: CPU target (by default), "
|
||||
"%d: OpenCL, "
|
||||
"%d: OpenCL fp16 (half-float precision), "
|
||||
"%d: VPU, "
|
||||
"%d: Vulkan, "
|
||||
"%d: CUDA, "
|
||||
"%d: CUDA fp16 (half-float preprocess) }", cv::dnn::DNN_TARGET_CPU, cv::dnn::DNN_TARGET_OPENCL, cv::dnn::DNN_TARGET_OPENCL_FP16, cv::dnn::DNN_TARGET_MYRIAD, cv::dnn::DNN_TARGET_VULKAN, cv::dnn::DNN_TARGET_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16);
|
||||
std::string keys = param_keys + backend_keys + target_keys;
|
||||
|
||||
static
|
||||
int run(int argc, char** argv)
|
||||
{
|
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string inputName = parser.get<String>("input");
|
||||
std::string backbone = parser.get<String>("backbone");
|
||||
std::string headneck = parser.get<String>("headneck");
|
||||
int backend = parser.get<int>("backend");
|
||||
int target = parser.get<int>("target");
|
||||
|
||||
Ptr<TrackerNano> tracker;
|
||||
try
|
||||
{
|
||||
TrackerNano::Params params;
|
||||
params.backbone = samples::findFile(backbone);
|
||||
params.neckhead = samples::findFile(headneck);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
tracker = TrackerNano::create(params);
|
||||
}
|
||||
catch (const cv::Exception& ee)
|
||||
{
|
||||
std::cerr << "Exception: " << ee.what() << std::endl;
|
||||
std::cout << "Can't load the network by using the following files:" << std::endl;
|
||||
std::cout << "backbone : " << backbone << std::endl;
|
||||
std::cout << "headneck : " << headneck << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
const std::string winName = "NanoTrack";
|
||||
namedWindow(winName, WINDOW_AUTOSIZE);
|
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
|
||||
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1))
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName[0] - '0';
|
||||
std::cout << "Trying to open camera #" << c << " ..." << std::endl;
|
||||
if (!cap.open(c))
|
||||
{
|
||||
std::cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else if (inputName.size())
|
||||
{
|
||||
inputName = samples::findFileOrKeep(inputName);
|
||||
if (!cap.open(inputName))
|
||||
{
|
||||
std::cout << "Could not open: " << inputName << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first image.
|
||||
Mat image;
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame!" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
Mat image_select = image.clone();
|
||||
putText(image_select, "Select initial bounding box you want to track.", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(image_select, "And Press the ENTER key.", Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
|
||||
Rect selectRect = selectROI(winName, image_select);
|
||||
std::cout << "ROI=" << selectRect << std::endl;
|
||||
|
||||
tracker->init(image, selectRect);
|
||||
|
||||
TickMeter tickMeter;
|
||||
|
||||
for (int count = 0; ; ++count)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame " << count << ". End of video stream?" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
Rect rect;
|
||||
|
||||
tickMeter.start();
|
||||
bool ok = tracker->update(image, rect);
|
||||
tickMeter.stop();
|
||||
|
||||
float score = tracker->getTrackingScore();
|
||||
|
||||
std::cout << "frame " << count <<
|
||||
": predicted score=" << score <<
|
||||
" rect=" << rect <<
|
||||
" time=" << tickMeter.getTimeMilli() << "ms" <<
|
||||
std::endl;
|
||||
|
||||
Mat render_image = image.clone();
|
||||
|
||||
if (ok)
|
||||
{
|
||||
rectangle(render_image, rect, Scalar(0, 255, 0), 2);
|
||||
|
||||
std::string timeLabel = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
|
||||
std::string scoreLabel = format("Score: %f", score);
|
||||
putText(render_image, timeLabel, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(render_image, scoreLabel, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
}
|
||||
|
||||
imshow(winName, render_image);
|
||||
|
||||
tickMeter.reset();
|
||||
|
||||
int c = waitKey(1);
|
||||
if (c == 27 /*ESC*/)
|
||||
break;
|
||||
}
|
||||
|
||||
std::cout << "Exit" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
return run(argc, argv);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::cerr << "FATAL: C++ exception: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
309
samples/dnn/object_tracker.cpp
Normal file
309
samples/dnn/object_tracker.cpp
Normal file
@ -0,0 +1,309 @@
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
#include "common.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace std;
|
||||
using namespace cv::dnn;
|
||||
|
||||
// Usage banner registered with CommandLineParser::about().
const std::string about = "Use this script for testing Object Tracking using OpenCV. \n\n"
    "Firstly, download required models using the download_models.py <alias>.\n"
    "Valid alias names are nanotrack, vit and dasiamrpn.\n\n"
    "To run:\n"
    "\t nanotrack: \n"
    "\t\t e.g: ./example_dnn_object_tracker nanotrack\n\n"
    "\t vit: \n"
    "\t\t e.g: ./example_dnn_object_tracker vit\n\n"
    "\t dasiamrpn: \n"
    "\t\t e.g: ./example_dnn_object_tracker dasiamrpn\n\n"
    "To switch between models in runtime, make sure all the models are downloaded using download_models.py\n";

// Options common to every tracker, in cv::CommandLineParser key syntax
// ("{ name alias | default | help text }").
const std::string param_keys =
    "{ help h | | Print help message }"
    "{ @alias | vit | An alias name of model to extract preprocessing parameters from models.yml file. }"
    "{ zoo | ../dnn/models.yml | An optional path to file with preprocessing parameters }"
    "{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }"
    "{ tracking_thrs | 0.3 | Tracking score threshold. If a bbox of score >= 0.3, it is considered as found }";

// Help entry for --backend. This is a plain literal: the text has no printf-style
// arguments, so it is not routed through format().
const std::string backend_keys =
    "{ backend | default | Choose one of computation backends: "
    "default: automatically (by default), "
    "openvino: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
    "opencv: OpenCV implementation, "
    "vkcom: VKCOM, "
    "cuda: CUDA, "
    "webnn: WebNN }";

// Help entry for --target. Also a plain literal, for the same reason as backend_keys.
const std::string target_keys =
    "{ target | cpu | Choose one of target computation devices: "
    "cpu: CPU target (by default), "
    "opencl: OpenCL, "
    "opencl_fp16: OpenCL fp16 (half-float precision), "
    "vpu: VPU, "
    "vulkan: Vulkan, "
    "cuda: CUDA, "
    "cuda_fp16: CUDA fp16 (half-float preprocess) }";

// Intentionally non-const: loadParser() appends the model-specific options to it
// before the command line is parsed a second time.
std::string keys = param_keys + backend_keys + target_keys;
|
||||
|
||||
static void loadParser(const string &modelName, const string &zooFile)
|
||||
{
|
||||
// Load appropriate preprocessing arguments based on model name
|
||||
if (modelName == "vit")
|
||||
{
|
||||
keys += genPreprocArguments(modelName, zooFile, "");
|
||||
}
|
||||
else if (modelName == "nanotrack")
|
||||
{
|
||||
keys += genPreprocArguments(modelName, zooFile, "nanotrack_head_");
|
||||
keys += genPreprocArguments(modelName, zooFile, "nanotrack_back_");
|
||||
}
|
||||
else if (modelName == "dasiamrpn")
|
||||
{
|
||||
keys += genPreprocArguments(modelName, zooFile, "dasiamrpn_");
|
||||
keys += genPreprocArguments(modelName, zooFile, "dasiamrpn_kernel_r1_");
|
||||
keys += genPreprocArguments(modelName, zooFile, "dasiamrpn_kernel_cls_");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static void createTracker(const string &modelName, CommandLineParser &parser, Ptr<Tracker> &tracker) {
|
||||
int backend = getBackendID(parser.get<String>("backend"));
|
||||
int target = getTargetID(parser.get<String>("target"));
|
||||
if (modelName == "dasiamrpn") {
|
||||
const string net = parser.get<String>("dasiamrpn_model");
|
||||
const string sha1 = parser.get<String>("dasiamrpn_sha1");
|
||||
const string kernel_cls1 = parser.get<String>("dasiamrpn_kernel_cls_model");
|
||||
const string kernel_cls_sha1 = parser.get<String>("dasiamrpn_kernel_cls_sha1");
|
||||
const string kernel_r1 = parser.get<String>("dasiamrpn_kernel_r1_model");
|
||||
const string kernel_sha1 = parser.get<String>("dasiamrpn_kernel_r1_sha1");
|
||||
|
||||
TrackerDaSiamRPN::Params params;
|
||||
params.model = findModel(net, sha1);
|
||||
params.kernel_cls1 = findModel(kernel_cls1, kernel_cls_sha1);
|
||||
params.kernel_r1 = findModel(kernel_r1, kernel_sha1);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
tracker = TrackerDaSiamRPN::create(params);
|
||||
} else if (modelName == "nanotrack") {
|
||||
const string backbone = parser.get<String>("nanotrack_back_model");
|
||||
const string backSha1 = parser.get<String>("nanotrack_back_sha1");
|
||||
const string headneck = parser.get<String>("nanotrack_head_model");
|
||||
const string headSha1 = parser.get<String>("nanotrack_head_sha1");
|
||||
|
||||
TrackerNano::Params params;
|
||||
params.backbone = findModel(backbone, backSha1);
|
||||
params.neckhead = findModel(headneck, headSha1);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
tracker = TrackerNano::create(params);
|
||||
} else if (modelName == "vit") {
|
||||
const string net = parser.get<String>("model");
|
||||
const string sha1 = parser.get<String>("sha1");
|
||||
float tracking_score_threshold = parser.get<float>("tracking_thrs");
|
||||
|
||||
TrackerVit::Params params;
|
||||
params.net = findModel(net, sha1);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
params.tracking_score_threshold = tracking_score_threshold;
|
||||
tracker = TrackerVit::create(params);
|
||||
} else {
|
||||
cout<<"Pass the valid alias. Choices are {vit, nanotrack, dasiamrpn }."<<endl;
|
||||
exit(0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
parser.about(about);
|
||||
if (!parser.has("@alias") || parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
string modelName = parser.get<String>("@alias");
|
||||
const string zooFile = findFile(parser.get<String>("zoo"));
|
||||
loadParser(modelName, zooFile);
|
||||
parser = CommandLineParser(argc, argv, keys);
|
||||
|
||||
Ptr<Tracker> tracker;
|
||||
createTracker(modelName, parser, tracker);
|
||||
|
||||
const string windowName = "TRACKING";
|
||||
namedWindow(windowName, WINDOW_NORMAL);
|
||||
FontFace fontFace("sans");
|
||||
int stdSize = 20;
|
||||
int stdWeight = 400;
|
||||
int stdImgSize = 512;
|
||||
int imgWidth = -1;
|
||||
int fontSize = 50;
|
||||
int fontWeight = 500;
|
||||
double alpha = 0.4;
|
||||
Rect selectRect;
|
||||
string inputName = parser.get<String>("input");
|
||||
string instructionLabel = "Press space bar to pause video to draw bounding box.";
|
||||
Rect banner;
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
|
||||
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1))
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName[0] - '0';
|
||||
cout << "Trying to open camera #" << c << " ..." << endl;
|
||||
if (!cap.open(c))
|
||||
{
|
||||
cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << endl;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (inputName.size())
|
||||
{
|
||||
string filePath = findFile(inputName);
|
||||
if (!cap.open(filePath))
|
||||
{
|
||||
cout << "Could not open: " << inputName << endl;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
Mat image;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
cerr << "Can't capture frame. End of video stream?" << endl;
|
||||
return 0;
|
||||
}
|
||||
else if (imgWidth == -1){
|
||||
imgWidth = min(image.rows, image.cols);
|
||||
fontSize = (stdSize*imgWidth)/stdImgSize;
|
||||
fontWeight = (stdWeight*imgWidth)/stdImgSize;
|
||||
banner = getTextSize(Size(), instructionLabel, Point(), fontFace, fontSize, fontWeight);
|
||||
banner.height += 2 * fontSize; // padding
|
||||
banner.width += 10; // padding
|
||||
}
|
||||
Mat org_img = image.clone();
|
||||
rectangle(image, banner, Scalar::all(255), FILLED);
|
||||
addWeighted(image, alpha, org_img, 1 - alpha, 0, image);
|
||||
putText(image, instructionLabel, Point(10, fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
putText(image, "Press space bar after selecting.", Point(10, 2*fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
imshow(windowName, image);
|
||||
int key = waitKey(30); //Simulating 30 FPS, if reduced frames move really fast
|
||||
if (key == ' ')
|
||||
{
|
||||
selectRect = selectROI(windowName, image);
|
||||
if (selectRect.width > 0 && selectRect.height > 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "No valid selection made. Please select again." << endl;
|
||||
}
|
||||
}
|
||||
else if (key == 27) // ESC key to exit
|
||||
{
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
cout << "ROI=" << selectRect << endl;
|
||||
tracker->init(image, selectRect);
|
||||
instructionLabel = "Press space bar to select new target";
|
||||
banner = getTextSize(Size(), instructionLabel, Point(), fontFace, fontSize, fontWeight);
|
||||
banner.height += 4 * fontSize; // padding
|
||||
banner.width += 10; // padding
|
||||
|
||||
TickMeter tickMeter;
|
||||
|
||||
for (int count = 0; ; ++count)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
cerr << "Can't capture frame " << count << ". End of video stream?" << endl;
|
||||
break;
|
||||
}
|
||||
Rect rect;
|
||||
|
||||
tickMeter.start();
|
||||
bool ok = tracker->update(image, rect);
|
||||
tickMeter.stop();
|
||||
|
||||
float score = tracker->getTrackingScore();
|
||||
|
||||
Mat render_image = image.clone();
|
||||
|
||||
int key = waitKey(30); //Simulating 30 FPS, if reduced frames move really fast
|
||||
int h = image.rows;
|
||||
int w = image.cols;
|
||||
rectangle(render_image, banner, Scalar::all(255), FILLED);
|
||||
rectangle(render_image, cv::Point(0, int(h - int(1.5*fontSize))), cv::Point(w, h), Scalar::all(255), FILLED);
|
||||
addWeighted(render_image, alpha, image, 1 - alpha, 0, render_image);
|
||||
putText(render_image, instructionLabel, Point(10, fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
putText(render_image, "For switching between trackers: press 'v' for ViT, 'n' for Nano, and 'd' for DaSiamRPN.", Point(10, h-10), Scalar(0,0,0), fontFace, int(0.8*fontSize), fontWeight);
|
||||
|
||||
if (ok){
|
||||
if (key == ' '){
|
||||
putText(render_image, "Select the new target", Point(10, 2*fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
selectRect = selectROI(windowName, render_image);
|
||||
if (selectRect.width > 0 && selectRect.height > 0){
|
||||
tracker->init(image, selectRect);
|
||||
}
|
||||
else{
|
||||
cout<<"New target is not selected, switching to previous target"<<endl;
|
||||
}
|
||||
}
|
||||
else if (key == 'v'){
|
||||
modelName = "vit";
|
||||
loadParser(modelName, zooFile);
|
||||
parser = CommandLineParser(argc, argv, keys);
|
||||
createTracker(modelName, parser, tracker);
|
||||
tracker->init(image, rect);
|
||||
}
|
||||
else if (key == 'n'){
|
||||
modelName = "nanotrack";
|
||||
loadParser(modelName, zooFile);
|
||||
parser = CommandLineParser(argc, argv, keys);
|
||||
createTracker(modelName, parser, tracker);
|
||||
tracker->init(image, rect);
|
||||
}
|
||||
else if (key == 'd'){
|
||||
modelName = "dasiamrpn";
|
||||
loadParser(modelName, zooFile);
|
||||
parser = CommandLineParser(argc, argv, keys);
|
||||
createTracker(modelName, parser, tracker);
|
||||
tracker->init(image, rect);
|
||||
}
|
||||
rectangle(render_image, rect, Scalar(0, 255, 0), 2);
|
||||
}
|
||||
|
||||
string timeLabel = format("FPS: %.2f", tickMeter.getFPS());
|
||||
string scoreLabel = format("Score: %f", score);
|
||||
string algoLabel = "Algorithm: " + modelName;
|
||||
putText(render_image, timeLabel, Point(10, 2*fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
putText(render_image, scoreLabel, Point(10, 3*fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
putText(render_image, algoLabel, Point(10, 4*fontSize), Scalar(0,0,0), fontFace, fontSize, fontWeight);
|
||||
|
||||
imshow(windowName, render_image);
|
||||
|
||||
tickMeter.reset();
|
||||
|
||||
if (key == 27 /*ESC*/)
|
||||
exit(0);
|
||||
}
|
||||
return 0;
|
||||
}
|
200
samples/dnn/object_tracker.py
Normal file
200
samples/dnn/object_tracker.py
Normal file
@ -0,0 +1,200 @@
|
||||
#!/usr/bin/env python
|
||||
import sys
|
||||
import cv2 as cv
|
||||
import argparse
|
||||
from common import *
|
||||
|
||||
def help():
    """Print the usage text for the object tracking sample.

    The text lists, per supported tracker alias (nanotrack, vit, dasiamrpn),
    the command that downloads its model and an example invocation.
    """
    usage_text = '''
        Use this script for testing Object Tracking using OpenCV.
        Firstly, download required models using the download_models.py.
        To run:
            nanotrack:
                Download Model: python download_models.py nanotrack
                Example: python object_tracker.py nanotrack
            vit:
                Download Model: python download_models.py vit
                Example: python object_tracker.py vit
                or
                python object_tracker.py
            dasiamrpn:
                Download Model: python download_models.py dasiamrpn
                Example: python object_tracker.py dasiamrpn
        To switch between models in runtime, make sure all the models are downloaded using download_models.py'''
    print(usage_text)
|
||||
|
||||
def load_parser(model_name):
    """Build the command-line parser for ``model_name`` and parse ``sys.argv``.

    A first, help-less parser reads ``--zoo`` and ``--input`` so the zoo file
    location is known; the per-model options are then registered from that
    file through ``add_preproc_args`` and a second parser (with help enabled)
    parses the full command line.

    Args:
        model_name: tracker alias, one of "nanotrack", "vit" or "dasiamrpn".

    Returns:
        The ``argparse.Namespace`` produced by the final parser.

    Raises:
        SystemExit: with a non-zero status when ``model_name`` is not a
            supported alias.
    """
    # Local import: the visible import lines of this file do not import `os`
    # explicitly, so do not depend on it leaking in via `from common import *`.
    import os

    # Reject an unknown alias up front, before touching the command line or
    # the zoo file, and report it as a failure rather than exiting with 0.
    if model_name not in ("nanotrack", "vit", "dasiamrpn"):
        print("Pass the valid alias. Choices are { nanotrack, vit, dasiamrpn }")
        sys.exit(1)

    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--zoo', default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models.yml'),
                        help='An optional path to file with preprocessing parameters.')
    parser.add_argument("--input", type=str, help="Path to video source")
    args, _ = parser.parse_known_args()

    add_preproc_args(args.zoo, parser, 'object_tracker', alias=model_name)
    # Multi-network trackers register one prefixed option group per network.
    if model_name == "dasiamrpn":
        add_preproc_args(args.zoo, parser, 'object_tracker', prefix="dasiamrpn_", alias="dasiamrpn")
        add_preproc_args(args.zoo, parser, 'object_tracker', prefix="dasiamrpn_kernel_r1_", alias="dasiamrpn")
        add_preproc_args(args.zoo, parser, 'object_tracker', prefix="dasiamrpn_kernel_cls_", alias="dasiamrpn")
    elif model_name == "nanotrack":
        add_preproc_args(args.zoo, parser, 'object_tracker', prefix="nanotrack_back_", alias="nanotrack")
        add_preproc_args(args.zoo, parser, 'object_tracker', prefix="nanotrack_head_", alias="nanotrack")

    parser = argparse.ArgumentParser(parents=[parser],
                                     description='''
        Firstly, download required models using `python download_models.py {modelName}`
        Run using python object_tracker.py {modelName}.
        ''',
                                     formatter_class=argparse.RawTextHelpFormatter)
    return parser.parse_args()
|
||||
|
||||
def createTracker(model_name, args):
    """Instantiate the tracker selected by ``model_name``.

    Model files are resolved with ``findModel`` from the path/sha1 pairs
    stored on ``args``. For any alias other than 'vit', 'nanotrack' or
    'dasiamrpn' the usage text is printed and the process exits with -1.
    """
    if model_name == 'vit':
        print("Using Vit Tracker.")
        vit_params = cv.TrackerVit_Params()
        vit_params.net = findModel(args.model, args.sha1)
        return cv.TrackerVit_create(vit_params)

    if model_name == 'nanotrack':
        print("Using Nano Tracker.")
        nano_params = cv.TrackerNano_Params()
        nano_params.backbone = findModel(args.nanotrack_back_model, args.nanotrack_back_sha1)
        nano_params.neckhead = findModel(args.nanotrack_head_model, args.nanotrack_head_sha1)
        return cv.TrackerNano_create(nano_params)

    if model_name == 'dasiamrpn':
        print("Using Dasiamrpn Tracker.")
        siam_params = cv.TrackerDaSiamRPN_Params()
        siam_params.model = findModel(args.dasiamrpn_model, args.dasiamrpn_sha1)
        siam_params.kernel_cls1 = findModel(args.dasiamrpn_kernel_cls_model, args.dasiamrpn_kernel_cls_sha1)
        siam_params.kernel_r1 = findModel(args.dasiamrpn_kernel_r1_model, args.dasiamrpn_kernel_r1_sha1)
        return cv.TrackerDaSiamRPN_create(siam_params)

    # Unknown alias: show how to run the sample and stop.
    help()
    exit(-1)
|
||||
|
||||
def main(model_name, args):
    """Interactively track a user-selected target in a video or camera stream.

    Phase 1 plays the stream until the user pauses it with the space bar and
    draws a bounding box. Phase 2 updates the tracker on every frame and
    overlays FPS, tracking score and the active algorithm. During phase 2 the
    space bar selects a new target, 'v'/'n'/'d' switch to the ViT, Nano and
    DaSiamRPN trackers, and 'q'/ESC quit.

    Args:
        model_name: alias of the tracker to start with.
        args: parsed command line as returned by ``load_parser``.

    Returns:
        -1 when the stream ends before a target is selected or the tracker
        cannot be initialised, otherwise None.
    """
    tracker = createTracker(model_name, args)
    videoPath = args.input
    print('Using video: {}'.format(videoPath))
    cap = cv.VideoCapture(cv.samples.findFile(args.input) if args.input else 0)
    if not cap.isOpened():
        print("Can't open video stream: {}".format(videoPath))
        exit(-1)

    stdSize = 0.6
    stdWeight = 2
    stdImgSize = 512
    imgWidth = -1  # Initialization
    fontSize = 1.5
    fontThickness = 1
    alpha = 0.5
    windowName = "TRACKING"
    font = cv.FONT_HERSHEY_SIMPLEX
    black = (0, 0, 0)
    white = (255, 255, 255)
    # Key code -> tracker alias for run-time switching.
    switch_keys = {ord('v'): "vit", ord('n'): "nanotrack", ord('d'): "dasiamrpn"}
    cv.namedWindow(windowName, cv.WINDOW_NORMAL)

    try:
        # ---- Phase 1: play the stream until a valid ROI is drawn ----
        while True:
            ret, image = cap.read()
            if not ret:
                print("Video completed!!")
                return -1
            if imgWidth == -1:
                # Scale the text once, from the first frame's smaller side.
                imgWidth = min(image.shape[:2])
                fontSize = min(fontSize, (stdSize*imgWidth)/stdImgSize)
                fontThickness = max(fontThickness, (stdWeight*imgWidth)//stdImgSize)
            label = "Press space bar to pause video to draw bounding box."
            labelSize, _ = cv.getTextSize(label, font, fontSize, fontThickness)
            org_img = image.copy()
            cv.rectangle(image, (0, 0), (labelSize[0]+10, labelSize[1]+int(40*fontSize)), white, cv.FILLED)
            cv.addWeighted(image, alpha, org_img, 1 - alpha, 0, image)
            cv.putText(image, label, (10, int(25*fontSize)), font, fontSize, black, fontThickness)
            cv.putText(image, "Press space bar after selecting.", (10, int(55*fontSize)), font, fontSize, black, fontThickness)
            cv.imshow(windowName, image)

            key = cv.waitKey(30) & 0xFF
            if key == ord(' '):
                bbox = cv.selectROI(windowName, image)
                print('ROI: {}'.format(bbox))
                # A usable box needs a positive width and height.
                if bbox[2] > 0 and bbox[3] > 0:
                    break

            if key == ord('q') or key == 27:
                return

        try:
            # Initialise on the clean frame, not on the one carrying the
            # blended banner and instruction text.
            tracker.init(org_img, bbox)
        except Exception as e:
            print('Unable to initialize tracker with requested bounding box. Is there any object?')
            print(e)
            # Updating an uninitialised tracker makes no sense; stop here.
            return -1

        # ---- Phase 2: track and render ----
        last_box = bbox  # most recent box known to be valid
        label = "Press space bar to select new target"
        labelSize, _ = cv.getTextSize(label, font, fontSize, fontThickness)
        tick_meter = cv.TickMeter()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            tick_meter.reset()
            tick_meter.start()
            ok, newbox = tracker.update(frame)
            tick_meter.stop()
            score = tracker.getTrackingScore()
            if ok:
                last_box = newbox

            render_image = frame.copy()
            key = cv.waitKey(30) & 0xFF
            h, w = frame.shape[:2]
            cv.rectangle(render_image, (0, 0), (labelSize[0]+10, labelSize[1]+int(100*fontSize)), white, cv.FILLED)
            cv.rectangle(render_image, (0, int(h-45*fontSize)), (w, h), white, cv.FILLED)
            cv.addWeighted(render_image, alpha, frame, 1 - alpha, 0, render_image)
            cv.putText(render_image, label, (10, int(25*fontSize)), font, fontSize, black, fontThickness)
            cv.putText(render_image, "For switching between trackers: press 'v' for ViT, 'n' for Nanotrack, and 'd' for DaSiamRPN.", (10, h-10), font, 0.8*fontSize, black, fontThickness)

            # Keys are handled whether or not the target is currently found,
            # so a lost target can still be re-selected.
            if key == ord(' '):
                cv.putText(render_image, "Select the new target", (10, int(55*fontSize)), font, fontSize, black, fontThickness)
                bbox = cv.selectROI(windowName, render_image)
                print('ROI:', bbox)
                if bbox[2] > 0 and bbox[3] > 0:
                    tracker.init(frame, bbox)
                    last_box = bbox
            elif key in switch_keys:
                model_name = switch_keys[key]
                args = load_parser(model_name)
                tracker = createTracker(model_name, args)
                # Seed the new tracker with the last valid box; this equals
                # `newbox` whenever the current update succeeded.
                tracker.init(frame, last_box)

            if ok:
                cv.rectangle(render_image, newbox, (200, 0, 0), thickness=2)

            time_label = f"FPS: {tick_meter.getFPS():.2f}"
            score_label = f"Tracking score: {score:.2f}"
            algo_label = f"Algorithm: {model_name}"
            cv.putText(render_image, time_label, (10, int(55*fontSize)), font, fontSize, black, fontThickness)
            cv.putText(render_image, score_label, (10, int(85*fontSize)), font, fontSize, black, fontThickness)
            cv.putText(render_image, algo_label, (10, int(115*fontSize)), font, fontSize, black, fontThickness)

            cv.imshow(windowName, render_image)
            if key in (ord('q'), 27):
                break
    finally:
        # Release the capture on every exit path.
        cap.release()
|
||||
|
||||
if __name__ == '__main__':
    help()
    # The first positional argument, when present, is the tracker alias;
    # anything starting with "--" is an option, so fall back to ViT.
    alias_given = len(sys.argv) >= 2 and not sys.argv[1].startswith("--")
    model_name = sys.argv[1] if alias_given else "vit"
    args = load_parser(model_name)

    main(model_name, args)
    cv.destroyAllWindows()
|
@ -1,183 +0,0 @@
|
||||
// VitTracker
|
||||
// model: https://github.com/opencv/opencv_zoo/tree/main/models/object_tracking_vittrack
|
||||
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
|
||||
#include <opencv2/dnn.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::dnn;
|
||||
|
||||
// Option table for cv::CommandLineParser; each entry is
// "{ name [alias] | default | help text }".
const char *keys =
        "{ help     h  |   | Print help message }"
        "{ input    i  |   | Full path to input video folder, the specific camera index. (empty for camera 0) }"
        "{ net         | vitTracker.onnx | Path to onnx model of vitTracker.onnx}"
        "{ tracking_score_threshold t | 0.3 | Tracking score threshold. If a bbox of score >= 0.3, it is considered as found }"
        "{ backend     | 0 | Choose one of computation backends: "
                            "0: automatically (by default), "
                            "1: Halide language (http://halide-lang.org/), "
                            "2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                            "3: OpenCV implementation, "
                            "4: VKCOM, "
                            "5: CUDA }"  // no separator between entries (a stray ',' used to follow the brace)
        "{ target      | 0 | Choose one of target computation devices: "
                            "0: CPU target (by default), "
                            "1: OpenCL, "
                            "2: OpenCL fp16 (half-float precision), "
                            "3: VPU, "
                            "4: Vulkan, "
                            "6: CUDA, "
                            "7: CUDA fp16 (half-float preprocess) }"
;
|
||||
|
||||
static
|
||||
int run(int argc, char** argv)
|
||||
{
|
||||
// Parse command line arguments.
|
||||
CommandLineParser parser(argc, argv, keys);
|
||||
|
||||
if (parser.has("help"))
|
||||
{
|
||||
parser.printMessage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::string inputName = parser.get<String>("input");
|
||||
std::string net = parser.get<String>("net");
|
||||
int backend = parser.get<int>("backend");
|
||||
int target = parser.get<int>("target");
|
||||
float tracking_score_threshold = parser.get<float>("tracking_score_threshold");
|
||||
|
||||
Ptr<TrackerVit> tracker;
|
||||
try
|
||||
{
|
||||
TrackerVit::Params params;
|
||||
params.net = samples::findFile(net);
|
||||
params.backend = backend;
|
||||
params.target = target;
|
||||
params.tracking_score_threshold = tracking_score_threshold;
|
||||
tracker = TrackerVit::create(params);
|
||||
}
|
||||
catch (const cv::Exception& ee)
|
||||
{
|
||||
std::cerr << "Exception: " << ee.what() << std::endl;
|
||||
std::cout << "Can't load the network by using the following files:" << std::endl;
|
||||
std::cout << "net : " << net << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
const std::string winName = "vitTracker";
|
||||
namedWindow(winName, WINDOW_AUTOSIZE);
|
||||
|
||||
// Open a video file or an image file or a camera stream.
|
||||
VideoCapture cap;
|
||||
|
||||
if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1))
|
||||
{
|
||||
int c = inputName.empty() ? 0 : inputName[0] - '0';
|
||||
std::cout << "Trying to open camera #" << c << " ..." << std::endl;
|
||||
if (!cap.open(c))
|
||||
{
|
||||
std::cout << "Capture from camera #" << c << " didn't work. Specify -i=<video> parameter to read from video file" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else if (inputName.size())
|
||||
{
|
||||
inputName = samples::findFileOrKeep(inputName);
|
||||
if (!cap.open(inputName))
|
||||
{
|
||||
std::cout << "Could not open: " << inputName << std::endl;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the first image.
|
||||
Mat image;
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame!" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
Mat image_select = image.clone();
|
||||
putText(image_select, "Select initial bounding box you want to track.", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(image_select, "And Press the ENTER key.", Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
|
||||
Rect selectRect = selectROI(winName, image_select);
|
||||
std::cout << "ROI=" << selectRect << std::endl;
|
||||
if (selectRect.empty())
|
||||
{
|
||||
std::cerr << "Invalid ROI!" << std::endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
tracker->init(image, selectRect);
|
||||
|
||||
TickMeter tickMeter;
|
||||
|
||||
for (int count = 0; ; ++count)
|
||||
{
|
||||
cap >> image;
|
||||
if (image.empty())
|
||||
{
|
||||
std::cerr << "Can't capture frame " << count << ". End of video stream?" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
Rect rect;
|
||||
|
||||
tickMeter.start();
|
||||
bool ok = tracker->update(image, rect);
|
||||
tickMeter.stop();
|
||||
|
||||
float score = tracker->getTrackingScore();
|
||||
|
||||
std::cout << "frame " << count;
|
||||
if (ok) {
|
||||
std::cout << ": predicted score=" << score <<
|
||||
"\trect=" << rect <<
|
||||
"\ttime=" << tickMeter.getTimeMilli() << "ms" << std::endl;
|
||||
|
||||
rectangle(image, rect, Scalar(0, 255, 0), 2);
|
||||
|
||||
std::string timeLabel = format("Inference time: %.2f ms", tickMeter.getTimeMilli());
|
||||
std::string scoreLabel = format("Score: %f", score);
|
||||
putText(image, timeLabel, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
putText(image, scoreLabel, Point(0, 35), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
|
||||
} else {
|
||||
std::cout << ": target lost" << std::endl;
|
||||
putText(image, "Target lost", Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));
|
||||
}
|
||||
|
||||
imshow(winName, image);
|
||||
|
||||
tickMeter.reset();
|
||||
|
||||
int c = waitKey(1);
|
||||
if (c == 27 /*ESC*/ || c == 'q' || c == 'Q')
|
||||
break;
|
||||
}
|
||||
|
||||
std::cout << "Exit" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
try
|
||||
{
|
||||
return run(argc, argv);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
std::cerr << "FATAL: C++ exception: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
@ -1,173 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
'''
|
||||
Tracker demo
|
||||
|
||||
For usage download models by following links
|
||||
For DaSiamRPN:
|
||||
network: https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0
|
||||
kernel_r1: https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0
|
||||
kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0
|
||||
For NanoTrack:
|
||||
nanotrack_backbone: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/nanotrackv2/nanotrack_backbone_sim.onnx
|
||||
nanotrack_headneck: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/nanotrackv2/nanotrack_head_sim.onnx
|
||||
For VitTrack:
|
||||
vitTracker: https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx
|
||||
USAGE:
|
||||
tracker.py [-h] [--input INPUT_VIDEO]
|
||||
[--tracker_algo TRACKER_ALGO mil, dasiamrpn, nanotrack, vittrack]
|
||||
[--dasiamrpn_net DASIAMRPN_NET]
|
||||
[--dasiamrpn_kernel_r1 DASIAMRPN_KERNEL_R1]
|
||||
[--dasiamrpn_kernel_cls1 DASIAMRPN_KERNEL_CLS1]
|
||||
[--dasiamrpn_backend DASIAMRPN_BACKEND]
|
||||
[--dasiamrpn_target DASIAMRPN_TARGET]
|
||||
[--nanotrack_backbone NANOTRACK_BACKBONE]
|
||||
[--nanotrack_headneck NANOTRACK_TARGET]
|
||||
[--vittrack_net VITTRACK_MODEL]
|
||||
[--vittrack_net VITTRACK_MODEL]
|
||||
[--tracking_score_threshold TRACKING SCORE THRESHOLD FOR ONLY VITTRACK]
|
||||
[--backend CHOOSE ONE OF COMPUTATION BACKEND]
|
||||
[--target CHOOSE ONE OF COMPUTATION TARGET]
|
||||
'''
|
||||
|
||||
# Python 2/3 compatibility
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import cv2 as cv
|
||||
import argparse
|
||||
|
||||
from video import create_capture, presets
|
||||
|
||||
backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV,
|
||||
cv.dnn.DNN_BACKEND_VKCOM, cv.dnn.DNN_BACKEND_CUDA)
|
||||
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD,
|
||||
cv.dnn.DNN_TARGET_VULKAN, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16)
|
||||
|
||||
class App(object):
    """Interactive tracker demo driven by parsed command-line arguments.

    The tracker named by ``args.tracker_algo`` is created on construction;
    ``run`` opens the video, asks for an ROI and tracks it frame by frame.
    """

    def __init__(self, args):
        self.args = args
        self.trackerAlgorithm = args.tracker_algo
        self.tracker = self.createTracker()

    def createTracker(self):
        """Create the tracker selected by ``self.trackerAlgorithm``.

        Exits the process with an explanatory message for an unknown name.
        """
        # Read options from the instance, not from a module-level `args`,
        # so the class also works when it is constructed from other code.
        args = self.args
        if self.trackerAlgorithm == 'mil':
            tracker = cv.TrackerMIL_create()
        elif self.trackerAlgorithm == 'dasiamrpn':
            params = cv.TrackerDaSiamRPN_Params()
            params.model = args.dasiamrpn_net
            params.kernel_cls1 = args.dasiamrpn_kernel_cls1
            params.kernel_r1 = args.dasiamrpn_kernel_r1
            params.backend = args.backend
            params.target = args.target
            tracker = cv.TrackerDaSiamRPN_create(params)
        elif self.trackerAlgorithm == 'nanotrack':
            params = cv.TrackerNano_Params()
            params.backbone = args.nanotrack_backbone
            params.neckhead = args.nanotrack_headneck
            params.backend = args.backend
            params.target = args.target
            tracker = cv.TrackerNano_create(params)
        elif self.trackerAlgorithm == 'vittrack':
            params = cv.TrackerVit_Params()
            params.net = args.vittrack_net
            # The option may be left unset (None); keep the library default then.
            if args.tracking_score_threshold is not None:
                params.tracking_score_threshold = args.tracking_score_threshold
            params.backend = args.backend
            params.target = args.target
            tracker = cv.TrackerVit_create(params)
        else:
            sys.exit("Tracker {} is not recognized. Please use one of four available: mil, dasiamrpn, nanotrack, vittrack.".format(self.trackerAlgorithm))
        return tracker

    def initializeTracker(self, image):
        """Ask for an ROI on ``image`` until the tracker accepts one.

        Exits the process when the selection has a non-positive width or height.
        """
        while True:
            print('==> Select object ROI for tracker ...')
            bbox = cv.selectROI('tracking', image)
            print('ROI: {}'.format(bbox))
            if bbox[2] <= 0 or bbox[3] <= 0:
                sys.exit("ROI selection cancelled. Exiting...")

            try:
                self.tracker.init(image, bbox)
            except Exception as e:
                print('Unable to initialize tracker with requested bounding box. Is there any object?')
                print(e)
                print('Try again ...')
                continue

            return

    def run(self):
        """Open the input, initialise the tracker and run the display loop.

        SPACE re-initialises the tracker on the current frame, ESC quits.
        """
        videoPath = self.args.input
        print('Using video: {}'.format(videoPath))
        camera = create_capture(cv.samples.findFileOrKeep(videoPath), presets['cube'])
        if not camera.isOpened():
            sys.exit("Can't open video stream: {}".format(videoPath))

        ok, image = camera.read()
        if not ok:
            sys.exit("Can't read first frame")
        assert image is not None

        cv.namedWindow('tracking')
        self.initializeTracker(image)

        print("==> Tracking is started. Press 'SPACE' to re-initialize tracker or 'ESC' for exit...")

        while camera.isOpened():
            ok, image = camera.read()
            if not ok:
                print("Can't read frame")
                break

            ok, newbox = self.tracker.update(image)
            #print(ok, newbox)

            if ok:
                cv.rectangle(image, newbox, (200,0,0))

            cv.imshow("tracking", image)
            k = cv.waitKey(1)
            if k == 32:  # SPACE
                self.initializeTracker(image)
            if k == 27:  # ESC
                break

        print('Done')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    print(__doc__)
    parser = argparse.ArgumentParser(description="Run tracker")
    parser.add_argument("--input", type=str, default="vtest.avi", help="Path to video source")
    parser.add_argument("--tracker_algo", type=str, default="nanotrack", help="One of available tracking algorithms: mil, dasiamrpn, nanotrack, vittrack")
    parser.add_argument("--dasiamrpn_net", type=str, default="dasiamrpn_model.onnx", help="Path to onnx model of DaSiamRPN net")
    parser.add_argument("--dasiamrpn_kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Path to onnx model of DaSiamRPN kernel_r1")
    parser.add_argument("--dasiamrpn_kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Path to onnx model of DaSiamRPN kernel_cls1")
    parser.add_argument("--nanotrack_backbone", type=str, default="nanotrack_backbone_sim.onnx", help="Path to onnx model of NanoTrack backBone")
    parser.add_argument("--nanotrack_headneck", type=str, default="nanotrack_head_sim.onnx", help="Path to onnx model of NanoTrack headNeck")
    parser.add_argument("--vittrack_net", type=str, default="vitTracker.onnx", help="Path to onnx model of vittrack")
    # Default matches the 0.3 quoted in the help text; without a default the
    # option is None when omitted and is then assigned to the tracker params.
    parser.add_argument('--tracking_score_threshold', type=float, default=0.3, help="Tracking score threshold. If a bbox of score >= 0.3, it is considered as found ")
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help="Choose one of computation backends: "
                             "%d: automatically (by default), "
                             "%d: Halide language (http://halide-lang.org/), "
                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                             "%d: OpenCV implementation, "
                             "%d: VKCOM, "
                             "%d: CUDA"% backends)
    parser.add_argument("--target", choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help="Choose one of target computation devices: "
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: VPU, '
                             '%d: VULKAN, '
                             '%d: CUDA, '
                             '%d: CUDA fp16 (half-float preprocess)'% targets)

    args = parser.parse_args()
    App(args).run()
    cv.destroyAllWindows()
|
Loading…
Reference in New Issue
Block a user