From 816851c99962dce0e661ce0401393f52818e0b61 Mon Sep 17 00:00:00 2001 From: Gursimar Singh Date: Thu, 5 Dec 2024 14:20:03 +0530 Subject: [PATCH] Merge pull request #26202 from gursimarsingh:improved_tracker_samples Improved Tracker Samples #26202 Relates to #25006 This sample has been rewritten to track a selected target in a video or camera stream. It combines the ViT, Nano and DaSiamRPN trackers into a single tracker sample. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. 
- [x] The feature is well documented and sample code can be built with the project CMake --- .../video/include/opencv2/video/tracking.hpp | 17 +- modules/video/src/tracking/tracker_nano.cpp | 8 +- modules/video/src/tracking/tracker_vit.cpp | 6 +- samples/dnn/dasiamrpn_tracker.cpp | 190 ----------- samples/dnn/download_models.py | 51 +-- samples/dnn/models.yml | 38 +++ samples/dnn/nanotrack_tracker.cpp | 184 ----------- samples/dnn/object_tracker.cpp | 309 ++++++++++++++++++ samples/dnn/object_tracker.py | 200 ++++++++++++ samples/dnn/vit_tracker.cpp | 183 ----------- samples/python/tracker.py | 173 ---------- 11 files changed, 591 insertions(+), 768 deletions(-) delete mode 100644 samples/dnn/dasiamrpn_tracker.cpp delete mode 100644 samples/dnn/nanotrack_tracker.cpp create mode 100644 samples/dnn/object_tracker.cpp create mode 100644 samples/dnn/object_tracker.py delete mode 100644 samples/dnn/vit_tracker.cpp delete mode 100644 samples/python/tracker.py diff --git a/modules/video/include/opencv2/video/tracking.hpp b/modules/video/include/opencv2/video/tracking.hpp index b1fc24f36c..2f2baa5760 100644 --- a/modules/video/include/opencv2/video/tracking.hpp +++ b/modules/video/include/opencv2/video/tracking.hpp @@ -769,6 +769,11 @@ public: */ CV_WRAP virtual bool update(InputArray image, CV_OUT Rect& boundingBox) = 0; + + /** @brief Return tracking score + */ + CV_WRAP virtual float getTrackingScore() { return -1; } + }; @@ -834,10 +839,6 @@ public: static CV_WRAP Ptr create(const TrackerDaSiamRPN::Params& parameters = TrackerDaSiamRPN::Params()); - /** @brief Return tracking score - */ - CV_WRAP virtual float getTrackingScore() = 0; - //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE; }; @@ -872,10 +873,6 @@ public: static CV_WRAP Ptr create(const TrackerNano::Params& parameters = TrackerNano::Params()); - /** @brief Return tracking score - */ - CV_WRAP virtual float 
getTrackingScore() = 0; - //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE; }; @@ -910,10 +907,6 @@ public: static CV_WRAP Ptr create(const TrackerVit::Params& parameters = TrackerVit::Params()); - /** @brief Return tracking score - */ - CV_WRAP virtual float getTrackingScore() = 0; - // void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE; // bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE; }; diff --git a/modules/video/src/tracking/tracker_nano.cpp b/modules/video/src/tracking/tracker_nano.cpp index dadad1ac55..240d7579e5 100644 --- a/modules/video/src/tracking/tracker_nano.cpp +++ b/modules/video/src/tracking/tracker_nano.cpp @@ -89,8 +89,12 @@ public: TrackerNanoImpl(const TrackerNano::Params& parameters) : params(parameters) { - backbone = dnn::readNet(params.backbone); - neckhead = dnn::readNet(params.neckhead); + dnn::EngineType engine = dnn::ENGINE_AUTO; + if (params.backend != 0 || params.target != 0){ + engine = dnn::ENGINE_CLASSIC; + } + backbone = dnn::readNet(params.backbone, "", "", engine); + neckhead = dnn::readNet(params.neckhead, "", "", engine); CV_Assert(!backbone.empty()); CV_Assert(!neckhead.empty()); diff --git a/modules/video/src/tracking/tracker_vit.cpp b/modules/video/src/tracking/tracker_vit.cpp index 1dfdde2a16..ee7206fb05 100644 --- a/modules/video/src/tracking/tracker_vit.cpp +++ b/modules/video/src/tracking/tracker_vit.cpp @@ -44,7 +44,11 @@ public: TrackerVitImpl(const TrackerVit::Params& parameters) : params(parameters) { - net = dnn::readNet(params.net); + dnn::EngineType engine = dnn::ENGINE_AUTO; + if (params.backend != 0 || params.target != 0){ + engine = dnn::ENGINE_CLASSIC; + } + net = dnn::readNet(params.net, "", "", engine); CV_Assert(!net.empty()); net.setPreferableBackend(params.backend); diff --git a/samples/dnn/dasiamrpn_tracker.cpp b/samples/dnn/dasiamrpn_tracker.cpp deleted file mode 100644 index 
683cda9113..0000000000 --- a/samples/dnn/dasiamrpn_tracker.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// DaSiamRPN tracker. -// Original paper: https://arxiv.org/abs/1808.06048 -// Link to original repo: https://github.com/foolwood/DaSiamRPN -// Links to onnx models: -// - network: https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0 -// - kernel_r1: https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0 -// - kernel_cls1: https://www.dropbox.com/s/qvmtszx5h339a0w/dasiamrpn_kernel_cls1.onnx?dl=0 - -#include -#include - -#include -#include -#include -#include - -using namespace cv; -using namespace cv::dnn; - -std::string param_keys = - "{ help h | | Print help message }" - "{ input i | | Full path to input video folder, the specific camera index. (empty for camera 0) }" - "{ net | dasiamrpn_model.onnx | Path to onnx model of net}" - "{ kernel_cls1 | dasiamrpn_kernel_cls1.onnx | Path to onnx model of kernel_r1 }" - "{ kernel_r1 | dasiamrpn_kernel_r1.onnx | Path to onnx model of kernel_cls1 }"; -std::string backend_keys = cv::format( - "{ backend | 0 | Choose one of computation backends: " - "%d: automatically (by default), " - "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), " - "%d: OpenCV implementation, " - "%d: VKCOM, " - "%d: CUDA }", cv::dnn::DNN_BACKEND_DEFAULT, cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_BACKEND_VKCOM, cv::dnn::DNN_BACKEND_CUDA); -std::string target_keys = cv::format( - "{ target | 0 | Choose one of target computation devices: " - "%d: CPU target (by default), " - "%d: OpenCL, " - "%d: OpenCL fp16 (half-float precision), " - "%d: VPU, " - "%d: Vulkan, " - "%d: CUDA, " - "%d: CUDA fp16 (half-float preprocess) }", cv::dnn::DNN_TARGET_CPU, cv::dnn::DNN_TARGET_OPENCL, cv::dnn::DNN_TARGET_OPENCL_FP16, cv::dnn::DNN_TARGET_MYRIAD, cv::dnn::DNN_TARGET_VULKAN, cv::dnn::DNN_TARGET_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16); -std::string keys = param_keys 
+ backend_keys + target_keys; - -static -int run(int argc, char** argv) -{ - // Parse command line arguments. - CommandLineParser parser(argc, argv, keys); - - if (parser.has("help")) - { - parser.printMessage(); - return 0; - } - - std::string inputName = parser.get("input"); - std::string net = parser.get("net"); - std::string kernel_cls1 = parser.get("kernel_cls1"); - std::string kernel_r1 = parser.get("kernel_r1"); - int backend = parser.get("backend"); - int target = parser.get("target"); - - Ptr tracker; - try - { - TrackerDaSiamRPN::Params params; - params.model = samples::findFile(net); - params.kernel_cls1 = samples::findFile(kernel_cls1); - params.kernel_r1 = samples::findFile(kernel_r1); - params.backend = backend; - params.target = target; - tracker = TrackerDaSiamRPN::create(params); - } - catch (const cv::Exception& ee) - { - std::cerr << "Exception: " << ee.what() << std::endl; - std::cout << "Can't load the network by using the following files:" << std::endl; - std::cout << "siamRPN : " << net << std::endl; - std::cout << "siamKernelCL1 : " << kernel_cls1 << std::endl; - std::cout << "siamKernelR1 : " << kernel_r1 << std::endl; - return 2; - } - - const std::string winName = "DaSiamRPN"; - namedWindow(winName, WINDOW_AUTOSIZE); - - // Open a video file or an image file or a camera stream. - VideoCapture cap; - - if (inputName.empty() || (isdigit(inputName[0]) && inputName.size() == 1)) - { - int c = inputName.empty() ? 0 : inputName[0] - '0'; - std::cout << "Trying to open camera #" << c << " ..." << std::endl; - if (!cap.open(c)) - { - std::cout << "Capture from camera #" << c << " didn't work. Specify -i=