Merge pull request #25503 from WanliZhong:remove_goturn

Remove goturn caffe model #25503 **Merged with:** https://github.com/opencv/opencv_extra/pull/1174 **Merged with:** https://github.com/opencv/opencv_contrib/pull/3729 Part of https://github.com/opencv/opencv/issues/25314 This PR aims to remove the GOTURN tracking model because the Caffe importer will be removed in 5.0. The GOTURN model will take **388 MB** of traffic for each download if converted to ONNX. If the user wants to use the tracking method, we can recommend that they use ViT or DaSiamRPN. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
2025-08-06 14:36:36 +08:00 · 2024-05-06 16:57:30 +08:00 · 2024-05-06 16:57:30 +08:00 · d231b4e362
commit d231b4e362
parent 94f4678d3a
10 changed files with 8 additions and 259 deletions
--- a/modules/video/doc/video.bib
+++ b/modules/video/doc/video.bib
@ -20,13 +20,6 @@
  publisher={ACM}
 }

-@inproceedings{GOTURN,
-  title={Learning to Track at 100 FPS with Deep Regression Networks},
-  author={Held, David and Thrun, Sebastian and Savarese, Silvio},
-  booktitle={European Conference Computer Vision (ECCV)},
-  year={2016}
-}
-
@inproceedings{Kroeger2016,
  author={Till Kroeger and Radu Timofte and Dengxin Dai and Luc Van Gool},
  title={Fast Optical Flow using Dense Inverse Search},
--- a/modules/video/include/opencv2/video/tracking.hpp
+++ b/modules/video/include/opencv2/video/tracking.hpp
@ -789,47 +789,6 @@ public:
    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
 };

-
-
-/** @brief the GOTURN (Generic Object Tracking Using Regression Networks) tracker
- *
- *  GOTURN (@cite GOTURN) is kind of trackers based on Convolutional Neural Networks (CNN). While taking all advantages of CNN trackers,
- *  GOTURN is much faster due to offline training without online fine-tuning nature.
- *  GOTURN tracker addresses the problem of single target tracking: given a bounding box label of an object in the first frame of the video,
- *  we track that object through the rest of the video. NOTE: Current method of GOTURN does not handle occlusions; however, it is fairly
- *  robust to viewpoint changes, lighting changes, and deformations.
- *  Inputs of GOTURN are two RGB patches representing Target and Search patches resized to 227x227.
- *  Outputs of GOTURN are predicted bounding box coordinates, relative to Search patch coordinate system, in format X1,Y1,X2,Y2.
- *  Original paper is here: <http://davheld.github.io/GOTURN/GOTURN.pdf>
- *  As long as original authors implementation: <https://github.com/davheld/GOTURN#train-the-tracker>
- *  Implementation of training algorithm is placed in separately here due to 3d-party dependencies:
- *  <https://github.com/Auron-X/GOTURN_Training_Toolkit>
- *  GOTURN architecture goturn.prototxt and trained model goturn.caffemodel are accessible on opencv_extra GitHub repository.
- */
-class CV_EXPORTS_W TrackerGOTURN : public Tracker
-{
-protected:
-    TrackerGOTURN();  // use ::create()
-public:
-    virtual ~TrackerGOTURN() CV_OVERRIDE;
-
-    struct CV_EXPORTS_W_SIMPLE Params
-    {
-        CV_WRAP Params();
-        CV_PROP_RW std::string modelTxt;
-        CV_PROP_RW std::string modelBin;
-    };
-
-    /** @brief Constructor
-    @param parameters GOTURN parameters TrackerGOTURN::Params
-    */
-    static CV_WRAP
-    Ptr<TrackerGOTURN> create(const TrackerGOTURN::Params& parameters = TrackerGOTURN::Params());
-
-    //void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
-    //bool update(InputArray image, CV_OUT Rect& boundingBox) CV_OVERRIDE;
-};
-
 class CV_EXPORTS_W TrackerDaSiamRPN : public Tracker
 {
 protected:
--- a/modules/video/misc/java/test/TrackerCreateTest.java
+++ b/modules/video/misc/java/test/TrackerCreateTest.java
@ -8,7 +8,7 @@ import org.opencv.core.Rect;
 import org.opencv.test.OpenCVTestCase;

 import org.opencv.video.Tracker;
-import org.opencv.video.TrackerGOTURN;
+import org.opencv.video.TrackerVit;
 import org.opencv.video.TrackerMIL;

 public class TrackerCreateTest extends OpenCVTestCase {
@ -19,9 +19,9 @@ public class TrackerCreateTest extends OpenCVTestCase {
    }


-    public void testCreateTrackerGOTURN() {
+    public void testCreateTrackerVit() {
        try {
-            Tracker tracker = TrackerGOTURN.create();
+            Tracker tracker = TrackerVit.create();
            assert(tracker != null);
        } catch (CvException e) {
            // expected, model files may be missing
--- a/modules/video/misc/python/pyopencv_video.hpp
+++ b/modules/video/misc/python/pyopencv_video.hpp
@ -1,6 +1,5 @@
 #ifdef HAVE_OPENCV_VIDEO
 typedef TrackerMIL::Params TrackerMIL_Params;
-typedef TrackerGOTURN::Params TrackerGOTURN_Params;
 typedef TrackerDaSiamRPN::Params TrackerDaSiamRPN_Params;
 typedef TrackerNano::Params TrackerNano_Params;
 #endif
--- a/modules/video/misc/python/test/test_tracking.py
+++ b/modules/video/misc/python/test/test_tracking.py
@ -1,19 +0,0 @@
-#!/usr/bin/env python
-import os
-import numpy as np
-import cv2 as cv
-
-from tests_common import NewOpenCVTests, unittest
-
-class tracking_test(NewOpenCVTests):
-
-    def test_createTracker(self):
-        t = cv.TrackerMIL_create()
-        try:
-            t = cv.TrackerGOTURN_create()
-        except cv.error as e:
-            pass  # may fail due to missing DL model files
-
-
-if __name__ == '__main__':
-    NewOpenCVTests.bootstrap()
--- a/modules/video/perf/perf_trackers.cpp
+++ b/modules/video/perf/perf_trackers.cpp
@ -90,15 +90,4 @@ PERF_TEST_P(Tracking, MIL, testing::ValuesIn(getTrackingParams()))
    runTrackingTest<Rect>(tracker, GetParam());
 }

-PERF_TEST_P(Tracking, GOTURN, testing::ValuesIn(getTrackingParams()))
-{
-    std::string model = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.prototxt");
-    std::string weights = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.caffemodel", false);
-    TrackerGOTURN::Params params;
-    params.modelTxt = model;
-    params.modelBin = weights;
-    auto tracker = TrackerGOTURN::create(params);
-    runTrackingTest<Rect>(tracker, GetParam());
-}
-
 }} // namespace
--- a/modules/video/src/tracking/tracker_dasiamrpn.cpp
+++ b/modules/video/src/tracking/tracker_dasiamrpn.cpp
@ -434,7 +434,7 @@ Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& p
 Ptr<TrackerDaSiamRPN> TrackerDaSiamRPN::create(const TrackerDaSiamRPN::Params& parameters)
 {
    (void)(parameters);
-    CV_Error(cv::Error::StsNotImplemented, "to use GOTURN, the tracking module needs to be built with opencv_dnn !");
+    CV_Error(cv::Error::StsNotImplemented, "to use DaSiamRPN, the tracking module needs to be built with opencv_dnn !");
 }
 #endif  // OPENCV_HAVE_DNN
 }
--- a/modules/video/src/tracking/tracker_goturn.cpp
+++ b/modules/video/src/tracking/tracker_goturn.cpp
@ -1,140 +0,0 @@
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#include "../precomp.hpp"
-
-#ifdef HAVE_OPENCV_DNN
-#include "opencv2/dnn.hpp"
-#endif
-
-namespace cv {
-
-TrackerGOTURN::TrackerGOTURN()
-{
-    // nothing
-}
-
-TrackerGOTURN::~TrackerGOTURN()
-{
-    // nothing
-}
-
-TrackerGOTURN::Params::Params()
-{
-    modelTxt = "goturn.prototxt";
-    modelBin = "goturn.caffemodel";
-}
-
-#ifdef HAVE_OPENCV_DNN
-
-class TrackerGOTURNImpl : public TrackerGOTURN
-{
-public:
-    TrackerGOTURNImpl(const TrackerGOTURN::Params& parameters)
-        : params(parameters)
-    {
-        // Load GOTURN architecture from *.prototxt and pretrained weights from *.caffemodel
-        net = dnn::readNetFromCaffe(params.modelTxt, params.modelBin);
-        CV_Assert(!net.empty());
-    }
-
-    void init(InputArray image, const Rect& boundingBox) CV_OVERRIDE;
-    bool update(InputArray image, Rect& boundingBox) CV_OVERRIDE;
-
-    void setBoudingBox(Rect boundingBox)
-    {
-        if (image_.empty())
-            CV_Error(Error::StsInternal, "Set image first");
-        boundingBox_ = boundingBox & Rect(Point(0, 0), image_.size());
-    }
-
-    TrackerGOTURN::Params params;
-
-    dnn::Net net;
-    Rect boundingBox_;
-    Mat image_;
-};
-
-void TrackerGOTURNImpl::init(InputArray image, const Rect& boundingBox)
-{
-    image_ = image.getMat().clone();
-    setBoudingBox(boundingBox);
-}
-
-bool TrackerGOTURNImpl::update(InputArray image, Rect& boundingBox)
-{
-    int INPUT_SIZE = 227;
-    //Using prevFrame & prevBB from model and curFrame GOTURN calculating curBB
-    InputArray curFrame = image;
-    Mat prevFrame = image_;
-    Rect2d prevBB = boundingBox_;
-    Rect curBB;
-
-    float padTargetPatch = 2.0;
-    Rect2f searchPatchRect, targetPatchRect;
-    Point2f currCenter, prevCenter;
-    Mat prevFramePadded, curFramePadded;
-    Mat searchPatch, targetPatch;
-
-    prevCenter.x = (float)(prevBB.x + prevBB.width / 2);
-    prevCenter.y = (float)(prevBB.y + prevBB.height / 2);
-
-    targetPatchRect.width = (float)(prevBB.width * padTargetPatch);
-    targetPatchRect.height = (float)(prevBB.height * padTargetPatch);
-    targetPatchRect.x = (float)(prevCenter.x - prevBB.width * padTargetPatch / 2.0 + targetPatchRect.width);
-    targetPatchRect.y = (float)(prevCenter.y - prevBB.height * padTargetPatch / 2.0 + targetPatchRect.height);
-
-    targetPatchRect.width = std::min(targetPatchRect.width, (float)prevFrame.cols);
-    targetPatchRect.height = std::min(targetPatchRect.height, (float)prevFrame.rows);
-    targetPatchRect.x = std::max(-prevFrame.cols * 0.5f, std::min(targetPatchRect.x, prevFrame.cols * 1.5f));
-    targetPatchRect.y = std::max(-prevFrame.rows * 0.5f, std::min(targetPatchRect.y, prevFrame.rows * 1.5f));
-
-    copyMakeBorder(prevFrame, prevFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
-    targetPatch = prevFramePadded(targetPatchRect).clone();
-
-    copyMakeBorder(curFrame, curFramePadded, (int)targetPatchRect.height, (int)targetPatchRect.height, (int)targetPatchRect.width, (int)targetPatchRect.width, BORDER_REPLICATE);
-    searchPatch = curFramePadded(targetPatchRect).clone();
-
-    // Preprocess
-    // Resize
-    resize(targetPatch, targetPatch, Size(INPUT_SIZE, INPUT_SIZE), 0, 0, INTER_LINEAR_EXACT);
-    resize(searchPatch, searchPatch, Size(INPUT_SIZE, INPUT_SIZE), 0, 0, INTER_LINEAR_EXACT);
-
-    // Convert to Float type and subtract mean
-    Mat targetBlob = dnn::blobFromImage(targetPatch, 1.0f, Size(), Scalar::all(128), false);
-    Mat searchBlob = dnn::blobFromImage(searchPatch, 1.0f, Size(), Scalar::all(128), false);
-
-    net.setInput(targetBlob, "data1");
-    net.setInput(searchBlob, "data2");
-
-    Mat resMat = net.forward("scale").reshape(1, 1);
-
-    curBB.x = cvRound(targetPatchRect.x + (resMat.at<float>(0) * targetPatchRect.width / INPUT_SIZE) - targetPatchRect.width);
-    curBB.y = cvRound(targetPatchRect.y + (resMat.at<float>(1) * targetPatchRect.height / INPUT_SIZE) - targetPatchRect.height);
-    curBB.width = cvRound((resMat.at<float>(2) - resMat.at<float>(0)) * targetPatchRect.width / INPUT_SIZE);
-    curBB.height = cvRound((resMat.at<float>(3) - resMat.at<float>(1)) * targetPatchRect.height / INPUT_SIZE);
-
-    // Predicted BB
-    boundingBox = curBB & Rect(Point(0, 0), image_.size());
-
-    // Set new model image and BB from current frame
-    image_ = image.getMat().clone();
-    setBoudingBox(curBB);
-    return true;
-}
-
-Ptr<TrackerGOTURN> TrackerGOTURN::create(const TrackerGOTURN::Params& parameters)
-{
-    return makePtr<TrackerGOTURNImpl>(parameters);
-}
-
-#else  // OPENCV_HAVE_DNN
-Ptr<TrackerGOTURN> TrackerGOTURN::create(const TrackerGOTURN::Params& parameters)
-{
-    (void)(parameters);
-    CV_Error(cv::Error::StsNotImplemented, "to use GOTURN, the tracking module needs to be built with opencv_dnn !");
-}
-#endif  // OPENCV_HAVE_DNN
-
-}  // namespace cv
--- a/modules/video/test/test_trackers.cpp
+++ b/modules/video/test/test_trackers.cpp
@ -51,17 +51,6 @@ TEST_P(DistanceAndOverlap, Scaled_Data_MIL)
    test.run();
 }

-TEST_P(DistanceAndOverlap, GOTURN)
-{
-    std::string model = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.prototxt");
-    std::string weights = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.caffemodel", false);
-    cv::TrackerGOTURN::Params params;
-    params.modelTxt = model;
-    params.modelBin = weights;
-    TrackerTest<Tracker, Rect> test(TrackerGOTURN::create(params), dataset, 35, .35f, NoTransform);
-    test.run();
-}
-
 INSTANTIATE_TEST_CASE_P(Tracking, DistanceAndOverlap, TESTSET_NAMES);

 static bool checkIOU(const Rect& r0, const Rect& r1, double threshold)
@ -111,18 +100,6 @@ static void checkTrackingAccuracy(cv::Ptr<Tracker>& tracker, double iouThreshold
    }
 }

-TEST(GOTURN, accuracy)
-{
-    std::string model = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.prototxt");
-    std::string weights = cvtest::findDataFile("dnn/gsoc2016-goturn/goturn.caffemodel", false);
-    cv::TrackerGOTURN::Params params;
-    params.modelTxt = model;
-    params.modelBin = weights;
-    cv::Ptr<Tracker> tracker = TrackerGOTURN::create(params);
-    // TODO! GOTURN have low accuracy. Try to remove this api at 5.x.
-    checkTrackingAccuracy(tracker, 0.08);
-}
-
 TEST(DaSiamRPN, accuracy)
 {
    std::string model = cvtest::findDataFile("dnn/onnx/models/dasiamrpn_model.onnx", false);
--- a/samples/python/tracker.py
+++ b/samples/python/tracker.py
@ -3,8 +3,6 @@
 Tracker demo

 For usage download models by following links
-For GOTURN:
-    goturn.prototxt and goturn.caffemodel: https://github.com/opencv/opencv_extra/tree/c4219d5eb3105ed8e634278fad312a1a8d2c182d/testdata/tracking
 For DaSiamRPN:
    network:     https://www.dropbox.com/s/rr1lk9355vzolqv/dasiamrpn_model.onnx?dl=0
    kernel_r1:   https://www.dropbox.com/s/999cqx5zrfi7w4p/dasiamrpn_kernel_r1.onnx?dl=0
@ -12,10 +10,10 @@ For DaSiamRPN:
 For NanoTrack:
    nanotrack_backbone: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/nanotrackv2/nanotrack_backbone_sim.onnx
    nanotrack_headneck: https://github.com/HonglinChu/SiamTrackers/blob/master/NanoTrack/models/nanotrackv2/nanotrack_head_sim.onnx
-
+For VitTrack:
+    vitTracker: https://github.com/opencv/opencv_zoo/raw/fef72f8fa7c52eaf116d3df358d24e6e959ada0e/models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx
 USAGE:
    tracker.py [-h] [--input INPUT] [--tracker_algo TRACKER_ALGO]
-                    [--goturn GOTURN] [--goturn_model GOTURN_MODEL]
                    [--dasiamrpn_net DASIAMRPN_NET]
                    [--dasiamrpn_kernel_r1 DASIAMRPN_KERNEL_R1]
                    [--dasiamrpn_kernel_cls1 DASIAMRPN_KERNEL_CLS1]
@ -46,11 +44,6 @@ class App(object):
    def createTracker(self):
        if self.trackerAlgorithm == 'mil':
            tracker = cv.TrackerMIL_create()
-        elif self.trackerAlgorithm == 'goturn':
-            params = cv.TrackerGOTURN_Params()
-            params.modelTxt = self.args.goturn
-            params.modelBin = self.args.goturn_model
-            tracker = cv.TrackerGOTURN_create(params)
        elif self.trackerAlgorithm == 'dasiamrpn':
            params = cv.TrackerDaSiamRPN_Params()
            params.model = self.args.dasiamrpn_net
@ -67,7 +60,7 @@ class App(object):
            params.net = args.vittrack_net
            tracker = cv.TrackerVit_create(params)
        else:
-            sys.exit("Tracker {} is not recognized. Please use one of three available: mil, goturn, dasiamrpn, nanotrack.".format(self.trackerAlgorithm))
+            sys.exit("Tracker {} is not recognized. Please use one of four available: mil, dasiamrpn, nanotrack, vittrack.".format(self.trackerAlgorithm))
        return tracker

    def initializeTracker(self, image):
@ -131,9 +124,7 @@ if __name__ == '__main__':
    print(__doc__)
    parser = argparse.ArgumentParser(description="Run tracker")
    parser.add_argument("--input", type=str, default="vtest.avi", help="Path to video source")
-    parser.add_argument("--tracker_algo", type=str, default="nanotrack", help="One of available tracking algorithms: mil, goturn, dasiamrpn, nanotrack, vittrack")
-    parser.add_argument("--goturn", type=str, default="goturn.prototxt", help="Path to GOTURN architecture")
-    parser.add_argument("--goturn_model", type=str, default="goturn.caffemodel", help="Path to GOTERN model")
+    parser.add_argument("--tracker_algo", type=str, default="nanotrack", help="One of available tracking algorithms: mil, dasiamrpn, nanotrack, vittrack")
    parser.add_argument("--dasiamrpn_net", type=str, default="dasiamrpn_model.onnx", help="Path to onnx model of DaSiamRPN net")
    parser.add_argument("--dasiamrpn_kernel_r1", type=str, default="dasiamrpn_kernel_r1.onnx", help="Path to onnx model of DaSiamRPN kernel_r1")
    parser.add_argument("--dasiamrpn_kernel_cls1", type=str, default="dasiamrpn_kernel_cls1.onnx", help="Path to onnx model of DaSiamRPN kernel_cls1")